From ad26aa6c60974acf3228ed0ade97ba5793093dbe Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Thu, 8 Jan 2015 11:04:07 +0900 Subject: regulator: s2mps11: Fix wrong calculation of register offset This patch adds the missing registers ('BUCK7_SW' & 'LDO29_CTRL'). Since BUCK7 has one more register (BUCK7_SW) than the other bucks, the register offset must be increased by one for every register whose address is higher than the BUCK7 registers. Fixes: 76b9840b24ae04 ("regulator: s2mps11: Add support S2MPS13 regulator device") Signed-off-by: Jonghwa Lee Signed-off-by: Chanwoo Choi Signed-off-by: Mark Brown Cc: --- include/linux/mfd/samsung/s2mps13.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/samsung/s2mps13.h b/include/linux/mfd/samsung/s2mps13.h index ce5dda8958fe..b1fd675fa36f 100644 --- a/include/linux/mfd/samsung/s2mps13.h +++ b/include/linux/mfd/samsung/s2mps13.h @@ -59,6 +59,7 @@ enum s2mps13_reg { S2MPS13_REG_B6CTRL, S2MPS13_REG_B6OUT, S2MPS13_REG_B7CTRL, + S2MPS13_REG_B7SW, S2MPS13_REG_B7OUT, S2MPS13_REG_B8CTRL, S2MPS13_REG_B8OUT, @@ -102,6 +103,7 @@ enum s2mps13_reg { S2MPS13_REG_L26CTRL, S2MPS13_REG_L27CTRL, S2MPS13_REG_L28CTRL, + S2MPS13_REG_L29CTRL, S2MPS13_REG_L30CTRL, S2MPS13_REG_L31CTRL, S2MPS13_REG_L32CTRL, -- cgit v1.2.3 From 85b4545629663486b7f71047ce3b54fa0ad3eb28 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:14 +0000 Subject: iommu: Consolidate IOVA allocator code In order to share the IOVA allocator with other architectures, break the unnecessary dependency on the Intel IOMMU driver and move the remaining IOVA internals to iova.c Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 19e81d5ccb6d..ad0507c61cc7 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -39,6 +39,9 @@ static inline unsigned long iova_size(struct iova *iova) return iova->pfn_hi - iova->pfn_lo + 1; } +int iommu_iova_cache_init(void); +void iommu_iova_cache_destroy(void); + struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); -- cgit v1.2.3 From 1b72250076dde4276acecf3a7da722b185703e78 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:15 +0000 Subject: iommu: Make IOVA domain low limit flexible To share the IOVA allocator with other architectures, it needs to accommodate more general aperture restrictions; move the lower limit from a compile-time constant to a runtime domain property to allow IOVA domains with different requirements to co-exist. Also reword the slightly unclear description of alloc_iova since we're touching it anyway.
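A minimal caller sketch (illustrative only, not taken from this patch; the domain variable and the dma_32bit_pfn bound are assumed, and the signature shown is the one introduced here, before the next patch adds a granule argument):

	struct iova_domain iovad;

	/* the lower limit, formerly the hard-coded IOVA_START_PFN (1), is now passed in */
	init_iova_domain(&iovad, 1, dma_32bit_pfn);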
Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index ad0507c61cc7..591b19626b46 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -16,9 +16,6 @@ #include #include -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) - /* iova structure */ struct iova { struct rb_node node; @@ -31,6 +28,7 @@ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; }; @@ -52,7 +50,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); +void init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, + unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, -- cgit v1.2.3 From 0fb5fe874c42942e16c450ae05da453e13a1c09e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:16 +0000 Subject: iommu: Make IOVA domain page size explicit Systems may contain heterogeneous IOMMUs supporting differing minimum page sizes, which may also not be common with the CPU page size. Thus it is practical to have an explicit notion of IOVA granularity to simplify handling of mapping and allocation constraints. As an initial step, move the IOVA page granularity from an implicit compile-time constant to a per-domain property so we can make use of it in IOVA domain context at runtime. To keep the abstraction tidy, extend the little API of inline iova_* helpers to parallel some of the equivalent PAGE_* macros. 
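A brief usage sketch (illustrative; "iovad", "paddr", "size" and "dma_limit_pfn" are assumed context, only the helper names come from this patch) showing the new helpers standing in for the PAGE_* macros when carving out an IOVA for a physical range:

	/* round the request to the domain's granule, not the CPU page size */
	size_t len = iova_align(iovad, size + iova_offset(iovad, paddr));
	struct iova *area = alloc_iova(iovad, len >> iova_shift(iovad), dma_limit_pfn, true);
	dma_addr_t dma = iova_dma_addr(iovad, area) + iova_offset(iovad, paddr);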
Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 591b19626b46..3920a19d8194 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -28,6 +28,7 @@ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; }; @@ -37,6 +38,36 @@ static inline unsigned long iova_size(struct iova *iova) return iova->pfn_hi - iova->pfn_lo + 1; } +static inline unsigned long iova_shift(struct iova_domain *iovad) +{ + return __ffs(iovad->granule); +} + +static inline unsigned long iova_mask(struct iova_domain *iovad) +{ + return iovad->granule - 1; +} + +static inline size_t iova_offset(struct iova_domain *iovad, dma_addr_t iova) +{ + return iova & iova_mask(iovad); +} + +static inline size_t iova_align(struct iova_domain *iovad, size_t size) +{ + return ALIGN(size, iovad->granule); +} + +static inline dma_addr_t iova_dma_addr(struct iova_domain *iovad, struct iova *iova) +{ + return (dma_addr_t)iova->pfn_lo << iova_shift(iovad); +} + +static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) +{ + return iova >> iova_shift(iovad); +} + int iommu_iova_cache_init(void); void iommu_iova_cache_destroy(void); @@ -50,8 +81,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, - unsigned long pfn_32bit); +void init_iova_domain(struct iova_domain *iovad, unsigned long granule, + unsigned long start_pfn, unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, -- cgit v1.2.3 From db8614d35bb8fc6d032792c801bd5b38ce860f19 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 16 Jan 2015 20:53:17 -0700 Subject: iommu: Change trace unmap api to report unmapped size Currently map and unmap are implemented as events under a common trace class declaration. The common class forces trace_unmap() to require a bogus physical address argument that it doesn't use. Changing unmap to report unmapped size will provide useful information for debugging. Remove common map_unmap trace class and change map and unmap into separate events as opposed to events under the same class to allow for differences in the reporting information. In addition, map and unmap are changed to handle size value as size_t instead of int to match the passed size value and avoid overflow. 
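An illustrative sketch of a call site (the surrounding unmap helper is hypothetical; only the event prototype comes from this patch) showing how the unmap path can now report both the requested and the actually unmapped size:

	size_t unmapped_size = __unmap_pages(domain, iova, size);	/* hypothetical helper */
	trace_unmap(iova, size, unmapped_size);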
Signed-off-by: Shuah Khan Suggested-by: Alex Williamson Signed-off-by: Joerg Roedel --- include/trace/events/iommu.h | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index a8f5c32d174b..2c7befb10f13 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -83,7 +83,7 @@ DEFINE_EVENT(iommu_device_event, detach_device_from_domain, TP_ARGS(dev) ); -DECLARE_EVENT_CLASS(iommu_map_unmap, +TRACE_EVENT(map, TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), @@ -92,7 +92,7 @@ DECLARE_EVENT_CLASS(iommu_map_unmap, TP_STRUCT__entry( __field(u64, iova) __field(u64, paddr) - __field(int, size) + __field(size_t, size) ), TP_fast_assign( @@ -101,26 +101,31 @@ DECLARE_EVENT_CLASS(iommu_map_unmap, __entry->size = size; ), - TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=0x%x", + TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=%zu", __entry->iova, __entry->paddr, __entry->size ) ); -DEFINE_EVENT(iommu_map_unmap, map, +TRACE_EVENT(unmap, - TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), - - TP_ARGS(iova, paddr, size) -); + TP_PROTO(unsigned long iova, size_t size, size_t unmapped_size), -DEFINE_EVENT_PRINT(iommu_map_unmap, unmap, + TP_ARGS(iova, size, unmapped_size), - TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), + TP_STRUCT__entry( + __field(u64, iova) + __field(size_t, size) + __field(size_t, unmapped_size) + ), - TP_ARGS(iova, paddr, size), + TP_fast_assign( + __entry->iova = iova; + __entry->size = size; + __entry->unmapped_size = unmapped_size; + ), - TP_printk("IOMMU: iova=0x%016llx size=0x%x", - __entry->iova, __entry->size + TP_printk("IOMMU: iova=0x%016llx size=%zu unmapped_size=%zu", + __entry->iova, __entry->size, __entry->unmapped_size ) ); -- cgit v1.2.3 From d5fd120e7860c2b3d4c936a2ebadb6b244bec4c8 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 26 Jan 2015 20:59:31 +0100 Subject: i2c: Only include slave support if selected Make the slave support depend on CONFIG_I2C_SLAVE. Otherwise it gets included unconditionally, even when it is not needed. I2C bus drivers which implement slave support must select I2C_SLAVE. 
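A hedged sketch of the bus-driver side this implies (the foo_* names are illustrative; only the struct fields and the IS_ENABLED() guard come from this patch) — the slave hooks sit behind the same CONFIG_I2C_SLAVE guard, and such a driver selects I2C_SLAVE in its Kconfig entry:

	static const struct i2c_algorithm foo_i2c_algo = {
		.master_xfer	= foo_i2c_xfer,
		.functionality	= foo_i2c_func,
	#if IS_ENABLED(CONFIG_I2C_SLAVE)
		.reg_slave	= foo_i2c_reg_slave,
		.unreg_slave	= foo_i2c_unreg_slave,
	#endif
	};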
Signed-off-by: Jean Delvare Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index e3a1721c8354..7c7695940ddd 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -228,7 +228,9 @@ struct i2c_client { struct device dev; /* the device structure */ int irq; /* irq issued by device */ struct list_head detected; +#if IS_ENABLED(CONFIG_I2C_SLAVE) i2c_slave_cb_t slave_cb; /* callback for slave mode */ +#endif }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) @@ -253,6 +255,7 @@ static inline void i2c_set_clientdata(struct i2c_client *dev, void *data) /* I2C slave support */ +#if IS_ENABLED(CONFIG_I2C_SLAVE) enum i2c_slave_event { I2C_SLAVE_REQ_READ_START, I2C_SLAVE_REQ_READ_END, @@ -269,6 +272,7 @@ static inline int i2c_slave_event(struct i2c_client *client, { return client->slave_cb(client, event, val); } +#endif /** * struct i2c_board_info - template for device creation @@ -404,8 +408,10 @@ struct i2c_algorithm { /* To determine what the adapter supports */ u32 (*functionality) (struct i2c_adapter *); +#if IS_ENABLED(CONFIG_I2C_SLAVE) int (*reg_slave)(struct i2c_client *client); int (*unreg_slave)(struct i2c_client *client); +#endif }; /** -- cgit v1.2.3 From 9879de7373fcfb466ec198293b6ccc1ad7a42dd8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 26 Jan 2015 12:58:32 -0800 Subject: mm: page_alloc: embed OOM killing naturally into allocation slowpath The OOM killing invocation does a lot of duplicative checks against the task's allocation context. Rework it to take advantage of the existing checks in the allocator slowpath. The OOM killer is invoked when the allocator is unable to reclaim any pages but the allocation has to keep looping. Instead of having a check for __GFP_NORETRY hidden in oom_gfp_allowed(), just move the OOM invocation to the true branch of should_alloc_retry(). The __GFP_FS check from oom_gfp_allowed() can then be moved into the OOM avoidance branch in __alloc_pages_may_oom(), along with the PF_DUMPCORE test. __alloc_pages_may_oom() can then signal to the caller whether the OOM killer was invoked, instead of requiring it to duplicate the order and high_zoneidx checks to guess this when deciding whether to continue. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/oom.h b/include/linux/oom.h index 853698c721f7..76200984d1e2 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -85,11 +85,6 @@ static inline void oom_killer_enable(void) oom_killer_disabled = false; } -static inline bool oom_gfp_allowed(gfp_t gfp_mask) -{ - return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY); -} - extern struct task_struct *find_lock_task_mm(struct task_struct *p); static inline bool task_will_free_mem(struct task_struct *task) -- cgit v1.2.3 From 07261edb971492c6b41b44d7b1b51f76807d30ad Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Mon, 26 Jan 2015 12:58:43 -0800 Subject: printk: add dummy routine for when CONFIG_PRINTK=n There are missing dummy routines for log_buf_addr_get() and log_buf_len_get() for when CONFIG_PRINTK is not set causing build failures. This patch adds these dummy routines at the appropriate location. 
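A hypothetical consumer (the dump-registration helper is invented for illustration) showing why the stubs matter — the same code now builds with CONFIG_PRINTK=n and simply sees NULL/0:

	char *buf = log_buf_addr_get();
	u32 len = log_buf_len_get();

	if (buf && len)
		register_dump_area(buf, len);	/* hypothetical consumer, e.g. a crash-dump helper */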
Signed-off-by: Pranith Kumar Cc: Michael Ellerman Reviewed-by: Petr Mladek Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index c8f170324e64..4d5bf5726578 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -10,9 +10,6 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; -extern char *log_buf_addr_get(void); -extern u32 log_buf_len_get(void); - static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { @@ -163,6 +160,8 @@ extern int kptr_restrict; extern void wake_up_klogd(void); +char *log_buf_addr_get(void); +u32 log_buf_len_get(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); void dump_stack_set_arch_desc(const char *fmt, ...); @@ -198,6 +197,16 @@ static inline void wake_up_klogd(void) { } +static inline char *log_buf_addr_get(void) +{ + return NULL; +} + +static inline u32 log_buf_len_get(void) +{ + return 0; +} + static inline void log_buf_kexec_setup(void) { } -- cgit v1.2.3 From df4d92549f23e1c037e83323aff58a21b3de7fe0 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 23 Jan 2015 12:01:26 +0100 Subject: ipv4: try to cache dst_entries which would cause a redirect Not caching dst_entries which cause redirects could be exploited by hosts on the same subnet, causing a severe DoS attack. This effect aggravated since commit f88649721268999 ("ipv4: fix dst race in sk_dst_get()"). Lookups causing redirects will be allocated with DST_NOCACHE set which will force dst_release to free them via RCU. Unfortunately waiting for RCU grace period just takes too long, we can end up with >1M dst_entries waiting to be released and the system will run OOM. rcuos threads cannot catch up under high softirq load. Attaching the flag to emit a redirect later on to the specific skb allows us to cache those dst_entries thus reducing the pressure on allocation and deallocation. This issue was discovered by Marcelo Leitner. Cc: Julian Anastasov Signed-off-by: Marcelo Leitner Signed-off-by: Florian Westphal Signed-off-by: Hannes Frederic Sowa Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 0bb620702929..f7cbd703d15d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -39,11 +39,12 @@ struct inet_skb_parm { struct ip_options opt; /* Compiled IP options */ unsigned char flags; -#define IPSKB_FORWARDED 1 -#define IPSKB_XFRM_TUNNEL_SIZE 2 -#define IPSKB_XFRM_TRANSFORMED 4 -#define IPSKB_FRAG_COMPLETE 8 -#define IPSKB_REROUTED 16 +#define IPSKB_FORWARDED BIT(0) +#define IPSKB_XFRM_TUNNEL_SIZE BIT(1) +#define IPSKB_XFRM_TRANSFORMED BIT(2) +#define IPSKB_FRAG_COMPLETE BIT(3) +#define IPSKB_REROUTED BIT(4) +#define IPSKB_DOREDIRECT BIT(5) u16 frag_max_size; }; -- cgit v1.2.3 From 14bf61ffe6ac54afcd1e888a4407fe16054483db Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 9 Oct 2014 16:03:13 +0200 Subject: quota: Switch ->get_dqblk() and ->set_dqblk() to use bytes as space units Currently ->get_dqblk() and ->set_dqblk() use struct fs_disk_quota which tracks space limits and usage in 512-byte blocks. 
However VFS quotas track usage in bytes (as some filesystems require that) and we need to somehow pass this information. Up to now it wasn't a problem because we didn't do any unit conversion (thus the VFS quota routines happily stuck the number of bytes into the d_bcount field of struct fs_disk_quota). Only if you tried to use Q_XGETQUOTA or Q_XSETQLIM for VFS quotas (or Q_GETQUOTA / Q_SETQUOTA for XFS quotas) did you get bogus results. Hardly anyone tried this, but reportedly some Samba users hit the problem in practice. So if we want the interfaces to be compatible, we need to fix this. We bite the bullet and define another quota structure used for passing information from/to ->get_dqblk()/->set_dqblk(). It's somewhat sad that we need more conversion routines in fs/quota/quota.c, and the extra copy of the quota structure slows down getting quota information by about 2%, but it seems cleaner than overloading e.g. the units of d_bcount to bytes. CC: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/quota.h | 47 +++++++++++++++++++++++++++++++++++++++++++++-- include/linux/quotaops.h | 4 ++-- 2 files changed, 47 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index 50978b781a19..097d7eb2441e 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -321,6 +321,49 @@ struct dquot_operations { struct path; +/* Structure for communicating via ->get_dqblk() & ->set_dqblk() */ +struct qc_dqblk { + int d_fieldmask; /* mask of fields to change in ->set_dqblk() */ + u64 d_spc_hardlimit; /* absolute limit on used space */ + u64 d_spc_softlimit; /* preferred limit on used space */ + u64 d_ino_hardlimit; /* maximum # allocated inodes */ + u64 d_ino_softlimit; /* preferred inode limit */ + u64 d_space; /* Space owned by the user */ + u64 d_ino_count; /* # inodes owned by the user */ + s64 d_ino_timer; /* zero if within inode limits */ + /* if not, we refuse service */ + s64 d_spc_timer; /* similar to above; for space */ + int d_ino_warns; /* # warnings issued wrt num inodes */ + int d_spc_warns; /* # warnings issued wrt used space */ + u64 d_rt_spc_hardlimit; /* absolute limit on realtime space */ + u64 d_rt_spc_softlimit; /* preferred limit on RT space */ + u64 d_rt_space; /* realtime space owned */ + s64 d_rt_spc_timer; /* similar to above; for RT space */ + int d_rt_spc_warns; /* # warnings issued wrt RT space */ +}; + +/* Field specifiers for ->set_dqblk() in struct qc_dqblk */ +#define QC_INO_SOFT (1<<0) +#define QC_INO_HARD (1<<1) +#define QC_SPC_SOFT (1<<2) +#define QC_SPC_HARD (1<<3) +#define QC_RT_SPC_SOFT (1<<4) +#define QC_RT_SPC_HARD (1<<5) +#define QC_LIMIT_MASK (QC_INO_SOFT | QC_INO_HARD | QC_SPC_SOFT | QC_SPC_HARD | \ + QC_RT_SPC_SOFT | QC_RT_SPC_HARD) +#define QC_SPC_TIMER (1<<6) +#define QC_INO_TIMER (1<<7) +#define QC_RT_SPC_TIMER (1<<8) +#define QC_TIMER_MASK (QC_SPC_TIMER | QC_INO_TIMER | QC_RT_SPC_TIMER) +#define QC_SPC_WARNS (1<<9) +#define QC_INO_WARNS (1<<10) +#define QC_RT_SPC_WARNS (1<<11) +#define QC_WARNS_MASK (QC_SPC_WARNS | QC_INO_WARNS | QC_RT_SPC_WARNS) +#define QC_SPACE (1<<12) +#define QC_INO_COUNT (1<<13) +#define QC_RT_SPACE (1<<14) +#define QC_ACCT_MASK (QC_SPACE | QC_INO_COUNT | QC_RT_SPACE) + /* Operations handling requests from userspace */ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, struct path *); @@ -329,8 +372,8 @@ struct quotactl_ops { int (*quota_sync)(struct super_block *, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *);
int (*set_info)(struct super_block *, int, struct if_dqinfo *); - int (*get_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *); - int (*set_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *); + int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); + int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); int (*get_xstatev)(struct super_block *, struct fs_quota_statv *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index f23538a6e411..29e3455f7d41 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -98,9 +98,9 @@ int dquot_quota_sync(struct super_block *sb, int type); int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_get_dqblk(struct super_block *sb, struct kqid id, - struct fs_disk_quota *di); + struct qc_dqblk *di); int dquot_set_dqblk(struct super_block *sb, struct kqid id, - struct fs_disk_quota *di); + struct qc_dqblk *di); int __dquot_transfer(struct inode *inode, struct dquot **transfer_to); int dquot_transfer(struct inode *inode, struct iattr *iattr); -- cgit v1.2.3 From c3c87e770458aa004bd7ed3f29945ff436fd6511 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 23 Jan 2015 11:19:48 +0100 Subject: perf: Tighten (and fix) the grouping condition The fix from 9fc81d87420d ("perf: Fix events installation during moving group") was incomplete in that it failed to recognise that creating a group with events for different CPUs is semantically broken -- they cannot be co-scheduled. Furthermore, it leads to real breakage where, when we create an event for CPU Y and then migrate it to form a group on CPU X, the code gets confused where the counter is programmed -- triggered in practice as well by me via the perf fuzzer. Fix this by tightening the rules for creating groups. Only allow grouping of counters that can be co-scheduled in the same context. This means for the same task and/or the same cpu. Fixes: 9fc81d87420d ("perf: Fix events installation during moving group") Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150123125834.090683288@infradead.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4f7a61ca4b39..664de5a4ec46 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -450,11 +450,6 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; -enum perf_event_context_type { - task_context, - cpu_context, -}; - /** * struct perf_event_context - event context structure * @@ -462,7 +457,6 @@ enum perf_event_context_type { */ struct perf_event_context { struct pmu *pmu; - enum perf_event_context_type type; /* * Protect the states of the events in the list, * nr_active, and the list: -- cgit v1.2.3 From 33692f27597fcab536d7cbbcc8f52905133e4aa7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Jan 2015 10:51:32 -0800 Subject: vm: add VM_FAULT_SIGSEGV handling support The core VM already knows about VM_FAULT_SIGBUS, but cannot return a "you should SIGSEGV" error, because the SIGSEGV case was generally handled by the caller - usually the architecture fault handler. 
That results in lots of duplication - all the architecture fault handlers end up doing very similar "look up vma, check permissions, do retries etc" - but it generally works. However, there are cases where the VM actually wants to SIGSEGV, and applications _expect_ SIGSEGV. In particular, when accessing the stack guard page, libsigsegv expects a SIGSEGV. And it usually got one, because the stack growth is handled by that duplicated architecture fault handler. However, when the generic VM layer started propagating the error return from the stack expansion in commit fee7e49d4514 ("mm: propagate error from stack expansion even for guard page"), that now exposed the existing VM_FAULT_SIGBUS result to user space. And user space really expected SIGSEGV, not SIGBUS. To fix that case, we need to add a VM_FAULT_SIGSEGV, and teach all those duplicate architecture fault handlers about it. They all already have the code to handle SIGSEGV, so it's about just tying that new return value to the existing code, but it's all a bit annoying. This is the mindless minimal patch to do this. A more extensive patch would be to try to gather up the mostly shared fault handling logic into one generic helper routine, and long-term we really should do that cleanup. Just from this patch, you can generally see that most architectures just copied (directly or indirectly) the old x86 way of doing things, but in the meantime that original x86 model has been improved to hold the VM semaphore for shorter times etc and to handle VM_FAULT_RETRY and other "newer" things, so it would be a good idea to bring all those improvements to the generic case and teach other architectures about them too. Reported-and-tested-by: Takashi Iwai Tested-by: Jan Engelhardt Acked-by: Heiko Carstens # "s390 still compiles and boots" Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 80fc92a49649..dd5ea3016fc4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1070,6 +1070,7 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ #define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ #define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. 
Index encoded in upper bits */ +#define VM_FAULT_SIGSEGV 0x0040 #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ @@ -1078,8 +1079,9 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ - VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE) +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ + VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ + VM_FAULT_FALLBACK) /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((x) << 12) -- cgit v1.2.3 From 00845eb968ead28007338b2bb852b8beef816583 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Feb 2015 12:23:32 -0800 Subject: sched: don't cause task state changes in nested sleep debugging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 8eb23b9f35aa ("sched: Debug nested sleeps") added code to report on nested sleep conditions, which we generally want to avoid because the inner sleeping operation can re-set the thread state to TASK_RUNNING, but that will then cause the outer sleep loop not actually sleep when it calls schedule. However, that's actually valid traditional behavior, with the inner sleep being some fairly rare case (like taking a sleeping lock that normally doesn't actually need to sleep). And the debug code would actually change the state of the task to TASK_RUNNING internally, which makes that kind of traditional and working code not work at all, because now the nested sleep doesn't just sometimes cause the outer one to not block, but will cause it to happen every time. In particular, it will cause the cardbus kernel daemon (pccardd) to basically busy-loop doing scheduling, converting a laptop into a heater, as reported by Bruno Prémont. But there may be other legacy uses of that nested sleep model in other drivers that are also likely to never get converted to the new model. This fixes both cases: - don't set TASK_RUNNING when the nested condition happens (note: even if WARN_ONCE() only _warns_ once, the return value isn't whether the warning happened, but whether the condition for the warning was true. So despite the warning only happening once, the "if (WARN_ON(..))" would trigger for every nested sleep. - in the cases where we knowingly disable the warning by using "sched_annotate_sleep()", don't change the task state (that is used for all core scheduling decisions), instead use '->task_state_change' that is used for the debugging decision itself. (Credit for the second part of the fix goes to Oleg Nesterov: "Can't we avoid this subtle change in behaviour DEBUG_ATOMIC_SLEEP adds?" 
with the suggested change to use 'task_state_change' as part of the test) Reported-and-bisected-by: Bruno Prémont Tested-by: Rafael J Wysocki Acked-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner , Cc: Ilya Dryomov , Cc: Mike Galbraith Cc: Ingo Molnar Cc: Peter Hurley , Cc: Davidlohr Bueso , Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5449d2f4a1ef..64ce58bee6f5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -176,7 +176,7 @@ extern int _cond_resched(void); */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) -# define sched_annotate_sleep() __set_current_state(TASK_RUNNING) +# define sched_annotate_sleep() (current->task_state_change = 0) #else static inline void ___might_sleep(const char *file, int line, int preempt_offset) { } -- cgit v1.2.3
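A hedged sketch of the legacy nested-sleep pattern the last commit preserves (the lock, condition and helpers are illustrative, not from the patch): the annotation marks the inner sleep as intentional for DEBUG_ATOMIC_SLEEP and, with this change, no longer rewrites the task state used for scheduling decisions.

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (event_pending())
			break;
		sched_annotate_sleep();		/* the nested sleep below is known and accepted */
		mutex_lock(&cfg_lock);		/* may sleep, legitimately leaving us TASK_RUNNING */
		poll_hardware();
		mutex_unlock(&cfg_lock);
		schedule();
	}
	__set_current_state(TASK_RUNNING);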