From ad26aa6c60974acf3228ed0ade97ba5793093dbe Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Thu, 8 Jan 2015 11:04:07 +0900 Subject: regulator: s2mps11: Fix wrong calculation of register offset This patch adds the missing registers ('BUCK7_SW' & 'LDO29_CTRL'). Since BUCK7 has one more register (BUCK7_SW) than the other bucks, the register offset must be increased by one for every register whose address is higher than the BUCK7 registers. Fixes: 76b9840b24ae04 ("regulator: s2mps11: Add support S2MPS13 regulator device") Signed-off-by: Jonghwa Lee Signed-off-by: Chanwoo Choi Signed-off-by: Mark Brown Cc: --- include/linux/mfd/samsung/s2mps13.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/samsung/s2mps13.h b/include/linux/mfd/samsung/s2mps13.h index ce5dda8958fe..b1fd675fa36f 100644 --- a/include/linux/mfd/samsung/s2mps13.h +++ b/include/linux/mfd/samsung/s2mps13.h @@ -59,6 +59,7 @@ enum s2mps13_reg { S2MPS13_REG_B6CTRL, S2MPS13_REG_B6OUT, S2MPS13_REG_B7CTRL, + S2MPS13_REG_B7SW, S2MPS13_REG_B7OUT, S2MPS13_REG_B8CTRL, S2MPS13_REG_B8OUT, @@ -102,6 +103,7 @@ enum s2mps13_reg { S2MPS13_REG_L26CTRL, S2MPS13_REG_L27CTRL, S2MPS13_REG_L28CTRL, + S2MPS13_REG_L29CTRL, S2MPS13_REG_L30CTRL, S2MPS13_REG_L31CTRL, S2MPS13_REG_L32CTRL, -- cgit v1.2.3 From 85b4545629663486b7f71047ce3b54fa0ad3eb28 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:14 +0000 Subject: iommu: Consolidate IOVA allocator code In order to share the IOVA allocator with other architectures, break the unnecessary dependency on the Intel IOMMU driver and move the remaining IOVA internals to iova.c Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 19e81d5ccb6d..ad0507c61cc7 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -39,6 +39,9 @@ static inline unsigned long iova_size(struct iova *iova) return iova->pfn_hi - iova->pfn_lo + 1; } +int iommu_iova_cache_init(void); +void iommu_iova_cache_destroy(void); + struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); -- cgit v1.2.3 From 1b72250076dde4276acecf3a7da722b185703e78 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:15 +0000 Subject: iommu: Make IOVA domain low limit flexible To share the IOVA allocator with other architectures, it needs to accommodate more general aperture restrictions; move the lower limit from a compile-time constant to a runtime domain property to allow IOVA domains with different requirements to co-exist. Also reword the slightly unclear description of alloc_iova since we're touching it anyway.
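A minimal caller sketch (illustrative only, not taken from this patch; the domain variable and the dma_32bit_pfn bound are assumed, and the signature shown is the one introduced here, before the next patch adds a granule argument):

	struct iova_domain iovad;

	/* the lower limit, formerly the hard-coded IOVA_START_PFN (1), is now passed in */
	init_iova_domain(&iovad, 1, dma_32bit_pfn);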
Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index ad0507c61cc7..591b19626b46 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -16,9 +16,6 @@ #include #include -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) - /* iova structure */ struct iova { struct rb_node node; @@ -31,6 +28,7 @@ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; }; @@ -52,7 +50,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); +void init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, + unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, -- cgit v1.2.3 From 0fb5fe874c42942e16c450ae05da453e13a1c09e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:16 +0000 Subject: iommu: Make IOVA domain page size explicit Systems may contain heterogeneous IOMMUs supporting differing minimum page sizes, which may also not be common with the CPU page size. Thus it is practical to have an explicit notion of IOVA granularity to simplify handling of mapping and allocation constraints. As an initial step, move the IOVA page granularity from an implicit compile-time constant to a per-domain property so we can make use of it in IOVA domain context at runtime. To keep the abstraction tidy, extend the little API of inline iova_* helpers to parallel some of the equivalent PAGE_* macros. 
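A brief usage sketch (illustrative; "iovad", "paddr", "size" and "dma_limit_pfn" are assumed context, only the helper names come from this patch) showing the new helpers standing in for the PAGE_* macros when carving out an IOVA for a physical range:

	/* round the request to the domain's granule, not the CPU page size */
	size_t len = iova_align(iovad, size + iova_offset(iovad, paddr));
	struct iova *area = alloc_iova(iovad, len >> iova_shift(iovad), dma_limit_pfn, true);
	dma_addr_t dma = iova_dma_addr(iovad, area) + iova_offset(iovad, paddr);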
Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 591b19626b46..3920a19d8194 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -28,6 +28,7 @@ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; }; @@ -37,6 +38,36 @@ static inline unsigned long iova_size(struct iova *iova) return iova->pfn_hi - iova->pfn_lo + 1; } +static inline unsigned long iova_shift(struct iova_domain *iovad) +{ + return __ffs(iovad->granule); +} + +static inline unsigned long iova_mask(struct iova_domain *iovad) +{ + return iovad->granule - 1; +} + +static inline size_t iova_offset(struct iova_domain *iovad, dma_addr_t iova) +{ + return iova & iova_mask(iovad); +} + +static inline size_t iova_align(struct iova_domain *iovad, size_t size) +{ + return ALIGN(size, iovad->granule); +} + +static inline dma_addr_t iova_dma_addr(struct iova_domain *iovad, struct iova *iova) +{ + return (dma_addr_t)iova->pfn_lo << iova_shift(iovad); +} + +static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) +{ + return iova >> iova_shift(iovad); +} + int iommu_iova_cache_init(void); void iommu_iova_cache_destroy(void); @@ -50,8 +81,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, - unsigned long pfn_32bit); +void init_iova_domain(struct iova_domain *iovad, unsigned long granule, + unsigned long start_pfn, unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, -- cgit v1.2.3 From db8614d35bb8fc6d032792c801bd5b38ce860f19 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 16 Jan 2015 20:53:17 -0700 Subject: iommu: Change trace unmap api to report unmapped size Currently map and unmap are implemented as events under a common trace class declaration. The common class forces trace_unmap() to require a bogus physical address argument that it doesn't use. Changing unmap to report unmapped size will provide useful information for debugging. Remove common map_unmap trace class and change map and unmap into separate events as opposed to events under the same class to allow for differences in the reporting information. In addition, map and unmap are changed to handle size value as size_t instead of int to match the passed size value and avoid overflow. 
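An illustrative sketch of a call site (the surrounding unmap helper is hypothetical; only the event prototype comes from this patch) showing how the unmap path can now report both the requested and the actually unmapped size:

	size_t unmapped_size = __unmap_pages(domain, iova, size);	/* hypothetical helper */
	trace_unmap(iova, size, unmapped_size);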
Signed-off-by: Shuah Khan Suggested-by: Alex Williamson Signed-off-by: Joerg Roedel --- include/trace/events/iommu.h | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index a8f5c32d174b..2c7befb10f13 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -83,7 +83,7 @@ DEFINE_EVENT(iommu_device_event, detach_device_from_domain, TP_ARGS(dev) ); -DECLARE_EVENT_CLASS(iommu_map_unmap, +TRACE_EVENT(map, TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), @@ -92,7 +92,7 @@ DECLARE_EVENT_CLASS(iommu_map_unmap, TP_STRUCT__entry( __field(u64, iova) __field(u64, paddr) - __field(int, size) + __field(size_t, size) ), TP_fast_assign( @@ -101,26 +101,31 @@ DECLARE_EVENT_CLASS(iommu_map_unmap, __entry->size = size; ), - TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=0x%x", + TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=%zu", __entry->iova, __entry->paddr, __entry->size ) ); -DEFINE_EVENT(iommu_map_unmap, map, +TRACE_EVENT(unmap, - TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), - - TP_ARGS(iova, paddr, size) -); + TP_PROTO(unsigned long iova, size_t size, size_t unmapped_size), -DEFINE_EVENT_PRINT(iommu_map_unmap, unmap, + TP_ARGS(iova, size, unmapped_size), - TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), + TP_STRUCT__entry( + __field(u64, iova) + __field(size_t, size) + __field(size_t, unmapped_size) + ), - TP_ARGS(iova, paddr, size), + TP_fast_assign( + __entry->iova = iova; + __entry->size = size; + __entry->unmapped_size = unmapped_size; + ), - TP_printk("IOMMU: iova=0x%016llx size=0x%x", - __entry->iova, __entry->size + TP_printk("IOMMU: iova=0x%016llx size=%zu unmapped_size=%zu", + __entry->iova, __entry->size, __entry->unmapped_size ) ); -- cgit v1.2.3 From d5fd120e7860c2b3d4c936a2ebadb6b244bec4c8 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 26 Jan 2015 20:59:31 +0100 Subject: i2c: Only include slave support if selected Make the slave support depend on CONFIG_I2C_SLAVE. Otherwise it gets included unconditionally, even when it is not needed. I2C bus drivers which implement slave support must select I2C_SLAVE. 
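A hedged sketch of the bus-driver side this implies (the foo_* names are illustrative; only the struct fields and the IS_ENABLED() guard come from this patch) — the slave hooks sit behind the same CONFIG_I2C_SLAVE guard, and such a driver selects I2C_SLAVE in its Kconfig entry:

	static const struct i2c_algorithm foo_i2c_algo = {
		.master_xfer	= foo_i2c_xfer,
		.functionality	= foo_i2c_func,
	#if IS_ENABLED(CONFIG_I2C_SLAVE)
		.reg_slave	= foo_i2c_reg_slave,
		.unreg_slave	= foo_i2c_unreg_slave,
	#endif
	};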
Signed-off-by: Jean Delvare Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index e3a1721c8354..7c7695940ddd 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -228,7 +228,9 @@ struct i2c_client { struct device dev; /* the device structure */ int irq; /* irq issued by device */ struct list_head detected; +#if IS_ENABLED(CONFIG_I2C_SLAVE) i2c_slave_cb_t slave_cb; /* callback for slave mode */ +#endif }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) @@ -253,6 +255,7 @@ static inline void i2c_set_clientdata(struct i2c_client *dev, void *data) /* I2C slave support */ +#if IS_ENABLED(CONFIG_I2C_SLAVE) enum i2c_slave_event { I2C_SLAVE_REQ_READ_START, I2C_SLAVE_REQ_READ_END, @@ -269,6 +272,7 @@ static inline int i2c_slave_event(struct i2c_client *client, { return client->slave_cb(client, event, val); } +#endif /** * struct i2c_board_info - template for device creation @@ -404,8 +408,10 @@ struct i2c_algorithm { /* To determine what the adapter supports */ u32 (*functionality) (struct i2c_adapter *); +#if IS_ENABLED(CONFIG_I2C_SLAVE) int (*reg_slave)(struct i2c_client *client); int (*unreg_slave)(struct i2c_client *client); +#endif }; /** -- cgit v1.2.3 From 9879de7373fcfb466ec198293b6ccc1ad7a42dd8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 26 Jan 2015 12:58:32 -0800 Subject: mm: page_alloc: embed OOM killing naturally into allocation slowpath The OOM killing invocation does a lot of duplicative checks against the task's allocation context. Rework it to take advantage of the existing checks in the allocator slowpath. The OOM killer is invoked when the allocator is unable to reclaim any pages but the allocation has to keep looping. Instead of having a check for __GFP_NORETRY hidden in oom_gfp_allowed(), just move the OOM invocation to the true branch of should_alloc_retry(). The __GFP_FS check from oom_gfp_allowed() can then be moved into the OOM avoidance branch in __alloc_pages_may_oom(), along with the PF_DUMPCORE test. __alloc_pages_may_oom() can then signal to the caller whether the OOM killer was invoked, instead of requiring it to duplicate the order and high_zoneidx checks to guess this when deciding whether to continue. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/oom.h b/include/linux/oom.h index 853698c721f7..76200984d1e2 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -85,11 +85,6 @@ static inline void oom_killer_enable(void) oom_killer_disabled = false; } -static inline bool oom_gfp_allowed(gfp_t gfp_mask) -{ - return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY); -} - extern struct task_struct *find_lock_task_mm(struct task_struct *p); static inline bool task_will_free_mem(struct task_struct *task) -- cgit v1.2.3 From 07261edb971492c6b41b44d7b1b51f76807d30ad Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Mon, 26 Jan 2015 12:58:43 -0800 Subject: printk: add dummy routine for when CONFIG_PRINTK=n There are missing dummy routines for log_buf_addr_get() and log_buf_len_get() for when CONFIG_PRINTK is not set causing build failures. This patch adds these dummy routines at the appropriate location. 
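A hypothetical consumer (the dump-registration helper is invented for illustration) showing why the stubs matter — the same code now builds with CONFIG_PRINTK=n and simply sees NULL/0:

	char *buf = log_buf_addr_get();
	u32 len = log_buf_len_get();

	if (buf && len)
		register_dump_area(buf, len);	/* hypothetical consumer, e.g. a crash-dump helper */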
Signed-off-by: Pranith Kumar Cc: Michael Ellerman Reviewed-by: Petr Mladek Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index c8f170324e64..4d5bf5726578 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -10,9 +10,6 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; -extern char *log_buf_addr_get(void); -extern u32 log_buf_len_get(void); - static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { @@ -163,6 +160,8 @@ extern int kptr_restrict; extern void wake_up_klogd(void); +char *log_buf_addr_get(void); +u32 log_buf_len_get(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); void dump_stack_set_arch_desc(const char *fmt, ...); @@ -198,6 +197,16 @@ static inline void wake_up_klogd(void) { } +static inline char *log_buf_addr_get(void) +{ + return NULL; +} + +static inline u32 log_buf_len_get(void) +{ + return 0; +} + static inline void log_buf_kexec_setup(void) { } -- cgit v1.2.3 From df4d92549f23e1c037e83323aff58a21b3de7fe0 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 23 Jan 2015 12:01:26 +0100 Subject: ipv4: try to cache dst_entries which would cause a redirect Not caching dst_entries which cause redirects could be exploited by hosts on the same subnet, causing a severe DoS attack. This effect aggravated since commit f88649721268999 ("ipv4: fix dst race in sk_dst_get()"). Lookups causing redirects will be allocated with DST_NOCACHE set which will force dst_release to free them via RCU. Unfortunately waiting for RCU grace period just takes too long, we can end up with >1M dst_entries waiting to be released and the system will run OOM. rcuos threads cannot catch up under high softirq load. Attaching the flag to emit a redirect later on to the specific skb allows us to cache those dst_entries thus reducing the pressure on allocation and deallocation. This issue was discovered by Marcelo Leitner. Cc: Julian Anastasov Signed-off-by: Marcelo Leitner Signed-off-by: Florian Westphal Signed-off-by: Hannes Frederic Sowa Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 0bb620702929..f7cbd703d15d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -39,11 +39,12 @@ struct inet_skb_parm { struct ip_options opt; /* Compiled IP options */ unsigned char flags; -#define IPSKB_FORWARDED 1 -#define IPSKB_XFRM_TUNNEL_SIZE 2 -#define IPSKB_XFRM_TRANSFORMED 4 -#define IPSKB_FRAG_COMPLETE 8 -#define IPSKB_REROUTED 16 +#define IPSKB_FORWARDED BIT(0) +#define IPSKB_XFRM_TUNNEL_SIZE BIT(1) +#define IPSKB_XFRM_TRANSFORMED BIT(2) +#define IPSKB_FRAG_COMPLETE BIT(3) +#define IPSKB_REROUTED BIT(4) +#define IPSKB_DOREDIRECT BIT(5) u16 frag_max_size; }; -- cgit v1.2.3 From 14bf61ffe6ac54afcd1e888a4407fe16054483db Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 9 Oct 2014 16:03:13 +0200 Subject: quota: Switch ->get_dqblk() and ->set_dqblk() to use bytes as space units Currently ->get_dqblk() and ->set_dqblk() use struct fs_disk_quota which tracks space limits and usage in 512-byte blocks. 
However VFS quotas track usage in bytes (as some filesystems require that) and we need to somehow pass this information. Up to now it wasn't a problem because we didn't do any unit conversion (thus the VFS quota routines happily stuck the number of bytes into the d_bcount field of struct fs_disk_quota). Only if you tried to use Q_XGETQUOTA or Q_XSETQLIM for VFS quotas (or Q_GETQUOTA / Q_SETQUOTA for XFS quotas) did you get bogus results. Hardly anyone tried this, but reportedly some Samba users hit the problem in practice. So if we want the interfaces to be compatible, we need to fix this. We bite the bullet and define another quota structure used for passing information from/to ->get_dqblk()/->set_dqblk(). It's somewhat sad that we need more conversion routines in fs/quota/quota.c, and the extra copy of the quota structure slows down getting quota information by about 2%, but it seems cleaner than overloading e.g. the units of d_bcount to bytes. CC: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/quota.h | 47 +++++++++++++++++++++++++++++++++++++++++++++-- include/linux/quotaops.h | 4 ++-- 2 files changed, 47 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index 50978b781a19..097d7eb2441e 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -321,6 +321,49 @@ struct dquot_operations { struct path; +/* Structure for communicating via ->get_dqblk() & ->set_dqblk() */ +struct qc_dqblk { + int d_fieldmask; /* mask of fields to change in ->set_dqblk() */ + u64 d_spc_hardlimit; /* absolute limit on used space */ + u64 d_spc_softlimit; /* preferred limit on used space */ + u64 d_ino_hardlimit; /* maximum # allocated inodes */ + u64 d_ino_softlimit; /* preferred inode limit */ + u64 d_space; /* Space owned by the user */ + u64 d_ino_count; /* # inodes owned by the user */ + s64 d_ino_timer; /* zero if within inode limits */ + /* if not, we refuse service */ + s64 d_spc_timer; /* similar to above; for space */ + int d_ino_warns; /* # warnings issued wrt num inodes */ + int d_spc_warns; /* # warnings issued wrt used space */ + u64 d_rt_spc_hardlimit; /* absolute limit on realtime space */ + u64 d_rt_spc_softlimit; /* preferred limit on RT space */ + u64 d_rt_space; /* realtime space owned */ + s64 d_rt_spc_timer; /* similar to above; for RT space */ + int d_rt_spc_warns; /* # warnings issued wrt RT space */ +}; + +/* Field specifiers for ->set_dqblk() in struct qc_dqblk */ +#define QC_INO_SOFT (1<<0) +#define QC_INO_HARD (1<<1) +#define QC_SPC_SOFT (1<<2) +#define QC_SPC_HARD (1<<3) +#define QC_RT_SPC_SOFT (1<<4) +#define QC_RT_SPC_HARD (1<<5) +#define QC_LIMIT_MASK (QC_INO_SOFT | QC_INO_HARD | QC_SPC_SOFT | QC_SPC_HARD | \ + QC_RT_SPC_SOFT | QC_RT_SPC_HARD) +#define QC_SPC_TIMER (1<<6) +#define QC_INO_TIMER (1<<7) +#define QC_RT_SPC_TIMER (1<<8) +#define QC_TIMER_MASK (QC_SPC_TIMER | QC_INO_TIMER | QC_RT_SPC_TIMER) +#define QC_SPC_WARNS (1<<9) +#define QC_INO_WARNS (1<<10) +#define QC_RT_SPC_WARNS (1<<11) +#define QC_WARNS_MASK (QC_SPC_WARNS | QC_INO_WARNS | QC_RT_SPC_WARNS) +#define QC_SPACE (1<<12) +#define QC_INO_COUNT (1<<13) +#define QC_RT_SPACE (1<<14) +#define QC_ACCT_MASK (QC_SPACE | QC_INO_COUNT | QC_RT_SPACE) + /* Operations handling requests from userspace */ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, struct path *); @@ -329,8 +372,8 @@ struct quotactl_ops { int (*quota_sync)(struct super_block *, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *);
int (*set_info)(struct super_block *, int, struct if_dqinfo *); - int (*get_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *); - int (*set_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *); + int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); + int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); int (*get_xstatev)(struct super_block *, struct fs_quota_statv *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index f23538a6e411..29e3455f7d41 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -98,9 +98,9 @@ int dquot_quota_sync(struct super_block *sb, int type); int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_get_dqblk(struct super_block *sb, struct kqid id, - struct fs_disk_quota *di); + struct qc_dqblk *di); int dquot_set_dqblk(struct super_block *sb, struct kqid id, - struct fs_disk_quota *di); + struct qc_dqblk *di); int __dquot_transfer(struct inode *inode, struct dquot **transfer_to); int dquot_transfer(struct inode *inode, struct iattr *iattr); -- cgit v1.2.3 From c3c87e770458aa004bd7ed3f29945ff436fd6511 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 23 Jan 2015 11:19:48 +0100 Subject: perf: Tighten (and fix) the grouping condition The fix from 9fc81d87420d ("perf: Fix events installation during moving group") was incomplete in that it failed to recognise that creating a group with events for different CPUs is semantically broken -- they cannot be co-scheduled. Furthermore, it leads to real breakage where, when we create an event for CPU Y and then migrate it to form a group on CPU X, the code gets confused where the counter is programmed -- triggered in practice as well by me via the perf fuzzer. Fix this by tightening the rules for creating groups. Only allow grouping of counters that can be co-scheduled in the same context. This means for the same task and/or the same cpu. Fixes: 9fc81d87420d ("perf: Fix events installation during moving group") Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150123125834.090683288@infradead.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4f7a61ca4b39..664de5a4ec46 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -450,11 +450,6 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; -enum perf_event_context_type { - task_context, - cpu_context, -}; - /** * struct perf_event_context - event context structure * @@ -462,7 +457,6 @@ enum perf_event_context_type { */ struct perf_event_context { struct pmu *pmu; - enum perf_event_context_type type; /* * Protect the states of the events in the list, * nr_active, and the list: -- cgit v1.2.3 From 33692f27597fcab536d7cbbcc8f52905133e4aa7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Jan 2015 10:51:32 -0800 Subject: vm: add VM_FAULT_SIGSEGV handling support The core VM already knows about VM_FAULT_SIGBUS, but cannot return a "you should SIGSEGV" error, because the SIGSEGV case was generally handled by the caller - usually the architecture fault handler. 
That results in lots of duplication - all the architecture fault handlers end up doing very similar "look up vma, check permissions, do retries etc" - but it generally works. However, there are cases where the VM actually wants to SIGSEGV, and applications _expect_ SIGSEGV. In particular, when accessing the stack guard page, libsigsegv expects a SIGSEGV. And it usually got one, because the stack growth is handled by that duplicated architecture fault handler. However, when the generic VM layer started propagating the error return from the stack expansion in commit fee7e49d4514 ("mm: propagate error from stack expansion even for guard page"), that now exposed the existing VM_FAULT_SIGBUS result to user space. And user space really expected SIGSEGV, not SIGBUS. To fix that case, we need to add a VM_FAULT_SIGSEGV, and teach all those duplicate architecture fault handlers about it. They all already have the code to handle SIGSEGV, so it's about just tying that new return value to the existing code, but it's all a bit annoying. This is the mindless minimal patch to do this. A more extensive patch would be to try to gather up the mostly shared fault handling logic into one generic helper routine, and long-term we really should do that cleanup. Just from this patch, you can generally see that most architectures just copied (directly or indirectly) the old x86 way of doing things, but in the meantime that original x86 model has been improved to hold the VM semaphore for shorter times etc and to handle VM_FAULT_RETRY and other "newer" things, so it would be a good idea to bring all those improvements to the generic case and teach other architectures about them too. Reported-and-tested-by: Takashi Iwai Tested-by: Jan Engelhardt Acked-by: Heiko Carstens # "s390 still compiles and boots" Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 80fc92a49649..dd5ea3016fc4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1070,6 +1070,7 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ #define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ #define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. 
Index encoded in upper bits */ +#define VM_FAULT_SIGSEGV 0x0040 #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ @@ -1078,8 +1079,9 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ - VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE) +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ + VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ + VM_FAULT_FALLBACK) /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((x) << 12) -- cgit v1.2.3 From 00845eb968ead28007338b2bb852b8beef816583 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Feb 2015 12:23:32 -0800 Subject: sched: don't cause task state changes in nested sleep debugging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 8eb23b9f35aa ("sched: Debug nested sleeps") added code to report on nested sleep conditions, which we generally want to avoid because the inner sleeping operation can re-set the thread state to TASK_RUNNING, but that will then cause the outer sleep loop not actually sleep when it calls schedule. However, that's actually valid traditional behavior, with the inner sleep being some fairly rare case (like taking a sleeping lock that normally doesn't actually need to sleep). And the debug code would actually change the state of the task to TASK_RUNNING internally, which makes that kind of traditional and working code not work at all, because now the nested sleep doesn't just sometimes cause the outer one to not block, but will cause it to happen every time. In particular, it will cause the cardbus kernel daemon (pccardd) to basically busy-loop doing scheduling, converting a laptop into a heater, as reported by Bruno Prémont. But there may be other legacy uses of that nested sleep model in other drivers that are also likely to never get converted to the new model. This fixes both cases: - don't set TASK_RUNNING when the nested condition happens (note: even if WARN_ONCE() only _warns_ once, the return value isn't whether the warning happened, but whether the condition for the warning was true. So despite the warning only happening once, the "if (WARN_ON(..))" would trigger for every nested sleep. - in the cases where we knowingly disable the warning by using "sched_annotate_sleep()", don't change the task state (that is used for all core scheduling decisions), instead use '->task_state_change' that is used for the debugging decision itself. (Credit for the second part of the fix goes to Oleg Nesterov: "Can't we avoid this subtle change in behaviour DEBUG_ATOMIC_SLEEP adds?" 
with the suggested change to use 'task_state_change' as part of the test) Reported-and-bisected-by: Bruno Prémont Tested-by: Rafael J Wysocki Acked-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner , Cc: Ilya Dryomov , Cc: Mike Galbraith Cc: Ingo Molnar Cc: Peter Hurley , Cc: Davidlohr Bueso , Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5449d2f4a1ef..64ce58bee6f5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -176,7 +176,7 @@ extern int _cond_resched(void); */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) -# define sched_annotate_sleep() __set_current_state(TASK_RUNNING) +# define sched_annotate_sleep() (current->task_state_change = 0) #else static inline void ___might_sleep(const char *file, int line, int preempt_offset) { } -- cgit v1.2.3
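A hedged sketch of the legacy nested-sleep pattern the last commit preserves (the lock, condition and helpers are illustrative, not from the patch): the annotation marks the inner sleep as intentional for DEBUG_ATOMIC_SLEEP and, with this change, no longer rewrites the task state used for scheduling decisions.

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (event_pending())
			break;
		sched_annotate_sleep();		/* the nested sleep below is known and accepted */
		mutex_lock(&cfg_lock);		/* may sleep, legitimately leaving us TASK_RUNNING */
		poll_hardware();
		mutex_unlock(&cfg_lock);
		schedule();
	}
	__set_current_state(TASK_RUNNING);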