diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/binfmt_elf.c | 2 | ||||
-rw-r--r-- | fs/cifs/transport.c | 2 | ||||
-rw-r--r-- | fs/eventpoll.c | 38 | ||||
-rw-r--r-- | fs/ext4/super.c | 4 | ||||
-rw-r--r-- | fs/fat/dir.c | 13 | ||||
-rw-r--r-- | fs/fat/fat.h | 1 | ||||
-rw-r--r-- | fs/fat/inode.c | 9 | ||||
-rw-r--r-- | fs/fs-writeback.c | 2 | ||||
-rw-r--r-- | fs/fuse/dev.c | 6 | ||||
-rw-r--r-- | fs/nfs/inode.c | 2 | ||||
-rw-r--r-- | fs/nfs/nfs3proc.c | 2 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 4 | ||||
-rw-r--r-- | fs/proc/Kconfig | 12 | ||||
-rw-r--r-- | fs/proc/base.c | 46 | ||||
-rw-r--r-- | fs/proc/meminfo.c | 12 | ||||
-rw-r--r-- | fs/proc/stat.c | 5 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 62 | ||||
-rw-r--r-- | fs/pstore/ram.c | 9 | ||||
-rw-r--r-- | fs/pstore/ram_core.c | 46 | ||||
-rw-r--r-- | fs/select.c | 9 | ||||
-rw-r--r-- | fs/timerfd.c | 132 |
21 files changed, 363 insertions, 55 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3aac8e9edac3..b6df167050fb 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1394,7 +1394,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm) } static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, - siginfo_t *siginfo) + const siginfo_t *siginfo) { mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bfbf4700d160..b70aa7c91394 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -447,7 +447,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) { int error; - error = wait_event_freezekillable(server->response_q, + error = wait_event_freezekillable_unsafe(server->response_q, midQ->mid_state != MID_REQUEST_SUBMITTED); if (error < 0) return -ERESTARTSYS; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index deecc7294a67..ab2157896d1d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -9,6 +9,8 @@ * * Davide Libenzi <davidel@xmailserver.org> * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * */ #include <linux/init.h> @@ -34,6 +36,7 @@ #include <linux/mutex.h> #include <linux/anon_inodes.h> #include <linux/device.h> +#include <linux/freezer.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/mman.h> @@ -966,7 +969,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) * mechanism. It is called by the stored file descriptors when they * have events to report. */ -static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key) +int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key) { int pwake = 0; unsigned long flags; @@ -1063,6 +1066,7 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) { init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); + pwq->wait.private = get_thread_process(current); pwq->whead = whead; pwq->base = epi; add_wait_queue(whead, &pwq->wait); @@ -1286,6 +1290,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, spin_lock(&tfile->f_lock); list_add_tail(&epi->fllink, &tfile->f_ep_links); spin_unlock(&tfile->f_lock); + tfile->f_path.dentry->d_inode->i_private = get_thread_process(current); /* * Add the current item to the RB tree. All RB tree operations are @@ -1602,7 +1607,8 @@ fetch_events: } spin_unlock_irqrestore(&ep->lock, flags); - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!freezable_schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags); @@ -1726,6 +1732,34 @@ static void clear_tfile_check_list(void) INIT_LIST_HEAD(&tfile_check_list); } +struct task_struct *get_epoll_file_task(struct file *file) +{ + struct list_head *lh; + struct epitem *epi = NULL; + struct eppoll_entry *pwq = NULL; + struct task_struct *task = NULL; + wait_queue_t *wq = NULL; + + lh = &file->f_ep_links; + if (!list_empty(lh)) { + lh = lh->next; + epi = list_entry(lh, struct epitem, fllink); + lh = &epi->pwqlist; + if (!list_empty(lh)) { + lh = lh->next; + pwq = list_entry(lh, struct eppoll_entry, llink); + lh = &pwq->whead->task_list; + if (!list_empty(lh)) { + lh = lh->next; + wq = list_entry(lh, wait_queue_t, task_list); + task = wq->private; + } + } + } + + return task; +} + /* * Open an eventpoll file descriptor. */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a7a5f7ea74db..468e26500dfa 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3581,8 +3581,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); if (sbi->s_inodes_per_block == 0) goto cantfind_ext4; - sbi->s_itb_per_group = sbi->s_inodes_per_group / - sbi->s_inodes_per_block; + sbi->s_itb_per_group = DIV_ROUND_UP(sbi->s_inodes_per_group, + sbi->s_inodes_per_block); sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); sbi->s_sbh = bh; sbi->s_mount_state = le16_to_cpu(es->s_state); diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 7a6f02caf286..6cc4c9e9c7b5 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -103,8 +103,6 @@ next: *bh = sb_bread(sb, phys); if (*bh == NULL) { - fat_msg_ratelimit(sb, KERN_ERR, - "Directory bread(block %llu) failed", (llu)phys); /* skip this block */ *pos = (iblock + 1) << sb->s_blocksize_bits; goto next; @@ -776,6 +774,13 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp, return ret; } +static int fat_ioctl_volume_id(struct inode *dir) +{ + struct super_block *sb = dir->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + return sbi->vol_id; +} + static long fat_dir_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -792,6 +797,8 @@ static long fat_dir_ioctl(struct file *filp, unsigned int cmd, short_only = 0; both = 1; break; + case VFAT_IOCTL_GET_VOLUME_ID: + return fat_ioctl_volume_id(inode); default: return fat_generic_ioctl(filp, cmd, arg); } @@ -832,6 +839,8 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, short_only = 0; both = 1; break; + case VFAT_IOCTL_GET_VOLUME_ID: + return fat_ioctl_volume_id(inode); default: return fat_generic_ioctl(filp, cmd, (unsigned long)arg); } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 21664fcf3616..8180ecef59fc 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -86,6 +86,7 @@ struct msdos_sb_info { const void *dir_ops; /* Opaque; default directory operations */ int dir_per_block; /* dir entries per block */ int dir_per_block_bits; /* log2(dir_per_block) */ + unsigned long vol_id; /* volume ID */ int fatent_shift; struct fatent_operations *fatent_ops; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5d4513cb1b3c..a14dd4c0528a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1252,6 +1252,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, struct inode *fsinfo_inode = NULL; struct buffer_head *bh; struct fat_boot_sector *b; + struct fat_boot_bsx *bsx; struct msdos_sb_info *sbi; u16 logical_sector_size; u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors; @@ -1398,6 +1399,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, goto out_fail; } + bsx = (struct fat_boot_bsx *)(bh->b_data + FAT32_BSX_OFFSET); + fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data; if (!IS_FSINFO(fsinfo)) { fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: " @@ -1413,8 +1416,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, } brelse(fsinfo_bh); + } else { + bsx = (struct fat_boot_bsx *)(bh->b_data + FAT16_BSX_OFFSET); } + /* interpret volume ID as a little endian 32 bit integer */ + sbi->vol_id = (((u32)bsx->vol_id[0]) | ((u32)bsx->vol_id[1] << 8) | + ((u32)bsx->vol_id[2] << 16) | ((u32)bsx->vol_id[3] << 24)); + sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry); sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 387213ac2608..556af9eff336 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1155,7 +1155,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) if ((inode->i_state & flags) == flags) return; - if (unlikely(block_dump)) + if (unlikely(block_dump > 1)) block_dump___mark_inode_dirty(inode); spin_lock(&inode->i_lock); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 23bf1a52a5da..8eb5a0992658 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -20,6 +20,7 @@ #include <linux/swap.h> #include <linux/splice.h> #include <linux/aio.h> +#include <linux/freezer.h> MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); @@ -464,7 +465,10 @@ __acquires(fc->lock) * Wait it out. */ spin_unlock(&fc->lock); - wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); + + while (req->state != FUSE_REQ_FINISHED) + wait_event_freezable(req->waitq, + req->state == FUSE_REQ_FINISHED); spin_lock(&fc->lock); if (!req->aborted) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c1c7a9d78722..ce727047ee87 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -79,7 +79,7 @@ int nfs_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - freezable_schedule(); + freezable_schedule_unsafe(); return 0; } EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 43ea96ced28c..ce90eb4775c2 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -33,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) res = rpc_call_sync(clnt, msg, flags); if (res != -EJUKEBOX) break; - freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; } while (!fatal_signal_pending(current)); return res; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1ae7dd5956c5..bfeb1d13b08f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -268,7 +268,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) *timeout = NFS4_POLL_RETRY_MIN; if (*timeout > NFS4_POLL_RETRY_MAX) *timeout = NFS4_POLL_RETRY_MAX; - freezable_schedule_timeout_killable(*timeout); + freezable_schedule_timeout_killable_unsafe(*timeout); if (fatal_signal_pending(current)) res = -ERESTARTSYS; *timeout <<= 1; @@ -4529,7 +4529,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 static unsigned long nfs4_set_lock_task_retry(unsigned long timeout) { - freezable_schedule_timeout_killable(timeout); + freezable_schedule_timeout_killable_unsafe(timeout); timeout <<= 1; if (timeout > NFS4_LOCK_MAXTIMEOUT) return NFS4_LOCK_MAXTIMEOUT; diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 15af6222f8a4..ddb83a0e15e8 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -67,3 +67,15 @@ config PROC_PAGE_MONITOR /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, /proc/kpagecount, and /proc/kpageflags. Disabling these interfaces will reduce the size of the kernel by approximately 4kb. + +config REPORT_PRESENT_CPUS + default n + depends on PROC_FS && SMP + bool "Report present cpus instead of online cpus" + help + This is a work around to report Present CPUs instead of Online CPUs. + Some power savings implements use CPU hotplug for power domains. + It is a bug to enable this on a server or other architecture that + uses cpu hotplug in the correct way. + + diff --git a/fs/proc/base.c b/fs/proc/base.c index de12b8128b95..4823113f2584 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -45,6 +45,8 @@ * * Paul Mundt <paul.mundt@nokia.com>: * Overall revision about smaps. + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. */ #include <asm/uaccess.h> @@ -139,6 +141,12 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = show } ) +/* ANDROID is for special files in /proc. */ +#define ANDROID(NAME, MODE, OTYPE) \ + NOD(NAME, (S_IFREG|(MODE)), \ + &proc_##OTYPE##_inode_operations, \ + &proc_##OTYPE##_operations, {}) + /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. @@ -200,7 +208,12 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static int proc_pid_cmdline(struct task_struct *task, char * buffer) +struct mm_struct *mm_for_maps(struct task_struct *task) +{ + return mm_access(task, PTRACE_MODE_READ); +} + +static int proc_pid_cmdline(struct task_struct *task, char *buffer) { int res = 0; unsigned int len; @@ -1000,6 +1013,35 @@ out: return err < 0 ? err : count; } +static int oom_adjust_permission(struct inode *inode, int mask) +{ + uid_t uid; + struct task_struct *p; + + p = get_proc_task(inode); + if(p) { + uid = task_uid(p); + put_task_struct(p); + } + + /* + * System Server (uid == 1000) is granted access to oom_adj of all + * android applications (uid > 10000) as and services (uid >= 1000) + */ + if (p && (current_fsuid() == 1000) && (uid >= 1000)) { + if (inode->i_mode >> 6 & mask) { + return 0; + } + } + + /* Fall back to default. */ + return generic_permission(inode, mask); +} + +static const struct inode_operations proc_oom_adj_inode_operations = { + .permission = oom_adjust_permission, +}; + static const struct file_operations proc_oom_adj_operations = { .read = oom_adj_read, .write = oom_adj_write, @@ -2698,7 +2740,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), + ANDROID("oom_adj", S_IRUGO|S_IWUSR, oom_adj), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 5aa847a603c0..677ab67ad6cc 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -16,6 +16,10 @@ #include <asm/pgtable.h> #include "internal.h" +#if defined(CONFIG_TEGRA_NVMAP) +#include <linux/nvmap.h> +#endif + void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) { } @@ -105,6 +109,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #ifdef CONFIG_TRANSPARENT_HUGEPAGE "AnonHugePages: %8lu kB\n" #endif +#if defined(CONFIG_TEGRA_NVMAP) + "NvMapMemFree: %8lu kB\n" + "NvMapMemUsed: %8lu kB\n" +#endif , K(i.totalram), K(i.freeram), @@ -165,6 +173,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * HPAGE_PMD_NR) #endif +#if defined(CONFIG_TEGRA_NVMAP) + , K(nvmap_page_pool_get_unused_pages()), + K(nvmap_iovmm_get_used_pages()) +#endif ); hugetlb_report_meminfo(m); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 1cf86c0e8689..36a15504b6e9 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -130,7 +130,12 @@ static int show_stat(struct seq_file *p, void *v) seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); seq_putc(p, '\n'); +#if defined(CONFIG_REPORT_PRESENT_CPUS) + for_each_present_cpu(i) { +#else for_each_online_cpu(i) { +#endif + /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE]; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 65fc60a07c47..db17f98bc564 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -134,6 +134,56 @@ static void release_task_mempolicy(struct proc_maps_private *priv) } #endif +static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma) +{ + const char __user *name = vma_get_anon_name(vma); + struct mm_struct *mm = vma->vm_mm; + + unsigned long page_start_vaddr; + unsigned long page_offset; + unsigned long num_pages; + unsigned long max_len = NAME_MAX; + int i; + + page_start_vaddr = (unsigned long)name & PAGE_MASK; + page_offset = (unsigned long)name - page_start_vaddr; + num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE); + + seq_puts(m, "[anon:"); + + for (i = 0; i < num_pages; i++) { + int len; + int write_len; + const char *kaddr; + long pages_pinned; + struct page *page; + + pages_pinned = get_user_pages(current, mm, page_start_vaddr, + 1, 0, 0, &page, NULL); + if (pages_pinned < 1) { + seq_puts(m, "<fault>]"); + return; + } + + kaddr = (const char *)kmap(page); + len = min(max_len, PAGE_SIZE - page_offset); + write_len = strnlen(kaddr + page_offset, len); + seq_write(m, kaddr + page_offset, write_len); + kunmap(page); + put_page(page); + + /* if strnlen hit a null terminator then we're done */ + if (write_len != len) + break; + + max_len -= len; + page_offset = 0; + page_start_vaddr += PAGE_SIZE; + } + + seq_putc(m, ']'); +} + static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) { if (vma && vma != priv->tail_vma) { @@ -335,6 +385,12 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) pad_len_spaces(m, len); seq_printf(m, "[stack:%d]", tid); } + goto done; + } + + if (vma_get_anon_name(vma)) { + pad_len_spaces(m, len); + seq_print_vma_name(m, vma); } } @@ -634,6 +690,12 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) show_smap_vma_flags(m, vma); + if (vma_get_anon_name(vma)) { + seq_puts(m, "Name: "); + seq_print_vma_name(m, vma); + seq_putc(m, '\n'); + } + if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task->mm)) ? vma->vm_start : 0; diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 1376e5a8f0d6..7fd001ccdef4 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -161,6 +161,9 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, /* ECC correction notice */ ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); + if (!(size + ecc_notice_size)) + return 0; + *buf = kmalloc(size + ecc_notice_size + 1, GFP_KERNEL); if (*buf == NULL) return -ENOMEM; @@ -377,6 +380,12 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, return 0; } +void notrace ramoops_console_write_buf(const char *buf, size_t size) +{ + struct ramoops_context *cxt = &oops_cxt; + persistent_ram_write(cxt->cprz, buf, size); +} + static int ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 59337326e288..5b1f222b5044 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -24,25 +24,43 @@ #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/pstore_ram.h> +#include <linux/spinlock.h> #include <asm/page.h> struct persistent_ram_buffer { - uint32_t sig; - atomic_t start; - atomic_t size; - uint8_t data[0]; + uint32_t sig; + size_t start; + size_t size; + uint8_t data[0]; }; +static DEFINE_SPINLOCK(buffer_lock); + #define PERSISTENT_RAM_SIG (0x43474244) /* DBGC */ static inline size_t buffer_size(struct persistent_ram_zone *prz) { - return atomic_read(&prz->buffer->size); + return prz->buffer->size; } static inline size_t buffer_start(struct persistent_ram_zone *prz) { - return atomic_read(&prz->buffer->start); + return prz->buffer->start; +} + +static inline int compare_and_exchange(size_t *v, int old, int new) +{ + size_t ret; + + spin_lock(&buffer_lock); + + ret = *v; + if (likely(ret == old)) + *v = new; + + spin_unlock(&buffer_lock); + + return ret; } /* increase and wrap the start pointer, returning the old value */ @@ -52,11 +70,11 @@ static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) int new; do { - old = atomic_read(&prz->buffer->start); + old = prz->buffer->start; new = old + a; while (unlikely(new > prz->buffer_size)) new -= prz->buffer_size; - } while (atomic_cmpxchg(&prz->buffer->start, old, new) != old); + } while (compare_and_exchange(&prz->buffer->start, old, new) != old); return old; } @@ -67,15 +85,15 @@ static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a) size_t old; size_t new; - if (atomic_read(&prz->buffer->size) == prz->buffer_size) + if (prz->buffer->size == prz->buffer_size) return; do { - old = atomic_read(&prz->buffer->size); + old = prz->buffer->size; new = old + a; if (new > prz->buffer_size) new = prz->buffer_size; - } while (atomic_cmpxchg(&prz->buffer->size, old, new) != old); + } while (compare_and_exchange(&prz->buffer->size, old, new) != old); } static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, @@ -328,8 +346,8 @@ void persistent_ram_free_old(struct persistent_ram_zone *prz) void persistent_ram_zap(struct persistent_ram_zone *prz) { - atomic_set(&prz->buffer->start, 0); - atomic_set(&prz->buffer->size, 0); + prz->buffer->start = 0; + prz->buffer->size = 0; persistent_ram_update_header_ecc(prz); } @@ -345,7 +363,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size) page_start = start - offset_in_page(start); page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE); - prot = pgprot_noncached(PAGE_KERNEL); + prot = pgprot_writecombine(PAGE_KERNEL); pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL); if (!pages) { diff --git a/fs/select.c b/fs/select.c index 8c1c96c27062..0595305eeb0a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -12,6 +12,8 @@ * 24 January 2000 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). + * + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. */ #include <linux/kernel.h> @@ -27,6 +29,7 @@ #include <linux/rcupdate.h> #include <linux/hrtimer.h> #include <linux/sched/rt.h> +#include <linux/freezer.h> #include <asm/uaccess.h> @@ -203,7 +206,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) return default_wake_function(&dummy_wait, mode, sync, key); } -static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct poll_table_entry *entry; @@ -212,6 +215,7 @@ static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) return 0; return __pollwake(wait, mode, sync, key); } +EXPORT_SYMBOL(pollwake); /* Add a new entry */ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, @@ -236,7 +240,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, set_current_state(state); if (!pwq->triggered) - rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); + rc = freezable_schedule_hrtimeout_range(expires, slack, + HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* diff --git a/fs/timerfd.c b/fs/timerfd.c index 32b644f03690..0013142c0475 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -8,6 +8,7 @@ * */ +#include <linux/alarmtimer.h> #include <linux/file.h> #include <linux/poll.h> #include <linux/init.h> @@ -26,7 +27,10 @@ #include <linux/rcupdate.h> struct timerfd_ctx { - struct hrtimer tmr; + union { + struct hrtimer tmr; + struct alarm alarm; + } t; ktime_t tintv; ktime_t moffs; wait_queue_head_t wqh; @@ -41,14 +45,19 @@ struct timerfd_ctx { static LIST_HEAD(cancel_list); static DEFINE_SPINLOCK(cancel_lock); +static inline bool isalarm(struct timerfd_ctx *ctx) +{ + return ctx->clockid == CLOCK_REALTIME_ALARM || + ctx->clockid == CLOCK_BOOTTIME_ALARM; +} + /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, * tintv.tv64 != 0) until the timer is accessed. */ -static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) +static void timerfd_triggered(struct timerfd_ctx *ctx) { - struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr); unsigned long flags; spin_lock_irqsave(&ctx->wqh.lock, flags); @@ -56,10 +65,25 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) ctx->ticks++; wake_up_locked(&ctx->wqh); spin_unlock_irqrestore(&ctx->wqh.lock, flags); +} +static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) +{ + struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, + t.tmr); + timerfd_triggered(ctx); return HRTIMER_NORESTART; } +static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm, + ktime_t now) +{ + struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx, + t.alarm); + timerfd_triggered(ctx); + return ALARMTIMER_NORESTART; +} + /* * Called when the clock was set to cancel the timers in the cancel * list. This will wake up processes waiting on these timers. The @@ -107,8 +131,9 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx) static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { - if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && - (flags & TFD_TIMER_CANCEL_ON_SET)) { + if ((ctx->clockid == CLOCK_REALTIME || + ctx->clockid == CLOCK_REALTIME_ALARM) && + (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { if (!ctx->might_cancel) { ctx->might_cancel = true; spin_lock(&cancel_lock); @@ -124,7 +149,11 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) { ktime_t remaining; - remaining = hrtimer_expires_remaining(&ctx->tmr); + if (isalarm(ctx)) + remaining = alarm_expires_remaining(&ctx->t.alarm); + else + remaining = hrtimer_expires_remaining(&ctx->t.tmr); + return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } @@ -142,11 +171,28 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, ctx->expired = 0; ctx->ticks = 0; ctx->tintv = timespec_to_ktime(ktmr->it_interval); - hrtimer_init(&ctx->tmr, clockid, htmode); - hrtimer_set_expires(&ctx->tmr, texp); - ctx->tmr.function = timerfd_tmrproc; + + if (isalarm(ctx)) { + alarm_init(&ctx->t.alarm, + ctx->clockid == CLOCK_REALTIME_ALARM ? + ALARM_REALTIME : ALARM_BOOTTIME, + timerfd_alarmproc); + } else { + hrtimer_init(&ctx->t.tmr, clockid, htmode); + hrtimer_set_expires(&ctx->t.tmr, texp); + ctx->t.tmr.function = timerfd_tmrproc; + } + if (texp.tv64 != 0) { - hrtimer_start(&ctx->tmr, texp, htmode); + if (isalarm(ctx)) { + if (flags & TFD_TIMER_ABSTIME) + alarm_start(&ctx->t.alarm, texp); + else + alarm_start_relative(&ctx->t.alarm, texp); + } else { + hrtimer_start(&ctx->t.tmr, texp, htmode); + } + if (timerfd_canceled(ctx)) return -ECANCELED; } @@ -158,7 +204,11 @@ static int timerfd_release(struct inode *inode, struct file *file) struct timerfd_ctx *ctx = file->private_data; timerfd_remove_cancel(ctx); - hrtimer_cancel(&ctx->tmr); + + if (isalarm(ctx)) + alarm_cancel(&ctx->t.alarm); + else + hrtimer_cancel(&ctx->t.tmr); kfree_rcu(ctx, rcu); return 0; } @@ -215,9 +265,15 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, * callback to avoid DoS attacks specifying a very * short timer period. */ - ticks += hrtimer_forward_now(&ctx->tmr, - ctx->tintv) - 1; - hrtimer_restart(&ctx->tmr); + if (isalarm(ctx)) { + ticks += alarm_forward_now( + &ctx->t.alarm, ctx->tintv) - 1; + alarm_restart(&ctx->t.alarm); + } else { + ticks += hrtimer_forward_now(&ctx->t.tmr, + ctx->tintv) - 1; + hrtimer_restart(&ctx->t.tmr); + } } ctx->expired = 0; ctx->ticks = 0; @@ -259,7 +315,10 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) if ((flags & ~TFD_CREATE_FLAGS) || (clockid != CLOCK_MONOTONIC && - clockid != CLOCK_REALTIME)) + clockid != CLOCK_REALTIME && + clockid != CLOCK_REALTIME_ALARM && + clockid != CLOCK_BOOTTIME && + clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -268,7 +327,15 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) init_waitqueue_head(&ctx->wqh); ctx->clockid = clockid; - hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); + + if (isalarm(ctx)) + alarm_init(&ctx->t.alarm, + ctx->clockid == CLOCK_REALTIME_ALARM ? + ALARM_REALTIME : ALARM_BOOTTIME, + timerfd_alarmproc); + else + hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); + ctx->moffs = ktime_get_monotonic_offset(); ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, @@ -305,8 +372,14 @@ static int do_timerfd_settime(int ufd, int flags, */ for (;;) { spin_lock_irq(&ctx->wqh.lock); - if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) - break; + + if (isalarm(ctx)) { + if (alarm_try_to_cancel(&ctx->t.alarm) >= 0) + break; + } else { + if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0) + break; + } spin_unlock_irq(&ctx->wqh.lock); cpu_relax(); } @@ -317,8 +390,12 @@ static int do_timerfd_settime(int ufd, int flags, * We do not update "ticks" and "expired" since the timer will be * re-programmed again in the following timerfd_setup() call. */ - if (ctx->expired && ctx->tintv.tv64) - hrtimer_forward_now(&ctx->tmr, ctx->tintv); + if (ctx->expired && ctx->tintv.tv64) { + if (isalarm(ctx)) + alarm_forward_now(&ctx->t.alarm, ctx->tintv); + else + hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); + } old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); old->it_interval = ktime_to_timespec(ctx->tintv); @@ -345,9 +422,18 @@ static int do_timerfd_gettime(int ufd, struct itimerspec *t) spin_lock_irq(&ctx->wqh.lock); if (ctx->expired && ctx->tintv.tv64) { ctx->expired = 0; - ctx->ticks += - hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; - hrtimer_restart(&ctx->tmr); + + if (isalarm(ctx)) { + ctx->ticks += + alarm_forward_now( + &ctx->t.alarm, ctx->tintv) - 1; + alarm_restart(&ctx->t.alarm); + } else { + ctx->ticks += + hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) + - 1; + hrtimer_restart(&ctx->t.tmr); + } } t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); t->it_interval = ktime_to_timespec(ctx->tintv); |