From b795218075a1e1183169abb66a90dcdcf30367f9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 27 Oct 2010 15:34:16 -0700 Subject: ipc/shm.c: add RSS and swap size information to /proc/sysvipc/shm The kernel currently provides no functionality to analyze the RSS and swap space usage of each individual sysvipc shared memory segment. This patch adds this info for each existing shm segment by extending the output of /proc/sysvipc/shm by two columns for RSS and swap. Since shmctl(SHM_INFO) already provides a similiar calculation (it currently sums up all RSS/swap info for all segments), I did split out a static function which is now used by the /proc/sysvipc/shm output and shmctl(SHM_INFO). SAP products (esp. the SAP Netweaver ABAP Kernel) uses lots of big shared memory segments (we often have Linux systems with >= 16GB shm usage). Sometimes we get customer reports about "slow" system responses and while looking into their configurations we often find massive swapping activity on the system. With this patch it's now easy to see from the command line if and which shm segments gets swapped out (and how much) and can more easily give recommendations for system tuning. Without the patch it's currently not possible to do such shm analysis at all. Also... Add some spaces in front of the "size" field for 64bit kernels to get the columns correct if you cat the contents of the file. In sysvipc_shm_proc_show() the kernel prints the size value in "SPEC_SIZE" format, which is defined like this: #if BITS_PER_LONG <= 32 #define SIZE_SPEC "%10lu" #else #define SIZE_SPEC "%21lu" #endif So, if the header is not adjusted, the columns are not correctly aligned. I actually tested this on 32- and 64-bit and it seems correct now. Signed-off-by: Helge Deller Cc: Manfred Spraul Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/shm.c | 63 ++++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 21 deletions(-) (limited to 'ipc') diff --git a/ipc/shm.c b/ipc/shm.c index 7bc46a9fe1f8..fd658a1c2b88 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -108,7 +108,11 @@ void __init shm_init (void) { shm_init_ns(&init_ipc_ns); ipc_init_proc_interface("sysvipc/shm", - " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n", +#if BITS_PER_LONG <= 32 + " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", +#else + " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", +#endif IPC_SHM_IDS, sysvipc_shm_proc_show); } @@ -543,6 +547,34 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf } } +/* + * Calculate and add used RSS and swap pages of a shm. + * Called with shm_ids.rw_mutex held as a reader + */ +static void shm_add_rss_swap(struct shmid_kernel *shp, + unsigned long *rss_add, unsigned long *swp_add) +{ + struct inode *inode; + + inode = shp->shm_file->f_path.dentry->d_inode; + + if (is_file_hugepages(shp->shm_file)) { + struct address_space *mapping = inode->i_mapping; + struct hstate *h = hstate_file(shp->shm_file); + *rss_add += pages_per_huge_page(h) * mapping->nrpages; + } else { +#ifdef CONFIG_SHMEM + struct shmem_inode_info *info = SHMEM_I(inode); + spin_lock(&info->lock); + *rss_add += inode->i_mapping->nrpages; + *swp_add += info->swapped; + spin_unlock(&info->lock); +#else + *rss_add += inode->i_mapping->nrpages; +#endif + } +} + /* * Called with shm_ids.rw_mutex held as a reader */ @@ -560,30 +592,13 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, for (total = 0, next_id = 0; total < in_use; next_id++) { struct kern_ipc_perm *ipc; struct shmid_kernel *shp; - struct inode *inode; ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id); if (ipc == NULL) continue; shp = container_of(ipc, struct shmid_kernel, shm_perm); - inode = shp->shm_file->f_path.dentry->d_inode; - - if (is_file_hugepages(shp->shm_file)) { - struct address_space *mapping = inode->i_mapping; - struct hstate *h = hstate_file(shp->shm_file); - *rss += pages_per_huge_page(h) * mapping->nrpages; - } else { -#ifdef CONFIG_SHMEM - struct shmem_inode_info *info = SHMEM_I(inode); - spin_lock(&info->lock); - *rss += inode->i_mapping->nrpages; - *swp += info->swapped; - spin_unlock(&info->lock); -#else - *rss += inode->i_mapping->nrpages; -#endif - } + shm_add_rss_swap(shp, rss, swp); total++; } @@ -1072,6 +1087,9 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) static int sysvipc_shm_proc_show(struct seq_file *s, void *it) { struct shmid_kernel *shp = it; + unsigned long rss = 0, swp = 0; + + shm_add_rss_swap(shp, &rss, &swp); #if BITS_PER_LONG <= 32 #define SIZE_SPEC "%10lu" @@ -1081,7 +1099,8 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it) return seq_printf(s, "%10d %10d %4o " SIZE_SPEC " %5u %5u " - "%5lu %5u %5u %5u %5u %10lu %10lu %10lu\n", + "%5lu %5u %5u %5u %5u %10lu %10lu %10lu " + SIZE_SPEC " " SIZE_SPEC "\n", shp->shm_perm.key, shp->shm_perm.id, shp->shm_perm.mode, @@ -1095,6 +1114,8 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it) shp->shm_perm.cgid, shp->shm_atim, shp->shm_dtim, - shp->shm_ctim); + shp->shm_ctim, + rss * PAGE_SIZE, + swp * PAGE_SIZE); } #endif -- cgit v1.2.3 From 03145beb455cf5c20a761e8451e30b8a74ba58d9 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Wed, 27 Oct 2010 15:34:17 -0700 Subject: ipc: initialize structure memory to zero for compat functions This takes care of leaking uninitialized kernel stack memory to userspace from non-zeroed fields in structs in compat ipc functions. Signed-off-by: Dan Rosenberg Cc: Manfred Spraul Cc: Arnd Bergmann Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/compat.c | 6 ++++++ ipc/compat_mq.c | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'ipc') diff --git a/ipc/compat.c b/ipc/compat.c index 9dc2c7d3c9e6..845a28738d3a 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -241,6 +241,8 @@ long compat_sys_semctl(int first, int second, int third, void __user *uptr) struct semid64_ds __user *up64; int version = compat_ipc_parse_version(&third); + memset(&s64, 0, sizeof(s64)); + if (!uptr) return -EINVAL; if (get_user(pad, (u32 __user *) uptr)) @@ -421,6 +423,8 @@ long compat_sys_msgctl(int first, int second, void __user *uptr) int version = compat_ipc_parse_version(&second); void __user *p; + memset(&m64, 0, sizeof(m64)); + switch (second & (~IPC_64)) { case IPC_INFO: case IPC_RMID: @@ -594,6 +598,8 @@ long compat_sys_shmctl(int first, int second, void __user *uptr) int err, err2; int version = compat_ipc_parse_version(&second); + memset(&s64, 0, sizeof(s64)); + switch (second & (~IPC_64)) { case IPC_RMID: case SHM_LOCK: diff --git a/ipc/compat_mq.c b/ipc/compat_mq.c index d8d1e9ff4e88..380ea4fe08e7 100644 --- a/ipc/compat_mq.c +++ b/ipc/compat_mq.c @@ -53,6 +53,9 @@ asmlinkage long compat_sys_mq_open(const char __user *u_name, void __user *p = NULL; if (u_attr && oflag & O_CREAT) { struct mq_attr attr; + + memset(&attr, 0, sizeof(attr)); + p = compat_alloc_user_space(sizeof(attr)); if (get_compat_mq_attr(&attr, u_attr) || copy_to_user(p, &attr, sizeof(attr))) @@ -127,6 +130,8 @@ asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, struct mq_attr __user *p = compat_alloc_user_space(2 * sizeof(*p)); long ret; + memset(&mqstat, 0, sizeof(mqstat)); + if (u_mqstat) { if (get_compat_mq_attr(&mqstat, u_mqstat) || copy_to_user(p, &mqstat, sizeof(mqstat))) -- cgit v1.2.3