summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- fs/hugetlbfs/inode.c 2
-rw-r--r-- include/linux/mempolicy.h 22
-rw-r--r-- include/linux/shmem_fs.h 4
-rw-r--r-- mm/mempolicy.c 144
-rw-r--r-- mm/shmem.c 57
5 files changed, 134 insertions, 95 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2e9e5bdd5629..9783723e8ffe 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -504,7 +504,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
INIT_LIST_HEAD(&inode->i_mapping->private_list);
info = HUGETLBFS_I(inode);
- mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, 0, NULL);
+ mpol_shared_policy_init(&info->policy, NULL);
switch (mode & S_IFMT) {
default:
init_special_inode(inode, mode, dev);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index dcc17378c952..3a39570b81b8 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -182,8 +182,7 @@ struct shared_policy {
spinlock_t lock;
};
-void mpol_shared_policy_init(struct shared_policy *info, unsigned short mode,
- unsigned short flags, nodemask_t *nodes);
+void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
int mpol_set_shared_policy(struct shared_policy *info,
struct vm_area_struct *vma,
struct mempolicy *new);
@@ -216,10 +215,10 @@ int do_migrate_pages(struct mm_struct *mm,
#ifdef CONFIG_TMPFS
-extern int mpol_parse_str(char *str, unsigned short *mode,
- unsigned short *mode_flags, nodemask_t *policy_nodes);
+extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
-extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
+extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
+ int no_context);
#endif
#else
@@ -262,8 +261,8 @@ static inline int mpol_set_shared_policy(struct shared_policy *info,
return -EINVAL;
}
-static inline void mpol_shared_policy_init(struct shared_policy *info,
- unsigned short mode, unsigned short flags, nodemask_t *nodes)
+static inline void mpol_shared_policy_init(struct shared_policy *sp,
+ struct mempolicy *mpol)
{
}
@@ -322,13 +321,14 @@ static inline void check_highest_zone(int k)
}
#ifdef CONFIG_TMPFS
-static inline int mpol_parse_str(char *value, unsigned short *policy,
- unsigned short flags, nodemask_t *policy_nodes)
+static inline int mpol_parse_str(char *str, struct mempolicy **mpol,
+ int no_context)
{
- return 1;
+ return 1; /* error */
}
-static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
+static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
+ int no_context)
{
return 0;
}
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index d7699a628d78..f2d12d5a21b8 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -34,9 +34,7 @@ struct shmem_sb_info {
uid_t uid; /* Mount uid for root directory */
gid_t gid; /* Mount gid for root directory */
mode_t mode; /* Mount mode for root directory */
- unsigned short policy; /* Default NUMA memory alloc policy */
- unsigned short flags; /* Optional mempolicy flags */
- nodemask_t policy_nodes; /* nodemask for preferred and bind */
+ struct mempolicy *mpol; /* default memory policy for mappings */
};
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 6b751565eed1..a37a5034f63d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1828,27 +1828,35 @@ restart:
return 0;
}
-void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy,
- unsigned short flags, nodemask_t *policy_nodes)
-{
- info->root = RB_ROOT;
- spin_lock_init(&info->lock);
-
- if (policy != MPOL_DEFAULT) {
- struct mempolicy *newpol;
-
- /* Falls back to NULL policy [MPOL_DEFAULT] on any error */
- newpol = mpol_new(policy, flags, policy_nodes);
- if (!IS_ERR(newpol)) {
- /* Create pseudo-vma that contains just the policy */
- struct vm_area_struct pvma;
-
- memset(&pvma, 0, sizeof(struct vm_area_struct));
- /* Policy covers entire file */
- pvma.vm_end = TASK_SIZE;
- mpol_set_shared_policy(info, &pvma, newpol);
- mpol_put(newpol);
- }
+/**
+ * mpol_shared_policy_init - initialize shared policy for inode
+ * @sp: pointer to inode shared policy
+ * @mpol: struct mempolicy to install
+ *
+ * Install non-NULL @mpol in inode's shared policy rb-tree.
+ * On entry, the current task has a reference on a non-NULL @mpol.
+ * This must be released on exit.
+ */
+void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
+{
+ sp->root = RB_ROOT; /* empty tree == default mempolicy */
+ spin_lock_init(&sp->lock);
+
+ if (mpol) {
+ struct vm_area_struct pvma;
+ struct mempolicy *new;
+
+ /* contextualize the tmpfs mount point mempolicy */
+ new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
+ mpol_put(mpol); /* drop our ref on sb mpol */
+ if (IS_ERR(new))
+ return; /* no valid nodemask intersection */
+
+ /* Create pseudo-vma that contains just the policy */
+ memset(&pvma, 0, sizeof(struct vm_area_struct));
+ pvma.vm_end = TASK_SIZE; /* policy covers entire file */
+ mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
+ mpol_put(new); /* drop initial ref */
}
}
@@ -1962,18 +1970,27 @@ static const char * const policy_types[] =
/**
* mpol_parse_str - parse string to mempolicy
* @str: string containing mempolicy to parse
- * @mode: pointer to returned policy mode
- * @mode_flags: pointer to returned flags
- * @policy_nodes: pointer to returned nodemask
+ * @mpol: pointer to struct mempolicy pointer, returned on success.
+ * @no_context: flag whether to "contextualize" the mempolicy
*
* Format of input:
* <mode>[=<flags>][:<nodelist>]
*
- * Currently only used for tmpfs/shmem mount options
+ * if @no_context is true, save the input nodemask in w.user_nodemask in
+ * the returned mempolicy. This will be used to "clone" the mempolicy in
+ * a specific context [cpuset] at a later time. Used to parse tmpfs mpol
+ * mount option. Note that if 'static' or 'relative' mode flags were
+ * specified, the input nodemask will already have been saved. Saving
+ * it again is redundant, but safe.
+ *
+ * On success, returns 0, else 1
*/
-int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags,
- nodemask_t *policy_nodes)
+int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
{
+ struct mempolicy *new = NULL;
+ unsigned short uninitialized_var(mode);
+ unsigned short uninitialized_var(mode_flags);
+ nodemask_t nodes;
char *nodelist = strchr(str, ':');
char *flags = strchr(str, '=');
int i;
@@ -1982,26 +1999,30 @@ int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags,
if (nodelist) {
/* NUL-terminate mode or flags string */
*nodelist++ = '\0';
- if (nodelist_parse(nodelist, *policy_nodes))
+ if (nodelist_parse(nodelist, nodes))
goto out;
- if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY]))
+ if (!nodes_subset(nodes, node_states[N_HIGH_MEMORY]))
goto out;
- }
+ } else
+ nodes_clear(nodes);
+
if (flags)
*flags++ = '\0'; /* terminate mode string */
for (i = 0; i <= MPOL_LOCAL; i++) {
if (!strcmp(str, policy_types[i])) {
- *mode = i;
+ mode = i;
break;
}
}
if (i > MPOL_LOCAL)
goto out;
- switch (*mode) {
+ switch (mode) {
case MPOL_PREFERRED:
- /* Insist on a nodelist of one node only */
+ /*
+ * Insist on a nodelist of one node only
+ */
if (nodelist) {
char *rest = nodelist;
while (isdigit(*rest))
@@ -2010,63 +2031,73 @@ int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags,
err = 0;
}
break;
- case MPOL_BIND:
- /* Insist on a nodelist */
- if (nodelist)
- err = 0;
- break;
case MPOL_INTERLEAVE:
/*
* Default to online nodes with memory if no nodelist
*/
if (!nodelist)
- *policy_nodes = node_states[N_HIGH_MEMORY];
+ nodes = node_states[N_HIGH_MEMORY];
err = 0;
break;
- default:
+ case MPOL_LOCAL:
/*
- * MPOL_DEFAULT or MPOL_LOCAL
- * Don't allow a nodelist nor flags
+ * Don't allow a nodelist; mpol_new() checks flags
*/
- if (!nodelist && !flags)
- err = 0;
- if (*mode == MPOL_DEFAULT)
+ if (nodelist)
goto out;
- /* else MPOL_LOCAL */
- *mode = MPOL_PREFERRED;
- nodes_clear(*policy_nodes);
+ mode = MPOL_PREFERRED;
break;
+
+ /*
+ * case MPOL_BIND: mpol_new() enforces non-empty nodemask.
+ * case MPOL_DEFAULT: mpol_new() enforces empty nodemask, ignores flags.
+ */
}
- *mode_flags = 0;
+ mode_flags = 0;
if (flags) {
/*
* Currently, we only support two mutually exclusive
* mode flags.
*/
if (!strcmp(flags, "static"))
- *mode_flags |= MPOL_F_STATIC_NODES;
+ mode_flags |= MPOL_F_STATIC_NODES;
else if (!strcmp(flags, "relative"))
- *mode_flags |= MPOL_F_RELATIVE_NODES;
+ mode_flags |= MPOL_F_RELATIVE_NODES;
else
err = 1;
}
+
+ new = mpol_new(mode, mode_flags, &nodes);
+ if (IS_ERR(new))
+ err = 1;
+ else if (no_context)
+ new->w.user_nodemask = nodes; /* save for contextualization */
+
out:
/* Restore string for error message */
if (nodelist)
*--nodelist = ':';
if (flags)
*--flags = '=';
+ if (!err)
+ *mpol = new;
return err;
}
#endif /* CONFIG_TMPFS */
-/*
+/**
+ * mpol_to_str - format a mempolicy structure for printing
+ * @buffer: to contain formatted mempolicy string
+ * @maxlen: length of @buffer
+ * @pol: pointer to mempolicy to be formatted
+ * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask
+ *
* Convert a mempolicy into a string.
* Returns the number of characters in buffer (if positive)
* or an error (negative)
*/
-int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
+int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
{
char *p = buffer;
int l;
@@ -2100,7 +2131,10 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
case MPOL_BIND:
/* Fall through */
case MPOL_INTERLEAVE:
- nodes = pol->v.nodes;
+ if (no_context)
+ nodes = pol->w.user_nodemask;
+ else
+ nodes = pol->v.nodes;
break;
default:
@@ -2231,7 +2265,7 @@ int show_numa_map(struct seq_file *m, void *v)
return 0;
pol = get_vma_policy(priv->task, vma, vma->vm_start);
- mpol_to_str(buffer, sizeof(buffer), pol);
+ mpol_to_str(buffer, sizeof(buffer), pol, 0);
mpol_cond_put(pol);
seq_printf(m, "%08lx %s", vma->vm_start, buffer);
diff --git a/mm/shmem.c b/mm/shmem.c
index 3c620dc10135..e6d9298aa22a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1079,23 +1079,29 @@ redirty:
#ifdef CONFIG_NUMA
#ifdef CONFIG_TMPFS
-static void shmem_show_mpol(struct seq_file *seq, unsigned short mode,
- unsigned short flags, const nodemask_t policy_nodes)
+static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
- struct mempolicy temp;
char buffer[64];
- if (mode == MPOL_DEFAULT)
+ if (!mpol || mpol->mode == MPOL_DEFAULT)
return; /* show nothing */
- temp.mode = mode;
- temp.flags = flags;
- temp.v.nodes = policy_nodes;
-
- mpol_to_str(buffer, sizeof(buffer), &temp);
+ mpol_to_str(buffer, sizeof(buffer), mpol, 1);
seq_printf(seq, ",mpol=%s", buffer);
}
+
+static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+ struct mempolicy *mpol = NULL;
+ if (sbinfo->mpol) {
+ spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */
+ mpol = sbinfo->mpol;
+ mpol_get(mpol);
+ spin_unlock(&sbinfo->stat_lock);
+ }
+ return mpol;
+}
#endif /* CONFIG_TMPFS */
static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
@@ -1135,8 +1141,7 @@ static struct page *shmem_alloc_page(gfp_t gfp,
}
#else /* !CONFIG_NUMA */
#ifdef CONFIG_TMPFS
-static inline void shmem_show_mpol(struct seq_file *seq, unsigned short policy,
- unsigned short flags, const nodemask_t policy_nodes)
+static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
{
}
#endif /* CONFIG_TMPFS */
@@ -1154,6 +1159,13 @@ static inline struct page *shmem_alloc_page(gfp_t gfp,
}
#endif /* CONFIG_NUMA */
+#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
+static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+ return NULL;
+}
+#endif
+
/*
* shmem_getpage - either get the page from swap or allocate a new one
*
@@ -1508,8 +1520,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
case S_IFREG:
inode->i_op = &shmem_inode_operations;
inode->i_fop = &shmem_file_operations;
- mpol_shared_policy_init(&info->policy, sbinfo->policy,
- sbinfo->flags, &sbinfo->policy_nodes);
+ mpol_shared_policy_init(&info->policy,
+ shmem_get_sbmpol(sbinfo));
break;
case S_IFDIR:
inc_nlink(inode);
@@ -1523,8 +1535,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
* Must not load anything in the rbtree,
* mpol_free_shared_policy will not be called.
*/
- mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, 0,
- NULL);
+ mpol_shared_policy_init(&info->policy, NULL);
break;
}
} else
@@ -2139,8 +2150,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
if (*rest)
goto bad_val;
} else if (!strcmp(this_char,"mpol")) {
- if (mpol_parse_str(value, &sbinfo->policy,
- &sbinfo->flags, &sbinfo->policy_nodes))
+ if (mpol_parse_str(value, &sbinfo->mpol, 1))
goto bad_val;
} else {
printk(KERN_ERR "tmpfs: Bad mount option %s\n",
@@ -2191,9 +2201,9 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
sbinfo->free_blocks = config.max_blocks - blocks;
sbinfo->max_inodes = config.max_inodes;
sbinfo->free_inodes = config.max_inodes - inodes;
- sbinfo->policy = config.policy;
- sbinfo->flags = config.flags;
- sbinfo->policy_nodes = config.policy_nodes;
+
+ mpol_put(sbinfo->mpol);
+ sbinfo->mpol = config.mpol; /* transfers initial ref */
out:
spin_unlock(&sbinfo->stat_lock);
return error;
@@ -2214,8 +2224,7 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_printf(seq, ",uid=%u", sbinfo->uid);
if (sbinfo->gid != 0)
seq_printf(seq, ",gid=%u", sbinfo->gid);
- shmem_show_mpol(seq, sbinfo->policy, sbinfo->flags,
- sbinfo->policy_nodes);
+ shmem_show_mpol(seq, sbinfo->mpol);
return 0;
}
#endif /* CONFIG_TMPFS */
@@ -2245,9 +2254,7 @@ static int shmem_fill_super(struct super_block *sb,
sbinfo->mode = S_IRWXUGO | S_ISVTX;
sbinfo->uid = current->fsuid;
sbinfo->gid = current->fsgid;
- sbinfo->policy = MPOL_DEFAULT;
- sbinfo->flags = 0;
- sbinfo->policy_nodes = node_states[N_HIGH_MEMORY];
+ sbinfo->mpol = NULL;
sb->s_fs_info = sbinfo;
#ifdef CONFIG_TMPFS