summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLorenzo Stoakes <lorenzo.stoakes@oracle.com>2025-08-12 16:44:10 +0100
committerAndrew Morton <akpm@linux-foundation.org>2025-09-13 16:54:56 -0700
commitbb6525f2f8c41e89ba3fc506bc1705c68cf845ae (patch)
treefcd25992b2c56db9e0f3871cfad334284b629e91
parent4a12633e87abec277a9911614d4aee3ef66b9c4c (diff)
mm: add bitmap mm->flags field
Patch series "mm: make mm->flags a bitmap and 64-bit on all arches". We are currently in the bizarre situation where we are constrained on the number of flags we can set in an mm_struct based on whether this is a 32-bit or 64-bit kernel. This is because mm->flags is an unsigned long field, which is 32-bits on a 32-bit system and 64-bits on a 64-bit system. In order to keep things functional across both architectures, we do not permit mm flag bits to be set above flag 31 (i.e. the 32nd bit). This is a silly situation, especially given how profligate we are in storing metadata in mm_struct, so let's convert mm->flags into a bitmap and allow ourselves as many bits as we like. In order to execute this change, we introduce a new opaque type - mm_flags_t - which wraps a bitmap. We go further and mark the bitmap field __private, which forces users to have to use accessors, which allows us to enforce atomicity rules around mm->flags (except on those occasions they are not required - fork, etc.) and makes it far easier to keep track of how mm flags are being utilised. In order to implement this change sensibly and an an iterative way, we start by introducing the type with the same bitsize as the current mm flags (system word size) and place it in union with mm->flags. We are then able to gradually update users as we go without being forced to do everything in a single patch. In the course of working on this series I noticed the MMF_* flag masks encounter a sign extension bug that, due to the 32-bit limit on mm->flags thus far, has not caused any issues in practice, but required fixing for this series. We must make special dispensation for two cases - coredump and initailisation on fork, but of which use masks extensively. Since coredump flags are set in stone, we can safely assume they will remain in the first 32-bits of the flags. We therefore provide special non-atomic accessors for this case that access the first system word of flags, keeping everything there essentially the same. For mm->flags initialisation on fork, we adjust the logic to ensure all bits are cleared correctly, and then adjust the existing intialisation logic, dubbing the implementation utilising flags as legacy. This means we get the same fast operations as we do now, but in future we can also choose to update the forking logic to additionally propagate flags beyond 32-bits across fork. With this change in place we can, in future, decide to have as many bits as we please. Since the size of the bitmap will scale in system word multiples, there should be no issues with changes in alignment in mm_struct. Additionally, the really sensitive field (mmap_lock) is located prior to the flags field so this should have no impact on that either. This patch (of 10): We are currently in the bizarre situation where we are constrained on the number of flags we can set in an mm_struct based on whether this is a 32-bit or 64-bit kernel. This is because mm->flags is an unsigned long field, which is 32-bits on a 32-bit system and 64-bits on a 64-bit system. In order to keep things functional across both architectures, we do not permit mm flag bits to be set above flag 31 (i.e. the 32nd bit). This is a silly situation, especially given how profligate we are in storing metadata in mm_struct, so let's convert mm->flags into a bitmap and allow ourselves as many bits as we like. To keep things manageable, firstly we introduce the bitmap at a system word system as a new field mm->_flags, in union. This means the new bitmap mm->_flags is bitwise exactly identical to the existing mm->flags field. We have an opportunity to also introduce some type safety here, so let's wrap the mm flags field as a struct and declare it as an mm_flags_t typedef to keep it consistent with vm_flags_t for VMAs. We make the internal field privately accessible, in order to force the use of helper functions so we can enforce that accesses are bitwise as required. We therefore introduce accessors prefixed with mm_flags_*() for callers to use. We place the bit parameter first so as to match the parameter ordering of the *_bit() functions. Having this temporary union arrangement allows us to incrementally swap over users of mm->flags patch-by-patch rather than having to do everything in one fell swoop. [lorenzo.stoakes@oracle.com: place __private in correct place, const-ify __mm_flags_get_word] Link: https://lkml.kernel.org/r/d4ba117d-6234-4069-b871-254d152d7d21@lucifer.local Link: https://lkml.kernel.org/r/cover.1755012943.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/9de8dfd9de8c95cd31622d6e52051ba0d1848f5a.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> Acked-by: David Hildenbrand <david@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Andreas Larsson <andreas@gaisler.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: Barry Song <baohua@kernel.org> Cc: Ben Segall <bsegall@google.com> Cc: Borislav Betkov <bp@alien8.de> Cc: Chengming Zhou <chengming.zhou@linux.dev> Cc: Christian Borntraeger <borntraeger@linux.ibm.com> Cc: Christian Brauner <brauner@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: David S. Miller <davem@davemloft.net> Cc: Dev Jain <dev.jain@arm.com> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com> Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Jann Horn <jannh@google.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Jiri Olsa <jolsa@kernel.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Marc Rutland <mark.rutland@arm.com> Cc: Mariano Pache <npache@redhat.com> Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org> Cc: Mateusz Guzik <mjguzik@gmail.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Mel Gorman <mgorman <mgorman@suse.de> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Namhyung kim <namhyung@kernel.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: Shakeel Butt <shakeel.butt@linux.dev> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Thomas Gleinxer <tglx@linutronix.de> Cc: Valentin Schneider <vschneid@redhat.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vincent Guittot <vincent.guittot@linaro.org> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: xu xin <xu.xin16@zte.com.cn> Cc: Zi Yan <ziy@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r--include/linux/mm.h32
-rw-r--r--include/linux/mm_types.h38
2 files changed, 69 insertions, 1 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3868ca1a25f9..4ed4a0b9dad6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -34,6 +34,8 @@
#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/rcuwait.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
struct mempolicy;
struct anon_vma;
@@ -720,6 +722,36 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
}
#endif /* CONFIG_PER_VMA_LOCK */
+static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
+{
+ return test_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags));
+}
+
+static inline bool mm_flags_test_and_set(int flag, struct mm_struct *mm)
+{
+ return test_and_set_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags));
+}
+
+static inline bool mm_flags_test_and_clear(int flag, struct mm_struct *mm)
+{
+ return test_and_clear_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags));
+}
+
+static inline void mm_flags_set(int flag, struct mm_struct *mm)
+{
+ set_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags));
+}
+
+static inline void mm_flags_clear(int flag, struct mm_struct *mm)
+{
+ clear_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags));
+}
+
+static inline void mm_flags_clear_all(struct mm_struct *mm)
+{
+ bitmap_zero(ACCESS_PRIVATE(&mm->_flags, __mm_flags), NUM_MM_FLAG_BITS);
+}
+
extern const struct vm_operations_struct vma_dummy_vm_ops;
static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index cf94df4955c7..0e001dbad455 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -20,6 +20,7 @@
#include <linux/seqlock.h>
#include <linux/percpu_counter.h>
#include <linux/types.h>
+#include <linux/bitmap.h>
#include <asm/mmu.h>
@@ -927,6 +928,15 @@ struct mm_cid {
};
#endif
+/*
+ * Opaque type representing current mm_struct flag state. Must be accessed via
+ * mm_flags_xxx() helper functions.
+ */
+#define NUM_MM_FLAG_BITS BITS_PER_LONG
+typedef struct {
+ DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS);
+} __private mm_flags_t;
+
struct kioctx_table;
struct iommu_mm_data;
struct mm_struct {
@@ -1109,7 +1119,11 @@ struct mm_struct {
/* Architecture-specific MM context */
mm_context_t context;
- unsigned long flags; /* Must use atomic bitops to access */
+ /* Temporary union while we convert users to mm_flags_t. */
+ union {
+ unsigned long flags; /* Must use atomic bitops to access */
+ mm_flags_t _flags; /* Must use mm_flags_* helpers to access */
+ };
#ifdef CONFIG_AIO
spinlock_t ioctx_lock;
@@ -1219,6 +1233,28 @@ struct mm_struct {
unsigned long cpu_bitmap[];
};
+/* Set the first system word of mm flags, non-atomically. */
+static inline void __mm_flags_set_word(struct mm_struct *mm, unsigned long value)
+{
+ unsigned long *bitmap = ACCESS_PRIVATE(&mm->_flags, __mm_flags);
+
+ bitmap_copy(bitmap, &value, BITS_PER_LONG);
+}
+
+/* Obtain a read-only view of the bitmap. */
+static inline const unsigned long *__mm_flags_get_bitmap(const struct mm_struct *mm)
+{
+ return (const unsigned long *)ACCESS_PRIVATE(&mm->_flags, __mm_flags);
+}
+
+/* Read the first system word of mm flags, non-atomically. */
+static inline unsigned long __mm_flags_get_word(const struct mm_struct *mm)
+{
+ const unsigned long *bitmap = __mm_flags_get_bitmap(mm);
+
+ return bitmap_read(bitmap, 0, BITS_PER_LONG);
+}
+
#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \
MT_FLAGS_USE_RCU)
extern struct mm_struct init_mm;