From 22247efd822e6d263f3c8bd327f3f769aea9b1d9 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 14 May 2021 17:27:04 -0700 Subject: mm/hugetlb: fix F_SEAL_FUTURE_WRITE Patch series "mm/hugetlb: Fix issues on file sealing and fork", v2. Hugh reported issue with F_SEAL_FUTURE_WRITE not applied correctly to hugetlbfs, which I can easily verify using the memfd_test program, which seems that the program is hardly run with hugetlbfs pages (as by default shmem). Meanwhile I found another probably even more severe issue on that hugetlb fork won't wr-protect child cow pages, so child can potentially write to parent private pages. Patch 2 addresses that. After this series applied, "memfd_test hugetlbfs" should start to pass. This patch (of 2): F_SEAL_FUTURE_WRITE is missing for hugetlb starting from the first day. There is a test program for that and it fails constantly. $ ./memfd_test hugetlbfs memfd-hugetlb: CREATE memfd-hugetlb: BASIC memfd-hugetlb: SEAL-WRITE memfd-hugetlb: SEAL-FUTURE-WRITE mmap() didn't fail as expected Aborted (core dumped) I think it's probably because no one is really running the hugetlbfs test. Fix it by checking FUTURE_WRITE also in hugetlbfs_file_mmap() as what we do in shmem_mmap(). Generalize a helper for that. Link: https://lkml.kernel.org/r/20210503234356.9097-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210503234356.9097-2-peterx@redhat.com Fixes: ab3948f58ff84 ("mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd") Signed-off-by: Peter Xu Reported-by: Hugh Dickins Reviewed-by: Mike Kravetz Cc: Joel Fernandes (Google) Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 322ec61d0da7..c274f75efcf9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3216,5 +3216,37 @@ void mem_dump_obj(void *object); static inline void mem_dump_obj(void *object) {} #endif +/** + * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it + * @seals: the seals to check + * @vma: the vma to operate on + * + * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on + * the vma flags. Return 0 if check pass, or <0 for errors. + */ +static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) +{ + if (seals & F_SEAL_FUTURE_WRITE) { + /* + * New PROT_WRITE and MAP_SHARED mmaps are not allowed when + * "future write" seal active. + */ + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) + return -EPERM; + + /* + * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as + * MAP_SHARED and read-only, take care to not allow mprotect to + * revert protections on such mappings. Do this only for shared + * mappings. For private mappings, don't need to mask + * VM_MAYWRITE as we still want them to be COW-writable. + */ + if (vma->vm_flags & VM_SHARED) + vma->vm_flags &= ~(VM_MAYWRITE); + } + + return 0; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 May 2021 17:27:24 -0700 Subject: mm: fix struct page layout on 32-bit systems 32-bit architectures which expect 8-byte alignment for 8-byte integers and need 64-bit DMA addresses (arm, mips, ppc) had their struct page inadvertently expanded in 2019. When the dma_addr_t was added, it forced the alignment of the union to 8 bytes, which inserted a 4 byte gap between 'flags' and the union. Fix this by storing the dma_addr_t in one or two adjacent unsigned longs. This restores the alignment to that of an unsigned long. We always store the low bits in the first word to prevent the PageTail bit from being inadvertently set on a big endian platform. If that happened, get_user_pages_fast() racing against a page which was freed and reallocated to the page_pool could dereference a bogus compound_head(), which would be hard to trace back to this cause. Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org Fixes: c25fff7171be ("mm: add dma_addr_t to struct page") Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Ilias Apalodimas Acked-by: Jesper Dangaard Brouer Acked-by: Vlastimil Babka Tested-by: Matteo Croce Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 4 ++-- include/net/page_pool.h | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6613b26a8894..5aacc1c10a45 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -97,10 +97,10 @@ struct page { }; struct { /* page_pool used by netstack */ /** - * @dma_addr: might require a 64-bit value even on + * @dma_addr: might require a 64-bit value on * 32-bit architectures. */ - dma_addr_t dma_addr; + unsigned long dma_addr[2]; }; struct { /* slab, slob and slub */ union { diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 6d517a37c18b..b4b6de909c93 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { - return page->dma_addr; + dma_addr_t ret = page->dma_addr[0]; + if (sizeof(dma_addr_t) > sizeof(unsigned long)) + ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; + return ret; +} + +static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +{ + page->dma_addr[0] = addr; + if (sizeof(dma_addr_t) > sizeof(unsigned long)) + page->dma_addr[1] = upper_32_bits(addr); } static inline bool is_page_pool_compiled_in(void) -- cgit v1.2.3 From 076171a67789ad0107de44c2964f2e46a7d0d7b8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 May 2021 17:27:30 -0700 Subject: mm/filemap: fix readahead return types A readahead request will not allocate more memory than can be represented by a size_t, even on systems that have HIGHMEM available. Change the length functions from returning an loff_t to a size_t. Link: https://lkml.kernel.org/r/20210510201201.1558972-1-willy@infradead.org Fixes: 32c0a6bcaa1f57 ("btrfs: add and use readahead_batch_length") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reported-by: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a4bd41128bf3..e89df447fae3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -997,9 +997,9 @@ static inline loff_t readahead_pos(struct readahead_control *rac) * readahead_length - The number of bytes in this readahead request. * @rac: The readahead request. */ -static inline loff_t readahead_length(struct readahead_control *rac) +static inline size_t readahead_length(struct readahead_control *rac) { - return (loff_t)rac->_nr_pages * PAGE_SIZE; + return rac->_nr_pages * PAGE_SIZE; } /** @@ -1024,7 +1024,7 @@ static inline unsigned int readahead_count(struct readahead_control *rac) * readahead_batch_length - The number of bytes in the current batch. * @rac: The readahead request. */ -static inline loff_t readahead_batch_length(struct readahead_control *rac) +static inline size_t readahead_batch_length(struct readahead_control *rac) { return rac->_batch_count * PAGE_SIZE; } -- cgit v1.2.3