From db0aa2e9566fda2d23dc8f6c102856ead95578a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Jun 2024 00:20:42 +0100 Subject: mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios Define a data structure, struct folio_queue, to represent a sequence of folios and a kernel-internal I/O iterator type, ITER_FOLIOQ, to allow a list of folio_queue structures to be used to provide a buffer to iov_iter-taking functions, such as sendmsg and recvmsg. The folio_queue structure looks like: struct folio_queue { struct folio_batch vec; u8 orders[PAGEVEC_SIZE]; struct folio_queue *next; struct folio_queue *prev; unsigned long marks; unsigned long marks2; }; It does not use a list_head so that next and/or prev can be set to NULL at the ends of the list, allowing iov_iter-handling routines to determine that they *are* the ends without needing to store a head pointer in the iov_iter struct. A folio_batch struct is used to hold the folio pointers which allows the batch to be passed to batch handling functions. Two mark bits are available per slot. The intention is to use at least one of them to mark folios that need putting, but that might not be ultimately necessary. Accessor functions are used to access the slots to do the masking and an additional accessor function is used to indicate the size of the array. The order of each folio is also stored in the structure to avoid the need for iov_iter_advance() and iov_iter_revert() to have to query each folio to find its size. With careful barriering, this can be used as an extending buffer with new folios inserted and new folio_queue structs added without the need for a lock. Further, provided we always keep at least one struct in the buffer, we can also remove consumed folios and consumed structs from the head end as we without the need for locks. [Questions/thoughts] (1) To manage this, I need a head pointer, a tail pointer, a tail slot number (assuming insertion happens at the tail end and the next pointers point from head to tail). Should I put these into a struct of their own, say "folio_queue_head" or "rolling_buffer"? I will end up with two of these in netfs_io_request eventually, one keeping track of the pagecache I'm dealing with for buffered I/O and the other to hold a bounce buffer when we need one. (2) Should I make the slots {folio,off,len} or bio_vec? (3) This is intended to replace ITER_XARRAY eventually. Using an xarray in I/O iteration requires the taking of the RCU read lock, doing copying under the RCU read lock, walking the xarray (which may change under us), handling retries and dealing with special values. The advantage of ITER_XARRAY is that when we're dealing with the pagecache directly, we don't need any allocation - but if we're doing encrypted comms, there's a good chance we'd be using a bounce buffer anyway. This will require afs, erofs, cifs, orangefs and fscache to be converted to not use this. afs still uses it for dirs and symlinks; some of erofs usages should be easy to change, but there's one which won't be so easy; ceph's use via fscache can be fixed by porting ceph to netfslib; cifs is using xarray as a bounce buffer - that can be moved to use sheaves instead; and orangefs has a similar problem to erofs - maybe orangefs could use netfslib? Signed-off-by: David Howells cc: Matthew Wilcox cc: Jeff Layton cc: Steve French cc: Ilya Dryomov cc: Gao Xiang cc: Mike Marshall cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-erofs@lists.ozlabs.org cc: devel@lists.orangefs.org Link: https://lore.kernel.org/r/20240814203850.2240469-13-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 138 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 include/linux/folio_queue.h (limited to 'include/linux/folio_queue.h') diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h new file mode 100644 index 000000000000..52773613bf23 --- /dev/null +++ b/include/linux/folio_queue.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Queue of folios definitions + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _LINUX_FOLIO_QUEUE_H +#define _LINUX_FOLIO_QUEUE_H + +#include + +/* + * Segment in a queue of running buffers. Each segment can hold a number of + * folios and a portion of the queue can be referenced with the ITER_FOLIOQ + * iterator. The possibility exists of inserting non-folio elements into the + * queue (such as gaps). + * + * Explicit prev and next pointers are used instead of a list_head to make it + * easier to add segments to tail and remove them from the head without the + * need for a lock. + */ +struct folio_queue { + struct folio_batch vec; /* Folios in the queue segment */ + u8 orders[PAGEVEC_SIZE]; /* Order of each folio */ + struct folio_queue *next; /* Next queue segment or NULL */ + struct folio_queue *prev; /* Previous queue segment of NULL */ + unsigned long marks; /* 1-bit mark per folio */ + unsigned long marks2; /* Second 1-bit mark per folio */ +#if PAGEVEC_SIZE > BITS_PER_LONG +#error marks is not big enough +#endif +}; + +static inline void folioq_init(struct folio_queue *folioq) +{ + folio_batch_init(&folioq->vec); + folioq->next = NULL; + folioq->prev = NULL; + folioq->marks = 0; + folioq->marks2 = 0; +} + +static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) +{ + return PAGEVEC_SIZE; +} + +static inline unsigned int folioq_count(struct folio_queue *folioq) +{ + return folio_batch_count(&folioq->vec); +} + +static inline bool folioq_full(struct folio_queue *folioq) +{ + //return !folio_batch_space(&folioq->vec); + return folioq_count(folioq) >= folioq_nr_slots(folioq); +} + +static inline bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks); +} + +static inline void folioq_mark(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks); +} + +static inline void folioq_unmark(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks); +} + +static inline bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks2); +} + +static inline void folioq_mark2(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks2); +} + +static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks2); +} + +static inline unsigned int __folio_order(struct folio *folio) +{ + if (!folio_test_large(folio)) + return 0; + return folio->_flags_1 & 0xff; +} + +static inline unsigned int folioq_append(struct folio_queue *folioq, struct folio *folio) +{ + unsigned int slot = folioq->vec.nr++; + + folioq->vec.folios[slot] = folio; + folioq->orders[slot] = __folio_order(folio); + return slot; +} + +static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct folio *folio) +{ + unsigned int slot = folioq->vec.nr++; + + folioq->vec.folios[slot] = folio; + folioq->orders[slot] = __folio_order(folio); + folioq_mark(folioq, slot); + return slot; +} + +static inline struct folio *folioq_folio(const struct folio_queue *folioq, unsigned int slot) +{ + return folioq->vec.folios[slot]; +} + +static inline unsigned int folioq_folio_order(const struct folio_queue *folioq, unsigned int slot) +{ + return folioq->orders[slot]; +} + +static inline size_t folioq_folio_size(const struct folio_queue *folioq, unsigned int slot) +{ + return PAGE_SIZE << folioq_folio_order(folioq, slot); +} + +static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) +{ + folioq->vec.folios[slot] = NULL; + folioq_unmark(folioq, slot); + folioq_unmark2(folioq, slot); +} + +#endif /* _LINUX_FOLIO_QUEUE_H */ -- cgit v1.2.3 From ee4cdf7ba857a894ad1650d6ab77669cbbfa329e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jul 2024 00:40:22 +0100 Subject: netfs: Speed up buffered reading Improve the efficiency of buffered reads in a number of ways: (1) Overhaul the algorithm in general so that it's a lot more compact and split the read submission code between buffered and unbuffered versions. The unbuffered version can be vastly simplified. (2) Read-result collection is handed off to a work queue rather than being done in the I/O thread. Multiple subrequests can be processes simultaneously. (3) When a subrequest is collected, any folios it fully spans are collected and "spare" data on either side is donated to either the previous or the next subrequest in the sequence. Notes: (*) Readahead expansion is massively slows down fio, presumably because it causes a load of extra allocations, both folio and xarray, up front before RPC requests can be transmitted. (*) RDMA with cifs does appear to work, both with SIW and RXE. (*) PG_private_2-based reading and copy-to-cache is split out into its own file and altered to use folio_queue. Note that the copy to the cache now creates a new write transaction against the cache and adds the folios to be copied into it. This allows it to use part of the writeback I/O code. Signed-off-by: David Howells cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux/folio_queue.h') diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 52773613bf23..955680c3bb5f 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -27,6 +27,7 @@ struct folio_queue { struct folio_queue *prev; /* Previous queue segment of NULL */ unsigned long marks; /* 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */ + unsigned long marks3; /* Third 1-bit mark per folio */ #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif @@ -39,6 +40,7 @@ static inline void folioq_init(struct folio_queue *folioq) folioq->prev = NULL; folioq->marks = 0; folioq->marks2 = 0; + folioq->marks3 = 0; } static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) @@ -87,6 +89,21 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) clear_bit(slot, &folioq->marks2); } +static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks3); +} + +static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks3); +} + +static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks3); +} + static inline unsigned int __folio_order(struct folio *folio) { if (!folio_test_large(folio)) @@ -133,6 +150,7 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) folioq->vec.folios[slot] = NULL; folioq_unmark(folioq, slot); folioq_unmark2(folioq, slot); + folioq_unmark3(folioq, slot); } #endif /* _LINUX_FOLIO_QUEUE_H */ -- cgit v1.2.3 From 28e8c5c095ec28edeedab5e976e62e0419a89fc1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 30 Sep 2024 11:14:41 +0100 Subject: netfs: Add folio_queue API documentation Add API documentation for folio_queue. Signed-off-by: David Howells Link: https://lore.kernel.org/r/2912369.1727691281@warthog.procyon.org.uk cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-doc@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 168 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) (limited to 'include/linux/folio_queue.h') diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 955680c3bb5f..af871405ae55 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -3,6 +3,12 @@ * * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) + * + * See: + * + * Documentation/core-api/folio_queue.rst + * + * for a description of the API. */ #ifndef _LINUX_FOLIO_QUEUE_H @@ -33,6 +39,13 @@ struct folio_queue { #endif }; +/** + * folioq_init - Initialise a folio queue segment + * @folioq: The segment to initialise + * + * Initialise a folio queue segment. Note that the folio pointers are + * left uninitialised. + */ static inline void folioq_init(struct folio_queue *folioq) { folio_batch_init(&folioq->vec); @@ -43,62 +56,155 @@ static inline void folioq_init(struct folio_queue *folioq) folioq->marks3 = 0; } +/** + * folioq_nr_slots: Query the capacity of a folio queue segment + * @folioq: The segment to query + * + * Query the number of folios that a particular folio queue segment might hold. + * [!] NOTE: This must not be assumed to be the same for every segment! + */ static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) { return PAGEVEC_SIZE; } +/** + * folioq_count: Query the occupancy of a folio queue segment + * @folioq: The segment to query + * + * Query the number of folios that have been added to a folio queue segment. + * Note that this is not decreased as folios are removed from a segment. + */ static inline unsigned int folioq_count(struct folio_queue *folioq) { return folio_batch_count(&folioq->vec); } +/** + * folioq_count: Query if a folio queue segment is full + * @folioq: The segment to query + * + * Query if a folio queue segment is fully occupied. Note that this does not + * change if folios are removed from a segment. + */ static inline bool folioq_full(struct folio_queue *folioq) { //return !folio_batch_space(&folioq->vec); return folioq_count(folioq) >= folioq_nr_slots(folioq); } +/** + * folioq_is_marked: Check first folio mark in a folio queue segment + * @folioq: The segment to query + * @slot: The slot number of the folio to query + * + * Determine if the first mark is set for the folio in the specified slot in a + * folio queue segment. + */ static inline bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot) { return test_bit(slot, &folioq->marks); } +/** + * folioq_mark: Set the first mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Set the first mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_mark(struct folio_queue *folioq, unsigned int slot) { set_bit(slot, &folioq->marks); } +/** + * folioq_unmark: Clear the first mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Clear the first mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_unmark(struct folio_queue *folioq, unsigned int slot) { clear_bit(slot, &folioq->marks); } +/** + * folioq_is_marked2: Check second folio mark in a folio queue segment + * @folioq: The segment to query + * @slot: The slot number of the folio to query + * + * Determine if the second mark is set for the folio in the specified slot in a + * folio queue segment. + */ static inline bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot) { return test_bit(slot, &folioq->marks2); } +/** + * folioq_mark2: Set the second mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Set the second mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_mark2(struct folio_queue *folioq, unsigned int slot) { set_bit(slot, &folioq->marks2); } +/** + * folioq_unmark2: Clear the second mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Clear the second mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) { clear_bit(slot, &folioq->marks2); } +/** + * folioq_is_marked3: Check third folio mark in a folio queue segment + * @folioq: The segment to query + * @slot: The slot number of the folio to query + * + * Determine if the third mark is set for the folio in the specified slot in a + * folio queue segment. + */ static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) { return test_bit(slot, &folioq->marks3); } +/** + * folioq_mark3: Set the third mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Set the third mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) { set_bit(slot, &folioq->marks3); } +/** + * folioq_unmark3: Clear the third mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Clear the third mark for the folio in the specified slot in a folio queue + * segment. + */ static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) { clear_bit(slot, &folioq->marks3); @@ -111,6 +217,19 @@ static inline unsigned int __folio_order(struct folio *folio) return folio->_flags_1 & 0xff; } +/** + * folioq_append: Add a folio to a folio queue segment + * @folioq: The segment to add to + * @folio: The folio to add + * + * Add a folio to the tail of the sequence in a folio queue segment, increasing + * the occupancy count and returning the slot number for the folio just added. + * The folio size is extracted and stored in the queue and the marks are left + * unmodified. + * + * Note that it's left up to the caller to check that the segment capacity will + * not be exceeded and to extend the queue. + */ static inline unsigned int folioq_append(struct folio_queue *folioq, struct folio *folio) { unsigned int slot = folioq->vec.nr++; @@ -120,6 +239,19 @@ static inline unsigned int folioq_append(struct folio_queue *folioq, struct foli return slot; } +/** + * folioq_append_mark: Add a folio to a folio queue segment + * @folioq: The segment to add to + * @folio: The folio to add + * + * Add a folio to the tail of the sequence in a folio queue segment, increasing + * the occupancy count and returning the slot number for the folio just added. + * The folio size is extracted and stored in the queue, the first mark is set + * and and the second and third marks are left unmodified. + * + * Note that it's left up to the caller to check that the segment capacity will + * not be exceeded and to extend the queue. + */ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct folio *folio) { unsigned int slot = folioq->vec.nr++; @@ -130,21 +262,57 @@ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct return slot; } +/** + * folioq_folio: Get a folio from a folio queue segment + * @folioq: The segment to access + * @slot: The folio slot to access + * + * Retrieve the folio in the specified slot from a folio queue segment. Note + * that no bounds check is made and if the slot hasn't been added into yet, the + * pointer will be undefined. If the slot has been cleared, NULL will be + * returned. + */ static inline struct folio *folioq_folio(const struct folio_queue *folioq, unsigned int slot) { return folioq->vec.folios[slot]; } +/** + * folioq_folio_order: Get the order of a folio from a folio queue segment + * @folioq: The segment to access + * @slot: The folio slot to access + * + * Retrieve the order of the folio in the specified slot from a folio queue + * segment. Note that no bounds check is made and if the slot hasn't been + * added into yet, the order returned will be 0. + */ static inline unsigned int folioq_folio_order(const struct folio_queue *folioq, unsigned int slot) { return folioq->orders[slot]; } +/** + * folioq_folio_size: Get the size of a folio from a folio queue segment + * @folioq: The segment to access + * @slot: The folio slot to access + * + * Retrieve the size of the folio in the specified slot from a folio queue + * segment. Note that no bounds check is made and if the slot hasn't been + * added into yet, the size returned will be PAGE_SIZE. + */ static inline size_t folioq_folio_size(const struct folio_queue *folioq, unsigned int slot) { return PAGE_SIZE << folioq_folio_order(folioq, slot); } +/** + * folioq_clear: Clear a folio from a folio queue segment + * @folioq: The segment to clear + * @slot: The folio slot to clear + * + * Clear a folio from a sequence in a folio queue segment and clear its marks. + * The occupancy count is left unchanged. + */ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) { folioq->vec.folios[slot] = NULL; -- cgit v1.2.3 From f5c82730bedbc4a424cb94d2653bcb8be9dbd2ec Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 1 Oct 2024 17:01:40 +0200 Subject: folio_queue: fix documentation s/folioq_count/folioq_full/ Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20241001134729.3f65ae78@canb.auug.org.au Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/folio_queue.h') diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index af871405ae55..3abe614ef5f0 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -81,7 +81,7 @@ static inline unsigned int folioq_count(struct folio_queue *folioq) } /** - * folioq_count: Query if a folio queue segment is full + * folioq_full: Query if a folio queue segment is full * @folioq: The segment to query * * Query if a folio queue segment is fully occupied. Note that this does not -- cgit v1.2.3