summaryrefslogtreecommitdiff
path: root/lib/maple_tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/maple_tree.c')
-rw-r--r--lib/maple_tree.c2112
1 files changed, 943 insertions, 1169 deletions
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 5aa4c9500018..d18d7ed9ab67 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -101,6 +101,7 @@ static const unsigned long mt_max[] = {
[maple_leaf_64] = ULONG_MAX,
[maple_range_64] = ULONG_MAX,
[maple_arange_64] = ULONG_MAX,
+ [maple_copy] = ULONG_MAX,
};
#define mt_node_max(x) mt_max[mte_node_type(x)]
#endif
@@ -110,6 +111,7 @@ static const unsigned char mt_slots[] = {
[maple_leaf_64] = MAPLE_RANGE64_SLOTS,
[maple_range_64] = MAPLE_RANGE64_SLOTS,
[maple_arange_64] = MAPLE_ARANGE64_SLOTS,
+ [maple_copy] = 3,
};
#define mt_slot_count(x) mt_slots[mte_node_type(x)]
@@ -118,6 +120,7 @@ static const unsigned char mt_pivots[] = {
[maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1,
[maple_range_64] = MAPLE_RANGE64_SLOTS - 1,
[maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1,
+ [maple_copy] = 3,
};
#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]
@@ -126,48 +129,10 @@ static const unsigned char mt_min_slots[] = {
[maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
[maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
[maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1,
+ [maple_copy] = 1, /* Should never be used */
};
#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]
-#define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2)
-#define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1)
-
-struct maple_big_node {
- unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
- union {
- struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
- struct {
- unsigned long padding[MAPLE_BIG_NODE_GAPS];
- unsigned long gap[MAPLE_BIG_NODE_GAPS];
- };
- };
- unsigned char b_end;
- enum maple_type type;
-};
-
-/*
- * The maple_subtree_state is used to build a tree to replace a segment of an
- * existing tree in a more atomic way. Any walkers of the older tree will hit a
- * dead node and restart on updates.
- */
-struct maple_subtree_state {
- struct ma_state *orig_l; /* Original left side of subtree */
- struct ma_state *orig_r; /* Original right side of subtree */
- struct ma_state *l; /* New left side of subtree */
- struct ma_state *m; /* New middle of subtree (rare) */
- struct ma_state *r; /* New right side of subtree */
- struct ma_topiary *free; /* nodes to be freed */
- struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */
- struct maple_big_node *bn;
-};
-
-#ifdef CONFIG_KASAN_STACK
-/* Prevent mas_wr_bnode() from exceeding the stack frame limit */
-#define noinline_for_kasan noinline_for_stack
-#else
-#define noinline_for_kasan inline
-#endif
-
/* Functions */
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
{
@@ -349,6 +314,13 @@ static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
(type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
}
+static inline void ma_init_slot(void __rcu **slot, const struct maple_node *mn,
+ const enum maple_type mt)
+{
+ /* WARNING: this is unsafe if the slot is exposed to readers. */
+ RCU_INIT_POINTER(*slot, (void *)mt_mk_node(mn, mt));
+}
+
static inline void *mte_mk_root(const struct maple_enode *node)
{
return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
@@ -605,6 +577,8 @@ static inline unsigned long *ma_pivots(struct maple_node *node,
case maple_range_64:
case maple_leaf_64:
return node->mr64.pivot;
+ case maple_copy:
+ return node->cp.pivot;
case maple_dense:
return NULL;
}
@@ -624,6 +598,8 @@ static inline unsigned long *ma_gaps(struct maple_node *node,
switch (type) {
case maple_arange_64:
return node->ma64.gap;
+ case maple_copy:
+ return node->cp.gap;
case maple_range_64:
case maple_leaf_64:
case maple_dense:
@@ -690,6 +666,7 @@ static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
case maple_arange_64:
node->ma64.pivot[piv] = val;
break;
+ case maple_copy:
case maple_dense:
break;
}
@@ -711,6 +688,8 @@ static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
case maple_range_64:
case maple_leaf_64:
return mn->mr64.slot;
+ case maple_copy:
+ return mn->cp.slot;
case maple_dense:
return mn->slot;
}
@@ -1298,25 +1277,40 @@ static inline unsigned char mas_data_end(struct ma_state *mas)
return mt_pivots[type];
}
-/*
- * mas_leaf_max_gap() - Returns the largest gap in a leaf node
- * @mas: the maple state
- *
- * Return: The maximum gap in the leaf.
- */
-static unsigned long mas_leaf_max_gap(struct ma_state *mas)
+static inline
+void wr_mas_setup(struct ma_wr_state *wr_mas, struct ma_state *mas)
+{
+ wr_mas->node = mas_mn(mas);
+ wr_mas->type = mte_node_type(mas->node);
+ wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
+ wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
+ wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, mas->offset);
+ wr_mas->r_max = mas_safe_pivot(mas, wr_mas->pivots, mas->offset,
+ wr_mas->type);
+}
+
+static inline
+void wr_mas_ascend(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
+
+ mas_ascend(mas);
+ wr_mas_setup(wr_mas, mas);
+ mas->end = ma_data_end(wr_mas->node, wr_mas->type, wr_mas->pivots,
+ mas->max);
+ /* Careful, this may be wrong.. */
+ wr_mas->end_piv = wr_mas->r_max;
+ wr_mas->offset_end = mas->offset;
+}
+
+static inline unsigned long ma_leaf_max_gap(struct maple_node *mn,
+ enum maple_type mt, unsigned long min, unsigned long max,
+ unsigned long *pivots, void __rcu **slots)
{
- enum maple_type mt;
unsigned long pstart, gap, max_gap;
- struct maple_node *mn;
- unsigned long *pivots;
- void __rcu **slots;
unsigned char i;
unsigned char max_piv;
- mt = mte_node_type(mas->node);
- mn = mas_mn(mas);
- slots = ma_slots(mn, mt);
max_gap = 0;
if (unlikely(ma_is_dense(mt))) {
gap = 0;
@@ -1338,26 +1332,25 @@ static unsigned long mas_leaf_max_gap(struct ma_state *mas)
* Check the first implied pivot optimizes the loop below and slot 1 may
* be skipped if there is a gap in slot 0.
*/
- pivots = ma_pivots(mn, mt);
if (likely(!slots[0])) {
- max_gap = pivots[0] - mas->min + 1;
+ max_gap = pivots[0] - min + 1;
i = 2;
} else {
i = 1;
}
/* reduce max_piv as the special case is checked before the loop */
- max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1;
+ max_piv = ma_data_end(mn, mt, pivots, max) - 1;
/*
* Check end implied pivot which can only be a gap on the right most
* node.
*/
- if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) {
+ if (unlikely(max == ULONG_MAX) && !slots[max_piv + 1]) {
gap = ULONG_MAX - pivots[max_piv];
if (gap > max_gap)
max_gap = gap;
- if (max_gap > pivots[max_piv] - mas->min)
+ if (max_gap > pivots[max_piv] - min)
return max_gap;
}
@@ -1378,6 +1371,27 @@ static unsigned long mas_leaf_max_gap(struct ma_state *mas)
}
/*
+ * mas_leaf_max_gap() - Returns the largest gap in a leaf node
+ * @mas: the maple state
+ *
+ * Return: The maximum gap in the leaf.
+ */
+static inline unsigned long mas_leaf_max_gap(struct ma_state *mas)
+{
+ enum maple_type mt;
+ struct maple_node *mn;
+ unsigned long *pivots;
+ void __rcu **slots;
+
+ mn = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ slots = ma_slots(mn, mt);
+ pivots = ma_pivots(mn, mt);
+
+ return ma_leaf_max_gap(mn, mt, mas->min, mas->max, pivots, slots);
+}
+
+/*
* ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
* @node: The maple node
* @gaps: The pointer to the gaps
@@ -1617,169 +1631,6 @@ static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child)
}
/*
- * mab_shift_right() - Shift the data in mab right. Note, does not clean out the
- * old data or set b_node->b_end.
- * @b_node: the maple_big_node
- * @shift: the shift count
- */
-static inline void mab_shift_right(struct maple_big_node *b_node,
- unsigned char shift)
-{
- unsigned long size = b_node->b_end * sizeof(unsigned long);
-
- memmove(b_node->pivot + shift, b_node->pivot, size);
- memmove(b_node->slot + shift, b_node->slot, size);
- if (b_node->type == maple_arange_64)
- memmove(b_node->gap + shift, b_node->gap, size);
-}
-
-/*
- * mab_middle_node() - Check if a middle node is needed (unlikely)
- * @b_node: the maple_big_node that contains the data.
- * @split: the potential split location
- * @slot_count: the size that can be stored in a single node being considered.
- *
- * Return: true if a middle node is required.
- */
-static inline bool mab_middle_node(struct maple_big_node *b_node, int split,
- unsigned char slot_count)
-{
- unsigned char size = b_node->b_end;
-
- if (size >= 2 * slot_count)
- return true;
-
- if (!b_node->slot[split] && (size >= 2 * slot_count - 1))
- return true;
-
- return false;
-}
-
-/*
- * mab_no_null_split() - ensure the split doesn't fall on a NULL
- * @b_node: the maple_big_node with the data
- * @split: the suggested split location
- * @slot_count: the number of slots in the node being considered.
- *
- * Return: the split location.
- */
-static inline int mab_no_null_split(struct maple_big_node *b_node,
- unsigned char split, unsigned char slot_count)
-{
- if (!b_node->slot[split]) {
- /*
- * If the split is less than the max slot && the right side will
- * still be sufficient, then increment the split on NULL.
- */
- if ((split < slot_count - 1) &&
- (b_node->b_end - split) > (mt_min_slots[b_node->type]))
- split++;
- else
- split--;
- }
- return split;
-}
-
-/*
- * mab_calc_split() - Calculate the split location and if there needs to be two
- * splits.
- * @mas: The maple state
- * @bn: The maple_big_node with the data
- * @mid_split: The second split, if required. 0 otherwise.
- *
- * Return: The first split location. The middle split is set in @mid_split.
- */
-static inline int mab_calc_split(struct ma_state *mas,
- struct maple_big_node *bn, unsigned char *mid_split)
-{
- unsigned char b_end = bn->b_end;
- int split = b_end / 2; /* Assume equal split. */
- unsigned char slot_count = mt_slots[bn->type];
-
- /*
- * To support gap tracking, all NULL entries are kept together and a node cannot
- * end on a NULL entry, with the exception of the left-most leaf. The
- * limitation means that the split of a node must be checked for this condition
- * and be able to put more data in one direction or the other.
- *
- * Although extremely rare, it is possible to enter what is known as the 3-way
- * split scenario. The 3-way split comes about by means of a store of a range
- * that overwrites the end and beginning of two full nodes. The result is a set
- * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can
- * also be located in different parent nodes which are also full. This can
- * carry upwards all the way to the root in the worst case.
- */
- if (unlikely(mab_middle_node(bn, split, slot_count))) {
- split = b_end / 3;
- *mid_split = split * 2;
- } else {
- *mid_split = 0;
- }
-
- /* Avoid ending a node on a NULL entry */
- split = mab_no_null_split(bn, split, slot_count);
-
- if (unlikely(*mid_split))
- *mid_split = mab_no_null_split(bn, *mid_split, slot_count);
-
- return split;
-}
-
-/*
- * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node
- * and set @b_node->b_end to the next free slot.
- * @mas: The maple state
- * @mas_start: The starting slot to copy
- * @mas_end: The end slot to copy (inclusively)
- * @b_node: The maple_big_node to place the data
- * @mab_start: The starting location in maple_big_node to store the data.
- */
-static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
- unsigned char mas_end, struct maple_big_node *b_node,
- unsigned char mab_start)
-{
- enum maple_type mt;
- struct maple_node *node;
- void __rcu **slots;
- unsigned long *pivots, *gaps;
- int i = mas_start, j = mab_start;
- unsigned char piv_end;
-
- node = mas_mn(mas);
- mt = mte_node_type(mas->node);
- pivots = ma_pivots(node, mt);
- if (!i) {
- b_node->pivot[j] = pivots[i++];
- if (unlikely(i > mas_end))
- goto complete;
- j++;
- }
-
- piv_end = min(mas_end, mt_pivots[mt]);
- for (; i < piv_end; i++, j++) {
- b_node->pivot[j] = pivots[i];
- if (unlikely(!b_node->pivot[j]))
- goto complete;
-
- if (unlikely(mas->max == b_node->pivot[j]))
- goto complete;
- }
-
- b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);
-
-complete:
- b_node->b_end = ++j;
- j -= mab_start;
- slots = ma_slots(node, mt);
- memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j);
- if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) {
- gaps = ma_gaps(node, mt);
- memcpy(b_node->gap + mab_start, gaps + mas_start,
- sizeof(unsigned long) * j);
- }
-}
-
-/*
* mas_leaf_set_meta() - Set the metadata of a leaf if possible.
* @node: The maple node
* @mt: The maple type
@@ -1793,134 +1644,6 @@ static inline void mas_leaf_set_meta(struct maple_node *node,
}
/*
- * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node.
- * @b_node: the maple_big_node that has the data
- * @mab_start: the start location in @b_node.
- * @mab_end: The end location in @b_node (inclusively)
- * @mas: The maple state with the maple encoded node.
- */
-static inline void mab_mas_cp(struct maple_big_node *b_node,
- unsigned char mab_start, unsigned char mab_end,
- struct ma_state *mas, bool new_max)
-{
- int i, j = 0;
- enum maple_type mt = mte_node_type(mas->node);
- struct maple_node *node = mte_to_node(mas->node);
- void __rcu **slots = ma_slots(node, mt);
- unsigned long *pivots = ma_pivots(node, mt);
- unsigned long *gaps = NULL;
- unsigned char end;
-
- if (mab_end - mab_start > mt_pivots[mt])
- mab_end--;
-
- if (!pivots[mt_pivots[mt] - 1])
- slots[mt_pivots[mt]] = NULL;
-
- i = mab_start;
- do {
- pivots[j++] = b_node->pivot[i++];
- } while (i <= mab_end && likely(b_node->pivot[i]));
-
- memcpy(slots, b_node->slot + mab_start,
- sizeof(void *) * (i - mab_start));
-
- if (new_max)
- mas->max = b_node->pivot[i - 1];
-
- end = j - 1;
- if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) {
- unsigned long max_gap = 0;
- unsigned char offset = 0;
-
- gaps = ma_gaps(node, mt);
- do {
- gaps[--j] = b_node->gap[--i];
- if (gaps[j] > max_gap) {
- offset = j;
- max_gap = gaps[j];
- }
- } while (j);
-
- ma_set_meta(node, mt, offset, end);
- } else {
- mas_leaf_set_meta(node, mt, end);
- }
-}
-
-/*
- * mas_store_b_node() - Store an @entry into the b_node while also copying the
- * data from a maple encoded node.
- * @wr_mas: the maple write state
- * @b_node: the maple_big_node to fill with data
- * @offset_end: the offset to end copying
- *
- * Return: The actual end of the data stored in @b_node
- */
-static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
- struct maple_big_node *b_node, unsigned char offset_end)
-{
- unsigned char slot;
- unsigned char b_end;
- /* Possible underflow of piv will wrap back to 0 before use. */
- unsigned long piv;
- struct ma_state *mas = wr_mas->mas;
-
- b_node->type = wr_mas->type;
- b_end = 0;
- slot = mas->offset;
- if (slot) {
- /* Copy start data up to insert. */
- mas_mab_cp(mas, 0, slot - 1, b_node, 0);
- b_end = b_node->b_end;
- piv = b_node->pivot[b_end - 1];
- } else
- piv = mas->min - 1;
-
- if (piv + 1 < mas->index) {
- /* Handle range starting after old range */
- b_node->slot[b_end] = wr_mas->content;
- if (!wr_mas->content)
- b_node->gap[b_end] = mas->index - 1 - piv;
- b_node->pivot[b_end++] = mas->index - 1;
- }
-
- /* Store the new entry. */
- mas->offset = b_end;
- b_node->slot[b_end] = wr_mas->entry;
- b_node->pivot[b_end] = mas->last;
-
- /* Appended. */
- if (mas->last >= mas->max)
- goto b_end;
-
- /* Handle new range ending before old range ends */
- piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
- if (piv > mas->last) {
- if (offset_end != slot)
- wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
- offset_end);
-
- b_node->slot[++b_end] = wr_mas->content;
- if (!wr_mas->content)
- b_node->gap[b_end] = piv - mas->last + 1;
- b_node->pivot[b_end] = piv;
- }
-
- slot = offset_end + 1;
- if (slot > mas->end)
- goto b_end;
-
- /* Copy end data to the end of the node. */
- mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end);
- b_node->b_end--;
- return;
-
-b_end:
- b_node->b_end = b_end;
-}
-
-/*
* mas_prev_sibling() - Find the previous node with the same parent.
* @mas: the maple state
*
@@ -1965,25 +1688,6 @@ static inline bool mas_next_sibling(struct ma_state *mas)
}
/*
- * mas_node_or_none() - Set the enode and state.
- * @mas: the maple state
- * @enode: The encoded maple node.
- *
- * Set the node to the enode and the status.
- */
-static inline void mas_node_or_none(struct ma_state *mas,
- struct maple_enode *enode)
-{
- if (enode) {
- mas->node = enode;
- mas->status = ma_active;
- } else {
- mas->node = NULL;
- mas->status = ma_none;
- }
-}
-
-/*
* mas_wr_node_walk() - Find the correct offset for the index in the @mas.
* If @mas->index cannot be found within the containing
* node, we traverse to the last entry in the node.
@@ -2016,277 +1720,55 @@ static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
wr_mas->offset_end = mas->offset = offset;
}
-/*
- * mast_rebalance_next() - Rebalance against the next node
- * @mast: The maple subtree state
- */
-static inline void mast_rebalance_next(struct maple_subtree_state *mast)
-{
- unsigned char b_end = mast->bn->b_end;
-
- mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node),
- mast->bn, b_end);
- mast->orig_r->last = mast->orig_r->max;
-}
-
-/*
- * mast_rebalance_prev() - Rebalance against the previous node
- * @mast: The maple subtree state
- */
-static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
+static inline void rebalance_sib(struct ma_state *parent, struct ma_state *sib)
{
- unsigned char end = mas_data_end(mast->orig_l) + 1;
- unsigned char b_end = mast->bn->b_end;
+ *sib = *parent;
+ /* Prioritize move right to pull data left */
+ if (sib->offset < sib->end)
+ sib->offset++;
+ else
+ sib->offset--;
- mab_shift_right(mast->bn, end);
- mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0);
- mast->l->min = mast->orig_l->min;
- mast->orig_l->index = mast->orig_l->min;
- mast->bn->b_end = end + b_end;
- mast->l->offset += end;
+ mas_descend(sib);
+ sib->end = mas_data_end(sib);
}
-/*
- * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring
- * the node to the right. Checking the nodes to the right then the left at each
- * level upwards until root is reached.
- * Data is copied into the @mast->bn.
- * @mast: The maple_subtree_state.
- */
static inline
-bool mast_spanning_rebalance(struct maple_subtree_state *mast)
+void spanning_sib(struct ma_wr_state *l_wr_mas,
+ struct ma_wr_state *r_wr_mas, struct ma_state *nneighbour)
{
- struct ma_state r_tmp = *mast->orig_r;
- struct ma_state l_tmp = *mast->orig_l;
+ struct ma_state l_tmp = *l_wr_mas->mas;
+ struct ma_state r_tmp = *r_wr_mas->mas;
unsigned char depth = 0;
do {
- mas_ascend(mast->orig_r);
- mas_ascend(mast->orig_l);
+ mas_ascend(&r_tmp);
+ mas_ascend(&l_tmp);
depth++;
- if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
- mast->orig_r->offset++;
- do {
- mas_descend(mast->orig_r);
- mast->orig_r->offset = 0;
- } while (--depth);
-
- mast_rebalance_next(mast);
- *mast->orig_l = l_tmp;
- return true;
- } else if (mast->orig_l->offset != 0) {
- mast->orig_l->offset--;
+ if (r_tmp.offset < mas_data_end(&r_tmp)) {
+ r_tmp.offset++;
+ mas_descend(&r_tmp);
+ r_tmp.offset = 0;
+ while (--depth)
+ mas_descend(&r_tmp);
+
+ r_tmp.end = mas_data_end(&r_tmp);
+ *nneighbour = r_tmp;
+ return;
+ } else if (l_tmp.offset) {
+ l_tmp.offset--;
do {
- mas_descend(mast->orig_l);
- mast->orig_l->offset =
- mas_data_end(mast->orig_l);
+ mas_descend(&l_tmp);
+ l_tmp.offset = mas_data_end(&l_tmp);
} while (--depth);
- mast_rebalance_prev(mast);
- *mast->orig_r = r_tmp;
- return true;
+ l_tmp.end = l_tmp.offset;
+ *nneighbour = l_tmp;
+ return;
}
- } while (!mte_is_root(mast->orig_r->node));
-
- *mast->orig_r = r_tmp;
- *mast->orig_l = l_tmp;
- return false;
-}
-
-/*
- * mast_ascend() - Ascend the original left and right maple states.
- * @mast: the maple subtree state.
- *
- * Ascend the original left and right sides. Set the offsets to point to the
- * data already in the new tree (@mast->l and @mast->r).
- */
-static inline void mast_ascend(struct maple_subtree_state *mast)
-{
- MA_WR_STATE(wr_mas, mast->orig_r, NULL);
- mas_ascend(mast->orig_l);
- mas_ascend(mast->orig_r);
-
- mast->orig_r->offset = 0;
- mast->orig_r->index = mast->r->max;
- /* last should be larger than or equal to index */
- if (mast->orig_r->last < mast->orig_r->index)
- mast->orig_r->last = mast->orig_r->index;
-
- wr_mas.type = mte_node_type(mast->orig_r->node);
- mas_wr_node_walk(&wr_mas);
- /* Set up the left side of things */
- mast->orig_l->offset = 0;
- mast->orig_l->index = mast->l->min;
- wr_mas.mas = mast->orig_l;
- wr_mas.type = mte_node_type(mast->orig_l->node);
- mas_wr_node_walk(&wr_mas);
-
- mast->bn->type = wr_mas.type;
-}
-
-/*
- * mas_new_ma_node() - Create and return a new maple node. Helper function.
- * @mas: the maple state with the allocations.
- * @b_node: the maple_big_node with the type encoding.
- *
- * Use the node type from the maple_big_node to allocate a new node from the
- * ma_state. This function exists mainly for code readability.
- *
- * Return: A new maple encoded node
- */
-static inline struct maple_enode
-*mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node)
-{
- return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type);
-}
-
-/*
- * mas_mab_to_node() - Set up right and middle nodes
- *
- * @mas: the maple state that contains the allocations.
- * @b_node: the node which contains the data.
- * @left: The pointer which will have the left node
- * @right: The pointer which may have the right node
- * @middle: the pointer which may have the middle node (rare)
- * @mid_split: the split location for the middle node
- *
- * Return: the split of left.
- */
-static inline unsigned char mas_mab_to_node(struct ma_state *mas,
- struct maple_big_node *b_node, struct maple_enode **left,
- struct maple_enode **right, struct maple_enode **middle,
- unsigned char *mid_split)
-{
- unsigned char split = 0;
- unsigned char slot_count = mt_slots[b_node->type];
-
- *left = mas_new_ma_node(mas, b_node);
- *right = NULL;
- *middle = NULL;
- *mid_split = 0;
-
- if (b_node->b_end < slot_count) {
- split = b_node->b_end;
- } else {
- split = mab_calc_split(mas, b_node, mid_split);
- *right = mas_new_ma_node(mas, b_node);
- }
-
- if (*mid_split)
- *middle = mas_new_ma_node(mas, b_node);
-
- return split;
-
-}
-
-/*
- * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end
- * pointer.
- * @b_node: the big node to add the entry
- * @mas: the maple state to get the pivot (mas->max)
- * @entry: the entry to add, if NULL nothing happens.
- */
-static inline void mab_set_b_end(struct maple_big_node *b_node,
- struct ma_state *mas,
- void *entry)
-{
- if (!entry)
- return;
-
- b_node->slot[b_node->b_end] = entry;
- if (mt_is_alloc(mas->tree))
- b_node->gap[b_node->b_end] = mas_max_gap(mas);
- b_node->pivot[b_node->b_end++] = mas->max;
-}
+ } while (!mte_is_root(r_tmp.node));
-/*
- * mas_set_split_parent() - combine_then_separate helper function. Sets the parent
- * of @mas->node to either @left or @right, depending on @slot and @split
- *
- * @mas: the maple state with the node that needs a parent
- * @left: possible parent 1
- * @right: possible parent 2
- * @slot: the slot the mas->node was placed
- * @split: the split location between @left and @right
- */
-static inline void mas_set_split_parent(struct ma_state *mas,
- struct maple_enode *left,
- struct maple_enode *right,
- unsigned char *slot, unsigned char split)
-{
- if (mas_is_none(mas))
- return;
-
- if ((*slot) <= split)
- mas_set_parent(mas, mas->node, left, *slot);
- else if (right)
- mas_set_parent(mas, mas->node, right, (*slot) - split - 1);
-
- (*slot)++;
-}
-
-/*
- * mte_mid_split_check() - Check if the next node passes the mid-split
- * @l: Pointer to left encoded maple node.
- * @m: Pointer to middle encoded maple node.
- * @r: Pointer to right encoded maple node.
- * @slot: The offset
- * @split: The split location.
- * @mid_split: The middle split.
- */
-static inline void mte_mid_split_check(struct maple_enode **l,
- struct maple_enode **r,
- struct maple_enode *right,
- unsigned char slot,
- unsigned char *split,
- unsigned char mid_split)
-{
- if (*r == right)
- return;
-
- if (slot < mid_split)
- return;
-
- *l = *r;
- *r = right;
- *split = mid_split;
-}
-
-/*
- * mast_set_split_parents() - Helper function to set three nodes parents. Slot
- * is taken from @mast->l.
- * @mast: the maple subtree state
- * @left: the left node
- * @right: the right node
- * @split: the split location.
- */
-static inline void mast_set_split_parents(struct maple_subtree_state *mast,
- struct maple_enode *left,
- struct maple_enode *middle,
- struct maple_enode *right,
- unsigned char split,
- unsigned char mid_split)
-{
- unsigned char slot;
- struct maple_enode *l = left;
- struct maple_enode *r = right;
-
- if (mas_is_none(mast->l))
- return;
-
- if (middle)
- r = middle;
-
- slot = mast->l->offset;
-
- mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
- mas_set_split_parent(mast->l, l, r, &slot, split);
-
- mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
- mas_set_split_parent(mast->m, l, r, &slot, split);
-
- mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
- mas_set_split_parent(mast->r, l, r, &slot, split);
+ WARN_ON_ONCE(1);
}
/*
@@ -2419,120 +1901,109 @@ static inline void mas_topiary_replace(struct ma_state *mas,
}
/*
- * mas_wmb_replace() - Write memory barrier and replace
- * @mas: The maple state
- * @old_enode: The old maple encoded node that is being replaced.
- * @new_height: The new height of the tree as a result of the operation
+ * node_copy() - Copy from one node to another.
*
- * Updates gap as necessary.
+ * @mas: The maple state
+ * @src: The source node
+ * @start: The offset into the src to start copying
+ * @size: The size to copy (non-zero)
+ * @s_max: The source node max
+ * @s_mt: The source maple node type
+ * @dst: The destination
+ * @d_start: The start location in the destination node
+ * @d_mt: The destination maple node type
*/
-static inline void mas_wmb_replace(struct ma_state *mas,
- struct maple_enode *old_enode, unsigned char new_height)
+static inline
+unsigned long node_copy(struct ma_state *mas, struct maple_node *src,
+ unsigned char start, unsigned char size, unsigned long s_max,
+ enum maple_type s_mt, struct maple_node *dst, unsigned char d_start,
+ enum maple_type d_mt)
{
- /* Insert the new data in the tree */
- mas_topiary_replace(mas, old_enode, new_height);
-
- if (mte_is_leaf(mas->node))
- return;
-
- mas_update_gap(mas);
-}
+ unsigned long *s_pivots, *d_pivots;
+ void __rcu **s_slots, **d_slots;
+ unsigned long *s_gaps, *d_gaps;
+ unsigned long d_max;
-/*
- * mast_cp_to_nodes() - Copy data out to nodes.
- * @mast: The maple subtree state
- * @left: The left encoded maple node
- * @middle: The middle encoded maple node
- * @right: The right encoded maple node
- * @split: The location to split between left and (middle ? middle : right)
- * @mid_split: The location to split between middle and right.
- */
-static inline void mast_cp_to_nodes(struct maple_subtree_state *mast,
- struct maple_enode *left, struct maple_enode *middle,
- struct maple_enode *right, unsigned char split, unsigned char mid_split)
-{
- bool new_lmax = true;
+ d_slots = ma_slots(dst, d_mt) + d_start;
+ d_pivots = ma_pivots(dst, d_mt) + d_start;
+ s_slots = ma_slots(src, s_mt) + start;
+ s_pivots = ma_pivots(src, s_mt) + start;
+ memcpy(d_slots, s_slots, size * sizeof(void __rcu *));
+ if (!ma_is_leaf(d_mt) && s_mt == maple_copy) {
+ struct maple_enode *edst = mt_mk_node(dst, d_mt);
- mas_node_or_none(mast->l, left);
- mas_node_or_none(mast->m, middle);
- mas_node_or_none(mast->r, right);
- mast->l->min = mast->orig_l->min;
- if (split == mast->bn->b_end) {
- mast->l->max = mast->orig_r->max;
- new_lmax = false;
+ for (int i = 0; i < size; i++)
+ mas_set_parent(mas,
+ mt_slot_locked(mas->tree, d_slots, i),
+ edst, d_start + i);
}
- mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax);
-
- if (middle) {
- mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true);
- mast->m->min = mast->bn->pivot[split] + 1;
- split = mid_split;
+ d_gaps = ma_gaps(dst, d_mt);
+ if (d_gaps) {
+ s_gaps = ma_gaps(src, s_mt) + start;
+ d_gaps += d_start;
+ memcpy(d_gaps, s_gaps, size * sizeof(unsigned long));
}
- mast->r->max = mast->orig_r->max;
- if (right) {
- mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false);
- mast->r->min = mast->bn->pivot[split] + 1;
- }
-}
+ if (start + size - 1 < mt_pivots[s_mt])
+ d_max = s_pivots[size - 1];
+ else
+ d_max = s_max;
-/*
- * mast_combine_cp_left - Copy in the original left side of the tree into the
- * combined data set in the maple subtree state big node.
- * @mast: The maple subtree state
- */
-static inline void mast_combine_cp_left(struct maple_subtree_state *mast)
-{
- unsigned char l_slot = mast->orig_l->offset;
+ if (d_start + size <= mt_pivots[d_mt])
+ d_pivots[size - 1] = d_max;
- if (!l_slot)
- return;
+ size--;
+ if (size)
+ memcpy(d_pivots, s_pivots, size * sizeof(unsigned long));
- mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0);
+ return d_max;
}
/*
- * mast_combine_cp_right: Copy in the original right side of the tree into the
- * combined data set in the maple subtree state big node.
- * @mast: The maple subtree state
+ * node_finalise() - Zero out unused area and populate metadata
+ * @node: The maple node
+ * @mt: The maple node type
+ * @end: The end of the used area
*/
-static inline void mast_combine_cp_right(struct maple_subtree_state *mast)
+static inline
+void node_finalise(struct maple_node *node, enum maple_type mt,
+ unsigned char end)
{
- if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max)
- return;
+ unsigned char max_end = mt_slots[mt];
+ unsigned char size;
+ unsigned long *gaps;
+ unsigned char gap_slot;
- mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1,
- mt_slot_count(mast->orig_r->node), mast->bn,
- mast->bn->b_end);
- mast->orig_r->last = mast->orig_r->max;
-}
+ gaps = ma_gaps(node, mt);
+ if (end < max_end - 1) {
+ size = max_end - end;
+ memset(ma_slots(node, mt) + end, 0, size * sizeof(void *));
-/*
- * mast_sufficient: Check if the maple subtree state has enough data in the big
- * node to create at least one sufficient node
- * @mast: the maple subtree state
- */
-static inline bool mast_sufficient(struct maple_subtree_state *mast)
-{
- if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node))
- return true;
+ if (gaps)
+ memset(gaps + end, 0, size * sizeof(unsigned long));
- return false;
-}
+ if (--size)
+ memset(ma_pivots(node, mt) + end, 0, size * sizeof(unsigned long));
+ }
-/*
- * mast_overflow: Check if there is too much data in the subtree state for a
- * single node.
- * @mast: The maple subtree state
- */
-static inline bool mast_overflow(struct maple_subtree_state *mast)
-{
- if (mast->bn->b_end > mt_slot_count(mast->orig_l->node))
- return true;
+ gap_slot = 0;
+ if (gaps && !ma_is_leaf(mt)) {
+ unsigned long max_gap;
- return false;
+ max_gap = 0;
+ for (int i = 0; i <= end; i++)
+ if (gaps[i] > max_gap) {
+ gap_slot = i;
+ max_gap = gaps[i];
+ }
+ }
+
+ if (mt == maple_arange_64)
+ ma_set_meta(node, mt, gap_slot, end - 1);
+ else if (end <= max_end - 1)
+ ma_set_meta(node, mt, gap_slot, end - 1);
}
static inline void *mtree_range_walk(struct ma_state *mas)
@@ -2596,480 +2067,662 @@ dead_node:
}
/*
- * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers.
- * @mas: The starting maple state
- * @mast: The maple_subtree_state, keeps track of 4 maple states.
- * @count: The estimated count of iterations needed.
- *
- * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root
- * is hit. First @b_node is split into two entries which are inserted into the
- * next iteration of the loop. @b_node is returned populated with the final
- * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the
- * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last
- * to account of what has been copied into the new sub-tree. The update of
- * orig_l_mas->last is used in mas_consume to find the slots that will need to
- * be either freed or destroyed. orig_l_mas->depth keeps track of the height of
- * the new sub-tree in case the sub-tree becomes the full tree.
- */
-static void mas_spanning_rebalance(struct ma_state *mas,
- struct maple_subtree_state *mast, unsigned char count)
-{
- unsigned char split, mid_split;
- unsigned char slot = 0;
- unsigned char new_height = 0; /* used if node is a new root */
- struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
+ * mas_wmb_replace() - Write memory barrier and replace
+ * @mas: The maple state
+ * @cp: The maple copy node
+ *
+ * Updates gap as necessary.
+ */
+static inline void mas_wmb_replace(struct ma_state *mas, struct maple_copy *cp)
+{
struct maple_enode *old_enode;
- MA_STATE(l_mas, mas->tree, mas->index, mas->index);
- MA_STATE(r_mas, mas->tree, mas->index, mas->last);
- MA_STATE(m_mas, mas->tree, mas->index, mas->index);
+ old_enode = mas->node;
+ mas->node = mt_slot_locked(mas->tree, cp->slot, 0);
+ /* Insert the new data in the tree */
+ mas_topiary_replace(mas, old_enode, cp->height);
+ if (!mte_is_leaf(mas->node))
+ mas_update_gap(mas);
+
+ mtree_range_walk(mas);
+}
+
+
+/*
+ * cp_leaf_init() - Initialize a maple_copy node for the leaf level of a
+ * spanning store
+ * @cp: The maple copy node
+ * @mas: The maple state
+ * @l_wr_mas: The left write state of the spanning store
+ * @r_wr_mas: The right write state of the spanning store
+ */
+static inline void cp_leaf_init(struct maple_copy *cp,
+ struct ma_state *mas, struct ma_wr_state *l_wr_mas,
+ struct ma_wr_state *r_wr_mas)
+{
+ unsigned char end = 0;
/*
- * The tree needs to be rebalanced and leaves need to be kept at the same level.
- * Rebalancing is done by use of the ``struct maple_topiary``.
+ * WARNING: The use of RCU_INIT_POINTER() makes it extremely important
+ * to not expose the maple_copy node to any readers. Exposure may
+ * result in buggy code when a compiler reorders the instructions.
*/
- mast->l = &l_mas;
- mast->m = &m_mas;
- mast->r = &r_mas;
- l_mas.status = r_mas.status = m_mas.status = ma_none;
- /* Check if this is not root and has sufficient data. */
- if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) &&
- unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
- mast_spanning_rebalance(mast);
+ cp->height = 1;
+ /* Create entries to insert including split entries to left and right */
+ if (l_wr_mas->r_min < mas->index) {
+ end++;
+ RCU_INIT_POINTER(cp->slot[0], l_wr_mas->content);
+ cp->pivot[0] = mas->index - 1;
+ }
+ RCU_INIT_POINTER(cp->slot[end], l_wr_mas->entry);
+ cp->pivot[end] = mas->last;
+
+ if (r_wr_mas->end_piv > mas->last) {
+ end++;
+ RCU_INIT_POINTER(cp->slot[end],
+ r_wr_mas->slots[r_wr_mas->offset_end]);
+ cp->pivot[end] = r_wr_mas->end_piv;
+ }
+
+ cp->min = l_wr_mas->r_min;
+ cp->max = cp->pivot[end];
+ cp->end = end;
+}
+
+/*
+ * cp_data_calc() - Calculate the size of the data (1 indexed).
+ * @cp: The maple copy struct with the new data populated.
+ * @l_wr_mas: The maple write state containing the data to the left of the write
+ * @r_wr_mas: The maple write state containing the data to the right of the
+ * write
+ *
+ * cp->data is a size (not indexed by 0).
+ */
+static inline void cp_data_calc(struct maple_copy *cp,
+ struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas)
+{
+
+ /* Add 1 every time for the 0th element */
+ cp->data = l_wr_mas->mas->offset;
+ /* Add the new data and any partial overwrites */
+ cp->data += cp->end + 1;
+ /* Data from right (offset + 1 to end), +1 for zero */
+ cp->data += r_wr_mas->mas->end - r_wr_mas->offset_end;
+}
+
+static bool data_fits(struct ma_state *sib, struct ma_state *mas,
+ struct maple_copy *cp)
+{
+ unsigned char new_data;
+ enum maple_type type;
+ unsigned char space;
+ unsigned char end;
+
+ type = mte_node_type(mas->node);
+ space = 2 * mt_slots[type];
+ end = sib->end;
+
+ new_data = end + 1 + cp->data;
+ if (new_data > space)
+ return false;
/*
- * Each level of the tree is examined and balanced, pushing data to the left or
- * right, or rebalancing against left or right nodes is employed to avoid
- * rippling up the tree to limit the amount of churn. Once a new sub-section of
- * the tree is created, there may be a mix of new and old nodes. The old nodes
- * will have the incorrect parent pointers and currently be in two trees: the
- * original tree and the partially new tree. To remedy the parent pointers in
- * the old tree, the new data is swapped into the active tree and a walk down
- * the tree is performed and the parent pointers are updated.
- * See mas_topiary_replace() for more information.
+ * This is off by one by design. The extra space is left to reduce
+ * jitter in operations that add then remove two entries.
+ *
+ * end is an index while new space and data are both sizes. Adding one
+ * to end to convert the index to a size means that the below
+ * calculation should be <=, but we want to keep an extra space in nodes
+ * to reduce jitter.
+ *
+ * Note that it is still possible to get a full node on the left by the
+ * NULL landing exactly on the split. The NULL ending of a node happens
+ * in the dst_setup() function, where we will either increase the split
+ * by one or decrease it by one, if possible. In the case of split
+ * (this case), it is always possible to shift the spilt by one - again
+ * because there is at least one slot free by the below checking.
*/
- while (count--) {
- mast->bn->b_end--;
- mast->bn->type = mte_node_type(mast->orig_l->node);
- split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
- &mid_split);
- mast_set_split_parents(mast, left, middle, right, split,
- mid_split);
- mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
- new_height++;
+ if (new_data < space)
+ return true;
- /*
- * Copy data from next level in the tree to mast->bn from next
- * iteration
- */
- memset(mast->bn, 0, sizeof(struct maple_big_node));
- mast->bn->type = mte_node_type(left);
-
- /* Root already stored in l->node. */
- if (mas_is_root_limits(mast->l))
- goto new_root;
-
- mast_ascend(mast);
- mast_combine_cp_left(mast);
- l_mas.offset = mast->bn->b_end;
- mab_set_b_end(mast->bn, &l_mas, left);
- mab_set_b_end(mast->bn, &m_mas, middle);
- mab_set_b_end(mast->bn, &r_mas, right);
-
- /* Copy anything necessary out of the right node. */
- mast_combine_cp_right(mast);
- mast->orig_l->last = mast->orig_l->max;
-
- if (mast_sufficient(mast)) {
- if (mast_overflow(mast))
- continue;
+ return false;
+}
- if (mast->orig_l->node == mast->orig_r->node) {
- /*
- * The data in b_node should be stored in one
- * node and in the tree
- */
- slot = mast->l->offset;
- break;
- }
+static inline void push_data_sib(struct maple_copy *cp, struct ma_state *mas,
+ struct ma_state *sib, struct ma_state *parent)
+{
- continue;
- }
+ if (mte_is_root(mas->node))
+ goto no_push;
- /* May be a new root stored in mast->bn */
- if (mas_is_root_limits(mast->orig_l))
- break;
- mast_spanning_rebalance(mast);
+ *sib = *parent;
+ if (sib->offset) {
+ sib->offset--;
+ mas_descend(sib);
+ sib->end = mas_data_end(sib);
+ if (data_fits(sib, mas, cp)) /* Push left */
+ return;
- /* rebalancing from other nodes may require another loop. */
- if (!count)
- count++;
+ *sib = *parent;
}
- l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
- mte_node_type(mast->orig_l->node));
+ if (sib->offset >= sib->end)
+ goto no_push;
- mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true);
- new_height++;
- mas_set_parent(mas, left, l_mas.node, slot);
- if (middle)
- mas_set_parent(mas, middle, l_mas.node, ++slot);
+ sib->offset++;
+ mas_descend(sib);
+ sib->end = mas_data_end(sib);
+ if (data_fits(sib, mas, cp)) /* Push right*/
+ return;
- if (right)
- mas_set_parent(mas, right, l_mas.node, ++slot);
+no_push:
+ sib->end = 0;
+}
- if (mas_is_root_limits(mast->l)) {
-new_root:
- mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas));
- while (!mte_is_root(mast->orig_l->node))
- mast_ascend(mast);
- } else {
- mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent;
+/*
+ * rebalance_data() - Calculate the @cp data, populate @sib if insufficient or
+ * if the data can be pushed into a sibling.
+ * @cp: The maple copy node
+ * @wr_mas: The left write maple state
+ * @sib: The maple state of the sibling.
+ *
+ * Note: @cp->data is a size and not indexed by 0. @sib->end may be set to 0 to
+ * indicate it will not be used.
+ *
+ */
+static inline void rebalance_data(struct maple_copy *cp,
+ struct ma_wr_state *wr_mas, struct ma_state *sib,
+ struct ma_state *parent)
+{
+ cp_data_calc(cp, wr_mas, wr_mas);
+ sib->end = 0;
+ if (cp->data > mt_slots[wr_mas->type]) {
+ push_data_sib(cp, wr_mas->mas, sib, parent);
+ if (sib->end)
+ goto use_sib;
+ } else if (cp->data <= mt_min_slots[wr_mas->type]) {
+ if ((wr_mas->mas->min != 0) ||
+ (wr_mas->mas->max != ULONG_MAX)) {
+ rebalance_sib(parent, sib);
+ goto use_sib;
+ }
}
- old_enode = mast->orig_l->node;
- mas->depth = l_mas.depth;
- mas->node = l_mas.node;
- mas->min = l_mas.min;
- mas->max = l_mas.max;
- mas->offset = l_mas.offset;
- mas_wmb_replace(mas, old_enode, new_height);
- mtree_range_walk(mas);
return;
+
+use_sib:
+
+ cp->data += sib->end + 1;
}
/*
- * mas_rebalance() - Rebalance a given node.
+ * spanning_data() - Calculate the @cp data and populate @sib if insufficient
+ * @cp: The maple copy node
+ * @l_wr_mas: The left write maple state
+ * @r_wr_mas: The right write maple state
+ * @sib: The maple state of the sibling.
+ *
+ * Note: @cp->data is a size and not indexed by 0. @sib->end may be set to 0 to
+ * indicate it will not be used.
+ */
+static inline void spanning_data(struct maple_copy *cp,
+ struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas,
+ struct ma_state *sib)
+{
+ cp_data_calc(cp, l_wr_mas, r_wr_mas);
+ if (((l_wr_mas->mas->min != 0) || (r_wr_mas->mas->max != ULONG_MAX)) &&
+ (cp->data <= mt_min_slots[l_wr_mas->type])) {
+ spanning_sib(l_wr_mas, r_wr_mas, sib);
+ cp->data += sib->end + 1;
+ } else {
+ sib->end = 0;
+ }
+}
+
+/*
+ * dst_setup() - Set up one or more destinations for the new data.
+ * @cp: The maple copy node
* @mas: The maple state
- * @b_node: The big maple node.
- *
- * Rebalance two nodes into a single node or two new nodes that are sufficient.
- * Continue upwards until tree is sufficient.
+ * @mt: The source node type
*/
-static inline void mas_rebalance(struct ma_state *mas,
- struct maple_big_node *b_node)
+static inline
+void dst_setup(struct maple_copy *cp, struct ma_state *mas, enum maple_type mt)
{
- char empty_count = mas_mt_height(mas);
- struct maple_subtree_state mast;
- unsigned char shift, b_end = ++b_node->b_end;
+ /* Data is 1 indexed, every src has +1 added. */
- MA_STATE(l_mas, mas->tree, mas->index, mas->last);
- MA_STATE(r_mas, mas->tree, mas->index, mas->last);
+ if (cp->data <= mt_slots[mt]) {
+ cp->split = cp->data - 1;
+ cp->d_count = 1;
+ goto node_setup;
+ }
- trace_ma_op(TP_FCT, mas);
+ cp->split = (cp->data - 1) / 2;
+ cp->d_count = 2;
+ if (cp->data < mt_slots[mt] * 2)
+ goto node_setup;
- /*
- * Rebalancing occurs if a node is insufficient. Data is rebalanced
- * against the node to the right if it exists, otherwise the node to the
- * left of this node is rebalanced against this node. If rebalancing
- * causes just one node to be produced instead of two, then the parent
- * is also examined and rebalanced if it is insufficient. Every level
- * tries to combine the data in the same way. If one node contains the
- * entire range of the tree, then that node is used as a new root node.
- */
+ if (cp->data == mt_slots[mt] * 2) {
+ unsigned char off;
+ unsigned char s;
+
+ if (!ma_is_leaf(mt))
+ goto node_setup;
+
+ /*
+ * Leaf nodes are a bit tricky because we cannot assume the data
+ * can fit due to the NULL limitation on node ends.
+ */
+ off = cp->split;
+ for (s = 0; s < cp->s_count; s++) {
+ unsigned char s_off;
- mast.orig_l = &l_mas;
- mast.orig_r = &r_mas;
- mast.bn = b_node;
- mast.bn->type = mte_node_type(mas->node);
+ s_off = cp->src[s].end - cp->src[s].start;
+ if (s_off >= off)
+ break;
- l_mas = r_mas = *mas;
+ s_off++;
+ off -= s_off;
+ }
- if (mas_next_sibling(&r_mas)) {
- mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end);
- r_mas.last = r_mas.index = r_mas.max;
- } else {
- mas_prev_sibling(&l_mas);
- shift = mas_data_end(&l_mas) + 1;
- mab_shift_right(b_node, shift);
- mas->offset += shift;
- mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0);
- b_node->b_end = shift + b_end;
- l_mas.index = l_mas.last = l_mas.min;
- }
+ off += cp->src[s].start;
+ if (ma_slots(cp->src[s].node, cp->src[s].mt)[off])
+ goto node_setup;
- return mas_spanning_rebalance(mas, &mast, empty_count);
-}
+ cp->split++;
+ if (cp->split < mt_slots[mt])
+ goto node_setup;
-/*
- * mas_split_final_node() - Split the final node in a subtree operation.
- * @mast: the maple subtree state
- * @mas: The maple state
- */
-static inline void mas_split_final_node(struct maple_subtree_state *mast,
- struct ma_state *mas)
-{
- struct maple_enode *ancestor;
+ cp->split -= 2;
+ if (cp->data - 2 - cp->split < mt_slots[mt])
+ goto node_setup;
- if (mte_is_root(mas->node)) {
- if (mt_is_alloc(mas->tree))
- mast->bn->type = maple_arange_64;
- else
- mast->bn->type = maple_range_64;
}
- /*
- * Only a single node is used here, could be root.
- * The Big_node data should just fit in a single node.
- */
- ancestor = mas_new_ma_node(mas, mast->bn);
- mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset);
- mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset);
- mte_to_node(ancestor)->parent = mas_mn(mas)->parent;
- mast->l->node = ancestor;
- mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
- mas->offset = mast->bn->b_end - 1;
+ /* No other choice but to 3-way split the data */
+ cp->split = (cp->data + 2) / 3;
+ cp->d_count = 3;
+
+node_setup:
+ for (int i = 0; i < cp->d_count; i++) {
+ cp->dst[i].mt = mt;
+ cp->dst[i].node = ma_mnode_ptr(mas_pop_node(mas));
+ }
}
-/*
- * mast_fill_bnode() - Copy data into the big node in the subtree state
- * @mast: The maple subtree state
- * @mas: the maple state
- * @skip: The number of entries to skip for new nodes insertion.
- */
-static inline void mast_fill_bnode(struct maple_subtree_state *mast,
- struct ma_state *mas,
- unsigned char skip)
+static inline void append_mas_cp(struct maple_copy *cp,
+ struct ma_state *mas, unsigned char start, unsigned char end)
{
- bool cp = true;
- unsigned char split;
+ struct maple_node *node;
+ enum maple_type mt;
+ unsigned char count;
- memset(mast->bn, 0, sizeof(struct maple_big_node));
+ count = cp->s_count;
+ node = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ cp->src[count].node = node;
+ cp->src[count].mt = mt;
+ if (mas->end <= end)
+ cp->src[count].max = mas->max;
+ else
+ cp->src[count].max = ma_pivots(node, mt)[end];
- if (mte_is_root(mas->node)) {
- cp = false;
- } else {
- mas_ascend(mas);
- mas->offset = mte_parent_slot(mas->node);
- }
+ cp->src[count].start = start;
+ cp->src[count].end = end;
+ cp->s_count++;
+}
- if (cp && mast->l->offset)
- mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0);
+static inline void append_wr_mas_cp(struct maple_copy *cp,
+ struct ma_wr_state *wr_mas, unsigned char start, unsigned char end)
+{
+ unsigned char count;
- split = mast->bn->b_end;
- mab_set_b_end(mast->bn, mast->l, mast->l->node);
- mast->r->offset = mast->bn->b_end;
- mab_set_b_end(mast->bn, mast->r, mast->r->node);
- if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max)
- cp = false;
+ count = cp->s_count;
+ cp->src[count].node = wr_mas->node;
+ cp->src[count].mt = wr_mas->type;
+ if (wr_mas->mas->end <= end)
+ cp->src[count].max = wr_mas->mas->max;
+ else
+ cp->src[count].max = wr_mas->pivots[end];
- if (cp)
- mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1,
- mast->bn, mast->bn->b_end);
+ cp->src[count].start = start;
+ cp->src[count].end = end;
+ cp->s_count++;
+}
- mast->bn->b_end--;
- mast->bn->type = mte_node_type(mas->node);
+static inline void init_cp_src(struct maple_copy *cp)
+{
+ cp->src[cp->s_count].node = ma_mnode_ptr(cp);
+ cp->src[cp->s_count].mt = maple_copy;
+ cp->src[cp->s_count].max = cp->max;
+ cp->src[cp->s_count].start = 0;
+ cp->src[cp->s_count].end = cp->end;
+ cp->s_count++;
}
/*
- * mast_split_data() - Split the data in the subtree state big node into regular
- * nodes.
- * @mast: The maple subtree state
- * @mas: The maple state
- * @split: The location to split the big node
+ * multi_src_setup() - Set the @cp node up with multiple sources to copy from.
+ * @cp: The maple copy node
+ * @l_wr_mas: The left write maple state
+ * @r_wr_mas: The right write maple state
+ * @sib: The sibling maple state
+ *
+ * Note: @sib->end == 0 indicates no sibling will be used.
*/
-static inline void mast_split_data(struct maple_subtree_state *mast,
- struct ma_state *mas, unsigned char split)
+static inline
+void multi_src_setup(struct maple_copy *cp, struct ma_wr_state *l_wr_mas,
+ struct ma_wr_state *r_wr_mas, struct ma_state *sib)
{
- unsigned char p_slot;
+ cp->s_count = 0;
+ if (sib->end && sib->max < l_wr_mas->mas->min)
+ append_mas_cp(cp, sib, 0, sib->end);
- mab_mas_cp(mast->bn, 0, split, mast->l, true);
- mte_set_pivot(mast->r->node, 0, mast->r->max);
- mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false);
- mast->l->offset = mte_parent_slot(mas->node);
- mast->l->max = mast->bn->pivot[split];
- mast->r->min = mast->l->max + 1;
- if (mte_is_leaf(mas->node))
- return;
+ /* Copy left 0 - offset */
+ if (l_wr_mas->mas->offset) {
+ unsigned char off = l_wr_mas->mas->offset - 1;
+
+ append_wr_mas_cp(cp, l_wr_mas, 0, off);
+ cp->src[cp->s_count - 1].max = cp->min - 1;
+ }
- p_slot = mast->orig_l->offset;
- mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node,
- &p_slot, split);
- mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node,
- &p_slot, split);
+ init_cp_src(cp);
+
+ /* Copy right either from offset or offset + 1 pending on r_max */
+ if (r_wr_mas->mas->end != r_wr_mas->offset_end)
+ append_wr_mas_cp(cp, r_wr_mas, r_wr_mas->offset_end + 1,
+ r_wr_mas->mas->end);
+
+ if (sib->end && sib->min > r_wr_mas->mas->max)
+ append_mas_cp(cp, sib, 0, sib->end);
}
-/*
- * mas_push_data() - Instead of splitting a node, it is beneficial to push the
- * data to the right or left node if there is room.
- * @mas: The maple state
- * @mast: The maple subtree state
- * @left: Push left or not.
- *
- * Keeping the height of the tree low means faster lookups.
- *
- * Return: True if pushed, false otherwise.
- */
-static inline bool mas_push_data(struct ma_state *mas,
- struct maple_subtree_state *mast, bool left)
+static inline
+void cp_data_write(struct maple_copy *cp, struct ma_state *mas)
{
- unsigned char slot_total = mast->bn->b_end;
- unsigned char end, space, split;
+ struct maple_node *dst, *src;
+ unsigned char s, d;
+ unsigned char dst_offset;
+ unsigned char data_offset;
+ unsigned char src_end, s_offset;
+ unsigned char split;
+ unsigned long s_max, d_max;
+ unsigned char dst_size;
+ enum maple_type s_mt, d_mt;
+
+ data_offset = 0;
+ s = d = 0;
+ /* Readability help */
+ src = cp->src[s].node;
+ dst = cp->dst[d].node;
+ s_offset = cp->src[s].start;
+ src_end = cp->src[s].end;
+ split = cp->split;
+ s_max = cp->src[s].max;
+ s_mt = cp->src[s].mt;
+ d_mt = cp->dst[d].mt;
+ do {
+ dst_offset = 0;
+ d_max = 0;
+ dst = cp->dst[d].node;
+ d_mt = cp->dst[d].mt;
+ dst_size = split + 1;
- MA_STATE(tmp_mas, mas->tree, mas->index, mas->last);
- tmp_mas = *mas;
- tmp_mas.depth = mast->l->depth;
+ while (dst_size) {
+ unsigned char size;
- if (left && !mas_prev_sibling(&tmp_mas))
- return false;
- else if (!left && !mas_next_sibling(&tmp_mas))
- return false;
+ if (src_end - s_offset + 1 < dst_size)
+ size = src_end - s_offset + 1;
+ else
+ size = dst_size;
+
+ d_max = node_copy(mas, src, s_offset, size, s_max, s_mt,
+ dst, dst_offset, d_mt);
+
+ dst_offset += size;
+ s_offset += size;
+ if (s_offset > src_end) {
+ /* This source is exhausted */
+ s++;
+ if (s >= cp->s_count) {
+ cp->dst[d].max = d_max;
+ node_finalise(dst, d_mt, dst_offset);
+ return;
+ }
+ /* Reset local src */
+ src = cp->src[s].node;
+ s_offset = cp->src[s].start;
+ src_end = cp->src[s].end;
+ s_max = cp->src[s].max;
+ s_mt = cp->src[s].mt;
+ }
- end = mas_data_end(&tmp_mas);
- slot_total += end;
- space = 2 * mt_slot_count(mas->node) - 2;
- /* -2 instead of -1 to ensure there isn't a triple split */
- if (ma_is_leaf(mast->bn->type))
- space--;
+ dst_size -= size;
+ data_offset += size;
+ }
- if (mas->max == ULONG_MAX)
- space--;
+ split = cp->split;
+ cp->dst[d].max = d_max;
+ /* Handle null entries */
+ if (cp->dst[d].max != ULONG_MAX &&
+ !ma_slots(dst, d_mt)[dst_offset - 1]) {
+ if (s_offset == cp->src[s].start) {
+ s--;
+ src = cp->src[s].node;
+ src_end = cp->src[s].end;
+ s_max = cp->src[s].max;
+ s_mt = cp->src[s].mt;
+ s_offset = src_end;
+ } else {
+ s_offset--;
+ }
+ /* Set dst max and clear pivot */
+ split++;
+ data_offset--;
+ dst_offset--;
+ cp->dst[d].max = ma_pivots(dst, d_mt)[dst_offset - 1];
+ }
- if (slot_total >= space)
- return false;
+ node_finalise(dst, d_mt, dst_offset);
+ ++d; /* Next destination */
+ if (d == cp->d_count - 1)
+ split = cp->data - data_offset;
- /* Get the data; Fill mast->bn */
- mast->bn->b_end++;
- if (left) {
- mab_shift_right(mast->bn, end + 1);
- mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0);
- mast->bn->b_end = slot_total + 1;
- } else {
- mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end);
- }
+ if (d >= cp->d_count) {
+ WARN_ON(data_offset < cp->data);
+ return;
+ }
- /* Configure mast for splitting of mast->bn */
- split = mt_slots[mast->bn->type] - 2;
- if (left) {
- /* Switch mas to prev node */
- *mas = tmp_mas;
- /* Start using mast->l for the left side. */
- tmp_mas.node = mast->l->node;
- *mast->l = tmp_mas;
- } else {
- tmp_mas.node = mast->r->node;
- *mast->r = tmp_mas;
- split = slot_total - split;
- }
- split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]);
- /* Update parent slot for split calculation. */
- if (left)
- mast->orig_l->offset += end + 1;
-
- mast_split_data(mast, mas, split);
- mast_fill_bnode(mast, mas, 2);
- mas_split_final_node(mast, mas);
- return true;
+ } while (data_offset <= cp->data);
}
/*
- * mas_split() - Split data that is too big for one node into two.
+ * cp_dst_to_slots() - Migrate the maple copy destination to the maple copy
+ * slots
+ * @cp: The maple copy node
+ * @min: The minimal value represented
+ * @max: The maximum value represented
* @mas: The maple state
- * @b_node: The maple big node
*/
-static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
+static inline void cp_dst_to_slots(struct maple_copy *cp, unsigned long min,
+ unsigned long max, struct ma_state *mas)
{
- struct maple_subtree_state mast;
- int height = 0;
- unsigned int orig_height = mas_mt_height(mas);
- unsigned char mid_split, split = 0;
- struct maple_enode *old;
+ unsigned char d;
+ unsigned long slot_min = min;
- /*
- * Splitting is handled differently from any other B-tree; the Maple
- * Tree splits upwards. Splitting up means that the split operation
- * occurs when the walk of the tree hits the leaves and not on the way
- * down. The reason for splitting up is that it is impossible to know
- * how much space will be needed until the leaf is (or leaves are)
- * reached. Since overwriting data is allowed and a range could
- * overwrite more than one range or result in changing one entry into 3
- * entries, it is impossible to know if a split is required until the
- * data is examined.
- *
- * Splitting is a balancing act between keeping allocations to a minimum
- * and avoiding a 'jitter' event where a tree is expanded to make room
- * for an entry followed by a contraction when the entry is removed. To
- * accomplish the balance, there are empty slots remaining in both left
- * and right nodes after a split.
- */
- MA_STATE(l_mas, mas->tree, mas->index, mas->last);
- MA_STATE(r_mas, mas->tree, mas->index, mas->last);
- MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
- MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
+ for (d = 0; d < cp->d_count; d++) {
+ struct maple_node *mn = cp->dst[d].node;
+ enum maple_type mt = cp->dst[d].mt;
+ unsigned long slot_max = cp->dst[d].max;
- trace_ma_op(TP_FCT, mas);
+ /*
+ * Warning, see cp_leaf_init() comment and rcu_assign_pointer()
+ * documentation. Since these are new nodes, there are no
+ * read-side operations that can view them until they are
+ * inserted into the tree after an rcu_assign_pointer() call.
+ */
+ ma_init_slot(&cp->slot[d], mn, mt);
+ cp->pivot[d] = slot_max;
+ if (mt_is_alloc(mas->tree)) {
+ if (ma_is_leaf(mt)) {
+ cp->gap[d] = ma_leaf_max_gap(mn, mt, slot_min,
+ slot_max, ma_pivots(mn, mt),
+ ma_slots(mn, mt));
+ } else {
+ unsigned long *gaps = ma_gaps(mn, mt);
- mast.l = &l_mas;
- mast.r = &r_mas;
- mast.orig_l = &prev_l_mas;
- mast.orig_r = &prev_r_mas;
- mast.bn = b_node;
+ if (gaps) {
+ unsigned char gap_slot;
- while (height++ <= orig_height) {
- if (mt_slots[b_node->type] > b_node->b_end) {
- mas_split_final_node(&mast, mas);
- break;
+ gap_slot = ma_meta_gap(mn);
+ cp->gap[d] = gaps[gap_slot];
+ }
+ }
}
+ slot_min = slot_max + 1;
+ }
- l_mas = r_mas = *mas;
- l_mas.node = mas_new_ma_node(mas, b_node);
- r_mas.node = mas_new_ma_node(mas, b_node);
- /*
- * Another way that 'jitter' is avoided is to terminate a split up early if the
- * left or right node has space to spare. This is referred to as "pushing left"
- * or "pushing right" and is similar to the B* tree, except the nodes left or
- * right can rarely be reused due to RCU, but the ripple upwards is halted which
- * is a significant savings.
- */
- /* Try to push left. */
- if (mas_push_data(mas, &mast, true)) {
- height++;
- break;
- }
- /* Try to push right. */
- if (mas_push_data(mas, &mast, false)) {
- height++;
- break;
- }
+ cp->end = cp->d_count - 1;
+ cp->min = min;
+ cp->max = max;
+}
+
+static inline bool cp_is_new_root(struct maple_copy *cp, struct ma_state *mas)
+{
+ if (cp->min || cp->max != ULONG_MAX)
+ return false;
- split = mab_calc_split(mas, b_node, &mid_split);
- mast_split_data(&mast, mas, split);
+ if (cp->d_count != 1) {
+ enum maple_type mt = maple_arange_64;
+
+ if (!mt_is_alloc(mas->tree))
+ mt = maple_range_64;
+
+ cp->data = cp->d_count;
+ cp->s_count = 0;
+ dst_setup(cp, mas, mt);
+ init_cp_src(cp);
+ node_copy(mas, cp->src[0].node, 0, cp->data, cp->max, maple_copy,
+ cp->dst[0].node, 0, mt);
+ node_finalise(cp->dst[0].node, mt, cp->end + 1);
/*
- * Usually correct, mab_mas_cp in the above call overwrites
- * r->max.
+ * Warning, see cp_leaf_init() comment and rcu_assign_pointer()
+ * documentation. Since this is a new root, there are no
+ * read-side operations that can view it until it is insert into
+ * the tree after an rcu_assign_pointer() call.
*/
- mast.r->max = mas->max;
- mast_fill_bnode(&mast, mas, 1);
- prev_l_mas = *mast.l;
- prev_r_mas = *mast.r;
+ ma_init_slot(&cp->slot[0], cp->dst[0].node, mt);
+ cp->height++;
+ }
+ WARN_ON_ONCE(cp->dst[0].node != mte_to_node(
+ mt_slot_locked(mas->tree, cp->slot, 0)));
+ cp->dst[0].node->parent = ma_parent_ptr(mas_tree_parent(mas));
+ mas->min = 0;
+ mas->max = ULONG_MAX;
+ mas->depth = 0;
+ mas->node = mas_root_locked(mas);
+ return true;
+}
+
+static inline bool cp_converged(struct maple_copy *cp, struct ma_state *mas,
+ struct ma_state *sib)
+{
+ if (cp->d_count != 1 || sib->end)
+ return false;
+
+ cp->dst[0].node->parent = ma_parent_ptr(mas_mn(mas)->parent);
+ return true;
+}
+
+/*
+ * spanning_ascend() - See if a spanning store operation has to keep walking up
+ * the tree
+ * @cp: The maple_copy node
+ * @l_wr_mas: The left maple write state
+ * @r_wr_mas: The right maple write state
+ * @sib: the maple state of the sibling
+ *
+ * Returns: True if another iteration is necessary.
+ */
+static bool spanning_ascend(struct maple_copy *cp, struct ma_state *mas,
+ struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas,
+ struct ma_state *sib)
+{
+ if (sib->end) {
+ if (sib->max < l_wr_mas->mas->min)
+ *l_wr_mas->mas = *sib;
+ else
+ *r_wr_mas->mas = *sib;
}
- /* Set the original node as dead */
- old = mas->node;
- mas->node = l_mas.node;
- mas_wmb_replace(mas, old, height);
- mtree_range_walk(mas);
- return;
+ cp_dst_to_slots(cp, l_wr_mas->mas->min, r_wr_mas->mas->max, mas);
+ if (cp_is_new_root(cp, mas))
+ return false;
+
+ /* Converged and has a single destination */
+ if ((cp->d_count == 1) &&
+ (l_wr_mas->mas->node == r_wr_mas->mas->node)) {
+ cp->dst[0].node->parent = ma_parent_ptr(mas_mn(mas)->parent);
+ return false;
+ }
+
+ cp->height++;
+ wr_mas_ascend(l_wr_mas);
+ wr_mas_ascend(r_wr_mas);
+ return true;
+}
+
+static inline
+void copy_tree_location(const struct ma_state *src, struct ma_state *dst)
+{
+ dst->node = src->node;
+ dst->offset = src->offset;
+ dst->min = src->min;
+ dst->max = src->max;
+ dst->end = src->end;
+ dst->depth = src->depth;
}
/*
- * mas_commit_b_node() - Commit the big node into the tree.
- * @wr_mas: The maple write state
- * @b_node: The maple big node
+ * rebalance_ascend() - Ascend the tree and set up for the next loop - if
+ * necessary
+ *
+ * Return: True if there another rebalancing operation on the next level is
+ * needed, false otherwise.
*/
-static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas,
- struct maple_big_node *b_node)
+static inline bool rebalance_ascend(struct maple_copy *cp,
+ struct ma_wr_state *wr_mas, struct ma_state *sib,
+ struct ma_state *parent)
{
- enum store_type type = wr_mas->mas->store_type;
+ struct ma_state *mas;
+ unsigned long min, max;
- WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store);
+ mas = wr_mas->mas;
+ if (!sib->end) {
+ min = mas->min;
+ max = mas->max;
+ } else if (sib->min > mas->max) { /* Move right succeeded */
+ min = mas->min;
+ max = sib->max;
+ wr_mas->offset_end = parent->offset + 1;
+ } else {
+ min = sib->min;
+ max = mas->max;
+ wr_mas->offset_end = parent->offset;
+ parent->offset--;
+ }
- if (type == wr_rebalance)
- return mas_rebalance(wr_mas->mas, b_node);
+ cp_dst_to_slots(cp, min, max, mas);
+ if (cp_is_new_root(cp, mas))
+ return false;
+
+ if (cp_converged(cp, mas, sib))
+ return false;
- return mas_split(wr_mas->mas, b_node);
+ cp->height++;
+ copy_tree_location(parent, mas);
+ wr_mas_setup(wr_mas, mas);
+ return true;
}
/*
@@ -3112,7 +2765,6 @@ static inline void mas_root_expand(struct ma_state *mas, void *entry)
ma_set_meta(node, maple_leaf_64, 0, slot);
/* swap the new root into the tree */
rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
- return;
}
/*
@@ -3269,6 +2921,7 @@ static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
l_mas->index = l_mas->min;
l_mas->offset = l_slot - 1;
+ l_wr_mas->r_min = l_mas->index;
}
if (!r_wr_mas->content) {
@@ -3281,6 +2934,7 @@ static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
r_wr_mas->type, r_mas->offset + 1);
r_mas->offset++;
+ r_wr_mas->r_max = r_mas->last;
}
}
@@ -3382,8 +3036,6 @@ static inline void mas_new_root(struct ma_state *mas, void *entry)
done:
if (xa_is_node(root))
mte_destroy_walk(root, mas->tree);
-
- return;
}
/*
* mas_wr_spanning_store() - Create a subtree with the store operation completed
@@ -3392,18 +3044,15 @@ done:
* span.
* @wr_mas: The maple write state
*/
-static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
+static void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
{
- struct maple_subtree_state mast;
- struct maple_big_node b_node;
+ struct maple_copy cp;
struct ma_state *mas;
- unsigned char height;
+ struct ma_state sib;
/* Left and Right side of spanning store */
- MA_STATE(l_mas, NULL, 0, 0);
MA_STATE(r_mas, NULL, 0, 0);
MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
- MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry);
/*
* A store operation that spans multiple nodes is called a spanning
@@ -3426,7 +3075,6 @@ static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
* Node rebalancing may occur due to this store, so there may be three new
* entries per level plus a new root.
*/
- height = mas_mt_height(mas);
/*
* Set up right side. Need to get to the next offset after the spanning
@@ -3441,42 +3089,31 @@ static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
r_mas.index = r_mas.last;
mas_wr_walk_index(&r_wr_mas);
r_mas.last = r_mas.index = mas->last;
+ r_wr_mas.end_piv = r_wr_mas.r_max;
/* Set up left side. */
- l_mas = *mas;
- mas_wr_walk_index(&l_wr_mas);
+ mas_wr_walk_index(wr_mas);
if (!wr_mas->entry) {
- mas_extend_spanning_null(&l_wr_mas, &r_wr_mas);
- mas->offset = l_mas.offset;
- mas->index = l_mas.index;
- mas->last = l_mas.last = r_mas.last;
+ mas_extend_spanning_null(wr_mas, &r_wr_mas);
+ mas->last = r_mas.last;
}
/* expanding NULLs may make this cover the entire range */
- if (!l_mas.index && r_mas.last == ULONG_MAX) {
+ if (!mas->index && r_mas.last == ULONG_MAX) {
mas_set_range(mas, 0, ULONG_MAX);
return mas_new_root(mas, wr_mas->entry);
}
- memset(&b_node, 0, sizeof(struct maple_big_node));
- /* Copy l_mas and store the value in b_node. */
- mas_store_b_node(&l_wr_mas, &b_node, l_mas.end);
- /* Copy r_mas into b_node if there is anything to copy. */
- if (r_mas.max > r_mas.last)
- mas_mab_cp(&r_mas, r_mas.offset, r_mas.end,
- &b_node, b_node.b_end + 1);
- else
- b_node.b_end++;
-
- /* Stop spanning searches by searching for just index. */
- l_mas.index = l_mas.last = mas->index;
+ cp_leaf_init(&cp, mas, wr_mas, &r_wr_mas);
+ do {
+ spanning_data(&cp, wr_mas, &r_wr_mas, &sib);
+ multi_src_setup(&cp, wr_mas, &r_wr_mas, &sib);
+ dst_setup(&cp, mas, wr_mas->type);
+ cp_data_write(&cp, mas);
+ } while (spanning_ascend(&cp, mas, wr_mas, &r_wr_mas, &sib));
- mast.bn = &b_node;
- mast.orig_l = &l_mas;
- mast.orig_r = &r_mas;
- /* Combine l_mas and r_mas and split them up evenly again. */
- return mas_spanning_rebalance(mas, &mast, height + 1);
+ mas_wmb_replace(mas, &cp);
}
/*
@@ -3485,20 +3122,28 @@ static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
*
* Attempts to reuse the node, but may allocate.
*/
-static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
- unsigned char new_end)
+static inline void mas_wr_node_store(struct ma_wr_state *wr_mas)
{
- struct ma_state *mas = wr_mas->mas;
- void __rcu **dst_slots;
- unsigned long *dst_pivots;
- unsigned char dst_offset, offset_end = wr_mas->offset_end;
+ unsigned char dst_offset, offset_end;
+ unsigned char copy_size, node_pivots;
struct maple_node reuse, *newnode;
- unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type];
- bool in_rcu = mt_in_rcu(mas->tree);
- unsigned char height = mas_mt_height(mas);
+ unsigned long *dst_pivots;
+ void __rcu **dst_slots;
+ unsigned char new_end;
+ struct ma_state *mas;
+ bool in_rcu;
- if (mas->last == wr_mas->end_piv)
+ mas = wr_mas->mas;
+ trace_ma_op(TP_FCT, mas);
+ in_rcu = mt_in_rcu(mas->tree);
+ offset_end = wr_mas->offset_end;
+ node_pivots = mt_pivots[wr_mas->type];
+ /* Assume last adds an entry */
+ new_end = mas->end + 1 - offset_end + mas->offset;
+ if (mas->last == wr_mas->end_piv) {
offset_end++; /* don't copy this offset */
+ new_end--;
+ }
/* set up node. */
if (in_rcu) {
@@ -3512,13 +3157,16 @@ static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
dst_pivots = ma_pivots(newnode, wr_mas->type);
dst_slots = ma_slots(newnode, wr_mas->type);
/* Copy from start to insert point */
- memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset);
- memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset);
+ if (mas->offset) {
+ memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset);
+ memcpy(dst_slots, wr_mas->slots, sizeof(void __rcu *) * mas->offset);
+ }
/* Handle insert of new range starting after old range */
if (wr_mas->r_min < mas->index) {
rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content);
dst_pivots[mas->offset++] = mas->index - 1;
+ new_end++;
}
/* Store the new entry and range end. */
@@ -3537,7 +3185,7 @@ static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
/* Copy to the end of node if necessary. */
copy_size = mas->end - offset_end + 1;
memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end,
- sizeof(void *) * copy_size);
+ sizeof(void __rcu *) * copy_size);
memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end,
sizeof(unsigned long) * (copy_size - 1));
@@ -3550,14 +3198,13 @@ done:
struct maple_enode *old_enode = mas->node;
mas->node = mt_mk_node(newnode, wr_mas->type);
- mas_replace_node(mas, old_enode, height);
+ mas_replace_node(mas, old_enode, mas_mt_height(mas));
} else {
memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
}
trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
mas_update_gap(mas);
mas->end = new_end;
- return;
}
/*
@@ -3605,8 +3252,6 @@ static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas)
*/
if (!wr_mas->entry || gap)
mas_update_gap(mas);
-
- return;
}
static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
@@ -3675,18 +3320,17 @@ static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
/*
* mas_wr_append: Attempt to append
* @wr_mas: the maple write state
- * @new_end: The end of the node after the modification
*
* This is currently unsafe in rcu mode since the end of the node may be cached
* by readers while the node contents may be updated which could result in
* inaccurate information.
*/
-static inline void mas_wr_append(struct ma_wr_state *wr_mas,
- unsigned char new_end)
+static inline void mas_wr_append(struct ma_wr_state *wr_mas)
{
struct ma_state *mas = wr_mas->mas;
void __rcu **slots;
unsigned char end = mas->end;
+ unsigned char new_end = mas_wr_new_end(wr_mas);
if (new_end < mt_pivots[wr_mas->type]) {
wr_mas->pivots[new_end] = wr_mas->pivots[end];
@@ -3720,23 +3364,147 @@ static inline void mas_wr_append(struct ma_wr_state *wr_mas,
mas->end = new_end;
trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry);
- return;
}
/*
- * mas_wr_bnode() - Slow path for a modification.
- * @wr_mas: The write maple state
+ * split_ascend() - See if a split operation has to keep walking up the tree
+ * @cp: The maple_copy node
+ * @wr_mas: The maple write state
+ * @sib: the maple state of the sibling
*
- * This is where split, rebalance end up.
+ * Return: true if another split operation on the next level is needed, false
+ * otherwise
*/
-static void mas_wr_bnode(struct ma_wr_state *wr_mas)
+static inline bool split_ascend(struct maple_copy *cp,
+ struct ma_wr_state *wr_mas, struct ma_state *sib,
+ struct ma_state *parent)
{
- struct maple_big_node b_node;
+ struct ma_state *mas;
+ unsigned long min, max;
+
+ mas = wr_mas->mas;
+ min = mas->min; /* push right, or normal split */
+ max = mas->max;
+ wr_mas->offset_end = parent->offset;
+ if (sib->end) {
+ if (sib->max < mas->min) {
+ min = sib->min; /* push left */
+ parent->offset--;
+ } else {
+ max = sib->max; /* push right */
+ wr_mas->offset_end++;
+ }
+ }
+
+ cp_dst_to_slots(cp, min, max, mas);
+ if (cp_is_new_root(cp, mas))
+ return false;
+ if (cp_converged(cp, mas, sib))
+ return false;
+
+ cp->height++;
+ copy_tree_location(parent, mas);
+ wr_mas_setup(wr_mas, mas);
+ return true;
+}
+
+/*
+ * split_data() - Calculate the @cp data, populate @sib if the data can be
+ * pushed into a sibling.
+ * @cp: The maple copy node
+ * @wr_mas: The left write maple state
+ * @sib: The maple state of the sibling.
+ *
+ * Note: @cp->data is a size and not indexed by 0. @sib->end may be set to 0 to
+ * indicate it will not be used.
+ *
+ */
+static inline void split_data(struct maple_copy *cp,
+ struct ma_wr_state *wr_mas, struct ma_state *sib,
+ struct ma_state *parent)
+{
+ cp_data_calc(cp, wr_mas, wr_mas);
+ if (cp->data <= mt_slots[wr_mas->type]) {
+ sib->end = 0;
+ return;
+ }
+
+ push_data_sib(cp, wr_mas->mas, sib, parent);
+ if (sib->end)
+ cp->data += sib->end + 1;
+}
+
+/*
+ * mas_wr_split() - Expand one node into two
+ * @wr_mas: The write maple state
+ */
+static void mas_wr_split(struct ma_wr_state *wr_mas)
+{
+ struct ma_state parent;
+ struct ma_state *mas;
+ struct maple_copy cp;
+ struct ma_state sib;
+
+ mas = wr_mas->mas;
trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry);
- memset(&b_node, 0, sizeof(struct maple_big_node));
- mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
- mas_commit_b_node(wr_mas, &b_node);
+ parent = *mas;
+ cp_leaf_init(&cp, mas, wr_mas, wr_mas);
+ do {
+ if (!mte_is_root(parent.node)) {
+ mas_ascend(&parent);
+ parent.end = mas_data_end(&parent);
+ }
+ split_data(&cp, wr_mas, &sib, &parent);
+ multi_src_setup(&cp, wr_mas, wr_mas, &sib);
+ dst_setup(&cp, mas, wr_mas->type);
+ cp_data_write(&cp, mas);
+ } while (split_ascend(&cp, wr_mas, &sib, &parent));
+
+ mas_wmb_replace(mas, &cp);
+}
+
+/*
+ * mas_wr_rebalance() - Insufficient data in one node needs to either get data
+ * from a sibling or absorb a sibling all together.
+ * @wr_mas: The write maple state
+ *
+ * Rebalance is different than a spanning store in that the write state is
+ * already at the leaf node that's being altered.
+ */
+static void mas_wr_rebalance(struct ma_wr_state *wr_mas)
+{
+ struct ma_state parent;
+ struct ma_state *mas;
+ struct maple_copy cp;
+ struct ma_state sib;
+
+ /*
+ * Rebalancing occurs if a node is insufficient. Data is rebalanced
+ * against the node to the right if it exists, otherwise the node to the
+ * left of this node is rebalanced against this node. If rebalancing
+ * causes just one node to be produced instead of two, then the parent
+ * is also examined and rebalanced if it is insufficient. Every level
+ * tries to combine the data in the same way. If one node contains the
+ * entire range of the tree, then that node is used as a new root node.
+ */
+
+ mas = wr_mas->mas;
+ trace_ma_op(TP_FCT, mas);
+ parent = *mas;
+ cp_leaf_init(&cp, mas, wr_mas, wr_mas);
+ do {
+ if (!mte_is_root(parent.node)) {
+ mas_ascend(&parent);
+ parent.end = mas_data_end(&parent);
+ }
+ rebalance_data(&cp, wr_mas, &sib, &parent);
+ multi_src_setup(&cp, wr_mas, wr_mas, &sib);
+ dst_setup(&cp, mas, wr_mas->type);
+ cp_data_write(&cp, mas);
+ } while (rebalance_ascend(&cp, wr_mas, &sib, &parent));
+
+ mas_wmb_replace(mas, &cp);
}
/*
@@ -3746,7 +3514,6 @@ static void mas_wr_bnode(struct ma_wr_state *wr_mas)
static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas)
{
struct ma_state *mas = wr_mas->mas;
- unsigned char new_end = mas_wr_new_end(wr_mas);
switch (mas->store_type) {
case wr_exact_fit:
@@ -3755,20 +3522,22 @@ static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas)
mas_update_gap(mas);
break;
case wr_append:
- mas_wr_append(wr_mas, new_end);
+ mas_wr_append(wr_mas);
break;
case wr_slot_store:
mas_wr_slot_store(wr_mas);
break;
case wr_node_store:
- mas_wr_node_store(wr_mas, new_end);
+ mas_wr_node_store(wr_mas);
break;
case wr_spanning_store:
mas_wr_spanning_store(wr_mas);
break;
case wr_split_store:
+ mas_wr_split(wr_mas);
+ break;
case wr_rebalance:
- mas_wr_bnode(wr_mas);
+ mas_wr_rebalance(wr_mas);
break;
case wr_new_root:
mas_new_root(mas, wr_mas->entry);
@@ -3779,8 +3548,6 @@ static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas)
case wr_invalid:
MT_BUG_ON(mas->tree, 1);
}
-
- return;
}
static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas)
@@ -6260,8 +6027,15 @@ static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
for (i = 0; i < count; i++) {
val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
val &= MAPLE_NODE_MASK;
- new_slots[i] = ma_mnode_ptr((unsigned long)mas_pop_node(mas) |
- val);
+ /*
+ * Warning, see rcu_assign_pointer() documentation. Since this
+ * is a duplication of a tree, there are no readers walking the
+ * tree until after the rcu_assign_pointer() call in
+ * mas_dup_build().
+ */
+ RCU_INIT_POINTER(new_slots[i],
+ ma_mnode_ptr((unsigned long)mas_pop_node(mas) |
+ val));
}
}