From b3b33b0e43323af4fb697f4378218d3c268d02cd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Dec 2013 21:19:24 +0200 Subject: crush: pass weight vector size to map function Pass the size of the weight vector into crush_do_rule() to ensure that we don't access values past the end. This can happen if the caller misbehaves and passes a weight vector that is smaller than max_devices. Currently the monitor tries to prevent that from happening, but this will gracefully tolerate previous bad osdmaps that got into this state. It's also a bit more defensive. Reflects ceph.git commit 5922e2c2b8335b5e46c9504349c3a55b7434c01a. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/mapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/crush') diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index 5772dee3ecbf..69310b031875 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -14,6 +14,6 @@ extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, i extern int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, - const __u32 *weights); + const __u32 *weights, int weight_max); #endif -- cgit v1.2.3 From bfb16d7d69f0272451ad85a6e50aab3c4262fbc0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Dec 2013 21:19:24 +0200 Subject: crush: factor out (trivial) crush_destroy_rule() Reflects ceph.git commit 43a01c9973c4b83f2eaa98be87429941a227ddde. 
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/crush') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 6a1101f24cfb..09561a04c127 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -174,6 +174,7 @@ extern void crush_destroy_bucket_list(struct crush_bucket_list *b); extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b); extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b); extern void crush_destroy_bucket(struct crush_bucket *b); +extern void crush_destroy_rule(struct crush_rule *r); extern void crush_destroy(struct crush_map *map); static inline int crush_calc_tree_node(int i) -- cgit v1.2.3 From e8ef19c4ad161768e1d8309d5ae18481c098eb81 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Dec 2013 21:19:24 +0200 Subject: crush: eliminate CRUSH_MAX_SET result size limitation This is only present to size the temporary scratch arrays that we put on the stack. Let the caller allocate them as they wish and remove the limitation. Reflects ceph.git commit 1cfe140bf2dab99517589a82a916f4c75b9492d1. 
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 1 - include/linux/crush/mapper.h | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/crush') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 09561a04c127..83543c504b5a 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -21,7 +21,6 @@ #define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ -#define CRUSH_MAX_SET 10 /* max size of a mapping result */ /* diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index 69310b031875..eab367446eea 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -14,6 +14,7 @@ extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, i extern int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, - const __u32 *weights, int weight_max); + const __u32 *weights, int weight_max, + int *scratch); #endif -- cgit v1.2.3 From c6d98a603a02594f6ecf16d0a0af989ae9fa7abd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Dec 2013 21:19:25 +0200 Subject: crush: return CRUSH_ITEM_UNDEF for failed placements with indep For firstn mode, if we fail to make a valid placement choice, we just continue and return a short result to the caller. For indep mode, however, we need to make the position stable, and return an undefined value on failed placements to avoid shifting later results to the left. Reflects ceph.git commit b1d4dd4eb044875874a1d01c01c7d766db5d0a80. 
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/crush') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 83543c504b5a..3d6a12928560 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -19,10 +19,11 @@ #define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ - #define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ +#define CRUSH_ITEM_UNDEF 0x7fffffff /* undefined result */ + /* * CRUSH uses user-defined "rules" to describe how inputs should be * mapped to devices. A rule consists of sequence of steps to perform -- cgit v1.2.3 From 9a3b490a20e06368c81d7a81506e99388e733379 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Dec 2013 21:19:25 +0200 Subject: crush: use breadth-first search for indep mode Reflects ceph.git commit 86e978036a4ecbac4c875e7c00f6c5bbe37282d3. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/crush') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 3d6a12928560..4023b1b52296 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -22,7 +22,8 @@ #define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ -#define CRUSH_ITEM_UNDEF 0x7fffffff /* undefined result */ +#define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */ +#define CRUSH_ITEM_NONE 0x7fffffff /* no result */ /* * CRUSH uses user-defined "rules" to describe how inputs should be -- cgit v1.2.3 From be3226acc5544bcc91e756eb3ee6ca7b74f6f0a8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Dec 2013 21:19:26 +0200 Subject: crush: new SET_CHOOSE_LEAF_TRIES command Explicitly control the number of sample attempts, and allow the number of tries in the recursive call to be explicitly controlled via the 
rule. This is important because the amount of time we want to spend looking for a solution may be rule dependent (e.g., higher for the wide indep pool than the rep pools). (We should do the same for the other tunables, by the way!) Reflects ceph.git commit c43c893be872f709c787bc57f46c0e97876ff681. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/crush') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 4023b1b52296..2e50bab91655 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -46,6 +46,8 @@ enum { CRUSH_RULE_EMIT = 4, /* no args */ CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6, CRUSH_RULE_CHOOSE_LEAF_INDEP = 7, + + CRUSH_RULE_SET_CHOOSE_LEAF_TRIES = 9, }; /* -- cgit v1.2.3 From f18650ace38ef200dd1578257c75e9407297953c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Dec 2013 21:19:26 +0200 Subject: crush: apply chooseleaf_tries to firstn mode too Parameterize the attempts for the _firstn choose method, and apply the rule-specified tries count to firstn mode as well. Note that we have slightly different behavior here than with indep: If the tries value is not specified for firstn, we pass through the normal attempt count. This maintains compatibility with legacy behavior. Note that this is usually *not* actually N^2 work, though, because of the descend_once tunable. However, descend_once is unfortunately *not* the same thing as 1 chooseleaf try because it is only checked on a reject but not on a collision. Sigh. In contrast, for indep, if tries is not specified we default to 1 recursive attempt, because that is simply more sane, and we have the option to do so. The descend_once tunable has no effect for indep. Reflects ceph.git commit 64aeded50d80942d66a5ec7b604ff2fcbf5d7b63. 
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/crush') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 2e50bab91655..07b8fd4f81fc 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -165,7 +165,10 @@ struct crush_map { __u32 choose_local_fallback_tries; /* choose attempts before giving up */ __u32 choose_total_tries; - /* attempt chooseleaf inner descent once; on failure retry outer descent */ + /* attempt chooseleaf inner descent once for firstn mode; on + * reject retry outer descent. Note that this does *not* + * apply to a collision: in that case we will retry as we used + * to. */ __u32 chooseleaf_descend_once; }; -- cgit v1.2.3 From cc10df4a3a5c34cb1d5b635ac70dd1fc406153ce Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Dec 2013 21:19:26 +0200 Subject: crush: add SET_CHOOSE_TRIES rule step Since we can specify the recursive retries in a rule, we may as well also specify the non-recursive tries too for completeness. Reflects ceph.git commit d1b97462cffccc871914859eaee562f2786abfd1. 
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/crush') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 07b8fd4f81fc..5f95969347ec 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -47,7 +47,8 @@ enum { CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6, CRUSH_RULE_CHOOSE_LEAF_INDEP = 7, - CRUSH_RULE_SET_CHOOSE_LEAF_TRIES = 9, + CRUSH_RULE_SET_CHOOSE_TRIES = 8, /* override choose_total_tries */ + CRUSH_RULE_SET_CHOOSE_LEAF_TRIES = 9, /* override chooseleaf_descend_once */ }; /* -- cgit v1.2.3 From 917edad5d1d62070436b74ecbf5ea019b651ff69 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Dec 2013 21:19:26 +0200 Subject: crush: CHOOSE_LEAF -> CHOOSELEAF throughout This aligns the internal identifier names with the user-visible names in the decompiled crush map language. Reflects ceph.git commit caa0e22e15e4226c3671318ba1f61314bf6da2a6. 
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/crush') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 5f95969347ec..7b0fc4aba75b 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -44,11 +44,11 @@ enum { /* arg2 = type */ CRUSH_RULE_CHOOSE_INDEP = 3, /* same */ CRUSH_RULE_EMIT = 4, /* no args */ - CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6, - CRUSH_RULE_CHOOSE_LEAF_INDEP = 7, + CRUSH_RULE_CHOOSELEAF_FIRSTN = 6, + CRUSH_RULE_CHOOSELEAF_INDEP = 7, CRUSH_RULE_SET_CHOOSE_TRIES = 8, /* override choose_total_tries */ - CRUSH_RULE_SET_CHOOSE_LEAF_TRIES = 9, /* override chooseleaf_descend_once */ + CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */ }; /* -- cgit v1.2.3 From f046bf92080cbdc4a94c6e86698c5a3f10716445 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Dec 2013 21:19:27 +0200 Subject: crush: add set_choose_local_[fallback_]tries steps This allows all of the tunables to be overridden by a specific rule. Reflects ceph.git commits d129e09e57fbc61cfd4f492e3ee77d0750c9d292, 0497db49e5973b50df26251ed0e3f4ac7578e66e. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/crush') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 7b0fc4aba75b..acaa5615d634 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -49,6 +49,8 @@ enum { CRUSH_RULE_SET_CHOOSE_TRIES = 8, /* override choose_total_tries */ CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */ + CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10, + CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11, }; /* -- cgit v1.2.3