From b2aa5d0bc86cb901cc6c8737cfff66360cbff00c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 7 Jun 2016 21:57:15 +0200 Subject: libceph: fix some missing includes - decode.h needs slab.h for kmalloc() - osd_client.h needs msgpool.h for struct ceph_msgpool - msgpool.h doesn't need messenger.h Signed-off-by: Ilya Dryomov --- include/linux/ceph/decode.h | 1 + include/linux/ceph/msgpool.h | 1 - include/linux/ceph/osd_client.h | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 19e9932f3e77..e83f3c81ef43 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -3,6 +3,7 @@ #include #include +#include #include #include diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h index 4b0d38960726..ddd0d48d0384 100644 --- a/include/linux/ceph/msgpool.h +++ b/include/linux/ceph/msgpool.h @@ -2,7 +2,6 @@ #define _FS_CEPH_MSGPOOL #include -#include /* * we use memory pools for preallocating messages we may receive, to diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 1b3b6e155392..858932304260 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 281dbe5db81c6137def9757e07a7aea14b1ed86e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 26 Jul 2016 15:22:35 +0200 Subject: libceph: add an ONSTACK initializer for oids An on-stack oid in ceph_ioctl_get_dataloc() is not initialized, resulting in a WARN and a NULL pointer dereference later on. We will have more of these on-stack in the future, so fix it with a convenience macro. Fixes: d30291b985d1 ("libceph: variable-sized ceph_object_id") Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 9ccf4dbe55f8..21d7f048959f 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -115,6 +115,11 @@ static inline void ceph_oid_init(struct ceph_object_id *oid) oid->name_len = 0; } +#define CEPH_OID_INIT_ONSTACK(oid) \ + ({ ceph_oid_init(&oid); oid; }) +#define CEPH_DEFINE_OID_ONSTACK(oid) \ + struct ceph_object_id oid = CEPH_OID_INIT_ONSTACK(oid) + static inline bool ceph_oid_empty(const struct ceph_object_id *oid) { return oid->name == oid->inline_name && !oid->name_len; -- cgit v1.2.3 From 22748f9d617b8cd0a915c3a4c656c7232645b3b5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 2 Jun 2016 14:23:32 +0200 Subject: libceph: add start en/decoding block helpers Add ceph_start_encoding() and ceph_start_decoding(), the equivalent of ENCODE_START and DECODE_START in the userspace ceph code. This is based on a patch from Mike Christie . Signed-off-by: Ilya Dryomov --- include/linux/ceph/decode.h | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index e83f3c81ef43..f990f2cc907a 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -218,6 +218,60 @@ static inline void ceph_encode_string(void **p, void *end, *p += len; } +/* + * version and length starting block encoders/decoders + */ + +/* current code version (u8) + compat code version (u8) + len of struct (u32) */ +#define CEPH_ENCODING_START_BLK_LEN 6 + +/** + * ceph_start_encoding - start encoding block + * @struct_v: current (code) version of the encoding + * @struct_compat: oldest code version that can decode it + * @struct_len: length of struct encoding + */ +static inline void ceph_start_encoding(void **p, u8 struct_v, u8 struct_compat, + u32 struct_len) +{ + ceph_encode_8(p, struct_v); + ceph_encode_8(p, struct_compat); + ceph_encode_32(p, struct_len); +} + +/** + * ceph_start_decoding - start decoding block + * @v: current version of the encoding that the code supports + * @name: name of the struct (free-form) + * @struct_v: out param for the encoding version + * @struct_len: out param for the length of struct encoding + * + * Validates the length of struct encoding, so unsafe ceph_decode_* + * variants can be used for decoding. + */ +static inline int ceph_start_decoding(void **p, void *end, u8 v, + const char *name, u8 *struct_v, + u32 *struct_len) +{ + u8 struct_compat; + + ceph_decode_need(p, end, CEPH_ENCODING_START_BLK_LEN, bad); + *struct_v = ceph_decode_8(p); + struct_compat = ceph_decode_8(p); + if (v < struct_compat) { + pr_warn("got struct_v %d struct_compat %d > %d of %s\n", + *struct_v, struct_compat, v, name); + return -EINVAL; + } + + *struct_len = ceph_decode_32(p); + ceph_decode_need(p, end, *struct_len, bad); + return 0; + +bad: + return -ERANGE; +} + #define ceph_encode_need(p, end, n, bad) \ do { \ if (!likely(ceph_has_room(p, end, n))) \ -- cgit v1.2.3 From 7627151ea30bce2051e3cb27d7bb2c30083f86a5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 3 Feb 2016 21:24:49 +0800 Subject: libceph: define new ceph_file_layout structure Define new ceph_file_layout structure and rename old ceph_file_layout to ceph_file_layout_legacy. This is preparation for adding namespace to ceph_file_layout structure. Signed-off-by: Yan, Zheng --- include/linux/ceph/ceph_fs.h | 50 +++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index dfce616002ad..e5a5fb9ca3f5 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -34,9 +34,9 @@ #define CEPH_MAX_MON 31 /* - * ceph_file_layout - describe data layout for a file/inode + * legacy ceph_file_layoute */ -struct ceph_file_layout { +struct ceph_file_layout_legacy { /* file -> object mapping */ __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple of page size. */ @@ -53,33 +53,25 @@ struct ceph_file_layout { __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); -#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit)) -#define ceph_file_layout_stripe_count(l) \ - ((__s32)le32_to_cpu((l).fl_stripe_count)) -#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size)) -#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash)) -#define ceph_file_layout_object_su(l) \ - ((__s32)le32_to_cpu((l).fl_object_stripe_unit)) -#define ceph_file_layout_pg_pool(l) \ - ((__s32)le32_to_cpu((l).fl_pg_pool)) - -static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l) -{ - return le32_to_cpu(l->fl_stripe_unit) * - le32_to_cpu(l->fl_stripe_count); -} - -/* "period" == bytes before i start on a new set of objects */ -static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l) -{ - return le32_to_cpu(l->fl_object_size) * - le32_to_cpu(l->fl_stripe_count); -} +/* + * ceph_file_layout - describe data layout for a file/inode + */ +struct ceph_file_layout { + /* file -> object mapping */ + u32 stripe_unit; /* stripe unit, in bytes */ + u32 stripe_count; /* over this many objects */ + u32 object_size; /* until objects are this big */ + s64 pool_id; /* rados pool id */ +}; + +extern int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); +extern void ceph_file_layout_from_legacy(struct ceph_file_layout *fl, + struct ceph_file_layout_legacy *legacy); +extern void ceph_file_layout_to_legacy(struct ceph_file_layout *fl, + struct ceph_file_layout_legacy *legacy); #define CEPH_MIN_STRIPE_UNIT 65536 -int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); - struct ceph_dir_layout { __u8 dl_dir_hash; /* see ceph_hash.h for ids */ __u8 dl_unused1; @@ -399,7 +391,7 @@ union ceph_mds_request_args { __le32 flags; } __attribute__ ((packed)) setxattr; struct { - struct ceph_file_layout layout; + struct ceph_file_layout_legacy layout; } __attribute__ ((packed)) setlayout; struct { __u8 rule; /* currently fcntl or flock */ @@ -478,7 +470,7 @@ struct ceph_mds_reply_inode { __le64 version; /* inode version */ __le64 xattr_version; /* version for xattr blob */ struct ceph_mds_reply_cap cap; /* caps issued for this inode */ - struct ceph_file_layout layout; + struct ceph_file_layout_legacy layout; struct ceph_timespec ctime, mtime, atime; __le32 time_warp_seq; __le64 size, max_size, truncate_size; @@ -673,7 +665,7 @@ struct ceph_mds_caps { __le64 size, max_size, truncate_size; __le32 truncate_seq; struct ceph_timespec mtime, atime, ctime; - struct ceph_file_layout layout; + struct ceph_file_layout_legacy layout; __le32 time_warp_seq; } __attribute__ ((packed)); -- cgit v1.2.3 From 51e9273796a57c08801f45580d3db3c51987a0cb Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 5 Feb 2016 15:36:22 +0800 Subject: libceph: introduce reference counted string The data structure is for storing namesapce string. It allows namespace string to be shared between cephfs inodes with same layout. This data structure can also be referenced by OSD request. Signed-off-by: Yan, Zheng --- include/linux/ceph/libceph.h | 1 + include/linux/ceph/string_table.h | 62 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 include/linux/ceph/string_table.h (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 690985daad1c..6afc5d98fad1 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -21,6 +21,7 @@ #include #include #include +#include /* * mount options diff --git a/include/linux/ceph/string_table.h b/include/linux/ceph/string_table.h new file mode 100644 index 000000000000..1b02c96daf75 --- /dev/null +++ b/include/linux/ceph/string_table.h @@ -0,0 +1,62 @@ +#ifndef _FS_CEPH_STRING_TABLE_H +#define _FS_CEPH_STRING_TABLE_H + +#include +#include +#include +#include + +struct ceph_string { + struct kref kref; + union { + struct rb_node node; + struct rcu_head rcu; + }; + size_t len; + char str[]; +}; + +extern void ceph_release_string(struct kref *ref); +extern struct ceph_string *ceph_find_or_create_string(const char *str, + size_t len); +extern bool ceph_strings_empty(void); + +static inline struct ceph_string *ceph_get_string(struct ceph_string *str) +{ + kref_get(&str->kref); + return str; +} + +static inline void ceph_put_string(struct ceph_string *str) +{ + if (!str) + return; + kref_put(&str->kref, ceph_release_string); +} + +static inline int ceph_compare_string(struct ceph_string *cs, + const char* str, size_t len) +{ + size_t cs_len = cs ? cs->len : 0; + if (cs_len != len) + return cs_len - len; + if (len == 0) + return 0; + return strncmp(cs->str, str, len); +} + +#define ceph_try_get_string(x) \ +({ \ + struct ceph_string *___str; \ + rcu_read_lock(); \ + for (;;) { \ + ___str = rcu_dereference(x); \ + if (!___str || \ + kref_get_unless_zero(&___str->kref)) \ + break; \ + } \ + rcu_read_unlock(); \ + (___str); \ +}) + +#endif -- cgit v1.2.3 From 30c156d9951e0aa88202707d80c583b0a09d3167 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 14 Feb 2016 11:24:31 +0800 Subject: libceph: rados pool namespace support Add pool namesapce pointer to struct ceph_file_layout and struct ceph_object_locator. Pool namespace is used by when mapping object to PG, it's also used when composing OSD request. The namespace pointer in struct ceph_file_layout is RCU protected. So libceph can read namespace without taking lock. Signed-off-by: Yan, Zheng [idryomov@gmail.com: ceph_oloc_destroy(), misc minor changes] Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 2 ++ include/linux/ceph/osdmap.h | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index e5a5fb9ca3f5..08b8fd72261e 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -53,6 +53,7 @@ struct ceph_file_layout_legacy { __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); +struct ceph_string; /* * ceph_file_layout - describe data layout for a file/inode */ @@ -62,6 +63,7 @@ struct ceph_file_layout { u32 stripe_count; /* over this many objects */ u32 object_size; /* until objects are this big */ s64 pool_id; /* rados pool id */ + struct ceph_string __rcu *pool_ns; /* rados pool namespace */ }; extern int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 21d7f048959f..9a9041784dcf 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -63,11 +63,13 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) struct ceph_object_locator { s64 pool; + struct ceph_string *pool_ns; }; static inline void ceph_oloc_init(struct ceph_object_locator *oloc) { oloc->pool = -1; + oloc->pool_ns = NULL; } static inline bool ceph_oloc_empty(const struct ceph_object_locator *oloc) @@ -75,11 +77,9 @@ static inline bool ceph_oloc_empty(const struct ceph_object_locator *oloc) return oloc->pool == -1; } -static inline void ceph_oloc_copy(struct ceph_object_locator *dest, - const struct ceph_object_locator *src) -{ - dest->pool = src->pool; -} +void ceph_oloc_copy(struct ceph_object_locator *dest, + const struct ceph_object_locator *src); +void ceph_oloc_destroy(struct ceph_object_locator *oloc); /* * Maximum supported by kernel client object name length -- cgit v1.2.3 From 774a6a118c70f8c11fcfe636032b5016ad71a746 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 6 Jun 2016 16:01:39 +0800 Subject: ceph: reduce i_nr_by_mode array size Track usage count for individual fmode bit. This can reduce the array size by half. Signed-off-by: Yan, Zheng --- include/linux/ceph/ceph_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 08b8fd72261e..6bc7730d22ac 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -525,7 +525,7 @@ struct ceph_filelock { #define CEPH_FILE_MODE_WR 2 #define CEPH_FILE_MODE_RDWR 3 /* RD | WR */ #define CEPH_FILE_MODE_LAZY 4 /* lazy io */ -#define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */ +#define CEPH_FILE_MODE_BITS 4 int ceph_flags_to_mode(int flags); -- cgit v1.2.3 From 0cabbd94ff52c4803fc0ad9ad0ad5e43df493ab0 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 7 Apr 2016 11:34:43 +0800 Subject: libceph: fsmap.user subscription support Signed-off-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + include/linux/ceph/mon_client.h | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 6bc7730d22ac..7868d602c0a0 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -121,6 +121,7 @@ struct ceph_dir_layout { /* client <-> mds */ #define CEPH_MSG_MDS_MAP 21 +#define CEPH_MSG_FS_MAP_USER 103 #define CEPH_MSG_CLIENT_SESSION 22 #define CEPH_MSG_CLIENT_RECONNECT 23 diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index e2a92df08b47..24d704d1ea5c 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -95,7 +95,7 @@ struct ceph_mon_client { struct ceph_mon_subscribe_item item; bool want; u32 have; /* epoch */ - } subs[3]; + } subs[4]; int fs_cluster_id; /* "mdsmap." sub */ #ifdef CONFIG_DEBUG_FS @@ -111,9 +111,10 @@ extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl); extern void ceph_monc_stop(struct ceph_mon_client *monc); enum { - CEPH_SUB_MDSMAP = 0, - CEPH_SUB_MONMAP, + CEPH_SUB_MONMAP = 0, CEPH_SUB_OSDMAP, + CEPH_SUB_FSMAP, + CEPH_SUB_MDSMAP, }; extern const char *ceph_sub_str[]; -- cgit v1.2.3 From a0f2b65275413b3438e9f55b1427273cd893c3b2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Jun 2016 15:04:56 +0200 Subject: ceph: fix symbol versioning for ceph_monc_do_statfs The genksyms helper in the kernel cannot parse a type definition like "typeof(((type *)0)->keyfld)" that is used in the DEFINE_RB_FUNCS helper, causing the following EXPORT_SYMBOL() statement to be ignored when computing the crcs, and triggering a warning about this: WARNING: "ceph_monc_do_statfs" [fs/ceph/ceph.ko] has no CRC To work around the problem, we can rewrite the type to reference an undefined 'extern' symbol instead of a NULL pointer. This is evidently ok for genksyms, and it no longer complains about the line when calling it with 'genksyms -w'. I've looked briefly into extending genksyms instead, but it seems really hard to do. Jan Beulich introduced basic support for 'typeof' a while ago in dc53324060f3 ("genksyms: fix typeof() handling"), but that is not sufficient for the expression we have here. Signed-off-by: Arnd Bergmann Fixes: fcd00b68bbe2 ("libceph: DEFINE_RB_FUNCS macro") Cc: Jan Beulich Cc: Michal Marek Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 6afc5d98fad1..83fc1fff7061 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -215,8 +215,9 @@ static void erase_##name(struct rb_root *root, type *t) \ } #define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \ +extern type __lookup_##name##_key; \ static type *lookup_##name(struct rb_root *root, \ - typeof(((type *)0)->keyfld) key) \ + typeof(__lookup_##name##_key.keyfld) key) \ { \ struct rb_node *n = root->rb_node; \ \ -- cgit v1.2.3