summaryrefslogtreecommitdiff
path: root/drivers/staging/lustre
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre')
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs.h6
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h10
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h3
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_private.h21
-rw-r--r--drivers/staging/lustre/include/linux/lnet/lib-lnet.h65
-rw-r--r--drivers/staging/lustre/include/linux/lnet/lib-types.h7
-rw-r--r--drivers/staging/lustre/include/linux/lnet/types.h16
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c16
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h18
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c201
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c5
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h8
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c48
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c207
-rw-r--r--drivers/staging/lustre/lnet/libcfs/debug.c9
-rw-r--r--drivers/staging/lustre/lnet/libcfs/fail.c6
-rw-r--r--drivers/staging/lustre/lnet/libcfs/libcfs_string.c2
-rw-r--r--drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c17
-rw-r--r--drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c4
-rw-r--r--drivers/staging/lustre/lnet/lnet/api-ni.c46
-rw-r--r--drivers/staging/lustre/lnet/lnet/config.c14
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-md.c30
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-move.c363
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-msg.c18
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-socket.c21
-rw-r--r--drivers/staging/lustre/lnet/lnet/lo.c39
-rw-r--r--drivers/staging/lustre/lnet/lnet/router.c20
-rw-r--r--drivers/staging/lustre/lnet/selftest/brw_test.c4
-rw-r--r--drivers/staging/lustre/lnet/selftest/conrpc.c15
-rw-r--r--drivers/staging/lustre/lnet/selftest/console.c2
-rw-r--r--drivers/staging/lustre/lnet/selftest/console.h1
-rw-r--r--drivers/staging/lustre/lnet/selftest/framework.c4
-rw-r--r--drivers/staging/lustre/lnet/selftest/rpc.c8
-rw-r--r--drivers/staging/lustre/lustre/fid/fid_lib.c2
-rw-r--r--drivers/staging/lustre/lustre/fid/fid_request.c8
-rw-r--r--drivers/staging/lustre/lustre/fid/lproc_fid.c2
-rw-r--r--drivers/staging/lustre/lustre/fld/fld_internal.h19
-rw-r--r--drivers/staging/lustre/lustre/fld/fld_request.c57
-rw-r--r--drivers/staging/lustre/lustre/include/cl_object.h108
-rw-r--r--drivers/staging/lustre/lustre/include/interval_tree.h26
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_lite.h91
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_user.h66
-rw-r--r--drivers/staging/lustre/lustre/include/lprocfs_status.h143
-rw-r--r--drivers/staging/lustre/lustre/include/lu_object.h38
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/lustre_idl.h486
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h412
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/lustre_user.h329
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_cfg.h26
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_compat.h (renamed from drivers/staging/lustre/lustre/include/linux/lustre_compat25.h)6
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_dlm.h16
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_dlm_flags.h36
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_eacl.h1
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_fid.h32
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_handles.h5
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_import.h24
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_lib.h318
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_linkea.h79
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_lite.h97
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_lmv.h184
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_log.h3
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_mdc.h52
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_mds.h3
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_net.h102
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_param.h3
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_patchless_compat.h (renamed from drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h)0
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_req_layout.h23
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_ver.h19
-rw-r--r--drivers/staging/lustre/lustre/include/obd.h390
-rw-r--r--drivers/staging/lustre/lustre/include/obd_class.h195
-rw-r--r--drivers/staging/lustre/lustre/include/obd_support.h34
-rw-r--r--drivers/staging/lustre/lustre/ldlm/interval_tree.c100
-rw-r--r--drivers/staging/lustre/lustre/ldlm/l_lock.c4
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_extent.c4
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_flock.c109
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_internal.h20
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_lib.c32
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_lock.c84
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c28
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_pool.c49
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_request.c119
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_resource.c53
-rw-r--r--drivers/staging/lustre/lustre/llite/Makefile2
-rw-r--r--drivers/staging/lustre/lustre/llite/dcache.c61
-rw-r--r--drivers/staging/lustre/lustre/llite/dir.c899
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c678
-rw-r--r--drivers/staging/lustre/lustre/llite/glimpse.c1
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_cl.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_misc.c1
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_close.c1
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h356
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_lib.c715
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_mmap.c8
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_nfs.c70
-rw-r--r--drivers/staging/lustre/lustre/llite/lproc_llite.c221
-rw-r--r--drivers/staging/lustre/lustre/llite/namei.c378
-rw-r--r--drivers/staging/lustre/lustre/llite/range_lock.c233
-rw-r--r--drivers/staging/lustre/lustre/llite/range_lock.h82
-rw-r--r--drivers/staging/lustre/lustre/llite/rw.c37
-rw-r--r--drivers/staging/lustre/lustre/llite/rw26.c28
-rw-r--r--drivers/staging/lustre/lustre/llite/statahead.c1439
-rw-r--r--drivers/staging/lustre/lustre/llite/super25.c7
-rw-r--r--drivers/staging/lustre/lustre/llite/symlink.c13
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_dev.c7
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_internal.h15
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c31
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_lock.c1
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_object.c15
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_page.c26
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_req.c5
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr.c338
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr_cache.c25
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_fld.c16
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_intent.c365
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_internal.h126
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_obd.c1484
-rw-r--r--drivers/staging/lustre/lustre/lmv/lproc_lmv.c4
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_cl_internal.h14
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_dev.c1
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_ea.c17
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_internal.h9
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_io.c25
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_merge.c39
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_obd.c349
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_object.c50
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_pack.c60
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_page.c12
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_pool.c18
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_request.c78
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_object.c6
-rw-r--r--drivers/staging/lustre/lustre/mdc/lproc_mdc.c17
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_internal.h63
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_lib.c236
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_locks.c176
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_reint.c36
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_request.c725
-rw-r--r--drivers/staging/lustre/lustre/mgc/mgc_request.c27
-rw-r--r--drivers/staging/lustre/lustre/obdclass/Makefile2
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_io.c19
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_object.c50
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_page.c30
-rw-r--r--drivers/staging/lustre/lustre/obdclass/class_obd.c57
-rw-r--r--drivers/staging/lustre/lustre/obdclass/debug.c4
-rw-r--r--drivers/staging/lustre/lustre/obdclass/genops.c148
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linkea.c201
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linux/linux-module.c6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c2
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c5
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog.c10
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_cat.c10
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_internal.h5
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_obd.c1
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_swab.c26
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lprocfs_status.c150
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lu_object.c240
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lustre_handles.c13
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lustre_peer.c1
-rw-r--r--drivers/staging/lustre/lustre/obdclass/obd_config.c48
-rw-r--r--drivers/staging/lustre/lustre/obdclass/obd_mount.c41
-rw-r--r--drivers/staging/lustre/lustre/obdclass/obdo.c13
-rw-r--r--drivers/staging/lustre/lustre/obdecho/echo_client.c170
-rw-r--r--drivers/staging/lustre/lustre/obdecho/echo_internal.h4
-rw-r--r--drivers/staging/lustre/lustre/osc/lproc_osc.c41
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_cache.c279
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_cl_internal.h6
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_internal.h9
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_io.c46
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_lock.c4
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_object.c7
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_page.c278
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_request.c400
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/client.c127
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/connection.c5
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/events.c6
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/import.c332
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/layout.c100
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c4
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/niobuf.c36
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pack_generic.c245
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pers.c6
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pinger.c1
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h9
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c4
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/recover.c2
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec.c26
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c24
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_config.c1
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_gc.c5
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_plain.c32
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/service.c47
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/wiretest.c745
190 files changed, 10666 insertions, 8358 deletions
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
index 3f6447c65042..3b92d38d37e2 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
@@ -138,8 +138,8 @@ struct lnet_debugfs_symlink_def {
void lustre_insert_debugfs(struct ctl_table *table,
const struct lnet_debugfs_symlink_def *symlinks);
int lprocfs_call_handler(void *data, int write, loff_t *ppos,
- void __user *buffer, size_t *lenp,
- int (*handler)(void *data, int write,
- loff_t pos, void __user *buffer, int len));
+ void __user *buffer, size_t *lenp,
+ int (*handler)(void *data, int write, loff_t pos,
+ void __user *buffer, int len));
#endif /* _LIBCFS_H */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
index 25adab19fd86..b7bd6e8ab33f 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
@@ -247,19 +247,19 @@ do { \
#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__)
int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
- const char *format1, ...)
+ const char *format1, ...)
__printf(2, 3);
int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
- const char *format1,
- va_list args, const char *format2, ...)
+ const char *format1,
+ va_list args, const char *format2, ...)
__printf(4, 5);
/* other external symbols that tracefile provides: */
int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
- const char __user *usr_buffer, int usr_buffer_nob);
+ const char __user *usr_buffer, int usr_buffer_nob);
int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
- const char *knl_buffer, char *append);
+ const char *knl_buffer, char *append);
#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
index d3f9a6020ee3..bdbbe934584c 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
@@ -143,6 +143,9 @@ static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
#define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \
cfs_fail_timeout_set(id, value, secs * 1000, CFS_FAIL_LOC_ORSET)
+#define CFS_FAIL_TIMEOUT_RESET(id, value, secs) \
+ cfs_fail_timeout_set(id, value, secs * 1000, CFS_FAIL_LOC_RESET)
+
#define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET)
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index 4daa3823f60a..e0e1a5d0949d 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -310,13 +310,13 @@ do { \
#define MKSTR(ptr) ((ptr)) ? (ptr) : ""
-static inline int cfs_size_round4(int val)
+static inline size_t cfs_size_round4(int val)
{
return (val + 3) & (~0x3);
}
#ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round(int val)
+static inline size_t cfs_size_round(int val)
{
return (val + 7) & (~0x7);
}
@@ -324,17 +324,17 @@ static inline int cfs_size_round(int val)
#define HAVE_CFS_SIZE_ROUND
#endif
-static inline int cfs_size_round16(int val)
+static inline size_t cfs_size_round16(int val)
{
return (val + 0xf) & (~0xf);
}
-static inline int cfs_size_round32(int val)
+static inline size_t cfs_size_round32(int val)
{
return (val + 0x1f) & (~0x1f);
}
-static inline int cfs_size_round0(int val)
+static inline size_t cfs_size_round0(int val)
{
if (!val)
return 0;
@@ -343,7 +343,7 @@ static inline int cfs_size_round0(int val)
static inline size_t cfs_round_strlen(char *fset)
{
- return (size_t)cfs_size_round((int)strlen(fset) + 1);
+ return cfs_size_round((int)strlen(fset) + 1);
}
#define LOGL(var, len, ptr) \
@@ -360,13 +360,4 @@ do { \
ptr += cfs_size_round(len); \
} while (0)
-#define LOGL0(var, len, ptr) \
-do { \
- if (!len) \
- break; \
- memcpy((char *)ptr, (const char *)var, len); \
- *((char *)(ptr) + len) = 0; \
- ptr += cfs_size_round(len + 1); \
-} while (0)
-
#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index 513a8225f888..a59c5e99cbd3 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -605,73 +605,20 @@ void lnet_counters_reset(void);
unsigned int lnet_iov_nob(unsigned int niov, struct kvec *iov);
int lnet_extract_iov(int dst_niov, struct kvec *dst,
- int src_niov, struct kvec *src,
+ int src_niov, const struct kvec *src,
unsigned int offset, unsigned int len);
unsigned int lnet_kiov_nob(unsigned int niov, lnet_kiov_t *iov);
int lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
- int src_niov, lnet_kiov_t *src,
+ int src_niov, const lnet_kiov_t *src,
unsigned int offset, unsigned int len);
-void lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov,
- unsigned int doffset,
- unsigned int nsiov, struct kvec *siov,
+void lnet_copy_iov2iter(struct iov_iter *to,
+ unsigned int nsiov, const struct kvec *siov,
unsigned int soffset, unsigned int nob);
-void lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov,
- unsigned int iovoffset,
- unsigned int nkiov, lnet_kiov_t *kiov,
+void lnet_copy_kiov2iter(struct iov_iter *to,
+ unsigned int nkiov, const lnet_kiov_t *kiov,
unsigned int kiovoffset, unsigned int nob);
-void lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov,
- unsigned int kiovoffset,
- unsigned int niov, struct kvec *iov,
- unsigned int iovoffset, unsigned int nob);
-void lnet_copy_kiov2kiov(unsigned int ndkiov, lnet_kiov_t *dkiov,
- unsigned int doffset,
- unsigned int nskiov, lnet_kiov_t *skiov,
- unsigned int soffset, unsigned int nob);
-
-static inline void
-lnet_copy_iov2flat(int dlen, void *dest, unsigned int doffset,
- unsigned int nsiov, struct kvec *siov, unsigned int soffset,
- unsigned int nob)
-{
- struct kvec diov = {/*.iov_base = */ dest, /*.iov_len = */ dlen};
-
- lnet_copy_iov2iov(1, &diov, doffset,
- nsiov, siov, soffset, nob);
-}
-
-static inline void
-lnet_copy_kiov2flat(int dlen, void *dest, unsigned int doffset,
- unsigned int nsiov, lnet_kiov_t *skiov,
- unsigned int soffset, unsigned int nob)
-{
- struct kvec diov = {/* .iov_base = */ dest, /* .iov_len = */ dlen};
-
- lnet_copy_kiov2iov(1, &diov, doffset,
- nsiov, skiov, soffset, nob);
-}
-
-static inline void
-lnet_copy_flat2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset,
- int slen, void *src, unsigned int soffset, unsigned int nob)
-{
- struct kvec siov = {/*.iov_base = */ src, /*.iov_len = */slen};
-
- lnet_copy_iov2iov(ndiov, diov, doffset,
- 1, &siov, soffset, nob);
-}
-
-static inline void
-lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov,
- unsigned int doffset, int slen, void *src,
- unsigned int soffset, unsigned int nob)
-{
- struct kvec siov = {/* .iov_base = */ src, /* .iov_len = */ slen};
-
- lnet_copy_iov2kiov(ndiov, dkiov, doffset,
- 1, &siov, soffset, nob);
-}
void lnet_me_unlink(lnet_me_t *me);
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
index 7967b013cbae..b84a5bb9186c 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
@@ -220,10 +220,7 @@ typedef struct lnet_lnd {
* credit if the LND does flow control.
*/
int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
- int delayed, unsigned int niov,
- struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen,
- unsigned int rlen);
+ int delayed, struct iov_iter *to, unsigned int rlen);
/*
* lnet_parse() has had to delay processing of this message
@@ -278,6 +275,8 @@ typedef struct lnet_ni {
struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
/* equivalent interfaces to use */
char *ni_interfaces[LNET_MAX_INTERFACES];
+ /* original net namespace */
+ struct net *ni_net_ns;
} lnet_ni_t;
#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL
diff --git a/drivers/staging/lustre/include/linux/lnet/types.h b/drivers/staging/lustre/include/linux/lnet/types.h
index e098b6c086e1..f8be0e2f7bf7 100644
--- a/drivers/staging/lustre/include/linux/lnet/types.h
+++ b/drivers/staging/lustre/include/linux/lnet/types.h
@@ -503,21 +503,7 @@ typedef struct {
/* NB lustre portals uses struct iovec internally! */
typedef struct iovec lnet_md_iovec_t;
-/**
- * A page-based fragment of a MD.
- */
-typedef struct {
- /** Pointer to the page where the fragment resides */
- struct page *kiov_page;
- /** Length in bytes of the fragment */
- unsigned int kiov_len;
- /**
- * Starting offset of the fragment within the page. Note that the
- * end of the fragment must not pass the end of the page; i.e.,
- * kiov_len + kiov_offset <= PAGE_SIZE.
- */
- unsigned int kiov_offset;
-} lnet_kiov_t;
+typedef struct bio_vec lnet_kiov_t;
/** @} lnet_md */
/** \addtogroup lnet_eq
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 4f5978b3767b..c7a5d49e487f 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -128,6 +128,7 @@ static int kiblnd_msgtype2size(int type)
static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
{
struct kib_rdma_desc *rd;
+ int msg_size;
int nob;
int n;
int i;
@@ -146,12 +147,6 @@ static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
n = rd->rd_nfrags;
- if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
- CERROR("Bad nfrags: %d, should be 0 < n <= %d\n",
- n, IBLND_MAX_RDMA_FRAGS);
- return 1;
- }
-
nob = offsetof(struct kib_msg, ibm_u) +
kiblnd_rd_msg_size(rd, msg->ibm_type, n);
@@ -161,6 +156,13 @@ static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
return 1;
}
+ msg_size = kiblnd_rd_size(rd);
+ if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) {
+ CERROR("Bad msg_size: %d, should be 0 < n <= %d\n",
+ msg_size, LNET_MAX_PAYLOAD);
+ return 1;
+ }
+
if (!flip)
return 0;
@@ -618,7 +620,7 @@ static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt)
}
struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid,
- int state, int version)
+ int state, int version)
{
/*
* CAVEAT EMPTOR:
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 078a0c3e8845..14576977200f 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -113,8 +113,9 @@ extern struct kib_tunables kiblnd_tunables;
#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1)
#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0)
-#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV /* max # of fragments supported */
+#define IBLND_FRAG_SHIFT (PAGE_SHIFT - 12) /* frag size on wire is in 4K units */
+#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */
+#define IBLND_MAX_RDMA_FRAGS (LNET_MAX_PAYLOAD >> 12)/* max # of fragments supported in 4K size */
/************************/
/* derived constants... */
@@ -133,8 +134,8 @@ extern struct kib_tunables kiblnd_tunables;
/* WRs and CQEs (per connection) */
#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c)
#define IBLND_SEND_WRS(c) \
- ((c->ibc_max_frags + 1) * kiblnd_concurrent_sends(c->ibc_version, \
- c->ibc_peer->ibp_ni))
+ (((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \
+ kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni))
#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
struct kib_hca_dev;
@@ -582,6 +583,8 @@ struct kib_peer {
unsigned short ibp_connecting;
/* reconnect this peer later */
unsigned short ibp_reconnecting:1;
+ /* counter of how many times we triggered a conn race */
+ unsigned char ibp_races;
/* # consecutive reconnection attempts to this peer */
unsigned int ibp_reconnected;
/* errno on closing this peer */
@@ -607,14 +610,14 @@ kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
mod = tunables->lnd_map_on_demand;
- return mod ? mod : IBLND_MAX_RDMA_FRAGS;
+ return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT;
}
static inline int
kiblnd_rdma_frags(int version, struct lnet_ni *ni)
{
return version == IBLND_MSG_VERSION_1 ?
- IBLND_MAX_RDMA_FRAGS :
+ (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) :
kiblnd_cfg_rdma_frags(ni);
}
@@ -1034,5 +1037,4 @@ int kiblnd_post_rx(struct kib_rx *rx, int credit);
int kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
+ struct iov_iter *to, unsigned int rlen);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 596a697b9d39..b27de8888149 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -36,16 +36,19 @@
#include "o2iblnd.h"
+#define MAX_CONN_RACES_BEFORE_ABORT 20
+
static void kiblnd_peer_alive(struct kib_peer *peer);
static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error);
-static void kiblnd_check_sends(struct kib_conn *conn);
static void kiblnd_init_tx_msg(lnet_ni_t *ni, struct kib_tx *tx,
- int type, int body_nob);
+ int type, int body_nob);
static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
- int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie);
+ int resid, struct kib_rdma_desc *dstrd,
+ __u64 dstcookie);
static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn);
static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn);
static void kiblnd_unmap_tx(lnet_ni_t *ni, struct kib_tx *tx);
+static void kiblnd_check_sends_locked(struct kib_conn *conn);
static void
kiblnd_tx_done(lnet_ni_t *ni, struct kib_tx *tx)
@@ -211,9 +214,9 @@ kiblnd_post_rx(struct kib_rx *rx, int credit)
conn->ibc_outstanding_credits++;
else
conn->ibc_reserved_credits++;
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
- kiblnd_check_sends(conn);
out:
kiblnd_conn_decref(conn);
return rc;
@@ -344,8 +347,8 @@ kiblnd_handle_rx(struct kib_rx *rx)
!IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
conn->ibc_outstanding_credits++;
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
- kiblnd_check_sends(conn);
}
switch (msg->ibm_type) {
@@ -648,7 +651,7 @@ static int kiblnd_map_tx(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc
static int
kiblnd_setup_rd_iov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
- unsigned int niov, struct kvec *iov, int offset, int nob)
+ unsigned int niov, const struct kvec *iov, int offset, int nob)
{
struct kib_net *net = ni->ni_data;
struct page *page;
@@ -705,7 +708,7 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
static int
kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
- int nkiov, lnet_kiov_t *kiov, int offset, int nob)
+ int nkiov, const lnet_kiov_t *kiov, int offset, int nob)
{
struct kib_net *net = ni->ni_data;
struct scatterlist *sg;
@@ -717,8 +720,8 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
LASSERT(nkiov > 0);
LASSERT(net);
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
+ while (offset >= kiov->bv_len) {
+ offset -= kiov->bv_len;
nkiov--;
kiov++;
LASSERT(nkiov > 0);
@@ -728,10 +731,10 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
do {
LASSERT(nkiov > 0);
- fragnob = min((int)(kiov->kiov_len - offset), nob);
+ fragnob = min((int)(kiov->bv_len - offset), nob);
- sg_set_page(sg, kiov->kiov_page, fragnob,
- kiov->kiov_offset + offset);
+ sg_set_page(sg, kiov->bv_page, fragnob,
+ kiov->bv_offset + offset);
sg = sg_next(sg);
if (!sg) {
CERROR("lacking enough sg entries to map tx\n");
@@ -761,7 +764,6 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
LASSERT(tx->tx_queued);
/* We rely on this for QP sizing */
LASSERT(tx->tx_nwrq > 0);
- LASSERT(tx->tx_nwrq <= 1 + conn->ibc_max_frags);
LASSERT(!credit || credit == 1);
LASSERT(conn->ibc_outstanding_credits >= 0);
@@ -800,7 +802,7 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) {
/*
* OK to drop when posted enough NOOPs, since
- * kiblnd_check_sends will queue NOOP again when
+ * kiblnd_check_sends_locked will queue NOOP again when
* posted NOOPs complete
*/
spin_unlock(&conn->ibc_lock);
@@ -905,7 +907,7 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
}
static void
-kiblnd_check_sends(struct kib_conn *conn)
+kiblnd_check_sends_locked(struct kib_conn *conn)
{
int ver = conn->ibc_version;
lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
@@ -918,8 +920,6 @@ kiblnd_check_sends(struct kib_conn *conn)
return;
}
- spin_lock(&conn->ibc_lock);
-
LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni));
LASSERT(!IBLND_OOB_CAPABLE(ver) ||
conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
@@ -969,8 +969,6 @@ kiblnd_check_sends(struct kib_conn *conn)
if (kiblnd_post_tx_locked(conn, tx, credit))
break;
}
-
- spin_unlock(&conn->ibc_lock);
}
static void
@@ -1016,16 +1014,11 @@ kiblnd_tx_complete(struct kib_tx *tx, int status)
if (idle)
list_del(&tx->tx_list);
- kiblnd_conn_addref(conn); /* 1 ref for me.... */
-
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
if (idle)
kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
-
- kiblnd_check_sends(conn);
-
- kiblnd_conn_decref(conn); /* ...until here */
}
static void
@@ -1078,6 +1071,15 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
LASSERT(type == IBLND_MSG_GET_DONE ||
type == IBLND_MSG_PUT_DONE);
+ if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) {
+ CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid),
+ conn->ibc_max_frags << PAGE_SHIFT,
+ kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd));
+ rc = -EMSGSIZE;
+ goto too_big;
+ }
+
while (resid > 0) {
if (srcidx >= srcrd->rd_nfrags) {
CERROR("Src buffer exhausted: %d frags\n", srcidx);
@@ -1091,10 +1093,10 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
break;
}
- if (tx->tx_nwrq >= conn->ibc_max_frags) {
+ if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) {
CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid),
- conn->ibc_max_frags,
+ IBLND_MAX_RDMA_FRAGS,
srcidx, srcrd->rd_nfrags,
dstidx, dstrd->rd_nfrags);
rc = -EMSGSIZE;
@@ -1132,7 +1134,7 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
wrq++;
sge++;
}
-
+too_big:
if (rc < 0) /* no RDMA if completing with failure */
tx->tx_nwrq = 0;
@@ -1204,9 +1206,8 @@ kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn)
{
spin_lock(&conn->ibc_lock);
kiblnd_queue_tx_locked(tx, conn);
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
-
- kiblnd_check_sends(conn);
}
static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
@@ -1499,6 +1500,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
unsigned int payload_offset = lntmsg->msg_offset;
unsigned int payload_nob = lntmsg->msg_len;
+ struct iov_iter from;
struct kib_msg *ibmsg;
struct kib_rdma_desc *rd;
struct kib_tx *tx;
@@ -1518,6 +1520,17 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
/* payload is either all vaddrs or all pages */
LASSERT(!(payload_kiov && payload_iov));
+ if (payload_kiov)
+ iov_iter_bvec(&from, ITER_BVEC | WRITE,
+ payload_kiov, payload_niov,
+ payload_nob + payload_offset);
+ else
+ iov_iter_kvec(&from, ITER_KVEC | WRITE,
+ payload_iov, payload_niov,
+ payload_nob + payload_offset);
+
+ iov_iter_advance(&from, payload_offset);
+
switch (type) {
default:
LBUG();
@@ -1637,17 +1650,8 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
ibmsg = tx->tx_msg;
ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
- if (payload_kiov)
- lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- else
- lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-
+ copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, IBLND_MSG_SIZE,
+ &from);
nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
@@ -1719,8 +1723,7 @@ kiblnd_reply(lnet_ni_t *ni, struct kib_rx *rx, lnet_msg_t *lntmsg)
int
kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
+ struct iov_iter *to, unsigned int rlen)
{
struct kib_rx *rx = private;
struct kib_msg *rxmsg = rx->rx_msg;
@@ -1730,10 +1733,9 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
int post_credit = IBLND_POSTRX_PEER_CREDIT;
int rc = 0;
- LASSERT(mlen <= rlen);
+ LASSERT(iov_iter_count(to) <= rlen);
LASSERT(!in_interrupt());
/* Either all pages or all vaddrs */
- LASSERT(!(kiov && iov));
switch (rxmsg->ibm_type) {
default:
@@ -1749,16 +1751,8 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
break;
}
- if (kiov)
- lnet_copy_flat2kiov(niov, kiov, offset,
- IBLND_MSG_SIZE, rxmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- mlen);
- else
- lnet_copy_flat2iov(niov, iov, offset,
- IBLND_MSG_SIZE, rxmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- mlen);
+ copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload,
+ IBLND_MSG_SIZE, to);
lnet_finalize(ni, lntmsg, 0);
break;
@@ -1766,7 +1760,7 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
struct kib_msg *txmsg;
struct kib_rdma_desc *rd;
- if (!mlen) {
+ if (!iov_iter_count(to)) {
lnet_finalize(ni, lntmsg, 0);
kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
rxmsg->ibm_u.putreq.ibprm_cookie);
@@ -1784,12 +1778,16 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
txmsg = tx->tx_msg;
rd = &txmsg->ibm_u.putack.ibpam_rd;
- if (!kiov)
+ if (!(to->type & ITER_BVEC))
rc = kiblnd_setup_rd_iov(ni, tx, rd,
- niov, iov, offset, mlen);
+ to->nr_segs, to->kvec,
+ to->iov_offset,
+ iov_iter_count(to));
else
rc = kiblnd_setup_rd_kiov(ni, tx, rd,
- niov, kiov, offset, mlen);
+ to->nr_segs, to->bvec,
+ to->iov_offset,
+ iov_iter_count(to));
if (rc) {
CERROR("Can't setup PUT sink for %s: %d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
@@ -2183,14 +2181,11 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
return;
}
- /**
- * refcount taken by cmid is not reliable after I released the glock
- * because this connection is visible to other threads now, another
- * thread can find and close this connection right after I released
- * the glock, if kiblnd_cm_callback for RDMA_CM_EVENT_DISCONNECTED is
- * called, it can release the connection refcount taken by cmid.
- * It means the connection could be destroyed before I finish my
- * operations on it.
+ /*
+ * +1 ref for myself: this connection is visible to other threads
+ * now, so the refcount of peer:ibp_conns can be released by a
+ * connection close from a different thread, or by the call to
+ * kiblnd_check_sends_locked() below. See bz21911 for details.
*/
kiblnd_conn_addref(conn);
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
@@ -2202,10 +2197,9 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
kiblnd_queue_tx_locked(tx, conn);
}
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
- kiblnd_check_sends(conn);
-
/* schedule blocked rxs */
kiblnd_handle_early_rxs(conn);
@@ -2240,6 +2234,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
struct kib_rej rej;
int version = IBLND_MSG_VERSION;
unsigned long flags;
+ int max_frags;
int rc;
struct sockaddr_in *peer_addr;
@@ -2346,22 +2341,20 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
goto failed;
}
- if (reqmsg->ibm_u.connparams.ibcp_max_frags >
- kiblnd_rdma_frags(version, ni)) {
- CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
- libcfs_nid2str(nid), version,
- reqmsg->ibm_u.connparams.ibcp_max_frags,
+ max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
+ if (max_frags > kiblnd_rdma_frags(version, ni)) {
+ CWARN("Can't accept conn from %s (version %x): max message size %d is too large (%d wanted)\n",
+ libcfs_nid2str(nid), version, max_frags,
kiblnd_rdma_frags(version, ni));
if (version >= IBLND_MSG_VERSION)
rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
goto failed;
- } else if (reqmsg->ibm_u.connparams.ibcp_max_frags <
- kiblnd_rdma_frags(version, ni) && !net->ibn_fmr_ps) {
- CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
- libcfs_nid2str(nid), version,
- reqmsg->ibm_u.connparams.ibcp_max_frags,
+ } else if (max_frags < kiblnd_rdma_frags(version, ni) &&
+ !net->ibn_fmr_ps) {
+ CWARN("Can't accept conn from %s (version %x): max message size %d incompatible without FMR pool (%d wanted)\n",
+ libcfs_nid2str(nid), version, max_frags,
kiblnd_rdma_frags(version, ni));
if (version == IBLND_MSG_VERSION)
@@ -2387,7 +2380,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
}
/* We have validated the peer's parameters so use those */
- peer->ibp_max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags;
+ peer->ibp_max_frags = max_frags;
peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
write_lock_irqsave(g_lock, flags);
@@ -2419,23 +2412,37 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
goto failed;
}
- /* tie-break connection race in favour of the higher NID */
+ /*
+ * Tie-break connection race in favour of the higher NID.
+ * If we keep running into a race condition multiple times,
+ * we have to assume that the connection attempt with the
+ * higher NID is stuck in a connecting state and will never
+ * recover. As such, we pass through this if-block and let
+ * the lower NID connection win so we can move forward.
+ */
if (peer2->ibp_connecting &&
- nid < ni->ni_nid) {
+ nid < ni->ni_nid && peer2->ibp_races <
+ MAX_CONN_RACES_BEFORE_ABORT) {
+ peer2->ibp_races++;
write_unlock_irqrestore(g_lock, flags);
- CWARN("Conn race %s\n", libcfs_nid2str(peer2->ibp_nid));
+ CDEBUG(D_NET, "Conn race %s\n",
+ libcfs_nid2str(peer2->ibp_nid));
kiblnd_peer_decref(peer);
rej.ibr_why = IBLND_REJECT_CONN_RACE;
goto failed;
}
-
+ if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT)
+ CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n",
+ libcfs_nid2str(peer2->ibp_nid),
+ MAX_CONN_RACES_BEFORE_ABORT);
/**
* passive connection is allowed even this peer is waiting for
* reconnection.
*/
peer2->ibp_reconnecting = 0;
+ peer2->ibp_races = 0;
peer2->ibp_accepting++;
kiblnd_peer_addref(peer2);
@@ -2494,7 +2501,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
sizeof(ackmsg->ibm_u.connparams));
ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
- ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
+ ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
@@ -2526,9 +2533,9 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
failed:
if (ni) {
- lnet_ni_decref(ni);
rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
+ lnet_ni_decref(ni);
}
rej.ibr_version = version;
@@ -2556,7 +2563,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
if (cp) {
msg_size = cp->ibcp_max_msg_size;
- frag_num = cp->ibcp_max_frags;
+ frag_num = cp->ibcp_max_frags << IBLND_FRAG_SHIFT;
queue_dep = cp->ibcp_queue_depth;
}
@@ -2821,11 +2828,11 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
goto failed;
}
- if (msg->ibm_u.connparams.ibcp_max_frags >
+ if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) >
conn->ibc_max_frags) {
CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_max_frags,
+ msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT,
conn->ibc_max_frags);
rc = -EPROTO;
goto failed;
@@ -2859,7 +2866,7 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth;
conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth;
conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth;
- conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags;
+ conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn));
@@ -2916,7 +2923,7 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
memset(msg, 0, sizeof(*msg));
kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
- msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
+ msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
kiblnd_pack_msg(peer->ibp_ni, msg, version,
@@ -3233,7 +3240,11 @@ kiblnd_check_conns(int idx)
*/
list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) {
list_del(&conn->ibc_connd_list);
- kiblnd_check_sends(conn);
+
+ spin_lock(&conn->ibc_lock);
+ kiblnd_check_sends_locked(conn);
+ spin_unlock(&conn->ibc_lock);
+
kiblnd_conn_decref(conn);
}
}
@@ -3419,6 +3430,12 @@ kiblnd_qp_event(struct ib_event *event, void *arg)
case IB_EVENT_COMM_EST:
CDEBUG(D_NET, "%s established\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ /*
+ * We received a packet but the connection isn't established;
+ * the handshake packet was probably lost, so it is safe to
+ * force the connection into the established state.
+ */
+ rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST);
return;
default:
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
index 07ec540946cd..cbc9a9c5385f 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -1468,11 +1468,6 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
conn->ksnc_route = NULL;
-#if 0 /* irrelevant with only eager routes */
- /* make route least favourite */
- list_del(&route->ksnr_list);
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-#endif
ksocknal_route_decref(route); /* drop conn's ref on route */
}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
index a56632b4ee37..e6ca0cf52691 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
@@ -86,8 +86,6 @@ struct ksock_sched { /* per scheduler state */
int kss_nconns; /* # connections assigned to
* this scheduler */
struct ksock_sched_info *kss_info; /* owner of it */
- struct page *kss_rx_scratch_pgs[LNET_MAX_IOV];
- struct kvec kss_scratch_iov[LNET_MAX_IOV];
};
struct ksock_sched_info {
@@ -616,9 +614,7 @@ void ksocknal_shutdown(lnet_ni_t *ni);
int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
int ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
+ int delayed, struct iov_iter *to, unsigned int rlen);
int ksocknal_accept(lnet_ni_t *ni, struct socket *sock);
int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
@@ -635,7 +631,7 @@ int ksocknal_close_peer_conns_locked(struct ksock_peer *peer,
int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why);
int ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr);
struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer,
- struct ksock_tx *tx, int nonblk);
+ struct ksock_tx *tx, int nonblk);
int ksocknal_launch_packet(lnet_ni_t *ni, struct ksock_tx *tx,
lnet_process_id_t id);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
index 303576d815c6..c1c6f604e6ad 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
@@ -35,8 +35,8 @@ ksocknal_alloc_tx(int type, int size)
spin_lock(&ksocknal_data.ksnd_tx_lock);
if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
- tx = list_entry(ksocknal_data.ksnd_idle_noop_txs. \
- next, struct ksock_tx, tx_list);
+ tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next,
+ struct ksock_tx, tx_list);
LASSERT(tx->tx_desc_size == size);
list_del(&tx->tx_list);
}
@@ -164,13 +164,13 @@ ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
do {
LASSERT(tx->tx_nkiov > 0);
- if (nob < (int)kiov->kiov_len) {
- kiov->kiov_offset += nob;
- kiov->kiov_len -= nob;
+ if (nob < (int)kiov->bv_len) {
+ kiov->bv_offset += nob;
+ kiov->bv_len -= nob;
return rc;
}
- nob -= (int)kiov->kiov_len;
+ nob -= (int)kiov->bv_len;
tx->tx_kiov = ++kiov;
tx->tx_nkiov--;
} while (nob);
@@ -326,13 +326,13 @@ ksocknal_recv_kiov(struct ksock_conn *conn)
do {
LASSERT(conn->ksnc_rx_nkiov > 0);
- if (nob < (int)kiov->kiov_len) {
- kiov->kiov_offset += nob;
- kiov->kiov_len -= nob;
+ if (nob < (int)kiov->bv_len) {
+ kiov->bv_offset += nob;
+ kiov->bv_len -= nob;
return -EAGAIN;
}
- nob -= kiov->kiov_len;
+ nob -= kiov->bv_len;
conn->ksnc_rx_kiov = ++kiov;
conn->ksnc_rx_nkiov--;
} while (nob);
@@ -1325,39 +1325,36 @@ ksocknal_process_receive(struct ksock_conn *conn)
int
ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
- unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
+ struct iov_iter *to, unsigned int rlen)
{
struct ksock_conn *conn = private;
struct ksock_sched *sched = conn->ksnc_scheduler;
- LASSERT(mlen <= rlen);
- LASSERT(niov <= LNET_MAX_IOV);
+ LASSERT(iov_iter_count(to) <= rlen);
+ LASSERT(to->nr_segs <= LNET_MAX_IOV);
conn->ksnc_cookie = msg;
- conn->ksnc_rx_nob_wanted = mlen;
+ conn->ksnc_rx_nob_wanted = iov_iter_count(to);
conn->ksnc_rx_nob_left = rlen;
- if (!mlen || iov) {
+ if (to->type & ITER_KVEC) {
conn->ksnc_rx_nkiov = 0;
conn->ksnc_rx_kiov = NULL;
conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
conn->ksnc_rx_niov =
lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov,
- niov, iov, offset, mlen);
+ to->nr_segs, to->kvec,
+ to->iov_offset, iov_iter_count(to));
} else {
conn->ksnc_rx_niov = 0;
conn->ksnc_rx_iov = NULL;
conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
conn->ksnc_rx_nkiov =
lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov,
- niov, kiov, offset, mlen);
+ to->nr_segs, to->bvec,
+ to->iov_offset, iov_iter_count(to));
}
- LASSERT(mlen ==
- lnet_iov_nob(conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
- lnet_kiov_nob(conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
-
LASSERT(conn->ksnc_rx_scheduled);
spin_lock_bh(&sched->kss_lock);
@@ -2008,13 +2005,6 @@ ksocknal_connect(struct ksock_route *route)
list_splice_init(&peer->ksnp_tx_queue, &zombies);
}
-#if 0 /* irrelevant with only eager routes */
- if (!route->ksnr_deleted) {
- /* make this route least-favourite for re-selection */
- list_del(&route->ksnr_list);
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
- }
-#endif
write_unlock_bh(&ksocknal_data.ksnd_global_lock);
ksocknal_peer_failed(peer);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
index 6a17757fce1e..6c95e989ca12 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
@@ -73,9 +73,9 @@ ksocknal_lib_zc_capable(struct ksock_conn *conn)
int
ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
{
+ struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
struct socket *sock = conn->ksnc_sock;
- int nob;
- int rc;
+ int nob, i;
if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
@@ -83,34 +83,16 @@ ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
!tx->tx_msg.ksm_csum) /* not checksummed */
ksocknal_lib_csum_tx(tx);
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
- {
-#if SOCKNAL_SINGLE_FRAG_TX
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
- unsigned int niov = tx->tx_niov;
-#endif
- struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
- int i;
+ for (nob = i = 0; i < tx->tx_niov; i++)
+ nob += tx->tx_iov[i].iov_len;
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = tx->tx_iov[i];
- nob += scratchiov[i].iov_len;
- }
+ if (!list_empty(&conn->ksnc_tx_queue) ||
+ nob < tx->tx_resid)
+ msg.msg_flags |= MSG_MORE;
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_MORE;
-
- rc = kernel_sendmsg(sock, &msg, scratchiov, niov, nob);
- }
- return rc;
+ iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
+ tx->tx_iov, tx->tx_niov, nob);
+ return sock_sendmsg(sock, &msg);
}
int
@@ -124,20 +106,16 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
/* Not NOOP message */
LASSERT(tx->tx_lnetmsg);
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
if (tx->tx_msg.ksm_zc_cookies[0]) {
/* Zero copy is enabled */
struct sock *sk = sock->sk;
- struct page *page = kiov->kiov_page;
- int offset = kiov->kiov_offset;
- int fragsize = kiov->kiov_len;
+ struct page *page = kiov->bv_page;
+ int offset = kiov->bv_offset;
+ int fragsize = kiov->bv_len;
int msgflg = MSG_DONTWAIT;
CDEBUG(D_NET, "page %p + offset %x for %d\n",
- page, offset, kiov->kiov_len);
+ page, offset, kiov->bv_len);
if (!list_empty(&conn->ksnc_tx_queue) ||
fragsize < tx->tx_resid)
@@ -150,34 +128,19 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
rc = tcp_sendpage(sk, page, offset, fragsize, msgflg);
}
} else {
-#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
- unsigned int niov = tx->tx_nkiov;
-#endif
struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
int i;
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
- kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
+ for (nob = i = 0; i < tx->tx_nkiov; i++)
+ nob += kiov[i].bv_len;
if (!list_empty(&conn->ksnc_tx_queue) ||
nob < tx->tx_resid)
msg.msg_flags |= MSG_MORE;
- rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
-
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
+ iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC,
+ kiov, tx->tx_nkiov, nob);
+ rc = sock_sendmsg(sock, &msg);
}
return rc;
}
@@ -201,14 +164,7 @@ ksocknal_lib_eager_ack(struct ksock_conn *conn)
int
ksocknal_lib_recv_iov(struct ksock_conn *conn)
{
-#if SOCKNAL_SINGLE_FRAG_RX
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_niov;
-#endif
struct kvec *iov = conn->ksnc_rx_iov;
struct msghdr msg = {
.msg_flags = 0
@@ -220,20 +176,15 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn)
int sum;
__u32 saved_csum;
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
LASSERT(niov > 0);
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = iov[i];
- nob += scratchiov[i].iov_len;
- }
+ for (nob = i = 0; i < niov; i++)
+ nob += iov[i].iov_len;
+
LASSERT(nob <= conn->ksnc_rx_nob_wanted);
- rc = kernel_recvmsg(conn->ksnc_sock, &msg, scratchiov, niov, nob,
- MSG_DONTWAIT);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob);
+ rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
saved_csum = 0;
if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
@@ -259,67 +210,10 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn)
return rc;
}
-static void
-ksocknal_lib_kiov_vunmap(void *addr)
-{
- if (!addr)
- return;
-
- vunmap(addr);
-}
-
-static void *
-ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
- struct kvec *iov, struct page **pages)
-{
- void *addr;
- int nob;
- int i;
-
- if (!*ksocknal_tunables.ksnd_zc_recv || !pages)
- return NULL;
-
- LASSERT(niov <= LNET_MAX_IOV);
-
- if (niov < 2 ||
- niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
- return NULL;
-
- for (nob = i = 0; i < niov; i++) {
- if ((kiov[i].kiov_offset && i > 0) ||
- (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_SIZE && i < niov - 1))
- return NULL;
-
- pages[i] = kiov[i].kiov_page;
- nob += kiov[i].kiov_len;
- }
-
- addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
- if (!addr)
- return NULL;
-
- iov->iov_base = addr + kiov[0].kiov_offset;
- iov->iov_len = nob;
-
- return addr;
-}
-
int
ksocknal_lib_recv_kiov(struct ksock_conn *conn)
{
-#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- struct page **pages = NULL;
- unsigned int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
- struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs;
unsigned int niov = conn->ksnc_rx_nkiov;
-#endif
lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
struct msghdr msg = {
.msg_flags = 0
@@ -328,63 +222,32 @@ ksocknal_lib_recv_kiov(struct ksock_conn *conn)
int i;
int rc;
void *base;
- void *addr;
int sum;
int fragnob;
- int n;
-
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
- addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages);
- if (addr) {
- nob = scratchiov[0].iov_len;
- n = 1;
- } else {
- for (nob = i = 0; i < niov; i++) {
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
- kiov[i].kiov_offset;
- }
- n = niov;
- }
+ for (nob = i = 0; i < niov; i++)
+ nob += kiov[i].bv_len;
LASSERT(nob <= conn->ksnc_rx_nob_wanted);
- rc = kernel_recvmsg(conn->ksnc_sock, &msg, (struct kvec *)scratchiov,
- n, nob, MSG_DONTWAIT);
+ iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob);
+ rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
if (conn->ksnc_msg.ksm_csum) {
for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
LASSERT(i < niov);
- /*
- * Dang! have to kmap again because I have nowhere to
- * stash the mapped address. But by doing it while the
- * page is still mapped, the kernel just bumps the map
- * count and returns me the address it stashed.
- */
- base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
- fragnob = kiov[i].kiov_len;
+ base = kmap(kiov[i].bv_page) + kiov[i].bv_offset;
+ fragnob = kiov[i].bv_len;
if (fragnob > sum)
fragnob = sum;
conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
base, fragnob);
- kunmap(kiov[i].kiov_page);
+ kunmap(kiov[i].bv_page);
}
}
-
- if (addr) {
- ksocknal_lib_kiov_vunmap(addr);
- } else {
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
- }
-
return rc;
}
@@ -406,12 +269,12 @@ ksocknal_lib_csum_tx(struct ksock_tx *tx)
if (tx->tx_kiov) {
for (i = 0; i < tx->tx_nkiov; i++) {
- base = kmap(tx->tx_kiov[i].kiov_page) +
- tx->tx_kiov[i].kiov_offset;
+ base = kmap(tx->tx_kiov[i].bv_page) +
+ tx->tx_kiov[i].bv_offset;
- csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
+ csum = ksocknal_csum(csum, base, tx->tx_kiov[i].bv_len);
- kunmap(tx->tx_kiov[i].kiov_page);
+ kunmap(tx->tx_kiov[i].bv_page);
}
} else {
for (i = 1; i < tx->tx_niov; i++)
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
index 42b15a769183..23b36b890964 100644
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/debug.c
@@ -328,15 +328,20 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
*/
void libcfs_debug_dumplog_internal(void *arg)
{
+ static time64_t last_dump_time;
+ time64_t current_time;
void *journal_info;
journal_info = current->journal_info;
current->journal_info = NULL;
+ current_time = ktime_get_real_seconds();
- if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) {
+ if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) &&
+ current_time > last_dump_time) {
+ last_dump_time = current_time;
snprintf(debug_file_name, sizeof(debug_file_name) - 1,
"%s.%lld.%ld", libcfs_debug_file_path_arr,
- (s64)ktime_get_real_seconds(), (long_ptr_t)arg);
+ (s64)current_time, (long_ptr_t)arg);
pr_alert("LustreError: dumping log to %s\n", debug_file_name);
cfs_tracefile_dump_all_pages(debug_file_name);
libcfs_run_debug_log_upcall(debug_file_name);
diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c
index 9288ee08d1f7..e4b1a0a86eae 100644
--- a/drivers/staging/lustre/lnet/libcfs/fail.c
+++ b/drivers/staging/lustre/lnet/libcfs/fail.c
@@ -90,8 +90,10 @@ int __cfs_fail_check_set(__u32 id, __u32 value, int set)
}
}
- if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) &&
- (value & CFS_FAIL_ONCE))
+ /* Take the current call into account for FAIL_ONCE for ORSET only;
+ * RESET sets a new fail_loc, so it does not change the current call.
+ */
+ if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE))
set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
/* Lost race to set CFS_FAILED_BIT. */
if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) {
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
index fc697cdfcdaf..56a614d7713b 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
@@ -229,8 +229,6 @@ cfs_str2num_check(char *str, int nob, unsigned *num,
char *endp, cache;
int rc;
- str = cfs_trimwhite(str);
-
/**
* kstrouint can only handle strings composed
* of only numbers. We need to scan the string
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index b52518c54efe..e8b1a61420de 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -74,6 +74,17 @@ struct cfs_cpt_data {
static struct cfs_cpt_data cpt_data;
+static void
+cfs_node_to_cpumask(int node, cpumask_t *mask)
+{
+ const cpumask_t *tmp = cpumask_of_node(node);
+
+ if (tmp)
+ cpumask_copy(mask, tmp);
+ else
+ cpumask_clear(mask);
+}
+
void
cfs_cpt_table_free(struct cfs_cpt_table *cptab)
{
@@ -403,7 +414,7 @@ cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
mutex_lock(&cpt_data.cpt_mutex);
mask = cpt_data.cpt_cpumask;
- cpumask_copy(mask, cpumask_of_node(node));
+ cfs_node_to_cpumask(node, mask);
rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
@@ -427,7 +438,7 @@ cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
mutex_lock(&cpt_data.cpt_mutex);
mask = cpt_data.cpt_cpumask;
- cpumask_copy(mask, cpumask_of_node(node));
+ cfs_node_to_cpumask(node, mask);
cfs_cpt_unset_cpumask(cptab, cpt, mask);
@@ -749,7 +760,7 @@ cfs_cpt_table_create(int ncpt)
}
for_each_online_node(i) {
- cpumask_copy(mask, cpumask_of_node(i));
+ cfs_node_to_cpumask(i, mask);
while (!cpumask_empty(mask)) {
struct cfs_cpu_partition *part;
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
index 5c0116ade909..7f56d2c9dd00 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
@@ -95,8 +95,8 @@ static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg,
err = crypto_ahash_setkey(tfm, key, key_len);
else if ((*type)->cht_key != 0)
err = crypto_ahash_setkey(tfm,
- (unsigned char *)&((*type)->cht_key),
- (*type)->cht_size);
+ (unsigned char *)&((*type)->cht_key),
+ (*type)->cht_size);
if (err != 0) {
ahash_request_free(*req);
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 346db892f275..4daf828198c3 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -1286,6 +1286,25 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
sizeof(*ni->ni_lnd_tunables));
}
+ /*
+ * If given some LND tunable parameters, parse those now to
+ * override the values in the NI structure.
+ */
+ if (conf) {
+ if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
+ ni->ni_peerrtrcredits =
+ conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
+ if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
+ ni->ni_peertimeout =
+ conf->cfg_config_u.cfg_net.net_peer_timeout;
+ if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
+ ni->ni_peertxcredits =
+ conf->cfg_config_u.cfg_net.net_peer_tx_credits;
+ if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
+ ni->ni_maxtxcredits =
+ conf->cfg_config_u.cfg_net.net_max_tx_credits;
+ }
+
rc = lnd->lnd_startup(ni);
mutex_unlock(&the_lnet.ln_lnd_mutex);
@@ -1299,33 +1318,6 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
goto failed0;
}
- /*
- * If given some LND tunable parameters, parse those now to
- * override the values in the NI structure.
- */
- if (conf && conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) {
- ni->ni_peerrtrcredits =
- conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
- }
- if (conf && conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) {
- ni->ni_peertimeout =
- conf->cfg_config_u.cfg_net.net_peer_timeout;
- }
- /*
- * TODO
- * Note: For now, don't allow the user to change
- * peertxcredits as this number is used in the
- * IB LND to control queue depth.
- *
- * if (conf && conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
- * ni->ni_peertxcredits =
- * conf->cfg_config_u.cfg_net.net_peer_tx_credits;
- */
- if (conf && conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) {
- ni->ni_maxtxcredits =
- conf->cfg_config_u.cfg_net.net_max_tx_credits;
- }
-
LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query);
lnet_net_lock(LNET_LOCK_EX);
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
index a72afdf68bb2..9e2183ff847e 100644
--- a/drivers/staging/lustre/lnet/lnet/config.c
+++ b/drivers/staging/lustre/lnet/lnet/config.c
@@ -31,6 +31,8 @@
*/
#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
#include "../../include/linux/lnet/lib-lnet.h"
struct lnet_text_buf { /* tmp struct for parsing routes */
@@ -110,6 +112,11 @@ lnet_ni_free(struct lnet_ni *ni)
LIBCFS_FREE(ni->ni_interfaces[i],
strlen(ni->ni_interfaces[i]) + 1);
}
+
+ /* release reference to net namespace */
+ if (ni->ni_net_ns)
+ put_net(ni->ni_net_ns);
+
LIBCFS_FREE(ni, sizeof(*ni));
}
@@ -171,6 +178,13 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
/* LND will fill in the address part of the NID */
ni->ni_nid = LNET_MKNID(net, 0);
+
+ /* Store net namespace in which current ni is being created */
+ if (current->nsproxy->net_ns)
+ ni->ni_net_ns = get_net(current->nsproxy->net_ns);
+ else
+ ni->ni_net_ns = NULL;
+
ni->ni_last_alive = ktime_get_real_seconds();
list_add_tail(&ni->ni_list, nilist);
return ni;
diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c
index 1834bf7a27ef..eab53cd57296 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-md.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-md.c
@@ -134,11 +134,11 @@ lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink)
for (i = 0; i < (int)niov; i++) {
/* We take the page pointer on trust */
- if (lmd->md_iov.kiov[i].kiov_offset +
- lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE)
+ if (lmd->md_iov.kiov[i].bv_offset +
+ lmd->md_iov.kiov[i].bv_len > PAGE_SIZE)
return -EINVAL; /* invalid length */
- total_length += lmd->md_iov.kiov[i].kiov_len;
+ total_length += lmd->md_iov.kiov[i].bv_len;
}
lmd->md_length = total_length;
@@ -292,11 +292,12 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
return -ENOMEM;
rc = lnet_md_build(md, &umd, unlink);
+ if (rc)
+ goto out_free;
+
cpt = lnet_cpt_of_cookie(meh.cookie);
lnet_res_lock(cpt);
- if (rc)
- goto failed;
me = lnet_handle2me(&meh);
if (!me)
@@ -307,7 +308,7 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
rc = lnet_md_link(md, umd.eq_handle, cpt);
if (rc)
- goto failed;
+ goto out_unlock;
/*
* attach this MD to portal of ME and check if it matches any
@@ -324,10 +325,10 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
return 0;
- failed:
- lnet_md_free(md);
-
+out_unlock:
lnet_res_unlock(cpt);
+out_free:
+ lnet_md_free(md);
return rc;
}
EXPORT_SYMBOL(LNetMDAttach);
@@ -370,24 +371,25 @@ LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle)
return -ENOMEM;
rc = lnet_md_build(md, &umd, unlink);
+ if (rc)
+ goto out_free;
cpt = lnet_res_lock_current();
- if (rc)
- goto failed;
rc = lnet_md_link(md, umd.eq_handle, cpt);
if (rc)
- goto failed;
+ goto out_unlock;
lnet_md2handle(handle, md);
lnet_res_unlock(cpt);
return 0;
- failed:
+out_unlock:
+ lnet_res_unlock(cpt);
+out_free:
lnet_md_free(md);
- lnet_res_unlock(cpt);
return rc;
}
EXPORT_SYMBOL(LNetMDBind);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index e6d3b801d87d..48e6f8f2392f 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -37,6 +37,8 @@
#define DEBUG_SUBSYSTEM S_LNET
#include "../../include/linux/lnet/lib-lnet.h"
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
static int local_nid_dist_zero = 1;
module_param(local_nid_dist_zero, int, 0444);
@@ -166,25 +168,17 @@ lnet_iov_nob(unsigned int niov, struct kvec *iov)
EXPORT_SYMBOL(lnet_iov_nob);
void
-lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset,
- unsigned int nsiov, struct kvec *siov, unsigned int soffset,
- unsigned int nob)
+lnet_copy_iov2iter(struct iov_iter *to,
+ unsigned int nsiov, const struct kvec *siov,
+ unsigned int soffset, unsigned int nob)
{
/* NB diov, siov are READ-ONLY */
- unsigned int this_nob;
+ const char *s;
+ size_t left;
if (!nob)
return;
- /* skip complete frags before 'doffset' */
- LASSERT(ndiov > 0);
- while (doffset >= diov->iov_len) {
- doffset -= diov->iov_len;
- diov++;
- ndiov--;
- LASSERT(ndiov > 0);
- }
-
/* skip complete frags before 'soffset' */
LASSERT(nsiov > 0);
while (soffset >= siov->iov_len) {
@@ -194,39 +188,68 @@ lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset,
LASSERT(nsiov > 0);
}
+ s = (char *)siov->iov_base + soffset;
+ left = siov->iov_len - soffset;
do {
- LASSERT(ndiov > 0);
+ size_t n, copy = left;
LASSERT(nsiov > 0);
- this_nob = min(diov->iov_len - doffset,
- siov->iov_len - soffset);
- this_nob = min(this_nob, nob);
- memcpy((char *)diov->iov_base + doffset,
- (char *)siov->iov_base + soffset, this_nob);
- nob -= this_nob;
+ if (copy > nob)
+ copy = nob;
+ n = copy_to_iter(s, copy, to);
+ if (n != copy)
+ return;
+ nob -= n;
- if (diov->iov_len > doffset + this_nob) {
- doffset += this_nob;
- } else {
- diov++;
- ndiov--;
- doffset = 0;
- }
+ siov++;
+ s = (char *)siov->iov_base;
+ left = siov->iov_len;
+ nsiov--;
+ } while (nob > 0);
+}
+EXPORT_SYMBOL(lnet_copy_iov2iter);
- if (siov->iov_len > soffset + this_nob) {
- soffset += this_nob;
- } else {
- siov++;
- nsiov--;
- soffset = 0;
- }
+void
+lnet_copy_kiov2iter(struct iov_iter *to,
+ unsigned int nsiov, const lnet_kiov_t *siov,
+ unsigned int soffset, unsigned int nob)
+{
+ if (!nob)
+ return;
+
+ LASSERT(!in_interrupt());
+
+ LASSERT(nsiov > 0);
+ while (soffset >= siov->bv_len) {
+ soffset -= siov->bv_len;
+ siov++;
+ nsiov--;
+ LASSERT(nsiov > 0);
+ }
+
+ do {
+ size_t copy = siov->bv_len - soffset, n;
+
+ LASSERT(nsiov > 0);
+
+ if (copy > nob)
+ copy = nob;
+ n = copy_page_to_iter(siov->bv_page,
+ siov->bv_offset + soffset,
+ copy, to);
+ if (n != copy)
+ return;
+ nob -= n;
+ siov++;
+ nsiov--;
+ soffset = 0;
} while (nob > 0);
}
-EXPORT_SYMBOL(lnet_copy_iov2iov);
+EXPORT_SYMBOL(lnet_copy_kiov2iter);
int
lnet_extract_iov(int dst_niov, struct kvec *dst,
- int src_niov, struct kvec *src,
+ int src_niov, const struct kvec *src,
unsigned int offset, unsigned int len)
{
/*
@@ -280,238 +303,15 @@ lnet_kiov_nob(unsigned int niov, lnet_kiov_t *kiov)
LASSERT(!niov || kiov);
while (niov-- > 0)
- nob += (kiov++)->kiov_len;
+ nob += (kiov++)->bv_len;
return nob;
}
EXPORT_SYMBOL(lnet_kiov_nob);
-void
-lnet_copy_kiov2kiov(unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset,
- unsigned int nsiov, lnet_kiov_t *siov, unsigned int soffset,
- unsigned int nob)
-{
- /* NB diov, siov are READ-ONLY */
- unsigned int this_nob;
- char *daddr = NULL;
- char *saddr = NULL;
-
- if (!nob)
- return;
-
- LASSERT(!in_interrupt());
-
- LASSERT(ndiov > 0);
- while (doffset >= diov->kiov_len) {
- doffset -= diov->kiov_len;
- diov++;
- ndiov--;
- LASSERT(ndiov > 0);
- }
-
- LASSERT(nsiov > 0);
- while (soffset >= siov->kiov_len) {
- soffset -= siov->kiov_len;
- siov++;
- nsiov--;
- LASSERT(nsiov > 0);
- }
-
- do {
- LASSERT(ndiov > 0);
- LASSERT(nsiov > 0);
- this_nob = min(diov->kiov_len - doffset,
- siov->kiov_len - soffset);
- this_nob = min(this_nob, nob);
-
- if (!daddr)
- daddr = ((char *)kmap(diov->kiov_page)) +
- diov->kiov_offset + doffset;
- if (!saddr)
- saddr = ((char *)kmap(siov->kiov_page)) +
- siov->kiov_offset + soffset;
-
- /*
- * Vanishing risk of kmap deadlock when mapping 2 pages.
- * However in practice at least one of the kiovs will be mapped
- * kernel pages and the map/unmap will be NOOPs
- */
- memcpy(daddr, saddr, this_nob);
- nob -= this_nob;
-
- if (diov->kiov_len > doffset + this_nob) {
- daddr += this_nob;
- doffset += this_nob;
- } else {
- kunmap(diov->kiov_page);
- daddr = NULL;
- diov++;
- ndiov--;
- doffset = 0;
- }
-
- if (siov->kiov_len > soffset + this_nob) {
- saddr += this_nob;
- soffset += this_nob;
- } else {
- kunmap(siov->kiov_page);
- saddr = NULL;
- siov++;
- nsiov--;
- soffset = 0;
- }
- } while (nob > 0);
-
- if (daddr)
- kunmap(diov->kiov_page);
- if (saddr)
- kunmap(siov->kiov_page);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2kiov);
-
-void
-lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov, unsigned int iovoffset,
- unsigned int nkiov, lnet_kiov_t *kiov,
- unsigned int kiovoffset, unsigned int nob)
-{
- /* NB iov, kiov are READ-ONLY */
- unsigned int this_nob;
- char *addr = NULL;
-
- if (!nob)
- return;
-
- LASSERT(!in_interrupt());
-
- LASSERT(niov > 0);
- while (iovoffset >= iov->iov_len) {
- iovoffset -= iov->iov_len;
- iov++;
- niov--;
- LASSERT(niov > 0);
- }
-
- LASSERT(nkiov > 0);
- while (kiovoffset >= kiov->kiov_len) {
- kiovoffset -= kiov->kiov_len;
- kiov++;
- nkiov--;
- LASSERT(nkiov > 0);
- }
-
- do {
- LASSERT(niov > 0);
- LASSERT(nkiov > 0);
- this_nob = min(iov->iov_len - iovoffset,
- (__kernel_size_t)kiov->kiov_len - kiovoffset);
- this_nob = min(this_nob, nob);
-
- if (!addr)
- addr = ((char *)kmap(kiov->kiov_page)) +
- kiov->kiov_offset + kiovoffset;
-
- memcpy((char *)iov->iov_base + iovoffset, addr, this_nob);
- nob -= this_nob;
-
- if (iov->iov_len > iovoffset + this_nob) {
- iovoffset += this_nob;
- } else {
- iov++;
- niov--;
- iovoffset = 0;
- }
-
- if (kiov->kiov_len > kiovoffset + this_nob) {
- addr += this_nob;
- kiovoffset += this_nob;
- } else {
- kunmap(kiov->kiov_page);
- addr = NULL;
- kiov++;
- nkiov--;
- kiovoffset = 0;
- }
-
- } while (nob > 0);
-
- if (addr)
- kunmap(kiov->kiov_page);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2iov);
-
-void
-lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov,
- unsigned int kiovoffset, unsigned int niov,
- struct kvec *iov, unsigned int iovoffset,
- unsigned int nob)
-{
- /* NB kiov, iov are READ-ONLY */
- unsigned int this_nob;
- char *addr = NULL;
-
- if (!nob)
- return;
-
- LASSERT(!in_interrupt());
-
- LASSERT(nkiov > 0);
- while (kiovoffset >= kiov->kiov_len) {
- kiovoffset -= kiov->kiov_len;
- kiov++;
- nkiov--;
- LASSERT(nkiov > 0);
- }
-
- LASSERT(niov > 0);
- while (iovoffset >= iov->iov_len) {
- iovoffset -= iov->iov_len;
- iov++;
- niov--;
- LASSERT(niov > 0);
- }
-
- do {
- LASSERT(nkiov > 0);
- LASSERT(niov > 0);
- this_nob = min((__kernel_size_t)kiov->kiov_len - kiovoffset,
- iov->iov_len - iovoffset);
- this_nob = min(this_nob, nob);
-
- if (!addr)
- addr = ((char *)kmap(kiov->kiov_page)) +
- kiov->kiov_offset + kiovoffset;
-
- memcpy(addr, (char *)iov->iov_base + iovoffset, this_nob);
- nob -= this_nob;
-
- if (kiov->kiov_len > kiovoffset + this_nob) {
- addr += this_nob;
- kiovoffset += this_nob;
- } else {
- kunmap(kiov->kiov_page);
- addr = NULL;
- kiov++;
- nkiov--;
- kiovoffset = 0;
- }
-
- if (iov->iov_len > iovoffset + this_nob) {
- iovoffset += this_nob;
- } else {
- iov++;
- niov--;
- iovoffset = 0;
- }
- } while (nob > 0);
-
- if (addr)
- kunmap(kiov->kiov_page);
-}
-EXPORT_SYMBOL(lnet_copy_iov2kiov);
-
int
lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
- int src_niov, lnet_kiov_t *src,
+ int src_niov, const lnet_kiov_t *src,
unsigned int offset, unsigned int len)
{
/*
@@ -526,8 +326,8 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
return 0; /* no frags */
LASSERT(src_niov > 0);
- while (offset >= src->kiov_len) { /* skip initial frags */
- offset -= src->kiov_len;
+ while (offset >= src->bv_len) { /* skip initial frags */
+ offset -= src->bv_len;
src_niov--;
src++;
LASSERT(src_niov > 0);
@@ -538,19 +338,19 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
LASSERT(src_niov > 0);
LASSERT((int)niov <= dst_niov);
- frag_len = src->kiov_len - offset;
- dst->kiov_page = src->kiov_page;
- dst->kiov_offset = src->kiov_offset + offset;
+ frag_len = src->bv_len - offset;
+ dst->bv_page = src->bv_page;
+ dst->bv_offset = src->bv_offset + offset;
if (len <= frag_len) {
- dst->kiov_len = len;
- LASSERT(dst->kiov_offset + dst->kiov_len
+ dst->bv_len = len;
+ LASSERT(dst->bv_offset + dst->bv_len
<= PAGE_SIZE);
return niov;
}
- dst->kiov_len = frag_len;
- LASSERT(dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
+ dst->bv_len = frag_len;
+ LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE);
len -= frag_len;
dst++;
@@ -569,6 +369,7 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
unsigned int niov = 0;
struct kvec *iov = NULL;
lnet_kiov_t *kiov = NULL;
+ struct iov_iter to;
int rc;
LASSERT(!in_interrupt());
@@ -594,8 +395,14 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
}
}
- rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed,
- niov, iov, kiov, offset, mlen, rlen);
+ if (iov) {
+ iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset);
+ iov_iter_advance(&to, offset);
+ } else {
+ iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset);
+ iov_iter_advance(&to, offset);
+ }
+ rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen);
if (rc < 0)
lnet_finalize(ni, msg, rc);
}
@@ -2002,6 +1809,9 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
lnet_msgtyp2str(type), rc);
lnet_msg_free(msg);
+ if (rc == -ESHUTDOWN)
+ /* We are shutting down. Don't do anything more */
+ return 0;
goto drop;
}
@@ -2512,6 +2322,15 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
}
if (LNET_NIDNET(ni->ni_nid) == dstnet) {
+ /*
+ * Check if ni was originally created in
+ * current net namespace.
+ * If not, assign order above 0xffff0000,
+ * to make this ni not a priority.
+ */
+ if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns))
+ order += 0xffff0000;
+
if (srcnidp)
*srcnidp = ni->ni_nid;
if (orderp)
diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c
index 910e106e221d..0897e588bd54 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-msg.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-msg.c
@@ -449,23 +449,7 @@ lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int status)
if (!msg)
return;
-#if 0
- CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n",
- lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target),
- msg->msg_target_is_router ? "t" : "",
- msg->msg_routing ? "X" : "",
- msg->msg_ack ? "A" : "",
- msg->msg_sending ? "S" : "",
- msg->msg_receiving ? "R" : "",
- msg->msg_delayed ? "d" : "",
- msg->msg_txcredit ? "C" : "",
- msg->msg_peertxcredit ? "c" : "",
- msg->msg_rtrcredit ? "F" : "",
- msg->msg_peerrtrcredit ? "f" : "",
- msg->msg_onactivelist ? "!" : "",
- !msg->msg_txpeer ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid),
- !msg->msg_rxpeer ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid));
-#endif
+
msg->msg_ev.status = status;
if (msg->msg_md) {
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index 891fd59401d7..4e6dd5149b4f 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -265,21 +265,17 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
unsigned long then;
struct timeval tv;
+ struct kvec iov = { .iov_base = buffer, .iov_len = nob };
+ struct msghdr msg = {NULL,};
LASSERT(nob > 0);
/*
* Caller may pass a zero timeout if she thinks the socket buffer is
* empty enough to take the whole message immediately
*/
+ iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
for (;;) {
- struct kvec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_flags = !timeout ? MSG_DONTWAIT : 0
- };
-
+ msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
if (timeout) {
/* Set send timeout to remaining time */
jiffies_to_timeval(jiffies_left, &tv);
@@ -296,9 +292,6 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
jiffies_left -= jiffies - then;
- if (rc == nob)
- return 0;
-
if (rc < 0)
return rc;
@@ -307,11 +300,11 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
return -ECONNABORTED;
}
+ if (!msg_data_left(&msg))
+ break;
+
if (jiffies_left <= 0)
return -EAGAIN;
-
- buffer = ((char *)buffer) + rc;
- nob -= rc;
}
return 0;
}
diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c
index 08402712a452..cb213b8f51cf 100644
--- a/drivers/staging/lustre/lnet/lnet/lo.c
+++ b/drivers/staging/lustre/lnet/lnet/lo.c
@@ -42,36 +42,23 @@ lolnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
static int
lolnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
+ int delayed, struct iov_iter *to, unsigned int rlen)
{
lnet_msg_t *sendmsg = private;
if (lntmsg) { /* not discarding */
- if (sendmsg->msg_iov) {
- if (iov)
- lnet_copy_iov2iov(niov, iov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_iov,
- sendmsg->msg_offset, mlen);
- else
- lnet_copy_iov2kiov(niov, kiov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_iov,
- sendmsg->msg_offset, mlen);
- } else {
- if (iov)
- lnet_copy_kiov2iov(niov, iov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_kiov,
- sendmsg->msg_offset, mlen);
- else
- lnet_copy_kiov2kiov(niov, kiov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_kiov,
- sendmsg->msg_offset, mlen);
- }
+ if (sendmsg->msg_iov)
+ lnet_copy_iov2iter(to,
+ sendmsg->msg_niov,
+ sendmsg->msg_iov,
+ sendmsg->msg_offset,
+ iov_iter_count(to));
+ else
+ lnet_copy_kiov2iter(to,
+ sendmsg->msg_niov,
+ sendmsg->msg_kiov,
+ sendmsg->msg_offset,
+ iov_iter_count(to));
lnet_finalize(ni, lntmsg, 0);
}
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index 063543233035..063ad55ec950 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -1307,7 +1307,7 @@ lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages)
int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
while (--npages >= 0)
- __free_page(rb->rb_kiov[npages].kiov_page);
+ __free_page(rb->rb_kiov[npages].bv_page);
LIBCFS_FREE(rb, sz);
}
@@ -1333,15 +1333,15 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt)
GFP_KERNEL | __GFP_ZERO, 0);
if (!page) {
while (--i >= 0)
- __free_page(rb->rb_kiov[i].kiov_page);
+ __free_page(rb->rb_kiov[i].bv_page);
LIBCFS_FREE(rb, sz);
return NULL;
}
- rb->rb_kiov[i].kiov_len = PAGE_SIZE;
- rb->rb_kiov[i].kiov_offset = 0;
- rb->rb_kiov[i].kiov_page = page;
+ rb->rb_kiov[i].bv_len = PAGE_SIZE;
+ rb->rb_kiov[i].bv_offset = 0;
+ rb->rb_kiov[i].bv_page = page;
}
return rb;
@@ -1693,7 +1693,7 @@ lnet_rtrpools_adjust(int tiny, int small, int large)
int
lnet_rtrpools_enable(void)
{
- int rc;
+ int rc = 0;
if (the_lnet.ln_routing)
return 0;
@@ -1706,9 +1706,9 @@ lnet_rtrpools_enable(void)
* if we are just configuring this for the first
* time.
*/
- return lnet_rtrpools_alloc(1);
-
- rc = lnet_rtrpools_adjust_helper(0, 0, 0);
+ rc = lnet_rtrpools_alloc(1);
+ else
+ rc = lnet_rtrpools_adjust_helper(0, 0, 0);
if (rc)
return rc;
@@ -1718,7 +1718,7 @@ lnet_rtrpools_enable(void)
the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED;
lnet_net_unlock(LNET_LOCK_EX);
- return 0;
+ return rc;
}
void
diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c
index 13d0454e7fcb..b20c5d394e3b 100644
--- a/drivers/staging/lustre/lnet/selftest/brw_test.c
+++ b/drivers/staging/lustre/lnet/selftest/brw_test.c
@@ -226,7 +226,7 @@ brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
struct page *pg;
for (i = 0; i < bk->bk_niov; i++) {
- pg = bk->bk_iovs[i].kiov_page;
+ pg = bk->bk_iovs[i].bv_page;
brw_fill_page(pg, pattern, magic);
}
}
@@ -238,7 +238,7 @@ brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
struct page *pg;
for (i = 0; i < bk->bk_niov; i++) {
- pg = bk->bk_iovs[i].kiov_page;
+ pg = bk->bk_iovs[i].bv_page;
if (brw_check_page(pg, pattern, magic)) {
CERROR("Bulk page %p (%d/%d) is corrupted!\n",
pg, i, bk->bk_niov);
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
index 1be3cad727ae..55afb53b0743 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.c
@@ -152,10 +152,10 @@ lstcon_rpc_put(struct lstcon_rpc *crpc)
LASSERT(list_empty(&crpc->crp_link));
for (i = 0; i < bulk->bk_niov; i++) {
- if (!bulk->bk_iovs[i].kiov_page)
+ if (!bulk->bk_iovs[i].bv_page)
continue;
- __free_page(bulk->bk_iovs[i].kiov_page);
+ __free_page(bulk->bk_iovs[i].bv_page);
}
srpc_client_rpc_decref(crpc->crp_rpc);
@@ -705,7 +705,7 @@ lstcon_next_id(int idx, int nkiov, lnet_kiov_t *kiov)
LASSERT(i < nkiov);
- pid = (lnet_process_id_packed_t *)page_address(kiov[i].kiov_page);
+ pid = (lnet_process_id_packed_t *)page_address(kiov[i].bv_page);
return &pid[idx % SFW_ID_PER_PAGE];
}
@@ -849,12 +849,11 @@ lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned feats,
min_t(int, nob, PAGE_SIZE);
nob -= len;
- bulk->bk_iovs[i].kiov_offset = 0;
- bulk->bk_iovs[i].kiov_len = len;
- bulk->bk_iovs[i].kiov_page =
- alloc_page(GFP_KERNEL);
+ bulk->bk_iovs[i].bv_offset = 0;
+ bulk->bk_iovs[i].bv_len = len;
+ bulk->bk_iovs[i].bv_page = alloc_page(GFP_KERNEL);
- if (!bulk->bk_iovs[i].kiov_page) {
+ if (!bulk->bk_iovs[i].bv_page) {
lstcon_rpc_put(*crpc);
return -ENOMEM;
}
diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c
index 4c33621f06da..a0fcbf3bcc95 100644
--- a/drivers/staging/lustre/lnet/selftest/console.c
+++ b/drivers/staging/lustre/lnet/selftest/console.c
@@ -1993,8 +1993,6 @@ static void lstcon_init_acceptor_service(void)
lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX;
}
-extern int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
-
static DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry);
/* initialize console */
diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h
index 78b147732615..78388a611c22 100644
--- a/drivers/staging/lustre/lnet/selftest/console.h
+++ b/drivers/staging/lustre/lnet/selftest/console.h
@@ -184,6 +184,7 @@ lstcon_id2hash(lnet_process_id_t id, struct list_head *hash)
return &hash[idx];
}
+int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
int lstcon_console_init(void);
int lstcon_console_fini(void);
int lstcon_session_match(lst_sid_t sid);
diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c
index c2f121f44d33..abbd6287b4bd 100644
--- a/drivers/staging/lustre/lnet/selftest/framework.c
+++ b/drivers/staging/lustre/lnet/selftest/framework.c
@@ -784,8 +784,8 @@ sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc)
lnet_process_id_packed_t id;
int j;
- dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page);
- LASSERT(dests); /* my pages are within KVM always */
+ dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].bv_page);
+ LASSERT(dests); /* my pages are within KVM always */
id = dests[i % SFW_ID_PER_PAGE];
if (msg->msg_magic != SRPC_MSG_MAGIC)
sfw_unpack_id(id);
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
index 3b26d6eb4240..f5619d8744ef 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ b/drivers/staging/lustre/lnet/selftest/rpc.c
@@ -91,9 +91,9 @@ srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int nob)
LASSERT(nob > 0);
LASSERT(i >= 0 && i < bk->bk_niov);
- bk->bk_iovs[i].kiov_offset = 0;
- bk->bk_iovs[i].kiov_page = pg;
- bk->bk_iovs[i].kiov_len = nob;
+ bk->bk_iovs[i].bv_offset = 0;
+ bk->bk_iovs[i].bv_page = pg;
+ bk->bk_iovs[i].bv_len = nob;
return nob;
}
@@ -106,7 +106,7 @@ srpc_free_bulk(struct srpc_bulk *bk)
LASSERT(bk);
for (i = 0; i < bk->bk_niov; i++) {
- pg = bk->bk_iovs[i].kiov_page;
+ pg = bk->bk_iovs[i].bv_page;
if (!pg)
break;
diff --git a/drivers/staging/lustre/lustre/fid/fid_lib.c b/drivers/staging/lustre/lustre/fid/fid_lib.c
index 99ae7eb6720e..4e49cb356d64 100644
--- a/drivers/staging/lustre/lustre/fid/fid_lib.c
+++ b/drivers/staging/lustre/lustre/fid/fid_lib.c
@@ -63,14 +63,12 @@ const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE = {
FID_SEQ_NORMAL,
(__u64)~0ULL
};
-EXPORT_SYMBOL(LUSTRE_SEQ_SPACE_RANGE);
/* Zero range, used for init and other purposes. */
const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE = {
0,
0
};
-EXPORT_SYMBOL(LUSTRE_SEQ_ZERO_RANGE);
/* Lustre Big Fs Lock fid. */
const struct lu_fid LUSTRE_BFL_FID = { .f_seq = FID_SEQ_SPECIAL,
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
index 454744d25956..edd72b926f81 100644
--- a/drivers/staging/lustre/lustre/fid/fid_request.c
+++ b/drivers/staging/lustre/lustre/fid/fid_request.c
@@ -125,19 +125,19 @@ static int seq_client_rpc(struct lu_client_seq *seq,
if (!range_is_sane(output)) {
CERROR("%s: Invalid range received from server: "
- DRANGE"\n", seq->lcs_name, PRANGE(output));
+ DRANGE "\n", seq->lcs_name, PRANGE(output));
rc = -EINVAL;
goto out_req;
}
if (range_is_exhausted(output)) {
CERROR("%s: Range received from server is exhausted: "
- DRANGE"]\n", seq->lcs_name, PRANGE(output));
+ DRANGE "]\n", seq->lcs_name, PRANGE(output));
rc = -EINVAL;
goto out_req;
}
- CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence "DRANGE"]\n",
+ CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence " DRANGE "]\n",
seq->lcs_name, opcname, PRANGE(output));
out_req:
@@ -179,7 +179,7 @@ static int seq_client_alloc_seq(const struct lu_env *env,
seq->lcs_name, rc);
return rc;
}
- CDEBUG(D_INFO, "%s: New range - "DRANGE"\n",
+ CDEBUG(D_INFO, "%s: New range - " DRANGE "\n",
seq->lcs_name, PRANGE(&seq->lcs_space));
} else {
rc = 0;
diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c
index 81b7ca9ea2fd..3ed32d77f38b 100644
--- a/drivers/staging/lustre/lustre/fid/lproc_fid.c
+++ b/drivers/staging/lustre/lustre/fid/lproc_fid.c
@@ -105,7 +105,7 @@ ldebugfs_fid_space_seq_write(struct file *file,
rc = ldebugfs_fid_write_common(buffer, count, &seq->lcs_space);
if (rc == 0) {
- CDEBUG(D_INFO, "%s: Space: "DRANGE"\n",
+ CDEBUG(D_INFO, "%s: Space: " DRANGE "\n",
seq->lcs_name, PRANGE(&seq->lcs_space));
}
diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h
index f0efe5b9fbec..08eaec735d6f 100644
--- a/drivers/staging/lustre/lustre/fld/fld_internal.h
+++ b/drivers/staging/lustre/lustre/fld/fld_internal.h
@@ -31,6 +31,25 @@
*
* lustre/fld/fld_internal.h
*
+ * Subsystem Description:
+ * FLD is FID Location Database, which stores where (IE, on which MDT)
+ * FIDs are located.
+ * The database is basically a record file, each record consists of a FID
+ * sequence range, MDT/OST index, and flags. The FLD for the whole FS
+ * is only stored on the sequence controller(MDT0) right now, but each target
+ * also has its local FLD, which only stores the local sequence.
+ *
+ * The FLD subsystem usually has two tasks:
+ * 1. maintain the database, i.e. when the sequence controller allocates
+ * new sequence ranges to some nodes, it will call the FLD API to insert the
+ * location information <sequence_range, node_index> in FLDB.
+ *
+ * 2. Handle requests from other nodes, i.e. if client needs to know where
+ * the FID is located, if it can not find the information in the local cache,
+ * it will send a FLD lookup RPC to the FLD service, and the FLD service will
+ * look up the FLDB entry and return the location information to client.
+ *
+ *
* Author: Yury Umanets <umka@clusterfs.com>
* Author: Tom WangDi <wangdi@clusterfs.com>
*/
diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c
index e59d626a1548..0de72b717ce5 100644
--- a/drivers/staging/lustre/lustre/fld/fld_request.c
+++ b/drivers/staging/lustre/lustre/fld/fld_request.c
@@ -53,57 +53,6 @@
#include "../include/lustre_mdc.h"
#include "fld_internal.h"
-/* TODO: these 3 functions are copies of flow-control code from mdc_lib.c
- * It should be common thing. The same about mdc RPC lock
- */
-static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
-{
- int rc;
-
- spin_lock(&cli->cl_loi_list_lock);
- rc = list_empty(&mcw->mcw_entry);
- spin_unlock(&cli->cl_loi_list_lock);
- return rc;
-};
-
-static void fld_enter_request(struct client_obd *cli)
-{
- struct mdc_cache_waiter mcw;
- struct l_wait_info lwi = { 0 };
-
- spin_lock(&cli->cl_loi_list_lock);
- if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
- list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
- init_waitqueue_head(&mcw.mcw_waitq);
- spin_unlock(&cli->cl_loi_list_lock);
- l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi);
- } else {
- cli->cl_r_in_flight++;
- spin_unlock(&cli->cl_loi_list_lock);
- }
-}
-
-static void fld_exit_request(struct client_obd *cli)
-{
- struct list_head *l, *tmp;
- struct mdc_cache_waiter *mcw;
-
- spin_lock(&cli->cl_loi_list_lock);
- cli->cl_r_in_flight--;
- list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
- if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
- /* No free request slots anymore */
- break;
- }
-
- mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry);
- list_del_init(&mcw->mcw_entry);
- cli->cl_r_in_flight++;
- wake_up(&mcw->mcw_waitq);
- }
- spin_unlock(&cli->cl_loi_list_lock);
-}
-
static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq)
{
LASSERT(fld->lcf_count > 0);
@@ -270,7 +219,6 @@ int fld_client_del_target(struct lu_client_fld *fld, __u64 idx)
spin_unlock(&fld->lcf_lock);
return -ENOENT;
}
-EXPORT_SYMBOL(fld_client_del_target);
static struct dentry *fld_debugfs_dir;
@@ -439,9 +387,9 @@ int fld_client_rpc(struct obd_export *exp,
req->rq_reply_portal = MDC_REPLY_PORTAL;
ptlrpc_at_set_req_timeout(req);
- fld_enter_request(&exp->exp_obd->u.cli);
+ obd_get_request_slot(&exp->exp_obd->u.cli);
rc = ptlrpc_queue_wait(req);
- fld_exit_request(&exp->exp_obd->u.cli);
+ obd_put_request_slot(&exp->exp_obd->u.cli);
if (rc)
goto out_req;
@@ -505,7 +453,6 @@ void fld_client_flush(struct lu_client_fld *fld)
{
fld_cache_flush(fld->lcf_cache);
}
-EXPORT_SYMBOL(fld_client_flush);
static int __init fld_init(void)
{
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
index 3cd4a2577d90..89292c93dcd5 100644
--- a/drivers/staging/lustre/lustre/include/cl_object.h
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -93,8 +93,8 @@
* super-class definitions.
*/
#include "lu_object.h"
+#include "lustre_compat.h"
#include <linux/atomic.h>
-#include "linux/lustre_compat25.h"
#include <linux/mutex.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>
@@ -191,6 +191,9 @@ struct cl_attr {
* Group identifier for quota purposes.
*/
gid_t cat_gid;
+
+ /* nlink of the directory */
+ __u64 cat_nlink;
};
/**
@@ -320,7 +323,7 @@ struct cl_object_operations {
* to be used instead of newly created.
*/
int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index);
+ struct cl_page *page, pgoff_t index);
/**
* Initialize lock slice for this layer. Called top-to-bottom through
* every object layer when a new cl_lock is instantiated. Layer
@@ -366,8 +369,8 @@ struct cl_object_operations {
* \return the same convention as for
* cl_object_operations::coo_attr_get() is used.
*/
- int (*coo_attr_set)(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned valid);
+ int (*coo_attr_update)(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned int valid);
/**
* Update object configuration. Called top-to-bottom to modify object
* configuration.
@@ -392,6 +395,11 @@ struct cl_object_operations {
* mainly pages and locks.
*/
int (*coo_prune)(const struct lu_env *env, struct cl_object *obj);
+ /**
+ * Object getstripe method.
+ */
+ int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj,
+ struct lov_user_md __user *lum);
};
/**
@@ -687,17 +695,6 @@ enum cl_page_type {
};
/**
- * Flags maintained for every cl_page.
- */
-enum cl_page_flags {
- /**
- * Set when pagein completes. Used for debugging (read completes at
- * most once for a page).
- */
- CPF_READ_COMPLETED = 1 << 0
-};
-
-/**
* Fields are protected by the lock on struct page, except for atomics and
* immutables.
*
@@ -711,24 +708,19 @@ struct cl_page {
atomic_t cp_ref;
/** An object this page is a part of. Immutable after creation. */
struct cl_object *cp_obj;
- /** List of slices. Immutable after creation. */
- struct list_head cp_layers;
/** vmpage */
struct page *cp_vmpage;
+ /** Linkage of pages within group. Pages must be owned */
+ struct list_head cp_batch;
+ /** List of slices. Immutable after creation. */
+ struct list_head cp_layers;
+ /** Linkage of pages within cl_req. */
+ struct list_head cp_flight;
/**
* Page state. This field is const to avoid accidental update, it is
* modified only internally within cl_page.c. Protected by a VM lock.
*/
const enum cl_page_state cp_state;
- /** Linkage of pages within group. Protected by cl_page::cp_mutex. */
- struct list_head cp_batch;
- /** Mutex serializing membership of a page in a batch. */
- struct mutex cp_mutex;
- /** Linkage of pages within cl_req. */
- struct list_head cp_flight;
- /** Transfer error. */
- int cp_error;
-
/**
* Page type. Only CPT_TRANSIENT is used so far. Immutable after
* creation.
@@ -741,10 +733,6 @@ struct cl_page {
*/
struct cl_io *cp_owner;
/**
- * Debug information, the task is owning the page.
- */
- struct task_struct *cp_task;
- /**
* Owning IO request in cl_page_state::CPS_PAGEOUT and
* cl_page_state::CPS_PAGEIN states. This field is maintained only in
* the top-level pages. Protected by a VM lock.
@@ -756,8 +744,6 @@ struct cl_page {
struct lu_ref_link cp_obj_ref;
/** Link to a queue, for debugging. */
struct lu_ref_link cp_queue_ref;
- /** Per-page flags from enum cl_page_flags. Protected by a VM lock. */
- unsigned cp_flags;
/** Assigned if doing a sync_io */
struct cl_sync_io *cp_sync_io;
};
@@ -1056,23 +1042,32 @@ do { \
} \
} while (0)
-static inline int __page_in_use(const struct cl_page *page, int refc)
-{
- if (page->cp_type == CPT_CACHEABLE)
- ++refc;
- LASSERT(atomic_read(&page->cp_ref) > 0);
- return (atomic_read(&page->cp_ref) > refc);
-}
-
-#define cl_page_in_use(pg) __page_in_use(pg, 1)
-#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
-
static inline struct page *cl_page_vmpage(struct cl_page *page)
{
LASSERT(page->cp_vmpage);
return page->cp_vmpage;
}
+/**
+ * Check if a cl_page is in use.
+ *
+ * Client cache holds a refcount, this refcount will be dropped when
+ * the page is taken out of cache, see vvp_page_delete().
+ */
+static inline bool __page_in_use(const struct cl_page *page, int refc)
+{
+ return (atomic_read(&page->cp_ref) > refc + 1);
+}
+
+/**
+ * Caller itself holds a refcount of cl_page.
+ */
+#define cl_page_in_use(pg) __page_in_use(pg, 1)
+/**
+ * Caller doesn't hold a refcount.
+ */
+#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
+
/** @} cl_page */
/** \addtogroup cl_lock cl_lock
@@ -1771,12 +1766,14 @@ struct cl_io {
struct cl_setattr_io {
struct ost_lvb sa_attr;
unsigned int sa_valid;
+ int sa_stripe_index;
+ struct lu_fid *sa_parent_fid;
} ci_setattr;
struct cl_fault_io {
/** page index within file. */
pgoff_t ft_index;
/** bytes valid byte on a faulted page. */
- int ft_nob;
+ size_t ft_nob;
/** writable page? for nopage() only */
int ft_writable;
/** page of an executable? */
@@ -1909,7 +1906,7 @@ struct cl_req_attr {
/** Generic attributes for the server consumption. */
struct obdo *cra_oa;
/** Jobid */
- char cra_jobid[JOBSTATS_JOBID_SIZE];
+ char cra_jobid[LUSTRE_JOBID_SIZE];
};
/**
@@ -2176,14 +2173,16 @@ void cl_object_attr_lock(struct cl_object *o);
void cl_object_attr_unlock(struct cl_object *o);
int cl_object_attr_get(const struct lu_env *env, struct cl_object *obj,
struct cl_attr *attr);
-int cl_object_attr_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned valid);
+int cl_object_attr_update(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned int valid);
int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
struct ost_lvb *lvb);
int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
const struct cl_object_conf *conf);
int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
+int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
+ struct lov_user_md __user *lum);
/**
* Returns true, iff \a o0 and \a o1 are slices of the same object.
@@ -2197,6 +2196,7 @@ static inline void cl_object_page_init(struct cl_object *clob, int size)
{
clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize;
cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size);
+ WARN_ON(cl_object_header(clob)->coh_page_bufsize > 512);
}
static inline void *cl_object_page_slice(struct cl_object *clob,
@@ -2263,6 +2263,8 @@ void cl_page_unassume(const struct lu_env *env,
struct cl_io *io, struct cl_page *pg);
void cl_page_disown(const struct lu_env *env,
struct cl_io *io, struct cl_page *page);
+void cl_page_disown0(const struct lu_env *env,
+ struct cl_io *io, struct cl_page *pg);
int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io);
/** @} ownership */
@@ -2304,7 +2306,7 @@ int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
struct cl_page *page, pgoff_t *max_index);
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
-int cl_page_size(const struct cl_object *obj);
+size_t cl_page_size(const struct cl_object *obj);
int cl_pages_prune(const struct lu_env *env, struct cl_object *obj);
void cl_lock_print(const struct lu_env *env, void *cookie,
@@ -2333,7 +2335,7 @@ struct cl_client_cache {
/**
* # of LRU entries available
*/
- atomic_t ccc_lru_left;
+ atomic_long_t ccc_lru_left;
/**
* List of entities(OSCs) for this LRU cache
*/
@@ -2347,14 +2349,18 @@ struct cl_client_cache {
*/
spinlock_t ccc_lru_lock;
/**
+ * Set if unstable check is enabled
+ */
+ unsigned int ccc_unstable_check:1;
+ /**
* # of unstable pages for this mount point
*/
- atomic_t ccc_unstable_nr;
+ atomic_long_t ccc_unstable_nr;
/**
* Waitq for awaiting unstable pages to reach zero.
* Used at umounting time and signaled on BRW commit
*/
- wait_queue_head_t ccc_unstable_waitq;
+ wait_queue_head_t ccc_unstable_waitq;
};
diff --git a/drivers/staging/lustre/lustre/include/interval_tree.h b/drivers/staging/lustre/lustre/include/interval_tree.h
index 4a15228b5570..5d387d372547 100644
--- a/drivers/staging/lustre/lustre/include/interval_tree.h
+++ b/drivers/staging/lustre/lustre/include/interval_tree.h
@@ -63,6 +63,11 @@ static inline int interval_is_intree(struct interval_node *node)
return node->in_intree == 1;
}
+static inline __u64 interval_low(struct interval_node *node)
+{
+ return node->in_extent.start;
+}
+
static inline __u64 interval_high(struct interval_node *node)
{
return node->in_extent.end;
@@ -77,8 +82,29 @@ static inline void interval_set(struct interval_node *node,
node->in_max_high = end;
}
+/*
+ * Rules to write an interval callback.
+ * - the callback returns INTERVAL_ITER_STOP when it thinks the iteration
+ * should be stopped. It will then cause the iteration function to return
+ * immediately with return value INTERVAL_ITER_STOP.
+ * - callbacks for interval_iterate and interval_iterate_reverse: every
+ * node in the tree will be set to @node before the callback is called
+ * - callback for interval_search: only overlapping nodes will be set to
+ * @node before the callback is called.
+ */
+typedef enum interval_iter (*interval_callback_t)(struct interval_node *node,
+ void *args);
+
struct interval_node *interval_insert(struct interval_node *node,
struct interval_node **root);
void interval_erase(struct interval_node *node, struct interval_node **root);
+/*
+ * Search the extents in the tree and call @func for each overlapping
+ * extent.
+ */
+enum interval_iter interval_search(struct interval_node *root,
+ struct interval_node_extent *ex,
+ interval_callback_t func, void *data);
+
#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
deleted file mode 100644
index d18e8a76bb25..000000000000
--- a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _LINUX_LL_H
-#define _LINUX_LL_H
-
-#ifndef _LL_H
-#error Do not #include this file directly. #include <lustre_lite.h> instead
-#endif
-
-#include <linux/statfs.h>
-
-#include <linux/fs.h>
-#include <linux/dcache.h>
-
-#include "../obd_class.h"
-#include "../lustre_net.h"
-#include "../lustre_ha.h"
-
-#include <linux/rbtree.h>
-#include "../../include/linux/lustre_compat25.h"
-#include <linux/pagemap.h>
-
-/* lprocfs.c */
-enum {
- LPROC_LL_DIRTY_HITS = 0,
- LPROC_LL_DIRTY_MISSES,
- LPROC_LL_READ_BYTES,
- LPROC_LL_WRITE_BYTES,
- LPROC_LL_BRW_READ,
- LPROC_LL_BRW_WRITE,
- LPROC_LL_OSC_READ,
- LPROC_LL_OSC_WRITE,
- LPROC_LL_IOCTL,
- LPROC_LL_OPEN,
- LPROC_LL_RELEASE,
- LPROC_LL_MAP,
- LPROC_LL_LLSEEK,
- LPROC_LL_FSYNC,
- LPROC_LL_READDIR,
- LPROC_LL_SETATTR,
- LPROC_LL_TRUNC,
- LPROC_LL_FLOCK,
- LPROC_LL_GETATTR,
- LPROC_LL_CREATE,
- LPROC_LL_LINK,
- LPROC_LL_UNLINK,
- LPROC_LL_SYMLINK,
- LPROC_LL_MKDIR,
- LPROC_LL_RMDIR,
- LPROC_LL_MKNOD,
- LPROC_LL_RENAME,
- LPROC_LL_STAFS,
- LPROC_LL_ALLOC_INODE,
- LPROC_LL_SETXATTR,
- LPROC_LL_GETXATTR,
- LPROC_LL_GETXATTR_HITS,
- LPROC_LL_LISTXATTR,
- LPROC_LL_REMOVEXATTR,
- LPROC_LL_INODE_PERM,
- LPROC_LL_FILE_OPCODES
-};
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_user.h b/drivers/staging/lustre/lustre/include/linux/lustre_user.h
deleted file mode 100644
index e967950e8536..000000000000
--- a/drivers/staging/lustre/lustre/include/linux/lustre_user.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/linux/lustre_user.h
- *
- * Lustre public user-space interface definitions.
- */
-
-#ifndef _LINUX_LUSTRE_USER_H
-#define _LINUX_LUSTRE_USER_H
-
-# include <linux/quota.h>
-
-/*
- * asm-x86_64/processor.h on some SLES 9 distros seems to use
- * kernel-only typedefs. fortunately skipping it altogether is ok
- * (for now).
- */
-#define __ASM_X86_64_PROCESSOR_H
-
-#include <linux/string.h>
-
-/*
- * We need to always use 64bit version because the structure
- * is shared across entire cluster where 32bit and 64bit machines
- * are co-existing.
- */
-#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
-typedef struct stat64 lstat_t;
-#define lstat_f lstat64
-#else
-typedef struct stat lstat_t;
-#define lstat_f lstat
-#endif
-
-#define HAVE_LOV_USER_MDS_DATA
-
-#endif /* _LUSTRE_USER_H */
diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h
index d68e60e7fef7..cc0713ef8ae5 100644
--- a/drivers/staging/lustre/lustre/include/lprocfs_status.h
+++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h
@@ -165,8 +165,10 @@ struct lprocfs_percpu {
struct lprocfs_counter lp_cntr[0];
};
-#define LPROCFS_GET_NUM_CPU 0x0001
-#define LPROCFS_GET_SMP_ID 0x0002
+enum lprocfs_stats_lock_ops {
+ LPROCFS_GET_NUM_CPU = 0x0001, /* number allocated per-CPU stats */
+ LPROCFS_GET_SMP_ID = 0x0002, /* current stat to be updated */
+};
enum lprocfs_stats_flags {
LPROCFS_STATS_FLAG_NONE = 0x0000, /* per cpu counter */
@@ -363,82 +365,99 @@ static inline void s2dhms(struct dhms *ts, time64_t secs64)
#define JOBSTATS_PROCNAME_UID "procname_uid"
#define JOBSTATS_NODELOCAL "nodelocal"
+/* obd_config.c */
+void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg));
+
int lprocfs_write_frac_helper(const char __user *buffer,
unsigned long count, int *val, int mult);
int lprocfs_read_frac_helper(char *buffer, unsigned long count,
long val, int mult);
int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid);
-/*
- * \return value
- * < 0 : on error (only possible for opc as LPROCFS_GET_SMP_ID)
+
+/**
+ * Lock statistics structure for access, possibly only on this CPU.
+ *
+ * The statistics struct may be allocated with per-CPU structures for
+ * efficient concurrent update (usually only on server-wide stats), or
+ * as a single global struct (e.g. for per-client or per-job statistics),
+ * so the required locking depends on the type of structure allocated.
+ *
+ * For per-CPU statistics, pin the thread to the current cpuid so that
+ * it will only access the statistics for that CPU. If the stats structure
+ * for the current CPU has not been allocated (or previously freed),
+ * allocate it now. The per-CPU statistics do not need locking since
+ * the thread is pinned to the CPU during update.
+ *
+ * For global statistics, lock the stats structure to prevent concurrent update.
+ *
+ * \param[in] stats statistics structure to lock
+ * \param[in] opc type of operation:
+ * LPROCFS_GET_SMP_ID: "lock" and return current CPU index
+ * for incrementing statistics for that CPU
+ * LPROCFS_GET_NUM_CPU: "lock" and return number of used
+ * CPU indices to iterate over all indices
+ * \param[out] flags CPU interrupt saved state for IRQ-safe locking
+ *
+ * \retval cpuid of current thread or number of allocated structs
+ * \retval negative on error (only for opc LPROCFS_GET_SMP_ID + per-CPU stats)
*/
-static inline int lprocfs_stats_lock(struct lprocfs_stats *stats, int opc,
+static inline int lprocfs_stats_lock(struct lprocfs_stats *stats,
+ enum lprocfs_stats_lock_ops opc,
unsigned long *flags)
{
- int rc = 0;
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+ spin_lock_irqsave(&stats->ls_lock, *flags);
+ else
+ spin_lock(&stats->ls_lock);
+ return opc == LPROCFS_GET_NUM_CPU ? 1 : 0;
+ }
switch (opc) {
- default:
- LBUG();
+ case LPROCFS_GET_SMP_ID: {
+ unsigned int cpuid = get_cpu();
- case LPROCFS_GET_SMP_ID:
- if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
- if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
- spin_lock_irqsave(&stats->ls_lock, *flags);
- else
- spin_lock(&stats->ls_lock);
- return 0;
- } else {
- unsigned int cpuid = get_cpu();
-
- if (unlikely(!stats->ls_percpu[cpuid])) {
- rc = lprocfs_stats_alloc_one(stats, cpuid);
- if (rc < 0) {
- put_cpu();
- return rc;
- }
+ if (unlikely(!stats->ls_percpu[cpuid])) {
+ int rc = lprocfs_stats_alloc_one(stats, cpuid);
+
+ if (rc < 0) {
+ put_cpu();
+ return rc;
}
- return cpuid;
}
-
+ return cpuid;
+ }
case LPROCFS_GET_NUM_CPU:
- if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
- if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
- spin_lock_irqsave(&stats->ls_lock, *flags);
- else
- spin_lock(&stats->ls_lock);
- return 1;
- }
return stats->ls_biggest_alloc_num;
+ default:
+ LBUG();
}
}
-static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats, int opc,
+/**
+ * Unlock statistics structure after access.
+ *
+ * Unlock the lock acquired via lprocfs_stats_lock() for global statistics,
+ * or unpin this thread from the current cpuid for per-CPU statistics.
+ *
+ * This function must be called using the same arguments as used when calling
+ * lprocfs_stats_lock() so that the correct operation can be performed.
+ *
+ * \param[in] stats statistics structure to unlock
+ * \param[in] opc type of operation (current cpuid or number of structs)
+ * \param[in] flags CPU interrupt saved state for IRQ-safe locking
+ */
+static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats,
+ enum lprocfs_stats_lock_ops opc,
unsigned long *flags)
{
- switch (opc) {
- default:
- LBUG();
-
- case LPROCFS_GET_SMP_ID:
- if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
- if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
- spin_unlock_irqrestore(&stats->ls_lock, *flags);
- else
- spin_unlock(&stats->ls_lock);
- } else {
- put_cpu();
- }
- return;
-
- case LPROCFS_GET_NUM_CPU:
- if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
- if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
- spin_unlock_irqrestore(&stats->ls_lock, *flags);
- else
- spin_unlock(&stats->ls_lock);
- }
- return;
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+ spin_unlock_irqrestore(&stats->ls_lock, *flags);
+ else
+ spin_unlock(&stats->ls_lock);
+ } else if (opc == LPROCFS_GET_SMP_ID) {
+ put_cpu();
}
}
@@ -496,7 +515,7 @@ static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
int idx,
enum lprocfs_fields_flags field)
{
- int i;
+ unsigned int i;
unsigned int num_cpu;
unsigned long flags = 0;
__u64 ret = 0;
@@ -681,6 +700,12 @@ static struct lustre_attr lustre_attr_##name = __ATTR(name, mode, show, store)
extern const struct sysfs_ops lustre_sysfs_ops;
+struct root_squash_info;
+int lprocfs_wr_root_squash(const char *buffer, unsigned long count,
+ struct root_squash_info *squash, char *name);
+int lprocfs_wr_nosquash_nids(const char *buffer, unsigned long count,
+ struct root_squash_info *squash, char *name);
+
/* all quota proc functions */
int lprocfs_quota_rd_bunit(char *page, char **start,
loff_t off, int count,
diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
index 6e25c1bb6aa3..260643ee0d48 100644
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -327,7 +327,7 @@ struct lu_device_type {
/**
* Number of existing device type instances.
*/
- unsigned ldt_device_nr;
+ atomic_t ldt_device_nr;
/**
* Linkage into a global list of all device types.
*
@@ -602,7 +602,7 @@ struct lu_site {
/**
* index of bucket on hash table while purging
*/
- int ls_purge_start;
+ unsigned int ls_purge_start;
/**
* Top-level device for this stack.
*/
@@ -623,6 +623,11 @@ struct lu_site {
spinlock_t ls_ld_lock;
/**
+ * Lock to serialize site purge.
+ */
+ struct mutex ls_purge_mutex;
+
+ /**
* lu_site stats
*/
struct lprocfs_stats *ls_stats;
@@ -673,7 +678,6 @@ void lu_object_add(struct lu_object *before, struct lu_object *o);
int lu_device_type_init(struct lu_device_type *ldt);
void lu_device_type_fini(struct lu_device_type *ldt);
-void lu_types_stop(void);
/** @} ctors */
@@ -1025,7 +1029,8 @@ enum lu_context_tag {
/**
* Contexts usable in cache shrinker thread.
*/
- LCT_SHRINKER = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD|LCT_NOREF
+ LCT_SHRINKER = LCT_MD_THREAD | LCT_DT_THREAD | LCT_CL_THREAD |
+ LCT_NOREF
};
/**
@@ -1264,12 +1269,28 @@ struct lu_name {
};
/**
+ * Validate names (path components)
+ *
+ * To be valid \a name must be non-empty, '\0' terminated of length \a
+ * name_len, and not contain '/'. The maximum length of a name (before
+ * say -ENAMETOOLONG will be returned) is really controlled by llite
+ * and the server. We only check for something insane coming from bad
+ * integer handling here.
+ */
+static inline bool lu_name_is_valid_2(const char *name, size_t name_len)
+{
+ return name && name_len > 0 && name_len < INT_MAX &&
+ name[name_len] == '\0' && strlen(name) == name_len &&
+ !memchr(name, '/', name_len);
+}
+
+/**
* Common buffer structure to be passed around for various xattr_{s,g}et()
* methods.
*/
struct lu_buf {
void *lb_buf;
- ssize_t lb_len;
+ size_t lb_len;
};
#define DLUBUF "(%p %zu)"
@@ -1298,5 +1319,12 @@ struct lu_kmem_descr {
int lu_kmem_init(struct lu_kmem_descr *caches);
void lu_kmem_fini(struct lu_kmem_descr *caches);
+void lu_buf_free(struct lu_buf *buf);
+void lu_buf_alloc(struct lu_buf *buf, size_t size);
+void lu_buf_realloc(struct lu_buf *buf, size_t size);
+
+int lu_buf_check_and_grow(struct lu_buf *buf, size_t len);
+struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len);
+
/** @} lu */
#endif /* __LUSTRE_LU_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 051864c23b5b..72eaee95c6b8 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -93,6 +93,7 @@
/* Defn's shared with user-space. */
#include "lustre_user.h"
#include "lustre_errno.h"
+#include "../lustre_ver.h"
/*
* GENERAL STUFF
@@ -196,12 +197,12 @@ static inline unsigned fld_range_type(const struct lu_seq_range *range)
return range->lsr_flags & LU_SEQ_RANGE_MASK;
}
-static inline int fld_range_is_ost(const struct lu_seq_range *range)
+static inline bool fld_range_is_ost(const struct lu_seq_range *range)
{
return fld_range_type(range) == LU_SEQ_RANGE_OST;
}
-static inline int fld_range_is_mdt(const struct lu_seq_range *range)
+static inline bool fld_range_is_mdt(const struct lu_seq_range *range)
{
return fld_range_type(range) == LU_SEQ_RANGE_MDT;
}
@@ -260,23 +261,23 @@ static inline void range_init(struct lu_seq_range *range)
* check if given seq id \a s is within given range \a r
*/
-static inline int range_within(const struct lu_seq_range *range,
- __u64 s)
+static inline bool range_within(const struct lu_seq_range *range,
+ __u64 s)
{
return s >= range->lsr_start && s < range->lsr_end;
}
-static inline int range_is_sane(const struct lu_seq_range *range)
+static inline bool range_is_sane(const struct lu_seq_range *range)
{
return (range->lsr_end >= range->lsr_start);
}
-static inline int range_is_zero(const struct lu_seq_range *range)
+static inline bool range_is_zero(const struct lu_seq_range *range)
{
return (range->lsr_start == 0 && range->lsr_end == 0);
}
-static inline int range_is_exhausted(const struct lu_seq_range *range)
+static inline bool range_is_exhausted(const struct lu_seq_range *range)
{
return range_space(range) == 0;
@@ -437,69 +438,69 @@ enum dot_lustre_oid {
FID_OID_DOT_LUSTRE_OBF = 2UL,
};
-static inline int fid_seq_is_mdt0(__u64 seq)
+static inline bool fid_seq_is_mdt0(__u64 seq)
{
return (seq == FID_SEQ_OST_MDT0);
}
-static inline int fid_seq_is_mdt(const __u64 seq)
+static inline bool fid_seq_is_mdt(__u64 seq)
{
return seq == FID_SEQ_OST_MDT0 || seq >= FID_SEQ_NORMAL;
};
-static inline int fid_seq_is_echo(__u64 seq)
+static inline bool fid_seq_is_echo(__u64 seq)
{
return (seq == FID_SEQ_ECHO);
}
-static inline int fid_is_echo(const struct lu_fid *fid)
+static inline bool fid_is_echo(const struct lu_fid *fid)
{
return fid_seq_is_echo(fid_seq(fid));
}
-static inline int fid_seq_is_llog(__u64 seq)
+static inline bool fid_seq_is_llog(__u64 seq)
{
return (seq == FID_SEQ_LLOG);
}
-static inline int fid_is_llog(const struct lu_fid *fid)
+static inline bool fid_is_llog(const struct lu_fid *fid)
{
/* file with OID == 0 is not llog but contains last oid */
return fid_seq_is_llog(fid_seq(fid)) && fid_oid(fid) > 0;
}
-static inline int fid_seq_is_rsvd(const __u64 seq)
+static inline bool fid_seq_is_rsvd(__u64 seq)
{
return (seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD);
};
-static inline int fid_seq_is_special(const __u64 seq)
+static inline bool fid_seq_is_special(__u64 seq)
{
return seq == FID_SEQ_SPECIAL;
};
-static inline int fid_seq_is_local_file(const __u64 seq)
+static inline bool fid_seq_is_local_file(__u64 seq)
{
return seq == FID_SEQ_LOCAL_FILE ||
seq == FID_SEQ_LOCAL_NAME;
};
-static inline int fid_seq_is_root(const __u64 seq)
+static inline bool fid_seq_is_root(__u64 seq)
{
return seq == FID_SEQ_ROOT;
}
-static inline int fid_seq_is_dot(const __u64 seq)
+static inline bool fid_seq_is_dot(__u64 seq)
{
return seq == FID_SEQ_DOT_LUSTRE;
}
-static inline int fid_seq_is_default(const __u64 seq)
+static inline bool fid_seq_is_default(__u64 seq)
{
return seq == FID_SEQ_LOV_DEFAULT;
}
-static inline int fid_is_mdt0(const struct lu_fid *fid)
+static inline bool fid_is_mdt0(const struct lu_fid *fid)
{
return fid_seq_is_mdt0(fid_seq(fid));
}
@@ -516,12 +517,12 @@ static inline void lu_root_fid(struct lu_fid *fid)
* \param fid the fid to be tested.
* \return true if the fid is a igif; otherwise false.
*/
-static inline int fid_seq_is_igif(const __u64 seq)
+static inline bool fid_seq_is_igif(__u64 seq)
{
return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX;
}
-static inline int fid_is_igif(const struct lu_fid *fid)
+static inline bool fid_is_igif(const struct lu_fid *fid)
{
return fid_seq_is_igif(fid_seq(fid));
}
@@ -531,27 +532,27 @@ static inline int fid_is_igif(const struct lu_fid *fid)
* \param fid the fid to be tested.
* \return true if the fid is a idif; otherwise false.
*/
-static inline int fid_seq_is_idif(const __u64 seq)
+static inline bool fid_seq_is_idif(__u64 seq)
{
return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX;
}
-static inline int fid_is_idif(const struct lu_fid *fid)
+static inline bool fid_is_idif(const struct lu_fid *fid)
{
return fid_seq_is_idif(fid_seq(fid));
}
-static inline int fid_is_local_file(const struct lu_fid *fid)
+static inline bool fid_is_local_file(const struct lu_fid *fid)
{
return fid_seq_is_local_file(fid_seq(fid));
}
-static inline int fid_seq_is_norm(const __u64 seq)
+static inline bool fid_seq_is_norm(__u64 seq)
{
return (seq >= FID_SEQ_NORMAL);
}
-static inline int fid_is_norm(const struct lu_fid *fid)
+static inline bool fid_is_norm(const struct lu_fid *fid)
{
return fid_seq_is_norm(fid_seq(fid));
}
@@ -658,7 +659,7 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
oi->oi_fid.f_oid = oid;
oi->oi_fid.f_ver = oid >> 48;
} else {
- if (oid > OBIF_MAX_OID) {
+ if (oid >= OBIF_MAX_OID) {
CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
return;
}
@@ -683,7 +684,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
fid->f_oid = oid;
fid->f_ver = oid >> 48;
} else {
- if (oid > OBIF_MAX_OID) {
+ if (oid >= OBIF_MAX_OID) {
CERROR("Too large OID %#llx to set REG "DFID"\n",
(unsigned long long)oid, PFID(fid));
return -EBADF;
@@ -769,7 +770,7 @@ static inline int fid_to_ostid(const struct lu_fid *fid, struct ost_id *ostid)
}
/* Check whether the fid is for LAST_ID */
-static inline int fid_is_last_id(const struct lu_fid *fid)
+static inline bool fid_is_last_id(const struct lu_fid *fid)
{
return (fid_oid(fid) == 0);
}
@@ -838,7 +839,7 @@ static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
dst->f_ver = be32_to_cpu(fid_ver(src));
}
-static inline int fid_is_sane(const struct lu_fid *fid)
+static inline bool fid_is_sane(const struct lu_fid *fid)
{
return fid &&
((fid_seq(fid) >= FID_SEQ_START && fid_ver(fid) == 0) ||
@@ -846,15 +847,10 @@ static inline int fid_is_sane(const struct lu_fid *fid)
fid_seq_is_rsvd(fid_seq(fid)));
}
-static inline int fid_is_zero(const struct lu_fid *fid)
-{
- return fid_seq(fid) == 0 && fid_oid(fid) == 0;
-}
-
void lustre_swab_lu_fid(struct lu_fid *fid);
void lustre_swab_lu_seq_range(struct lu_seq_range *range);
-static inline int lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
+static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
{
return memcmp(f0, f1, sizeof(*f0)) == 0;
}
@@ -1017,12 +1013,12 @@ static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
return next;
}
-static inline int lu_dirent_calc_size(int namelen, __u16 attr)
+static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr)
{
- int size;
+ size_t size;
if (attr & LUDA_TYPE) {
- const unsigned align = sizeof(struct luda_type) - 1;
+ const size_t align = sizeof(struct luda_type) - 1;
size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
size += sizeof(struct luda_type);
@@ -1033,15 +1029,6 @@ static inline int lu_dirent_calc_size(int namelen, __u16 attr)
return (size + 7) & ~7;
}
-static inline int lu_dirent_size(struct lu_dirent *ent)
-{
- if (le16_to_cpu(ent->lde_reclen) == 0) {
- return lu_dirent_calc_size(le16_to_cpu(ent->lde_namelen),
- le32_to_cpu(ent->lde_attrs));
- }
- return le16_to_cpu(ent->lde_reclen);
-}
-
#define MDS_DIR_END_OFF 0xfffffffffffffffeULL
/**
@@ -1067,19 +1054,19 @@ struct lustre_handle {
#define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabeULL
-static inline int lustre_handle_is_used(struct lustre_handle *lh)
+static inline bool lustre_handle_is_used(const struct lustre_handle *lh)
{
return lh->cookie != 0ull;
}
-static inline int lustre_handle_equal(const struct lustre_handle *lh1,
- const struct lustre_handle *lh2)
+static inline bool lustre_handle_equal(const struct lustre_handle *lh1,
+ const struct lustre_handle *lh2)
{
return lh1->cookie == lh2->cookie;
}
static inline void lustre_handle_copy(struct lustre_handle *tgt,
- struct lustre_handle *src)
+ const struct lustre_handle *src)
{
tgt->cookie = src->cookie;
}
@@ -1105,7 +1092,7 @@ struct lustre_msg_v2 {
/* without gss, ptlrpc_body is put at the first buffer. */
#define PTLRPC_NUM_VERSIONS 4
-#define JOBSTATS_JOBID_SIZE 32 /* 32 bytes string */
+
struct ptlrpc_body_v3 {
struct lustre_handle pb_handle;
__u32 pb_type;
@@ -1127,7 +1114,7 @@ struct ptlrpc_body_v3 {
__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
/* padding for future needs */
__u64 pb_padding[4];
- char pb_jobid[JOBSTATS_JOBID_SIZE];
+ char pb_jobid[LUSTRE_JOBID_SIZE];
};
#define ptlrpc_body ptlrpc_body_v3
@@ -1293,6 +1280,9 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
#define OBD_CONNECT_OPEN_BY_FID 0x20000000000000ULL /* open by fid won't pack
* name in request
*/
+#define OBD_CONNECT_LFSCK 0x40000000000000ULL/* support online LFSCK */
+#define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */
+#define OBD_CONNECT_DIR_STRIPE 0x400000000000000ULL/* striped DNE dir */
/* XXX README XXX:
* Please DO NOT add flag values here before first ensuring that this same
@@ -1318,14 +1308,6 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
#define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
OBD_CONNECT_FULL20)
-#define OBD_OCD_VERSION(major, minor, patch, fix) (((major)<<24) + \
- ((minor)<<16) + \
- ((patch)<<8) + (fix))
-#define OBD_OCD_VERSION_MAJOR(version) ((int)((version)>>24)&255)
-#define OBD_OCD_VERSION_MINOR(version) ((int)((version)>>16)&255)
-#define OBD_OCD_VERSION_PATCH(version) ((int)((version)>>8)&255)
-#define OBD_OCD_VERSION_FIX(version) ((int)(version)&255)
-
/* This structure is used for both request and reply.
*
* If we eventually have separate connect data for different types, which we
@@ -1478,10 +1460,23 @@ enum obdo_flags {
OBD_FL_LOCAL_MASK = 0xF0000000,
};
-#define LOV_MAGIC_V1 0x0BD10BD0
-#define LOV_MAGIC LOV_MAGIC_V1
-#define LOV_MAGIC_JOIN_V1 0x0BD20BD0
-#define LOV_MAGIC_V3 0x0BD30BD0
+/*
+ * All LOV EA magics should have the same postfix. If a new version of
+ * Lustre introduces a new LOV EA magic, then after a downgrade to an old
+ * Lustre, even though the old version does not recognize the new
+ * magic, it can still distinguish corrupted cases by checking
+ * the magic's postfix.
+ */
+#define LOV_MAGIC_MAGIC 0x0BD0
+#define LOV_MAGIC_MASK 0xFFFF
+
+#define LOV_MAGIC_V1 (0x0BD10000 | LOV_MAGIC_MAGIC)
+#define LOV_MAGIC_JOIN_V1 (0x0BD20000 | LOV_MAGIC_MAGIC)
+#define LOV_MAGIC_V3 (0x0BD30000 | LOV_MAGIC_MAGIC)
+#define LOV_MAGIC_MIGRATE (0x0BD40000 | LOV_MAGIC_MAGIC)
+/* reserved for specifying OSTs */
+#define LOV_MAGIC_SPECIFIC (0x0BD50000 | LOV_MAGIC_MAGIC)
+#define LOV_MAGIC LOV_MAGIC_V1
/*
* magic for fully defined striping
@@ -1498,14 +1493,6 @@ enum obdo_flags {
#define LOV_MAGIC_V1_DEF 0x0CD10BD0
#define LOV_MAGIC_V3_DEF 0x0CD30BD0
-#define LOV_PATTERN_RAID0 0x001 /* stripes are used round-robin */
-#define LOV_PATTERN_RAID1 0x002 /* stripes are mirrors of each other */
-#define LOV_PATTERN_FIRST 0x100 /* first stripe is not in round-robin */
-#define LOV_PATTERN_CMOBD 0x200
-
-#define LOV_PATTERN_F_MASK 0xffff0000
-#define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */
-
#define lov_pattern(pattern) (pattern & ~LOV_PATTERN_F_MASK)
#define lov_pattern_flags(pattern) (pattern & LOV_PATTERN_F_MASK)
@@ -1569,25 +1556,25 @@ static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid)
oi->oi.oi_id = oid;
}
-static inline __u64 lmm_oi_id(struct ost_id *oi)
+static inline __u64 lmm_oi_id(const struct ost_id *oi)
{
return oi->oi.oi_id;
}
-static inline __u64 lmm_oi_seq(struct ost_id *oi)
+static inline __u64 lmm_oi_seq(const struct ost_id *oi)
{
return oi->oi.oi_seq;
}
static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi,
- struct ost_id *src_oi)
+ const struct ost_id *src_oi)
{
dst_oi->oi.oi_id = le64_to_cpu(src_oi->oi.oi_id);
dst_oi->oi.oi_seq = le64_to_cpu(src_oi->oi.oi_seq);
}
static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
- struct ost_id *src_oi)
+ const struct ost_id *src_oi)
{
dst_oi->oi.oi_id = cpu_to_le64(src_oi->oi.oi_id);
dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
@@ -1610,6 +1597,7 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
#define XATTR_NAME_LOV "trusted.lov"
#define XATTR_NAME_LMA "trusted.lma"
#define XATTR_NAME_LMV "trusted.lmv"
+#define XATTR_NAME_DEFAULT_LMV "trusted.dmv"
#define XATTR_NAME_LINK "trusted.link"
#define XATTR_NAME_FID "trusted.fid"
#define XATTR_NAME_VERSION "trusted.version"
@@ -1625,7 +1613,7 @@ struct lov_mds_md_v3 { /* LOV EA mds/wire data (little-endian) */
/* lmm_stripe_count used to be __u32 */
__u16 lmm_stripe_count; /* num stripes in use for this object */
__u16 lmm_layout_gen; /* layout generation number */
- char lmm_pool_name[LOV_MAXPOOLNAME]; /* must be 32bit aligned */
+ char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* must be 32bit aligned */
struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
};
@@ -1727,6 +1715,8 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
#define OBD_MD_FLRELEASED (0x0020000000000000ULL) /* file released */
+#define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */
+
#define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \
OBD_MD_FLMODE | OBD_MD_FLTYPE | OBD_MD_FLUID | \
@@ -1782,7 +1772,7 @@ void lustre_swab_obd_statfs(struct obd_statfs *os);
* it to sync quickly
*/
-#define OBD_OBJECT_EOF 0xffffffffffffffffULL
+#define OBD_OBJECT_EOF LUSTRE_EOF
#define OST_MIN_PRECREATE 32
#define OST_MAX_PRECREATE 20000
@@ -1806,9 +1796,9 @@ void lustre_swab_obd_ioobj(struct obd_ioobj *ioo);
/* multiple of 8 bytes => can array */
struct niobuf_remote {
- __u64 offset;
- __u32 len;
- __u32 flags;
+ __u64 rnb_offset;
+ __u32 rnb_len;
+ __u32 rnb_flags;
};
void lustre_swab_niobuf_remote(struct niobuf_remote *nbr);
@@ -1878,12 +1868,6 @@ struct obd_quotactl {
void lustre_swab_obd_quotactl(struct obd_quotactl *q);
-#define Q_QUOTACHECK 0x800100 /* deprecated as of 2.4 */
-#define Q_INITQUOTA 0x800101 /* deprecated as of 2.4 */
-#define Q_GETOINFO 0x800102 /* get obd quota info */
-#define Q_GETOQUOTA 0x800103 /* get obd quotas */
-#define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */
-
#define Q_COPY(out, in, member) (out)->member = (in)->member
#define QCTL_COPY(out, in) \
@@ -1946,8 +1930,8 @@ enum mds_cmd {
MDS_DISCONNECT = 39,
MDS_GETSTATUS = 40,
MDS_STATFS = 41,
- MDS_PIN = 42,
- MDS_UNPIN = 43,
+ MDS_PIN = 42, /* obsolete, never used in a release */
+ MDS_UNPIN = 43, /* obsolete, never used in a release */
MDS_SYNC = 44,
MDS_DONE_WRITING = 45,
MDS_SET_INFO = 46,
@@ -1956,7 +1940,7 @@ enum mds_cmd {
MDS_GETXATTR = 49,
MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */
MDS_WRITEPAGE = 51,
- MDS_IS_SUBDIR = 52,
+ MDS_IS_SUBDIR = 52, /* obsolete, never used in a release */
MDS_GET_INFO = 53,
MDS_HSM_STATE_GET = 54,
MDS_HSM_STATE_SET = 55,
@@ -1984,7 +1968,7 @@ enum mdt_reint_cmd {
REINT_OPEN = 6,
REINT_SETXATTR = 7,
REINT_RMENTRY = 8,
-/* REINT_WRITE = 9, */
+ REINT_MIGRATE = 9,
REINT_MAX
};
@@ -2003,6 +1987,7 @@ void lustre_swab_generic_32s(__u32 *val);
#define DISP_OPEN_LOCK 0x02000000
#define DISP_OPEN_LEASE 0x04000000
#define DISP_OPEN_STRIPE 0x08000000
+#define DISP_OPEN_DENY 0x10000000
/* INODE LOCK PARTS */
#define MDS_INODELOCK_LOOKUP 0x000001 /* For namespace, dentry etc, and also
@@ -2028,7 +2013,7 @@ void lustre_swab_generic_32s(__u32 *val);
#define MDS_INODELOCK_MAXSHIFT 5
/* This FULL lock is useful to take on unlink sort of operations */
-#define MDS_INODELOCK_FULL ((1<<(MDS_INODELOCK_MAXSHIFT+1))-1)
+#define MDS_INODELOCK_FULL ((1 << (MDS_INODELOCK_MAXSHIFT + 1)) - 1)
/* NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
* but was moved into name[1] along with the OID to avoid consuming the
@@ -2108,43 +2093,43 @@ enum md_transient_state {
};
struct mdt_body {
- struct lu_fid fid1;
- struct lu_fid fid2;
- struct lustre_handle handle;
- __u64 valid;
- __u64 size; /* Offset, in the case of MDS_READPAGE */
- __s64 mtime;
- __s64 atime;
- __s64 ctime;
- __u64 blocks; /* XID, in the case of MDS_READPAGE */
- __u64 ioepoch;
- __u64 t_state; /* transient file state defined in
- * enum md_transient_state
- * was "ino" until 2.4.0
- */
- __u32 fsuid;
- __u32 fsgid;
- __u32 capability;
- __u32 mode;
- __u32 uid;
- __u32 gid;
- __u32 flags; /* from vfs for pin/unpin, LUSTRE_BFLAG close */
- __u32 rdev;
- __u32 nlink; /* #bytes to read in the case of MDS_READPAGE */
- __u32 unused2; /* was "generation" until 2.4.0 */
- __u32 suppgid;
- __u32 eadatasize;
- __u32 aclsize;
- __u32 max_mdsize;
- __u32 max_cookiesize;
- __u32 uid_h; /* high 32-bits of uid, for FUID */
- __u32 gid_h; /* high 32-bits of gid, for FUID */
- __u32 padding_5; /* also fix lustre_swab_mdt_body */
- __u64 padding_6;
- __u64 padding_7;
- __u64 padding_8;
- __u64 padding_9;
- __u64 padding_10;
+ struct lu_fid mbo_fid1;
+ struct lu_fid mbo_fid2;
+ struct lustre_handle mbo_handle;
+ __u64 mbo_valid;
+ __u64 mbo_size; /* Offset, in the case of MDS_READPAGE */
+ __s64 mbo_mtime;
+ __s64 mbo_atime;
+ __s64 mbo_ctime;
+ __u64 mbo_blocks; /* XID, in the case of MDS_READPAGE */
+ __u64 mbo_ioepoch;
+ __u64 mbo_t_state; /* transient file state defined in
+ * enum md_transient_state
+ * was "ino" until 2.4.0
+ */
+ __u32 mbo_fsuid;
+ __u32 mbo_fsgid;
+ __u32 mbo_capability;
+ __u32 mbo_mode;
+ __u32 mbo_uid;
+ __u32 mbo_gid;
+ __u32 mbo_flags;
+ __u32 mbo_rdev;
+ __u32 mbo_nlink; /* #bytes to read in the case of MDS_READPAGE */
+ __u32 mbo_unused2; /* was "generation" until 2.4.0 */
+ __u32 mbo_suppgid;
+ __u32 mbo_eadatasize;
+ __u32 mbo_aclsize;
+ __u32 mbo_max_mdsize;
+ __u32 mbo_max_cookiesize;
+ __u32 mbo_uid_h; /* high 32-bits of uid, for FUID */
+ __u32 mbo_gid_h; /* high 32-bits of gid, for FUID */
+ __u32 mbo_padding_5; /* also fix lustre_swab_mdt_body */
+ __u64 mbo_padding_6;
+ __u64 mbo_padding_7;
+ __u64 mbo_padding_8;
+ __u64 mbo_padding_9;
+ __u64 mbo_padding_10;
}; /* 216 */
void lustre_swab_mdt_body(struct mdt_body *b);
@@ -2263,6 +2248,11 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
*/
#define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */
+#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \
+ MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \
+ MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \
+ MDS_OPEN_RELEASE)
+
enum mds_op_bias {
MDS_CHECK_SPLIT = 1 << 0,
MDS_CROSS_REF = 1 << 1,
@@ -2277,6 +2267,7 @@ enum mds_op_bias {
MDS_CREATE_VOLATILE = 1 << 10,
MDS_OWNEROVERRIDE = 1 << 11,
MDS_HSM_RELEASE = 1 << 12,
+ MDS_RENAME_MIGRATE = BIT(13),
};
/* instance of mdt_reint_rec */
@@ -2472,7 +2463,7 @@ struct lmv_desc {
__u32 ld_tgt_count; /* how many MDS's */
__u32 ld_active_tgt_count; /* how many active */
__u32 ld_default_stripe_count; /* how many objects are used */
- __u32 ld_pattern; /* default MEA_MAGIC_* */
+ __u32 ld_pattern; /* default hash pattern */
__u64 ld_default_hash_size;
__u64 ld_padding_1; /* also fix lustre_swab_lmv_desc */
__u32 ld_padding_2; /* also fix lustre_swab_lmv_desc */
@@ -2482,23 +2473,129 @@ struct lmv_desc {
struct obd_uuid ld_uuid;
};
-/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */
-struct lmv_stripe_md {
- __u32 mea_magic;
- __u32 mea_count;
- __u32 mea_master;
- __u32 mea_padding;
- char mea_pool_name[LOV_MAXPOOLNAME];
- struct lu_fid mea_ids[0];
+/* LMV layout EA, and it will be stored both in master and slave object */
+struct lmv_mds_md_v1 {
+ __u32 lmv_magic;
+ __u32 lmv_stripe_count;
+ __u32 lmv_master_mdt_index; /* On master object, it is master
+ * MDT index, on slave object, it
+ * is stripe index of the slave obj
+ */
+ __u32 lmv_hash_type; /* dir stripe policy, i.e. indicate
+ * which hash function to be used,
+ * Note: only the lower 16 bits are being
+ * used for now. Higher 16 bits will
+ * be used to mark the object status,
+ * for example migrating or dead.
+ */
+ __u32 lmv_layout_version; /* Used for directory restriping */
+ __u32 lmv_padding1;
+ __u64 lmv_padding2;
+ __u64 lmv_padding3;
+ char lmv_pool_name[LOV_MAXPOOLNAME + 1];/* pool name */
+ struct lu_fid lmv_stripe_fids[0]; /* FIDs for each stripe */
};
-#define MEA_MAGIC_LAST_CHAR 0xb2221ca1
-#define MEA_MAGIC_ALL_CHARS 0xb222a11c
-#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b
+#define LMV_MAGIC_V1 0x0CD20CD0 /* normal stripe lmv magic */
+#define LMV_MAGIC LMV_MAGIC_V1
+
+/* #define LMV_USER_MAGIC 0x0CD30CD0 */
+#define LMV_MAGIC_STRIPE 0x0CD40CD0 /* magic for dir sub_stripe */
+
+/*
+ * Right now only the lower 16 bits of lmv_hash_type are being used,
+ * and the higher part will be the flag to indicate the status of object,
+ * for example the object is being migrated. And the hash function
+ * might be interpreted differently with different flags.
+ */
+#define LMV_HASH_TYPE_MASK 0x0000ffff
+
+#define LMV_HASH_FLAG_MIGRATION 0x80000000
+#define LMV_HASH_FLAG_DEAD 0x40000000
-#define MAX_HASH_SIZE_32 0x7fffffffUL
-#define MAX_HASH_SIZE 0x7fffffffffffffffULL
-#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL
+/**
+ * The FNV-1a hash algorithm is as follows:
+ * hash = FNV_offset_basis
+ * for each octet_of_data to be hashed
+ * hash = hash XOR octet_of_data
+ * hash = hash × FNV_prime
+ * return hash
+ * http://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function#FNV-1a_hash
+ *
+ * http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
+ * FNV_prime is 2^40 + 2^8 + 0xb3 = 0x100000001b3ULL
+ **/
+#define LUSTRE_FNV_1A_64_PRIME 0x100000001b3ULL
+#define LUSTRE_FNV_1A_64_OFFSET_BIAS 0xcbf29ce484222325ULL
+static inline __u64 lustre_hash_fnv_1a_64(const void *buf, size_t size)
+{
+ __u64 hash = LUSTRE_FNV_1A_64_OFFSET_BIAS;
+ const unsigned char *p = buf;
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ hash ^= p[i];
+ hash *= LUSTRE_FNV_1A_64_PRIME;
+ }
+
+ return hash;
+}
+
+union lmv_mds_md {
+ __u32 lmv_magic;
+ struct lmv_mds_md_v1 lmv_md_v1;
+ struct lmv_user_md lmv_user_md;
+};
+
+void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm);
+
+static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic)
+{
+ ssize_t len = -EINVAL;
+
+ switch (lmm_magic) {
+ case LMV_MAGIC_V1: {
+ struct lmv_mds_md_v1 *lmm1;
+
+ len = sizeof(*lmm1);
+ len += stripe_count * sizeof(lmm1->lmv_stripe_fids[0]);
+ break; }
+ default:
+ break;
+ }
+ return len;
+}
+
+static inline int lmv_mds_md_stripe_count_get(const union lmv_mds_md *lmm)
+{
+ switch (le32_to_cpu(lmm->lmv_magic)) {
+ case LMV_MAGIC_V1:
+ return le32_to_cpu(lmm->lmv_md_v1.lmv_stripe_count);
+ case LMV_USER_MAGIC:
+ return le32_to_cpu(lmm->lmv_user_md.lum_stripe_count);
+ default:
+ return -EINVAL;
+ }
+}
+
+static inline int lmv_mds_md_stripe_count_set(union lmv_mds_md *lmm,
+ unsigned int stripe_count)
+{
+ int rc = 0;
+
+ switch (le32_to_cpu(lmm->lmv_magic)) {
+ case LMV_MAGIC_V1:
+ lmm->lmv_md_v1.lmv_stripe_count = cpu_to_le32(stripe_count);
+ break;
+ case LMV_USER_MAGIC:
+ lmm->lmv_user_md.lum_stripe_count = cpu_to_le32(stripe_count);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+ return rc;
+}
enum fld_rpc_opc {
FLD_QUERY = 900,
@@ -2582,8 +2679,8 @@ struct ldlm_res_id {
#define PLDLMRES(res) (res)->lr_name.name[0], (res)->lr_name.name[1], \
(res)->lr_name.name[2], (res)->lr_name.name[3]
-static inline int ldlm_res_eq(const struct ldlm_res_id *res0,
- const struct ldlm_res_id *res1)
+static inline bool ldlm_res_eq(const struct ldlm_res_id *res0,
+ const struct ldlm_res_id *res1)
{
return !memcmp(res0, res1, sizeof(*res0));
}
@@ -2620,17 +2717,15 @@ struct ldlm_extent {
__u64 gid;
};
-#define LDLM_GID_ANY ((__u64)-1)
-
-static inline int ldlm_extent_overlap(struct ldlm_extent *ex1,
- struct ldlm_extent *ex2)
+static inline int ldlm_extent_overlap(const struct ldlm_extent *ex1,
+ const struct ldlm_extent *ex2)
{
return (ex1->start <= ex2->end) && (ex2->start <= ex1->end);
}
/* check if @ex1 contains @ex2 */
-static inline int ldlm_extent_contain(struct ldlm_extent *ex1,
- struct ldlm_extent *ex2)
+static inline int ldlm_extent_contain(const struct ldlm_extent *ex1,
+ const struct ldlm_extent *ex2)
{
return (ex1->start <= ex2->start) && (ex1->end >= ex2->end);
}
@@ -2833,7 +2928,29 @@ enum obd_cmd {
};
#define OBD_FIRST_OPC OBD_PING
-/* catalog of log objects */
+/**
+ * llog contexts indices.
+ *
+ * There is a compatibility problem with the indexes below: they are not
+ * contiguous and must keep their numbers for compatibility needs.
+ * See LU-5218 for details.
+ */
+enum llog_ctxt_id {
+ LLOG_CONFIG_ORIG_CTXT = 0,
+ LLOG_CONFIG_REPL_CTXT = 1,
+ LLOG_MDS_OST_ORIG_CTXT = 2,
+ LLOG_MDS_OST_REPL_CTXT = 3, /* kept just to avoid re-assignment */
+ LLOG_SIZE_ORIG_CTXT = 4,
+ LLOG_SIZE_REPL_CTXT = 5,
+ LLOG_TEST_ORIG_CTXT = 8,
+ LLOG_TEST_REPL_CTXT = 9, /* kept just to avoid re-assignment */
+ LLOG_CHANGELOG_ORIG_CTXT = 12, /**< changelog generation on mdd */
+ LLOG_CHANGELOG_REPL_CTXT = 13, /**< changelog access on clients */
+ /* for multiple changelog consumers */
+ LLOG_CHANGELOG_USER_ORIG_CTXT = 14,
+ LLOG_AGENT_ORIG_CTXT = 15, /**< agent requests generation on cdt */
+ LLOG_MAX_CTXTS
+};
/** Identifier for a single log object */
struct llog_logid {
@@ -2939,7 +3056,7 @@ struct llog_setattr64_rec {
__u32 lsr_uid_h;
__u32 lsr_gid;
__u32 lsr_gid_h;
- __u64 lsr_padding;
+ __u64 lsr_valid;
struct llog_rec_tail lsr_tail;
} __packed;
@@ -2963,15 +3080,9 @@ struct changelog_setinfo {
/** changelog record */
struct llog_changelog_rec {
- struct llog_rec_hdr cr_hdr;
- struct changelog_rec cr;
- struct llog_rec_tail cr_tail; /**< for_sizezof_only */
-} __packed;
-
-struct llog_changelog_ext_rec {
- struct llog_rec_hdr cr_hdr;
- struct changelog_ext_rec cr;
- struct llog_rec_tail cr_tail; /**< for_sizezof_only */
+ struct llog_rec_hdr cr_hdr;
+ struct changelog_rec cr; /**< Variable length field */
+ struct llog_rec_tail cr_do_not_use; /**< for_sizeof_only */
} __packed;
struct llog_changelog_user_rec {
@@ -2990,7 +3101,7 @@ enum agent_req_status {
ARS_SUCCEED,
};
-static inline char *agent_req_status2name(enum agent_req_status ars)
+static inline const char *agent_req_status2name(const enum agent_req_status ars)
{
switch (ars) {
case ARS_WAITING:
@@ -3056,6 +3167,9 @@ enum llog_flag {
LLOG_F_ZAP_WHEN_EMPTY = 0x1,
LLOG_F_IS_CAT = 0x2,
LLOG_F_IS_PLAIN = 0x4,
+ LLOG_F_EXT_JOBID = BIT(3),
+
+ LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID,
};
struct llog_log_hdr {
@@ -3068,8 +3182,8 @@ struct llog_log_hdr {
__u32 llh_cat_idx;
/* for a catalog the first plain slot is next to it */
struct obd_uuid llh_tgtuuid;
- __u32 llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32) - 23];
- __u32 llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)];
+ __u32 llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23];
+ __u32 llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)];
struct llog_rec_tail llh_tail;
} __packed;
@@ -3166,7 +3280,7 @@ struct obdo {
#define o_cksum o_nlink
#define o_grant_used o_data_version
-static inline void lustre_set_wire_obdo(struct obd_connect_data *ocd,
+static inline void lustre_set_wire_obdo(const struct obd_connect_data *ocd,
struct obdo *wobdo,
const struct obdo *lobdo)
{
@@ -3185,7 +3299,7 @@ static inline void lustre_set_wire_obdo(struct obd_connect_data *ocd,
}
}
-static inline void lustre_get_wire_obdo(struct obd_connect_data *ocd,
+static inline void lustre_get_wire_obdo(const struct obd_connect_data *ocd,
struct obdo *lobdo,
const struct obdo *wobdo)
{
@@ -3284,17 +3398,17 @@ void lustre_swab_lustre_capa(struct lustre_capa *c);
/** lustre_capa::lc_opc */
enum {
- CAPA_OPC_BODY_WRITE = 1<<0, /**< write object data */
- CAPA_OPC_BODY_READ = 1<<1, /**< read object data */
- CAPA_OPC_INDEX_LOOKUP = 1<<2, /**< lookup object fid */
- CAPA_OPC_INDEX_INSERT = 1<<3, /**< insert object fid */
- CAPA_OPC_INDEX_DELETE = 1<<4, /**< delete object fid */
- CAPA_OPC_OSS_WRITE = 1<<5, /**< write oss object data */
- CAPA_OPC_OSS_READ = 1<<6, /**< read oss object data */
- CAPA_OPC_OSS_TRUNC = 1<<7, /**< truncate oss object */
- CAPA_OPC_OSS_DESTROY = 1<<8, /**< destroy oss object */
- CAPA_OPC_META_WRITE = 1<<9, /**< write object meta data */
- CAPA_OPC_META_READ = 1<<10, /**< read object meta data */
+ CAPA_OPC_BODY_WRITE = 1 << 0, /**< write object data */
+ CAPA_OPC_BODY_READ = 1 << 1, /**< read object data */
+ CAPA_OPC_INDEX_LOOKUP = 1 << 2, /**< lookup object fid */
+ CAPA_OPC_INDEX_INSERT = 1 << 3, /**< insert object fid */
+ CAPA_OPC_INDEX_DELETE = 1 << 4, /**< delete object fid */
+ CAPA_OPC_OSS_WRITE = 1 << 5, /**< write oss object data */
+ CAPA_OPC_OSS_READ = 1 << 6, /**< read oss object data */
+ CAPA_OPC_OSS_TRUNC = 1 << 7, /**< truncate oss object */
+ CAPA_OPC_OSS_DESTROY = 1 << 8, /**< destroy oss object */
+ CAPA_OPC_META_WRITE = 1 << 9, /**< write object meta data */
+ CAPA_OPC_META_READ = 1 << 10, /**< read object meta data */
};
#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE)
@@ -3346,6 +3460,14 @@ struct getinfo_fid2path {
void lustre_swab_fid2path(struct getinfo_fid2path *gf);
+/** path2parent request/reply structures */
+struct getparent {
+ struct lu_fid gp_fid; /**< parent FID */
+ __u32 gp_linkno; /**< hardlink number */
+ __u32 gp_name_size; /**< size of the name field */
+ char gp_name[0]; /**< zero-terminated link name */
+} __packed;
+
enum {
LAYOUT_INTENT_ACCESS = 0,
LAYOUT_INTENT_READ = 1,
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
new file mode 100644
index 000000000000..f3d7c94c3b50
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
@@ -0,0 +1,412 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
+ */
+#ifndef LUSTRE_IOCTL_H_
+#define LUSTRE_IOCTL_H_
+
+#include <linux/types.h>
+#include "../../../include/linux/libcfs/libcfs.h"
+#include "lustre_idl.h"
+
+#ifdef __KERNEL__
+# include <linux/ioctl.h>
+# include <linux/string.h>
+# include "../obd_support.h"
+#else /* __KERNEL__ */
+# include <malloc.h>
+# include <string.h>
+#include <libcfs/util/ioctl.h>
+#endif /* !__KERNEL__ */
+
+#if !defined(__KERNEL__) && !defined(LUSTRE_UTILS)
+# error This file is for Lustre internal use only.
+#endif
+
+enum md_echo_cmd {
+ ECHO_MD_CREATE = 1, /* Open/Create file on MDT */
+ ECHO_MD_MKDIR = 2, /* Mkdir on MDT */
+ ECHO_MD_DESTROY = 3, /* Unlink file on MDT */
+ ECHO_MD_RMDIR = 4, /* Rmdir on MDT */
+ ECHO_MD_LOOKUP = 5, /* Lookup on MDT */
+ ECHO_MD_GETATTR = 6, /* Getattr on MDT */
+ ECHO_MD_SETATTR = 7, /* Setattr on MDT */
+ ECHO_MD_ALLOC_FID = 8, /* Get FIDs from MDT */
+};
+
+#define OBD_DEV_ID 1
+#define OBD_DEV_NAME "obd"
+#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME
+#define OBD_DEV_MAJOR 10
+#define OBD_DEV_MINOR 241
+
+#define OBD_IOCTL_VERSION 0x00010004
+#define OBD_DEV_BY_DEVNAME 0xffffd0de
+#define OBD_MAX_IOCTL_BUFFER CONFIG_LUSTRE_OBD_MAX_IOCTL_BUFFER
+
+struct obd_ioctl_data {
+ __u32 ioc_len;
+ __u32 ioc_version;
+
+ union {
+ __u64 ioc_cookie;
+ __u64 ioc_u64_1;
+ };
+ union {
+ __u32 ioc_conn1;
+ __u32 ioc_u32_1;
+ };
+ union {
+ __u32 ioc_conn2;
+ __u32 ioc_u32_2;
+ };
+
+ struct obdo ioc_obdo1;
+ struct obdo ioc_obdo2;
+
+ __u64 ioc_count;
+ __u64 ioc_offset;
+ __u32 ioc_dev;
+ __u32 ioc_command;
+
+ __u64 ioc_nid;
+ __u32 ioc_nal;
+ __u32 ioc_type;
+
+ /* buffers the kernel will treat as user pointers */
+ __u32 ioc_plen1;
+ char __user *ioc_pbuf1;
+ __u32 ioc_plen2;
+ char __user *ioc_pbuf2;
+
+ /* inline buffers for various arguments */
+ __u32 ioc_inllen1;
+ char *ioc_inlbuf1;
+ __u32 ioc_inllen2;
+ char *ioc_inlbuf2;
+ __u32 ioc_inllen3;
+ char *ioc_inlbuf3;
+ __u32 ioc_inllen4;
+ char *ioc_inlbuf4;
+
+ char ioc_bulk[0];
+};
+
+struct obd_ioctl_hdr {
+ __u32 ioc_len;
+ __u32 ioc_version;
+};
+
+static inline __u32 obd_ioctl_packlen(struct obd_ioctl_data *data)
+{
+ __u32 len = cfs_size_round(sizeof(*data));
+
+ len += cfs_size_round(data->ioc_inllen1);
+ len += cfs_size_round(data->ioc_inllen2);
+ len += cfs_size_round(data->ioc_inllen3);
+ len += cfs_size_round(data->ioc_inllen4);
+
+ return len;
+}
+
+static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
+{
+ if (data->ioc_len > (1 << 30)) {
+ CERROR("OBD ioctl: ioc_len larger than 1<<30\n");
+ return 1;
+ }
+
+ if (data->ioc_inllen1 > (1 << 30)) {
+ CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n");
+ return 1;
+ }
+
+ if (data->ioc_inllen2 > (1 << 30)) {
+ CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n");
+ return 1;
+ }
+
+ if (data->ioc_inllen3 > (1 << 30)) {
+ CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n");
+ return 1;
+ }
+
+ if (data->ioc_inllen4 > (1 << 30)) {
+ CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n");
+ return 1;
+ }
+
+ if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
+ CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n");
+ return 1;
+ }
+
+ if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
+ CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n");
+ return 1;
+ }
+
+ if (data->ioc_inlbuf3 && !data->ioc_inllen3) {
+ CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n");
+ return 1;
+ }
+
+ if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
+ CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n");
+ return 1;
+ }
+
+ if (data->ioc_pbuf1 && !data->ioc_plen1) {
+ CERROR("OBD ioctl: pbuf1 pointer but 0 length\n");
+ return 1;
+ }
+
+ if (data->ioc_pbuf2 && !data->ioc_plen2) {
+ CERROR("OBD ioctl: pbuf2 pointer but 0 length\n");
+ return 1;
+ }
+
+ if (!data->ioc_pbuf1 && data->ioc_plen1) {
+ CERROR("OBD ioctl: plen1 set but NULL pointer\n");
+ return 1;
+ }
+
+ if (!data->ioc_pbuf2 && data->ioc_plen2) {
+ CERROR("OBD ioctl: plen2 set but NULL pointer\n");
+ return 1;
+ }
+
+ if (obd_ioctl_packlen(data) > data->ioc_len) {
+ CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n",
+ obd_ioctl_packlen(data), data->ioc_len);
+ return 1;
+ }
+
+ return 0;
+}
+
+#ifdef __KERNEL__
+
+int obd_ioctl_getdata(char **buf, int *len, void __user *arg);
+int obd_ioctl_popdata(void __user *arg, void *data, int len);
+
+static inline void obd_ioctl_freedata(char *buf, size_t len)
+{
+ kvfree(buf);
+}
+
+#else /* __KERNEL__ */
+
+static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf,
+ int max_len)
+{
+ char *ptr;
+ struct obd_ioctl_data *overlay;
+
+ data->ioc_len = obd_ioctl_packlen(data);
+ data->ioc_version = OBD_IOCTL_VERSION;
+
+ if (*pbuf && data->ioc_len > max_len) {
+ fprintf(stderr, "pbuf = %p, ioc_len = %u, max_len = %d\n",
+ *pbuf, data->ioc_len, max_len);
+ return -EINVAL;
+ }
+
+ if (!*pbuf)
+ *pbuf = malloc(data->ioc_len);
+
+ if (!*pbuf)
+ return -ENOMEM;
+
+ overlay = (struct obd_ioctl_data *)*pbuf;
+ memcpy(*pbuf, data, sizeof(*data));
+
+ ptr = overlay->ioc_bulk;
+ if (data->ioc_inlbuf1)
+ LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
+
+ if (data->ioc_inlbuf2)
+ LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
+
+ if (data->ioc_inlbuf3)
+ LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
+
+ if (data->ioc_inlbuf4)
+ LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
+
+ if (obd_ioctl_is_invalid(overlay)) {
+ fprintf(stderr, "invalid ioctl data: ioc_len = %u, max_len = %d\n",
+ data->ioc_len, max_len);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static inline int
+obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, int max_len)
+{
+ char *ptr;
+ struct obd_ioctl_data *overlay;
+
+ if (!pbuf)
+ return 1;
+
+ overlay = (struct obd_ioctl_data *)pbuf;
+
+ /* Preserve the caller's buffer pointers */
+ overlay->ioc_inlbuf1 = data->ioc_inlbuf1;
+ overlay->ioc_inlbuf2 = data->ioc_inlbuf2;
+ overlay->ioc_inlbuf3 = data->ioc_inlbuf3;
+ overlay->ioc_inlbuf4 = data->ioc_inlbuf4;
+
+ memcpy(data, pbuf, sizeof(*data));
+
+ ptr = overlay->ioc_bulk;
+ if (data->ioc_inlbuf1)
+ LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
+
+ if (data->ioc_inlbuf2)
+ LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
+
+ if (data->ioc_inlbuf3)
+ LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
+
+ if (data->ioc_inlbuf4)
+ LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
+
+ return 0;
+}
+
+#endif /* !__KERNEL__ */
+
+/*
+ * OBD_IOC_DATA_TYPE is only for compatibility reasons with older
+ * Linux Lustre user tools. New ioctls should NOT use this macro as
+ * the ioctl "size". Instead the ioctl should get a "size" argument
+ * which is the actual data type used by the ioctl, to ensure the
+ * ioctl interface is versioned correctly.
+ */
+#define OBD_IOC_DATA_TYPE long
+
+/* IOC_LDLM_TEST _IOWR('f', 40, long) */
+/* IOC_LDLM_DUMP _IOWR('f', 41, long) */
+/* IOC_LDLM_REGRESS_START _IOWR('f', 42, long) */
+/* IOC_LDLM_REGRESS_STOP _IOWR('f', 43, long) */
+
+#define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DESTROY _IOW('f', 104, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE) */
+
+#define OBD_IOC_SETATTR _IOW('f', 107, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETATTR _IOWR('f', 108, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SYNC _IOW('f', 114, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE) */
+/* OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE) */
+/* OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE) */
+/* OBD_IOC_COPY _IOWR('f', 120, OBD_IOC_DATA_TYPE) */
+/* OBD_IOC_MIGR _IOWR('f', 121, OBD_IOC_DATA_TYPE) */
+/* OBD_IOC_PUNCH _IOWR('f', 122, OBD_IOC_DATA_TYPE) */
+
+/* OBD_IOC_MODULE_DEBUG _IOWR('f', 124, OBD_IOC_DATA_TYPE) */
+#define OBD_IOC_BRW_READ _IOWR('f', 125, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_BRW_WRITE _IOWR('f', 126, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_NAME2DEV _IOWR('f', 127, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_UUID2DEV _IOWR('f', 130, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETNAME _IOWR('f', 131, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETMDNAME _IOR('f', 131, char[MAX_OBD_NAME])
+#define OBD_IOC_GETDTNAME OBD_IOC_GETNAME
+#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLIENT_RECOVER _IOW('f', 133, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PING_TARGET _IOW('f', 136, OBD_IOC_DATA_TYPE)
+
+/* OBD_IOC_DEC_FS_USE_COUNT _IO('f', 139) */
+#define OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) */
+#define OBD_GET_VERSION _IOWR('f', 144, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_GSS_SUPPORT _IOWR('f', 145, OBD_IOC_DATA_TYPE) */
+/* OBD_IOC_CLOSE_UUID _IOWR('f', 147, OBD_IOC_DATA_TYPE) */
+#define OBD_IOC_CHANGELOG_SEND _IOW('f', 148, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETDEVICE _IOWR('f', 149, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_FID2PATH _IOWR('f', 150, OBD_IOC_DATA_TYPE)
+/* lustre/lustre_user.h 151-153 */
+/* OBD_IOC_LOV_SETSTRIPE 154 LL_IOC_LOV_SETSTRIPE */
+/* OBD_IOC_LOV_GETSTRIPE 155 LL_IOC_LOV_GETSTRIPE */
+/* OBD_IOC_LOV_SETEA 156 LL_IOC_LOV_SETEA */
+/* lustre/lustre_user.h 157-159 */
+#define OBD_IOC_QUOTACHECK _IOW('f', 160, int)
+#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *)
+#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
+/* lustre/lustre_user.h 163-176 */
+#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_DEREG _IOW('f', 178, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_CLEAR _IOW('f', 179, struct obd_ioctl_data)
+/* OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE) */
+/* OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE) */
+/* OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE) */
+/* OBD_IOC_DORECORD _IOWR('f', 183, OBD_IOC_DATA_TYPE) */
+#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE) */
+/* OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE) */
+#define OBD_IOC_PARAM _IOW('f', 187, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_LLOG_CATINFO _IOWR('f', 196, OBD_IOC_DATA_TYPE) */
+#define OBD_IOC_NODEMAP _IOWR('f', 197, OBD_IOC_DATA_TYPE)
+
+/* ECHO_IOC_GET_STRIPE _IOWR('f', 200, OBD_IOC_DATA_TYPE) */
+/* ECHO_IOC_SET_STRIPE _IOWR('f', 201, OBD_IOC_DATA_TYPE) */
+/* ECHO_IOC_ENQUEUE _IOWR('f', 202, OBD_IOC_DATA_TYPE) */
+/* ECHO_IOC_CANCEL _IOWR('f', 203, OBD_IOC_DATA_TYPE) */
+
+#define OBD_IOC_GET_OBJ_VERSION _IOR('f', 210, OBD_IOC_DATA_TYPE)
+
+/* lustre/lustre_user.h 212-217 */
+#define OBD_IOC_GET_MNTOPT _IOW('f', 220, mntopt_t)
+#define OBD_IOC_ECHO_MD _IOR('f', 221, struct obd_ioctl_data)
+#define OBD_IOC_ECHO_ALLOC_SEQ _IOWR('f', 222, struct obd_ioctl_data)
+#define OBD_IOC_START_LFSCK _IOWR('f', 230, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_STOP_LFSCK _IOW('f', 231, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_QUERY_LFSCK _IOR('f', 232, struct obd_ioctl_data)
+/* lustre/lustre_user.h 240-249 */
+/* LIBCFS_IOC_DEBUG_MASK 250 */
+
+#define IOC_OSC_SET_ACTIVE _IOWR('h', 21, void *)
+
+#endif /* LUSTRE_IOCTL_H_ */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
index ef6f38ff359e..6fc985571cba 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -42,8 +42,35 @@
* @{
*/
+#ifdef __KERNEL__
+# include <linux/quota.h>
+# include <linux/string.h> /* snprintf() */
+# include <linux/version.h>
+#else /* !__KERNEL__ */
+# define NEED_QUOTA_DEFS
+# include <stdio.h> /* snprintf() */
+# include <string.h>
+# include <sys/quota.h>
+# include <sys/stat.h>
+#endif /* __KERNEL__ */
#include "ll_fiemap.h"
-#include "../linux/lustre_user.h"
+
+/*
+ * We always need to use the 64-bit version because the structure
+ * is shared across the entire cluster, where 32-bit and 64-bit
+ * machines coexist.
+ */
+#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
+typedef struct stat64 lstat_t;
+#define lstat_f lstat64
+#else
+typedef struct stat lstat_t;
+#define lstat_f lstat
+#endif
+
+#define HAVE_LOV_USER_MDS_DATA
+
+#define LUSTRE_EOF 0xffffffffffffffffULL
/* for statfs() */
#define LL_SUPER_MAGIC 0x0BD00BD0
@@ -117,6 +144,11 @@ struct lu_fid {
__u32 f_ver;
};
+static inline bool fid_is_zero(const struct lu_fid *fid)
+{
+ return !fid->f_seq && !fid->f_oid;
+}
+
struct filter_fid {
struct lu_fid ff_parent; /* ff_parent.f_ver == file stripe number */
};
@@ -167,7 +199,7 @@ struct lustre_mdt_attrs {
*/
struct ost_id {
union {
- struct ostid {
+ struct {
__u64 oi_id;
__u64 oi_seq;
} oi;
@@ -188,26 +220,20 @@ struct ost_id {
* *STRIPE* - set/get lov_user_md
* *INFO - set/get lov_user_mds_data
*/
-/* see <lustre_lib.h> for ioctl numberss 101-150 */
+/* lustre_ioctl.h 101-150 */
#define LL_IOC_GETFLAGS _IOR('f', 151, long)
#define LL_IOC_SETFLAGS _IOW('f', 152, long)
#define LL_IOC_CLRFLAGS _IOW('f', 153, long)
-/* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */
#define LL_IOC_LOV_SETSTRIPE _IOW('f', 154, long)
-/* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */
#define LL_IOC_LOV_GETSTRIPE _IOW('f', 155, long)
-/* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */
#define LL_IOC_LOV_SETEA _IOW('f', 156, long)
-#define LL_IOC_RECREATE_OBJ _IOW('f', 157, long)
-#define LL_IOC_RECREATE_FID _IOW('f', 157, struct lu_fid)
+/* LL_IOC_RECREATE_OBJ 157 obsolete */
+/* LL_IOC_RECREATE_FID 157 obsolete */
#define LL_IOC_GROUP_LOCK _IOW('f', 158, long)
#define LL_IOC_GROUP_UNLOCK _IOW('f', 159, long)
-/* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */
-#define LL_IOC_QUOTACHECK _IOW('f', 160, int)
-/* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */
-#define LL_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *)
-/* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */
-#define LL_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
+/* #define LL_IOC_QUOTACHECK 160 OBD_IOC_QUOTACHECK */
+/* #define LL_IOC_POLL_QUOTACHECK 161 OBD_IOC_POLL_QUOTACHECK */
+/* #define LL_IOC_QUOTACTL 162 OBD_IOC_QUOTACTL */
#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
#define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *)
#define LL_IOC_FLUSHCTX _IOW('f', 166, long)
@@ -221,8 +247,7 @@ struct ost_id {
#define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *)
#define LL_IOC_GET_MDTIDX _IOR('f', 175, int)
-/* see <lustre_lib.h> for ioctl numbers 177-210 */
-
+/* lustre_ioctl.h 177-210 */
#define LL_IOC_HSM_STATE_GET _IOR('f', 211, struct hsm_user_state)
#define LL_IOC_HSM_STATE_SET _IOW('f', 212, struct hsm_state_set)
#define LL_IOC_HSM_CT_START _IOW('f', 213, struct lustre_kernelcomm)
@@ -242,6 +267,17 @@ struct ost_id {
#define LL_IOC_SET_LEASE _IOWR('f', 243, long)
#define LL_IOC_GET_LEASE _IO('f', 244)
#define LL_IOC_HSM_IMPORT _IOWR('f', 245, struct hsm_user_import)
+#define LL_IOC_LMV_SET_DEFAULT_STRIPE _IOWR('f', 246, struct lmv_user_md)
+#define LL_IOC_MIGRATE _IOR('f', 247, int)
+#define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid)
+#define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent)
+
+/* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */
+enum ll_lease_type {
+ LL_LEASE_RDLCK = 0x1,
+ LL_LEASE_WRLCK = 0x2,
+ LL_LEASE_UNLCK = 0x4,
+};
#define LL_STATFS_LMV 1
#define LL_STATFS_LOV 2
@@ -253,10 +289,6 @@ struct ost_id {
#define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
#define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
-/* Keep these for backward compartability. */
-#define LL_IOC_OBD_STATFS IOC_OBD_STATFS
-#define IOC_MDC_GETSTRIPE IOC_MDC_GETFILESTRIPE
-
#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
/* Define O_LOV_DELAY_CREATE to be a mask that is not useful for regular
@@ -273,20 +305,26 @@ struct ost_id {
#define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */
#define LL_FILE_RMTACL 0x00000020
-#define LOV_USER_MAGIC_V1 0x0BD10BD0
-#define LOV_USER_MAGIC LOV_USER_MAGIC_V1
-#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
-#define LOV_USER_MAGIC_V3 0x0BD30BD0
+#define LOV_USER_MAGIC_V1 0x0BD10BD0
+#define LOV_USER_MAGIC LOV_USER_MAGIC_V1
+#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
+#define LOV_USER_MAGIC_V3 0x0BD30BD0
+/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
+#define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */
+
+#define LMV_USER_MAGIC 0x0CD30CD0 /*default lmv magic*/
-#define LMV_MAGIC_V1 0x0CD10CD0 /*normal stripe lmv magic */
-#define LMV_USER_MAGIC 0x0CD20CD0 /*default lmv magic*/
+#define LOV_PATTERN_RAID0 0x001
+#define LOV_PATTERN_RAID1 0x002
+#define LOV_PATTERN_FIRST 0x100
+#define LOV_PATTERN_CMOBD 0x200
-#define LOV_PATTERN_RAID0 0x001
-#define LOV_PATTERN_RAID1 0x002
-#define LOV_PATTERN_FIRST 0x100
+#define LOV_PATTERN_F_MASK 0xffff0000
+#define LOV_PATTERN_F_HOLE 0x40000000 /* there is hole in LOV EA */
+#define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */
-#define LOV_MAXPOOLNAME 16
-#define LOV_POOLNAMEF "%.16s"
+#define LOV_MAXPOOLNAME 15
+#define LOV_POOLNAMEF "%.15s"
#define LOV_MIN_STRIPE_BITS 16 /* maximum PAGE_SIZE (ia64), power of 2 */
#define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS)
@@ -344,18 +382,17 @@ struct lov_user_md_v3 { /* LOV EA user data (host-endian) */
* used when reading
*/
};
- char lmm_pool_name[LOV_MAXPOOLNAME]; /* pool name */
+ char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */
struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
} __packed;
static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
{
- if (lmm_magic == LOV_USER_MAGIC_V3)
- return sizeof(struct lov_user_md_v3) +
- stripes * sizeof(struct lov_user_ost_data_v1);
- else
+ if (lmm_magic == LOV_USER_MAGIC_V1)
return sizeof(struct lov_user_md_v1) +
stripes * sizeof(struct lov_user_ost_data_v1);
+ return sizeof(struct lov_user_md_v3) +
+ stripes * sizeof(struct lov_user_ost_data_v1);
}
/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
@@ -374,19 +411,26 @@ struct lov_user_mds_data_v3 {
} __packed;
#endif
-/* keep this to be the same size as lov_user_ost_data_v1 */
struct lmv_user_mds_data {
struct lu_fid lum_fid;
__u32 lum_padding;
__u32 lum_mds;
};
-/* lum_type */
-enum {
- LMV_STRIPE_TYPE = 0,
- LMV_DEFAULT_TYPE = 1,
+enum lmv_hash_type {
+ LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */
+ LMV_HASH_TYPE_ALL_CHARS = 1,
+ LMV_HASH_TYPE_FNV_1A_64 = 2,
};
+#define LMV_HASH_NAME_ALL_CHARS "all_char"
+#define LMV_HASH_NAME_FNV_1A_64 "fnv_1a_64"
+
+/*
+ * Derived the same way as LOV_MAX_STRIPE_COUNT (see above):
+ * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data)
+ */
+#define LMV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */
#define lmv_user_md lmv_user_md_v1
struct lmv_user_md_v1 {
__u32 lum_magic; /* must be the first field */
@@ -397,9 +441,9 @@ struct lmv_user_md_v1 {
__u32 lum_padding1;
__u32 lum_padding2;
__u32 lum_padding3;
- char lum_pool_name[LOV_MAXPOOLNAME];
+ char lum_pool_name[LOV_MAXPOOLNAME + 1];
struct lmv_user_mds_data lum_objects[0];
-};
+} __packed;
static inline int lmv_user_md_size(int stripes, int lmm_magic)
{
@@ -407,6 +451,8 @@ static inline int lmv_user_md_size(int stripes, int lmm_magic)
stripes * sizeof(struct lmv_user_mds_data);
}
+void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
+
struct ll_recreate_obj {
__u64 lrc_id;
__u32 lrc_ost_idx;
@@ -498,6 +544,12 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
/********* Quotas **********/
+#define Q_QUOTACHECK 0x800100 /* deprecated as of 2.4 */
+#define Q_INITQUOTA 0x800101 /* deprecated as of 2.4 */
+#define Q_GETOINFO 0x800102 /* get obd quota info */
+#define Q_GETOQUOTA 0x800103 /* get obd quotas */
+#define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */
+
/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
#define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */
#define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */
@@ -648,11 +700,16 @@ static inline const char *changelog_type2str(int type)
}
/* per-record flags */
-#define CLF_VERSION 0x1000
-#define CLF_EXT_VERSION 0x2000
#define CLF_FLAGSHIFT 12
#define CLF_FLAGMASK ((1U << CLF_FLAGSHIFT) - 1)
#define CLF_VERMASK (~CLF_FLAGMASK)
+enum changelog_rec_flags {
+ CLF_VERSION = 0x1000,
+ CLF_RENAME = 0x2000,
+ CLF_JOBID = 0x4000,
+ CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID
+};
+
/* Anything under the flagmask may be per-type (if desired) */
/* Flags for unlink */
#define CLF_UNLINK_LAST 0x0001 /* Unlink of last hardlink */
@@ -736,12 +793,35 @@ static inline void hsm_set_cl_error(int *flags, int error)
*flags |= (error << CLF_HSM_ERR_L);
}
-#define CR_MAXSIZE cfs_size_round(2*NAME_MAX + 1 + \
- sizeof(struct changelog_ext_rec))
+enum changelog_send_flag {
+ /* Not yet implemented */
+ CHANGELOG_FLAG_FOLLOW = BIT(0),
+ /*
+ * Blocking IO makes sense in case of slow user parsing of the records,
+ * but it also prevents us from cleaning up if the records are not
+ * consumed.
+ */
+ CHANGELOG_FLAG_BLOCK = BIT(1),
+ /* Pack jobid into the changelog records if available. */
+ CHANGELOG_FLAG_JOBID = BIT(2),
+};
+
+#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \
+ changelog_rec_offset(CLF_SUPPORTED))
+
+/* 31 usable bytes string + null terminator. */
+#define LUSTRE_JOBID_SIZE 32
+/*
+ * This is the minimal changelog record. It can contain extensions
+ * such as rename fields or process jobid. Its exact content is described
+ * by the cr_flags.
+ *
+ * Extensions are packed in the same order as their corresponding flags.
+ */
struct changelog_rec {
__u16 cr_namelen;
- __u16 cr_flags; /**< (flags&CLF_FLAGMASK)|CLF_VERSION */
+ __u16 cr_flags; /**< \a changelog_rec_flags */
__u32 cr_type; /**< \a changelog_rec_type */
__u64 cr_index; /**< changelog record number */
__u64 cr_prev; /**< last index for this target fid */
@@ -751,55 +831,138 @@ struct changelog_rec {
__u32 cr_markerflags; /**< CL_MARK flags */
};
struct lu_fid cr_pfid; /**< parent fid */
- char cr_name[0]; /**< last element */
} __packed;
-/* changelog_ext_rec is 2*sizeof(lu_fid) bigger than changelog_rec, to save
- * space, only rename uses changelog_ext_rec, while others use changelog_rec to
- * store records.
- */
-struct changelog_ext_rec {
- __u16 cr_namelen;
- __u16 cr_flags; /**< (flags & CLF_FLAGMASK) |
- * CLF_EXT_VERSION
- */
- __u32 cr_type; /**< \a changelog_rec_type */
- __u64 cr_index; /**< changelog record number */
- __u64 cr_prev; /**< last index for this target fid */
- __u64 cr_time;
- union {
- struct lu_fid cr_tfid; /**< target fid */
- __u32 cr_markerflags; /**< CL_MARK flags */
- };
- struct lu_fid cr_pfid; /**< target parent fid */
- struct lu_fid cr_sfid; /**< source fid, or zero */
- struct lu_fid cr_spfid; /**< source parent fid, or zero */
- char cr_name[0]; /**< last element */
-} __packed;
+/* Changelog extension for RENAME. */
+struct changelog_ext_rename {
+ struct lu_fid cr_sfid; /**< source fid, or zero */
+ struct lu_fid cr_spfid; /**< source parent fid, or zero */
+};
-#define CHANGELOG_REC_EXTENDED(rec) \
- (((rec)->cr_flags & CLF_VERMASK) == CLF_EXT_VERSION)
+/* Changelog extension to include JOBID. */
+struct changelog_ext_jobid {
+ char cr_jobid[LUSTRE_JOBID_SIZE]; /**< zero-terminated string. */
+};
+
+static inline size_t changelog_rec_offset(enum changelog_rec_flags crf)
+{
+ size_t size = sizeof(struct changelog_rec);
+
+ if (crf & CLF_RENAME)
+ size += sizeof(struct changelog_ext_rename);
+
+ if (crf & CLF_JOBID)
+ size += sizeof(struct changelog_ext_jobid);
-static inline int changelog_rec_size(struct changelog_rec *rec)
+ return size;
+}
+
+static inline size_t changelog_rec_size(struct changelog_rec *rec)
{
- return CHANGELOG_REC_EXTENDED(rec) ? sizeof(struct changelog_ext_rec) :
- sizeof(*rec);
+ return changelog_rec_offset(rec->cr_flags);
+}
+
+static inline size_t changelog_rec_varsize(struct changelog_rec *rec)
+{
+ return changelog_rec_size(rec) - sizeof(*rec) + rec->cr_namelen;
+}
+
+static inline
+struct changelog_ext_rename *changelog_rec_rename(struct changelog_rec *rec)
+{
+ enum changelog_rec_flags crf = rec->cr_flags & CLF_VERSION;
+
+ return (struct changelog_ext_rename *)((char *)rec +
+ changelog_rec_offset(crf));
+}
+
+/* The jobid follows the rename extension, if present */
+static inline
+struct changelog_ext_jobid *changelog_rec_jobid(struct changelog_rec *rec)
+{
+ enum changelog_rec_flags crf = rec->cr_flags &
+ (CLF_VERSION | CLF_RENAME);
+
+ return (struct changelog_ext_jobid *)((char *)rec +
+ changelog_rec_offset(crf));
}
+/* The name follows the rename and jobid extensions, if present */
static inline char *changelog_rec_name(struct changelog_rec *rec)
{
- return CHANGELOG_REC_EXTENDED(rec) ?
- ((struct changelog_ext_rec *)rec)->cr_name : rec->cr_name;
+ return (char *)rec + changelog_rec_offset(rec->cr_flags &
+ CLF_SUPPORTED);
}
-static inline int changelog_rec_snamelen(struct changelog_ext_rec *rec)
+static inline size_t changelog_rec_snamelen(struct changelog_rec *rec)
{
- return rec->cr_namelen - strlen(rec->cr_name) - 1;
+ return rec->cr_namelen - strlen(changelog_rec_name(rec)) - 1;
}
-static inline char *changelog_rec_sname(struct changelog_ext_rec *rec)
+static inline char *changelog_rec_sname(struct changelog_rec *rec)
{
- return rec->cr_name + strlen(rec->cr_name) + 1;
+ char *cr_name = changelog_rec_name(rec);
+
+ return cr_name + strlen(cr_name) + 1;
+}
+
+/**
+ * Remap a record to the desired format as specified by the crf flags.
+ * The record must be big enough to contain the final remapped version.
+ * Superfluous extension fields are removed and missing ones are added
+ * and zeroed. The flags of the record are updated accordingly.
+ *
+ * The jobid and rename extensions can be added to a record, typically to
+ * match the format an application expects. In this case, the newly added
+ * fields will be zeroed.
+ * The Jobid field can be removed, to guarantee compatibility with older
+ * clients that don't expect this field in the records they process.
+ *
+ * The following assumptions are being made:
+ * - CLF_RENAME will not be removed
+ * - CLF_JOBID will not be added without CLF_RENAME being added too
+ *
+ * @param[in,out] rec The record to remap.
+ * @param[in] crf_wanted Flags describing the desired extensions.
+ */
+static inline void changelog_remap_rec(struct changelog_rec *rec,
+ enum changelog_rec_flags crf_wanted)
+{
+ char *jid_mov, *rnm_mov;
+
+ crf_wanted &= CLF_SUPPORTED;
+
+ if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted)
+ return;
+
+ /* First move the variable-length name field */
+ memmove((char *)rec + changelog_rec_offset(crf_wanted),
+ changelog_rec_name(rec), rec->cr_namelen);
+
+ /* Locations of jobid and rename extensions in the remapped record */
+ jid_mov = (char *)rec +
+ changelog_rec_offset(crf_wanted & ~CLF_JOBID);
+ rnm_mov = (char *)rec +
+ changelog_rec_offset(crf_wanted & ~(CLF_JOBID | CLF_RENAME));
+
+ /* Move the extension fields to the desired positions */
+ if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID))
+ memmove(jid_mov, changelog_rec_jobid(rec),
+ sizeof(struct changelog_ext_jobid));
+
+ if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME))
+ memmove(rnm_mov, changelog_rec_rename(rec),
+ sizeof(struct changelog_ext_rename));
+
+ /* Clear newly added fields */
+ if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID))
+ memset(jid_mov, 0, sizeof(struct changelog_ext_jobid));
+
+ if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME))
+ memset(rnm_mov, 0, sizeof(struct changelog_ext_rename));
+
+ /* Update the record's flags accordingly */
+ rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted;
}
struct ioc_changelog {
@@ -978,7 +1141,7 @@ struct hsm_user_request {
/** Return pointer to data field in a hsm user request */
static inline void *hur_data(struct hsm_user_request *hur)
{
- return &(hur->hur_user_item[hur->hur_request.hr_itemcount]);
+ return &hur->hur_user_item[hur->hur_request.hr_itemcount];
}
/**
diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h
index 95a0be13c0fb..8eb394e64b25 100644
--- a/drivers/staging/lustre/lustre/include/lustre_cfg.h
+++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h
@@ -151,13 +151,11 @@ static inline void lustre_cfg_bufs_reset(struct lustre_cfg_bufs *bufs, char *nam
lustre_cfg_bufs_set_string(bufs, 0, name);
}
-static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index)
+static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, __u32 index)
{
- int i;
- int offset;
- int bufcount;
-
- LASSERT(index >= 0);
+ __u32 i;
+ size_t offset;
+ __u32 bufcount;
bufcount = lcfg->lcfg_bufcount;
if (index >= bufcount)
@@ -172,7 +170,7 @@ static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index)
static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs,
struct lustre_cfg *lcfg)
{
- int i;
+ __u32 i;
bufs->lcfg_bufcount = lcfg->lcfg_bufcount;
for (i = 0; i < bufs->lcfg_bufcount; i++) {
@@ -181,7 +179,7 @@ static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs,
}
}
-static inline char *lustre_cfg_string(struct lustre_cfg *lcfg, int index)
+static inline char *lustre_cfg_string(struct lustre_cfg *lcfg, __u32 index)
{
char *s;
@@ -197,8 +195,8 @@ static inline char *lustre_cfg_string(struct lustre_cfg *lcfg, int index)
* of data. Try to use the padding first though.
*/
if (s[lcfg->lcfg_buflens[index] - 1] != '\0') {
- int last = min((int)lcfg->lcfg_buflens[index],
- cfs_size_round(lcfg->lcfg_buflens[index]) - 1);
+ size_t last = min((size_t)lcfg->lcfg_buflens[index],
+ cfs_size_round(lcfg->lcfg_buflens[index]) - 1);
char lost = s[last];
s[last] = '\0';
@@ -210,10 +208,10 @@ static inline char *lustre_cfg_string(struct lustre_cfg *lcfg, int index)
return s;
}
-static inline int lustre_cfg_len(__u32 bufcount, __u32 *buflens)
+static inline __u32 lustre_cfg_len(__u32 bufcount, __u32 *buflens)
{
- int i;
- int len;
+ __u32 i;
+ __u32 len;
len = LCFG_HDR_SIZE(bufcount);
for (i = 0; i < bufcount; i++)
@@ -254,7 +252,7 @@ static inline void lustre_cfg_free(struct lustre_cfg *lcfg)
return;
}
-static inline int lustre_cfg_sanity_check(void *buf, int len)
+static inline int lustre_cfg_sanity_check(void *buf, size_t len)
{
struct lustre_cfg *lcfg = (struct lustre_cfg *)buf;
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/lustre_compat.h
index 1eb64ec4bed4..567c438e93cb 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/lustre_compat.h
@@ -30,8 +30,8 @@
* Lustre is a trademark of Sun Microsystems, Inc.
*/
-#ifndef _LINUX_COMPAT25_H
-#define _LINUX_COMPAT25_H
+#ifndef _LUSTRE_COMPAT_H
+#define _LUSTRE_COMPAT_H
#include <linux/fs_struct.h>
#include <linux/namei.h>
@@ -74,4 +74,4 @@
# define ext2_find_next_zero_bit find_next_zero_bit_le
#endif
-#endif /* _COMPAT25_H */
+#endif /* _LUSTRE_COMPAT_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
index 60051a5cfe20..d03534432624 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -573,6 +573,11 @@ enum lvb_type {
};
/**
+ * LDLM_GID_ANY is used to match any group id in ldlm_lock_match().
+ */
+#define LDLM_GID_ANY ((__u64)-1)
+
+/**
* LDLM lock structure
*
* Represents a single LDLM lock and its state in memory. Each lock is
@@ -968,6 +973,7 @@ struct ldlm_enqueue_info {
void *ei_cb_cp; /** lock completion callback */
void *ei_cb_gl; /** lock glimpse callback */
void *ei_cbdata; /** Data to be passed into callbacks. */
+ unsigned int ei_enq_slave:1; /* whether enqueue slave stripes */
};
extern struct obd_ops ldlm_obd_ops;
@@ -1281,16 +1287,6 @@ int ldlm_cli_cancel_list(struct list_head *head, int count,
int intent_disposition(struct ldlm_reply *rep, int flag);
void intent_set_disposition(struct ldlm_reply *rep, int flag);
-/* ioctls for trying requests */
-#define IOC_LDLM_TYPE 'f'
-#define IOC_LDLM_MIN_NR 40
-
-#define IOC_LDLM_TEST _IOWR('f', 40, long)
-#define IOC_LDLM_DUMP _IOWR('f', 41, long)
-#define IOC_LDLM_REGRESS_START _IOWR('f', 42, long)
-#define IOC_LDLM_REGRESS_STOP _IOWR('f', 43, long)
-#define IOC_LDLM_MAX_NR 43
-
/**
* "Modes" of acquiring lock_res, necessary to tell lockdep that taking more
* than one lock_res is dead-lock safe.
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
index e7e0c21a9b40..a0f064d237c9 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
@@ -28,21 +28,6 @@
/** l_flags bits marked as "all_flags" bits */
#define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F932FULL
-/** l_flags bits marked as "ast" bits */
-#define LDLM_FL_AST_MASK 0x0000000080008000ULL
-
-/** l_flags bits marked as "blocked" bits */
-#define LDLM_FL_BLOCKED_MASK 0x000000000000000EULL
-
-/** l_flags bits marked as "gone" bits */
-#define LDLM_FL_GONE_MASK 0x0006004000000000ULL
-
-/** l_flags bits marked as "inherit" bits */
-#define LDLM_FL_INHERIT_MASK 0x0000000000800000ULL
-
-/** l_flags bits marked as "off_wire" bits */
-#define LDLM_FL_OFF_WIRE_MASK 0x00FFFFFF00000000ULL
-
/** extent, mode, or resource changed */
#define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL /* bit 0 */
#define ldlm_is_lock_changed(_l) LDLM_TEST_FLAG((_l), 1ULL << 0)
@@ -372,6 +357,27 @@
#define ldlm_set_excl(_l) LDLM_SET_FLAG((_l), 1ULL << 55)
#define ldlm_clear_excl(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 55)
+/** l_flags bits marked as "ast" bits */
+#define LDLM_FL_AST_MASK (LDLM_FL_FLOCK_DEADLOCK |\
+ LDLM_FL_AST_DISCARD_DATA)
+
+/** l_flags bits marked as "blocked" bits */
+#define LDLM_FL_BLOCKED_MASK (LDLM_FL_BLOCK_GRANTED |\
+ LDLM_FL_BLOCK_CONV |\
+ LDLM_FL_BLOCK_WAIT)
+
+/** l_flags bits marked as "gone" bits */
+#define LDLM_FL_GONE_MASK (LDLM_FL_DESTROYED |\
+ LDLM_FL_FAILED)
+
+/** l_flags bits marked as "inherit" bits */
+/* Flags inherited from wire on enqueue/reply between client/server. */
+/* NO_TIMEOUT flag to force ldlm_lock_match() to wait with no timeout. */
+/* TEST_LOCK flag to prevent a TEST lock from being granted. */
+#define LDLM_FL_INHERIT_MASK (LDLM_FL_CANCEL_ON_BLOCK |\
+ LDLM_FL_NO_TIMEOUT |\
+ LDLM_FL_TEST_LOCK)
+
/** test for ldlm_lock flag bit set */
#define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0)
diff --git a/drivers/staging/lustre/lustre/include/lustre_eacl.h b/drivers/staging/lustre/lustre/include/lustre_eacl.h
index d1039e1ff70d..1e71a8638186 100644
--- a/drivers/staging/lustre/lustre/include/lustre_eacl.h
+++ b/drivers/staging/lustre/lustre/include/lustre_eacl.h
@@ -46,6 +46,7 @@
#ifdef CONFIG_FS_POSIX_ACL
+#include <linux/fs.h>
#include <linux/posix_acl_xattr.h>
typedef struct {
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
index 743671a547ef..316780693193 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fid.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -229,6 +229,7 @@ enum local_oid {
MDD_LOV_OBJ_OSEQ = 4121UL,
LFSCK_NAMESPACE_OID = 4122UL,
REMOTE_PARENT_DIR_OID = 4123UL,
+ SLAVE_LLOG_CATALOGS_OID = 4124UL,
};
static inline void lu_local_obj_fid(struct lu_fid *fid, __u32 oid)
@@ -392,21 +393,19 @@ struct ldlm_namespace;
* but was moved into name[1] along with the OID to avoid consuming the
* renaming name[2,3] fields that need to be used for the quota identifier.
*/
-static inline struct ldlm_res_id *
+static inline void
fid_build_reg_res_name(const struct lu_fid *fid, struct ldlm_res_id *res)
{
memset(res, 0, sizeof(*res));
res->name[LUSTRE_RES_ID_SEQ_OFF] = fid_seq(fid);
res->name[LUSTRE_RES_ID_VER_OID_OFF] = fid_ver_oid(fid);
-
- return res;
}
/*
* Return true if resource is for object identified by FID.
*/
-static inline int fid_res_name_eq(const struct lu_fid *fid,
- const struct ldlm_res_id *res)
+static inline bool fid_res_name_eq(const struct lu_fid *fid,
+ const struct ldlm_res_id *res)
{
return res->name[LUSTRE_RES_ID_SEQ_OFF] == fid_seq(fid) &&
res->name[LUSTRE_RES_ID_VER_OID_OFF] == fid_ver_oid(fid);
@@ -415,29 +414,25 @@ static inline int fid_res_name_eq(const struct lu_fid *fid,
/*
* Extract FID from LDLM resource. Reverse of fid_build_reg_res_name().
*/
-static inline struct lu_fid *
+static inline void
fid_extract_from_res_name(struct lu_fid *fid, const struct ldlm_res_id *res)
{
fid->f_seq = res->name[LUSTRE_RES_ID_SEQ_OFF];
fid->f_oid = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF]);
fid->f_ver = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32);
LASSERT(fid_res_name_eq(fid, res));
-
- return fid;
}
/*
* Build (DLM) resource identifier from global quota FID and quota ID.
*/
-static inline struct ldlm_res_id *
+static inline void
fid_build_quota_res_name(const struct lu_fid *glb_fid, union lquota_id *qid,
struct ldlm_res_id *res)
{
fid_build_reg_res_name(glb_fid, res);
res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] = fid_seq(&qid->qid_fid);
res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] = fid_ver_oid(&qid->qid_fid);
-
- return res;
}
/*
@@ -454,14 +449,12 @@ static inline void fid_extract_from_quota_res(struct lu_fid *glb_fid,
(__u32)(res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] >> 32);
}
-static inline struct ldlm_res_id *
+static inline void
fid_build_pdo_res_name(const struct lu_fid *fid, unsigned int hash,
struct ldlm_res_id *res)
{
fid_build_reg_res_name(fid, res);
res->name[LUSTRE_RES_ID_HSH_OFF] = hash;
-
- return res;
}
/**
@@ -482,7 +475,7 @@ fid_build_pdo_res_name(const struct lu_fid *fid, unsigned int hash,
* res will be built from normal FID directly, i.e. res[0] = f_seq,
* res[1] = f_oid + f_ver.
*/
-static inline void ostid_build_res_name(struct ost_id *oi,
+static inline void ostid_build_res_name(const struct ost_id *oi,
struct ldlm_res_id *name)
{
memset(name, 0, sizeof(*name));
@@ -497,8 +490,8 @@ static inline void ostid_build_res_name(struct ost_id *oi,
/**
* Return true if the resource is for the object identified by this id & group.
*/
-static inline int ostid_res_name_eq(struct ost_id *oi,
- struct ldlm_res_id *name)
+static inline int ostid_res_name_eq(const struct ost_id *oi,
+ const struct ldlm_res_id *name)
{
/* Note: it is just a trick here to save some effort, probably the
* correct way would be turn them into the FID and compare
@@ -603,13 +596,14 @@ static inline __u32 fid_flatten32(const struct lu_fid *fid)
* (from OID), or up to 128M inodes without collisions for new files.
*/
ino = ((seq & 0x000fffffULL) << 12) + ((seq >> 8) & 0xfffff000) +
- (seq >> (64 - (40-8)) & 0xffffff00) +
+ (seq >> (64 - (40 - 8)) & 0xffffff00) +
(fid_oid(fid) & 0xff000fff) + ((fid_oid(fid) & 0x00fff000) << 8);
return ino ? ino : fid_oid(fid);
}
-static inline int lu_fid_diff(struct lu_fid *fid1, struct lu_fid *fid2)
+static inline int lu_fid_diff(const struct lu_fid *fid1,
+ const struct lu_fid *fid2)
{
LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID", fid2:"DFID"\n",
PFID(fid1), PFID(fid2));
diff --git a/drivers/staging/lustre/lustre/include/lustre_handles.h b/drivers/staging/lustre/lustre/include/lustre_handles.h
index 1a63a6b9e116..e071bac9df57 100644
--- a/drivers/staging/lustre/lustre/include/lustre_handles.h
+++ b/drivers/staging/lustre/lustre/include/lustre_handles.h
@@ -66,6 +66,7 @@ struct portals_handle_ops {
struct portals_handle {
struct list_head h_link;
__u64 h_cookie;
+ const void *h_owner;
struct portals_handle_ops *h_ops;
/* newly added fields to handle the RCU issue. -jxiong */
@@ -75,15 +76,13 @@ struct portals_handle {
unsigned int h_in:1;
};
-#define RCU2HANDLE(rcu) container_of(rcu, struct portals_handle, h_rcu)
-
/* handles.c */
/* Add a handle to the hash table */
void class_handle_hash(struct portals_handle *,
struct portals_handle_ops *ops);
void class_handle_unhash(struct portals_handle *);
-void *class_handle2object(__u64 cookie);
+void *class_handle2object(__u64 cookie, const void *owner);
void class_handle_free_cb(struct rcu_head *rcu);
int class_handle_init(void);
void class_handle_cleanup(void);
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
index 4445be7a59dd..5461ba33d90c 100644
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -285,8 +285,10 @@ struct obd_import {
imp_resend_replay:1,
/* disable normal recovery, for test only. */
imp_no_pinger_recover:1,
+#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE
/* need IR MNE swab */
imp_need_mne_swab:1,
+#endif
/* import must be reconnected instead of
* chosing new connection
*/
@@ -305,28 +307,6 @@ struct obd_import {
time64_t imp_last_reply_time; /* for health check */
};
-typedef void (*obd_import_callback)(struct obd_import *imp, void *closure,
- int event, void *event_arg, void *cb_data);
-
-/**
- * Structure for import observer.
- * It is possible to register "observer" on an import and every time
- * something happens to an import (like connect/evict/disconnect)
- * obderver will get its callback called with event type
- */
-struct obd_import_observer {
- struct list_head oio_chain;
- obd_import_callback oio_cb;
- void *oio_cb_data;
-};
-
-void class_observe_import(struct obd_import *imp, obd_import_callback cb,
- void *cb_data);
-void class_unobserve_import(struct obd_import *imp, obd_import_callback cb,
- void *cb_data);
-void class_notify_import_observers(struct obd_import *imp, int event,
- void *event_arg);
-
/* import.c */
static inline unsigned int at_est2timeout(unsigned int val)
{
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index 06958f217fc8..6b231913ba2e 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -51,7 +51,6 @@
#include "lustre_cfg.h"
/* target.c */
-struct kstatfs;
struct ptlrpc_request;
struct obd_export;
struct lu_target;
@@ -74,325 +73,8 @@ int do_set_info_async(struct obd_import *imp,
u32 vallen, void *val,
struct ptlrpc_request_set *set);
-#define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079 */
-#define OBD_MAX_IOCTL_BUFFER CONFIG_LUSTRE_OBD_MAX_IOCTL_BUFFER
-
void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
-/* client.c */
-
-int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg);
-struct client_obd *client_conn2cli(struct lustre_handle *conn);
-
-struct md_open_data;
-struct obd_client_handle {
- struct lustre_handle och_fh;
- struct lu_fid och_fid;
- struct md_open_data *och_mod;
- struct lustre_handle och_lease_handle; /* open lock for lease */
- __u32 och_magic;
- fmode_t och_flags;
-};
-
-#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
-
-/* statfs_pack.c */
-void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
-
-/*
- * For md echo client
- */
-enum md_echo_cmd {
- ECHO_MD_CREATE = 1, /* Open/Create file on MDT */
- ECHO_MD_MKDIR = 2, /* Mkdir on MDT */
- ECHO_MD_DESTROY = 3, /* Unlink file on MDT */
- ECHO_MD_RMDIR = 4, /* Rmdir on MDT */
- ECHO_MD_LOOKUP = 5, /* Lookup on MDT */
- ECHO_MD_GETATTR = 6, /* Getattr on MDT */
- ECHO_MD_SETATTR = 7, /* Setattr on MDT */
- ECHO_MD_ALLOC_FID = 8, /* Get FIDs from MDT */
-};
-
-/*
- * OBD IOCTLS
- */
-#define OBD_IOCTL_VERSION 0x00010004
-
-struct obd_ioctl_data {
- __u32 ioc_len;
- __u32 ioc_version;
-
- union {
- __u64 ioc_cookie;
- __u64 ioc_u64_1;
- };
- union {
- __u32 ioc_conn1;
- __u32 ioc_u32_1;
- };
- union {
- __u32 ioc_conn2;
- __u32 ioc_u32_2;
- };
-
- struct obdo ioc_obdo1;
- struct obdo ioc_obdo2;
-
- u64 ioc_count;
- u64 ioc_offset;
- __u32 ioc_dev;
- __u32 ioc_command;
-
- __u64 ioc_nid;
- __u32 ioc_nal;
- __u32 ioc_type;
-
- /* buffers the kernel will treat as user pointers */
- __u32 ioc_plen1;
- void __user *ioc_pbuf1;
- __u32 ioc_plen2;
- void __user *ioc_pbuf2;
-
- /* inline buffers for various arguments */
- __u32 ioc_inllen1;
- char *ioc_inlbuf1;
- __u32 ioc_inllen2;
- char *ioc_inlbuf2;
- __u32 ioc_inllen3;
- char *ioc_inlbuf3;
- __u32 ioc_inllen4;
- char *ioc_inlbuf4;
-
- char ioc_bulk[0];
-};
-
-struct obd_ioctl_hdr {
- __u32 ioc_len;
- __u32 ioc_version;
-};
-
-static inline int obd_ioctl_packlen(struct obd_ioctl_data *data)
-{
- int len = cfs_size_round(sizeof(struct obd_ioctl_data));
-
- len += cfs_size_round(data->ioc_inllen1);
- len += cfs_size_round(data->ioc_inllen2);
- len += cfs_size_round(data->ioc_inllen3);
- len += cfs_size_round(data->ioc_inllen4);
- return len;
-}
-
-static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
-{
- if (data->ioc_len > OBD_MAX_IOCTL_BUFFER) {
- CERROR("OBD ioctl: ioc_len larger than %d\n",
- OBD_MAX_IOCTL_BUFFER);
- return 1;
- }
- if (data->ioc_inllen1 > OBD_MAX_IOCTL_BUFFER) {
- CERROR("OBD ioctl: ioc_inllen1 larger than ioc_len\n");
- return 1;
- }
- if (data->ioc_inllen2 > OBD_MAX_IOCTL_BUFFER) {
- CERROR("OBD ioctl: ioc_inllen2 larger than ioc_len\n");
- return 1;
- }
- if (data->ioc_inllen3 > OBD_MAX_IOCTL_BUFFER) {
- CERROR("OBD ioctl: ioc_inllen3 larger than ioc_len\n");
- return 1;
- }
- if (data->ioc_inllen4 > OBD_MAX_IOCTL_BUFFER) {
- CERROR("OBD ioctl: ioc_inllen4 larger than ioc_len\n");
- return 1;
- }
- if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
- CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n");
- return 1;
- }
- if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
- CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n");
- return 1;
- }
- if (data->ioc_inlbuf3 && !data->ioc_inllen3) {
- CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n");
- return 1;
- }
- if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
- CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n");
- return 1;
- }
- if (data->ioc_pbuf1 && !data->ioc_plen1) {
- CERROR("OBD ioctl: pbuf1 pointer but 0 length\n");
- return 1;
- }
- if (data->ioc_pbuf2 && !data->ioc_plen2) {
- CERROR("OBD ioctl: pbuf2 pointer but 0 length\n");
- return 1;
- }
- if (data->ioc_plen1 && !data->ioc_pbuf1) {
- CERROR("OBD ioctl: plen1 set but NULL pointer\n");
- return 1;
- }
- if (data->ioc_plen2 && !data->ioc_pbuf2) {
- CERROR("OBD ioctl: plen2 set but NULL pointer\n");
- return 1;
- }
- if (obd_ioctl_packlen(data) > data->ioc_len) {
- CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n",
- obd_ioctl_packlen(data), data->ioc_len);
- return 1;
- }
- return 0;
-}
-
-#include "obd_support.h"
-
-/* function defined in lustre/obdclass/<platform>/<platform>-module.c */
-int obd_ioctl_getdata(char **buf, int *len, void __user *arg);
-int obd_ioctl_popdata(void __user *arg, void *data, int len);
-
-static inline void obd_ioctl_freedata(char *buf, int len)
-{
- kvfree(buf);
- return;
-}
-
-/*
- * BSD ioctl description:
- * #define IOC_V1 _IOR(g, n1, long)
- * #define IOC_V2 _IOW(g, n2, long)
- *
- * ioctl(f, IOC_V1, arg);
- * arg will be treated as a long value,
- *
- * ioctl(f, IOC_V2, arg)
- * arg will be treated as a pointer, bsd will call
- * copyin(buf, arg, sizeof(long))
- *
- * To make BSD ioctl handles argument correctly and simplely,
- * we change _IOR to _IOWR so BSD will copyin obd_ioctl_data
- * for us. Does this change affect Linux? (XXX Liang)
- */
-#define OBD_IOC_DATA_TYPE long
-
-#define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DESTROY _IOW('f', 104, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_SETATTR _IOW('f', 107, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETATTR _IOWR ('f', 108, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SYNC _IOW('f', 114, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_COPY _IOWR('f', 120, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_MIGR _IOWR('f', 121, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PUNCH _IOWR('f', 122, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_MODULE_DEBUG _IOWR('f', 124, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_BRW_READ _IOWR('f', 125, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_BRW_WRITE _IOWR('f', 126, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_NAME2DEV _IOWR('f', 127, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_UUID2DEV _IOWR('f', 130, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_GETNAME _IOWR('f', 131, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETMDNAME _IOR('f', 131, char[MAX_OBD_NAME])
-#define OBD_IOC_GETDTNAME OBD_IOC_GETNAME
-
-#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLIENT_RECOVER _IOW('f', 133, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PING_TARGET _IOW('f', 136, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139)
-#define OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE)
-
-#define OBD_GET_VERSION _IOWR ('f', 144, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_GSS_SUPPORT _IOWR('f', 145, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_CHANGELOG_SEND _IOW('f', 148, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE)
-/* see also <lustre/lustre_user.h> for ioctls 151-153 */
-/* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */
-#define OBD_IOC_LOV_SETSTRIPE _IOW('f', 154, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */
-#define OBD_IOC_LOV_GETSTRIPE _IOW('f', 155, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */
-#define OBD_IOC_LOV_SETEA _IOW('f', 156, OBD_IOC_DATA_TYPE)
-/* see <lustre/lustre_user.h> for ioctls 157-159 */
-/* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */
-#define OBD_IOC_QUOTACHECK _IOW('f', 160, int)
-/* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */
-#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *)
-/* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */
-#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
-/* see also <lustre/lustre_user.h> for ioctls 163-176 */
-#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_DEREG _IOW('f', 178, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_CLEAR _IOW('f', 179, struct obd_ioctl_data)
-#define OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DORECORD _IOWR('f', 183, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PARAM _IOW('f', 187, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_LLOG_CATINFO is deprecated */
-#define OBD_IOC_LLOG_CATINFO _IOWR('f', 196, OBD_IOC_DATA_TYPE)
-
-/* #define ECHO_IOC_GET_STRIPE _IOWR('f', 200, OBD_IOC_DATA_TYPE) */
-/* #define ECHO_IOC_SET_STRIPE _IOWR('f', 201, OBD_IOC_DATA_TYPE) */
-/* #define ECHO_IOC_ENQUEUE _IOWR('f', 202, OBD_IOC_DATA_TYPE) */
-/* #define ECHO_IOC_CANCEL _IOWR('f', 203, OBD_IOC_DATA_TYPE) */
-
-#define OBD_IOC_GET_OBJ_VERSION _IOR('f', 210, OBD_IOC_DATA_TYPE)
-
-/* <lustre/lustre_user.h> defines ioctl number 218-219 */
-#define OBD_IOC_GET_MNTOPT _IOW('f', 220, mntopt_t)
-
-#define OBD_IOC_ECHO_MD _IOR('f', 221, struct obd_ioctl_data)
-#define OBD_IOC_ECHO_ALLOC_SEQ _IOWR('f', 222, struct obd_ioctl_data)
-
-#define OBD_IOC_START_LFSCK _IOWR('f', 230, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_STOP_LFSCK _IOW('f', 231, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PAUSE_LFSCK _IOW('f', 232, OBD_IOC_DATA_TYPE)
-
-/* XXX _IOWR('f', 250, long) has been defined in
- * libcfs/include/libcfs/libcfs_private.h for debug, don't use it
- */
-
-/* Until such time as we get_info the per-stripe maximum from the OST,
- * we define this to be 2T - 4k, which is the ext3 maxbytes.
- */
-#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL
-
-/* Special values for remove LOV EA from disk */
-#define LOVEA_DELETE_VALUES(size, count, offset) (size == 0 && count == 0 && \
- offset == (typeof(offset))(-1))
-
-/* #define POISON_BULK 0 */
-
/*
* l_wait_event is a flexible sleeping function, permitting simple caller
* configuration of interrupt and timeout sensitivity along with actions to
diff --git a/drivers/staging/lustre/lustre/include/lustre_linkea.h b/drivers/staging/lustre/lustre/include/lustre_linkea.h
new file mode 100644
index 000000000000..249e8bf4fa22
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_linkea.h
@@ -0,0 +1,79 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, 2014, Intel Corporation.
+ * Use is subject to license terms.
+ *
+ * Author: di wang <di.wang@intel.com>
+ */
+
+#define DEFAULT_LINKEA_SIZE 4096
+
+/**
+ * Cursor/state used while walking or editing a link EA buffer.
+ *
+ * linkea_first_entry() and linkea_next_entry() move ld_lee through the
+ * entries that follow the header ld_leh.
+ */
+struct linkea_data {
+ /**
+ * Buffer to keep link EA body.
+ */
+ struct lu_buf *ld_buf;
+ /**
+ * The matched header, entry and its length in the EA
+ */
+ struct link_ea_header *ld_leh;
+ struct link_ea_entry *ld_lee;
+ int ld_reclen;
+};
+
+int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf);
+int linkea_init(struct linkea_data *ldata);
+void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen,
+ struct lu_name *lname, struct lu_fid *pfid);
+int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname,
+ const struct lu_fid *pfid);
+int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
+ const struct lu_fid *pfid);
+void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname);
+int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname,
+ const struct lu_fid *pfid);
+
+/**
+ * Position the cursor ldata->ld_lee on the first entry of the link EA.
+ *
+ * The first entry is taken to start immediately after the header
+ * ldata->ld_leh. The cursor is set to NULL when the header's
+ * leh_reccount is zero.
+ *
+ * \param[in,out] ldata link EA state; ldata and ldata->ld_leh must be set
+ */
+static inline void linkea_first_entry(struct linkea_data *ldata)
+{
+ LASSERT(ldata);
+ LASSERT(ldata->ld_leh);
+
+ if (ldata->ld_leh->leh_reccount == 0)
+ ldata->ld_lee = NULL;
+ else
+ ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1);
+}
+
+/**
+ * Advance the cursor ldata->ld_lee by ldata->ld_reclen bytes.
+ *
+ * Does nothing when the cursor is already NULL. The cursor becomes NULL
+ * once it reaches or passes the address ld_leh + leh_len bytes.
+ *
+ * ld_reclen is not updated here; presumably the caller refreshes it for
+ * the current entry (e.g. via linkea_entry_unpack()) before each call --
+ * TODO confirm against callers.
+ *
+ * \param[in,out] ldata link EA state; ldata and ldata->ld_leh must be set
+ */
+static inline void linkea_next_entry(struct linkea_data *ldata)
+{
+ LASSERT(ldata);
+ LASSERT(ldata->ld_leh);
+
+ if (ldata->ld_lee) {
+ ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee +
+ ldata->ld_reclen);
+ if ((char *)ldata->ld_lee >= ((char *)ldata->ld_leh +
+ ldata->ld_leh->leh_len))
+ ldata->ld_lee = NULL;
+ }
+}
diff --git a/drivers/staging/lustre/lustre/include/lustre_lite.h b/drivers/staging/lustre/lustre/include/lustre_lite.h
deleted file mode 100644
index b16897702559..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_lite.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _LL_H
-#define _LL_H
-
-/** \defgroup lite lite
- *
- * @{
- */
-
-#include "linux/lustre_lite.h"
-
-#include "obd_class.h"
-#include "lustre_net.h"
-#include "lustre_mds.h"
-#include "lustre_ha.h"
-
-/* 4UL * 1024 * 1024 */
-#define LL_MAX_BLKSIZE_BITS (22)
-#define LL_MAX_BLKSIZE (1UL<<LL_MAX_BLKSIZE_BITS)
-
-/*
- * This is embedded into llite super-blocks to keep track of
- * connect flags (capabilities) supported by all imports given mount is
- * connected to.
- */
-struct lustre_client_ocd {
- /*
- * This is conjunction of connect_flags across all imports (LOVs) this
- * mount is connected to. This field is updated by cl_ocd_update()
- * under ->lco_lock.
- */
- __u64 lco_flags;
- struct mutex lco_lock;
- struct obd_export *lco_md_exp;
- struct obd_export *lco_dt_exp;
-};
-
-/*
- * Chain of hash overflow pages.
- */
-struct ll_dir_chain {
- /* XXX something. Later */
-};
-
-static inline void ll_dir_chain_init(struct ll_dir_chain *chain)
-{
-}
-
-static inline void ll_dir_chain_fini(struct ll_dir_chain *chain)
-{
-}
-
-static inline unsigned long hash_x_index(__u64 hash, int hash64)
-{
- if (BITS_PER_LONG == 32 && hash64)
- hash >>= 32;
- /* save hash 0 as index 0 because otherwise we'll save it at
- * page index end (~0UL) and it causes truncate_inode_pages_range()
- * to loop forever.
- */
- return ~0UL - (hash + !hash);
-}
-
-/** @} lite */
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h
new file mode 100644
index 000000000000..d7f7afa8dfa7
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_lmv.h
@@ -0,0 +1,184 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Intel Corporation.
+ */
+/*
+ * lustre/include/lustre_lmv.h
+ *
+ * Lustre LMV structures and functions.
+ *
+ * Author: Di Wang <di.wang@intel.com>
+ */
+
+#ifndef _LUSTRE_LMV_H
+#define _LUSTRE_LMV_H
+#include "lustre/lustre_idl.h"
+
+/*
+ * Per-stripe record stored in lmv_stripe_md::lsm_md_oinfo[].
+ *
+ * lmo_fid is the field compared stripe by stripe in lsm_md_eq().
+ * NOTE(review): lmo_mds presumably holds the MDT index of the stripe and
+ * lmo_root the stripe's inode -- confirm against the LMV code.
+ */
+struct lmv_oinfo {
+ struct lu_fid lmo_fid;
+ u32 lmo_mds;
+ struct inode *lmo_root;
+};
+
+/*
+ * In-memory LMV stripe descriptor.
+ *
+ * lsm_md_oinfo is a trailing zero-length array; lsm_md_eq() indexes it
+ * from 0 up to lsm_md_stripe_count - 1, i.e. one lmv_oinfo per stripe.
+ */
+struct lmv_stripe_md {
+ __u32 lsm_md_magic;
+ __u32 lsm_md_stripe_count;
+ __u32 lsm_md_master_mdt_index;
+ __u32 lsm_md_hash_type;
+ __u32 lsm_md_layout_version;
+ __u32 lsm_md_default_count;
+ __u32 lsm_md_default_index;
+ char lsm_md_pool_name[LOV_MAXPOOLNAME + 1];
+ struct lmv_oinfo lsm_md_oinfo[0];
+};
+
+/**
+ * Compare two in-memory LMV stripe descriptors for equality.
+ *
+ * The descriptors are equal when magic, stripe count, master MDT index,
+ * hash type, layout version and pool name all match and every per-stripe
+ * FID matches. lsm_md_default_count and lsm_md_default_index are not
+ * compared.
+ *
+ * \param[in] lsm1 first descriptor
+ * \param[in] lsm2 second descriptor
+ *
+ * \retval true the descriptors are equal
+ * \retval false the descriptors differ
+ */
+static inline bool
+lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
+{
+ __u32 idx;
+
+ /* strcmp() returns 0 on a match, so a non-zero result means "differ" */
+ if (lsm1->lsm_md_magic != lsm2->lsm_md_magic ||
+ lsm1->lsm_md_stripe_count != lsm2->lsm_md_stripe_count ||
+ lsm1->lsm_md_master_mdt_index != lsm2->lsm_md_master_mdt_index ||
+ lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type ||
+ lsm1->lsm_md_layout_version != lsm2->lsm_md_layout_version ||
+ strcmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name) != 0)
+ return false;
+
+ for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
+ if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
+ &lsm2->lsm_md_oinfo[idx].lmo_fid))
+ return false;
+ }
+
+ return true;
+}
+
+union lmv_mds_md;
+
+int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
+ const union lmv_mds_md *lmm, int stripe_count);
+
+/**
+ * Thin wrapper around lmv_unpack_md(): passes \a lsmp and \a stripe_count
+ * through with a NULL export and NULL on-wire metadata, and returns its
+ * result. The allocation behaviour itself lives in lmv_unpack_md(), which
+ * is only declared here.
+ */
+static inline int lmv_alloc_memmd(struct lmv_stripe_md **lsmp, int stripe_count)
+{
+ return lmv_unpack_md(NULL, lsmp, NULL, stripe_count);
+}
+
+/**
+ * Thin wrapper around lmv_unpack_md(): calls it with a NULL export, NULL
+ * on-wire metadata and a stripe count of 0, discarding the return value.
+ *
+ * The address passed is that of the local parameter copy, so the caller's
+ * own pointer variable is never modified by this call.
+ */
+static inline void lmv_free_memmd(struct lmv_stripe_md *lsm)
+{
+ lmv_unpack_md(NULL, &lsm, NULL, 0);
+}
+
+/**
+ * Convert an LMV v1 layout from little-endian to CPU byte order.
+ *
+ * Converts the magic, stripe count, master MDT index, hash type, layout
+ * version and every stripe FID from \a lmv_src into \a lmv_dst.
+ *
+ * \param[out] lmv_dst destination, filled in CPU byte order
+ * \param[in] lmv_src source in little-endian byte order
+ */
+static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
+ const struct lmv_mds_md_v1 *lmv_src)
+{
+ __u32 i;
+
+ lmv_dst->lmv_magic = le32_to_cpu(lmv_src->lmv_magic);
+ lmv_dst->lmv_stripe_count = le32_to_cpu(lmv_src->lmv_stripe_count);
+ lmv_dst->lmv_master_mdt_index =
+ le32_to_cpu(lmv_src->lmv_master_mdt_index);
+ lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type);
+ lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version);
+
+ /*
+ * Bound the loop with the already-converted count: the raw source
+ * field may still be little-endian, which is wrong on big-endian hosts.
+ */
+ for (i = 0; i < lmv_dst->lmv_stripe_count; i++)
+ fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i],
+ &lmv_src->lmv_stripe_fids[i]);
+}
+
+/**
+ * Convert LMV metadata from little-endian to CPU byte order, dispatching
+ * on the magic read (via le32_to_cpu()) from \a lmv_src.
+ *
+ * Only LMV_MAGIC_V1 is handled; for any other magic \a lmv_dst is left
+ * untouched.
+ *
+ * \param[out] lmv_dst destination
+ * \param[in] lmv_src little-endian source
+ */
+static inline void lmv_le_to_cpu(union lmv_mds_md *lmv_dst,
+ const union lmv_mds_md *lmv_src)
+{
+ switch (le32_to_cpu(lmv_src->lmv_magic)) {
+ case LMV_MAGIC_V1:
+ lmv1_le_to_cpu(&lmv_dst->lmv_md_v1, &lmv_src->lmv_md_v1);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * Sum every byte of \a name (read as unsigned char) and reduce the sum
+ * modulo \a count. This hash is only for testing purposes.
+ *
+ * \a count is the divisor of the final modulo and must be non-zero.
+ *
+ * \param[in] count number of buckets
+ * \param[in] name bytes to hash
+ * \param[in] namelen number of bytes of \a name to hash
+ *
+ * \retval value in the range [0, count)
+ */
+static inline unsigned int
+lmv_hash_all_chars(unsigned int count, const char *name, int namelen)
+{
+ const unsigned char *p = (const unsigned char *)name;
+ unsigned int c = 0;
+
+ while (--namelen >= 0)
+ c += p[namelen];
+
+ c = c % count;
+
+ return c;
+}
+
+/**
+ * Hash \a name with lustre_hash_fnv_1a_64() and reduce the 64-bit result
+ * modulo \a count; the value returned is do_div()'s result, i.e. the
+ * remainder of hash / count.
+ *
+ * \param[in] count number of buckets (divisor)
+ * \param[in] name bytes to hash
+ * \param[in] namelen number of bytes of \a name to hash
+ */
+static inline unsigned int
+lmv_hash_fnv1a(unsigned int count, const char *name, int namelen)
+{
+ __u64 hash;
+
+ hash = lustre_hash_fnv_1a_64(name, namelen);
+
+ return do_div(hash, count);
+}
+
+/**
+ * Map a name to a stripe index using the hash selected by
+ * \a lmv_hash_type (masked with LMV_HASH_TYPE_MASK).
+ *
+ * \param[in] lmv_hash_type hash type, possibly carrying flag bits
+ * \param[in] stripe_count number of stripes
+ * \param[in] name name to hash
+ * \param[in] namelen length of \a name; asserted to be > 0
+ *
+ * \retval 0 when \a stripe_count <= 1 or LMV_HASH_FLAG_MIGRATION is set
+ * \retval index computed by the selected hash function otherwise
+ * \retval -EBADFD when the masked hash type is not one handled here
+ */
+static inline int lmv_name_to_stripe_index(__u32 lmv_hash_type,
+ unsigned int stripe_count,
+ const char *name, int namelen)
+{
+ __u32 hash_type = lmv_hash_type & LMV_HASH_TYPE_MASK;
+ int idx;
+
+ LASSERT(namelen > 0);
+ if (stripe_count <= 1)
+ return 0;
+
+ /* for migrating object, always start from 0 stripe */
+ if (lmv_hash_type & LMV_HASH_FLAG_MIGRATION)
+ return 0;
+
+ switch (hash_type) {
+ case LMV_HASH_TYPE_ALL_CHARS:
+ idx = lmv_hash_all_chars(stripe_count, name, namelen);
+ break;
+ case LMV_HASH_TYPE_FNV_1A_64:
+ idx = lmv_hash_fnv1a(stripe_count, name, namelen);
+ break;
+ default:
+ idx = -EBADFD;
+ break;
+ }
+ CDEBUG(D_INFO, "name %.*s hash_type %d idx %d\n", namelen, name,
+ hash_type, idx);
+
+ return idx;
+}
+
+/**
+ * Return true when \a type, masked with LMV_HASH_TYPE_MASK, equals
+ * LMV_HASH_TYPE_FNV_1A_64 or LMV_HASH_TYPE_ALL_CHARS; false otherwise.
+ */
+static inline bool lmv_is_known_hash_type(__u32 type)
+{
+ return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 ||
+ (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS;
+}
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h
index b96e02317bfc..995b266932e3 100644
--- a/drivers/staging/lustre/lustre/include/lustre_log.h
+++ b/drivers/staging/lustre/lustre/include/lustre_log.h
@@ -277,12 +277,11 @@ static inline void llog_ctxt_put(struct llog_ctxt *ctxt)
__llog_ctxt_put(NULL, ctxt);
}
-static inline void llog_group_init(struct obd_llog_group *olg, int group)
+static inline void llog_group_init(struct obd_llog_group *olg)
{
init_waitqueue_head(&olg->olg_waitq);
spin_lock_init(&olg->olg_lock);
mutex_init(&olg->olg_cat_processing);
- olg->olg_seq = group;
}
static inline int llog_group_set_ctxt(struct obd_llog_group *olg,
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
index fa62b95d351f..8fc2d3f2dfd6 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -96,7 +96,7 @@ static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
struct lookup_intent *it)
{
if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
- it->it_op == IT_LAYOUT))
+ it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
return;
/* This would normally block until the existing request finishes.
@@ -136,7 +136,7 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
struct lookup_intent *it)
{
if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
- it->it_op == IT_LAYOUT))
+ it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
return;
if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */
@@ -156,34 +156,44 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
mutex_unlock(&lck->rpcl_mutex);
}
-/* Update the maximum observed easize and cookiesize. The default easize
- * and cookiesize is initialized to the minimum value but allowed to grow
- * up to a single page in size if required to handle the common case.
+/**
+ * Update the maximum possible easize and cookiesize.
+ *
+ * The values are learned from ptlrpc replies sent by the MDT. The
+ * default easize and cookiesize is initialized to the minimum value but
+ * allowed to grow up to a single page in size if required to handle the
+ * common case.
+ *
+ * \see client_obd::cl_default_mds_easize and
+ * client_obd::cl_default_mds_cookiesize
+ *
+ * \param[in] exp export for MDC device
+ * \param[in] body body of ptlrpc reply from MDT
+ *
*/
static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
struct mdt_body *body)
{
- if (body->valid & OBD_MD_FLMODEASIZE) {
+ if (body->mbo_valid & OBD_MD_FLMODEASIZE) {
struct client_obd *cli = &exp->exp_obd->u.cli;
+ u32 def_cookiesize, def_easize;
- if (cli->cl_max_mds_easize < body->max_mdsize) {
- cli->cl_max_mds_easize = body->max_mdsize;
- cli->cl_default_mds_easize =
- min_t(__u32, body->max_mdsize, PAGE_SIZE);
- }
- if (cli->cl_max_mds_cookiesize < body->max_cookiesize) {
- cli->cl_max_mds_cookiesize = body->max_cookiesize;
- cli->cl_default_mds_cookiesize =
- min_t(__u32, body->max_cookiesize, PAGE_SIZE);
- }
+ if (cli->cl_max_mds_easize < body->mbo_max_mdsize)
+ cli->cl_max_mds_easize = body->mbo_max_mdsize;
+
+ def_easize = min_t(__u32, body->mbo_max_mdsize,
+ OBD_MAX_DEFAULT_EA_SIZE);
+ cli->cl_default_mds_easize = def_easize;
+
+ if (cli->cl_max_mds_cookiesize < body->mbo_max_cookiesize)
+ cli->cl_max_mds_cookiesize = body->mbo_max_cookiesize;
+
+ def_cookiesize = min_t(__u32, body->mbo_max_cookiesize,
+ OBD_MAX_DEFAULT_COOKIE_SIZE);
+ cli->cl_default_mds_cookiesize = def_cookiesize;
}
}
-struct mdc_cache_waiter {
- struct list_head mcw_entry;
- wait_queue_head_t mcw_waitq;
-};
-
/* mdc/mdc_locks.c */
int it_open_error(int phase, struct lookup_intent *it);
diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h
index 4104bd9bd5c4..23a7e4f78e9a 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mds.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mds.h
@@ -58,9 +58,6 @@ struct mds_group_info {
#define MDD_OBD_NAME "mdd_obd"
#define MDD_OBD_UUID "mdd_obd_uuid"
-/* these are local flags, used only on the client, private */
-#define M_CHECK_STALE 0200000000
-
/** @} mds */
#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index d5debd615fdf..e9aba99ee52a 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -261,7 +261,10 @@
#define MDS_MAXREQSIZE (5 * 1024) /* >= 4736 */
-#define OST_MAXREQSIZE (5 * 1024)
+/**
+ * FIEMAP request can be 4K+ for now
+ */
+#define OST_MAXREQSIZE (16 * 1024)
/* Macro to hide a typecast. */
#define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
@@ -570,13 +573,13 @@ struct ptlrpc_nrs_pol_ops {
*
* \param[in,out] policy The policy being initialized
*/
- int (*op_policy_init) (struct ptlrpc_nrs_policy *policy);
+ int (*op_policy_init)(struct ptlrpc_nrs_policy *policy);
/**
* Called during policy unregistration; this operation is optional.
*
* \param[in,out] policy The policy being unregistered/finalized
*/
- void (*op_policy_fini) (struct ptlrpc_nrs_policy *policy);
+ void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
/**
* Called when activating a policy via lprocfs; policies allocate and
* initialize their resources here; this operation is optional.
@@ -585,7 +588,7 @@ struct ptlrpc_nrs_pol_ops {
*
* \see nrs_policy_start_locked()
*/
- int (*op_policy_start) (struct ptlrpc_nrs_policy *policy);
+ int (*op_policy_start)(struct ptlrpc_nrs_policy *policy);
/**
* Called when deactivating a policy via lprocfs; policies deallocate
* their resources here; this operation is optional
@@ -594,7 +597,7 @@ struct ptlrpc_nrs_pol_ops {
*
* \see nrs_policy_stop0()
*/
- void (*op_policy_stop) (struct ptlrpc_nrs_policy *policy);
+ void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
/**
* Used for policy-specific operations; i.e. not generic ones like
* \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
@@ -610,8 +613,8 @@ struct ptlrpc_nrs_pol_ops {
*
* \see ptlrpc_nrs_policy_control()
*/
- int (*op_policy_ctl) (struct ptlrpc_nrs_policy *policy,
- enum ptlrpc_nrs_ctl opc, void *arg);
+ int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
+ enum ptlrpc_nrs_ctl opc, void *arg);
/**
* Called when obtaining references to the resources of the resource
@@ -648,11 +651,11 @@ struct ptlrpc_nrs_pol_ops {
* \see ptlrpc_nrs_req_initialize()
* \see ptlrpc_nrs_hpreq_add_nolock()
*/
- int (*op_res_get) (struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq,
- const struct ptlrpc_nrs_resource *parent,
- struct ptlrpc_nrs_resource **resp,
- bool moving_req);
+ int (*op_res_get)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq,
+ const struct ptlrpc_nrs_resource *parent,
+ struct ptlrpc_nrs_resource **resp,
+ bool moving_req);
/**
* Called when releasing references taken for resources in the resource
* hierarchy for the request; this operation is optional.
@@ -663,8 +666,8 @@ struct ptlrpc_nrs_pol_ops {
* \see ptlrpc_nrs_req_finalize()
* \see ptlrpc_nrs_hpreq_add_nolock()
*/
- void (*op_res_put) (struct ptlrpc_nrs_policy *policy,
- const struct ptlrpc_nrs_resource *res);
+ void (*op_res_put)(struct ptlrpc_nrs_policy *policy,
+ const struct ptlrpc_nrs_resource *res);
/**
* Obtains a request for handling from the policy, and optionally
@@ -683,8 +686,8 @@ struct ptlrpc_nrs_pol_ops {
* \see ptlrpc_nrs_req_get_nolock()
*/
struct ptlrpc_nrs_request *
- (*op_req_get) (struct ptlrpc_nrs_policy *policy, bool peek,
- bool force);
+ (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
+ bool force);
/**
* Called when attempting to add a request to a policy for later
* handling; this operation is mandatory.
@@ -697,8 +700,8 @@ struct ptlrpc_nrs_pol_ops {
*
* \see ptlrpc_nrs_req_add_nolock()
*/
- int (*op_req_enqueue) (struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
+ int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
/**
* Removes a request from the policy's set of pending requests. Normally
* called after a request has been polled successfully from the policy
@@ -707,8 +710,8 @@ struct ptlrpc_nrs_pol_ops {
* \param[in,out] policy The policy the request \a nrq belongs to
* \param[in,out] nrq The request to dequeue
*/
- void (*op_req_dequeue) (struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
+ void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
/**
* Called after the request being carried out. Could be used for
* job/resource control; this operation is optional.
@@ -721,8 +724,8 @@ struct ptlrpc_nrs_pol_ops {
*
* \see ptlrpc_nrs_req_stop_nolock()
*/
- void (*op_req_stop) (struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
+ void (*op_req_stop)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
/**
* Registers the policy's lprocfs interface with a PTLRPC service.
*
@@ -731,7 +734,7 @@ struct ptlrpc_nrs_pol_ops {
* \retval 0 success
* \retval != 0 error
*/
- int (*op_lprocfs_init) (struct ptlrpc_service *svc);
+ int (*op_lprocfs_init)(struct ptlrpc_service *svc);
/**
* Unegisters the policy's lprocfs interface with a PTLRPC service.
*
@@ -743,7 +746,7 @@ struct ptlrpc_nrs_pol_ops {
*
* \param[in] svc The service
*/
- void (*op_lprocfs_fini) (struct ptlrpc_service *svc);
+ void (*op_lprocfs_fini)(struct ptlrpc_service *svc);
};
/**
@@ -1628,7 +1631,7 @@ static inline bool ptlrpc_nrs_req_can_move(struct ptlrpc_request *req)
/**
* Returns 1 if request buffer at offset \a index was already swabbed
*/
-static inline int lustre_req_swabbed(struct ptlrpc_request *req, int index)
+static inline int lustre_req_swabbed(struct ptlrpc_request *req, size_t index)
{
LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
return req->rq_req_swab_mask & (1 << index);
@@ -1637,7 +1640,7 @@ static inline int lustre_req_swabbed(struct ptlrpc_request *req, int index)
/**
* Returns 1 if request reply buffer at offset \a index was already swabbed
*/
-static inline int lustre_rep_swabbed(struct ptlrpc_request *req, int index)
+static inline int lustre_rep_swabbed(struct ptlrpc_request *req, size_t index)
{
LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
return req->rq_rep_swab_mask & (1 << index);
@@ -1662,7 +1665,8 @@ static inline int ptlrpc_rep_need_swab(struct ptlrpc_request *req)
/**
* Mark request buffer at offset \a index that it was already swabbed
*/
-static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
+static inline void lustre_set_req_swabbed(struct ptlrpc_request *req,
+ size_t index)
{
LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
LASSERT((req->rq_req_swab_mask & (1 << index)) == 0);
@@ -1672,7 +1676,8 @@ static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
/**
* Mark request reply buffer at offset \a index that it was already swabbed
*/
-static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req, int index)
+static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req,
+ size_t index)
{
LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
LASSERT((req->rq_rep_swab_mask & (1 << index)) == 0);
@@ -2403,7 +2408,6 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags);
int ptlrpc_reply(struct ptlrpc_request *req);
int ptlrpc_send_error(struct ptlrpc_request *req, int difficult);
int ptlrpc_error(struct ptlrpc_request *req);
-void ptlrpc_resend_req(struct ptlrpc_request *request);
int ptlrpc_at_get_net_latency(struct ptlrpc_request *req);
int ptl_send_rpc(struct ptlrpc_request *request, int noreply);
int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd);
@@ -2423,23 +2427,17 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid);
int ptlrpc_queue_wait(struct ptlrpc_request *req);
int ptlrpc_replay_req(struct ptlrpc_request *req);
-int ptlrpc_unregister_reply(struct ptlrpc_request *req, int async);
void ptlrpc_abort_inflight(struct obd_import *imp);
void ptlrpc_abort_set(struct ptlrpc_request_set *set);
struct ptlrpc_request_set *ptlrpc_prep_set(void);
struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
void *arg);
-int ptlrpc_set_next_timeout(struct ptlrpc_request_set *);
int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set);
int ptlrpc_set_wait(struct ptlrpc_request_set *);
-int ptlrpc_expired_set(void *data);
-void ptlrpc_interrupted_set(void *data);
void ptlrpc_mark_interrupted(struct ptlrpc_request *req);
void ptlrpc_set_destroy(struct ptlrpc_request_set *);
void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *);
-void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
- struct ptlrpc_request *req);
void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool);
int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
@@ -2611,9 +2609,9 @@ int ptlrpc_reconnect_import(struct obd_import *imp);
* @{
*/
int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
- int index);
+ u32 index);
void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
- int index);
+ u32 index);
int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len);
int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len);
@@ -2632,27 +2630,27 @@ int lustre_shrink_msg(struct lustre_msg *msg, int segment,
unsigned int newlen, int move_data);
void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
int __lustre_unpack_msg(struct lustre_msg *m, int len);
-int lustre_msg_hdr_size(__u32 magic, int count);
-int lustre_msg_size(__u32 magic, int count, __u32 *lengths);
-int lustre_msg_size_v2(int count, __u32 *lengths);
-int lustre_packed_msg_size(struct lustre_msg *msg);
-int lustre_msg_early_size(void);
-void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size);
-void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen);
-int lustre_msg_buflen(struct lustre_msg *m, int n);
-int lustre_msg_bufcount(struct lustre_msg *m);
-char *lustre_msg_string(struct lustre_msg *m, int n, int max_len);
+u32 lustre_msg_hdr_size(__u32 magic, u32 count);
+u32 lustre_msg_size(__u32 magic, int count, __u32 *lengths);
+u32 lustre_msg_size_v2(int count, __u32 *lengths);
+u32 lustre_packed_msg_size(struct lustre_msg *msg);
+u32 lustre_msg_early_size(void);
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, u32 n, u32 min_size);
+void *lustre_msg_buf(struct lustre_msg *m, u32 n, u32 minlen);
+u32 lustre_msg_buflen(struct lustre_msg *m, u32 n);
+u32 lustre_msg_bufcount(struct lustre_msg *m);
+char *lustre_msg_string(struct lustre_msg *m, u32 n, u32 max_len);
__u32 lustre_msghdr_get_flags(struct lustre_msg *msg);
void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags);
__u32 lustre_msg_get_flags(struct lustre_msg *msg);
-void lustre_msg_add_flags(struct lustre_msg *msg, int flags);
-void lustre_msg_set_flags(struct lustre_msg *msg, int flags);
-void lustre_msg_clear_flags(struct lustre_msg *msg, int flags);
+void lustre_msg_add_flags(struct lustre_msg *msg, u32 flags);
+void lustre_msg_set_flags(struct lustre_msg *msg, u32 flags);
+void lustre_msg_clear_flags(struct lustre_msg *msg, u32 flags);
__u32 lustre_msg_get_op_flags(struct lustre_msg *msg);
-void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags);
+void lustre_msg_add_op_flags(struct lustre_msg *msg, u32 flags);
struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg);
__u32 lustre_msg_get_type(struct lustre_msg *msg);
-void lustre_msg_add_version(struct lustre_msg *msg, int version);
+void lustre_msg_add_version(struct lustre_msg *msg, u32 version);
__u32 lustre_msg_get_opc(struct lustre_msg *msg);
__u64 lustre_msg_get_last_committed(struct lustre_msg *msg);
__u64 *lustre_msg_get_versions(struct lustre_msg *msg);
diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h
index 82aadd32c2b8..8061a04ee806 100644
--- a/drivers/staging/lustre/lustre/include/lustre_param.h
+++ b/drivers/staging/lustre/lustre/include/lustre_param.h
@@ -39,6 +39,9 @@
#ifndef _LUSTRE_PARAM_H
#define _LUSTRE_PARAM_H
+#include "../../include/linux/libcfs/libcfs.h"
+#include "../../include/linux/lnet/types.h"
+
/** \defgroup param param
*
* @{
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h
index 5842cb18b49e..5842cb18b49e 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
+++ b/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
index 544a43c862b9..a13558e53274 100644
--- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -76,7 +76,8 @@ void req_capsule_init(struct req_capsule *pill, struct ptlrpc_request *req,
void req_capsule_fini(struct req_capsule *pill);
void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt);
-int req_capsule_filled_sizes(struct req_capsule *pill, enum req_location loc);
+size_t req_capsule_filled_sizes(struct req_capsule *pill,
+ enum req_location loc);
int req_capsule_server_pack(struct req_capsule *pill);
void *req_capsule_client_get(struct req_capsule *pill,
@@ -86,27 +87,27 @@ void *req_capsule_client_swab_get(struct req_capsule *pill,
void *swabber);
void *req_capsule_client_sized_get(struct req_capsule *pill,
const struct req_msg_field *field,
- int len);
+ u32 len);
void *req_capsule_server_get(struct req_capsule *pill,
const struct req_msg_field *field);
void *req_capsule_server_sized_get(struct req_capsule *pill,
const struct req_msg_field *field,
- int len);
+ u32 len);
void *req_capsule_server_swab_get(struct req_capsule *pill,
const struct req_msg_field *field,
void *swabber);
void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
const struct req_msg_field *field,
- int len, void *swabber);
+ u32 len, void *swabber);
void req_capsule_set_size(struct req_capsule *pill,
const struct req_msg_field *field,
- enum req_location loc, int size);
-int req_capsule_get_size(const struct req_capsule *pill,
+ enum req_location loc, u32 size);
+u32 req_capsule_get_size(const struct req_capsule *pill,
const struct req_msg_field *field,
enum req_location loc);
-int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc);
-int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
+u32 req_capsule_msg_size(struct req_capsule *pill, enum req_location loc);
+u32 req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
enum req_location loc);
void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt);
@@ -115,8 +116,7 @@ int req_capsule_has_field(const struct req_capsule *pill,
enum req_location loc);
void req_capsule_shrink(struct req_capsule *pill,
const struct req_msg_field *field,
- unsigned int newlen,
- enum req_location loc);
+ u32 newlen, enum req_location loc);
int req_layout_init(void);
void req_layout_fini(void);
@@ -149,14 +149,11 @@ extern struct req_format RQF_MDS_GETATTR;
extern struct req_format RQF_MDS_GETATTR_NAME;
extern struct req_format RQF_MDS_CLOSE;
extern struct req_format RQF_MDS_RELEASE_CLOSE;
-extern struct req_format RQF_MDS_PIN;
-extern struct req_format RQF_MDS_UNPIN;
extern struct req_format RQF_MDS_CONNECT;
extern struct req_format RQF_MDS_DISCONNECT;
extern struct req_format RQF_MDS_GET_INFO;
extern struct req_format RQF_MDS_READPAGE;
extern struct req_format RQF_MDS_WRITEPAGE;
-extern struct req_format RQF_MDS_IS_SUBDIR;
extern struct req_format RQF_MDS_DONE_WRITING;
extern struct req_format RQF_MDS_REINT;
extern struct req_format RQF_MDS_REINT_CREATE;
diff --git a/drivers/staging/lustre/lustre/include/lustre_ver.h b/drivers/staging/lustre/lustre/include/lustre_ver.h
index 64559a16f4de..19c9135e2273 100644
--- a/drivers/staging/lustre/lustre/include/lustre_ver.h
+++ b/drivers/staging/lustre/lustre/include/lustre_ver.h
@@ -2,14 +2,21 @@
#define _LUSTRE_VER_H_
#define LUSTRE_MAJOR 2
-#define LUSTRE_MINOR 4
-#define LUSTRE_PATCH 60
+#define LUSTRE_MINOR 6
+#define LUSTRE_PATCH 99
#define LUSTRE_FIX 0
-#define LUSTRE_VERSION_STRING "2.4.60"
+#define LUSTRE_VERSION_STRING "2.6.99"
-#define LUSTRE_VERSION_CODE OBD_OCD_VERSION(LUSTRE_MAJOR, \
- LUSTRE_MINOR, LUSTRE_PATCH, \
- LUSTRE_FIX)
+#define OBD_OCD_VERSION(major, minor, patch, fix) \
+ (((major) << 24) + ((minor) << 16) + ((patch) << 8) + (fix))
+
+#define OBD_OCD_VERSION_MAJOR(version) ((int)((version) >> 24) & 255)
+#define OBD_OCD_VERSION_MINOR(version) ((int)((version) >> 16) & 255)
+#define OBD_OCD_VERSION_PATCH(version) ((int)((version) >> 8) & 255)
+#define OBD_OCD_VERSION_FIX(version) ((int)((version) >> 0) & 255)
+
+#define LUSTRE_VERSION_CODE \
+ OBD_OCD_VERSION(LUSTRE_MAJOR, LUSTRE_MINOR, LUSTRE_PATCH, LUSTRE_FIX)
/*
* If lustre version of client and servers it connects to differs by more
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index a1bc2c478ff9..f6fc4dd05bd6 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -35,21 +35,13 @@
#include <linux/spinlock.h>
-#define IOC_OSC_TYPE 'h'
-#define IOC_OSC_MIN_NR 20
-#define IOC_OSC_SET_ACTIVE _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
-#define IOC_OSC_MAX_NR 50
-
-#define IOC_MDC_TYPE 'i'
-#define IOC_MDC_MIN_NR 20
-#define IOC_MDC_MAX_NR 50
-
#include "lustre/lustre_idl.h"
#include "lustre_lib.h"
#include "lu_ref.h"
#include "lustre_export.h"
#include "lustre_fid.h"
#include "lustre_fld.h"
+#include "lustre_handles.h"
#include "lustre_intent.h"
#define MAX_OBD_DEVICES 8192
@@ -81,6 +73,13 @@ static inline void loi_init(struct lov_oinfo *loi)
{
}
+/*
+ * If we are unable to get the maximum object size from the OST in
+ * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using
+ * the old maximum object size from ext3.
+ */
+#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL
+
struct lov_stripe_md {
atomic_t lsm_refc;
spinlock_t lsm_lock;
@@ -89,31 +88,17 @@ struct lov_stripe_md {
/* maximum possible file size, might change as OSTs status changes,
* e.g. disconnected, deactivated
*/
- __u64 lsm_maxbytes;
- struct {
- /* Public members. */
- struct ost_id lw_object_oi; /* lov object id/seq */
-
- /* LOV-private members start here -- only for use in lov/. */
- __u32 lw_magic;
- __u32 lw_stripe_size; /* size of the stripe */
- __u32 lw_pattern; /* striping pattern (RAID0, RAID1) */
- __u16 lw_stripe_count; /* number of objects being striped over */
- __u16 lw_layout_gen; /* generation of the layout */
- char lw_pool_name[LOV_MAXPOOLNAME]; /* pool name */
- } lsm_wire;
-
+ __u64 lsm_maxbytes;
+ struct ost_id lsm_oi;
+ __u32 lsm_magic;
+ __u32 lsm_stripe_size;
+ __u32 lsm_pattern; /* striping pattern (RAID0, RAID1) */
+ __u16 lsm_stripe_count;
+ __u16 lsm_layout_gen;
+ char lsm_pool_name[LOV_MAXPOOLNAME + 1];
struct lov_oinfo *lsm_oinfo[0];
};
-#define lsm_oi lsm_wire.lw_object_oi
-#define lsm_magic lsm_wire.lw_magic
-#define lsm_layout_gen lsm_wire.lw_layout_gen
-#define lsm_stripe_size lsm_wire.lw_stripe_size
-#define lsm_pattern lsm_wire.lw_pattern
-#define lsm_stripe_count lsm_wire.lw_stripe_count
-#define lsm_pool_name lsm_wire.lw_pool_name
-
static inline bool lsm_is_released(struct lov_stripe_md *lsm)
{
return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED);
@@ -177,31 +162,10 @@ struct obd_type {
struct brw_page {
u64 off;
struct page *pg;
- int count;
+ unsigned int count;
u32 flag;
};
-/* llog contexts */
-enum llog_ctxt_id {
- LLOG_CONFIG_ORIG_CTXT = 0,
- LLOG_CONFIG_REPL_CTXT,
- LLOG_MDS_OST_ORIG_CTXT,
- LLOG_MDS_OST_REPL_CTXT,
- LLOG_SIZE_ORIG_CTXT,
- LLOG_SIZE_REPL_CTXT,
- LLOG_RD1_ORIG_CTXT,
- LLOG_RD1_REPL_CTXT,
- LLOG_TEST_ORIG_CTXT,
- LLOG_TEST_REPL_CTXT,
- LLOG_LOVEA_ORIG_CTXT,
- LLOG_LOVEA_REPL_CTXT,
- LLOG_CHANGELOG_ORIG_CTXT, /**< changelog generation on mdd */
- LLOG_CHANGELOG_REPL_CTXT, /**< changelog access on clients */
- LLOG_CHANGELOG_USER_ORIG_CTXT, /**< for multiple changelog consumers */
- LLOG_AGENT_ORIG_CTXT, /**< agent requests generation on cdt */
- LLOG_MAX_CTXTS
-};
-
struct timeout_item {
enum timeout_event ti_event;
unsigned long ti_timeout;
@@ -211,11 +175,12 @@ struct timeout_item {
struct list_head ti_chain;
};
-#define OSC_MAX_RIF_DEFAULT 8
-#define OSC_MAX_RIF_MAX 256
-#define OSC_MAX_DIRTY_DEFAULT (OSC_MAX_RIF_DEFAULT * 4)
-#define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < MAX_LONG bytes */
-#define OSC_DEFAULT_RESENDS 10
+#define OBD_MAX_RIF_DEFAULT 8
+#define OBD_MAX_RIF_MAX 512
+#define OSC_MAX_RIF_MAX 256
+#define OSC_MAX_DIRTY_DEFAULT (OBD_MAX_RIF_DEFAULT * 4)
+#define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < MAX_LONG bytes */
+#define OSC_DEFAULT_RESENDS 10
/* possible values for fo_sync_lock_cancel */
enum {
@@ -225,40 +190,74 @@ enum {
NUM_SYNC_ON_CANCEL_STATES
};
-#define MDC_MAX_RIF_DEFAULT 8
-#define MDC_MAX_RIF_MAX 512
-
enum obd_cl_sem_lock_class {
OBD_CLI_SEM_NORMAL,
OBD_CLI_SEM_MGC,
OBD_CLI_SEM_MDCOSC,
};
+/*
+ * Limit reply buffer size for striping data to one x86_64 page. This
+ * value is chosen to fit the striping data for common use cases while
+ * staying well below the limit at which the buffer must be backed by
+ * vmalloc(). Excessive use of vmalloc() may cause spinlock contention
+ * on the MDS.
+ */
+#define OBD_MAX_DEFAULT_EA_SIZE 4096
+#define OBD_MAX_DEFAULT_COOKIE_SIZE 4096
+
struct mdc_rpc_lock;
struct obd_import;
struct client_obd {
struct rw_semaphore cl_sem;
struct obd_uuid cl_target_uuid;
struct obd_import *cl_import; /* ptlrpc connection state */
- int cl_conn_count;
- /* max_mds_easize is purely a performance thing so we don't have to
- * call obd_size_diskmd() all the time.
+ size_t cl_conn_count;
+ /*
+ * Cache maximum and default values for easize and cookiesize. This is
+ * strictly a performance optimization to minimize calls to
+ * obd_size_diskmd(). The default values are used to calculate the
+ * initial size of a request buffer. The ptlrpc layer will resize the
+ * buffer as needed to accommodate a larger reply from the
+ * server. The default values should be small enough to avoid wasted
+ * memory and excessive use of vmalloc(), yet large enough to avoid
+ * reallocating the buffer in the common use case.
*/
- int cl_default_mds_easize;
- int cl_max_mds_easize;
- int cl_default_mds_cookiesize;
- int cl_max_mds_cookiesize;
+ /*
+ * Default EA size for striping attributes. It is initialized at
+ * mount-time based on the default stripe width of the filesystem,
+ * then it tracks the largest observed EA size advertised by
+ * the MDT, up to a maximum value of OBD_MAX_DEFAULT_EA_SIZE.
+ */
+ u32 cl_default_mds_easize;
+ /* Maximum possible EA size computed at mount-time based on
+ * the number of OSTs in the filesystem. May be increased at
+ * run-time if a larger observed size is advertised by the MDT.
+ */
+ u32 cl_max_mds_easize;
+ /* Default cookie size for llog cookies (see struct llog_cookie). It is
+ * initialized to zero at mount-time, then it tracks the largest
+ * observed cookie size advertised by the MDT, up to a maximum value of
+ * OBD_MAX_DEFAULT_COOKIE_SIZE. Note that llog_cookies are not
+ * used by clients communicating with MDS versions 2.4.0 and later.
+ */
+ u32 cl_default_mds_cookiesize;
+ /* Maximum possible cookie size computed at mount-time based on
+ * the number of OSTs in the filesystem. May be increased at
+ * run-time if a larger observed size is advertised by the MDT.
+ */
+ u32 cl_max_mds_cookiesize;
enum lustre_sec_part cl_sp_me;
enum lustre_sec_part cl_sp_to;
struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */
/* the grant values are protected by loi_list_lock below */
- long cl_dirty; /* all _dirty_ in bytes */
- long cl_dirty_max; /* allowed w/o rpc */
- long cl_dirty_transit; /* dirty synchronous */
- long cl_avail_grant; /* bytes of credit for ost */
- long cl_lost_grant; /* lost credits (trunc) */
+ unsigned long cl_dirty_pages; /* all _dirty_ in pages */
+ unsigned long cl_dirty_max_pages; /* allowed w/o rpc */
+ unsigned long cl_dirty_transit; /* dirty synchronous */
+ unsigned long cl_avail_grant; /* bytes of credit for ost */
+ unsigned long cl_lost_grant; /* lost credits (trunc) */
/* since we allocate grant by blocks, we don't know how many grant will
* be used to add a page into cache. As a solution, we reserve maximum
@@ -275,8 +274,7 @@ struct client_obd {
* the extent size. A chunk is max(PAGE_SIZE, OST block size)
*/
int cl_chunkbits;
- int cl_chunk;
- int cl_extent_tax; /* extent overhead, by bytes */
+ unsigned int cl_extent_tax; /* extent overhead, by bytes */
/* keep track of objects that have lois that contain pages which
* have been queued for async brw. this lock also protects the
@@ -301,13 +299,13 @@ struct client_obd {
struct list_head cl_loi_hp_ready_list;
struct list_head cl_loi_write_list;
struct list_head cl_loi_read_list;
- int cl_r_in_flight;
- int cl_w_in_flight;
+ __u32 cl_r_in_flight;
+ __u32 cl_w_in_flight;
/* just a sum of the loi/lop pending numbers to be exported by sysfs */
atomic_t cl_pending_w_pages;
atomic_t cl_pending_r_pages;
__u32 cl_max_pages_per_rpc;
- int cl_max_rpcs_in_flight;
+ __u32 cl_max_rpcs_in_flight;
struct obd_histogram cl_read_rpc_hist;
struct obd_histogram cl_write_rpc_hist;
struct obd_histogram cl_read_page_hist;
@@ -318,13 +316,13 @@ struct client_obd {
/* lru for osc caching pages */
struct cl_client_cache *cl_cache;
struct list_head cl_lru_osc; /* member of cl_cache->ccc_lru */
- atomic_t *cl_lru_left;
- atomic_t cl_lru_busy;
+ atomic_long_t *cl_lru_left;
+ atomic_long_t cl_lru_busy;
+ atomic_long_t cl_lru_in_list;
atomic_t cl_lru_shrinkers;
- atomic_t cl_lru_in_list;
struct list_head cl_lru_list; /* lru page list */
spinlock_t cl_lru_list_lock; /* page list protector */
- atomic_t cl_unstable_count;
+ atomic_long_t cl_unstable_count;
/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
atomic_t cl_destroy_in_flight;
@@ -350,7 +348,7 @@ struct client_obd {
/* used by quotacheck when the servers are older than 2.4 */
int cl_qchk_stat; /* quotacheck stat of the peer */
#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */
-#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 7, 50, 0)
+#if OBD_OCD_VERSION(2, 7, 53, 0) < LUSTRE_VERSION_CODE
#warning "please consider removing quotacheck compatibility code"
#endif
@@ -431,7 +429,7 @@ struct lov_obd {
struct lmv_tgt_desc {
struct obd_uuid ltd_uuid;
struct obd_export *ltd_exp;
- int ltd_idx;
+ u32 ltd_idx;
struct mutex ltd_fid_mutex;
unsigned long ltd_active:1; /* target up for requests */
};
@@ -458,9 +456,8 @@ struct lmv_obd {
int max_def_easize;
int max_cookiesize;
int max_def_cookiesize;
- int server_timeout;
- int tgts_size; /* size of tgts array */
+ u32 tgts_size; /* size of tgts array */
struct lmv_tgt_desc **tgts;
struct obd_connect_data conn_data;
@@ -470,12 +467,11 @@ struct lmv_obd {
struct niobuf_local {
__u64 lnb_file_offset;
__u32 lnb_page_offset;
- __u32 len;
- __u32 flags;
- struct page *page;
- struct dentry *dentry;
- int lnb_grant_used;
- int rc;
+ __u32 lnb_len;
+ __u32 lnb_flags;
+ struct page *lnb_page;
+ void *lnb_data;
+ int lnb_rc;
};
#define LUSTRE_FLD_NAME "fld"
@@ -517,7 +513,6 @@ struct niobuf_local {
#define N_LOCAL_TEMP_PAGE 0x10000000
struct obd_trans_info {
- __u64 oti_transno;
__u64 oti_xid;
/* Only used on the server side for tracking acks. */
struct oti_req_ack_lock {
@@ -527,50 +522,11 @@ struct obd_trans_info {
void *oti_handle;
struct llog_cookie oti_onecookie;
struct llog_cookie *oti_logcookies;
- int oti_numcookies;
- /** synchronous write is needed */
- unsigned long oti_sync_write:1;
- /* initial thread handling transaction */
- struct ptlrpc_thread *oti_thread;
- __u32 oti_conn_cnt;
/** VBR: versions */
__u64 oti_pre_version;
- /** JobID */
- char *oti_jobid;
-
- struct obd_uuid *oti_ost_uuid;
};
-static inline void oti_alloc_cookies(struct obd_trans_info *oti,
- int num_cookies)
-{
- if (!oti)
- return;
-
- if (num_cookies == 1)
- oti->oti_logcookies = &oti->oti_onecookie;
- else
- oti->oti_logcookies = libcfs_kvzalloc(num_cookies * sizeof(oti->oti_onecookie),
- GFP_NOFS);
-
- oti->oti_numcookies = num_cookies;
-}
-
-static inline void oti_free_cookies(struct obd_trans_info *oti)
-{
- if (!oti || !oti->oti_logcookies)
- return;
-
- if (oti->oti_logcookies == &oti->oti_onecookie)
- LASSERT(oti->oti_numcookies == 1);
- else
- kvfree(oti->oti_logcookies);
-
- oti->oti_logcookies = NULL;
- oti->oti_numcookies = 0;
-}
-
/*
* Events signalled through obd_notify() upcall-chain.
*/
@@ -616,7 +572,6 @@ struct target_recovery_data {
};
struct obd_llog_group {
- int olg_seq;
struct llog_ctxt *olg_ctxts[LLOG_MAX_CTXTS];
wait_queue_head_t olg_waitq;
spinlock_t olg_lock;
@@ -625,7 +580,6 @@ struct obd_llog_group {
/* corresponds to one of the obd's */
#define OBD_DEVICE_MAGIC 0XAB5CD6EF
-#define OBD_DEV_BY_DEVNAME 0xffffd0de
struct lvfs_run_ctxt {
struct dt_device *dt;
@@ -653,7 +607,6 @@ struct obd_device {
obd_starting:1, /* started setup */
obd_force:1, /* cleanup with > 0 obd refcount */
obd_fail:1, /* cleanup with failover */
- obd_async_recov:1, /* allow asynchronous orphan cleanup */
obd_no_conn:1, /* deny new connections */
obd_inactive:1, /* device active/inactive
* (for sysfs status only!!)
@@ -728,9 +681,6 @@ struct obd_device {
struct completion obd_kobj_unregister;
};
-#define OBD_LLOG_FL_SENDNOW 0x0001
-#define OBD_LLOG_FL_EXIT 0x0002
-
enum obd_cleanup_stage {
/* Special case hack for MDS LOVs */
OBD_CLEANUP_EARLY,
@@ -740,8 +690,6 @@ enum obd_cleanup_stage {
/* get/set_info keys */
#define KEY_ASYNC "async"
-#define KEY_BLOCKSIZE_BITS "blocksize_bits"
-#define KEY_BLOCKSIZE "blocksize"
#define KEY_CHANGELOG_CLEAR "changelog_clear"
#define KEY_FID2PATH "fid2path"
#define KEY_CHECKSUM "checksum"
@@ -753,30 +701,22 @@ enum obd_cleanup_stage {
#define KEY_GRANT_SHRINK "grant_shrink"
#define KEY_HSM_COPYTOOL_SEND "hsm_send"
#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
-#define KEY_INIT_RECOV "initial_recov"
#define KEY_INTERMDS "inter_mds"
#define KEY_LAST_ID "last_id"
#define KEY_LAST_FID "last_fid"
-#define KEY_LOCK_TO_STRIPE "lock_to_stripe"
#define KEY_LOVDESC "lovdesc"
-#define KEY_LOV_IDX "lov_idx"
#define KEY_MAX_EASIZE "max_easize"
#define KEY_DEFAULT_EASIZE "default_easize"
-#define KEY_MDS_CONN "mds_conn"
#define KEY_MGSSEC "mgssec"
-#define KEY_NEXT_ID "next_id"
#define KEY_READ_ONLY "read-only"
#define KEY_REGISTER_TARGET "register_target"
#define KEY_SET_FS "set_fs"
#define KEY_TGT_COUNT "tgt_count"
/* KEY_SET_INFO in lustre_idl.h */
#define KEY_SPTLRPC_CONF "sptlrpc_conf"
-#define KEY_CONNECT_FLAG "connect_flags"
-#define KEY_SYNC_LOCK_CANCEL "sync_lock_cancel"
#define KEY_CACHE_SET "cache_set"
#define KEY_CACHE_LRU_SHRINK "cache_lru_shrink"
-#define KEY_CHANGELOG_INDEX "changelog_index"
struct lu_context;
@@ -801,9 +741,11 @@ static inline int it_to_lock_mode(struct lookup_intent *it)
/* CREAT needs to be tested before open (both could be set) */
if (it->it_op & IT_CREAT)
return LCK_CW;
- else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP |
+ else if (it->it_op & (IT_GETATTR | IT_OPEN | IT_LOOKUP |
IT_LAYOUT))
return LCK_CR;
+ else if (it->it_op & IT_READDIR)
+ return LCK_PR;
else if (it->it_op & IT_GETXATTR)
return LCK_PR;
else if (it->it_op & IT_SETXATTR)
@@ -813,6 +755,14 @@ static inline int it_to_lock_mode(struct lookup_intent *it)
return -EINVAL;
}
+enum md_cli_flags {
+ CLI_SET_MEA = BIT(0),
+ CLI_RM_ENTRY = BIT(1),
+ CLI_HASH64 = BIT(2),
+ CLI_API32 = BIT(3),
+ CLI_MIGRATE = BIT(4),
+};
+
struct md_op_data {
struct lu_fid op_fid1; /* operation fid1 (usually parent) */
struct lu_fid op_fid2; /* operation fid2 (usually child) */
@@ -822,7 +772,7 @@ struct md_op_data {
struct lustre_handle op_handle;
s64 op_mod_time;
const char *op_name;
- int op_namelen;
+ size_t op_namelen;
__u32 op_mode;
struct lmv_stripe_md *op_mea1;
struct lmv_stripe_md *op_mea2;
@@ -831,6 +781,7 @@ struct md_op_data {
__u32 op_fsgid;
cfs_cap_t op_cap;
void *op_data;
+ size_t op_data_size;
/* iattr fields and blocks. */
struct iattr op_attr;
@@ -845,28 +796,29 @@ struct md_op_data {
/* Various operation flags. */
enum mds_op_bias op_bias;
- /* Operation type */
- __u32 op_opc;
-
/* Used by readdir */
__u64 op_offset;
/* Used by readdir */
- __u32 op_npages;
+ __u32 op_max_pages;
/* used to transfer info between the stacks of MD client
* see enum md_cli_flags
*/
- __u32 op_cli_flags;
+ enum md_cli_flags op_cli_flags;
/* File object data version for HSM release, on client */
__u64 op_data_version;
struct lustre_handle op_lease_handle;
+
+ /* default stripe offset */
+ __u32 op_default_stripe_offset;
};
-enum op_cli_flags {
- CLI_SET_MEA = 1 << 0,
- CLI_RM_ENTRY = 1 << 1,
+struct md_callback {
+ int (*md_blocking_ast)(struct ldlm_lock *lock,
+ struct ldlm_lock_desc *desc,
+ void *data, int flag);
};
struct md_enqueue_info;
@@ -879,8 +831,7 @@ struct md_enqueue_info {
struct inode *mi_dir;
int (*mi_cb)(struct ptlrpc_request *req,
struct md_enqueue_info *minfo, int rc);
- __u64 mi_cbdata;
- unsigned int mi_generation;
+ void *mi_cbdata;
};
struct obd_ops {
@@ -894,8 +845,6 @@ struct obd_ops {
__u32 keylen, void *key,
__u32 vallen, void *val,
struct ptlrpc_request_set *set);
- int (*attach)(struct obd_device *dev, u32 len, void *data);
- int (*detach)(struct obd_device *dev);
int (*setup)(struct obd_device *dev, struct lustre_cfg *cfg);
int (*precleanup)(struct obd_device *dev,
enum obd_cleanup_stage cleanup_stage);
@@ -927,8 +876,8 @@ struct obd_ops {
int (*fid_fini)(struct obd_device *obd);
/* Allocate new fid according to passed @hint. */
- int (*fid_alloc)(struct obd_export *exp, struct lu_fid *fid,
- struct md_op_data *op_data);
+ int (*fid_alloc)(const struct lu_env *env, struct obd_export *exp,
+ struct lu_fid *fid, struct md_op_data *op_data);
/*
* Object with @fid is getting deleted, we may want to do something
@@ -943,13 +892,10 @@ struct obd_ops {
int (*unpackmd)(struct obd_export *exp,
struct lov_stripe_md **mem_tgt,
struct lov_mds_md *disk_src, int disk_len);
- int (*preallocate)(struct lustre_handle *, u32 *req, u64 *ids);
int (*create)(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct lov_stripe_md **ea,
- struct obd_trans_info *oti);
+ struct obdo *oa, struct obd_trans_info *oti);
int (*destroy)(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct lov_stripe_md *ea,
- struct obd_trans_info *oti, struct obd_export *md_exp);
+ struct obdo *oa, struct obd_trans_info *oti);
int (*setattr)(const struct lu_env *, struct obd_export *exp,
struct obd_info *oinfo, struct obd_trans_info *oti);
int (*setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
@@ -959,8 +905,6 @@ struct obd_ops {
struct obd_info *oinfo);
int (*getattr_async)(struct obd_export *exp, struct obd_info *oinfo,
struct ptlrpc_request_set *set);
- int (*adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm,
- u64 size, int shrink);
int (*preprw)(const struct lu_env *env, int cmd,
struct obd_export *exp, struct obdo *oa, int objcount,
struct obd_ioobj *obj, struct niobuf_remote *remote,
@@ -972,8 +916,6 @@ struct obd_ops {
struct niobuf_remote *remote, int pages,
struct niobuf_local *local,
struct obd_trans_info *oti, int rc);
- int (*find_cbdata)(struct obd_export *, struct lov_stripe_md *,
- ldlm_iterator_t it, void *data);
int (*init_export)(struct obd_export *exp);
int (*destroy_export)(struct obd_export *exp);
@@ -1009,27 +951,11 @@ struct obd_ops {
*/
};
-enum {
- LUSTRE_OPC_MKDIR = (1 << 0),
- LUSTRE_OPC_SYMLINK = (1 << 1),
- LUSTRE_OPC_MKNOD = (1 << 2),
- LUSTRE_OPC_CREATE = (1 << 3),
- LUSTRE_OPC_ANY = (1 << 4)
-};
-
/* lmv structures */
-#define MEA_MAGIC_LAST_CHAR 0xb2221ca1
-#define MEA_MAGIC_ALL_CHARS 0xb222a11c
-#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b
-
-#define MAX_HASH_SIZE_32 0x7fffffffUL
-#define MAX_HASH_SIZE 0x7fffffffffffffffULL
-#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL
-
struct lustre_md {
struct mdt_body *body;
struct lov_stripe_md *lsm;
- struct lmv_stripe_md *mea;
+ struct lmv_stripe_md *lmv;
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *posix_acl;
#endif
@@ -1044,48 +970,55 @@ struct md_open_data {
bool mod_is_create;
};
+struct obd_client_handle {
+ struct lustre_handle och_fh;
+ struct lu_fid och_fid;
+ struct md_open_data *och_mod;
+ struct lustre_handle och_lease_handle; /* open lock for lease */
+ __u32 och_magic;
+ int och_flags;
+};
+
+#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
+
struct lookup_intent;
+struct cl_attr;
struct md_ops {
int (*getstatus)(struct obd_export *, struct lu_fid *);
int (*null_inode)(struct obd_export *, const struct lu_fid *);
- int (*find_cbdata)(struct obd_export *, const struct lu_fid *,
- ldlm_iterator_t, void *);
int (*close)(struct obd_export *, struct md_op_data *,
struct md_open_data *, struct ptlrpc_request **);
int (*create)(struct obd_export *, struct md_op_data *,
- const void *, int, int, __u32, __u32, cfs_cap_t,
- __u64, struct ptlrpc_request **);
+ const void *, size_t, umode_t, uid_t, gid_t,
+ cfs_cap_t, __u64, struct ptlrpc_request **);
int (*done_writing)(struct obd_export *, struct md_op_data *,
struct md_open_data *);
int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
+ const ldlm_policy_data_t *,
struct lookup_intent *, struct md_op_data *,
- struct lustre_handle *, void *, int,
- struct ptlrpc_request **, __u64);
+ struct lustre_handle *, __u64);
int (*getattr)(struct obd_export *, struct md_op_data *,
struct ptlrpc_request **);
int (*getattr_name)(struct obd_export *, struct md_op_data *,
struct ptlrpc_request **);
int (*intent_lock)(struct obd_export *, struct md_op_data *,
- void *, int, struct lookup_intent *, int,
+ struct lookup_intent *,
struct ptlrpc_request **,
ldlm_blocking_callback, __u64);
int (*link)(struct obd_export *, struct md_op_data *,
struct ptlrpc_request **);
int (*rename)(struct obd_export *, struct md_op_data *,
- const char *, int, const char *, int,
+ const char *, size_t, const char *, size_t,
struct ptlrpc_request **);
- int (*is_subdir)(struct obd_export *, const struct lu_fid *,
- const struct lu_fid *,
- struct ptlrpc_request **);
int (*setattr)(struct obd_export *, struct md_op_data *, void *,
- int, void *, int, struct ptlrpc_request **,
+ size_t, void *, size_t, struct ptlrpc_request **,
struct md_open_data **mod);
int (*sync)(struct obd_export *, const struct lu_fid *,
struct ptlrpc_request **);
- int (*readpage)(struct obd_export *, struct md_op_data *,
- struct page **, struct ptlrpc_request **);
-
+ int (*read_page)(struct obd_export *, struct md_op_data *,
+ struct md_callback *cb_op, __u64 hash_offset,
+ struct page **ppage);
int (*unlink)(struct obd_export *, struct md_op_data *,
struct ptlrpc_request **);
@@ -1097,7 +1030,7 @@ struct md_ops {
u64, const char *, const char *, int, int, int,
struct ptlrpc_request **);
- int (*init_ea_size)(struct obd_export *, int, int, int, int);
+ int (*init_ea_size)(struct obd_export *, u32, u32, u32, u32);
int (*get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
struct obd_export *, struct obd_export *,
@@ -1105,12 +1038,17 @@ struct md_ops {
int (*free_lustre_md)(struct obd_export *, struct lustre_md *);
+ int (*merge_attr)(struct obd_export *,
+ const struct lmv_stripe_md *lsm,
+ struct cl_attr *attr, ldlm_blocking_callback);
+
int (*set_open_replay_data)(struct obd_export *,
struct obd_client_handle *,
struct lookup_intent *);
int (*clear_open_replay_data)(struct obd_export *,
struct obd_client_handle *);
- int (*set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *);
+ int (*set_lock_data)(struct obd_export *, const struct lustre_handle *,
+ void *, __u64 *);
enum ldlm_mode (*lock_match)(struct obd_export *, __u64,
const struct lu_fid *, enum ldlm_type,
@@ -1121,6 +1059,11 @@ struct md_ops {
ldlm_policy_data_t *, enum ldlm_mode,
enum ldlm_cancel_flags flags, void *opaque);
+ int (*get_fid_from_lsm)(struct obd_export *,
+ const struct lmv_stripe_md *,
+ const char *name, int namelen,
+ struct lu_fid *fid);
+
int (*intent_getattr_async)(struct obd_export *,
struct md_enqueue_info *,
struct ldlm_enqueue_info *);
@@ -1137,8 +1080,6 @@ struct md_ops {
struct lsm_operations {
void (*lsm_free)(struct lov_stripe_md *);
- int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa,
- struct obd_export *md_exp);
void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, u64 *,
u64 *);
void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, u64 *,
@@ -1164,10 +1105,6 @@ static inline const struct lsm_operations *lsm_op_find(int magic)
}
}
-/* Requests for obd_extent_calc() */
-#define OBD_CALC_STRIPE_START 1
-#define OBD_CALC_STRIPE_END 2
-
static inline struct md_open_data *obd_mod_alloc(void)
{
struct md_open_data *mod;
@@ -1211,7 +1148,8 @@ static inline const char *lu_dev_name(const struct lu_device *lu_dev)
return lu_dev->ld_obd->obd_name;
}
-static inline bool filename_is_volatile(const char *name, int namelen, int *idx)
+static inline bool filename_is_volatile(const char *name, size_t namelen,
+ int *idx)
{
const char *start;
char *end;
@@ -1259,4 +1197,28 @@ static inline int cli_brw_size(struct obd_device *obd)
return obd->u.cli.cl_max_pages_per_rpc << PAGE_SHIFT;
}
+/*
+ * Keep the client's dirty-page ceiling in step with its RPC settings.
+ *
+ * When the RPC size or the number of RPCs in flight grows, the dirty-page
+ * budget has to grow with it; otherwise the client runs out of dirty space
+ * while many RPCs are being generated and ends up sending fragmented RPCs
+ * over the network.  The resulting limit is capped at totalram_pages / 8.
+ */
+static inline void client_adjust_max_dirty(struct client_obd *cli)
+{
+	if (cli->cl_dirty_max_pages <= 0) {
+		/* not initialised yet: start from the built-in default */
+		cli->cl_dirty_max_pages =
+			(OSC_MAX_DIRTY_DEFAULT * 1024 * 1024) >> PAGE_SHIFT;
+	} else {
+		/* pages needed to fill every in-flight RPC completely */
+		unsigned long rpc_window_pages = cli->cl_max_rpcs_in_flight *
+						 cli->cl_max_pages_per_rpc;
+
+		if (cli->cl_dirty_max_pages < rpc_window_pages)
+			cli->cl_dirty_max_pages = rpc_window_pages;
+	}
+
+	/* never allow more than one eighth of total RAM pages to be dirty */
+	if (cli->cl_dirty_max_pages > totalram_pages / 8)
+		cli->cl_dirty_max_pages = totalram_pages / 8;
+}
+
#endif /* __OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
index 6482a937000b..16094dbec08b 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -56,7 +56,6 @@
#define OBD_STATFS_FOR_MDT0 0x0008 /* The statfs is only for retrieving
* information from MDT0.
*/
-#define OBD_FL_PUNCH 0x00000001 /* To indicate it is punch operation */
/* OBD Device Declarations */
extern struct obd_device *obd_devs[MAX_OBD_DEVICES];
@@ -97,6 +96,11 @@ int obd_zombie_impexp_init(void);
void obd_zombie_impexp_stop(void);
void obd_zombie_barrier(void);
+int obd_get_request_slot(struct client_obd *cli);
+void obd_put_request_slot(struct client_obd *cli);
+__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli);
+int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max);
+
struct llog_handle;
struct llog_rec_hdr;
typedef int (*llog_cb_t)(const struct lu_env *, struct llog_handle *,
@@ -265,10 +269,10 @@ static inline int lprocfs_climp_check(struct obd_device *obd)
struct inode;
struct lu_attr;
struct obdo;
-void obdo_refresh_inode(struct inode *dst, struct obdo *src, u32 valid);
+void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid);
-void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
-void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, u32 valid);
+void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj);
+void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid);
#define OBT(dev) (dev)->obd_type
#define OBP(dev, op) (dev)->obd_type->typ_dt_ops->op
@@ -673,15 +677,6 @@ static inline int obd_unpackmd(struct obd_export *exp,
return rc;
}
-/* helper functions */
-static inline int obd_alloc_memmd(struct obd_export *exp,
- struct lov_stripe_md **mem_tgt)
-{
- LASSERT(mem_tgt);
- LASSERT(!*mem_tgt);
- return obd_unpackmd(exp, mem_tgt, NULL, 0);
-}
-
static inline int obd_free_memmd(struct obd_export *exp,
struct lov_stripe_md **mem_tgt)
{
@@ -695,29 +690,26 @@ static inline int obd_free_memmd(struct obd_export *exp,
}
static inline int obd_create(const struct lu_env *env, struct obd_export *exp,
- struct obdo *obdo, struct lov_stripe_md **ea,
- struct obd_trans_info *oti)
+ struct obdo *obdo, struct obd_trans_info *oti)
{
int rc;
EXP_CHECK_DT_OP(exp, create);
EXP_COUNTER_INCREMENT(exp, create);
- rc = OBP(exp->exp_obd, create)(env, exp, obdo, ea, oti);
+ rc = OBP(exp->exp_obd, create)(env, exp, obdo, oti);
return rc;
}
static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp,
- struct obdo *obdo, struct lov_stripe_md *ea,
- struct obd_trans_info *oti,
- struct obd_export *md_exp)
+ struct obdo *obdo, struct obd_trans_info *oti)
{
int rc;
EXP_CHECK_DT_OP(exp, destroy);
EXP_COUNTER_INCREMENT(exp, destroy);
- rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, ea, oti, md_exp);
+ rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, oti);
return rc;
}
@@ -925,7 +917,8 @@ static inline int obd_fid_fini(struct obd_device *obd)
return rc;
}
-static inline int obd_fid_alloc(struct obd_export *exp,
+static inline int obd_fid_alloc(const struct lu_env *env,
+ struct obd_export *exp,
struct lu_fid *fid,
struct md_op_data *op_data)
{
@@ -934,7 +927,7 @@ static inline int obd_fid_alloc(struct obd_export *exp,
EXP_CHECK_DT_OP(exp, fid_alloc);
EXP_COUNTER_INCREMENT(exp, fid_alloc);
- rc = OBP(exp->exp_obd, fid_alloc)(exp, fid, op_data);
+ rc = OBP(exp->exp_obd, fid_alloc)(env, exp, fid, op_data);
return rc;
}
@@ -1147,19 +1140,6 @@ static inline int obd_commitrw(const struct lu_env *env, int cmd,
return rc;
}
-static inline int obd_adjust_kms(struct obd_export *exp,
- struct lov_stripe_md *lsm, u64 size,
- int shrink)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, adjust_kms);
- EXP_COUNTER_INCREMENT(exp, adjust_kms);
-
- rc = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink);
- return rc;
-}
-
static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp,
int len, void *karg, void __user *uarg)
{
@@ -1172,19 +1152,6 @@ static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp,
return rc;
}
-static inline int obd_find_cbdata(struct obd_export *exp,
- struct lov_stripe_md *lsm,
- ldlm_iterator_t it, void *data)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, find_cbdata);
- EXP_COUNTER_INCREMENT(exp, find_cbdata);
-
- rc = OBP(exp->exp_obd, find_cbdata)(exp, lsm, it, data);
- return rc;
-}
-
static inline void obd_import_event(struct obd_device *obd,
struct obd_import *imp,
enum obd_import_event event)
@@ -1210,12 +1177,7 @@ static inline int obd_notify(struct obd_device *obd,
if (rc)
return rc;
- /* the check for async_recov is a complete hack - I'm hereby
- * overloading the meaning to also mean "this was called from
- * mds_postsetup". I know that my mds is able to handle notifies
- * by this point, and it needs to get them to execute mds_postrecov.
- */
- if (!obd->obd_set_up && !obd->obd_async_recov) {
+ if (!obd->obd_set_up) {
CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name);
return -EINVAL;
}
@@ -1358,18 +1320,6 @@ static inline int md_null_inode(struct obd_export *exp,
return rc;
}
-static inline int md_find_cbdata(struct obd_export *exp,
- const struct lu_fid *fid,
- ldlm_iterator_t it, void *data)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, find_cbdata);
- EXP_MD_COUNTER_INCREMENT(exp, find_cbdata);
- rc = MDP(exp->exp_obd, find_cbdata)(exp, fid, it, data);
- return rc;
-}
-
static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
struct md_open_data *mod,
struct ptlrpc_request **request)
@@ -1383,9 +1333,9 @@ static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
}
static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
- const void *data, int datalen, int mode, __u32 uid,
- __u32 gid, cfs_cap_t cap_effective, __u64 rdev,
- struct ptlrpc_request **request)
+ const void *data, size_t datalen, umode_t mode,
+ uid_t uid, gid_t gid, cfs_cap_t cap_effective,
+ __u64 rdev, struct ptlrpc_request **request)
{
int rc;
@@ -1410,19 +1360,18 @@ static inline int md_done_writing(struct obd_export *exp,
static inline int md_enqueue(struct obd_export *exp,
struct ldlm_enqueue_info *einfo,
+ const ldlm_policy_data_t *policy,
struct lookup_intent *it,
struct md_op_data *op_data,
struct lustre_handle *lockh,
- void *lmm, int lmmsize,
- struct ptlrpc_request **req,
__u64 extra_lock_flags)
{
int rc;
EXP_CHECK_MD_OP(exp, enqueue);
EXP_MD_COUNTER_INCREMENT(exp, enqueue);
- rc = MDP(exp->exp_obd, enqueue)(exp, einfo, it, op_data, lockh,
- lmm, lmmsize, req, extra_lock_flags);
+ rc = MDP(exp->exp_obd, enqueue)(exp, einfo, policy, it, op_data, lockh,
+ extra_lock_flags);
return rc;
}
@@ -1439,9 +1388,9 @@ static inline int md_getattr_name(struct obd_export *exp,
}
static inline int md_intent_lock(struct obd_export *exp,
- struct md_op_data *op_data, void *lmm,
- int lmmsize, struct lookup_intent *it,
- int lookup_flags, struct ptlrpc_request **reqp,
+ struct md_op_data *op_data,
+ struct lookup_intent *it,
+ struct ptlrpc_request **reqp,
ldlm_blocking_callback cb_blocking,
__u64 extra_lock_flags)
{
@@ -1449,9 +1398,8 @@ static inline int md_intent_lock(struct obd_export *exp,
EXP_CHECK_MD_OP(exp, intent_lock);
EXP_MD_COUNTER_INCREMENT(exp, intent_lock);
- rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, lmm, lmmsize,
- it, lookup_flags, reqp, cb_blocking,
- extra_lock_flags);
+ rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, it, reqp,
+ cb_blocking, extra_lock_flags);
return rc;
}
@@ -1467,8 +1415,8 @@ static inline int md_link(struct obd_export *exp, struct md_op_data *op_data,
}
static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
- const char *old, int oldlen, const char *new,
- int newlen, struct ptlrpc_request **request)
+ const char *old, size_t oldlen, const char *new,
+ size_t newlen, struct ptlrpc_request **request)
{
int rc;
@@ -1479,21 +1427,8 @@ static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
return rc;
}
-static inline int md_is_subdir(struct obd_export *exp,
- const struct lu_fid *pfid,
- const struct lu_fid *cfid,
- struct ptlrpc_request **request)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, is_subdir);
- EXP_MD_COUNTER_INCREMENT(exp, is_subdir);
- rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, request);
- return rc;
-}
-
static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, int ealen, void *ea2, int ea2len,
+ void *ea, size_t ealen, void *ea2, size_t ea2len,
struct ptlrpc_request **request,
struct md_open_data **mod)
{
@@ -1517,15 +1452,18 @@ static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid,
return rc;
}
-static inline int md_readpage(struct obd_export *exp, struct md_op_data *opdata,
- struct page **pages,
- struct ptlrpc_request **request)
+static inline int md_read_page(struct obd_export *exp,
+ struct md_op_data *op_data,
+ struct md_callback *cb_op,
+ __u64 hash_offset,
+ struct page **ppage)
{
int rc;
- EXP_CHECK_MD_OP(exp, readpage);
- EXP_MD_COUNTER_INCREMENT(exp, readpage);
- rc = MDP(exp->exp_obd, readpage)(exp, opdata, pages, request);
+ EXP_CHECK_MD_OP(exp, read_page);
+ EXP_MD_COUNTER_INCREMENT(exp, read_page);
+ rc = MDP(exp->exp_obd, read_page)(exp, op_data, cb_op, hash_offset,
+ ppage);
return rc;
}
@@ -1559,6 +1497,16 @@ static inline int md_free_lustre_md(struct obd_export *exp,
return MDP(exp->exp_obd, free_lustre_md)(exp, md);
}
+/*
+ * Forward to the merge_attr metadata operation of the export's device,
+ * after the usual op check and per-op counter increment.
+ * Returns whatever the underlying operation returns.
+ */
+static inline int md_merge_attr(struct obd_export *exp,
+				const struct lmv_stripe_md *lsm,
+				struct cl_attr *attr,
+				ldlm_blocking_callback cb)
+{
+	int rc;
+
+	EXP_CHECK_MD_OP(exp, merge_attr);
+	EXP_MD_COUNTER_INCREMENT(exp, merge_attr);
+	rc = MDP(exp->exp_obd, merge_attr)(exp, lsm, attr, cb);
+	return rc;
+}
+
static inline int md_setxattr(struct obd_export *exp, const struct lu_fid *fid,
u64 valid, const char *name,
const char *input, int input_size,
@@ -1603,7 +1551,8 @@ static inline int md_clear_open_replay_data(struct obd_export *exp,
}
static inline int md_set_lock_data(struct obd_export *exp,
- __u64 *lockh, void *data, __u64 *bits)
+ const struct lustre_handle *lockh,
+ void *data, __u64 *bits)
{
EXP_CHECK_MD_OP(exp, set_lock_data);
EXP_MD_COUNTER_INCREMENT(exp, set_lock_data);
@@ -1674,6 +1623,19 @@ static inline int md_revalidate_lock(struct obd_export *exp,
return rc;
}
+/*
+ * Forward to the get_fid_from_lsm metadata operation of the export's
+ * device, after the usual op check and per-op counter increment.
+ * Returns whatever the underlying operation returns.
+ */
+static inline int md_get_fid_from_lsm(struct obd_export *exp,
+				      const struct lmv_stripe_md *lsm,
+				      const char *name, int namelen,
+				      struct lu_fid *fid)
+{
+	EXP_CHECK_MD_OP(exp, get_fid_from_lsm);
+	EXP_MD_COUNTER_INCREMENT(exp, get_fid_from_lsm);
+
+	return MDP(exp->exp_obd, get_fid_from_lsm)(exp, lsm, name, namelen,
+						   fid);
+}
+
/* OBD Metadata Support */
int obd_init_caches(void);
@@ -1682,16 +1644,6 @@ void obd_cleanup_caches(void);
/* support routines */
extern struct kmem_cache *obdo_cachep;
-static inline void obdo2fid(struct obdo *oa, struct lu_fid *fid)
-{
- /* something here */
-}
-
-static inline void fid2obdo(struct lu_fid *fid, struct obdo *oa)
-{
- /* something here */
-}
-
typedef int (*register_lwp_cb)(void *data);
struct lwp_register_item {
@@ -1710,6 +1662,9 @@ struct lwp_register_item {
extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
/* obd_mount.c */
+int lustre_unregister_fs(void);
+int lustre_register_fs(void);
+int lustre_check_exclusion(struct super_block *sb, char *svname);
/* sysctl.c */
int obd_sysctl_init(void);
@@ -1730,8 +1685,24 @@ void class_exit_uuidlist(void);
extern char obd_jobid_node[];
extern struct miscdevice obd_psdev;
extern spinlock_t obd_types_lock;
+int class_procfs_init(void);
+int class_procfs_clean(void);
/* prng.c */
#define ll_generate_random_uuid(uuid_out) cfs_get_random_bytes(uuid_out, sizeof(class_uuid_t))
+/* statfs_pack.c */
+struct kstatfs;
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
+
+/*
+ * root squash info
+ *
+ * NOTE(review): field roles are inferred from their names - presumably the
+ * uid/gid that root is squashed to, the list of NIDs exempt from squashing,
+ * and the semaphore guarding them.  Confirm against the users.
+ */
+struct rw_semaphore;
+struct root_squash_info {
+ uid_t rsi_uid;
+ gid_t rsi_gid;
+ struct list_head rsi_nosquash_nids;
+ struct rw_semaphore rsi_sem;
+};
+
#endif /* __LINUX_OBD_CLASS_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index 845e64a56c21..b346a7f10aa4 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -35,7 +35,7 @@
#include <linux/slab.h>
#include "../../include/linux/libcfs/libcfs.h"
-#include "linux/lustre_compat25.h"
+#include "lustre_compat.h"
#include "lprocfs_status.h"
/* global variables */
@@ -52,11 +52,9 @@ extern unsigned int at_max;
extern unsigned int at_history;
extern int at_early_margin;
extern int at_extra;
-extern unsigned int obd_sync_filter;
-extern unsigned int obd_max_dirty_pages;
-extern atomic_t obd_unstable_pages;
-extern atomic_t obd_dirty_pages;
-extern atomic_t obd_dirty_transit_pages;
+extern unsigned long obd_max_dirty_pages;
+extern atomic_long_t obd_dirty_pages;
+extern atomic_long_t obd_dirty_transit_pages;
extern char obd_jobid_var[];
/* Some hash init argument constants */
@@ -117,17 +115,17 @@ extern char obd_jobid_var[];
* running on a backup server. (If it's too low, import_select_connection
* will increase the timeout anyhow.)
*/
-#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN, obd_timeout/20)
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN, obd_timeout / 20)
/* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */
#define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \
INITIAL_CONNECT_TIMEOUT)
/* The min time a target should wait for clients to reconnect in recovery */
-#define OBD_RECOVERY_TIME_MIN (2*RECONNECT_DELAY_MAX)
+#define OBD_RECOVERY_TIME_MIN (2 * RECONNECT_DELAY_MAX)
#define OBD_IR_FACTOR_MIN 1
#define OBD_IR_FACTOR_MAX 10
-#define OBD_IR_FACTOR_DEFAULT (OBD_IR_FACTOR_MAX/2)
+#define OBD_IR_FACTOR_DEFAULT (OBD_IR_FACTOR_MAX / 2)
/* default timeout for the MGS to become IR_FULL */
-#define OBD_IR_MGS_TIMEOUT (4*obd_timeout)
+#define OBD_IR_MGS_TIMEOUT (4 * obd_timeout)
#define LONG_UNLINK 300 /* Unlink should happen before now */
/**
@@ -318,6 +316,10 @@ extern char obd_jobid_var[];
#define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b
#define OBD_FAIL_LDLM_OST_LVB 0x31c
#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d
+#define OBD_FAIL_LDLM_CP_CB_WAIT2 0x320
+#define OBD_FAIL_LDLM_CP_CB_WAIT3 0x321
+#define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322
+#define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323
/* LOCKLESS IO */
#define OBD_FAIL_LDLM_SET_CONTENTION 0x385
@@ -400,6 +402,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_MDC_GETATTR_ENQUEUE 0x803
#define OBD_FAIL_MDC_RPCS_SEM 0x804
#define OBD_FAIL_MDC_LIGHTWEIGHT 0x805
+#define OBD_FAIL_MDC_CLOSE 0x806
#define OBD_FAIL_MGS 0x900
#define OBD_FAIL_MGS_ALL_REQUEST_NET 0x901
@@ -455,6 +458,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_LOV_INIT 0x1403
#define OBD_FAIL_GLIMPSE_DELAY 0x1404
#define OBD_FAIL_LLITE_XATTR_ENOMEM 0x1405
+#define OBD_FAIL_GETATTR_DELAY 0x1409
#define OBD_FAIL_FID_INDIR 0x1501
#define OBD_FAIL_FID_INLMA 0x1502
@@ -474,11 +478,16 @@ extern char obd_jobid_var[];
#define OBD_FAIL_LFSCK_CRASH 0x160a
#define OBD_FAIL_LFSCK_NO_AUTO 0x160b
#define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
+#define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
+#define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628
/* UPDATE */
#define OBD_FAIL_UPDATE_OBJ_NET 0x1700
#define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+/* LMV */
+#define OBD_FAIL_UNKNOWN_LMV_STRIPE 0x1901
+
/* Assign references to moved code to reduce code changes */
#define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id)
#define OBD_FAIL_CHECK(id) CFS_FAIL_CHECK(id)
@@ -520,7 +529,8 @@ do { \
POISON_PTR(ptr); \
} while (0)
-#define KEY_IS(str) \
- (keylen >= (sizeof(str)-1) && memcmp(key, str, (sizeof(str)-1)) == 0)
+#define KEY_IS(str) \
+ (keylen >= (sizeof(str) - 1) && \
+ memcmp(key, str, (sizeof(str) - 1)) == 0)
#endif
diff --git a/drivers/staging/lustre/lustre/ldlm/interval_tree.c b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
index f4a70ebddeaf..e134ecd21bb2 100644
--- a/drivers/staging/lustre/lustre/ldlm/interval_tree.c
+++ b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
@@ -90,6 +90,17 @@ static inline int extent_equal(struct interval_node_extent *e1,
return (e1->start == e2->start) && (e1->end == e2->end);
}
+/*
+ * Return 1 when the inclusive ranges [start, end] of @e1 and @e2 share at
+ * least one point, 0 otherwise.
+ */
+static inline int extent_overlapped(struct interval_node_extent *e1,
+				    struct interval_node_extent *e2)
+{
+	/* disjoint exactly when one extent ends before the other starts */
+	if (e2->end < e1->start || e1->end < e2->start)
+		return 0;
+
+	return 1;
+}
+
+/* Two nodes compare equal when their extents have the same start and end. */
+static inline int node_equal(struct interval_node *n1, struct interval_node *n2)
+{
+	struct interval_node_extent *lhs = &n1->in_extent;
+	struct interval_node_extent *rhs = &n2->in_extent;
+
+	return extent_equal(lhs, rhs);
+}
+
static inline __u64 max_u64(__u64 x, __u64 y)
{
return x > y ? x : y;
@@ -262,7 +273,7 @@ struct interval_node *interval_insert(struct interval_node *node,
p = root;
while (*p) {
parent = *p;
- if (extent_equal(&parent->in_extent, &node->in_extent))
+ if (node_equal(parent, node))
return parent;
/* max_high field must be updated after each iteration */
@@ -463,3 +474,90 @@ color:
interval_erase_color(child, parent, root);
}
EXPORT_SYMBOL(interval_erase);
+
+/*
+ * Pruning test used by interval_search(): non-zero when @ext starts at or
+ * before node->in_max_high and ends at or after the node's low bound.
+ * NOTE(review): in_max_high is presumably the largest end value in the
+ * subtree rooted at @node - confirm against the tree maintenance code.
+ */
+static inline int interval_may_overlap(struct interval_node *node,
+				       struct interval_node_extent *ext)
+{
+	if (ext->start > node->in_max_high)
+		return 0;
+
+	return ext->end >= interval_low(node);
+}
+
+/*
+ * Find every interval in the tree rooted at @node that overlaps @ext and
+ * call @func on each of them, one by one, passing @data through.
+ * In Lustre, this function is used to find all conflicting locks in
+ * the granted queue and add these locks to the AST work list.
+ *
+ * The traversal stops early when @func returns INTERVAL_ITER_STOP.
+ * Returns the last value returned by @func, or INTERVAL_ITER_CONT if
+ * @func was never called.
+ *
+ * The loop below is an iterative form of this recursive sketch:
+ *
+ * {
+ * if (!node)
+ * return 0;
+ * if (ext->end < interval_low(node)) {
+ * interval_search(node->in_left, ext, func, data);
+ * } else if (interval_may_overlap(node, ext)) {
+ * if (extent_overlapped(ext, &node->in_extent))
+ * func(node, data);
+ * interval_search(node->in_left, ext, func, data);
+ * interval_search(node->in_right, ext, func, data);
+ * }
+ * return 0;
+ * }
+ *
+ */
+enum interval_iter interval_search(struct interval_node *node,
+ struct interval_node_extent *ext,
+ interval_callback_t func,
+ void *data)
+{
+ enum interval_iter rc = INTERVAL_ITER_CONT;
+ struct interval_node *parent;
+
+ LASSERT(ext);
+ LASSERT(func);
+
+ while (node) {
+ /* Descend: pick the next child to visit, if there is one. */
+ if (ext->end < interval_low(node)) {
+ /* ext ends below this node's low bound: only go left */
+ if (node->in_left) {
+ node = node->in_left;
+ continue;
+ }
+ } else if (interval_may_overlap(node, ext)) {
+ if (extent_overlapped(ext, &node->in_extent)) {
+ rc = func(node, data);
+ /* callback asked to stop: leave the outer loop */
+ if (rc == INTERVAL_ITER_STOP)
+ break;
+ }
+
+ /* left subtree first, then right */
+ if (node->in_left) {
+ node = node->in_left;
+ continue;
+ }
+ if (node->in_right) {
+ node = node->in_right;
+ continue;
+ }
+ }
+
+ /*
+ * Nothing further to descend into from here: climb until we
+ * arrive from a left child whose parent also has a right child.
+ */
+ parent = node->in_parent;
+ while (parent) {
+ if (node_is_left_child(node) &&
+ parent->in_right) {
+ /*
+ * If we ever got the left, it means that the
+ * parent met ext->end<interval_low(parent), or
+ * may_overlap(parent). If the former is true,
+ * we needn't go back. So stop early and check
+ * may_overlap(parent) after this loop.
+ */
+ node = parent->in_right;
+ break;
+ }
+ node = parent;
+ parent = parent->in_parent;
+ }
+ /*
+ * Finished if we climbed past the root, or if the parent whose
+ * right child we just selected cannot overlap ext.
+ */
+ if (!parent || !interval_may_overlap(parent, ext))
+ break;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(interval_search);
diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c
index ea8840cb9056..3845f386f1db 100644
--- a/drivers/staging/lustre/lustre/ldlm/l_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/l_lock.c
@@ -45,6 +45,8 @@
* being an atomic operation.
*/
struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock)
+ __acquires(&lock->l_lock)
+ __acquires(&lock->l_resource->lr_lock)
{
spin_lock(&lock->l_lock);
@@ -59,6 +61,8 @@ EXPORT_SYMBOL(lock_res_and_lock);
* Unlock a lock and its resource previously locked with lock_res_and_lock
*/
void unlock_res_and_lock(struct ldlm_lock *lock)
+ __releases(&lock->l_resource->lr_lock)
+ __releases(&lock->l_lock)
{
/* on server-side resource of lock doesn't change */
ldlm_clear_res_locked(lock);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
index f5023d9b78f5..ecf472e4813d 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
@@ -221,7 +221,7 @@ void ldlm_extent_unlink_lock(struct ldlm_lock *lock)
}
void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy)
+ ldlm_policy_data_t *lpolicy)
{
memset(lpolicy, 0, sizeof(*lpolicy));
lpolicy->l_extent.start = wpolicy->l_extent.start;
@@ -230,7 +230,7 @@ void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
}
void ldlm_extent_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy)
+ ldlm_wire_policy_data_t *wpolicy)
{
memset(wpolicy, 0, sizeof(*wpolicy));
wpolicy->l_extent.start = lpolicy->l_extent.start;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
index d6b61bc39135..861f36f039b5 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
@@ -97,7 +97,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode, __u64 flags)
LASSERT(hlist_unhashed(&lock->l_exp_flock_hash));
list_del_init(&lock->l_res_link);
- if (flags == LDLM_FL_WAIT_NOREPROC && !ldlm_is_failed(lock)) {
+ if (flags == LDLM_FL_WAIT_NOREPROC) {
/* client side - set a flag to prevent sending a CANCEL */
lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING;
@@ -166,7 +166,7 @@ reprocess:
*/
list_for_each(tmp, &res->lr_granted) {
lock = list_entry(tmp, struct ldlm_lock,
- l_res_link);
+ l_res_link);
if (ldlm_same_flock_owner(lock, req)) {
ownlocks = tmp;
break;
@@ -182,7 +182,7 @@ reprocess:
*/
list_for_each(tmp, &res->lr_granted) {
lock = list_entry(tmp, struct ldlm_lock,
- l_res_link);
+ l_res_link);
if (ldlm_same_flock_owner(lock, req)) {
if (!ownlocks)
@@ -339,10 +339,10 @@ reprocess:
lock->l_granted_mode, &null_cbs,
NULL, 0, LVB_T_NONE);
lock_res_and_lock(req);
- if (!new2) {
+ if (IS_ERR(new2)) {
ldlm_flock_destroy(req, lock->l_granted_mode,
*flags);
- *err = -ENOLCK;
+ *err = PTR_ERR(new2);
return LDLM_ITER_STOP;
}
goto reprocess;
@@ -455,29 +455,22 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
enum ldlm_error err;
int rc = 0;
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT2, 4);
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT3)) {
+ lock_res_and_lock(lock);
+ lock->l_flags |= LDLM_FL_FAIL_LOC;
+ unlock_res_and_lock(lock);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT3, 4);
+ }
CDEBUG(D_DLMTRACE, "flags: 0x%llx data: %p getlk: %p\n",
flags, data, getlk);
- /* Import invalidation. We need to actually release the lock
- * references being held, so that it can go away. No point in
- * holding the lock even if app still believes it has it, since
- * server already dropped it anyway. Only for granted locks too.
- */
- if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
- (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
- if (lock->l_req_mode == lock->l_granted_mode &&
- lock->l_granted_mode != LCK_NL && !data)
- ldlm_lock_decref_internal(lock, lock->l_req_mode);
-
- /* Need to wake up the waiter if we were evicted */
- wake_up(&lock->l_waitq);
- return 0;
- }
-
LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
- if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
- LDLM_FL_BLOCK_CONV))) {
+ if (flags & LDLM_FL_FAILED)
+ goto granted;
+
+ if (!(flags & LDLM_FL_BLOCKED_MASK)) {
if (!data)
/* mds granted the lock in the reply */
goto granted;
@@ -514,12 +507,21 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
granted:
OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
- if (ldlm_is_failed(lock)) {
- LDLM_DEBUG(lock, "client-side enqueue waking up: failed");
- return -EIO;
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT4)) {
+ lock_res_and_lock(lock);
+ /* DEADLOCK is always set with CBPENDING */
+ lock->l_flags |= LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING;
+ unlock_res_and_lock(lock);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT4, 4);
+ }
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT5)) {
+ lock_res_and_lock(lock);
+ /* DEADLOCK is always set with CBPENDING */
+ lock->l_flags |= LDLM_FL_FAIL_LOC |
+ LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING;
+ unlock_res_and_lock(lock);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT5, 4);
}
-
- LDLM_DEBUG(lock, "client-side enqueue granted");
lock_res_and_lock(lock);
@@ -530,20 +532,59 @@ granted:
if (ldlm_is_destroyed(lock)) {
unlock_res_and_lock(lock);
LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
- return 0;
+ /*
+ * An error is still to be returned, to propagate it up to
+ * ldlm_cli_enqueue_fini() caller.
+ */
+ return -EIO;
}
/* ldlm_lock_enqueue() has already placed lock on the granted list. */
- list_del_init(&lock->l_res_link);
+ ldlm_resource_unlink_lock(lock);
+
+ /*
+ * Import invalidation. We need to actually release the lock
+ * references being held, so that it can go away. No point in
+ * holding the lock even if app still believes it has it, since
+ * server already dropped it anyway. Only for granted locks too.
+ */
+ /* Do the same for DEADLOCK'ed locks. */
+ if (ldlm_is_failed(lock) || ldlm_is_flock_deadlock(lock)) {
+ int mode;
+
+ if (flags & LDLM_FL_TEST_LOCK)
+ LASSERT(ldlm_is_test_lock(lock));
+
+ if (ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock))
+ mode = getlk->fl_type;
+ else
+ mode = lock->l_granted_mode;
+
+ if (ldlm_is_flock_deadlock(lock)) {
+ LDLM_DEBUG(lock, "client-side enqueue deadlock received");
+ rc = -EDEADLK;
+ }
+ ldlm_flock_destroy(lock, mode, LDLM_FL_WAIT_NOREPROC);
+ unlock_res_and_lock(lock);
+
+ /* Need to wake up the waiter if we were evicted */
+ wake_up(&lock->l_waitq);
+
+ /*
+ * An error is still to be returned, to propagate it up to
+ * ldlm_cli_enqueue_fini() caller.
+ */
+ return rc ? : -EIO;
+ }
+
+ LDLM_DEBUG(lock, "client-side enqueue granted");
- if (ldlm_is_flock_deadlock(lock)) {
- LDLM_DEBUG(lock, "client-side enqueue deadlock received");
- rc = -EDEADLK;
- } else if (flags & LDLM_FL_TEST_LOCK) {
+ if (flags & LDLM_FL_TEST_LOCK) {
/* fcntl(F_GETLK) request */
/* The old mode was saved in getlk->fl_type so that if the mode
* in the lock changes we can decref the appropriate refcount.
*/
+ LASSERT(ldlm_is_test_lock(lock));
ldlm_flock_destroy(lock, getlk->fl_type, LDLM_FL_WAIT_NOREPROC);
switch (lock->l_granted_mode) {
case LCK_PR:
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
index e4cf65d2d3b1..5e82cfc245b2 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
@@ -100,9 +100,10 @@ enum {
int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
enum ldlm_cancel_flags sync, int flags);
int ldlm_cancel_lru_local(struct ldlm_namespace *ns,
- struct list_head *cancels, int count, int max,
- enum ldlm_cancel_flags cancel_flags, int flags);
-extern int ldlm_enqueue_min;
+ struct list_head *cancels, int count, int max,
+ enum ldlm_cancel_flags cancel_flags, int flags);
+extern unsigned int ldlm_enqueue_min;
+extern unsigned int ldlm_cancel_unused_locks_before_replay;
/* ldlm_resource.c */
int ldlm_resource_putref_locked(struct ldlm_resource *res);
@@ -200,8 +201,7 @@ ldlm_interval_extent(struct ldlm_interval *node)
LASSERT(!list_empty(&node->li_group));
- lock = list_entry(node->li_group.next, struct ldlm_lock,
- l_sl_policy);
+ lock = list_entry(node->li_group.next, struct ldlm_lock, l_sl_policy);
return &lock->l_policy_data.l_extent;
}
@@ -302,7 +302,7 @@ static inline int is_granted_or_cancelled(struct ldlm_lock *lock)
lock_res_and_lock(lock);
if ((lock->l_req_mode == lock->l_granted_mode) &&
- !ldlm_is_cp_reqd(lock))
+ !ldlm_is_cp_reqd(lock))
ret = 1;
else if (ldlm_is_failed(lock) || ldlm_is_cancel(lock))
ret = 1;
@@ -326,13 +326,13 @@ void ldlm_ibits_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
void ldlm_ibits_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
ldlm_wire_policy_data_t *wpolicy);
void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy);
+ ldlm_policy_data_t *lpolicy);
void ldlm_extent_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy);
+ ldlm_wire_policy_data_t *wpolicy);
void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy);
+ ldlm_policy_data_t *lpolicy);
void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy);
+ ldlm_policy_data_t *lpolicy);
void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
ldlm_wire_policy_data_t *wpolicy);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
index 7c832aae7d5e..153e990c494e 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
@@ -82,7 +82,7 @@ static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid,
if (priority) {
list_del(&item->oic_item);
list_add(&item->oic_item,
- &imp->imp_conn_list);
+ &imp->imp_conn_list);
item->oic_last_attempt = 0;
}
CDEBUG(D_HA, "imp %p@%s: found existing conn %s%s\n",
@@ -102,7 +102,7 @@ static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid,
list_add(&imp_conn->oic_item, &imp->imp_conn_list);
else
list_add_tail(&imp_conn->oic_item,
- &imp->imp_conn_list);
+ &imp->imp_conn_list);
CDEBUG(D_HA, "imp %p@%s: add connection %s at %s\n",
imp, imp->imp_obd->obd_name, uuid->uuid,
(priority ? "head" : "tail"));
@@ -299,12 +299,14 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2),
sizeof(server_uuid)));
- cli->cl_dirty = 0;
+ cli->cl_dirty_pages = 0;
cli->cl_avail_grant = 0;
- /* FIXME: Should limit this for the sum of all cl_dirty_max. */
- cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
- if (cli->cl_dirty_max >> PAGE_SHIFT > totalram_pages / 8)
- cli->cl_dirty_max = totalram_pages << (PAGE_SHIFT - 3);
+ /* FIXME: Should limit this for the sum of all cl_dirty_max_pages. */
+ /*
+ * cl_dirty_max_pages may be changed at connect time in
+ * ptlrpc_connect_interpret().
+ */
+ client_adjust_max_dirty(cli);
INIT_LIST_HEAD(&cli->cl_cache_waiters);
INIT_LIST_HEAD(&cli->cl_loi_ready_list);
INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
@@ -326,11 +328,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
/* lru for osc. */
INIT_LIST_HEAD(&cli->cl_lru_osc);
atomic_set(&cli->cl_lru_shrinkers, 0);
- atomic_set(&cli->cl_lru_busy, 0);
- atomic_set(&cli->cl_lru_in_list, 0);
+ atomic_long_set(&cli->cl_lru_busy, 0);
+ atomic_long_set(&cli->cl_lru_in_list, 0);
INIT_LIST_HEAD(&cli->cl_lru_list);
spin_lock_init(&cli->cl_lru_list_lock);
- atomic_set(&cli->cl_unstable_count, 0);
+ atomic_long_set(&cli->cl_unstable_count, 0);
init_waitqueue_head(&cli->cl_destroy_waitq);
atomic_set(&cli->cl_destroy_in_flight, 0);
@@ -360,7 +362,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
cli->cl_chunkbits = PAGE_SHIFT;
if (!strcmp(name, LUSTRE_MDC_NAME)) {
- cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
+ cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) {
cli->cl_max_rpcs_in_flight = 2;
} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 256 /* MB */) {
@@ -368,7 +370,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 512 /* MB */) {
cli->cl_max_rpcs_in_flight = 4;
} else {
- cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT;
+ cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
}
rc = ldlm_get_ref();
if (rc) {
@@ -534,7 +536,7 @@ int client_disconnect_export(struct obd_export *exp)
imp = cli->cl_import;
down_write(&cli->cl_sem);
- CDEBUG(D_INFO, "disconnect %s - %d\n", obd->obd_name,
+ CDEBUG(D_INFO, "disconnect %s - %zu\n", obd->obd_name,
cli->cl_conn_count);
if (!cli->cl_conn_count) {
@@ -690,7 +692,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
if (rs->rs_transno > exp->exp_last_committed) {
/* not committed already */
list_add_tail(&rs->rs_obd_list,
- &exp->exp_uncommitted_replies);
+ &exp->exp_uncommitted_replies);
}
spin_unlock(&exp->exp_uncommitted_replies_lock);
@@ -795,7 +797,7 @@ void ldlm_dump_export_locks(struct obd_export *exp)
CERROR("dumping locks for export %p,ignore if the unmount doesn't hang\n",
exp);
list_for_each_entry(lock, &exp->exp_locks_list,
- l_exp_refs_link)
+ l_exp_refs_link)
LDLM_ERROR(lock, "lock:");
}
spin_unlock(&exp->exp_locks_list_guard);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
index a5993f745ebe..3c48b4fb96f1 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
@@ -481,8 +481,8 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
unlock_res_and_lock(lock);
newres = ldlm_resource_get(ns, NULL, new_resid, type, 1);
- if (!newres)
- return -ENOMEM;
+ if (IS_ERR(newres))
+ return PTR_ERR(newres);
lu_ref_add(&newres->lr_reference, "lock", lock);
/*
@@ -542,7 +542,7 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
LASSERT(handle);
- lock = class_handle2object(handle->cookie);
+ lock = class_handle2object(handle->cookie, NULL);
if (!lock)
return NULL;
@@ -937,7 +937,7 @@ static void search_granted_lock(struct list_head *queue,
/* go to next policy group within mode group */
tmp = policy_end->l_res_link.next;
lock = list_entry(tmp, struct ldlm_lock,
- l_res_link);
+ l_res_link);
} /* loop over policy groups within the mode group */
/* insert point is last lock of the mode group,
@@ -1028,15 +1028,28 @@ void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list)
check_res_locked(res);
lock->l_granted_mode = lock->l_req_mode;
+
+ if (work_list && lock->l_completion_ast)
+ ldlm_add_ast_work_item(lock, NULL, work_list);
+
if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS)
ldlm_grant_lock_with_skiplist(lock);
else if (res->lr_type == LDLM_EXTENT)
ldlm_extent_add_lock(res, lock);
- else
+ else if (res->lr_type == LDLM_FLOCK) {
+ /*
+ * We should not add locks to granted list in the following cases:
+ * - this is an UNLOCK but not a real lock;
+ * - this is a TEST lock;
+ * - this is a F_CANCELLK lock (async flock has req_mode == 0)
+ * - this is a deadlock (flock cannot be granted)
+ */
+ if (!lock->l_req_mode || lock->l_req_mode == LCK_NL ||
+ ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock))
+ return;
ldlm_resource_add_lock(res, &res->lr_granted, lock);
-
- if (work_list && lock->l_completion_ast)
- ldlm_add_ast_work_item(lock, NULL, work_list);
+ } else
+ LBUG();
ldlm_pool_add(&ldlm_res_to_ns(res)->ns_pool, lock);
}
@@ -1103,7 +1116,7 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
* of bits.
*/
if (lock->l_resource->lr_type == LDLM_IBITS &&
- ((lock->l_policy_data.l_inodebits.bits &
+ ((lock->l_policy_data.l_inodebits.bits &
policy->l_inodebits.bits) !=
policy->l_inodebits.bits))
continue;
@@ -1214,7 +1227,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
}
res = ldlm_resource_get(ns, NULL, res_id, type, 0);
- if (!res) {
+ if (IS_ERR(res)) {
LASSERT(!old_lock);
return 0;
}
@@ -1363,12 +1376,12 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
if (size == sizeof(struct ost_lvb)) {
if (loc == RCL_CLIENT)
lvb = req_capsule_client_swab_get(pill,
- &RMF_DLM_LVB,
- lustre_swab_ost_lvb);
+ &RMF_DLM_LVB,
+ lustre_swab_ost_lvb);
else
lvb = req_capsule_server_swab_get(pill,
- &RMF_DLM_LVB,
- lustre_swab_ost_lvb);
+ &RMF_DLM_LVB,
+ lustre_swab_ost_lvb);
if (unlikely(!lvb)) {
LDLM_ERROR(lock, "no LVB");
return -EPROTO;
@@ -1380,8 +1393,8 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
if (loc == RCL_CLIENT)
lvb = req_capsule_client_swab_get(pill,
- &RMF_DLM_LVB,
- lustre_swab_ost_lvb_v1);
+ &RMF_DLM_LVB,
+ lustre_swab_ost_lvb_v1);
else
lvb = req_capsule_server_sized_swab_get(pill,
&RMF_DLM_LVB, size,
@@ -1405,12 +1418,12 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
if (size == sizeof(struct lquota_lvb)) {
if (loc == RCL_CLIENT)
lvb = req_capsule_client_swab_get(pill,
- &RMF_DLM_LVB,
- lustre_swab_lquota_lvb);
+ &RMF_DLM_LVB,
+ lustre_swab_lquota_lvb);
else
lvb = req_capsule_server_swab_get(pill,
- &RMF_DLM_LVB,
- lustre_swab_lquota_lvb);
+ &RMF_DLM_LVB,
+ lustre_swab_lquota_lvb);
if (unlikely(!lvb)) {
LDLM_ERROR(lock, "no LVB");
return -EPROTO;
@@ -1462,15 +1475,15 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
{
struct ldlm_lock *lock;
struct ldlm_resource *res;
+ int rc;
res = ldlm_resource_get(ns, NULL, res_id, type, 1);
- if (!res)
- return NULL;
+ if (IS_ERR(res))
+ return ERR_CAST(res);
lock = ldlm_lock_new(res);
-
if (!lock)
- return NULL;
+ return ERR_PTR(-ENOMEM);
lock->l_req_mode = mode;
lock->l_ast_data = data;
@@ -1484,27 +1497,33 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
lock->l_tree_node = NULL;
/* if this is the extent lock, allocate the interval tree node */
if (type == LDLM_EXTENT) {
- if (!ldlm_interval_alloc(lock))
+ if (!ldlm_interval_alloc(lock)) {
+ rc = -ENOMEM;
goto out;
+ }
}
if (lvb_len) {
lock->l_lvb_len = lvb_len;
lock->l_lvb_data = kzalloc(lvb_len, GFP_NOFS);
- if (!lock->l_lvb_data)
+ if (!lock->l_lvb_data) {
+ rc = -ENOMEM;
goto out;
+ }
}
lock->l_lvb_type = lvb_type;
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_NEW_LOCK))
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_NEW_LOCK)) {
+ rc = -ENOENT;
goto out;
+ }
return lock;
out:
ldlm_lock_destroy(lock);
LDLM_LOCK_RELEASE(lock);
- return NULL;
+ return ERR_PTR(rc);
}
/**
@@ -1522,16 +1541,13 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns,
struct ldlm_lock *lock = *lockp;
struct ldlm_resource *res = lock->l_resource;
- lock->l_last_activity = ktime_get_real_seconds();
-
lock_res_and_lock(lock);
if (lock->l_req_mode == lock->l_granted_mode) {
/* The server returned a blocked lock, but it was granted
* before we got a chance to actually enqueue it. We don't
* need to do anything else.
*/
- *flags &= ~(LDLM_FL_BLOCK_GRANTED |
- LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
+ *flags &= ~LDLM_FL_BLOCKED_MASK;
goto out;
}
@@ -1546,6 +1562,8 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns,
*/
if (*flags & LDLM_FL_AST_DISCARD_DATA)
ldlm_set_ast_discard_data(lock);
+ if (*flags & LDLM_FL_TEST_LOCK)
+ ldlm_set_test_lock(lock);
/*
* This distinction between local lock trees is very important; a client
@@ -1688,7 +1706,7 @@ static int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
return -ENOENT;
gl_work = list_entry(arg->list->next, struct ldlm_glimpse_work,
- gl_list);
+ gl_list);
list_del_init(&gl_work->gl_list);
lock = gl_work->gl_lock;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
index 821939ff2e6b..fde697ebaadc 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -559,8 +559,11 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
switch (lustre_msg_get_opc(req->rq_reqmsg)) {
case LDLM_BL_CALLBACK:
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) {
+ if (cfs_fail_err)
+ ldlm_callback_reply(req, -(int)cfs_fail_err);
return 0;
+ }
break;
case LDLM_CP_CALLBACK:
if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CP_CALLBACK_NET))
@@ -706,12 +709,12 @@ static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp)
if (!list_empty(&blp->blp_list) &&
(list_empty(&blp->blp_prio_list) || num_bl == 0))
blwi = list_entry(blp->blp_list.next,
- struct ldlm_bl_work_item, blwi_entry);
+ struct ldlm_bl_work_item, blwi_entry);
else
if (!list_empty(&blp->blp_prio_list))
blwi = list_entry(blp->blp_prio_list.next,
- struct ldlm_bl_work_item,
- blwi_entry);
+ struct ldlm_bl_work_item,
+ blwi_entry);
if (blwi) {
if (++num_bl >= atomic_read(&blp->blp_num_threads))
@@ -741,7 +744,7 @@ static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
init_completion(&bltd.bltd_comp);
bltd.bltd_num = atomic_read(&blp->blp_num_threads);
snprintf(bltd.bltd_name, sizeof(bltd.bltd_name),
- "ldlm_bl_%02d", bltd.bltd_num);
+ "ldlm_bl_%02d", bltd.bltd_num);
task = kthread_run(ldlm_bl_thread_main, &bltd, "%s", bltd.bltd_name);
if (IS_ERR(task)) {
CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n",
@@ -786,8 +789,8 @@ static int ldlm_bl_thread_main(void *arg)
if (!blwi) {
atomic_dec(&blp->blp_busy_threads);
l_wait_event_exclusive(blp->blp_waitq,
- (blwi = ldlm_bl_get_work(blp)),
- &lwi);
+ (blwi = ldlm_bl_get_work(blp)),
+ &lwi);
busy = atomic_inc_return(&blp->blp_busy_threads);
} else {
busy = atomic_read(&blp->blp_busy_threads);
@@ -874,8 +877,6 @@ void ldlm_put_ref(void)
}
EXPORT_SYMBOL(ldlm_put_ref);
-extern unsigned int ldlm_cancel_unused_locks_before_replay;
-
static ssize_t cancel_unused_locks_before_replay_show(struct kobject *kobj,
struct attribute *attr,
char *buf)
@@ -1094,16 +1095,17 @@ int ldlm_init(void)
return -ENOMEM;
ldlm_lock_slab = kmem_cache_create("ldlm_locks",
- sizeof(struct ldlm_lock), 0,
- SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU, NULL);
+ sizeof(struct ldlm_lock), 0,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_DESTROY_BY_RCU, NULL);
if (!ldlm_lock_slab) {
kmem_cache_destroy(ldlm_resource_slab);
return -ENOMEM;
}
ldlm_interval_slab = kmem_cache_create("interval_node",
- sizeof(struct ldlm_interval),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ sizeof(struct ldlm_interval),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
if (!ldlm_interval_slab) {
kmem_cache_destroy(ldlm_resource_slab);
kmem_cache_destroy(ldlm_lock_slab);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index 657ed4012776..9a1136e32dfc 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -357,38 +357,40 @@ static int ldlm_pool_recalc(struct ldlm_pool *pl)
int count;
recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
- if (recalc_interval_sec <= 0)
- goto recalc;
-
- spin_lock(&pl->pl_lock);
if (recalc_interval_sec > 0) {
- /*
- * Update pool statistics every 1s.
- */
- ldlm_pool_recalc_stats(pl);
-
- /*
- * Zero out all rates and speed for the last period.
- */
- atomic_set(&pl->pl_grant_rate, 0);
- atomic_set(&pl->pl_cancel_rate, 0);
+ spin_lock(&pl->pl_lock);
+ recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
+
+ if (recalc_interval_sec > 0) {
+ /*
+ * Update pool statistics every 1s.
+ */
+ ldlm_pool_recalc_stats(pl);
+
+ /*
+ * Zero out all rates and speed for the last period.
+ */
+ atomic_set(&pl->pl_grant_rate, 0);
+ atomic_set(&pl->pl_cancel_rate, 0);
+ }
+ spin_unlock(&pl->pl_lock);
}
- spin_unlock(&pl->pl_lock);
- recalc:
if (pl->pl_ops->po_recalc) {
count = pl->pl_ops->po_recalc(pl);
lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
count);
}
+
recalc_interval_sec = pl->pl_recalc_time - ktime_get_seconds() +
pl->pl_recalc_period;
if (recalc_interval_sec <= 0) {
+ /* DEBUG: should be re-removed after LU-4536 is fixed */
+ CDEBUG(D_DLMTRACE, "%s: Negative interval(%ld), too short period(%ld)\n",
+ pl->pl_name, (long)recalc_interval_sec,
+ (long)pl->pl_recalc_period);
+
/* Prevent too frequent recalculation. */
- CDEBUG(D_DLMTRACE,
- "Negative interval(%d), too short period(%lld)",
- recalc_interval_sec,
- (s64)pl->pl_recalc_period);
recalc_interval_sec = 1;
}
@@ -792,7 +794,8 @@ static struct completion ldlm_pools_comp;
*/
static unsigned long ldlm_pools_count(ldlm_side_t client, gfp_t gfp_mask)
{
- int total = 0, nr_ns;
+ unsigned long total = 0;
+ int nr_ns;
struct ldlm_namespace *ns;
struct ldlm_namespace *ns_old = NULL; /* loop detection */
void *cookie;
@@ -995,7 +998,7 @@ static int ldlm_pools_thread_main(void *arg)
wake_up(&thread->t_ctl_waitq);
CDEBUG(D_DLMTRACE, "%s: pool thread starting, process %d\n",
- "ldlm_poold", current_pid());
+ "ldlm_poold", current_pid());
while (1) {
struct l_wait_info lwi;
@@ -1025,7 +1028,7 @@ static int ldlm_pools_thread_main(void *arg)
wake_up(&thread->t_ctl_waitq);
CDEBUG(D_DLMTRACE, "%s: pool thread exiting, process %d\n",
- "ldlm_poold", current_pid());
+ "ldlm_poold", current_pid());
complete_and_exit(&ldlm_pools_comp, 0);
}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
index af487f9937f4..35ba6f14d95f 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
@@ -63,8 +63,8 @@
#include "ldlm_internal.h"
-int ldlm_enqueue_min = OBD_TIMEOUT_DEFAULT;
-module_param(ldlm_enqueue_min, int, 0644);
+unsigned int ldlm_enqueue_min = OBD_TIMEOUT_DEFAULT;
+module_param(ldlm_enqueue_min, uint, 0644);
MODULE_PARM_DESC(ldlm_enqueue_min, "lock enqueue timeout minimum");
/* in client side, whether the cached locks will be canceled before replay */
@@ -123,44 +123,56 @@ static int ldlm_expired_completion_wait(void *data)
return 0;
}
+/**
+ * Calculate the Completion timeout (covering enqueue, BL AST, data flush,
+ * lock cancel, and their replies). Used for lock completion timeout on the
+ * client side.
+ *
+ * \param[in] lock lock which is waiting the completion callback
+ *
+ * \retval timeout in seconds to wait for the server reply
+ */
/* We use the same basis for both server side and client side functions
* from a single node.
*/
-static int ldlm_get_enq_timeout(struct ldlm_lock *lock)
+static unsigned int ldlm_cp_timeout(struct ldlm_lock *lock)
{
- int timeout = at_get(ldlm_lock_to_ns_at(lock));
+ unsigned int timeout;
if (AT_OFF)
- return obd_timeout / 2;
- /* Since these are non-updating timeouts, we should be conservative.
- * It would be nice to have some kind of "early reply" mechanism for
- * lock callbacks too...
+ return obd_timeout;
+
+ /*
+ * Wait a long time for enqueue - server may have to callback a
+ * lock from another client. Server will evict the other client if it
+ * doesn't respond reasonably, and then give us the lock.
*/
- timeout = min_t(int, at_max, timeout + (timeout >> 1)); /* 150% */
- return max(timeout, ldlm_enqueue_min);
+ timeout = at_get(ldlm_lock_to_ns_at(lock));
+ return max(3 * timeout, ldlm_enqueue_min);
}
/**
* Helper function for ldlm_completion_ast(), updating timings when lock is
* actually granted.
*/
-static int ldlm_completion_tail(struct ldlm_lock *lock)
+static int ldlm_completion_tail(struct ldlm_lock *lock, void *data)
{
long delay;
- int result;
+ int result = 0;
if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) {
LDLM_DEBUG(lock, "client-side enqueue: destroyed");
result = -EIO;
+ } else if (!data) {
+ LDLM_DEBUG(lock, "client-side enqueue: granted");
} else {
+ /* Take into AT only CP RPC, not immediately granted locks */
delay = ktime_get_real_seconds() - lock->l_last_activity;
LDLM_DEBUG(lock, "client-side enqueue: granted after %lds",
delay);
/* Update our time estimate */
- at_measured(ldlm_lock_to_ns_at(lock),
- delay);
- result = 0;
+ at_measured(ldlm_lock_to_ns_at(lock), delay);
}
return result;
}
@@ -177,10 +189,9 @@ int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data)
return 0;
}
- if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
- LDLM_FL_BLOCK_CONV))) {
+ if (!(flags & LDLM_FL_BLOCKED_MASK)) {
wake_up(&lock->l_waitq);
- return ldlm_completion_tail(lock);
+ return ldlm_completion_tail(lock, data);
}
LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, going forward");
@@ -224,8 +235,7 @@ int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
goto noreproc;
}
- if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
- LDLM_FL_BLOCK_CONV))) {
+ if (!(flags & LDLM_FL_BLOCKED_MASK)) {
wake_up(&lock->l_waitq);
return 0;
}
@@ -240,13 +250,10 @@ noreproc:
if (obd)
imp = obd->u.cli.cl_import;
- /* Wait a long time for enqueue - server may have to callback a
- * lock from another client. Server will evict the other client if it
- * doesn't respond reasonably, and then give us the lock.
- */
- timeout = ldlm_get_enq_timeout(lock) * 2;
+ timeout = ldlm_cp_timeout(lock);
lwd.lwd_lock = lock;
+ lock->l_last_activity = ktime_get_real_seconds();
if (ldlm_is_no_timeout(lock)) {
LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
@@ -279,7 +286,7 @@ noreproc:
return rc;
}
- return ldlm_completion_tail(lock);
+ return ldlm_completion_tail(lock, data);
}
EXPORT_SYMBOL(ldlm_completion_ast);
@@ -309,8 +316,6 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
else
LDLM_DEBUG(lock, "lock was granted or failed in race");
- ldlm_lock_decref_internal(lock, mode);
-
/* XXX - HACK because we shouldn't call ldlm_lock_destroy()
* from llite/file.c/ll_file_flock().
*/
@@ -321,9 +326,14 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
*/
if (lock->l_resource->lr_type == LDLM_FLOCK) {
lock_res_and_lock(lock);
- ldlm_resource_unlink_lock(lock);
- ldlm_lock_destroy_nolock(lock);
+ if (!ldlm_is_destroyed(lock)) {
+ ldlm_resource_unlink_lock(lock);
+ ldlm_lock_decref_internal_nolock(lock, mode);
+ ldlm_lock_destroy_nolock(lock);
+ }
unlock_res_and_lock(lock);
+ } else {
+ ldlm_lock_decref_internal(lock, mode);
}
}
@@ -418,11 +428,6 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
*flags = ldlm_flags_from_wire(reply->lock_flags);
lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
LDLM_FL_INHERIT_MASK);
- /* move NO_TIMEOUT flag to the lock to force ldlm_lock_match()
- * to wait with no timeout as well
- */
- lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
- LDLM_FL_NO_TIMEOUT);
unlock_res_and_lock(lock);
CDEBUG(D_INFO, "local: %p, remote cookie: %#llx, flags: 0x%llx\n",
@@ -556,7 +561,7 @@ static inline int ldlm_capsule_handles_avail(struct req_capsule *pill,
enum req_location loc,
int off)
{
- int size = req_capsule_msg_size(pill, loc);
+ u32 size = req_capsule_msg_size(pill, loc);
return ldlm_req_handles_avail(size, off);
}
@@ -565,7 +570,7 @@ static inline int ldlm_format_handles_avail(struct obd_import *imp,
const struct req_format *fmt,
enum req_location loc, int off)
{
- int size = req_capsule_fmt_size(imp->imp_msg_magic, fmt, loc);
+ u32 size = req_capsule_fmt_size(imp->imp_msg_magic, fmt, loc);
return ldlm_req_handles_avail(size, off);
}
@@ -696,8 +701,8 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
lock = ldlm_lock_create(ns, res_id, einfo->ei_type,
einfo->ei_mode, &cbs, einfo->ei_cbdata,
lvb_len, lvb_type);
- if (!lock)
- return -ENOMEM;
+ if (IS_ERR(lock))
+ return PTR_ERR(lock);
/* for the local lock, add the reference */
ldlm_lock_addref_internal(lock, einfo->ei_mode);
ldlm_lock2handle(lock, lockh);
@@ -719,6 +724,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
lock->l_export = NULL;
lock->l_blocking_ast = einfo->ei_cb_bl;
lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL));
+ lock->l_last_activity = ktime_get_real_seconds();
/* lock not sent to server yet */
@@ -819,7 +825,7 @@ static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock)
lock_res_and_lock(lock);
ldlm_set_cbpending(lock);
local_only = !!(lock->l_flags &
- (LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK));
+ (LDLM_FL_LOCAL_ONLY | LDLM_FL_CANCEL_ON_BLOCK));
ldlm_cancel_callback(lock);
rc = ldlm_is_bl_ast(lock) ? LDLM_FL_BL_AST : LDLM_FL_CANCELING;
unlock_res_and_lock(lock);
@@ -1180,8 +1186,7 @@ static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
slv = ldlm_pool_get_slv(pl);
lvf = ldlm_pool_get_lvf(pl);
- la = cfs_duration_sec(cfs_time_sub(cur,
- lock->l_last_used));
+ la = cfs_duration_sec(cfs_time_sub(cur, lock->l_last_used));
lv = lvf * la * unused;
/* Inform pool about current CLV to see it via debugfs. */
@@ -1193,9 +1198,6 @@ static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
if (slv == 0 || lv < slv)
return LDLM_POLICY_KEEP_LOCK;
- if (ns->ns_cancel && ns->ns_cancel(lock) == 0)
- return LDLM_POLICY_KEEP_LOCK;
-
return LDLM_POLICY_CANCEL_LOCK;
}
@@ -1239,9 +1241,6 @@ static enum ldlm_policy_res ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
cfs_time_add(lock->l_last_used, ns->ns_max_age)))
return LDLM_POLICY_KEEP_LOCK;
- if (ns->ns_cancel && ns->ns_cancel(lock) == 0)
- return LDLM_POLICY_KEEP_LOCK;
-
return LDLM_POLICY_CANCEL_LOCK;
}
@@ -1374,7 +1373,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
break;
list_for_each_entry_safe(lock, next, &ns->ns_unused_list,
- l_lru) {
+ l_lru) {
/* No locks which got blocking requests. */
LASSERT(!ldlm_is_bl_ast(lock));
@@ -1413,7 +1412,8 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
* That is, for shrinker policy we drop only
* old locks, but additionally choose them by
* their weight. Big extent locks will stay in
- * the cache. */
+ * the cache.
+ */
result = pf(ns, lock, unused, added, count);
if (result == LDLM_POLICY_KEEP_LOCK) {
lu_ref_del(&lock->l_reference,
@@ -1610,8 +1610,7 @@ int ldlm_cli_cancel_list(struct list_head *cancels, int count,
*/
while (count > 0) {
LASSERT(!list_empty(cancels));
- lock = list_entry(cancels->next, struct ldlm_lock,
- l_bl_ast);
+ lock = list_entry(cancels->next, struct ldlm_lock, l_bl_ast);
LASSERT(lock->l_conn_export);
if (exp_connect_cancelset(lock->l_conn_export)) {
@@ -1660,7 +1659,7 @@ int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
int rc;
res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
- if (!res) {
+ if (IS_ERR(res)) {
/* This is not a problem. */
CDEBUG(D_INFO, "No resource %llu\n", res_id->name[0]);
return 0;
@@ -1704,7 +1703,8 @@ static int ldlm_cli_hash_cancel_unused(struct cfs_hash *hs,
* that have 0 readers/writers.
*
* If flags & LCF_LOCAL, throw the locks away without trying
- * to notify the server. */
+ * to notify the server.
+ */
int ldlm_cli_cancel_unused(struct ldlm_namespace *ns,
const struct ldlm_res_id *res_id,
enum ldlm_cancel_flags flags, void *opaque)
@@ -1811,13 +1811,10 @@ int ldlm_resource_iterate(struct ldlm_namespace *ns,
struct ldlm_resource *res;
int rc;
- if (!ns) {
- CERROR("must pass in namespace\n");
- LBUG();
- }
+ LASSERTF(ns, "must pass in namespace\n");
res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
- if (!res)
+ if (IS_ERR(res))
return 0;
LDLM_RESOURCE_ADDREF(res);
@@ -1843,7 +1840,7 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
* bug 17614: locks being actively cancelled. Get a reference
* on a lock so that it does not disappear under us (e.g. due to cancel)
*/
- if (!(lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_CANCELING))) {
+ if (!(lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_CANCELING))) {
list_add(&lock->l_pending_chain, list);
LDLM_LOCK_GET(lock);
}
@@ -2013,7 +2010,7 @@ static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns)
LCF_LOCAL, LDLM_CANCEL_NO_WAIT);
CDEBUG(D_DLMTRACE, "Canceled %d unused locks from namespace %s\n",
- canceled, ldlm_ns_name(ns));
+ canceled, ldlm_ns_name(ns));
}
int ldlm_replay_locks(struct obd_import *imp)
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
index 51a28d96af39..a09c25aea698 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
@@ -449,8 +449,8 @@ static unsigned ldlm_res_hop_hash(struct cfs_hash *hs,
const void *key, unsigned mask)
{
const struct ldlm_res_id *id = key;
- unsigned val = 0;
- unsigned i;
+ unsigned int val = 0;
+ unsigned int i;
for (i = 0; i < RES_NAME_SIZE; i++)
val += id->name[i];
@@ -561,9 +561,9 @@ static struct cfs_hash_ops ldlm_ns_fid_hash_ops = {
struct ldlm_ns_hash_def {
enum ldlm_ns_type nsd_type;
/** hash bucket bits */
- unsigned nsd_bkt_bits;
+ unsigned int nsd_bkt_bits;
/** hash bits */
- unsigned nsd_all_bits;
+ unsigned int nsd_all_bits;
/** hash operations */
struct cfs_hash_ops *nsd_hops;
};
@@ -758,8 +758,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
*/
lock_res(res);
list_for_each(tmp, q) {
- lock = list_entry(tmp, struct ldlm_lock,
- l_res_link);
+ lock = list_entry(tmp, struct ldlm_lock, l_res_link);
if (ldlm_is_cleaned(lock)) {
lock = NULL;
continue;
@@ -793,8 +792,14 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
*/
unlock_res(res);
LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
+ if (lock->l_flags & LDLM_FL_FAIL_LOC) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(cfs_time_seconds(4));
+ set_current_state(TASK_RUNNING);
+ }
if (lock->l_completion_ast)
- lock->l_completion_ast(lock, 0, NULL);
+ lock->l_completion_ast(lock, LDLM_FL_FAILED,
+ NULL);
LDLM_LOCK_RELEASE(lock);
continue;
}
@@ -875,7 +880,8 @@ static int __ldlm_namespace_free(struct ldlm_namespace *ns, int force)
ldlm_ns_name(ns), atomic_read(&ns->ns_bref));
force_wait:
if (force)
- lwi = LWI_TIMEOUT(obd_timeout * HZ / 4, NULL, NULL);
+ lwi = LWI_TIMEOUT(msecs_to_jiffies(obd_timeout *
+ MSEC_PER_SEC) / 4, NULL, NULL);
rc = l_wait_event(ns->ns_waitq,
atomic_read(&ns->ns_bref) == 0, &lwi);
@@ -1082,10 +1088,11 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
int create)
{
struct hlist_node *hnode;
- struct ldlm_resource *res;
+ struct ldlm_resource *res = NULL;
struct cfs_hash_bd bd;
__u64 version;
int ns_refcount = 0;
+ int rc;
LASSERT(!parent);
LASSERT(ns->ns_rs_hash);
@@ -1095,31 +1102,20 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
hnode = cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name);
if (hnode) {
cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0);
- res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
- /* Synchronize with regard to resource creation. */
- if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
- mutex_lock(&res->lr_lvb_mutex);
- mutex_unlock(&res->lr_lvb_mutex);
- }
-
- if (unlikely(res->lr_lvb_len < 0)) {
- ldlm_resource_putref(res);
- res = NULL;
- }
- return res;
+ goto lvbo_init;
}
version = cfs_hash_bd_version_get(&bd);
cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0);
if (create == 0)
- return NULL;
+ return ERR_PTR(-ENOENT);
LASSERTF(type >= LDLM_MIN_TYPE && type < LDLM_MAX_TYPE,
"type: %d\n", type);
res = ldlm_resource_new();
if (!res)
- return NULL;
+ return ERR_PTR(-ENOMEM);
res->lr_ns_bucket = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd);
res->lr_name = *name;
@@ -1137,7 +1133,7 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
/* We have taken lr_lvb_mutex. Drop it. */
mutex_unlock(&res->lr_lvb_mutex);
kmem_cache_free(ldlm_resource_slab, res);
-
+lvbo_init:
res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
/* Synchronize with regard to resource creation. */
if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
@@ -1146,8 +1142,9 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
}
if (unlikely(res->lr_lvb_len < 0)) {
+ rc = res->lr_lvb_len;
ldlm_resource_putref(res);
- res = NULL;
+ res = ERR_PTR(rc);
}
return res;
}
@@ -1158,8 +1155,6 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1);
if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
- int rc;
-
OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CREATE_RESOURCE, 2);
rc = ns->ns_lvbo->lvbo_init(res);
if (rc < 0) {
@@ -1169,7 +1164,7 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
res->lr_lvb_len = rc;
mutex_unlock(&res->lr_lvb_mutex);
ldlm_resource_putref(res);
- return NULL;
+ return ERR_PTR(rc);
}
}
@@ -1386,7 +1381,7 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res)
if (!list_empty(&res->lr_granted)) {
CDEBUG(level, "Granted locks (in reverse order):\n");
list_for_each_entry_reverse(lock, &res->lr_granted,
- l_res_link) {
+ l_res_link) {
LDLM_DEBUG_LIMIT(level, lock, "###");
if (!(level & D_CANTMASK) &&
++granted > ldlm_dump_granted_max) {
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
index 2cbb1b80bd41..1ac0940bd8df 100644
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ b/drivers/staging/lustre/lustre/llite/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_LUSTRE_FS) += lustre.o
lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \
- rw.o namei.o symlink.o llite_mmap.o \
+ rw.o namei.o symlink.o llite_mmap.o range_lock.o \
xattr.o xattr_cache.o rw26.o super25.o statahead.o \
glimpse.o lcommon_cl.o lcommon_misc.o \
vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
index 463b1a360733..0e45d8fc4d7c 100644
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -37,7 +37,6 @@
#define DEBUG_SUBSYSTEM S_LLITE
#include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
#include "../include/lustre/lustre_idl.h"
#include "../include/lustre_dlm.h"
@@ -102,39 +101,6 @@ static int ll_dcompare(const struct dentry *dentry,
return 0;
}
-static inline int return_if_equal(struct ldlm_lock *lock, void *data)
-{
- return (ldlm_is_canceling(lock) && ldlm_is_discard_data(lock)) ?
- LDLM_ITER_CONTINUE : LDLM_ITER_STOP;
-}
-
-/* find any ldlm lock of the inode in mdc and lov
- * return 0 not find
- * 1 find one
- * < 0 error
- */
-static int find_cbdata(struct inode *inode)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct lov_stripe_md *lsm;
- int rc = 0;
-
- LASSERT(inode);
- rc = md_find_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
- return_if_equal, NULL);
- if (rc != 0)
- return rc;
-
- lsm = ccc_inode_lsm_get(inode);
- if (!lsm)
- return rc;
-
- rc = obd_find_cbdata(sbi->ll_dt_exp, lsm, return_if_equal, NULL);
- ccc_inode_lsm_put(inode, lsm);
-
- return rc;
-}
-
/**
* Called when last reference to a dentry is dropped and dcache wants to know
* whether or not it should cache it:
@@ -155,19 +121,6 @@ static int ll_ddelete(const struct dentry *de)
/* kernel >= 2.6.38 last refcount is decreased after this function. */
LASSERT(d_count(de) == 1);
- /* Disable this piece of code temporarily because this is called
- * inside dcache_lock so it's not appropriate to do lots of work
- * here. ATTENTION: Before this piece of code enabling, LU-2487 must be
- * resolved.
- */
-#if 0
- /* if not ldlm lock for this inode, set i_nlink to 0 so that
- * this inode can be recycled later b=20433
- */
- if (d_really_is_positive(de) && !find_cbdata(d_inode(de)))
- clear_nlink(d_inode(de));
-#endif
-
if (d_lustre_invalid((struct dentry *)de))
return 1;
return 0;
@@ -325,14 +278,13 @@ static int ll_revalidate_dentry(struct dentry *dentry,
if (lookup_flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE))
return 1;
- if (d_need_statahead(dir, dentry) <= 0)
+ if (!dentry_may_statahead(dir, dentry))
return 1;
if (lookup_flags & LOOKUP_RCU)
return -ECHILD;
- do_statahead_enter(dir, &dentry, !d_inode(dentry));
- ll_statahead_mark(dir, dentry);
+ ll_statahead(dir, &dentry, !d_inode(dentry));
return 1;
}
@@ -347,18 +299,9 @@ static int ll_revalidate_nd(struct dentry *dentry, unsigned int flags)
return ll_revalidate_dentry(dentry, flags);
}
-static void ll_d_iput(struct dentry *de, struct inode *inode)
-{
- LASSERT(inode);
- if (!find_cbdata(inode))
- clear_nlink(inode);
- iput(inode);
-}
-
const struct dentry_operations ll_d_ops = {
.d_revalidate = ll_revalidate_nd,
.d_release = ll_release,
.d_delete = ll_ddelete,
- .d_iput = ll_d_iput,
.d_compare = ll_dcompare,
};
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 5b381779c827..7f32a539d260 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -46,9 +46,8 @@
#include "../include/obd_support.h"
#include "../include/obd_class.h"
+#include "../include/lustre/lustre_ioctl.h"
#include "../include/lustre_lib.h"
-#include "../include/lustre/lustre_idl.h"
-#include "../include/lustre_lite.h"
#include "../include/lustre_dlm.h"
#include "../include/lustre_fid.h"
#include "../include/lustre_kernelcomm.h"
@@ -134,111 +133,35 @@
* for this integrated page will be adjusted. See lmv_adjust_dirpages().
*
*/
-
-/* returns the page unlocked, but with a reference */
-static int ll_dir_filler(void *_hash, struct page *page0)
+struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
+ __u64 offset)
{
- struct inode *inode = page0->mapping->host;
- int hash64 = ll_i2sbi(inode)->ll_flags & LL_SBI_64BIT_HASH;
- struct obd_export *exp = ll_i2sbi(inode)->ll_md_exp;
- struct ptlrpc_request *request;
- struct mdt_body *body;
- struct md_op_data *op_data;
- __u64 hash = *((__u64 *)_hash);
- struct page **page_pool;
+ struct md_callback cb_op;
struct page *page;
- struct lu_dirpage *dp;
- int max_pages = ll_i2sbi(inode)->ll_md_brw_size >> PAGE_SHIFT;
- int nrdpgs = 0; /* number of pages read actually */
- int npages;
- int i;
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) hash %llu\n",
- PFID(ll_inode2fid(inode)), inode, hash);
-
- LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES);
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
- if (page_pool) {
- page_pool[0] = page0;
- } else {
- page_pool = &page0;
- max_pages = 1;
- }
- for (npages = 1; npages < max_pages; npages++) {
- page = page_cache_alloc_cold(inode->i_mapping);
- if (!page)
- break;
- page_pool[npages] = page;
- }
-
- op_data->op_npages = npages;
- op_data->op_offset = hash;
- rc = md_readpage(exp, op_data, page_pool, &request);
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- /* page0 is special, which was added into page cache early */
- delete_from_page_cache(page0);
- } else if (rc == 0) {
- body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
- /* Checked by mdc_readpage() */
- if (body->valid & OBD_MD_FLSIZE)
- i_size_write(inode, body->size);
-
- nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1)
- >> PAGE_SHIFT;
- SetPageUptodate(page0);
- }
- unlock_page(page0);
- ptlrpc_req_finished(request);
-
- CDEBUG(D_VFSTRACE, "read %d/%d pages\n", nrdpgs, npages);
-
- for (i = 1; i < npages; i++) {
- unsigned long offset;
- int ret;
-
- page = page_pool[i];
-
- if (rc < 0 || i >= nrdpgs) {
- put_page(page);
- continue;
- }
-
- SetPageUptodate(page);
-
- dp = kmap(page);
- hash = le64_to_cpu(dp->ldp_hash_start);
- kunmap(page);
-
- offset = hash_x_index(hash, hash64);
-
- prefetchw(&page->flags);
- ret = add_to_page_cache_lru(page, inode->i_mapping, offset,
- GFP_NOFS);
- if (ret == 0) {
- unlock_page(page);
- } else {
- CDEBUG(D_VFSTRACE, "page %lu add to page cache failed: %d\n",
- offset, ret);
- }
- put_page(page);
- }
+ cb_op.md_blocking_ast = ll_md_blocking_ast;
+ rc = md_read_page(ll_i2mdexp(dir), op_data, &cb_op, offset, &page);
+ if (rc)
+ return ERR_PTR(rc);
- if (page_pool != &page0)
- kfree(page_pool);
- return rc;
+ return page;
}
-void ll_release_page(struct page *page, int remove)
+void ll_release_page(struct inode *inode, struct page *page, bool remove)
{
kunmap(page);
+
+ /*
+ * Always remove the page for a striped dir, because the page is
+ * only a temporary one built in the LMV layer
+ */
+ if (inode && S_ISDIR(inode->i_mode) &&
+ ll_i2info(inode)->lli_lsm_md) {
+ __free_page(page);
+ return;
+ }
+
if (remove) {
lock_page(page);
if (likely(page->mapping))
@@ -248,225 +171,6 @@ void ll_release_page(struct page *page, int remove)
put_page(page);
}
-/*
- * Find, kmap and return page that contains given hash.
- */
-static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
- __u64 *start, __u64 *end)
-{
- int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
- struct address_space *mapping = dir->i_mapping;
- /*
- * Complement of hash is used as an index so that
- * radix_tree_gang_lookup() can be used to find a page with starting
- * hash _smaller_ than one we are looking for.
- */
- unsigned long offset = hash_x_index(*hash, hash64);
- struct page *page;
- int found;
-
- spin_lock_irq(&mapping->tree_lock);
- found = radix_tree_gang_lookup(&mapping->page_tree,
- (void **)&page, offset, 1);
- if (found > 0 && !radix_tree_exceptional_entry(page)) {
- struct lu_dirpage *dp;
-
- get_page(page);
- spin_unlock_irq(&mapping->tree_lock);
- /*
- * In contrast to find_lock_page() we are sure that directory
- * page cannot be truncated (while DLM lock is held) and,
- * hence, can avoid restart.
- *
- * In fact, page cannot be locked here at all, because
- * ll_dir_filler() does synchronous io.
- */
- wait_on_page_locked(page);
- if (PageUptodate(page)) {
- dp = kmap(page);
- if (BITS_PER_LONG == 32 && hash64) {
- *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
- *end = le64_to_cpu(dp->ldp_hash_end) >> 32;
- *hash = *hash >> 32;
- } else {
- *start = le64_to_cpu(dp->ldp_hash_start);
- *end = le64_to_cpu(dp->ldp_hash_end);
- }
- LASSERTF(*start <= *hash, "start = %#llx,end = %#llx,hash = %#llx\n",
- *start, *end, *hash);
- CDEBUG(D_VFSTRACE, "page %lu [%llu %llu], hash %llu\n",
- offset, *start, *end, *hash);
- if (*hash > *end) {
- ll_release_page(page, 0);
- page = NULL;
- } else if (*end != *start && *hash == *end) {
- /*
- * upon hash collision, remove this page,
- * otherwise put page reference, and
- * ll_get_dir_page() will issue RPC to fetch
- * the page we want.
- */
- ll_release_page(page,
- le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
- page = NULL;
- }
- } else {
- put_page(page);
- page = ERR_PTR(-EIO);
- }
-
- } else {
- spin_unlock_irq(&mapping->tree_lock);
- page = NULL;
- }
- return page;
-}
-
-struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
- struct ll_dir_chain *chain)
-{
- ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
- struct address_space *mapping = dir->i_mapping;
- struct lustre_handle lockh;
- struct lu_dirpage *dp;
- struct page *page;
- enum ldlm_mode mode;
- int rc;
- __u64 start = 0;
- __u64 end = 0;
- __u64 lhash = hash;
- struct ll_inode_info *lli = ll_i2info(dir);
- int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
-
- mode = LCK_PR;
- rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
- ll_inode2fid(dir), LDLM_IBITS, &policy, mode, &lockh);
- if (!rc) {
- struct ldlm_enqueue_info einfo = {
- .ei_type = LDLM_IBITS,
- .ei_mode = mode,
- .ei_cb_bl = ll_md_blocking_ast,
- .ei_cb_cp = ldlm_completion_ast,
- };
- struct lookup_intent it = { .it_op = IT_READDIR };
- struct ptlrpc_request *request;
- struct md_op_data *op_data;
-
- op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return (void *)op_data;
-
- rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it,
- op_data, &lockh, NULL, 0, NULL, 0);
-
- ll_finish_md_op_data(op_data);
-
- request = (struct ptlrpc_request *)it.it_request;
- if (request)
- ptlrpc_req_finished(request);
- if (rc < 0) {
- CERROR("lock enqueue: " DFID " at %llu: rc %d\n",
- PFID(ll_inode2fid(dir)), hash, rc);
- return ERR_PTR(rc);
- }
-
- CDEBUG(D_INODE, "setting lr_lvb_inode to inode "DFID"(%p)\n",
- PFID(ll_inode2fid(dir)), dir);
- md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
- &it.it_lock_handle, dir, NULL);
- } else {
- /* for cross-ref object, l_ast_data of the lock may not be set,
- * we reset it here
- */
- md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &lockh.cookie,
- dir, NULL);
- }
- ldlm_lock_dump_handle(D_OTHER, &lockh);
-
- mutex_lock(&lli->lli_readdir_mutex);
- page = ll_dir_page_locate(dir, &lhash, &start, &end);
- if (IS_ERR(page)) {
- CERROR("dir page locate: "DFID" at %llu: rc %ld\n",
- PFID(ll_inode2fid(dir)), lhash, PTR_ERR(page));
- goto out_unlock;
- } else if (page) {
- /*
- * XXX nikita: not entirely correct handling of a corner case:
- * suppose hash chain of entries with hash value HASH crosses
- * border between pages P0 and P1. First both P0 and P1 are
- * cached, seekdir() is called for some entry from the P0 part
- * of the chain. Later P0 goes out of cache. telldir(HASH)
- * happens and finds P1, as it starts with matching hash
- * value. Remaining entries from P0 part of the chain are
- * skipped. (Is that really a bug?)
- *
- * Possible solutions: 0. don't cache P1 is such case, handle
- * it as an "overflow" page. 1. invalidate all pages at
- * once. 2. use HASH|1 as an index for P1.
- */
- goto hash_collision;
- }
-
- page = read_cache_page(mapping, hash_x_index(hash, hash64),
- ll_dir_filler, &lhash);
- if (IS_ERR(page)) {
- CERROR("read cache page: "DFID" at %llu: rc %ld\n",
- PFID(ll_inode2fid(dir)), hash, PTR_ERR(page));
- goto out_unlock;
- }
-
- wait_on_page_locked(page);
- (void)kmap(page);
- if (!PageUptodate(page)) {
- CERROR("page not updated: "DFID" at %llu: rc %d\n",
- PFID(ll_inode2fid(dir)), hash, -5);
- goto fail;
- }
- if (!PageChecked(page))
- /* XXX: check page format later */
- SetPageChecked(page);
- if (PageError(page)) {
- CERROR("page error: "DFID" at %llu: rc %d\n",
- PFID(ll_inode2fid(dir)), hash, -5);
- goto fail;
- }
-hash_collision:
- dp = page_address(page);
- if (BITS_PER_LONG == 32 && hash64) {
- start = le64_to_cpu(dp->ldp_hash_start) >> 32;
- end = le64_to_cpu(dp->ldp_hash_end) >> 32;
- lhash = hash >> 32;
- } else {
- start = le64_to_cpu(dp->ldp_hash_start);
- end = le64_to_cpu(dp->ldp_hash_end);
- lhash = hash;
- }
- if (end == start) {
- LASSERT(start == lhash);
- CWARN("Page-wide hash collision: %llu\n", end);
- if (BITS_PER_LONG == 32 && hash64)
- CWARN("Real page-wide hash collision at [%llu %llu] with hash %llu\n",
- le64_to_cpu(dp->ldp_hash_start),
- le64_to_cpu(dp->ldp_hash_end), hash);
- /*
- * Fetch whole overflow chain...
- *
- * XXX not yet.
- */
- goto fail;
- }
-out_unlock:
- mutex_unlock(&lli->lli_readdir_mutex);
- ldlm_lock_decref(&lockh, mode);
- return page;
-
-fail:
- ll_release_page(page, 1);
- page = ERR_PTR(-EIO);
- goto out_unlock;
-}
-
/**
* return IF_* type for given lu_dirent entry.
* IF_* flag shld be converted to particular OS file type in
@@ -489,119 +193,100 @@ static __u16 ll_dirent_type_get(struct lu_dirent *ent)
return type;
}
-int ll_dir_read(struct inode *inode, struct dir_context *ctx)
+int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
+ struct dir_context *ctx)
{
- struct ll_inode_info *info = ll_i2info(inode);
struct ll_sb_info *sbi = ll_i2sbi(inode);
- __u64 pos = ctx->pos;
- int api32 = ll_need_32bit_api(sbi);
- int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
+ __u64 pos = *ppos;
+ int is_api32 = ll_need_32bit_api(sbi);
+ int is_hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
struct page *page;
- struct ll_dir_chain chain;
- int done = 0;
+ bool done = false;
int rc = 0;
- ll_dir_chain_init(&chain);
-
- page = ll_get_dir_page(inode, pos, &chain);
+ page = ll_get_dir_page(inode, op_data, pos);
while (rc == 0 && !done) {
struct lu_dirpage *dp;
struct lu_dirent *ent;
+ __u64 hash;
+ __u64 next;
- if (!IS_ERR(page)) {
- /*
- * If page is empty (end of directory is reached),
- * use this value.
- */
- __u64 hash = MDS_DIR_END_OFF;
- __u64 next;
-
- dp = page_address(page);
- for (ent = lu_dirent_start(dp); ent && !done;
- ent = lu_dirent_next(ent)) {
- __u16 type;
- int namelen;
- struct lu_fid fid;
- __u64 lhash;
- __u64 ino;
+ if (IS_ERR(page)) {
+ rc = PTR_ERR(page);
+ break;
+ }
+ hash = MDS_DIR_END_OFF;
+ dp = page_address(page);
+ for (ent = lu_dirent_start(dp); ent && !done;
+ ent = lu_dirent_next(ent)) {
+ __u16 type;
+ int namelen;
+ struct lu_fid fid;
+ __u64 lhash;
+ __u64 ino;
+
+ hash = le64_to_cpu(ent->lde_hash);
+ if (hash < pos)
/*
- * XXX: implement correct swabbing here.
+ * Skip until we find target hash
+ * value.
*/
+ continue;
- hash = le64_to_cpu(ent->lde_hash);
- if (hash < pos)
- /*
- * Skip until we find target hash
- * value.
- */
- continue;
-
- namelen = le16_to_cpu(ent->lde_namelen);
- if (namelen == 0)
- /*
- * Skip dummy record.
- */
- continue;
-
- if (api32 && hash64)
- lhash = hash >> 32;
- else
- lhash = hash;
- fid_le_to_cpu(&fid, &ent->lde_fid);
- ino = cl_fid_build_ino(&fid, api32);
- type = ll_dirent_type_get(ent);
- ctx->pos = lhash;
- /* For 'll_nfs_get_name_filldir()', it will try
- * to access the 'ent' through its 'lde_name',
- * so the parameter 'name' for 'ctx->actor()'
- * must be part of the 'ent'.
+ namelen = le16_to_cpu(ent->lde_namelen);
+ if (namelen == 0)
+ /*
+ * Skip dummy record.
*/
- done = !dir_emit(ctx, ent->lde_name,
- namelen, ino, type);
- }
- next = le64_to_cpu(dp->ldp_hash_end);
- if (!done) {
- pos = next;
- if (pos == MDS_DIR_END_OFF) {
- /*
- * End of directory reached.
- */
- done = 1;
- ll_release_page(page, 0);
- } else if (1 /* chain is exhausted*/) {
- /*
- * Normal case: continue to the next
- * page.
- */
- ll_release_page(page,
- le32_to_cpu(dp->ldp_flags) &
- LDF_COLLIDE);
- next = pos;
- page = ll_get_dir_page(inode, pos,
- &chain);
- } else {
- /*
- * go into overflow page.
- */
- LASSERT(le32_to_cpu(dp->ldp_flags) &
- LDF_COLLIDE);
- ll_release_page(page, 1);
- }
- } else {
- pos = hash;
- ll_release_page(page, 0);
- }
+ continue;
+
+ if (is_api32 && is_hash64)
+ lhash = hash >> 32;
+ else
+ lhash = hash;
+ fid_le_to_cpu(&fid, &ent->lde_fid);
+ ino = cl_fid_build_ino(&fid, is_api32);
+ type = ll_dirent_type_get(ent);
+ ctx->pos = lhash;
+ /* For 'll_nfs_get_name_filldir()', it will try
+ * to access the 'ent' through its 'lde_name',
+ * so the parameter 'name' for 'ctx->actor()'
+ * must be part of the 'ent'.
+ */
+ done = !dir_emit(ctx, ent->lde_name,
+ namelen, ino, type);
+ }
+
+ if (done) {
+ pos = hash;
+ ll_release_page(inode, page, false);
+ break;
+ }
+
+ next = le64_to_cpu(dp->ldp_hash_end);
+ pos = next;
+ if (pos == MDS_DIR_END_OFF) {
+ /*
+ * End of directory reached.
+ */
+ done = 1;
+ ll_release_page(inode, page, false);
} else {
- rc = PTR_ERR(page);
- CERROR("error reading dir "DFID" at %lu: rc %d\n",
- PFID(&info->lli_fid), (unsigned long)pos, rc);
+ /*
+ * Normal case: continue to the next
+ * page.
+ */
+ ll_release_page(inode, page,
+ le32_to_cpu(dp->ldp_flags) &
+ LDF_COLLIDE);
+ next = pos;
+ page = ll_get_dir_page(inode, op_data, pos);
}
}
ctx->pos = pos;
- ll_dir_chain_fini(&chain);
return rc;
}
@@ -613,9 +298,10 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
__u64 pos = lfd ? lfd->lfd_pos : 0;
int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
int api32 = ll_need_32bit_api(sbi);
+ struct md_op_data *op_data;
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos %lu/%llu 32bit_api %d\n",
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos/size %lu/%llu 32bit_api %d\n",
PFID(ll_inode2fid(inode)), inode, (unsigned long)pos,
i_size_read(inode), api32);
@@ -627,19 +313,58 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
goto out;
}
+ op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
+ LUSTRE_OPC_ANY, inode);
+ if (IS_ERR(op_data)) {
+ rc = PTR_ERR(op_data);
+ goto out;
+ }
+
+ if (unlikely(op_data->op_mea1)) {
+ /*
+ * This is only needed for a striped dir, to fill in the ".."
+ * entry; see lmv_read_page
+ */
+ if (file_dentry(filp)->d_parent &&
+ file_dentry(filp)->d_parent->d_inode) {
+ __u64 ibits = MDS_INODELOCK_UPDATE;
+ struct inode *parent;
+
+ parent = file_dentry(filp)->d_parent->d_inode;
+ if (ll_have_md_lock(parent, &ibits, LCK_MINMODE))
+ op_data->op_fid3 = *ll_inode2fid(parent);
+ }
+
+ /*
+ * If the parent FID cannot be found in the cache, look up ".."
+ * on the master object
+ */
+ if (fid_is_zero(&op_data->op_fid3)) {
+ rc = ll_dir_get_parent_fid(inode, &op_data->op_fid3);
+ if (rc) {
+ ll_finish_md_op_data(op_data);
+ return rc;
+ }
+ }
+ }
+ op_data->op_max_pages = sbi->ll_md_brw_pages;
ctx->pos = pos;
- rc = ll_dir_read(inode, ctx);
+ rc = ll_dir_read(inode, &pos, op_data, ctx);
+ pos = ctx->pos;
if (lfd)
- lfd->lfd_pos = ctx->pos;
- if (ctx->pos == MDS_DIR_END_OFF) {
+ lfd->lfd_pos = pos;
+
+ if (pos == MDS_DIR_END_OFF) {
if (api32)
- ctx->pos = LL_DIR_END_OFF_32BIT;
+ pos = LL_DIR_END_OFF_32BIT;
else
- ctx->pos = LL_DIR_END_OFF;
+ pos = LL_DIR_END_OFF;
} else {
if (api32 && hash64)
- ctx->pos >>= 32;
+ pos >>= 32;
}
+ ctx->pos = pos;
+ ll_finish_md_op_data(op_data);
filp->f_version = inode->i_version;
out:
@@ -668,18 +393,40 @@ static int ll_send_mgc_param(struct obd_export *mgc, char *string)
return rc;
}
-static int ll_dir_setdirstripe(struct inode *dir, struct lmv_user_md *lump,
- char *filename)
+/**
+ * Create striped directory with specified stripe(@lump)
+ *
+ * param[in] parent the parent of the directory.
+ * param[in] lump the specified stripes.
+ * param[in] dirname the name of the directory.
+ * param[in] mode the specified mode of the directory.
+ *
+ * retval =0 if the striped directory is created successfully.
+ * <0 if the creation fails.
+ */
+static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump,
+ const char *dirname, umode_t mode)
{
struct ptlrpc_request *request = NULL;
struct md_op_data *op_data;
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- int mode;
+ struct ll_sb_info *sbi = ll_i2sbi(parent);
int err;
- mode = (~current_umask() & 0755) | S_IFDIR;
- op_data = ll_prep_md_op_data(NULL, dir, NULL, filename,
- strlen(filename), mode, LUSTRE_OPC_MKDIR,
+ if (unlikely(lump->lum_magic != LMV_USER_MAGIC))
+ return -EINVAL;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) name %s stripe_offset %d, stripe_count: %u\n",
+ PFID(ll_inode2fid(parent)), parent, dirname,
+ (int)lump->lum_stripe_offset, lump->lum_stripe_count);
+
+ if (lump->lum_magic != cpu_to_le32(LMV_USER_MAGIC))
+ lustre_swab_lmv_user_md(lump);
+
+ if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
+ mode &= ~current_umask();
+ mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
+ op_data = ll_prep_md_op_data(NULL, parent, NULL, dirname,
+ strlen(dirname), mode, LUSTRE_OPC_MKDIR,
lump);
if (IS_ERR(op_data)) {
err = PTR_ERR(op_data);
@@ -730,6 +477,13 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
lum_size = sizeof(struct lov_user_md_v3);
break;
}
+ case LMV_USER_MAGIC: {
+ if (lump->lmm_magic != cpu_to_le32(LMV_USER_MAGIC))
+ lustre_swab_lmv_user_md(
+ (struct lmv_user_md *)lump);
+ lum_size = sizeof(struct lmv_user_md);
+ break;
+ }
default: {
CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
lump->lmm_magic, LOV_USER_MAGIC_V1,
@@ -746,9 +500,6 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- if (lump && lump->lmm_magic == cpu_to_le32(LMV_USER_MAGIC))
- op_data->op_cli_flags |= CLI_SET_MEA;
-
/* swabbing is done in lov_setstripe() on server side */
rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size,
NULL, 0, &req, NULL);
@@ -803,8 +554,16 @@ end:
return rc;
}
-int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
- int *lmm_size, struct ptlrpc_request **request)
+/**
+ * Get the default LOV, LMV, or default LMV stripe EA of a directory.
+ * @valid indicates which stripe EA to retrieve:
+ * OBD_MD_MEA LMV stripe EA
+ * OBD_MD_DEFAULT_MEA Default LMV stripe EA
+ * otherwise Default LOV EA.
+ * Only one stripe EA can be retrieved per call.
+ **/
+int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size,
+ struct ptlrpc_request **request, u64 valid)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct mdt_body *body;
@@ -813,7 +572,7 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
int rc, lmmsize;
struct md_op_data *op_data;
- rc = ll_get_default_mdsize(sbi, &lmmsize);
+ rc = ll_get_max_mdsize(sbi, &lmmsize);
if (rc)
return rc;
@@ -834,9 +593,9 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- lmmsize = body->eadatasize;
+ lmmsize = body->mbo_eadatasize;
- if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
+ if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
lmmsize == 0) {
rc = -ENODATA;
goto out;
@@ -844,6 +603,7 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
lmm = req_capsule_server_sized_get(&req->rq_pill,
&RMF_MDT_MD, lmmsize);
+ LASSERT(lmm);
/*
* This is coming from the MDS, so is probably in
@@ -860,40 +620,51 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC)
lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
break;
+ case LMV_MAGIC_V1:
+ if (cpu_to_le32(LMV_MAGIC) != LMV_MAGIC)
+ lustre_swab_lmv_mds_md((union lmv_mds_md *)lmm);
+ break;
+ case LMV_USER_MAGIC:
+ if (cpu_to_le32(LMV_USER_MAGIC) != LMV_USER_MAGIC)
+ lustre_swab_lmv_user_md((struct lmv_user_md *)lmm);
+ break;
default:
CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
rc = -EPROTO;
}
out:
- *lmmp = lmm;
- *lmm_size = lmmsize;
+ *plmm = lmm;
+ *plmm_size = lmmsize;
*request = req;
return rc;
}
-/*
- * Get MDT index for the inode.
- */
-int ll_get_mdt_idx(struct inode *inode)
+int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid)
{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
struct md_op_data *op_data;
- int rc, mdtidx;
+ int mdt_index, rc;
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
+ op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
+ if (!op_data)
+ return -ENOMEM;
op_data->op_flags |= MF_GET_MDT_IDX;
+ op_data->op_fid1 = *fid;
rc = md_getattr(sbi->ll_md_exp, op_data, NULL);
- mdtidx = op_data->op_mds;
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
+ mdt_index = op_data->op_mds;
+ kvfree(op_data);
+ if (rc < 0)
return rc;
- }
- return mdtidx;
+
+ return mdt_index;
+}
+
+/*
+ * Get MDT index for the inode.
+ */
+int ll_get_mdt_idx(struct inode *inode)
+{
+ return ll_get_mdt_idx_by_fid(ll_i2sbi(inode), ll_inode2fid(inode));
}
/**
@@ -1288,11 +1059,9 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return 0;
}
case IOC_MDC_LOOKUP: {
- struct ptlrpc_request *request = NULL;
int namelen, len = 0;
char *buf = NULL;
char *filename;
- struct md_op_data *op_data;
rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
if (rc)
@@ -1308,21 +1077,13 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
goto out_free;
}
- op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, namelen,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out_free;
- }
-
- op_data->op_valid = OBD_MD_FLID;
- rc = md_getattr_name(sbi->ll_md_exp, op_data, &request);
- ll_finish_md_op_data(op_data);
+ rc = ll_get_fid_by_name(inode, filename, namelen, NULL);
if (rc < 0) {
- CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
+ CERROR("%s: lookup %.*s failed: rc = %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), namelen,
+ filename, rc);
goto out_free;
}
- ptlrpc_req_finished(request);
out_free:
obd_ioctl_freedata(buf, len);
return rc;
@@ -1333,6 +1094,7 @@ out_free:
char *filename;
int namelen = 0;
int lumlen = 0;
+ umode_t mode;
int len;
int rc;
@@ -1366,15 +1128,32 @@ out_free:
goto lmv_out_free;
}
- /**
- * ll_dir_setdirstripe will be used to set dir stripe
- * mdc_create--->mdt_reint_create (with dirstripe)
- */
- rc = ll_dir_setdirstripe(inode, lum, filename);
+#if OBD_OCD_VERSION(2, 9, 50, 0) > LUSTRE_VERSION_CODE
+ mode = data->ioc_type != 0 ? data->ioc_type : S_IRWXUGO;
+#else
+ mode = data->ioc_type;
+#endif
+ rc = ll_dir_setdirstripe(inode, lum, filename, mode);
lmv_out_free:
obd_ioctl_freedata(buf, len);
return rc;
}
+ case LL_IOC_LMV_SET_DEFAULT_STRIPE: {
+ struct lmv_user_md __user *ulump;
+ struct lmv_user_md lum;
+ int rc;
+
+ ulump = (struct lmv_user_md __user *)arg;
+ if (copy_from_user(&lum, ulump, sizeof(lum)))
+ return -EFAULT;
+
+ if (lum.lum_magic != LMV_USER_MAGIC)
+ return -EINVAL;
+
+ rc = ll_dir_setstripe(inode, (struct lov_user_md *)&lum, 0);
+
+ return rc;
+ }
case LL_IOC_LOV_SETSTRIPE: {
struct lov_user_md_v3 lumv3;
struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
@@ -1404,50 +1183,100 @@ lmv_out_free:
return rc;
}
case LL_IOC_LMV_GETSTRIPE: {
- struct lmv_user_md __user *lump = (void __user *)arg;
+ struct lmv_user_md __user *ulmv;
struct lmv_user_md lum;
- struct lmv_user_md *tmp;
+ struct ptlrpc_request *request = NULL;
+ struct lmv_user_md *tmp = NULL;
+ union lmv_mds_md *lmm = NULL;
+ u64 valid = 0;
+ int stripe_count;
+ int mdt_index;
int lum_size;
- int rc = 0;
- int mdtindex;
+ int lmmsize;
+ int rc;
+ int i;
- if (copy_from_user(&lum, lump, sizeof(struct lmv_user_md)))
+ ulmv = (struct lmv_user_md __user *)arg;
+ if (copy_from_user(&lum, ulmv, sizeof(*ulmv)))
return -EFAULT;
- if (lum.lum_magic != LMV_MAGIC_V1)
+ /*
+ * lum_magic indicates which stripe the ioctl would like to
+ * get: LMV_MAGIC_V1 is for the normal LMV stripe, LMV_USER_MAGIC
+ * is for the default LMV stripe
+ */
+ if (lum.lum_magic == LMV_MAGIC_V1)
+ valid |= OBD_MD_MEA;
+ else if (lum.lum_magic == LMV_USER_MAGIC)
+ valid |= OBD_MD_DEFAULT_MEA;
+ else
return -EINVAL;
- lum_size = lmv_user_md_size(1, LMV_MAGIC_V1);
+ rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize, &request,
+ valid);
+ if (rc)
+ goto finish_req;
+
+ /* Get default LMV EA */
+ if (lum.lum_magic == LMV_USER_MAGIC) {
+ if (rc)
+ goto finish_req;
+
+ if (lmmsize > sizeof(*ulmv)) {
+ rc = -EINVAL;
+ goto finish_req;
+ }
+
+ if (copy_to_user(ulmv, lmm, lmmsize))
+ rc = -EFAULT;
+
+ goto finish_req;
+ }
+
+ stripe_count = lmv_mds_md_stripe_count_get(lmm);
+ lum_size = lmv_user_md_size(stripe_count, LMV_MAGIC_V1);
tmp = kzalloc(lum_size, GFP_NOFS);
if (!tmp) {
rc = -ENOMEM;
- goto free_lmv;
+ goto finish_req;
}
- *tmp = lum;
- tmp->lum_type = LMV_STRIPE_TYPE;
- tmp->lum_stripe_count = 1;
- mdtindex = ll_get_mdt_idx(inode);
- if (mdtindex < 0) {
+ mdt_index = ll_get_mdt_idx(inode);
+ if (mdt_index < 0) {
rc = -ENOMEM;
- goto free_lmv;
+ goto out_tmp;
+ }
+ tmp->lum_magic = LMV_MAGIC_V1;
+ tmp->lum_stripe_count = 0;
+ tmp->lum_stripe_offset = mdt_index;
+ for (i = 0; i < stripe_count; i++) {
+ struct lu_fid fid;
+
+ fid_le_to_cpu(&fid, &lmm->lmv_md_v1.lmv_stripe_fids[i]);
+ mdt_index = ll_get_mdt_idx_by_fid(sbi, &fid);
+ if (mdt_index < 0) {
+ rc = mdt_index;
+ goto out_tmp;
+ }
+ tmp->lum_objects[i].lum_mds = mdt_index;
+ tmp->lum_objects[i].lum_fid = fid;
+ tmp->lum_stripe_count++;
}
- tmp->lum_stripe_offset = mdtindex;
- tmp->lum_objects[0].lum_mds = mdtindex;
- memcpy(&tmp->lum_objects[0].lum_fid, ll_inode2fid(inode),
- sizeof(struct lu_fid));
- if (copy_to_user((void __user *)arg, tmp, lum_size)) {
+ if (copy_to_user(ulmv, tmp, lum_size)) {
rc = -EFAULT;
- goto free_lmv;
+ goto out_tmp;
}
-free_lmv:
+out_tmp:
kfree(tmp);
+finish_req:
+ ptlrpc_req_finished(request);
return rc;
}
+
case LL_IOC_LOV_SWAP_LAYOUTS:
return -EPERM;
- case LL_IOC_OBD_STATFS:
+ case IOC_OBD_STATFS:
return ll_obd_statfs(inode, (void __user *)arg);
case LL_IOC_LOV_GETSTRIPE:
case LL_IOC_MDC_GETINFO:
@@ -1469,7 +1298,8 @@ free_lmv:
rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
&lmmsize, &request);
} else {
- rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
+ rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize,
+ &request, 0);
}
if (request) {
@@ -1512,18 +1342,18 @@ skip_lmm:
lstat_t st = { 0 };
st.st_dev = inode->i_sb->s_dev;
- st.st_mode = body->mode;
- st.st_nlink = body->nlink;
- st.st_uid = body->uid;
- st.st_gid = body->gid;
- st.st_rdev = body->rdev;
- st.st_size = body->size;
+ st.st_mode = body->mbo_mode;
+ st.st_nlink = body->mbo_nlink;
+ st.st_uid = body->mbo_uid;
+ st.st_gid = body->mbo_gid;
+ st.st_rdev = body->mbo_rdev;
+ st.st_size = body->mbo_size;
st.st_blksize = PAGE_SIZE;
- st.st_blocks = body->blocks;
- st.st_atime = body->atime;
- st.st_mtime = body->mtime;
- st.st_ctime = body->ctime;
- st.st_ino = cl_fid_build_ino(&body->fid1,
+ st.st_blocks = body->mbo_blocks;
+ st.st_atime = body->mbo_atime;
+ st.st_mtime = body->mbo_mtime;
+ st.st_ctime = body->mbo_ctime;
+ st.st_ino = cl_fid_build_ino(&body->mbo_fid1,
sbi->ll_flags &
LL_SBI_32BIT_API);
@@ -1611,9 +1441,6 @@ free_lmm:
kvfree(lmm);
return rc;
}
- case OBD_IOC_LLOG_CATINFO: {
- return -EOPNOTSUPP;
- }
case OBD_IOC_QUOTACHECK: {
struct obd_quotactl *oqctl;
int error = 0;
@@ -1671,7 +1498,7 @@ out_poll:
kfree(check);
return rc;
}
- case LL_IOC_QUOTACTL: {
+ case OBD_IOC_QUOTACTL: {
struct if_quotactl *qctl;
qctl = kzalloc(sizeof(*qctl), GFP_NOFS);
@@ -1739,6 +1566,25 @@ out_quotactl:
return rc;
case OBD_IOC_FID2PATH:
return ll_fid2path(inode, (void __user *)arg);
+ case LL_IOC_GETPARENT:
+ return ll_getparent(file, (void __user *)arg);
+ case LL_IOC_FID2MDTIDX: {
+ struct obd_export *exp = ll_i2mdexp(inode);
+ struct lu_fid fid;
+ __u32 index;
+
+ if (copy_from_user(&fid, (const struct lu_fid __user *)arg,
+ sizeof(fid)))
+ return -EFAULT;
+
+ /* Call mdc_iocontrol */
+ rc = obd_iocontrol(LL_IOC_FID2MDTIDX, exp, sizeof(fid), &fid,
+ &index);
+ if (rc)
+ return rc;
+
+ return index;
+ }
case LL_IOC_HSM_REQUEST: {
struct hsm_user_request *hur;
ssize_t totalsize;
@@ -1853,6 +1699,45 @@ out_quotactl:
kfree(copy);
return rc;
}
+ case LL_IOC_MIGRATE: {
+ char *buf = NULL;
+ const char *filename;
+ int namelen = 0;
+ int len;
+ int rc;
+ int mdtidx;
+
+ rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
+ if (rc < 0)
+ return rc;
+
+ data = (struct obd_ioctl_data *)buf;
+ if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
+ !data->ioc_inllen1 || !data->ioc_inllen2) {
+ rc = -EINVAL;
+ goto migrate_free;
+ }
+
+ filename = data->ioc_inlbuf1;
+ namelen = data->ioc_inllen1;
+ if (namelen < 1 || namelen != strlen(filename) + 1) {
+ rc = -EINVAL;
+ goto migrate_free;
+ }
+
+ if (data->ioc_inllen2 != sizeof(mdtidx)) {
+ rc = -EINVAL;
+ goto migrate_free;
+ }
+ mdtidx = *(int *)data->ioc_inlbuf2;
+
+ rc = ll_migrate(inode, file, mdtidx, filename, namelen - 1);
+migrate_free:
+ obd_ioctl_freedata(buf, len);
+
+ return rc;
+ }
+
default:
return obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL,
(void __user *)arg);
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index 57281b9e31ff..6e3a188baaae 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -38,14 +38,15 @@
#define DEBUG_SUBSYSTEM S_LLITE
#include "../include/lustre_dlm.h"
-#include "../include/lustre_lite.h"
#include <linux/pagemap.h>
#include <linux/file.h>
+#include <linux/sched.h>
#include <linux/mount.h>
-#include "llite_internal.h"
#include "../include/lustre/ll_fiemap.h"
+#include "../include/lustre/lustre_ioctl.h"
#include "../include/cl_object.h"
+#include "llite_internal.h"
static int
ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
@@ -188,17 +189,11 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
spin_unlock(&lli->lli_lock);
}
- if (rc == 0) {
- rc = ll_objects_destroy(req, inode);
- if (rc)
- CERROR("inode %lu ll_objects destroy: rc = %d\n",
- inode->i_ino, rc);
- }
if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
struct mdt_body *body;
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!(body->valid & OBD_MD_FLRELEASED))
+ if (!(body->mbo_valid & OBD_MD_FLRELEASED))
rc = -EBUSY;
}
@@ -349,13 +344,11 @@ int ll_file_release(struct inode *inode, struct file *file)
fd = LUSTRE_FPRIVATE(file);
LASSERT(fd);
- /* The last ref on @file, maybe not be the owner pid of statahead.
- * Different processes can open the same dir, "ll_opendir_key" means:
- * it is me that should stop the statahead thread.
+ /* The last ref on @file may not be the owner pid of statahead,
+ * because parent and child process can share the same file handle.
*/
- if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
- lli->lli_opendir_pid != 0)
- ll_stop_statahead(inode, lli->lli_opendir_key);
+ if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
+ ll_deauthorize_statahead(inode, fd);
if (is_root_inode(inode)) {
LUSTRE_FPRIVATE(file) = NULL;
@@ -364,7 +357,8 @@ int ll_file_release(struct inode *inode, struct file *file)
}
if (!S_ISDIR(inode->i_mode)) {
- lov_read_and_clear_async_rc(lli->lli_clob);
+ if (lli->lli_clob)
+ lov_read_and_clear_async_rc(lli->lli_clob);
lli->lli_async_rc = 0;
}
@@ -376,55 +370,39 @@ int ll_file_release(struct inode *inode, struct file *file)
return rc;
}
-static int ll_intent_file_open(struct dentry *dentry, void *lmm,
- int lmmsize, struct lookup_intent *itp)
+static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
+ struct lookup_intent *itp)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(de);
struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct dentry *parent = dentry->d_parent;
- const char *name = dentry->d_name.name;
- const int len = dentry->d_name.len;
+ struct dentry *parent = de->d_parent;
+ const char *name = NULL;
struct md_op_data *op_data;
- struct ptlrpc_request *req;
- __u32 opc = LUSTRE_OPC_ANY;
- int rc;
+ struct ptlrpc_request *req = NULL;
+ int len = 0, rc;
- /* Usually we come here only for NFSD, and we want open lock. */
- /* We can also get here if there was cached open handle in revalidate_it
- * but it disappeared while we were getting from there to ll_file_open.
- * But this means this file was closed and immediately opened which
- * makes a good candidate for using OPEN lock
- */
- /* If lmmsize & lmm are not 0, we are just setting stripe info
- * parameters. No need for the open lock
+ LASSERT(parent);
+ LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
+
+ /*
+ * if server supports open-by-fid, or file name is invalid, don't pack
+ * name in open request
*/
- if (!lmm && lmmsize == 0) {
- struct ll_dentry_data *ldd = ll_d2d(dentry);
- /*
- * If we came via ll_iget_for_nfs, then we need to request
- * struct ll_dentry_data *ldd = ll_d2d(file->f_dentry);
- *
- * NB: when ldd is NULL, it must have come via normal
- * lookup path only, since ll_iget_for_nfs always calls
- * ll_d_init().
- */
- if (ldd && ldd->lld_nfs_dentry) {
- ldd->lld_nfs_dentry = 0;
- itp->it_flags |= MDS_OPEN_LOCK;
- }
- if (itp->it_flags & FMODE_WRITE)
- opc = LUSTRE_OPC_CREATE;
+ if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
+ lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
+ name = de->d_name.name;
+ len = de->d_name.len;
}
- op_data = ll_prep_md_op_data(NULL, d_inode(parent),
- inode, name, len,
- O_RDWR, opc, NULL);
+ op_data = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len,
+ O_RDWR, LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
return PTR_ERR(op_data);
+ op_data->op_data = lmm;
+ op_data->op_data_size = lmmsize;
- itp->it_flags |= MDS_OPEN_BY_FID;
- rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
- 0 /*unused */, &req, ll_md_blocking_ast, 0);
+ rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
+ &ll_md_blocking_ast, 0);
ll_finish_md_op_data(op_data);
if (rc == -ESTALE) {
/* reason for keep own exit path - don`t flood log
@@ -479,8 +457,8 @@ static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
struct mdt_body *body;
body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
- och->och_fh = body->handle;
- och->och_fid = body->fid1;
+ och->och_fh = body->mbo_handle;
+ och->och_fid = body->mbo_fid1;
och->och_lease_handle.cookie = it->it_lock_handle;
och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
och->och_flags = it->it_flags;
@@ -508,7 +486,7 @@ static int ll_local_open(struct file *file, struct lookup_intent *it,
body = req_capsule_server_get(&it->it_request->rq_pill,
&RMF_MDT_BODY);
- ll_ioepoch_open(lli, body->ioepoch);
+ ll_ioepoch_open(lli, body->mbo_ioepoch);
}
LUSTRE_FPRIVATE(file) = fd;
@@ -543,7 +521,7 @@ int ll_file_open(struct inode *inode, struct file *file)
struct obd_client_handle **och_p = NULL;
__u64 *och_usecount = NULL;
struct ll_file_data *fd;
- int rc = 0, opendir_set = 0;
+ int rc = 0;
CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
PFID(ll_inode2fid(inode)), inode, file->f_flags);
@@ -558,16 +536,8 @@ int ll_file_open(struct inode *inode, struct file *file)
}
fd->fd_file = file;
- if (S_ISDIR(inode->i_mode)) {
- spin_lock(&lli->lli_sa_lock);
- if (!lli->lli_opendir_key && !lli->lli_sai &&
- lli->lli_opendir_pid == 0) {
- lli->lli_opendir_key = fd;
- lli->lli_opendir_pid = current_pid();
- opendir_set = 1;
- }
- spin_unlock(&lli->lli_sa_lock);
- }
+ if (S_ISDIR(inode->i_mode))
+ ll_authorize_statahead(inode, fd);
if (is_root_inode(inode)) {
LUSTRE_FPRIVATE(file) = fd;
@@ -615,7 +585,7 @@ restart:
} else if (it->it_flags & FMODE_EXEC) {
och_p = &lli->lli_mds_exec_och;
och_usecount = &lli->lli_open_fd_exec_count;
- } else {
+ } else {
och_p = &lli->lli_mds_read_och;
och_usecount = &lli->lli_open_fd_read_count;
}
@@ -652,9 +622,19 @@ restart:
* result in a deadlock
*/
mutex_unlock(&lli->lli_och_mutex);
- it->it_create_mode |= M_CHECK_STALE;
+ /*
+ * Normally called under two situations:
+ * 1. NFS export.
+ * 2. revalidate with IT_OPEN (revalidate doesn't
+ * execute this intent any more).
+ *
+ * Always fetch MDS_OPEN_LOCK if this is not setstripe.
+ *
+ * Always specify MDS_OPEN_BY_FID because we don't want
+ * to get file with different fid.
+ */
+ it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it);
- it->it_create_mode &= ~M_CHECK_STALE;
if (rc)
goto out_openerr;
@@ -716,9 +696,10 @@ out_och_free:
mutex_unlock(&lli->lli_och_mutex);
out_openerr:
- if (opendir_set != 0)
- ll_stop_statahead(inode, lli->lli_opendir_key);
- ll_file_data_put(fd);
+ if (lli->lli_opendir_key == fd)
+ ll_deauthorize_statahead(inode, fd);
+ if (fd)
+ ll_file_data_put(fd);
} else {
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
}
@@ -764,7 +745,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
struct lookup_intent it = { .it_op = IT_OPEN };
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct md_op_data *op_data;
- struct ptlrpc_request *req;
+ struct ptlrpc_request *req = NULL;
struct lustre_handle old_handle = { 0 };
struct obd_client_handle *och = NULL;
int rc;
@@ -831,8 +812,8 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
it.it_flags = fmode | open_flags;
it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
- rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
- ll_md_blocking_lease_ast,
+ rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
+ &ll_md_blocking_lease_ast,
/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
* it can be cancelled which may mislead applications that the lease is
* broken;
@@ -840,7 +821,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
* open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
* doesn't deal with openhandle, so normal openhandle will be leaked.
*/
- LDLM_FL_NO_LRU | LDLM_FL_EXCL);
+ LDLM_FL_NO_LRU | LDLM_FL_EXCL);
ll_finish_md_op_data(op_data);
ptlrpc_req_finished(req);
if (rc < 0)
@@ -908,7 +889,6 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
{
struct ldlm_lock *lock;
bool cancelled = true;
- int rc;
lock = ldlm_handle2lock(&och->och_lease_handle);
if (lock) {
@@ -926,9 +906,8 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
if (lease_broken)
*lease_broken = cancelled;
- rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
- NULL);
- return rc;
+ return ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
+ inode, och, NULL);
}
/* Fills the obdo with the attributes for the lsm */
@@ -1138,10 +1117,11 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
{
struct ll_inode_info *lli = ll_i2info(file_inode(file));
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct range_lock range;
struct cl_io *io;
ssize_t result;
- CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zd\n",
+ CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zu\n",
file->f_path.dentry->d_name.name, iot, *ppos, count);
restart:
@@ -1150,7 +1130,12 @@ restart:
if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
struct vvp_io *vio = vvp_env_io(env);
- int write_mutex_locked = 0;
+ bool range_locked = false;
+
+ if (file->f_flags & O_APPEND)
+ range_lock_init(&range, 0, LUSTRE_EOF);
+ else
+ range_lock_init(&range, *ppos, *ppos + count - 1);
vio->vui_fd = LUSTRE_FPRIVATE(file);
vio->vui_io_subtype = args->via_io_subtype;
@@ -1159,14 +1144,23 @@ restart:
case IO_NORMAL:
vio->vui_iter = args->u.normal.via_iter;
vio->vui_iocb = args->u.normal.via_iocb;
- if ((iot == CIT_WRITE) &&
+ /*
+ * Direct IO reads must also take range lock,
+ * or multiple reads will try to work on the same pages
+ * See LU-6227 for details.
+ */
+ if (((iot == CIT_WRITE) ||
+ (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
!(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- if (mutex_lock_interruptible(&lli->
- lli_write_mutex)) {
- result = -ERESTARTSYS;
+ CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
+ range.rl_node.in_extent.start,
+ range.rl_node.in_extent.end);
+ result = range_lock(&lli->lli_write_tree,
+ &range);
+ if (result < 0)
goto out;
- }
- write_mutex_locked = 1;
+
+ range_locked = true;
}
down_read(&lli->lli_trunc_sem);
break;
@@ -1183,8 +1177,12 @@ restart:
ll_cl_remove(file, env);
if (args->via_io_subtype == IO_NORMAL)
up_read(&lli->lli_trunc_sem);
- if (write_mutex_locked)
- mutex_unlock(&lli->lli_write_mutex);
+ if (range_locked) {
+ CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
+ range.rl_node.in_extent.start,
+ range.rl_node.in_extent.end);
+ range_unlock(&lli->lli_write_tree, &range);
+ }
} else {
/* cl_io_rw_init() handled IO */
result = io->ci_result;
@@ -1201,7 +1199,7 @@ out:
* short read/write instead of restart io.
*/
if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
- CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zd\n",
+ CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zu\n",
iot == CIT_READ ? "read" : "write",
file, *ppos, count);
LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
@@ -1296,94 +1294,15 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
return result;
}
-static int ll_lov_recreate(struct inode *inode, struct ost_id *oi, u32 ost_idx)
-{
- struct obd_export *exp = ll_i2dtexp(inode);
- struct obd_trans_info oti = { 0 };
- struct obdo *oa = NULL;
- int lsm_size;
- int rc = 0;
- struct lov_stripe_md *lsm = NULL, *lsm2;
-
- oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
- if (!oa)
- return -ENOMEM;
-
- lsm = ccc_inode_lsm_get(inode);
- if (!lsm_has_objects(lsm)) {
- rc = -ENOENT;
- goto out;
- }
-
- lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
- (lsm->lsm_stripe_count));
-
- lsm2 = libcfs_kvzalloc(lsm_size, GFP_NOFS);
- if (!lsm2) {
- rc = -ENOMEM;
- goto out;
- }
-
- oa->o_oi = *oi;
- oa->o_nlink = ost_idx;
- oa->o_flags |= OBD_FL_RECREATE_OBJS;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
- obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
- OBD_MD_FLMTIME | OBD_MD_FLCTIME);
- obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
- memcpy(lsm2, lsm, lsm_size);
- ll_inode_size_lock(inode);
- rc = obd_create(NULL, exp, oa, &lsm2, &oti);
- ll_inode_size_unlock(inode);
-
- kvfree(lsm2);
- goto out;
-out:
- ccc_inode_lsm_put(inode, lsm);
- kmem_cache_free(obdo_cachep, oa);
- return rc;
-}
-
-static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
-{
- struct ll_recreate_obj ucreat;
- struct ost_id oi;
-
- if (!capable(CFS_CAP_SYS_ADMIN))
- return -EPERM;
-
- if (copy_from_user(&ucreat, (struct ll_recreate_obj __user *)arg,
- sizeof(ucreat)))
- return -EFAULT;
-
- ostid_set_seq_mdt0(&oi);
- ostid_set_id(&oi, ucreat.lrc_id);
- return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
-}
-
-static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
-{
- struct lu_fid fid;
- struct ost_id oi;
- u32 ost_idx;
-
- if (!capable(CFS_CAP_SYS_ADMIN))
- return -EPERM;
-
- if (copy_from_user(&fid, (struct lu_fid __user *)arg, sizeof(fid)))
- return -EFAULT;
-
- fid_to_ostid(&fid, &oi);
- ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
- return ll_lov_recreate(inode, &oi, ost_idx);
-}
-
int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
__u64 flags, struct lov_user_md *lum,
int lum_size)
{
struct lov_stripe_md *lsm = NULL;
- struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
+ struct lookup_intent oit = {
+ .it_op = IT_OPEN,
+ .it_flags = flags | MDS_OPEN_BY_FID,
+ };
int rc = 0;
lsm = ccc_inode_lsm_get(inode);
@@ -1397,11 +1316,11 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
ll_inode_size_lock(inode);
rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
- if (rc)
+ if (rc < 0)
goto out_unlock;
rc = oit.it_status;
if (rc < 0)
- goto out_req_free;
+ goto out_unlock;
ll_release_openhandle(inode, &oit);
@@ -1411,9 +1330,6 @@ out_unlock:
ccc_inode_lsm_put(inode, lsm);
out:
return rc;
-out_req_free:
- ptlrpc_req_finished((struct ptlrpc_request *)oit.it_request);
- goto out;
}
int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
@@ -1448,9 +1364,9 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- lmmsize = body->eadatasize;
+ lmmsize = body->mbo_eadatasize;
- if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
+ if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
lmmsize == 0) {
rc = -ENODATA;
goto out;
@@ -1481,13 +1397,13 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
*/
if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
- if (S_ISREG(body->mode))
+ if (S_ISREG(body->mbo_mode))
lustre_swab_lov_user_md_objects(
((struct lov_user_md_v1 *)lmm)->lmm_objects,
stripe_count);
} else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
- if (S_ISREG(body->mode))
+ if (S_ISREG(body->mbo_mode))
lustre_swab_lov_user_md_objects(
((struct lov_user_md_v3 *)lmm)->lmm_objects,
stripe_count);
@@ -1530,55 +1446,48 @@ static int ll_lov_setea(struct inode *inode, struct file *file,
return rc;
}
+/*
+ * Hand the user buffer @lum to cl_object_getstripe() for the cl_object
+ * attached to @inode (lli_clob).
+ *
+ * A cl environment is acquired for the duration of the call and released
+ * afterwards.  Returns PTR_ERR() of the environment when it cannot be
+ * obtained, otherwise whatever cl_object_getstripe() returned.
+ */
+static int ll_file_getstripe(struct inode *inode,
+			     struct lov_user_md __user *lum)
+{
+	struct lu_env *env;
+	int refcheck;
+	int result;
+
+	env = cl_env_get(&refcheck);
+	if (IS_ERR(env))
+		return PTR_ERR(env);
+
+	result = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
+
+	/* always drop the environment, whatever the getstripe result was */
+	cl_env_put(env, &refcheck);
+
+	return result;
+}
+
static int ll_lov_setstripe(struct inode *inode, struct file *file,
unsigned long arg)
{
- struct lov_user_md_v3 lumv3;
- struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
- struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
- struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
+ struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
+ struct lov_user_md *klum;
int lum_size, rc;
__u64 flags = FMODE_WRITE;
- /* first try with v1 which is smaller than v3 */
- lum_size = sizeof(struct lov_user_md_v1);
- if (copy_from_user(lumv1, lumv1p, lum_size))
- return -EFAULT;
-
- if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
- lum_size = sizeof(struct lov_user_md_v3);
- if (copy_from_user(&lumv3, lumv3p, lum_size))
- return -EFAULT;
- }
+ rc = ll_copy_user_md(lum, &klum);
+ if (rc < 0)
+ return rc;
- rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lumv1,
+ lum_size = rc;
+ rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, klum,
lum_size);
cl_lov_delay_create_clear(&file->f_flags);
if (rc == 0) {
- struct lov_stripe_md *lsm;
__u32 gen;
- put_user(0, &lumv1p->lmm_stripe_count);
+ put_user(0, &lum->lmm_stripe_count);
ll_layout_refresh(inode, &gen);
- lsm = ccc_inode_lsm_get(inode);
- rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
- 0, lsm, (void __user *)arg);
- ccc_inode_lsm_put(inode, lsm);
+ rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
}
- return rc;
-}
-static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
-{
- struct lov_stripe_md *lsm;
- int rc = -ENODATA;
-
- lsm = ccc_inode_lsm_get(inode);
- if (lsm)
- rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
- lsm, (void __user *)arg);
- ccc_inode_lsm_put(inode, lsm);
+ kfree(klum);
return rc;
}
@@ -2247,6 +2156,12 @@ free_hss:
return rc;
}
+/*
+ * Build a lease-type mask from @fmode: LL_LEASE_RDLCK is set when FMODE_READ
+ * is present and LL_LEASE_WRLCK when FMODE_WRITE is present.  Returns 0 when
+ * neither bit is set.
+ */
+static inline long ll_lease_type_from_fmode(fmode_t fmode)
+{
+	long lease_type = 0;
+
+	if (fmode & FMODE_READ)
+		lease_type |= LL_LEASE_RDLCK;
+	if (fmode & FMODE_WRITE)
+		lease_type |= LL_LEASE_WRLCK;
+
+	return lease_type;
+}
+
static long
ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
@@ -2314,11 +2229,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return rc;
}
case LL_IOC_LOV_GETSTRIPE:
- return ll_lov_getstripe(inode, arg);
- case LL_IOC_RECREATE_OBJ:
- return ll_lov_recreate_obj(inode, arg);
- case LL_IOC_RECREATE_FID:
- return ll_lov_recreate_fid(inode, arg);
+ return ll_file_getstripe(inode,
+ (struct lov_user_md __user *)arg);
case FSFILT_IOC_FIEMAP:
return ll_ioctl_fiemap(inode, arg);
case FSFILT_IOC_GETFLAGS:
@@ -2349,6 +2261,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return 0;
}
+ case LL_IOC_GETPARENT:
+ return ll_getparent(file, (struct getparent __user *)arg);
case OBD_IOC_FID2PATH:
return ll_fid2path(inode, (void __user *)arg);
case LL_IOC_DATA_VERSION: {
@@ -2451,20 +2365,20 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct ll_inode_info *lli = ll_i2info(inode);
struct obd_client_handle *och = NULL;
bool lease_broken;
- fmode_t mode = 0;
+ fmode_t fmode;
switch (arg) {
- case F_WRLCK:
+ case LL_LEASE_WRLCK:
if (!(file->f_mode & FMODE_WRITE))
return -EPERM;
- mode = FMODE_WRITE;
+ fmode = FMODE_WRITE;
break;
- case F_RDLCK:
+ case LL_LEASE_RDLCK:
if (!(file->f_mode & FMODE_READ))
return -EPERM;
- mode = FMODE_READ;
+ fmode = FMODE_READ;
break;
- case F_UNLCK:
+ case LL_LEASE_UNLCK:
mutex_lock(&lli->lli_och_mutex);
if (fd->fd_lease_och) {
och = fd->fd_lease_och;
@@ -2472,26 +2386,26 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
mutex_unlock(&lli->lli_och_mutex);
- if (och) {
- mode = och->och_flags &
- (FMODE_READ|FMODE_WRITE);
- rc = ll_lease_close(och, inode, &lease_broken);
- if (rc == 0 && lease_broken)
- mode = 0;
- } else {
- rc = -ENOLCK;
- }
+ if (!och)
+ return -ENOLCK;
+
+ fmode = och->och_flags;
+ rc = ll_lease_close(och, inode, &lease_broken);
+ if (rc < 0)
+ return rc;
+
+ if (lease_broken)
+ fmode = 0;
- /* return the type of lease or error */
- return rc < 0 ? rc : (int)mode;
+ return ll_lease_type_from_fmode(fmode);
default:
return -EINVAL;
}
- CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
+ CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
/* apply for lease */
- och = ll_lease_open(inode, file, mode, 0);
+ och = ll_lease_open(inode, file, fmode, 0);
if (IS_ERR(och))
return PTR_ERR(och);
@@ -2512,8 +2426,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case LL_IOC_GET_LEASE: {
struct ll_inode_info *lli = ll_i2info(inode);
struct ldlm_lock *lock = NULL;
+ fmode_t fmode = 0;
- rc = 0;
mutex_lock(&lli->lli_och_mutex);
if (fd->fd_lease_och) {
struct obd_client_handle *och = fd->fd_lease_och;
@@ -2522,14 +2436,13 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (lock) {
lock_res_and_lock(lock);
if (!ldlm_is_cancel(lock))
- rc = och->och_flags &
- (FMODE_READ | FMODE_WRITE);
+ fmode = och->och_flags;
unlock_res_and_lock(lock);
LDLM_LOCK_PUT(lock);
}
}
mutex_unlock(&lli->lli_och_mutex);
- return rc;
+ return ll_lease_type_from_fmode(fmode);
}
case LL_IOC_HSM_IMPORT: {
struct hsm_user_import *hui;
@@ -2574,9 +2487,8 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
eof = i_size_read(inode);
}
- retval = generic_file_llseek_size(file, offset, origin,
- ll_file_maxbytes(inode), eof);
- return retval;
+ return generic_file_llseek_size(file, offset, origin,
+ ll_file_maxbytes(inode), eof);
}
static int ll_flush(struct file *file, fl_owner_t id)
@@ -2593,9 +2505,11 @@ static int ll_flush(struct file *file, fl_owner_t id)
*/
rc = lli->lli_async_rc;
lli->lli_async_rc = 0;
- err = lov_read_and_clear_async_rc(lli->lli_clob);
- if (rc == 0)
- rc = err;
+ if (lli->lli_clob) {
+ err = lov_read_and_clear_async_rc(lli->lli_clob);
+ if (!rc)
+ rc = err;
+ }
/* The application has been told about write failure already.
* Do not report failure again.
@@ -2714,6 +2628,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
struct md_op_data *op_data;
struct lustre_handle lockh = {0};
ldlm_policy_data_t flock = { {0} };
+ int fl_type = file_lock->fl_type;
__u64 flags = 0;
int rc;
int rc2 = 0;
@@ -2744,7 +2659,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
- switch (file_lock->fl_type) {
+ switch (fl_type) {
case F_RDLCK:
einfo.ei_mode = LCK_PR;
break;
@@ -2764,8 +2679,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
einfo.ei_mode = LCK_PW;
break;
default:
- CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
- file_lock->fl_type);
+ CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
return -ENOTSUPP;
}
@@ -2787,16 +2701,18 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
case F_GETLK64:
#endif
flags = LDLM_FL_TEST_LOCK;
- /* Save the old mode so that if the mode in the lock changes we
- * can decrement the appropriate reader or writer refcount.
- */
- file_lock->fl_type = einfo.ei_mode;
break;
default:
CERROR("unknown fcntl lock command: %d\n", cmd);
return -EINVAL;
}
+ /*
+ * Save the old mode so that if the mode in the lock changes we
+ * can decrement the appropriate reader or writer refcount.
+ */
+ file_lock->fl_type = einfo.ei_mode;
+
op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
@@ -2806,8 +2722,12 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
- rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
- op_data, &lockh, &flock, 0, NULL /* req */, flags);
+ rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
+ flags);
+
+ /* Restore the file lock type if not TEST lock. */
+ if (!(flags & LDLM_FL_TEST_LOCK))
+ file_lock->fl_type = fl_type;
if ((rc == 0 || file_lock->fl_type == F_UNLCK) &&
!(flags & LDLM_FL_TEST_LOCK))
@@ -2815,8 +2735,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
if (rc2 && file_lock->fl_type != F_UNLCK) {
einfo.ei_mode = LCK_NL;
- md_enqueue(sbi->ll_md_exp, &einfo, NULL,
- op_data, &lockh, &flock, 0, NULL /* req */, flags);
+ md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
+ &lockh, flags);
rc = rc2;
}
@@ -2825,6 +2745,117 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
return rc;
}
+/*
+ * Issue md_getattr_name() for the entry @name (length @namelen) under the
+ * directory @parent and, when @fid is not NULL, copy mbo_fid1 from the reply
+ * body into it.
+ *
+ * Returns the error from ll_prep_md_op_data() or a negative
+ * md_getattr_name() result as is, -EFAULT when the reply has no MDT body,
+ * and otherwise the md_getattr_name() return value.
+ */
+int ll_get_fid_by_name(struct inode *parent, const char *name,
+		       int namelen, struct lu_fid *fid)
+{
+	struct ptlrpc_request *req;
+	struct md_op_data *op_data;
+	struct mdt_body *body;
+	int rc;
+
+	op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
+				     LUSTRE_OPC_ANY, NULL);
+	if (IS_ERR(op_data))
+		return PTR_ERR(op_data);
+
+	op_data->op_valid = OBD_MD_FLID;
+	rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
+
+	/* op_data is released right after the call, success or not */
+	ll_finish_md_op_data(op_data);
+	if (rc < 0)
+		return rc;
+
+	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+	if (!body)
+		rc = -EFAULT;
+	else if (fid)
+		*fid = body->mbo_fid1;
+
+	/* the request is dropped on both the error and the success path */
+	ptlrpc_req_finished(req);
+	return rc;
+}
+
+/*
+ * Migrate the entry @name (length @namelen) under directory @parent to the
+ * MDT with index @mdtidx.
+ *
+ * The child FID is taken from the dcache when a positive dentry for @name is
+ * found under file_dentry(@file); otherwise it is fetched with
+ * ll_get_fid_by_name().  When the cached inode can be grabbed it is kept
+ * locked for the whole operation and its aliases are invalidated.  The
+ * migration itself is an md_rename() of @name onto itself with CLI_MIGRATE
+ * set in op_cli_flags.
+ *
+ * Returns 0 when the migration succeeded or the object already lives on
+ * @mdtidx, -EINVAL when the child FID is not sane, and otherwise the error
+ * of the step that failed.
+ */
+int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
+	       const char *name, int namelen)
+{
+	struct ptlrpc_request *request = NULL;
+	struct inode *child_inode = NULL;
+	struct dentry *dchild = NULL;
+	struct md_op_data *op_data;
+	bool fid_known = false;
+	bool migrated = false;
+	struct qstr qstr;
+	int rc;
+
+	CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%d\n",
+	       name, PFID(ll_inode2fid(parent)), mdtidx);
+
+	op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
+				     0, LUSTRE_OPC_ANY, NULL);
+	if (IS_ERR(op_data))
+		return PTR_ERR(op_data);
+
+	/*
+	 * Get child FID first.
+	 *
+	 * NOTE(review): the hash is salted with the parent inode while
+	 * d_lookup() below is keyed on the parent dentry - confirm that this
+	 * salt is the intended one.
+	 */
+	qstr.hash = full_name_hash(parent, name, namelen);
+	qstr.name = name;
+	qstr.len = namelen;
+	dchild = d_lookup(file_dentry(file), &qstr);
+	if (dchild) {
+		struct inode *cached = dchild->d_inode;
+
+		/*
+		 * A negative dentry carries no inode: only read the FID
+		 * (and try to pin the inode) when one is really attached.
+		 */
+		if (cached) {
+			op_data->op_fid3 = *ll_inode2fid(cached);
+			fid_known = true;
+
+			child_inode = igrab(cached);
+			if (child_inode) {
+				inode_lock(child_inode);
+				ll_invalidate_aliases(child_inode);
+			}
+		}
+		dput(dchild);
+	}
+
+	/* nothing usable in the dcache: look the FID up by name instead */
+	if (!fid_known) {
+		rc = ll_get_fid_by_name(parent, name, namelen,
+					&op_data->op_fid3);
+		if (rc)
+			goto out_free;
+	}
+
+	if (!fid_is_sane(&op_data->op_fid3)) {
+		CERROR("%s: migrate %s, but fid "DFID" is insane\n",
+		       ll_get_fsname(parent->i_sb, NULL, 0), name,
+		       PFID(&op_data->op_fid3));
+		rc = -EINVAL;
+		goto out_free;
+	}
+
+	rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
+	if (rc < 0)
+		goto out_free;
+
+	if (rc == mdtidx) {
+		CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
+		       PFID(&op_data->op_fid3), mdtidx);
+		rc = 0;
+		goto out_free;
+	}
+
+	op_data->op_mds = mdtidx;
+	op_data->op_cli_flags = CLI_MIGRATE;
+	rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
+		       namelen, name, namelen, &request);
+	if (!rc) {
+		ll_update_times(request, parent);
+		migrated = true;
+	}
+
+	ptlrpc_req_finished(request);
+
+out_free:
+	if (child_inode) {
+		/*
+		 * Drop the link count of the cached inode only when the
+		 * migration really happened; on errors and on the
+		 * "already on the target MDT" path it must stay untouched.
+		 */
+		if (migrated)
+			clear_nlink(child_inode);
+		inode_unlock(child_inode);
+		iput(child_inode);
+	}
+
+	ll_finish_md_op_data(op_data);
+	return rc;
+}
+
static int
ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
{
@@ -2847,7 +2878,7 @@ int ll_have_md_lock(struct inode *inode, __u64 *bits,
struct lustre_handle lockh;
ldlm_policy_data_t policy;
enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
- (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
+ (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
struct lu_fid *fid;
__u64 flags;
int i;
@@ -2888,15 +2919,12 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
{
ldlm_policy_data_t policy = { .l_inodebits = {bits} };
struct lu_fid *fid;
- enum ldlm_mode rc;
fid = &ll_i2info(inode)->lli_fid;
CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
- rc = md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
- fid, LDLM_IBITS, &policy, mode, lockh);
-
- return rc;
+ return md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
+ fid, LDLM_IBITS, &policy, mode, lockh);
}
static int ll_inode_revalidate_fini(struct inode *inode, int rc)
@@ -2949,15 +2977,9 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- oit.it_create_mode |= M_CHECK_STALE;
- rc = md_intent_lock(exp, op_data, NULL, 0,
- /* we are not interested in name
- * based lookup
- */
- &oit, 0, &req,
- ll_md_blocking_ast, 0);
+ rc = md_intent_lock(exp, op_data, &oit, &req,
+ &ll_md_blocking_ast, 0);
ll_finish_md_op_data(op_data);
- oit.it_create_mode &= ~M_CHECK_STALE;
if (rc < 0) {
rc = ll_inode_revalidate_fini(inode, rc);
goto out;
@@ -3003,10 +3025,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
op_data->op_valid = valid;
rc = md_getattr(sbi->ll_md_exp, op_data, &req);
ll_finish_md_op_data(op_data);
- if (rc) {
- rc = ll_inode_revalidate_fini(inode, rc);
- return rc;
- }
+ if (rc)
+ return ll_inode_revalidate_fini(inode, rc);
rc = ll_prep_inode(&inode, req, NULL, NULL);
}
@@ -3015,6 +3035,28 @@ out:
return rc;
}
+/*
+ * Fetch the merged attributes of a striped directory through
+ * md_merge_attr() and apply them to @inode: link count, block count and
+ * size go to the VFS inode, the three timestamps go to the llite inode
+ * info.
+ *
+ * The caller must only use this on an inode whose lli_lsm_md is set
+ * (asserted below). Returns 0 on success, or the md_merge_attr() error,
+ * in which case this function writes nothing to @inode.
+ */
+static int ll_merge_md_attr(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct cl_attr merged = { 0 };
+ int rc;
+
+ LASSERT(lli->lli_lsm_md);
+
+ rc = md_merge_attr(ll_i2mdexp(inode), lli->lli_lsm_md, &merged,
+ ll_md_blocking_ast);
+ if (rc != 0)
+ return rc;
+
+ /* attributes kept in the VFS inode itself */
+ set_nlink(inode, merged.cat_nlink);
+ inode->i_blocks = merged.cat_blocks;
+ i_size_write(inode, merged.cat_size);
+
+ /* timestamps are cached in the llite inode info */
+ lli->lli_atime = merged.cat_atime;
+ lli->lli_mtime = merged.cat_mtime;
+ lli->lli_ctime = merged.cat_ctime;
+
+ return 0;
+}
+
static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
{
struct inode *inode = d_inode(dentry);
@@ -3026,6 +3068,13 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
/* if object isn't regular file, don't validate size */
if (!S_ISREG(inode->i_mode)) {
+ if (S_ISDIR(inode->i_mode) &&
+ ll_i2info(inode)->lli_lsm_md) {
+ rc = ll_merge_md_attr(inode);
+ if (rc)
+ return rc;
+ }
+
LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
@@ -3057,13 +3106,14 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
if (res)
return res;
+ OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30);
+
stat->dev = inode->i_sb->s_dev;
if (ll_need_32bit_api(sbi))
stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
else
stat->ino = inode->i_ino;
stat->mode = inode->i_mode;
- stat->nlink = inode->i_nlink;
stat->uid = inode->i_uid;
stat->gid = inode->i_gid;
stat->rdev = inode->i_rdev;
@@ -3072,6 +3122,7 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
stat->ctime = inode->i_ctime;
stat->blksize = 1 << inode->i_blkbits;
+ stat->nlink = inode->i_nlink;
stat->size = i_size_read(inode);
stat->blocks = inode->i_blocks;
@@ -3139,6 +3190,12 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type)
int ll_inode_permission(struct inode *inode, int mask)
{
+ struct ll_sb_info *sbi;
+ struct root_squash_info *squash;
+ const struct cred *old_cred = NULL;
+ struct cred *cred = NULL;
+ bool squash_id = false;
+ cfs_cap_t cap;
int rc = 0;
if (mask & MAY_NOT_BLOCK)
@@ -3158,9 +3215,46 @@ int ll_inode_permission(struct inode *inode, int mask)
CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
+ /* squash fsuid/fsgid if needed */
+ sbi = ll_i2sbi(inode);
+ squash = &sbi->ll_squash;
+ if (unlikely(squash->rsi_uid &&
+ uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
+ !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
+ squash_id = true;
+ }
+
+ if (squash_id) {
+ CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
+ __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
+ squash->rsi_uid, squash->rsi_gid);
+
+ /*
+ * update current process's credentials
+ * and FS capability
+ */
+ cred = prepare_creds();
+ if (!cred)
+ return -ENOMEM;
+
+ cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
+ cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
+ for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
+ if ((1 << cap) & CFS_CAP_FS_MASK)
+ cap_lower(cred->cap_effective, cap);
+ }
+ old_cred = override_creds(cred);
+ }
+
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
rc = generic_permission(inode, mask);
+ /* restore current process's credentials and FS capability */
+ if (squash_id) {
+ revert_creds(old_cred);
+ put_cred(cred);
+ }
+
return rc;
}
@@ -3213,10 +3307,10 @@ const struct inode_operations ll_file_inode_operations = {
.setattr = ll_setattr,
.getattr = ll_getattr,
.permission = ll_inode_permission,
- .setxattr = ll_setxattr,
- .getxattr = ll_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = ll_listxattr,
- .removexattr = ll_removexattr,
+ .removexattr = generic_removexattr,
.fiemap = ll_fiemap,
.get_acl = ll_get_acl,
};
@@ -3251,7 +3345,6 @@ void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
if (!in_data)
return NULL;
- memset(in_data, 0, sizeof(*in_data));
in_data->iocd_size = size;
in_data->iocd_cb = cb;
in_data->iocd_count = count;
@@ -3389,7 +3482,7 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
goto out;
}
- lmmsize = body->eadatasize;
+ lmmsize = body->mbo_eadatasize;
if (lmmsize == 0) /* empty layout */ {
rc = 0;
goto out;
@@ -3447,7 +3540,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
PFID(&lli->lli_fid), inode, reconf);
/* in case this is a caching lock and reinstate with new inode */
- md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
+ md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL);
lock_res_and_lock(lock);
lvb_ready = ldlm_is_lvb_ready(lock);
@@ -3557,8 +3650,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
struct ldlm_enqueue_info einfo = {
.ei_type = LDLM_IBITS,
.ei_mode = LCK_CR,
- .ei_cb_bl = ll_md_blocking_ast,
- .ei_cb_cp = ldlm_completion_ast,
+ .ei_cb_bl = &ll_md_blocking_ast,
+ .ei_cb_cp = &ldlm_completion_ast,
};
int rc;
@@ -3604,8 +3697,7 @@ again:
ll_get_fsname(inode->i_sb, NULL, 0),
PFID(&lli->lli_fid), inode);
- rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
- NULL, 0, NULL, 0);
+ rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
ptlrpc_req_finished(it.it_request);
it.it_request = NULL;
diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
index 92004a05f9ee..22507b9c6d69 100644
--- a/drivers/staging/lustre/lustre/llite/glimpse.c
+++ b/drivers/staging/lustre/lustre/llite/glimpse.c
@@ -42,7 +42,6 @@
#include "../include/obd.h"
#include "../include/lustre_dlm.h"
-#include "../include/lustre_lite.h"
#include "../include/lustre_mdc.h"
#include <linux/pagemap.h>
#include <linux/file.h>
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
index 396e4e4f0715..084330d08f7a 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -49,7 +49,6 @@
#include "../include/obd.h"
#include "../include/obd_support.h"
#include "../include/lustre_fid.h"
-#include "../include/lustre_lite.h"
#include "../include/lustre_dlm.h"
#include "../include/lustre_ver.h"
#include "../include/lustre_mdc.h"
@@ -100,6 +99,7 @@ int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
io->u.ci_setattr.sa_valid = attr->ia_valid;
+ io->u.ci_setattr.sa_parent_fid = ll_inode2fid(inode);
again:
if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
@@ -154,7 +154,7 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
int result = 0;
int refcheck;
- LASSERT(md->body->valid & OBD_MD_FLID);
+ LASSERT(md->body->mbo_valid & OBD_MD_FLID);
LASSERT(S_ISREG(inode->i_mode));
env = cl_env_get(&refcheck);
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
index f6be105eeef7..fb346c12dad2 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_misc.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
@@ -38,7 +38,6 @@
#include "../include/obd.h"
#include "../include/cl_object.h"
-#include "../include/lustre_lite.h"
#include "llite_internal.h"
/* Initialize the default and maximum LOV EA and cookie sizes. This allows
diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c
index 2326b40a0870..8644631bf2ba 100644
--- a/drivers/staging/lustre/lustre/llite/llite_close.c
+++ b/drivers/staging/lustre/lustre/llite/llite_close.c
@@ -38,7 +38,6 @@
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/lustre_lite.h"
#include "llite_internal.h"
/** records that a write is in flight */
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 4d6d589a1677..3e98bd685061 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -36,14 +36,21 @@
#include "../include/lustre_ver.h"
#include "../include/lustre_disk.h" /* for s2sbi */
#include "../include/lustre_eacl.h"
+#include "../include/lustre_linkea.h"
/* for struct cl_lock_descr and struct cl_io */
+#include "../include/lustre_patchless_compat.h"
+#include "../include/lustre_compat.h"
#include "../include/cl_object.h"
+#include "../include/lustre_lmv.h"
#include "../include/lustre_mdc.h"
#include "../include/lustre_intent.h"
#include <linux/compat.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include "vvp_internal.h"
+#include "range_lock.h"
#ifndef FMODE_EXEC
#define FMODE_EXEC 0
@@ -57,6 +64,9 @@
#define LL_DIR_END_OFF 0x7fffffffffffffffULL
#define LL_DIR_END_OFF_32BIT 0x7fffffffUL
+/* 4UL * 1024 * 1024 */
+#define LL_MAX_BLKSIZE_BITS 22
+
#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
#define LUSTRE_FPRIVATE(file) ((file)->private_data)
@@ -116,9 +126,7 @@ struct ll_inode_info {
/* identifying fields for both metadata and data stacks. */
struct lu_fid lli_fid;
- /* Parent fid for accessing default stripe data on parent directory
- * for allocating OST objects after a mknod() and later open-by-FID.
- */
+ /* master inode fid for stripe directory */
struct lu_fid lli_pfid;
struct list_head lli_close_list;
@@ -156,7 +164,7 @@ struct ll_inode_info {
/* for directory */
struct {
/* serialize normal readdir and statahead-readdir. */
- struct mutex d_readdir_mutex;
+ struct mutex lli_readdir_mutex;
/* metadata statahead */
/* since parent-child threads can share the same @file
@@ -164,27 +172,39 @@ struct ll_inode_info {
* case of parent exit before child -- it is me should
* cleanup the dir readahead.
*/
- void *d_opendir_key;
- struct ll_statahead_info *d_sai;
+ void *lli_opendir_key;
+ struct ll_statahead_info *lli_sai;
/* protect statahead stuff. */
- spinlock_t d_sa_lock;
+ spinlock_t lli_sa_lock;
/* "opendir_pid" is the token when lookup/revalidate
* -- I am the owner of dir statahead.
*/
- pid_t d_opendir_pid;
- } d;
-
-#define lli_readdir_mutex u.d.d_readdir_mutex
-#define lli_opendir_key u.d.d_opendir_key
-#define lli_sai u.d.d_sai
-#define lli_sa_lock u.d.d_sa_lock
-#define lli_opendir_pid u.d.d_opendir_pid
+ pid_t lli_opendir_pid;
+ /* stat will try to access statahead entries or start
+ * statahead if this flag is set. The flag is set upon
+ * dir open, and cleared when the dir is closed, the
+ * statahead hit ratio is too low, or starting the
+ * statahead thread failed.
+ */
+ unsigned int lli_sa_enabled:1;
+ /* generation for statahead */
+ unsigned int lli_sa_generation;
+ /* directory stripe information */
+ struct lmv_stripe_md *lli_lsm_md;
+ /* default directory stripe offset. This is extracted
+ * from the "dmv" xattr in order to decide which MDT to
+ * create a subdirectory on. The MDS itself fetches
+ * "dmv" and gets the rest of the default layout itself
+ * (count, hash, etc).
+ */
+ __u32 lli_def_stripe_offset;
+ };
/* for non-directory */
struct {
- struct mutex f_size_mutex;
- char *f_symlink_name;
- __u64 f_maxbytes;
+ struct mutex lli_size_mutex;
+ char *lli_symlink_name;
+ __u64 lli_maxbytes;
/*
* struct rw_semaphore {
* signed long count; // align d.d_def_acl
@@ -192,16 +212,16 @@ struct ll_inode_info {
* struct list_head wait_list;
* }
*/
- struct rw_semaphore f_trunc_sem;
- struct mutex f_write_mutex;
+ struct rw_semaphore lli_trunc_sem;
+ struct range_lock_tree lli_write_tree;
- struct rw_semaphore f_glimpse_sem;
- unsigned long f_glimpse_time;
- struct list_head f_agl_list;
- __u64 f_agl_index;
+ struct rw_semaphore lli_glimpse_sem;
+ unsigned long lli_glimpse_time;
+ struct list_head lli_agl_list;
+ __u64 lli_agl_index;
/* for writepage() only to communicate to fsync */
- int f_async_rc;
+ int lli_async_rc;
/*
* whenever a process try to read/write the file, the
@@ -211,22 +231,9 @@ struct ll_inode_info {
* so the read/write statistics for jobid will not be
* accurate if the file is shared by different jobs.
*/
- char f_jobid[JOBSTATS_JOBID_SIZE];
- } f;
-
-#define lli_size_mutex u.f.f_size_mutex
-#define lli_symlink_name u.f.f_symlink_name
-#define lli_maxbytes u.f.f_maxbytes
-#define lli_trunc_sem u.f.f_trunc_sem
-#define lli_write_mutex u.f.f_write_mutex
-#define lli_glimpse_sem u.f.f_glimpse_sem
-#define lli_glimpse_time u.f.f_glimpse_time
-#define lli_agl_list u.f.f_agl_list
-#define lli_agl_index u.f.f_agl_index
-#define lli_async_rc u.f.f_async_rc
-#define lli_jobid u.f.f_jobid
-
- } u;
+ char lli_jobid[LUSTRE_JOBID_SIZE];
+ };
+ };
/* XXX: For following frequent used members, although they maybe special
* used for non-directory object, it is some time-wasting to check
@@ -401,12 +408,13 @@ enum stats_track_type {
#define LL_SBI_LAYOUT_LOCK 0x20000 /* layout lock support */
#define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */
#define LL_SBI_XATTR_CACHE 0x80000 /* support for xattr cache */
+#define LL_SBI_NOROOTSQUASH 0x100000 /* do not apply root squash */
#define LL_SBI_FLAGS { \
"nolck", \
"checksum", \
"flock", \
- "xattr", \
+ "user_xattr", \
"acl", \
"???", \
"???", \
@@ -422,9 +430,27 @@ enum stats_track_type {
"verbose", \
"layout", \
"user_fid2path",\
- "xattr", \
+ "xattr_cache", \
+ "norootsquash", \
}
+/*
+ * This is embedded into llite super-blocks to keep track of the connect
+ * flags (capabilities) supported by all imports that a given mount is
+ * connected to.
+ */
+struct lustre_client_ocd {
+ /*
+ * This is the conjunction of connect_flags across all imports
+ * (LOVs) this mount is connected to. This field is updated by
+ * cl_ocd_update() under ->lco_lock.
+ */
+ __u64 lco_flags;
+ /* held while ->lco_flags is updated (see the note on lco_flags) */
+ struct mutex lco_lock;
+ /*
+ * NOTE(review): presumably the metadata (md) and data (dt) exports
+ * of this mount -- confirm against the users of this struct.
+ */
+ struct obd_export *lco_md_exp;
+ struct obd_export *lco_dt_exp;
+};
+
struct ll_sb_info {
/* this protects pglist and ra_info. It isn't safe to
* grab from interrupt contexts
@@ -461,7 +487,7 @@ struct ll_sb_info {
unsigned int ll_namelen;
struct file_operations *ll_fop;
- unsigned int ll_md_brw_size; /* used by readdir */
+ unsigned int ll_md_brw_pages; /* readdir pages per RPC */
struct lu_site *ll_site;
struct cl_device *ll_cl;
@@ -484,11 +510,17 @@ struct ll_sb_info {
atomic_t ll_sa_wrong; /* statahead thread stopped for
* low hit ratio
*/
+ atomic_t ll_sa_running; /* running statahead thread
+ * count
+ */
atomic_t ll_agl_total; /* AGL thread started count */
dev_t ll_sdev_orig; /* save s_dev before assign for
* clustered nfs
*/
+ /* root squash */
+ struct root_squash_info ll_squash;
+
__kernel_fsid_t ll_fsid;
struct kobject ll_kobj; /* sysfs object */
struct super_block *ll_sb; /* struct super_block (for sysfs code)*/
@@ -643,25 +675,66 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
struct ll_file_data *file, loff_t pos,
size_t count, int rw);
+/*
+ * Identifiers of the llite statistics counters; one of these is passed
+ * as the counter index to ll_stats_ops_tally().
+ * NOTE(review): LPROC_LL_FILE_OPCODES is the last entry and presumably
+ * doubles as the number of counters -- confirm before adding anything
+ * after it.
+ */
+enum {
+ LPROC_LL_DIRTY_HITS,
+ LPROC_LL_DIRTY_MISSES,
+ LPROC_LL_READ_BYTES,
+ LPROC_LL_WRITE_BYTES,
+ LPROC_LL_BRW_READ,
+ LPROC_LL_BRW_WRITE,
+ LPROC_LL_OSC_READ,
+ LPROC_LL_OSC_WRITE,
+ LPROC_LL_IOCTL,
+ LPROC_LL_OPEN,
+ LPROC_LL_RELEASE,
+ LPROC_LL_MAP,
+ LPROC_LL_LLSEEK,
+ LPROC_LL_FSYNC,
+ LPROC_LL_READDIR,
+ LPROC_LL_SETATTR,
+ LPROC_LL_TRUNC,
+ LPROC_LL_FLOCK,
+ LPROC_LL_GETATTR,
+ LPROC_LL_CREATE,
+ LPROC_LL_LINK,
+ LPROC_LL_UNLINK,
+ LPROC_LL_SYMLINK,
+ LPROC_LL_MKDIR,
+ LPROC_LL_RMDIR,
+ LPROC_LL_MKNOD,
+ LPROC_LL_RENAME,
+ LPROC_LL_STAFS,
+ LPROC_LL_ALLOC_INODE,
+ LPROC_LL_SETXATTR,
+ LPROC_LL_GETXATTR,
+ LPROC_LL_GETXATTR_HITS,
+ LPROC_LL_LISTXATTR,
+ LPROC_LL_REMOVEXATTR,
+ LPROC_LL_INODE_PERM,
+ LPROC_LL_FILE_OPCODES
+};
+
/* llite/dir.c */
-void ll_release_page(struct page *page, int remove);
extern const struct file_operations ll_dir_operations;
extern const struct inode_operations ll_dir_inode_operations;
-struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
- struct ll_dir_chain *chain);
-int ll_dir_read(struct inode *inode, struct dir_context *ctx);
-
+int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
+ struct dir_context *ctx);
int ll_get_mdt_idx(struct inode *inode);
+int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid);
+struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
+ __u64 offset);
+void ll_release_page(struct inode *inode, struct page *page, bool remove);
+
/* llite/namei.c */
extern const struct inode_operations ll_special_inode_operations;
-int ll_objects_destroy(struct ptlrpc_request *request,
- struct inode *dir);
struct inode *ll_iget(struct super_block *sb, ino_t hash,
struct lustre_md *lic);
+int ll_test_inode_by_fid(struct inode *inode, void *opaque);
int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
void *data, int flag);
struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
+void ll_update_times(struct ptlrpc_request *request, struct inode *inode);
/* llite/rw.c */
int ll_writepage(struct page *page, struct writeback_control *wbc);
@@ -704,7 +777,10 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
struct lustre_handle *fh);
int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
struct posix_acl *ll_get_acl(struct inode *inode, int type);
-
+int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
+ const char *name, int namelen);
+int ll_get_fid_by_name(struct inode *parent, const char *name,
+ int namelen, struct lu_fid *fid);
int ll_inode_permission(struct inode *inode, int mask);
int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
@@ -715,8 +791,8 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
struct ptlrpc_request **request);
int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
int set_default);
-int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
- int *lmm_size, struct ptlrpc_request **request);
+int ll_dir_getstripe(struct inode *inode, void **lmmp, int *lmm_size,
+ struct ptlrpc_request **request, u64 valid);
int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
int ll_merge_attr(const struct lu_env *env, struct inode *inode);
int ll_fid2path(struct inode *inode, void __user *arg);
@@ -748,8 +824,8 @@ int ll_setattr(struct dentry *de, struct iattr *attr);
int ll_statfs(struct dentry *de, struct kstatfs *sfs);
int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
__u64 max_age, __u32 flags);
-void ll_update_inode(struct inode *inode, struct lustre_md *md);
-void ll_read_inode2(struct inode *inode, void *opaque);
+int ll_update_inode(struct inode *inode, struct lustre_md *md);
+int ll_read_inode2(struct inode *inode, void *opaque);
void ll_delete_inode(struct inode *inode);
int ll_iocontrol(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg);
@@ -763,15 +839,46 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
int ll_obd_statfs(struct inode *inode, void __user *arg);
int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize);
+int ll_set_default_mdsize(struct ll_sb_info *sbi, int default_mdsize);
int ll_process_config(struct lustre_cfg *lcfg);
+
+/*
+ * Operation codes for metadata operations.
+ * NOTE(review): presumably these are the values taken by the 'opc'
+ * argument of ll_prep_md_op_data() -- confirm against its callers.
+ * The numbering is explicit and value 4 is not assigned here.
+ */
+enum {
+ LUSTRE_OPC_MKDIR = 0,
+ LUSTRE_OPC_SYMLINK = 1,
+ LUSTRE_OPC_MKNOD = 2,
+ LUSTRE_OPC_CREATE = 3,
+ LUSTRE_OPC_ANY = 5,
+};
+
struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
struct inode *i1, struct inode *i2,
- const char *name, int namelen,
- int mode, __u32 opc, void *data);
+ const char *name, size_t namelen,
+ u32 mode, __u32 opc, void *data);
void ll_finish_md_op_data(struct md_op_data *op_data);
int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
char *ll_get_fsname(struct super_block *sb, char *buf, int buflen);
+void ll_compute_rootsquash_state(struct ll_sb_info *sbi);
void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req);
+ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
+ struct lov_user_md **kbuf);
+
+/*
+ * Return the size, in bytes, that a lov_user_md handed in from user space
+ * is expected to have, judged by its magic:
+ * - V1 and V3 have a fixed size;
+ * - SPECIFIC depends on lmm_stripe_count, which must not exceed
+ * LOV_MAX_STRIPE_COUNT.
+ * Any other magic, or an oversized stripe count, yields -EINVAL.
+ */
+static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
+{
+ if (lum->lmm_magic == LOV_USER_MAGIC_V1)
+ return sizeof(struct lov_user_md_v1);
+
+ if (lum->lmm_magic == LOV_USER_MAGIC_V3)
+ return sizeof(struct lov_user_md_v3);
+
+ if (lum->lmm_magic != LOV_USER_MAGIC_SPECIFIC)
+ return -EINVAL;
+
+ /* only the SPECIFIC layout is sized by its stripe count */
+ if (lum->lmm_stripe_count > LOV_MAX_STRIPE_COUNT)
+ return -EINVAL;
+
+ return lov_user_md_size(lum->lmm_stripe_count,
+ LOV_USER_MAGIC_SPECIFIC);
+}
/* llite/llite_nfs.c */
extern const struct export_operations lustre_export_operations;
@@ -779,6 +886,7 @@ __u32 get_uuid2int(const char *name, int len);
void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid);
struct inode *search_inode_for_lustre(struct super_block *sb,
const struct lu_fid *fid);
+int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid);
/* llite/symlink.c */
extern const struct inode_operations ll_fast_symlink_inode_operations;
@@ -933,12 +1041,19 @@ static inline __u64 ll_file_maxbytes(struct inode *inode)
}
/* llite/xattr.c */
-int ll_setxattr(struct dentry *dentry, struct inode *inode,
- const char *name, const void *value, size_t size, int flags);
-ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size);
+extern const struct xattr_handler *ll_xattr_handlers[];
+
+#define XATTR_USER_T 1
+#define XATTR_TRUSTED_T 2
+#define XATTR_SECURITY_T 3
+#define XATTR_ACL_ACCESS_T 4
+#define XATTR_ACL_DEFAULT_T 5
+#define XATTR_LUSTRE_T 6
+#define XATTR_OTHER_T 7
+
ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
-int ll_removexattr(struct dentry *dentry, const char *name);
+int ll_xattr_list(struct inode *inode, const char *name, int type,
+ void *buffer, size_t size, __u64 valid);
/**
* Common IO arguments for various VFS I/O interfaces.
@@ -964,11 +1079,10 @@ void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
/* per inode struct, for dir only */
struct ll_statahead_info {
- struct inode *sai_inode;
+ struct dentry *sai_dentry;
atomic_t sai_refcount; /* when access this struct, hold
* refcount
*/
- unsigned int sai_generation; /* generation for statahead */
unsigned int sai_max; /* max ahead of lookup */
__u64 sai_sent; /* stat requests sent count */
__u64 sai_replied; /* stat requests which received
@@ -995,22 +1109,25 @@ struct ll_statahead_info {
unsigned int sai_ls_all:1, /* "ls -al", do stat-ahead for
* hidden entries
*/
- sai_agl_valid:1;/* AGL is valid for the dir */
+ sai_agl_valid:1,/* AGL is valid for the dir */
+ sai_in_readpage:1;/* statahead is in readdir() */
wait_queue_head_t sai_waitq; /* stat-ahead wait queue */
struct ptlrpc_thread sai_thread; /* stat-ahead thread */
struct ptlrpc_thread sai_agl_thread; /* AGL thread */
- struct list_head sai_entries; /* entry list */
- struct list_head sai_entries_received; /* entries returned */
- struct list_head sai_entries_stated; /* entries stated */
- struct list_head sai_entries_agl; /* AGL entries to be sent */
+ struct list_head sai_interim_entries; /* entries which got async
+ * stat reply, but not
+ * instantiated
+ */
+ struct list_head sai_entries; /* completed entries */
+ struct list_head sai_agls; /* AGLs to be sent */
struct list_head sai_cache[LL_SA_CACHE_SIZE];
spinlock_t sai_cache_lock[LL_SA_CACHE_SIZE];
atomic_t sai_cache_count; /* entry count in cache */
};
-int do_statahead_enter(struct inode *dir, struct dentry **dentry,
- int only_unplug);
-void ll_stop_statahead(struct inode *dir, void *key);
+int ll_statahead(struct inode *dir, struct dentry **dentry, bool unplug);
+void ll_authorize_statahead(struct inode *dir, void *key);
+void ll_deauthorize_statahead(struct inode *dir, void *key);
blkcnt_t dirty_cnt(struct inode *inode);
@@ -1040,73 +1157,53 @@ static inline int ll_glimpse_size(struct inode *inode)
return rc;
}
-static inline void
-ll_statahead_mark(struct inode *dir, struct dentry *dentry)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct ll_dentry_data *ldd = ll_d2d(dentry);
-
- /* not the same process, don't mark */
- if (lli->lli_opendir_pid != current_pid())
- return;
-
- LASSERT(ldd);
- if (sai)
- ldd->lld_sa_generation = sai->sai_generation;
-}
-
-static inline int
-d_need_statahead(struct inode *dir, struct dentry *dentryp)
+/*
+ * dentry may statahead when statahead is enabled and current process has opened
+ * parent directory, and this dentry hasn't accessed statahead cache before
+ */
+static inline bool
+dentry_may_statahead(struct inode *dir, struct dentry *dentry)
{
struct ll_inode_info *lli;
struct ll_dentry_data *ldd;
if (ll_i2sbi(dir)->ll_sa_max == 0)
- return -EAGAIN;
+ return false;
lli = ll_i2info(dir);
+
+ /*
+ * statahead is not allowed for this dir; there are three possible causes:
+ * 1. the dir is not opened.
+ * 2. the statahead hit ratio is too low.
+ * 3. a previous stat failed to start the statahead thread.
+ */
+ if (!lli->lli_sa_enabled)
+ return false;
+
/* not the same process, don't statahead */
if (lli->lli_opendir_pid != current_pid())
- return -EAGAIN;
-
- /* statahead has been stopped */
- if (!lli->lli_opendir_key)
- return -EAGAIN;
+ return false;
- ldd = ll_d2d(dentryp);
/*
- * When stats a dentry, the system trigger more than once "revalidate"
- * or "lookup", for "getattr", for "getxattr", and maybe for others.
- * Under patchless client mode, the operation intent is not accurate,
- * which maybe misguide the statahead thread. For example:
- * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe
- * have the same operation intent -- "IT_GETATTR".
- * In fact, one dentry should has only one chance to interact with the
- * statahead thread, otherwise the statahead windows will be confused.
+ * When stating a dentry, the kernel may trigger 'revalidate' or 'lookup'
+ * multiple times, e.g. for 'getattr', 'getxattr', etc.
+ * For a patchless client the lookup intent is not accurate, which may
+ * misguide statahead. For example:
+ * The 'revalidate' calls for 'getattr' and 'getxattr' of a dentry will
+ * have the same intent -- IT_GETATTR, while one dentry should access the
+ * statahead cache only once, otherwise the statahead window is messed up.
* The solution is as following:
- * Assign "lld_sa_generation" with "sai_generation" when a dentry
- * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR"
- * will bypass interacting with statahead thread for checking:
- * "lld_sa_generation == lli_sai->sai_generation"
+ * Assign 'lld_sa_generation' with 'lli_sa_generation' when a dentry
+ * IT_GETATTR for the first time, and subsequent IT_GETATTR will
+ * bypass interacting with statahead cache by checking
+ * 'lld_sa_generation == lli->lli_sa_generation'.
*/
- if (ldd && lli->lli_sai &&
- ldd->lld_sa_generation == lli->lli_sai->sai_generation)
- return -EAGAIN;
+ ldd = ll_d2d(dentry);
+ if (ldd && ldd->lld_sa_generation == lli->lli_sa_generation)
+ return false;
- return 1;
-}
-
-static inline int
-ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug)
-{
- int ret;
-
- ret = d_need_statahead(dir, *dentryp);
- if (ret <= 0)
- return ret;
-
- return do_statahead_enter(dir, dentryp, only_unplug);
+ return true;
}
/* llite ioctl register support routine */
@@ -1213,7 +1310,7 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for remote lock %#llx\n",
PFID(ll_inode2fid(inode)), inode,
handle.cookie);
- md_set_lock_data(exp, &handle.cookie, inode, NULL);
+ md_set_lock_data(exp, &handle, inode, NULL);
}
handle.cookie = it->it_lock_handle;
@@ -1221,8 +1318,7 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for lock %#llx\n",
PFID(ll_inode2fid(inode)), inode, handle.cookie);
- md_set_lock_data(exp, &handle.cookie, inode,
- &it->it_lock_bits);
+ md_set_lock_data(exp, &handle, inode, &it->it_lock_bits);
it->it_lock_set = 1;
}
@@ -1295,6 +1391,8 @@ void ll_xattr_fini(void);
int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
struct cl_page *page, enum cl_req_type crt);
+int ll_getparent(struct file *file, struct getparent __user *arg);
+
/* lcommon_cl.c */
int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 546063e728db..6bb41b09172e 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -41,7 +41,7 @@
#include <linux/types.h>
#include <linux/mm.h>
-#include "../include/lustre_lite.h"
+#include "../include/lustre/lustre_ioctl.h"
#include "../include/lustre_ha.h"
#include "../include/lustre_dlm.h"
#include "../include/lprocfs_status.h"
@@ -115,9 +115,16 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
sbi->ll_sa_max = LL_SA_RPC_DEF;
atomic_set(&sbi->ll_sa_total, 0);
atomic_set(&sbi->ll_sa_wrong, 0);
+ atomic_set(&sbi->ll_sa_running, 0);
atomic_set(&sbi->ll_agl_total, 0);
sbi->ll_flags |= LL_SBI_AGL_ENABLED;
+ /* root squash */
+ sbi->ll_squash.rsi_uid = 0;
+ sbi->ll_squash.rsi_gid = 0;
+ INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids);
+ init_rwsem(&sbi->ll_squash.rsi_sem);
+
sbi->ll_sb = sb;
return sbi;
@@ -128,6 +135,8 @@ static void ll_free_sbi(struct super_block *sb)
struct ll_sb_info *sbi = ll_s2sbi(sb);
if (sbi->ll_cache) {
+ if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids))
+ cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids);
cl_cache_decref(sbi->ll_cache);
sbi->ll_cache = NULL;
}
@@ -180,7 +189,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
OBD_CONNECT_PINGLESS |
OBD_CONNECT_MAX_EASIZE |
OBD_CONNECT_FLOCK_DEAD |
- OBD_CONNECT_DISP_STRIPE;
+ OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
+ OBD_CONNECT_OPEN_BY_FID |
+ OBD_CONNECT_DIR_STRIPE;
if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
data->ocd_connect_flags |= OBD_CONNECT_SOM;
@@ -310,9 +321,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
sbi->ll_flags |= LL_SBI_64BIT_HASH;
if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
- sbi->ll_md_brw_size = data->ocd_brw_size;
+ sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_SHIFT;
else
- sbi->ll_md_brw_size = PAGE_SIZE;
+ sbi->ll_md_brw_pages = 1;
if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
@@ -418,6 +429,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid));
sb->s_op = &lustre_super_operations;
+ sb->s_xattr = ll_xattr_handlers;
#if THREAD_SIZE >= 8192 /*b=17630*/
sb->s_export_op = &lustre_export_operations;
#endif
@@ -462,7 +474,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
md_free_lustre_md(sbi->ll_md_exp, &lmd);
ptlrpc_req_finished(request);
- if (!(root)) {
+ if (IS_ERR(root)) {
if (lmd.lsm)
obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm);
#ifdef CONFIG_FS_POSIX_ACL
@@ -486,11 +498,21 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
KEY_CHECKSUM, sizeof(checksum), &checksum,
NULL);
+ if (err) {
+ CERROR("%s: Set checksum failed: rc = %d\n",
+ sbi->ll_dt_exp->exp_obd->obd_name, err);
+ goto out_root;
+ }
cl_sb_init(sb);
err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
KEY_CACHE_SET, sizeof(*sbi->ll_cache),
sbi->ll_cache, NULL);
+ if (err) {
+ CERROR("%s: Set cache_set failed: rc = %d\n",
+ sbi->ll_dt_exp->exp_obd->obd_name, err);
+ goto out_root;
+ }
sb->s_root = d_make_root(root);
if (!sb->s_root) {
@@ -560,6 +582,17 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
return rc;
}
+/**
+ * Get the value of the default_easize parameter.
+ *
+ * \see client_obd::cl_default_mds_easize
+ *
+ * \param[in] sbi superblock info for this filesystem
+ * \param[out] lmmsize pointer to storage location for value
+ *
+ * \retval 0 on success
+ * \retval negative negated errno on failure
+ */
int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
{
int size, rc;
@@ -573,6 +606,29 @@ int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
return rc;
}
+/**
+ * Set the default_easize parameter to the given value.
+ *
+ * \see client_obd::cl_default_mds_easize
+ *
+ * \param[in] sbi superblock info for this filesystem
+ * \param[in] lmmsize the size to set; checked against sizeof(struct lov_mds_md) and OBD_MAX_DEFAULT_EA_SIZE
+ *
+ * \retval 0 on success
+ * \retval negative negated errno on failure (-EINVAL when lmmsize fails the range check)
+ */
+int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize)
+{
+ if (lmmsize < sizeof(struct lov_mds_md) || /* int compared with size_t: lmmsize is converted to unsigned, so a negative value does NOT compare below the minimum here */
+ lmmsize > OBD_MAX_DEFAULT_EA_SIZE) /* NOTE(review): confirm that negative values are rejected somewhere; whether this bound catches them depends on the macro's type */
+ return -EINVAL;
+
+ return obd_set_info_async(NULL, sbi->ll_md_exp, /* pass the value to the MD export under KEY_DEFAULT_EASIZE */
+ sizeof(KEY_DEFAULT_EASIZE),
+ KEY_DEFAULT_EASIZE,
+ sizeof(int), &lmmsize, NULL);
+}
+
static void client_common_put_super(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
@@ -608,6 +664,12 @@ void ll_kill_super(struct super_block *sb)
if (sbi) {
sb->s_dev = sbi->ll_sdev_orig;
sbi->ll_umounting = 1;
+
+ /* wait for running statahead threads to quit */
+ while (atomic_read(&sbi->ll_sa_running) > 0) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC >> 3));
+ }
}
}
@@ -647,7 +709,8 @@ static int ll_options(char *options, int *flags)
*flags |= tmp;
goto next;
}
- tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
+ tmp = ll_set_opt("noflock", s1,
+ LL_SBI_FLOCK | LL_SBI_LOCALFLOCK);
if (tmp) {
*flags &= ~tmp;
goto next;
@@ -772,11 +835,13 @@ void ll_lli_init(struct ll_inode_info *lli)
lli->lli_sai = NULL;
spin_lock_init(&lli->lli_sa_lock);
lli->lli_opendir_pid = 0;
+ lli->lli_sa_enabled = 0;
+ lli->lli_def_stripe_offset = -1;
} else {
mutex_init(&lli->lli_size_mutex);
lli->lli_symlink_name = NULL;
init_rwsem(&lli->lli_trunc_sem);
- mutex_init(&lli->lli_write_mutex);
+ range_lock_tree_init(&lli->lli_write_tree);
init_rwsem(&lli->lli_glimpse_sem);
lli->lli_glimpse_time = 0;
INIT_LIST_HEAD(&lli->lli_agl_list);
@@ -896,7 +961,8 @@ void ll_put_super(struct super_block *sb)
struct lustre_sb_info *lsi = s2lsi(sb);
struct ll_sb_info *sbi = ll_s2sbi(sb);
char *profilenm = get_profile_name(sb);
- int ccc_count, next, force = 1, rc = 0;
+ int next, force = 1, rc = 0;
+ long ccc_count;
CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
@@ -917,13 +983,13 @@ void ll_put_super(struct super_block *sb)
struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
rc = l_wait_event(sbi->ll_cache->ccc_unstable_waitq,
- !atomic_read(&sbi->ll_cache->ccc_unstable_nr),
+ !atomic_long_read(&sbi->ll_cache->ccc_unstable_nr),
&lwi);
}
- ccc_count = atomic_read(&sbi->ll_cache->ccc_unstable_nr);
+ ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr);
if (!force && rc != -EINTR)
- LASSERTF(!ccc_count, "count: %i\n", ccc_count);
+ LASSERTF(!ccc_count, "count: %li\n", ccc_count);
/* We need to set force before the lov_disconnect in
* lustre_common_put_super, since l_d cleans up osc's as well.
@@ -991,6 +1057,206 @@ struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
return inode;
}
+static void ll_dir_clear_lsm_md(struct inode *inode) /* free the lsm_md attached to a directory inode, if there is one */
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ LASSERT(S_ISDIR(inode->i_mode)); /* callers must pass a directory inode */
+
+ if (lli->lli_lsm_md) {
+ lmv_free_memmd(lli->lli_lsm_md);
+ lli->lli_lsm_md = NULL; /* do not leave a pointer to the freed lsm behind */
+ }
+}
+
+static struct inode *ll_iget_anon_dir(struct super_block *sb,
+ const struct lu_fid *fid,
+ struct lustre_md *md) /* get the inode for \a fid via iget_locked(); a new inode is set up as a directory slave of md->body->mbo_fid1. Returns the inode or ERR_PTR(-ENOENT) */
+{
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct mdt_body *body = md->body;
+ struct inode *inode;
+ ino_t ino;
+
+ ino = cl_fid_build_ino(fid, sbi->ll_flags & LL_SBI_32BIT_API); /* inode number is derived from the FID */
+ inode = iget_locked(sb, ino);
+ if (!inode) { /* NOTE(review): reported as -ENOENT; a NULL return from iget_locked() usually means allocation failure - confirm the errno is intended */
+ CERROR("%s: failed get simple inode "DFID": rc = -ENOENT\n",
+ ll_get_fsname(sb, NULL, 0), PFID(fid));
+ return ERR_PTR(-ENOENT);
+ }
+
+ if (inode->i_state & I_NEW) { /* one-time setup, done only while the inode is still flagged I_NEW */
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lmv_stripe_md *lsm = md->lmv;
+
+ inode->i_mode = (inode->i_mode & ~S_IFMT) | /* take only the file-type bits from the reply body */
+ (body->mbo_mode & S_IFMT);
+ LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode "DFID"\n",
+ PFID(fid));
+
+ LTIME_S(inode->i_mtime) = 0;
+ LTIME_S(inode->i_atime) = 0;
+ LTIME_S(inode->i_ctime) = 0;
+ inode->i_rdev = 0;
+
+ inode->i_op = &ll_dir_inode_operations;
+ inode->i_fop = &ll_dir_operations;
+ lli->lli_fid = *fid;
+ ll_lli_init(lli);
+
+ LASSERT(lsm);
+ /* remember the master object FID as this slave's lli_pfid */
+ lli->lli_pfid = body->mbo_fid1;
+ CDEBUG(D_INODE, "lli %p slave "DFID" master "DFID"\n",
+ lli, PFID(fid), PFID(&lli->lli_pfid));
+ unlock_new_inode(inode); /* setup finished */
+ }
+
+ return inode;
+}
+
+static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md) /* fill lmo_root for every stripe in md->lmv; returns 0 or the negative errno of the first failing stripe */
+{
+ struct lmv_stripe_md *lsm = md->lmv;
+ struct lu_fid *fid;
+ int i;
+
+ LASSERT(lsm);
+ /*
+ * XXX sigh, this lsm_root initialization should be in the
+ * LMV layer, but it needs ll_iget right now, so we
+ * put it here for the time being.
+ */
+ for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
+ fid = &lsm->lsm_md_oinfo[i].lmo_fid;
+ LASSERT(!lsm->lsm_md_oinfo[i].lmo_root); /* each stripe root must still be unset */
+ /* Unfortunately ll_iget will call ll_update_inode,
+ * where the initialization of slave inode is slightly
+ * different, so it resets lsm_md to NULL to avoid
+ * initializing lsm for slave inode.
+ */
+ /* For a migrating inode, the master stripe and master object
+ * are the same, so for stripe 0 we only need to assign this inode
+ */
+ if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && !i)
+ lsm->lsm_md_oinfo[i].lmo_root = inode;
+ else
+ lsm->lsm_md_oinfo[i].lmo_root =
+ ll_iget_anon_dir(inode->i_sb, fid, md);
+ if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) {
+ int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root);
+
+ lsm->lsm_md_oinfo[i].lmo_root = NULL; /* never leave an ERR_PTR in lmo_root */
+ return rc; /* stripes set up by earlier iterations keep their lmo_root */
+ }
+ }
+
+ return 0;
+}
+
+static inline int lli_lsm_md_eq(const struct lmv_stripe_md *lsm_md1, /* nonzero iff both layouts agree on every field compared below */
+ const struct lmv_stripe_md *lsm_md2)
+{
+ return lsm_md1->lsm_md_magic == lsm_md2->lsm_md_magic &&
+ lsm_md1->lsm_md_stripe_count == lsm_md2->lsm_md_stripe_count &&
+ lsm_md1->lsm_md_master_mdt_index ==
+ lsm_md2->lsm_md_master_mdt_index &&
+ lsm_md1->lsm_md_hash_type == lsm_md2->lsm_md_hash_type &&
+ lsm_md1->lsm_md_layout_version ==
+ lsm_md2->lsm_md_layout_version &&
+ !strcmp(lsm_md1->lsm_md_pool_name, /* pool names are compared as strings; the lsm_md_oinfo[] entries are not compared */
+ lsm_md2->lsm_md_pool_name);
+}
+
+static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) /* reconcile the directory's cached lsm_md with md->lmv; returns 0, an ll_init_lsm_md() error, or -EIO on layout mismatch */
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lmv_stripe_md *lsm = md->lmv;
+ int rc;
+
+ LASSERT(S_ISDIR(inode->i_mode));
+ CDEBUG(D_INODE, "update lsm %p of "DFID"\n", lli->lli_lsm_md,
+ PFID(ll_inode2fid(inode)));
+
+ /* the request carries no striping information */
+ if (!lsm) {
+ if (!lli->lli_lsm_md) { /* nothing cached either: nothing to do */
+ return 0;
+ } else if (lli->lli_lsm_md->lsm_md_hash_type &
+ LMV_HASH_FLAG_MIGRATION) {
+ /*
+ * migration is done, the temporary MIGRATE layout has
+ * been removed
+ */
+ CDEBUG(D_INODE, DFID" finish migration.\n",
+ PFID(ll_inode2fid(inode)));
+ lmv_free_memmd(lli->lli_lsm_md);
+ lli->lli_lsm_md = NULL;
+ return 0;
+ } else {
+ /*
+ * The lustre_md from req does not include stripeEA,
+ * see ll_md_setattr; the cached layout is kept as is
+ */
+ return 0;
+ }
+ }
+
+ /* no layout cached yet: set the directory layout from the request */
+ if (!lli->lli_lsm_md) {
+ rc = ll_init_lsm_md(inode, md);
+ if (rc)
+ return rc;
+
+ lli->lli_lsm_md = lsm;
+ /*
+ * clear md->lmv so that the later release of lustre_md
+ * will not free this lsm, which the inode now references
+ */
+ md->lmv = NULL;
+ CDEBUG(D_INODE, "Set lsm %p magic %x to "DFID"\n", lsm,
+ lsm->lsm_md_magic, PFID(ll_inode2fid(inode)));
+ return 0;
+ }
+
+ /* Compare the old and new stripe information */
+ if (!lsm_md_eq(lli->lli_lsm_md, lsm)) { /* NOTE(review): this calls lsm_md_eq(), not the lli_lsm_md_eq() helper defined in this file - confirm which one is intended */
+ struct lmv_stripe_md *old_lsm = lli->lli_lsm_md;
+ int idx;
+
+ CERROR("%s: inode "DFID"(%p)'s lmv layout mismatch (%p)/(%p) magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid),
+ inode, lsm, old_lsm,
+ lsm->lsm_md_magic, old_lsm->lsm_md_magic,
+ lsm->lsm_md_stripe_count,
+ old_lsm->lsm_md_stripe_count,
+ lsm->lsm_md_master_mdt_index,
+ old_lsm->lsm_md_master_mdt_index,
+ lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type,
+ lsm->lsm_md_layout_version,
+ old_lsm->lsm_md_layout_version,
+ lsm->lsm_md_pool_name,
+ old_lsm->lsm_md_pool_name);
+
+ for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++) { /* dump the stripe FIDs of the cached layout */
+ CERROR("%s: sub FIDs in old lsm idx %d, old: "DFID"\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), idx,
+ PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid));
+ }
+
+ for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++) { /* dump the stripe FIDs of the layout from the request */
+ CERROR("%s: sub FIDs in new lsm idx %d, new: "DFID"\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), idx,
+ PFID(&lsm->lsm_md_oinfo[idx].lmo_fid));
+ }
+
+ return -EIO; /* the cached layout is left unchanged on mismatch */
+ }
+
+ return 0;
+}
+
void ll_clear_inode(struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
@@ -1031,14 +1297,15 @@ void ll_clear_inode(struct inode *inode)
#ifdef CONFIG_FS_POSIX_ACL
if (lli->lli_posix_acl) {
- LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
posix_acl_release(lli->lli_posix_acl);
lli->lli_posix_acl = NULL;
}
#endif
lli->lli_inode_magic = LLI_INODE_DEAD;
- if (!S_ISDIR(inode->i_mode))
+ if (S_ISDIR(inode->i_mode))
+ ll_dir_clear_lsm_md(inode);
+ if (S_ISREG(inode->i_mode) && !is_bad_inode(inode))
LASSERT(list_empty(&lli->lli_agl_list));
/*
@@ -1103,10 +1370,10 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
op_data->op_attr.ia_valid = ia_valid;
/* Extract epoch data if obtained. */
- op_data->op_handle = md.body->handle;
- op_data->op_ioepoch = md.body->ioepoch;
+ op_data->op_handle = md.body->mbo_handle;
+ op_data->op_ioepoch = md.body->mbo_ioepoch;
- ll_update_inode(inode, &md);
+ rc = ll_update_inode(inode, &md);
ptlrpc_req_finished(request);
return rc;
@@ -1138,8 +1405,8 @@ static int ll_setattr_done_writing(struct inode *inode,
rc = ll_som_update(inode, op_data);
else if (rc) {
CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n",
- ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
- PFID(ll_inode2fid(inode)), rc);
+ ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)), rc);
}
return rc;
}
@@ -1331,14 +1598,14 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
{
int mode = d_inode(de)->i_mode;
- if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
- (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
+ if ((attr->ia_valid & (ATTR_CTIME | ATTR_SIZE | ATTR_MODE)) ==
+ (ATTR_CTIME | ATTR_SIZE | ATTR_MODE))
attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
- if (((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) ==
- (ATTR_SIZE|ATTR_MODE)) &&
+ if (((attr->ia_valid & (ATTR_MODE | ATTR_FORCE | ATTR_SIZE)) ==
+ (ATTR_SIZE | ATTR_MODE)) &&
(((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) ||
- (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
+ (((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) &&
!(attr->ia_mode & S_ISGID))))
attr->ia_valid |= ATTR_FORCE;
@@ -1349,7 +1616,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
attr->ia_valid |= ATTR_KILL_SUID;
if ((attr->ia_valid & ATTR_MODE) &&
- ((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
+ ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) &&
!(attr->ia_mode & S_ISGID) &&
!(attr->ia_valid & ATTR_KILL_SGID))
attr->ia_valid |= ATTR_KILL_SGID;
@@ -1465,14 +1732,14 @@ void ll_inode_size_unlock(struct inode *inode)
mutex_unlock(&lli->lli_size_mutex);
}
-void ll_update_inode(struct inode *inode, struct lustre_md *md)
+int ll_update_inode(struct inode *inode, struct lustre_md *md)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct mdt_body *body = md->body;
struct lov_stripe_md *lsm = md->lsm;
struct ll_sb_info *sbi = ll_i2sbi(inode);
- LASSERT((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
+ LASSERT((lsm != NULL) == ((body->mbo_valid & OBD_MD_FLEASIZE) != 0));
if (lsm) {
if (!lli->lli_has_smd &&
!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
@@ -1483,8 +1750,16 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
lli->lli_maxbytes = MAX_LFS_FILESIZE;
}
+ if (S_ISDIR(inode->i_mode)) {
+ int rc;
+
+ rc = ll_update_lsm_md(inode, md);
+ if (rc)
+ return rc;
+ }
+
#ifdef CONFIG_FS_POSIX_ACL
- if (body->valid & OBD_MD_FLACL) {
+ if (body->mbo_valid & OBD_MD_FLACL) {
spin_lock(&lli->lli_lock);
if (lli->lli_posix_acl)
posix_acl_release(lli->lli_posix_acl);
@@ -1492,65 +1767,67 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
spin_unlock(&lli->lli_lock);
}
#endif
- inode->i_ino = cl_fid_build_ino(&body->fid1,
+ inode->i_ino = cl_fid_build_ino(&body->mbo_fid1,
sbi->ll_flags & LL_SBI_32BIT_API);
- inode->i_generation = cl_fid_build_gen(&body->fid1);
+ inode->i_generation = cl_fid_build_gen(&body->mbo_fid1);
- if (body->valid & OBD_MD_FLATIME) {
- if (body->atime > LTIME_S(inode->i_atime))
- LTIME_S(inode->i_atime) = body->atime;
- lli->lli_atime = body->atime;
+ if (body->mbo_valid & OBD_MD_FLATIME) {
+ if (body->mbo_atime > LTIME_S(inode->i_atime))
+ LTIME_S(inode->i_atime) = body->mbo_atime;
+ lli->lli_atime = body->mbo_atime;
}
- if (body->valid & OBD_MD_FLMTIME) {
- if (body->mtime > LTIME_S(inode->i_mtime)) {
+ if (body->mbo_valid & OBD_MD_FLMTIME) {
+ if (body->mbo_mtime > LTIME_S(inode->i_mtime)) {
CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n",
inode->i_ino, LTIME_S(inode->i_mtime),
- body->mtime);
- LTIME_S(inode->i_mtime) = body->mtime;
+ body->mbo_mtime);
+ LTIME_S(inode->i_mtime) = body->mbo_mtime;
}
- lli->lli_mtime = body->mtime;
- }
- if (body->valid & OBD_MD_FLCTIME) {
- if (body->ctime > LTIME_S(inode->i_ctime))
- LTIME_S(inode->i_ctime) = body->ctime;
- lli->lli_ctime = body->ctime;
- }
- if (body->valid & OBD_MD_FLMODE)
- inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
- if (body->valid & OBD_MD_FLTYPE)
- inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
+ lli->lli_mtime = body->mbo_mtime;
+ }
+ if (body->mbo_valid & OBD_MD_FLCTIME) {
+ if (body->mbo_ctime > LTIME_S(inode->i_ctime))
+ LTIME_S(inode->i_ctime) = body->mbo_ctime;
+ lli->lli_ctime = body->mbo_ctime;
+ }
+ if (body->mbo_valid & OBD_MD_FLMODE)
+ inode->i_mode = (inode->i_mode & S_IFMT) |
+ (body->mbo_mode & ~S_IFMT);
+ if (body->mbo_valid & OBD_MD_FLTYPE)
+ inode->i_mode = (inode->i_mode & ~S_IFMT) |
+ (body->mbo_mode & S_IFMT);
LASSERT(inode->i_mode != 0);
if (S_ISREG(inode->i_mode))
inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1,
LL_MAX_BLKSIZE_BITS);
else
inode->i_blkbits = inode->i_sb->s_blocksize_bits;
- if (body->valid & OBD_MD_FLUID)
- inode->i_uid = make_kuid(&init_user_ns, body->uid);
- if (body->valid & OBD_MD_FLGID)
- inode->i_gid = make_kgid(&init_user_ns, body->gid);
- if (body->valid & OBD_MD_FLFLAGS)
- inode->i_flags = ll_ext_to_inode_flags(body->flags);
- if (body->valid & OBD_MD_FLNLINK)
- set_nlink(inode, body->nlink);
- if (body->valid & OBD_MD_FLRDEV)
- inode->i_rdev = old_decode_dev(body->rdev);
-
- if (body->valid & OBD_MD_FLID) {
+ if (body->mbo_valid & OBD_MD_FLUID)
+ inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid);
+ if (body->mbo_valid & OBD_MD_FLGID)
+ inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid);
+ if (body->mbo_valid & OBD_MD_FLFLAGS)
+ inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags);
+ if (body->mbo_valid & OBD_MD_FLNLINK)
+ set_nlink(inode, body->mbo_nlink);
+ if (body->mbo_valid & OBD_MD_FLRDEV)
+ inode->i_rdev = old_decode_dev(body->mbo_rdev);
+
+ if (body->mbo_valid & OBD_MD_FLID) {
/* FID shouldn't be changed! */
if (fid_is_sane(&lli->lli_fid)) {
- LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1),
+ LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1),
"Trying to change FID "DFID" to the "DFID", inode "DFID"(%p)\n",
- PFID(&lli->lli_fid), PFID(&body->fid1),
+ PFID(&lli->lli_fid), PFID(&body->mbo_fid1),
PFID(ll_inode2fid(inode)), inode);
} else {
- lli->lli_fid = body->fid1;
+ lli->lli_fid = body->mbo_fid1;
}
}
LASSERT(fid_seq(&lli->lli_fid) != 0);
- if (body->valid & OBD_MD_FLSIZE) {
+ if (body->mbo_valid & OBD_MD_FLSIZE) {
if (exp_connect_som(ll_i2mdexp(inode)) &&
S_ISREG(inode->i_mode)) {
struct lustre_handle lockh;
@@ -1577,7 +1854,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
/* Use old size assignment to avoid
* deadlock bz14138 & bz14326
*/
- i_size_write(inode, body->size);
+ i_size_write(inode, body->mbo_size);
spin_lock(&lli->lli_lock);
lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
spin_unlock(&lli->lli_lock);
@@ -1588,26 +1865,29 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
/* Use old size assignment to avoid
* deadlock bz14138 & bz14326
*/
- i_size_write(inode, body->size);
+ i_size_write(inode, body->mbo_size);
CDEBUG(D_VFSTRACE, "inode=%lu, updating i_size %llu\n",
- inode->i_ino, (unsigned long long)body->size);
+ inode->i_ino, (unsigned long long)body->mbo_size);
}
- if (body->valid & OBD_MD_FLBLOCKS)
- inode->i_blocks = body->blocks;
+ if (body->mbo_valid & OBD_MD_FLBLOCKS)
+ inode->i_blocks = body->mbo_blocks;
}
- if (body->valid & OBD_MD_TSTATE) {
- if (body->t_state & MS_RESTORE)
+ if (body->mbo_valid & OBD_MD_TSTATE) {
+ if (body->mbo_t_state & MS_RESTORE)
lli->lli_flags |= LLIF_FILE_RESTORING;
}
+
+ return 0;
}
-void ll_read_inode2(struct inode *inode, void *opaque)
+int ll_read_inode2(struct inode *inode, void *opaque)
{
struct lustre_md *md = opaque;
struct ll_inode_info *lli = ll_i2info(inode);
+ int rc;
CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
PFID(&lli->lli_fid), inode);
@@ -1623,7 +1903,9 @@ void ll_read_inode2(struct inode *inode, void *opaque)
LTIME_S(inode->i_atime) = 0;
LTIME_S(inode->i_ctime) = 0;
inode->i_rdev = 0;
- ll_update_inode(inode, md);
+ rc = ll_update_inode(inode, md);
+ if (rc)
+ return rc;
/* OIDEBUG(inode); */
@@ -1644,6 +1926,8 @@ void ll_read_inode2(struct inode *inode, void *opaque)
init_special_inode(inode, inode->i_mode,
inode->i_rdev);
}
+
+ return 0;
}
void ll_delete_inode(struct inode *inode)
@@ -1655,20 +1939,13 @@ void ll_delete_inode(struct inode *inode)
* osc_extent implementation at LU-1030.
*/
cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
- CL_FSYNC_DISCARD, 1);
+ CL_FSYNC_LOCAL, 1);
truncate_inode_pages_final(&inode->i_data);
- /* Workaround for LU-118 */
- if (inode->i_data.nrpages) {
- spin_lock_irq(&inode->i_data.tree_lock);
- spin_unlock_irq(&inode->i_data.tree_lock);
- LASSERTF(inode->i_data.nrpages == 0,
- "inode="DFID"(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
- PFID(ll_inode2fid(inode)), inode,
- inode->i_data.nrpages);
- }
- /* Workaround end */
+ LASSERTF(!inode->i_data.nrpages,
+ "inode=" DFID "(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
+ PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages);
ll_clear_inode(inode);
clear_inode(inode);
@@ -1704,7 +1981,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- flags = body->flags;
+ flags = body->mbo_flags;
ptlrpc_req_finished(req);
@@ -1886,9 +2163,9 @@ void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req)
if (!op_data)
return;
- op_data->op_fid1 = body->fid1;
- op_data->op_ioepoch = body->ioepoch;
- op_data->op_handle = body->handle;
+ op_data->op_fid1 = body->mbo_fid1;
+ op_data->op_ioepoch = body->mbo_ioepoch;
+ op_data->op_handle = body->mbo_handle;
op_data->op_mod_time = get_seconds();
md_close(exp, op_data, NULL, &close_req);
ptlrpc_req_finished(close_req);
@@ -1910,7 +2187,9 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
goto cleanup;
if (*inode) {
- ll_update_inode(*inode, &md);
+ rc = ll_update_inode(*inode, &md);
+ if (rc)
+ goto out;
} else {
LASSERT(sb);
@@ -1918,18 +2197,18 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
* At this point server returns to client's same fid as client
* generated for creating. So using ->fid1 is okay here.
*/
- if (!fid_is_sane(&md.body->fid1)) {
+ if (!fid_is_sane(&md.body->mbo_fid1)) {
CERROR("%s: Fid is insane " DFID "\n",
ll_get_fsname(sb, NULL, 0),
- PFID(&md.body->fid1));
+ PFID(&md.body->mbo_fid1));
rc = -EINVAL;
goto out;
}
- *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1,
+ *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1,
sbi->ll_flags & LL_SBI_32BIT_API),
&md);
- if (!*inode) {
+ if (IS_ERR(*inode)) {
#ifdef CONFIG_FS_POSIX_ACL
if (md.posix_acl) {
posix_acl_release(md.posix_acl);
@@ -2075,11 +2354,20 @@ int ll_process_config(struct lustre_cfg *lcfg)
/* this function prepares md_op_data hint for passing ot down to MD stack. */
struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
struct inode *i1, struct inode *i2,
- const char *name, int namelen,
- int mode, __u32 opc, void *data)
+ const char *name, size_t namelen,
+ u32 mode, __u32 opc, void *data)
{
- if (namelen > ll_i2sbi(i1)->ll_namelen)
- return ERR_PTR(-ENAMETOOLONG);
+ if (!name) {
+ /* Do not reuse namelen for something else. */
+ if (namelen)
+ return ERR_PTR(-EINVAL);
+ } else {
+ if (namelen > ll_i2sbi(i1)->ll_namelen)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ if (!lu_name_is_valid_2(name, namelen))
+ return ERR_PTR(-EINVAL);
+ }
if (!op_data)
op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
@@ -2089,11 +2377,26 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
ll_i2gids(op_data->op_suppgids, i1, i2);
op_data->op_fid1 = *ll_inode2fid(i1);
+ op_data->op_default_stripe_offset = -1;
+ if (S_ISDIR(i1->i_mode)) {
+ op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
+ op_data->op_default_stripe_offset =
+ ll_i2info(i1)->lli_def_stripe_offset;
+ }
- if (i2)
+ if (i2) {
op_data->op_fid2 = *ll_inode2fid(i2);
- else
+ if (S_ISDIR(i2->i_mode))
+ op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md;
+ } else {
fid_zero(&op_data->op_fid2);
+ }
+
+ if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH)
+ op_data->op_cli_flags |= CLI_HASH64;
+
+ if (ll_need_32bit_api(ll_i2sbi(i1)))
+ op_data->op_cli_flags |= CLI_API32;
op_data->op_name = name;
op_data->op_namelen = namelen;
@@ -2105,26 +2408,12 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
op_data->op_bias = 0;
op_data->op_cli_flags = 0;
if ((opc == LUSTRE_OPC_CREATE) && name &&
- filename_is_volatile(name, namelen, NULL))
+ filename_is_volatile(name, namelen, &op_data->op_mds))
op_data->op_bias |= MDS_CREATE_VOLATILE;
- op_data->op_opc = opc;
- op_data->op_mds = 0;
+ else
+ op_data->op_mds = 0;
op_data->op_data = data;
- /* If the file is being opened after mknod() (normally due to NFS)
- * try to use the default stripe data from parent directory for
- * allocating OST objects. Try to pass the parent FID to MDS.
- */
- if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) &&
- !ll_i2info(i2)->lli_has_smd) {
- struct ll_inode_info *lli = ll_i2info(i2);
-
- spin_lock(&lli->lli_lock);
- if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid)))
- op_data->op_fid1 = lli->lli_pfid;
- spin_unlock(&lli->lli_lock);
- }
-
/* When called by ll_setattr_raw, file is i1. */
if (ll_i2info(i1)->lli_flags & LLIF_DATA_MODIFIED)
op_data->op_bias |= MDS_DATA_MODIFIED;
@@ -2251,3 +2540,197 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
if (buf)
free_page((unsigned long)buf);
}
+
+ssize_t ll_copy_user_md(const struct lov_user_md __user *md, /* copy a lov_user_md from user space into a kzalloc'ed *kbuf; returns its size in bytes or a negative errno */
+ struct lov_user_md **kbuf)
+{
+ struct lov_user_md lum;
+ ssize_t lum_size;
+
+ if (copy_from_user(&lum, md, sizeof(lum))) { /* read the fixed-size part first so the full size can be computed */
+ lum_size = -EFAULT;
+ goto no_kbuf;
+ }
+
+ lum_size = ll_lov_user_md_size(&lum);
+ if (lum_size < 0) /* a negative size is returned to the caller as the error */
+ goto no_kbuf;
+
+ *kbuf = kzalloc(lum_size, GFP_NOFS);
+ if (!*kbuf) {
+ lum_size = -ENOMEM;
+ goto no_kbuf;
+ }
+
+ if (copy_from_user(*kbuf, md, lum_size) != 0) { /* NOTE(review): user memory is read a second time; the header in *kbuf may differ from the one used to compute lum_size - confirm callers re-validate it */
+ kfree(*kbuf);
+ *kbuf = NULL;
+ lum_size = -EFAULT;
+ }
+no_kbuf: /* on failure *kbuf is either untouched (early errors) or NULL; on success the caller owns *kbuf */
+ return lum_size;
+}
+
+/*
+ * Compute llite root squash state after a change of root squash
+ * configuration setting or the addition/removal of an LNet NID
+ */
+void ll_compute_rootsquash_state(struct ll_sb_info *sbi)
+{
+ struct root_squash_info *squash = &sbi->ll_squash;
+ lnet_process_id_t id;
+ bool matched;
+ int i;
+
+ /* Update norootsquash flag (done with rsi_sem held for write) */
+ down_write(&squash->rsi_sem);
+ if (list_empty(&squash->rsi_nosquash_nids)) { /* empty nosquash list: the flag is always cleared */
+ sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
+ } else {
+ /*
+ * Do not apply root squash as soon as one of our NIDs is
+ * in the nosquash_nids list
+ */
+ matched = false;
+ i = 0;
+
+ while (LNetGetId(i++, &id) != -ENOENT) { /* walk the ids by index until LNetGetId() reports -ENOENT */
+ if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND) /* NIDs whose network type is LOLND are ignored */
+ continue;
+ if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) {
+ matched = true;
+ break;
+ }
+ }
+ if (matched)
+ sbi->ll_flags |= LL_SBI_NOROOTSQUASH;
+ else
+ sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
+ }
+ up_write(&squash->rsi_sem);
+}
+
+/**
+ * Parse linkea content to extract information about a given hardlink
+ *
+ * \param[in] ldata - Initialized linkea data
+ * \param[in] linkno - Link identifier
+ * \param[out] parent_fid - The entry's parent FID
+ * \param[out] ln - The entry's name, as unpacked by linkea_entry_unpack()
+ *
+ * \retval 0 on success
+ * \retval Appropriate negative error code on failure (-ENODATA when \a linkno is beyond the last link)
+ */
+static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno,
+ struct lu_fid *parent_fid, struct lu_name *ln)
+{
+ unsigned int idx;
+ int rc;
+
+ rc = linkea_init(ldata);
+ if (rc < 0)
+ return rc;
+
+ if (linkno >= ldata->ld_leh->leh_reccount)
+ /* beyond last link */
+ return -ENODATA;
+
+ linkea_first_entry(ldata);
+ for (idx = 0; ldata->ld_lee; idx++) { /* unpack entries in order until entry number linkno is reached or ld_lee becomes NULL */
+ linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, ln,
+ parent_fid);
+ if (idx == linkno)
+ break;
+
+ linkea_next_entry(ldata);
+ }
+
+ if (idx < linkno) /* the entries ran out before linkno was reached */
+ return -ENODATA;
+
+ return 0;
+}
+
+/**
+ * Get parent FID and name of an identified link. Operation is performed for
+ * a given link number, letting the caller iterate over linkno to list one or
+ * all links of an entry.
+ *
+ * \param[in] file - Open file against which to perform the operation
+ * \param[in,out] arg - User-filled structure containing the linkno to operate
+ * on and the available size. It is eventually filled with
+ * the requested information. On error it is normally left untouched, but gp_fid may already have been written if a later copy to user space fails
+ *
+ * \retval - 0 on success
+ * \retval - Appropriate negative error code on failure
+ */
+int ll_getparent(struct file *file, struct getparent __user *arg)
+{
+ struct inode *inode = file_inode(file);
+ struct linkea_data *ldata;
+ struct lu_fid parent_fid;
+ struct lu_buf buf = {
+ .lb_buf = NULL,
+ .lb_len = 0
+ };
+ struct lu_name ln;
+ u32 name_size;
+ u32 linkno;
+ int rc;
+
+ if (!capable(CFS_CAP_DAC_READ_SEARCH) && /* allowed with this capability, or when LL_SBI_USER_FID2PATH is set on the superblock */
+ !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
+ return -EPERM;
+
+ if (get_user(name_size, &arg->gp_name_size))
+ return -EFAULT;
+
+ if (get_user(linkno, &arg->gp_linkno))
+ return -EFAULT;
+
+ if (name_size > PATH_MAX) /* reject user-supplied name buffer sizes above PATH_MAX */
+ return -EINVAL;
+
+ ldata = kzalloc(sizeof(*ldata), GFP_NOFS);
+ if (!ldata)
+ return -ENOMEM;
+
+ rc = linkea_data_new(ldata, &buf);
+ if (rc < 0)
+ goto ldata_free;
+
+ rc = ll_xattr_list(inode, XATTR_NAME_LINK, XATTR_TRUSTED_T, buf.lb_buf, /* read the XATTR_NAME_LINK xattr into buf */
+ buf.lb_len, OBD_MD_FLXATTR);
+ if (rc < 0)
+ goto lb_free;
+
+ rc = ll_linkea_decode(ldata, linkno, &parent_fid, &ln); /* returns 0 on success, so rc is 0 on the success path below */
+ if (rc < 0)
+ goto lb_free;
+
+ if (ln.ln_namelen >= name_size) { /* the user buffer needs room for the name plus the terminating NUL written below */
+ rc = -EOVERFLOW;
+ goto lb_free;
+ }
+
+ if (copy_to_user(&arg->gp_fid, &parent_fid, sizeof(arg->gp_fid))) {
+ rc = -EFAULT;
+ goto lb_free;
+ }
+
+ if (copy_to_user(&arg->gp_name, ln.ln_name, ln.ln_namelen)) {
+ rc = -EFAULT;
+ goto lb_free;
+ }
+
+ if (put_user('\0', arg->gp_name + ln.ln_namelen)) { /* NUL-terminate the name in user space */
+ rc = -EFAULT;
+ goto lb_free;
+ }
+
+lb_free: /* reached on success as well as on error */
+ lu_buf_free(&buf);
+ldata_free:
+ kfree(ldata);
+ return rc;
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 66ee5db5fce8..436691814a5e 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -43,9 +43,7 @@
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/lustre_lite.h"
#include "llite_internal.h"
-#include "../include/linux/lustre_compat25.h"
static const struct vm_operations_struct ll_file_vm_ops;
@@ -126,7 +124,7 @@ restart:
fio = &io->u.ci_fault;
fio->ft_index = index;
- fio->ft_executable = vma->vm_flags&VM_EXEC;
+ fio->ft_executable = vma->vm_flags & VM_EXEC;
/*
* disable VM_SEQ_READ and use VM_RAND_READ to make sure that
@@ -134,7 +132,7 @@ restart:
* filemap_nopage. we do our readahead in ll_readpage.
*/
if (ra_flags)
- *ra_flags = vma->vm_flags & (VM_RAND_READ|VM_SEQ_READ);
+ *ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
vma->vm_flags &= ~VM_SEQ_READ;
vma->vm_flags |= VM_RAND_READ;
@@ -429,7 +427,6 @@ static void ll_vm_open(struct vm_area_struct *vma)
struct inode *inode = file_inode(vma->vm_file);
struct vvp_object *vob = cl_inode2vvp(inode);
- LASSERT(vma->vm_file);
LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
atomic_inc(&vob->vob_mmap_cnt);
}
@@ -442,7 +439,6 @@ static void ll_vm_close(struct vm_area_struct *vma)
struct inode *inode = file_inode(vma->vm_file);
struct vvp_object *vob = cl_inode2vvp(inode);
- LASSERT(vma->vm_file);
atomic_dec(&vob->vob_mmap_cnt);
LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
}
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
index 65972c892731..709230571b4b 100644
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -38,7 +38,6 @@
*/
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/lustre_lite.h"
#include "llite_internal.h"
#include <linux/exportfs.h>
@@ -73,11 +72,6 @@ void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid)
fsid->val[1] = key >> 32;
}
-static int ll_nfs_test_inode(struct inode *inode, void *opaque)
-{
- return lu_fid_eq(&ll_i2info(inode)->lli_fid, opaque);
-}
-
struct inode *search_inode_for_lustre(struct super_block *sb,
const struct lu_fid *fid)
{
@@ -92,7 +86,7 @@ struct inode *search_inode_for_lustre(struct super_block *sb,
CDEBUG(D_INFO, "searching inode for:(%lu,"DFID")\n", hash, PFID(fid));
- inode = ilookup5(sb, hash, ll_nfs_test_inode, (void *)fid);
+ inode = ilookup5(sb, hash, ll_test_inode_by_fid, (void *)fid);
if (inode)
return inode;
@@ -153,12 +147,18 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren
return ERR_PTR(-ESTALE);
}
+ result = d_obtain_alias(inode);
+ if (IS_ERR(result)) {
+ iput(inode);
+ return result;
+ }
+
/**
- * It is an anonymous dentry without OST objects created yet.
- * We have to find the parent to tell MDS how to init lov objects.
+ * In case d_obtain_alias() found a disconnected dentry, always update
+ * lli_pfid so that later operations (normally open) have the parent FID,
+ * which may be used by the MDS to create data.
*/
- if (S_ISREG(inode->i_mode) && !ll_i2info(inode)->lli_has_smd &&
- parent && !fid_is_zero(parent)) {
+ if (parent) {
struct ll_inode_info *lli = ll_i2info(inode);
spin_lock(&lli->lli_lock);
@@ -255,6 +255,8 @@ static int ll_get_name(struct dentry *dentry, char *name,
.lgd_fid = ll_i2info(d_inode(child))->lli_fid,
.ctx.actor = ll_nfs_get_name_filldir,
};
+ struct md_op_data *op_data;
+ __u64 pos = 0;
if (!dir || !S_ISDIR(dir->i_mode)) {
rc = -ENOTDIR;
@@ -266,9 +268,18 @@ static int ll_get_name(struct dentry *dentry, char *name,
goto out;
}
+ op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
+ LUSTRE_OPC_ANY, dir);
+ if (IS_ERR(op_data)) {
+ rc = PTR_ERR(op_data);
+ goto out;
+ }
+
+ op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
inode_lock(dir);
- rc = ll_dir_read(dir, &lgd.ctx);
+ rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx);
inode_unlock(dir);
+ ll_finish_md_op_data(op_data);
if (!rc && !lgd.lgd_found)
rc = -ENOENT;
out:
@@ -297,14 +308,12 @@ static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
return ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL);
}
-static struct dentry *ll_get_parent(struct dentry *dchild)
+int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid)
{
struct ptlrpc_request *req = NULL;
- struct inode *dir = d_inode(dchild);
struct ll_sb_info *sbi;
- struct dentry *result = NULL;
struct mdt_body *body;
- static char dotdot[] = "..";
+ static const char dotdot[] = "..";
struct md_op_data *op_data;
int rc;
int lmmsize;
@@ -319,13 +328,13 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
rc = ll_get_default_mdsize(sbi, &lmmsize);
if (rc != 0)
- return ERR_PTR(rc);
+ return rc;
op_data = ll_prep_md_op_data(NULL, dir, NULL, dotdot,
strlen(dotdot), lmmsize,
LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
- return (void *)op_data;
+ return PTR_ERR(op_data);
rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
ll_finish_md_op_data(op_data);
@@ -333,21 +342,36 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
CERROR("%s: failure inode "DFID" get parent: rc = %d\n",
ll_get_fsname(dir->i_sb, NULL, 0),
PFID(ll_inode2fid(dir)), rc);
- return ERR_PTR(rc);
+ return rc;
}
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
/*
* LU-3952: MDT may lost the FID of its parent, we should not crash
* the NFS server, ll_iget_for_nfs() will handle the error.
*/
- if (body->valid & OBD_MD_FLID) {
+ if (body->mbo_valid & OBD_MD_FLID) {
CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
- PFID(ll_inode2fid(dir)), PFID(&body->fid1));
+ PFID(ll_inode2fid(dir)), PFID(&body->mbo_fid1));
+ *parent_fid = body->mbo_fid1;
}
- result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL);
ptlrpc_req_finished(req);
- return result;
+ return 0;
+}
+
+static struct dentry *ll_get_parent(struct dentry *dchild)
+{
+ struct lu_fid parent_fid = { 0 };
+ struct dentry *dentry;
+ int rc;
+
+ rc = ll_dir_get_parent_fid(dchild->d_inode, &parent_fid);
+ if (rc)
+ return ERR_PTR(rc);
+
+ dentry = ll_iget_for_nfs(dchild->d_inode->i_sb, &parent_fid, NULL);
+
+ return dentry;
}
const struct export_operations lustre_export_operations = {
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index e86bf3c53be3..6eae60595905 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -31,7 +31,6 @@
*/
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/lustre_lite.h"
#include "../include/lprocfs_status.h"
#include <linux/seq_file.h>
#include "../include/obd_support.h"
@@ -358,16 +357,16 @@ static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct cl_client_cache *cache = sbi->ll_cache;
int shift = 20 - PAGE_SHIFT;
- int max_cached_mb;
- int unused_mb;
+ long max_cached_mb;
+ long unused_mb;
max_cached_mb = cache->ccc_lru_max >> shift;
- unused_mb = atomic_read(&cache->ccc_lru_left) >> shift;
+ unused_mb = atomic_long_read(&cache->ccc_lru_left) >> shift;
seq_printf(m,
"users: %d\n"
- "max_cached_mb: %d\n"
- "used_mb: %d\n"
- "unused_mb: %d\n"
+ "max_cached_mb: %ld\n"
+ "used_mb: %ld\n"
+ "unused_mb: %ld\n"
"reclaim_count: %u\n",
atomic_read(&cache->ccc_users),
max_cached_mb,
@@ -385,10 +384,13 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct cl_client_cache *cache = sbi->ll_cache;
struct lu_env *env;
+ long diff = 0;
+ long nrpages = 0;
int refcheck;
- int mult, rc, pages_number;
- int diff = 0;
- int nrpages = 0;
+ long pages_number;
+ int mult;
+ long rc;
+ u64 val;
char kernbuf[128];
if (count >= sizeof(kernbuf))
@@ -401,10 +403,14 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
mult = 1 << (20 - PAGE_SHIFT);
buffer += lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count) -
kernbuf;
- rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+ rc = lprocfs_write_frac_u64_helper(buffer, count, &val, mult);
if (rc)
return rc;
+ if (val > LONG_MAX)
+ return -ERANGE;
+ pages_number = (long)val;
+
if (pages_number < 0 || pages_number > totalram_pages) {
CERROR("%s: can't set max cache more than %lu MB\n",
ll_get_fsname(sb, NULL, 0),
@@ -418,7 +424,7 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
/* easy - add more LRU slots. */
if (diff >= 0) {
- atomic_add(diff, &cache->ccc_lru_left);
+ atomic_long_add(diff, &cache->ccc_lru_left);
rc = 0;
goto out;
}
@@ -429,18 +435,18 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
diff = -diff;
while (diff > 0) {
- int tmp;
+ long tmp;
/* reduce LRU budget from free slots. */
do {
- int ov, nv;
+ long ov, nv;
- ov = atomic_read(&cache->ccc_lru_left);
+ ov = atomic_long_read(&cache->ccc_lru_left);
if (ov == 0)
break;
nv = ov > diff ? ov - diff : 0;
- rc = atomic_cmpxchg(&cache->ccc_lru_left, ov, nv);
+ rc = atomic_long_cmpxchg(&cache->ccc_lru_left, ov, nv);
if (likely(ov == rc)) {
diff -= ov - nv;
nrpages += ov - nv;
@@ -474,7 +480,7 @@ out:
spin_unlock(&sbi->ll_lock);
rc = count;
} else {
- atomic_add(nrpages, &cache->ccc_lru_left);
+ atomic_long_add(nrpages, &cache->ccc_lru_left);
}
return rc;
}
@@ -738,6 +744,18 @@ static ssize_t max_easize_show(struct kobject *kobj,
}
LUSTRE_RO_ATTR(max_easize);
+/**
+ * Get default_easize.
+ *
+ * \see client_obd::cl_default_mds_easize
+ *
+ * \param[in] kobj kernel object for sysfs tree
+ * \param[in] attr attribute of this kernel object
+ * \param[in] buf buffer to write data into
+ *
+ * \retval positive \a count on success
+ * \retval negative negated errno on failure
+ */
static ssize_t default_easize_show(struct kobject *kobj,
struct attribute *attr,
char *buf)
@@ -753,7 +771,44 @@ static ssize_t default_easize_show(struct kobject *kobj,
return sprintf(buf, "%u\n", ealen);
}
-LUSTRE_RO_ATTR(default_easize);
+
+/**
+ * Set default_easize.
+ *
+ * Range checking on the passed value is handled by
+ * ll_set_default_mdsize().
+ *
+ * \see client_obd::cl_default_mds_easize
+ *
+ * \param[in] kobj kernel object for sysfs tree
+ * \param[in] attr attribute of this kernel object
+ * \param[in] buffer string passed from user space
+ * \param[in] count \a buffer length
+ *
+ * \retval positive \a count on success
+ * \retval negative negated errno on failure
+ */
+static ssize_t default_easize_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kobj);
+ unsigned long val;
+ int rc;
+
+ rc = kstrtoul(buffer, 10, &val);
+ if (rc)
+ return rc;
+
+ rc = ll_set_default_mdsize(sbi, val);
+ if (rc)
+ return rc;
+
+ return count;
+}
+LUSTRE_RW_ATTR(default_easize);
static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
{
@@ -774,7 +829,7 @@ static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
flags >>= 1;
++i;
}
- seq_printf(m, "\b\n");
+ seq_puts(m, "\b\n");
return 0;
}
@@ -823,15 +878,116 @@ static ssize_t unstable_stats_show(struct kobject *kobj,
struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
ll_kobj);
struct cl_client_cache *cache = sbi->ll_cache;
- int pages, mb;
+ long pages;
+ int mb;
- pages = atomic_read(&cache->ccc_unstable_nr);
+ pages = atomic_long_read(&cache->ccc_unstable_nr);
mb = (pages * PAGE_SIZE) >> 20;
- return sprintf(buf, "unstable_pages: %8d\n"
- "unstable_mb: %8d\n", pages, mb);
+ return sprintf(buf, "unstable_check: %8d\n"
+ "unstable_pages: %12ld\n"
+ "unstable_mb: %8d\n",
+ cache->ccc_unstable_check, pages, mb);
}
-LUSTRE_RO_ATTR(unstable_stats);
+
+static ssize_t unstable_stats_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kobj);
+ char kernbuf[128];
+ int val, rc;
+
+ if (!count)
+ return 0;
+ if (count >= sizeof(kernbuf))
+ return -EINVAL;
+
+ if (copy_from_user(kernbuf, buffer, count))
+ return -EFAULT;
+ kernbuf[count] = 0;
+
+ buffer += lprocfs_find_named_value(kernbuf, "unstable_check:", &count) -
+ kernbuf;
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc < 0)
+ return rc;
+
+ /* borrow lru lock to set the value */
+ spin_lock(&sbi->ll_cache->ccc_lru_lock);
+ sbi->ll_cache->ccc_unstable_check = !!val;
+ spin_unlock(&sbi->ll_cache->ccc_lru_lock);
+
+ return count;
+}
+LUSTRE_RW_ATTR(unstable_stats);
+
+static ssize_t root_squash_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kobj);
+ struct root_squash_info *squash = &sbi->ll_squash;
+
+ return sprintf(buf, "%u:%u\n", squash->rsi_uid, squash->rsi_gid);
+}
+
+static ssize_t root_squash_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kobj);
+ struct root_squash_info *squash = &sbi->ll_squash;
+
+ return lprocfs_wr_root_squash(buffer, count, squash,
+ ll_get_fsname(sbi->ll_sb, NULL, 0));
+}
+LUSTRE_RW_ATTR(root_squash);
+
+static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct root_squash_info *squash = &sbi->ll_squash;
+ int len;
+
+ down_read(&squash->rsi_sem);
+ if (!list_empty(&squash->rsi_nosquash_nids)) {
+ len = cfs_print_nidlist(m->buf + m->count, m->size - m->count,
+ &squash->rsi_nosquash_nids);
+ m->count += len;
+ seq_puts(m, "\n");
+ } else {
+ seq_puts(m, "NONE\n");
+ }
+ up_read(&squash->rsi_sem);
+
+ return 0;
+}
+
+static ssize_t ll_nosquash_nids_seq_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct seq_file *m = file->private_data;
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct root_squash_info *squash = &sbi->ll_squash;
+ int rc;
+
+ rc = lprocfs_wr_nosquash_nids(buffer, count, squash,
+ ll_get_fsname(sb, NULL, 0));
+ if (rc < 0)
+ return rc;
+
+ ll_compute_rootsquash_state(sbi);
+
+ return rc;
+}
+
+LPROC_SEQ_FOPS(ll_nosquash_nids);
static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
/* { "mntpt_path", ll_rd_path, 0, 0 }, */
@@ -840,6 +996,8 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
{ "max_cached_mb", &ll_max_cached_mb_fops, NULL },
{ "statahead_stats", &ll_statahead_stats_fops, NULL, 0 },
{ "sbi_flags", &ll_sbi_flags_fops, NULL, 0 },
+ { .name = "nosquash_nids",
+ .fops = &ll_nosquash_nids_fops },
{ NULL }
};
@@ -869,6 +1027,7 @@ static struct attribute *llite_attrs[] = {
&lustre_attr_default_easize.attr,
&lustre_attr_xattr_cache.attr,
&lustre_attr_unstable_stats.attr,
+ &lustre_attr_root_squash.attr,
NULL,
};
@@ -893,17 +1052,17 @@ static const struct llite_file_opcode {
/* file operation */
{ LPROC_LL_DIRTY_HITS, LPROCFS_TYPE_REGS, "dirty_pages_hits" },
{ LPROC_LL_DIRTY_MISSES, LPROCFS_TYPE_REGS, "dirty_pages_misses" },
- { LPROC_LL_READ_BYTES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+ { LPROC_LL_READ_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
"read_bytes" },
- { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+ { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
"write_bytes" },
- { LPROC_LL_BRW_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+ { LPROC_LL_BRW_READ, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
"brw_read" },
- { LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+ { LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
"brw_write" },
- { LPROC_LL_OSC_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+ { LPROC_LL_OSC_READ, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
"osc_read" },
- { LPROC_LL_OSC_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+ { LPROC_LL_OSC_WRITE, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
"osc_write" },
{ LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" },
{ LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" },
@@ -1150,7 +1309,7 @@ static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
r, pct(r, read_tot), pct(read_cum, read_tot),
w, pct(w, write_tot), pct(write_cum, write_tot));
start = end;
- if (start == 1<<10) {
+ if (start == 1 << 10) {
start = 1;
units += 10;
unitp++;
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index 2c4dc69731e8..dfa36d34c645 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -42,13 +42,12 @@
#include "../include/obd_support.h"
#include "../include/lustre_fid.h"
-#include "../include/lustre_lite.h"
#include "../include/lustre_dlm.h"
#include "../include/lustre_ver.h"
#include "llite_internal.h"
-static int ll_create_it(struct inode *, struct dentry *,
- int, struct lookup_intent *);
+static int ll_create_it(struct inode *dir, struct dentry *dentry,
+ struct lookup_intent *it);
/* called from iget5_locked->find_inode() under inode_hash_lock spinlock */
static int ll_test_inode(struct inode *inode, void *opaque)
@@ -56,12 +55,12 @@ static int ll_test_inode(struct inode *inode, void *opaque)
struct ll_inode_info *lli = ll_i2info(inode);
struct lustre_md *md = opaque;
- if (unlikely(!(md->body->valid & OBD_MD_FLID))) {
+ if (unlikely(!(md->body->mbo_valid & OBD_MD_FLID))) {
CERROR("MDS body missing FID\n");
return 0;
}
- if (!lu_fid_eq(&lli->lli_fid, &md->body->fid1))
+ if (!lu_fid_eq(&lli->lli_fid, &md->body->mbo_fid1))
return 0;
return 1;
@@ -72,20 +71,20 @@ static int ll_set_inode(struct inode *inode, void *opaque)
struct ll_inode_info *lli = ll_i2info(inode);
struct mdt_body *body = ((struct lustre_md *)opaque)->body;
- if (unlikely(!(body->valid & OBD_MD_FLID))) {
+ if (unlikely(!(body->mbo_valid & OBD_MD_FLID))) {
CERROR("MDS body missing FID\n");
return -EINVAL;
}
- lli->lli_fid = body->fid1;
- if (unlikely(!(body->valid & OBD_MD_FLTYPE))) {
+ lli->lli_fid = body->mbo_fid1;
+ if (unlikely(!(body->mbo_valid & OBD_MD_FLTYPE))) {
CERROR("Can not initialize inode " DFID
" without object type: valid = %#llx\n",
- PFID(&lli->lli_fid), body->valid);
+ PFID(&lli->lli_fid), body->mbo_valid);
return -EINVAL;
}
- inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mode & S_IFMT);
+ inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mbo_mode & S_IFMT);
if (unlikely(inode->i_mode == 0)) {
CERROR("Invalid inode "DFID" type\n", PFID(&lli->lli_fid));
return -EINVAL;
@@ -96,41 +95,45 @@ static int ll_set_inode(struct inode *inode, void *opaque)
return 0;
}
-/*
- * Get an inode by inode number (already instantiated by the intent lookup).
- * Returns inode or NULL
+/**
+ * Get an inode by inode number(@hash), which is already instantiated by
+ * the intent lookup).
*/
struct inode *ll_iget(struct super_block *sb, ino_t hash,
struct lustre_md *md)
{
struct inode *inode;
+ int rc = 0;
LASSERT(hash != 0);
inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
-
- if (inode) {
- if (inode->i_state & I_NEW) {
- int rc = 0;
-
- ll_read_inode2(inode, md);
- if (S_ISREG(inode->i_mode) &&
- !ll_i2info(inode)->lli_clob) {
- CDEBUG(D_INODE,
- "%s: apply lsm %p to inode " DFID ".\n",
- ll_get_fsname(sb, NULL, 0), md->lsm,
- PFID(ll_inode2fid(inode)));
- rc = cl_file_inode_init(inode, md);
- }
- if (rc != 0) {
- iget_failed(inode);
- inode = NULL;
- } else {
- unlock_new_inode(inode);
- }
- } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
- ll_update_inode(inode, md);
- CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p)\n",
- PFID(&md->body->fid1), inode);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ if (inode->i_state & I_NEW) {
+ rc = ll_read_inode2(inode, md);
+ if (!rc && S_ISREG(inode->i_mode) &&
+ !ll_i2info(inode)->lli_clob) {
+ CDEBUG(D_INODE, "%s: apply lsm %p to inode "DFID"\n",
+ ll_get_fsname(sb, NULL, 0), md->lsm,
+ PFID(ll_inode2fid(inode)));
+ rc = cl_file_inode_init(inode, md);
+ }
+ if (rc) {
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ inode = ERR_PTR(rc);
+ } else {
+ unlock_new_inode(inode);
+ }
+ } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
+ rc = ll_update_inode(inode, md);
+ CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p): rc = %d\n",
+ PFID(&md->body->mbo_fid1), inode, rc);
+ if (rc) {
+ iput(inode);
+ inode = ERR_PTR(rc);
}
}
return inode;
@@ -158,6 +161,11 @@ static void ll_invalidate_negative_children(struct inode *dir)
spin_unlock(&dir->i_lock);
}
+int ll_test_inode_by_fid(struct inode *inode, void *opaque)
+{
+ return lu_fid_eq(&ll_i2info(inode)->lli_fid, opaque);
+}
+
int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
void *data, int flag)
{
@@ -196,6 +204,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
}
if (bits & MDS_INODELOCK_XATTR) {
+ if (S_ISDIR(inode->i_mode))
+ ll_i2info(inode)->lli_def_stripe_offset = -1;
ll_xattr_cache_destroy(inode);
bits &= ~MDS_INODELOCK_XATTR;
}
@@ -253,10 +263,41 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
}
if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
- CDEBUG(D_INODE, "invalidating inode "DFID"\n",
- PFID(ll_inode2fid(inode)));
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ CDEBUG(D_INODE, "invalidating inode "DFID" lli = %p, pfid = "DFID"\n",
+ PFID(ll_inode2fid(inode)), lli,
+ PFID(&lli->lli_pfid));
+
truncate_inode_pages(inode->i_mapping, 0);
- ll_invalidate_negative_children(inode);
+
+ if (unlikely(!fid_is_zero(&lli->lli_pfid))) {
+ struct inode *master_inode = NULL;
+ unsigned long hash;
+
+ /*
+ * This is slave inode, since all of the child
+ * dentry is connected on the master inode, so
+ * we have to invalidate the negative children
+ * on master inode
+ */
+ CDEBUG(D_INODE, "Invalidate s"DFID" m"DFID"\n",
+ PFID(ll_inode2fid(inode)),
+ PFID(&lli->lli_pfid));
+
+ hash = cl_fid_build_ino(&lli->lli_pfid,
+ ll_need_32bit_api(ll_i2sbi(inode)));
+
+ master_inode = ilookup5(inode->i_sb, hash,
+ ll_test_inode_by_fid,
+ (void *)&lli->lli_pfid);
+ if (master_inode && !IS_ERR(master_inode)) {
+ ll_invalidate_negative_children(master_inode);
+ iput(master_inode);
+ }
+ } else {
+ ll_invalidate_negative_children(inode);
+ }
}
if ((bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)) &&
@@ -322,7 +363,8 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
LASSERT(alias != dentry);
spin_lock(&alias->d_lock);
- if (alias->d_flags & DCACHE_DISCONNECTED)
+ if ((alias->d_flags & DCACHE_DISCONNECTED) &&
+ S_ISDIR(inode->i_mode))
/* LASSERT(last_discon == NULL); LU-405, bz 20055 */
discon_alias = alias;
else if (alias->d_parent == dentry->d_parent &&
@@ -433,9 +475,20 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
struct lookup_intent parent_it = {
.it_op = IT_GETATTR,
.it_lock_handle = 0 };
+ struct lu_fid fid = ll_i2info(parent)->lli_fid;
+
+ /* If it is striped directory, get the real stripe parent */
+ if (unlikely(ll_i2info(parent)->lli_lsm_md)) {
+ rc = md_get_fid_from_lsm(ll_i2mdexp(parent),
+ ll_i2info(parent)->lli_lsm_md,
+ (*de)->d_name.name,
+ (*de)->d_name.len, &fid);
+ if (rc)
+ return rc;
+ }
- if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it,
- &ll_i2info(parent)->lli_fid, NULL)) {
+ if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it, &fid,
+ NULL)) {
d_lustre_revalidate(*de);
ll_intent_release(&parent_it);
}
@@ -449,13 +502,13 @@ out:
}
static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
- struct lookup_intent *it, int lookup_flags)
+ struct lookup_intent *it)
{
struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
struct dentry *save = dentry, *retval;
struct ptlrpc_request *req = NULL;
+ struct md_op_data *op_data = NULL;
struct inode *inode;
- struct md_op_data *op_data;
__u32 opc;
int rc;
@@ -471,8 +524,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
if (!it || it->it_op == IT_GETXATTR)
it = &lookup_it;
- if (it->it_op == IT_GETATTR) {
- rc = ll_statahead_enter(parent, &dentry, 0);
+ if (it->it_op == IT_GETATTR && dentry_may_statahead(parent, dentry)) {
+ rc = ll_statahead(parent, &dentry, 0);
if (rc == 1) {
if (dentry == save)
retval = NULL;
@@ -488,8 +541,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
opc = LUSTRE_OPC_ANY;
op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name,
- dentry->d_name.len, lookup_flags, opc,
- NULL);
+ dentry->d_name.len, 0, opc, NULL);
if (IS_ERR(op_data))
return (void *)op_data;
@@ -497,9 +549,38 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
it->it_create_mode &= ~current_umask();
- rc = md_intent_lock(ll_i2mdexp(parent), op_data, NULL, 0, it,
- lookup_flags, &req, ll_md_blocking_ast, 0);
- ll_finish_md_op_data(op_data);
+ rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
+ &ll_md_blocking_ast, 0);
+ /*
+ * If the MDS allows the client to chgrp (CFS_SETGRP_PERM), but the
+ * client does not know which suppgid should be sent to the MDS, or
+ * some other(s) changed the target file's GID after this RPC sent
+ * to the MDS with the suppgid as the original GID, then we should
+ * try again with right suppgid.
+ */
+ if (rc == -EACCES && it->it_op & IT_OPEN &&
+ it_disposition(it, DISP_OPEN_DENY)) {
+ struct mdt_body *body;
+
+ LASSERT(req);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ if (op_data->op_suppgids[0] == body->mbo_gid ||
+ op_data->op_suppgids[1] == body->mbo_gid ||
+ !in_group_p(make_kgid(&init_user_ns, body->mbo_gid))) {
+ retval = ERR_PTR(-EACCES);
+ goto out;
+ }
+
+ fid_zero(&op_data->op_fid2);
+ op_data->op_suppgids[1] = body->mbo_gid;
+ ptlrpc_req_finished(req);
+ req = NULL;
+ ll_intent_release(it);
+ rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
+ ll_md_blocking_ast, 0);
+ }
+
if (rc < 0) {
retval = ERR_PTR(rc);
goto out;
@@ -524,11 +605,11 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
retval = NULL;
else
retval = dentry;
- out:
- if (req)
- ptlrpc_req_finished(req);
- if (it->it_op == IT_GETATTR && (!retval || retval == dentry))
- ll_statahead_mark(parent, dentry);
+out:
+ if (op_data && !IS_ERR(op_data))
+ ll_finish_md_op_data(op_data);
+
+ ptlrpc_req_finished(req);
return retval;
}
@@ -541,15 +622,19 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),flags=%u\n",
dentry, PFID(ll_inode2fid(parent)), parent, flags);
- /* Optimize away (CREATE && !OPEN). Let .create handle the race. */
- if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN))
+ /* Optimize away (CREATE && !OPEN). Let .create handle the race.
+ * but only if we have write permissions there, otherwise we need
+ * to proceed with lookup. LU-4185
+ */
+ if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) &&
+ (inode_permission(parent, MAY_WRITE | MAY_EXEC) == 0))
return NULL;
- if (flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE))
+ if (flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE))
itp = NULL;
else
itp = &it;
- de = ll_lookup_it(parent, dentry, itp, 0);
+ de = ll_lookup_it(parent, dentry, itp);
if (itp)
ll_intent_release(itp);
@@ -567,7 +652,6 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
{
struct lookup_intent *it;
struct dentry *de;
- long long lookup_flags = LOOKUP_OPEN;
int rc = 0;
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),file %p,open_flags %x,mode %x opened %d\n",
@@ -597,15 +681,14 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
return -ENOMEM;
it->it_op = IT_OPEN;
- if (open_flags & O_CREAT) {
+ if (open_flags & O_CREAT)
it->it_op |= IT_CREAT;
- lookup_flags |= LOOKUP_CREATE;
- }
it->it_create_mode = (mode & S_IALLUGO) | S_IFREG;
it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags);
+ it->it_flags &= ~MDS_OPEN_FL_INTERNAL;
/* Dentry added to dcache tree in ll_lookup_it */
- de = ll_lookup_it(dir, dentry, it, lookup_flags);
+ de = ll_lookup_it(dir, dentry, it);
if (IS_ERR(de))
rc = PTR_ERR(de);
else if (de)
@@ -614,7 +697,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
if (!rc) {
if (it_disposition(it, DISP_OPEN_CREATE)) {
/* Dentry instantiated in ll_create_it. */
- rc = ll_create_it(dir, dentry, mode, it);
+ rc = ll_create_it(dir, dentry, it);
if (rc) {
/* We dget in ll_splice_alias. */
if (de)
@@ -700,7 +783,7 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
+static int ll_create_it(struct inode *dir, struct dentry *dentry,
struct lookup_intent *it)
{
struct inode *inode;
@@ -721,27 +804,26 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
return 0;
}
-static void ll_update_times(struct ptlrpc_request *request,
- struct inode *inode)
+void ll_update_times(struct ptlrpc_request *request, struct inode *inode)
{
struct mdt_body *body = req_capsule_server_get(&request->rq_pill,
&RMF_MDT_BODY);
LASSERT(body);
- if (body->valid & OBD_MD_FLMTIME &&
- body->mtime > LTIME_S(inode->i_mtime)) {
+ if (body->mbo_valid & OBD_MD_FLMTIME &&
+ body->mbo_mtime > LTIME_S(inode->i_mtime)) {
CDEBUG(D_INODE, "setting fid "DFID" mtime from %lu to %llu\n",
PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime),
- body->mtime);
- LTIME_S(inode->i_mtime) = body->mtime;
+ body->mbo_mtime);
+ LTIME_S(inode->i_mtime) = body->mbo_mtime;
}
- if (body->valid & OBD_MD_FLCTIME &&
- body->ctime > LTIME_S(inode->i_ctime))
- LTIME_S(inode->i_ctime) = body->ctime;
+ if (body->mbo_valid & OBD_MD_FLCTIME &&
+ body->mbo_ctime > LTIME_S(inode->i_ctime))
+ LTIME_S(inode->i_ctime) = body->mbo_ctime;
}
static int ll_new_node(struct inode *dir, struct dentry *dentry,
- const char *tgt, int mode, int rdev,
+ const char *tgt, umode_t mode, int rdev,
__u32 opc)
{
struct ptlrpc_request *request = NULL;
@@ -753,7 +835,7 @@ static int ll_new_node(struct inode *dir, struct dentry *dentry,
if (unlikely(tgt))
tgt_len = strlen(tgt) + 1;
-
+again:
op_data = ll_prep_md_op_data(NULL, dir, NULL,
dentry->d_name.name,
dentry->d_name.len,
@@ -768,9 +850,45 @@ static int ll_new_node(struct inode *dir, struct dentry *dentry,
from_kgid(&init_user_ns, current_fsgid()),
cfs_curproc_cap_pack(), rdev, &request);
ll_finish_md_op_data(op_data);
- if (err)
+ if (err < 0 && err != -EREMOTE)
goto err_exit;
+ /*
+ * If the client doesn't know where to create a subdirectory (or
+ * in case of a race that sends the RPC to the wrong MDS), the
+ * MDS will return -EREMOTE and the client will fetch the layout
+ * of the directory, then create the directory on the right MDT.
+ */
+ if (unlikely(err == -EREMOTE)) {
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct lmv_user_md *lum;
+ int lumsize, err2;
+
+ ptlrpc_req_finished(request);
+ request = NULL;
+
+ err2 = ll_dir_getstripe(dir, (void **)&lum, &lumsize, &request,
+ OBD_MD_DEFAULT_MEA);
+ if (!err2) {
+ /* Update stripe_offset and retry */
+ lli->lli_def_stripe_offset = lum->lum_stripe_offset;
+ } else if (err2 == -ENODATA &&
+ lli->lli_def_stripe_offset != -1) {
+ /*
+ * If there are no default stripe EA on the MDT, but the
+ * client has default stripe, then it probably means
+ * default stripe EA has just been deleted.
+ */
+ lli->lli_def_stripe_offset = -1;
+ } else {
+ goto err_exit;
+ }
+
+ ptlrpc_req_finished(request);
+ request = NULL;
+ goto again;
+ }
+
ll_update_times(request, dir);
err = ll_prep_inode(&inode, request, dir->i_sb, NULL);
@@ -779,7 +897,8 @@ static int ll_new_node(struct inode *dir, struct dentry *dentry,
d_instantiate(dentry, inode);
err_exit:
- ptlrpc_req_finished(request);
+ if (request)
+ ptlrpc_req_finished(request);
return err;
}
@@ -842,77 +961,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
return rc;
}
-int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
-{
- struct mdt_body *body;
- struct lov_mds_md *eadata;
- struct lov_stripe_md *lsm = NULL;
- struct obd_trans_info oti = { 0 };
- struct obdo *oa;
- int rc;
-
- /* req is swabbed so this is safe */
- body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
- if (!(body->valid & OBD_MD_FLEASIZE))
- return 0;
-
- if (body->eadatasize == 0) {
- CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
- rc = -EPROTO;
- goto out;
- }
-
- /* The MDS sent back the EA because we unlinked the last reference
- * to this file. Use this EA to unlink the objects on the OST.
- * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
- * check it is complete and sensible.
- */
- eadata = req_capsule_server_sized_get(&request->rq_pill, &RMF_MDT_MD,
- body->eadatasize);
- LASSERT(eadata);
-
- rc = obd_unpackmd(ll_i2dtexp(dir), &lsm, eadata, body->eadatasize);
- if (rc < 0) {
- CERROR("obd_unpackmd: %d\n", rc);
- goto out;
- }
- LASSERT(rc >= sizeof(*lsm));
-
- oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
- if (!oa) {
- rc = -ENOMEM;
- goto out_free_memmd;
- }
-
- oa->o_oi = lsm->lsm_oi;
- oa->o_mode = body->mode & S_IFMT;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP;
-
- if (body->valid & OBD_MD_FLCOOKIE) {
- oa->o_valid |= OBD_MD_FLCOOKIE;
- oti.oti_logcookies =
- req_capsule_server_sized_get(&request->rq_pill,
- &RMF_LOGCOOKIES,
- sizeof(struct llog_cookie) *
- lsm->lsm_stripe_count);
- if (!oti.oti_logcookies) {
- oa->o_valid &= ~OBD_MD_FLCOOKIE;
- body->valid &= ~OBD_MD_FLCOOKIE;
- }
- }
-
- rc = obd_destroy(NULL, ll_i2dtexp(dir), oa, lsm, &oti,
- ll_i2mdexp(dir));
- if (rc)
- CERROR("obd destroy objid "DOSTID" error %d\n",
- POSTID(&lsm->lsm_oi), rc);
-out_free_memmd:
- obd_free_memmd(ll_i2dtexp(dir), &lsm);
- kmem_cache_free(obdo_cachep, oa);
-out:
- return rc;
-}
-
/* ll_unlink() doesn't update the inode with the new link count.
* Instead, ll_ddelete() and ll_d_iput() will update it based upon if there
* is any lock existing. They will recycle dentries and inodes based upon locks
@@ -934,7 +982,7 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild)
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- if (dchild && dchild->d_inode)
+ if (dchild->d_inode)
op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
op_data->op_fid2 = op_data->op_fid3;
@@ -946,7 +994,6 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild)
ll_update_times(request, dir);
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK, 1);
- rc = ll_objects_destroy(request, dir);
out:
ptlrpc_req_finished(request);
return rc;
@@ -961,9 +1008,9 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
mode &= ~current_umask();
- mode = (mode & (S_IRWXUGO|S_ISVTX)) | S_IFDIR;
- err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR);
+ mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
+ err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR);
if (!err)
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR, 1);
@@ -986,7 +1033,7 @@ static int ll_rmdir(struct inode *dir, struct dentry *dchild)
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- if (dchild && dchild->d_inode)
+ if (dchild->d_inode)
op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
op_data->op_fid2 = op_data->op_fid3;
@@ -1067,9 +1114,9 @@ static int ll_rename(struct inode *src, struct dentry *src_dchild,
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- if (src_dchild && src_dchild->d_inode)
+ if (src_dchild->d_inode)
op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode);
- if (tgt_dchild && tgt_dchild->d_inode)
+ if (tgt_dchild->d_inode)
op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode);
err = md_rename(sbi->ll_md_exp, op_data,
@@ -1082,7 +1129,6 @@ static int ll_rename(struct inode *src, struct dentry *src_dchild,
ll_update_times(request, src);
ll_update_times(request, tgt);
ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1);
- err = ll_objects_destroy(request, src);
}
ptlrpc_req_finished(request);
@@ -1106,10 +1152,10 @@ const struct inode_operations ll_dir_inode_operations = {
.setattr = ll_setattr,
.getattr = ll_getattr,
.permission = ll_inode_permission,
- .setxattr = ll_setxattr,
- .getxattr = ll_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = ll_listxattr,
- .removexattr = ll_removexattr,
+ .removexattr = generic_removexattr,
.get_acl = ll_get_acl,
};
@@ -1117,9 +1163,9 @@ const struct inode_operations ll_special_inode_operations = {
.setattr = ll_setattr,
.getattr = ll_getattr,
.permission = ll_inode_permission,
- .setxattr = ll_setxattr,
- .getxattr = ll_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = ll_listxattr,
- .removexattr = ll_removexattr,
+ .removexattr = generic_removexattr,
.get_acl = ll_get_acl,
};
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.c b/drivers/staging/lustre/lustre/llite/range_lock.c
new file mode 100644
index 000000000000..94c818f1478b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/range_lock.c
@@ -0,0 +1,233 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Range lock is used to allow multiple threads writing a single shared
+ * file given each thread is writing to a non-overlapping portion of the
+ * file.
+ *
+ * Refer to the possible upstream kernel version of range lock by
+ * Jan Kara <jack@suse.cz>: https://lkml.org/lkml/2013/1/31/480
+ *
+ * This file could later be replaced by the upstream kernel version.
+ */
+/*
+ * Author: Prakash Surya <surya1@llnl.gov>
+ * Author: Bobi Jam <bobijam.xu@intel.com>
+ */
+#include "range_lock.h"
+#include "../include/lustre/lustre_user.h"
+
+/**
+ * Initialize a range lock tree
+ *
+ * \param tree [in] an empty range lock tree
+ *
+ * Pre: Caller should have allocated the range lock tree.
+ * Post: The range lock tree is ready to function.
+ */
+void range_lock_tree_init(struct range_lock_tree *tree)
+{
+ tree->rlt_root = NULL;
+ tree->rlt_sequence = 0;
+ spin_lock_init(&tree->rlt_lock);
+}
+
+/**
+ * Initialize a range lock node
+ *
+ * \param lock [in] an empty range lock node
+ * \param start [in] start of the covering region
+ * \param end [in] end of the covering region
+ *
+ * Pre: Caller should have allocated the range lock node.
+ * Post: The range lock node is meant to cover [start, end] region
+ */
+void range_lock_init(struct range_lock *lock, __u64 start, __u64 end)
+{
+ memset(&lock->rl_node, 0, sizeof(lock->rl_node));
+ if (end != LUSTRE_EOF)
+ end >>= PAGE_SHIFT;
+ interval_set(&lock->rl_node, start >> PAGE_SHIFT, end);
+ INIT_LIST_HEAD(&lock->rl_next_lock);
+ lock->rl_task = NULL;
+ lock->rl_lock_count = 0;
+ lock->rl_blocking_ranges = 0;
+ lock->rl_sequence = 0;
+}
+
+static inline struct range_lock *next_lock(struct range_lock *lock)
+{
+ return list_entry(lock->rl_next_lock.next, typeof(*lock), rl_next_lock);
+}
+
+/**
+ * Helper function of range_unlock()
+ *
+ * \param node [in] a range lock found overlapped during interval node
+ * search
+ * \param arg [in] the range lock to be tested
+ *
+ * \retval INTERVAL_ITER_CONT indicate to continue the search for next
+ * overlapping range node
+ * \retval INTERVAL_ITER_STOP indicate to stop the search
+ */
+static enum interval_iter range_unlock_cb(struct interval_node *node, void *arg)
+{
+ struct range_lock *lock = arg;
+ struct range_lock *overlap = node2rangelock(node);
+ struct range_lock *iter;
+
+ list_for_each_entry(iter, &overlap->rl_next_lock, rl_next_lock) {
+ if (iter->rl_sequence > lock->rl_sequence) {
+ --iter->rl_blocking_ranges;
+ LASSERT(iter->rl_blocking_ranges > 0);
+ }
+ }
+ if (overlap->rl_sequence > lock->rl_sequence) {
+ --overlap->rl_blocking_ranges;
+ if (overlap->rl_blocking_ranges == 0)
+ wake_up_process(overlap->rl_task);
+ }
+ return INTERVAL_ITER_CONT;
+}
+
+/**
+ * Unlock a range lock, wake up locks blocked by this lock.
+ *
+ * \param tree [in] range lock tree
+ * \param lock [in] range lock to be deleted
+ *
+ * If this lock has been granted, release it; if not, just delete it from
+ * the tree or the same region lock list. Wake up those locks only blocked
+ * by this lock through range_unlock_cb().
+ */
+void range_unlock(struct range_lock_tree *tree, struct range_lock *lock)
+{
+ spin_lock(&tree->rlt_lock);
+ if (!list_empty(&lock->rl_next_lock)) {
+ struct range_lock *next;
+
+ if (interval_is_intree(&lock->rl_node)) { /* first lock */
+ /* Insert the next same range lock into the tree */
+ next = next_lock(lock);
+ next->rl_lock_count = lock->rl_lock_count - 1;
+ interval_erase(&lock->rl_node, &tree->rlt_root);
+ interval_insert(&next->rl_node, &tree->rlt_root);
+ } else {
+ /* find the first lock in tree */
+ list_for_each_entry(next, &lock->rl_next_lock,
+ rl_next_lock) {
+ if (!interval_is_intree(&next->rl_node))
+ continue;
+
+ LASSERT(next->rl_lock_count > 0);
+ next->rl_lock_count--;
+ break;
+ }
+ }
+ list_del_init(&lock->rl_next_lock);
+ } else {
+ LASSERT(interval_is_intree(&lock->rl_node));
+ interval_erase(&lock->rl_node, &tree->rlt_root);
+ }
+
+ interval_search(tree->rlt_root, &lock->rl_node.in_extent,
+ range_unlock_cb, lock);
+ spin_unlock(&tree->rlt_lock);
+}
+
+/**
+ * Helper function of range_lock()
+ *
+ * \param node [in] a range lock found overlapped during interval node
+ * search
+ * \param arg [in] the range lock to be tested
+ *
+ * \retval INTERVAL_ITER_CONT indicate to continue the search for next
+ * overlapping range node
+ * \retval INTERVAL_ITER_STOP indicate to stop the search
+ */
+static enum interval_iter range_lock_cb(struct interval_node *node, void *arg)
+{
+ struct range_lock *lock = (struct range_lock *)arg;
+ struct range_lock *overlap = node2rangelock(node);
+
+ lock->rl_blocking_ranges += overlap->rl_lock_count + 1;
+ return INTERVAL_ITER_CONT;
+}
+
+/**
+ * Lock a region
+ *
+ * \param tree [in] range lock tree
+ * \param lock [in] range lock node containing the region span
+ *
+ * \retval 0 get the range lock
+ * \retval <0 error code while not getting the range lock
+ *
+ * If an overlapping range lock exists, the new lock will wait and
+ * retry; if it later finds that it is not the one chosen to wake up,
+ * it waits again.
+ */
+int range_lock(struct range_lock_tree *tree, struct range_lock *lock)
+{
+ struct interval_node *node;
+ int rc = 0;
+
+ spin_lock(&tree->rlt_lock);
+ /*
+ * We need to check for all conflicting intervals
+ * already in the tree.
+ */
+ interval_search(tree->rlt_root, &lock->rl_node.in_extent,
+ range_lock_cb, lock);
+ /*
+ * Insert into the tree if I am unique; otherwise link myself (below)
+ * onto the rl_next_lock list of the lock already in the tree which
+ * has the same range as mine.
+ */
+ node = interval_insert(&lock->rl_node, &tree->rlt_root);
+ if (node) {
+ struct range_lock *tmp = node2rangelock(node);
+
+ list_add_tail(&lock->rl_next_lock, &tmp->rl_next_lock);
+ tmp->rl_lock_count++;
+ }
+ lock->rl_sequence = ++tree->rlt_sequence;
+
+ while (lock->rl_blocking_ranges > 0) {
+ lock->rl_task = current;
+ __set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock(&tree->rlt_lock);
+ schedule();
+
+ if (signal_pending(current)) {
+ range_unlock(tree, lock);
+ rc = -EINTR;
+ goto out;
+ }
+ spin_lock(&tree->rlt_lock);
+ }
+ spin_unlock(&tree->rlt_lock);
+out:
+ return rc;
+}
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.h b/drivers/staging/lustre/lustre/llite/range_lock.h
new file mode 100644
index 000000000000..c6d04a6f99fd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/range_lock.h
@@ -0,0 +1,82 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Range lock is used to allow multiple threads writing a single shared
+ * file given each thread is writing to a non-overlapping portion of the
+ * file.
+ *
+ * Refer to the possible upstream kernel version of range lock by
+ * Jan Kara <jack@suse.cz>: https://lkml.org/lkml/2013/1/31/480
+ *
+ * This file could later be replaced by the upstream kernel version.
+ */
+/*
+ * Author: Prakash Surya <surya1@llnl.gov>
+ * Author: Bobi Jam <bobijam.xu@intel.com>
+ */
+#ifndef _RANGE_LOCK_H
+#define _RANGE_LOCK_H
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include "../include/interval_tree.h"
+
+struct range_lock {
+ struct interval_node rl_node;
+ /**
+ * Process to enqueue this lock.
+ */
+ struct task_struct *rl_task;
+ /**
+ * List of locks with the same range.
+ */
+ struct list_head rl_next_lock;
+ /**
+ * Number of locks in the list rl_next_lock
+ */
+ unsigned int rl_lock_count;
+ /**
+ * Number of ranges which are blocking acquisition of the lock
+ */
+ unsigned int rl_blocking_ranges;
+ /**
+ * Sequence number of range lock. This number is used to determine
+ * the order in which the locks were queued; range_unlock() relies on it.
+ */
+ __u64 rl_sequence;
+};
+
+static inline struct range_lock *node2rangelock(const struct interval_node *n)
+{
+ return container_of(n, struct range_lock, rl_node);
+}
+
+struct range_lock_tree {
+ struct interval_node *rlt_root;
+ spinlock_t rlt_lock; /* protect range lock tree */
+ __u64 rlt_sequence;
+};
+
+void range_lock_tree_init(struct range_lock_tree *tree);
+void range_lock_init(struct range_lock *lock, __u64 start, __u64 end);
+int range_lock(struct range_lock_tree *tree, struct range_lock *lock);
+void range_unlock(struct range_lock_tree *tree, struct range_lock *lock);
+#endif
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index 87393c4bd51e..50c0152ba022 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -50,10 +50,8 @@
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/lustre_lite.h"
#include "../include/obd_cksum.h"
#include "llite_internal.h"
-#include "../include/linux/lustre_compat25.h"
static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
@@ -413,7 +411,7 @@ static int ll_read_ahead_pages(const struct lu_env *env,
* forward read-ahead, it will be fixed when backward
* read-ahead is implemented
*/
- LASSERTF(page_idx > ria->ria_stoff, "Invalid page_idx %lu rs %lu re %lu ro %lu rl %lu rp %lu\n",
+ LASSERTF(page_idx >= ria->ria_stoff, "Invalid page_idx %lu rs %lu re %lu ro %lu rl %lu rp %lu\n",
page_idx,
ria->ria_start, ria->ria_end, ria->ria_stoff,
ria->ria_length, ria->ria_pages);
@@ -474,10 +472,22 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
}
/* Reserve a part of the read-ahead window that we'll be issuing */
- if (ras->ras_window_len) {
- start = ras->ras_next_readahead;
+ if (ras->ras_window_len > 0) {
+ /*
+ * Note: other thread might rollback the ras_next_readahead,
+ * if it can not get the full size of prepared pages, see the
+ * end of this function. For stride read ahead, it needs to
+ * make sure the offset is no less than ras_stride_offset,
+ * so that stride read ahead can work correctly.
+ */
+ if (stride_io_mode(ras))
+ start = max(ras->ras_next_readahead,
+ ras->ras_stride_offset);
+ else
+ start = ras->ras_next_readahead;
end = ras->ras_window_start + ras->ras_window_len - 1;
}
+
if (end != 0) {
unsigned long rpc_boundary;
/*
@@ -648,10 +658,11 @@ static void ras_update_stride_detector(struct ll_readahead_state *ras,
{
unsigned long stride_gap = index - ras->ras_last_readpage - 1;
- if (!stride_io_mode(ras) && (stride_gap != 0 ||
- ras->ras_consecutive_stride_requests == 0)) {
+ if ((stride_gap != 0 || ras->ras_consecutive_stride_requests == 0) &&
+ !stride_io_mode(ras)) {
ras->ras_stride_pages = ras->ras_consecutive_pages;
- ras->ras_stride_length = stride_gap+ras->ras_consecutive_pages;
+ ras->ras_stride_length = ras->ras_consecutive_pages +
+ stride_gap;
}
LASSERT(ras->ras_request_index == 0);
LASSERT(ras->ras_consecutive_stride_requests == 0);
@@ -663,10 +674,9 @@ static void ras_update_stride_detector(struct ll_readahead_state *ras,
}
ras->ras_stride_pages = ras->ras_consecutive_pages;
- ras->ras_stride_length = stride_gap+ras->ras_consecutive_pages;
+ ras->ras_stride_length = stride_gap + ras->ras_consecutive_pages;
RAS_CDEBUG(ras);
- return;
}
/* Stride Read-ahead window will be increased inc_len according to
@@ -882,7 +892,6 @@ out_unlock:
RAS_CDEBUG(ras);
ras->ras_request_index++;
spin_unlock(&ras->ras_lock);
- return;
}
int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
@@ -1015,11 +1024,15 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
* is called later on.
*/
ignore_layout = 1;
+
+ if (!ll_i2info(inode)->lli_clob)
+ return 0;
+
result = cl_sync_file_range(inode, start, end, mode, ignore_layout);
if (result > 0) {
wbc->nr_to_write -= result;
result = 0;
- }
+ }
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
if (end == OBD_OBJECT_EOF)
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index d98c7acc0832..26f3a37873a7 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -51,9 +51,7 @@
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/lustre_lite.h"
#include "llite_internal.h"
-#include "../include/linux/lustre_compat25.h"
/**
* Implements Linux VM address_space::invalidatepage() method. This method is
@@ -161,7 +159,7 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
return result;
}
-#define MAX_DIRECTIO_SIZE (2*1024*1024*1024UL)
+#define MAX_DIRECTIO_SIZE (2 * 1024 * 1024 * 1024UL)
static inline int ll_get_user_pages(int rw, unsigned long user_addr,
size_t size, struct page ***pages,
@@ -214,10 +212,10 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
int i;
ssize_t rc = 0;
loff_t file_offset = pv->ldp_start_offset;
- long size = pv->ldp_size;
+ size_t size = pv->ldp_size;
int page_count = pv->ldp_nr;
struct page **pages = pv->ldp_pages;
- long page_size = cl_page_size(obj);
+ size_t page_size = cl_page_size(obj);
bool do_io;
int io_pages = 0;
@@ -346,7 +344,6 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
struct cl_io *io;
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- struct vvp_object *obj = cl_inode2vvp(inode);
loff_t file_offset = iocb->ki_pos;
ssize_t count = iov_iter_count(iter);
ssize_t tot_bytes = 0, result = 0;
@@ -375,14 +372,6 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
io = vvp_env_io(env)->vui_cl.cis_io;
LASSERT(io);
- /* 0. Need locking between buffered and direct access. and race with
- * size changing by concurrent truncates and writes.
- * 1. Need inode mutex to operate transient pages.
- */
- if (iov_iter_rw(iter) == READ)
- inode_lock(inode);
-
- LASSERT(obj->vob_transient_pages == 0);
while (iov_iter_count(iter)) {
struct page **pages;
size_t offs;
@@ -430,10 +419,6 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
file_offset += result;
}
out:
- LASSERT(obj->vob_transient_pages == 0);
- if (iov_iter_rw(iter) == READ)
- inode_unlock(inode);
-
if (tot_bytes > 0) {
struct vvp_io *vio = vvp_env_io(env);
@@ -616,6 +601,13 @@ static int ll_write_end(struct file *file, struct address_space *mapping,
LASSERT(from == 0);
vio->u.write.vui_to = from + copied;
+ /*
+ * To address the deadlock in balance_dirty_pages() where
+ * this dirty page may be written back in the same thread.
+ */
+ if (PageDirty(vmpage))
+ unplug = true;
+
/* We may have one full RPC, commit it soon */
if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
unplug = true;
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
index c1cb6b19e724..0677513476ec 100644
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@ -39,7 +39,6 @@
#define DEBUG_SUBSYSTEM S_LLITE
#include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
#include "../include/lustre_dlm.h"
#include "llite_internal.h"
@@ -50,24 +49,26 @@ enum se_stat {
SA_ENTRY_INIT = 0, /** init entry */
SA_ENTRY_SUCC = 1, /** stat succeed */
SA_ENTRY_INVA = 2, /** invalid entry */
- SA_ENTRY_DEST = 3, /** entry to be destroyed */
};
-struct ll_sa_entry {
- /* link into sai->sai_entries */
- struct list_head se_link;
- /* link into sai->sai_entries_{received,stated} */
+/*
+ * sa_entry is not refcounted: the statahead thread allocates it and does an
+ * async stat; the async stat callback ll_statahead_interpret() adds it to
+ * sai_interim_entries; later the statahead thread calls sa_handle_callback() to
+ * instantiate the entry and move it into sai_entries, after which only the
+ * scanner process can access and free it.
+ */
+struct sa_entry {
+ /* link into sai_interim_entries or sai_entries */
struct list_head se_list;
/* link into sai hash table locally */
struct list_head se_hash;
- /* entry reference count */
- atomic_t se_refcount;
/* entry index in the sai */
__u64 se_index;
/* low layer ldlm lock handle */
__u64 se_handle;
/* entry status */
- enum se_stat se_stat;
+ enum se_stat se_state;
/* entry size, contains name */
int se_size;
/* pointer to async getattr enqueue info */
@@ -83,27 +84,24 @@ struct ll_sa_entry {
static unsigned int sai_generation;
static DEFINE_SPINLOCK(sai_generation_lock);
-/*
- * The entry only can be released by the caller, it is necessary to hold lock.
- */
-static inline int ll_sa_entry_stated(struct ll_sa_entry *entry)
+/* sa_entry is ready to use */
+static inline int sa_ready(struct sa_entry *entry)
{
smp_rmb();
- return (entry->se_stat != SA_ENTRY_INIT);
+ return (entry->se_state != SA_ENTRY_INIT);
}
-static inline int ll_sa_entry_hash(int val)
+/* hash value to put in sai_cache */
+static inline int sa_hash(int val)
{
return val & LL_SA_CACHE_MASK;
}
-/*
- * Insert entry to hash SA table.
- */
+/* hash entry into sai_cache */
static inline void
-ll_sa_entry_enhash(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+sa_rehash(struct ll_statahead_info *sai, struct sa_entry *entry)
{
- int i = ll_sa_entry_hash(entry->se_qstr.hash);
+ int i = sa_hash(entry->se_qstr.hash);
spin_lock(&sai->sai_cache_lock[i]);
list_add_tail(&entry->se_hash, &sai->sai_cache[i]);
@@ -114,9 +112,9 @@ ll_sa_entry_enhash(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
* Remove entry from SA table.
*/
static inline void
-ll_sa_entry_unhash(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+sa_unhash(struct ll_statahead_info *sai, struct sa_entry *entry)
{
- int i = ll_sa_entry_hash(entry->se_qstr.hash);
+ int i = sa_hash(entry->se_qstr.hash);
spin_lock(&sai->sai_cache_lock[i]);
list_del_init(&entry->se_hash);
@@ -129,19 +127,21 @@ static inline int agl_should_run(struct ll_statahead_info *sai,
return (inode && S_ISREG(inode->i_mode) && sai->sai_agl_valid);
}
+/* statahead window is full */
static inline int sa_sent_full(struct ll_statahead_info *sai)
{
return atomic_read(&sai->sai_cache_count) >= sai->sai_max;
}
-static inline int sa_received_empty(struct ll_statahead_info *sai)
+/* got async stat replies */
+static inline int sa_has_callback(struct ll_statahead_info *sai)
{
- return list_empty(&sai->sai_entries_received);
+ return !list_empty(&sai->sai_interim_entries);
}
static inline int agl_list_empty(struct ll_statahead_info *sai)
{
- return list_empty(&sai->sai_entries_agl);
+ return list_empty(&sai->sai_agls);
}
/**
@@ -157,7 +157,7 @@ static inline int sa_low_hit(struct ll_statahead_info *sai)
}
/*
- * If the given index is behind of statahead window more than
+ * if the given index is behind of statahead window more than
* SA_OMITTED_ENTRY_MAX, then it is old.
*/
static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
@@ -166,20 +166,17 @@ static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
sai->sai_index);
}
-/*
- * Insert it into sai_entries tail when init.
- */
-static struct ll_sa_entry *
-ll_sa_entry_alloc(struct dentry *parent,
- struct ll_statahead_info *sai, __u64 index,
- const char *name, int len)
+/* allocate sa_entry and hash it to allow scanner process to find it */
+static struct sa_entry *
+sa_alloc(struct dentry *parent, struct ll_statahead_info *sai, __u64 index,
+ const char *name, int len)
{
struct ll_inode_info *lli;
- struct ll_sa_entry *entry;
+ struct sa_entry *entry;
int entry_size;
char *dname;
- entry_size = sizeof(struct ll_sa_entry) + (len & ~3) + 4;
+ entry_size = sizeof(struct sa_entry) + (len & ~3) + 4;
entry = kzalloc(entry_size, GFP_NOFS);
if (unlikely(!entry))
return ERR_PTR(-ENOMEM);
@@ -188,34 +185,9 @@ ll_sa_entry_alloc(struct dentry *parent,
len, name, entry, index);
entry->se_index = index;
-
- /*
- * Statahead entry reference rules:
- *
- * 1) When statahead entry is initialized, its reference is set as 2.
- * One reference is used by the directory scanner. When the scanner
- * searches the statahead cache for the given name, it can perform
- * lockless hash lookup (only the scanner can remove entry from hash
- * list), and once found, it needn't to call "atomic_inc()" for the
- * entry reference. So the performance is improved. After using the
- * statahead entry, the scanner will call "atomic_dec()" to drop the
- * reference held when initialization. If it is the last reference,
- * the statahead entry will be freed.
- *
- * 2) All other threads, including statahead thread and ptlrpcd thread,
- * when they process the statahead entry, the reference for target
- * should be held to guarantee the entry will not be released by the
- * directory scanner. After processing the entry, these threads will
- * drop the entry reference. If it is the last reference, the entry
- * will be freed.
- *
- * The second reference when initializes the statahead entry is used
- * by the statahead thread, following the rule 2).
- */
- atomic_set(&entry->se_refcount, 2);
- entry->se_stat = SA_ENTRY_INIT;
+ entry->se_state = SA_ENTRY_INIT;
entry->se_size = entry_size;
- dname = (char *)entry + sizeof(struct ll_sa_entry);
+ dname = (char *)entry + sizeof(struct sa_entry);
memcpy(dname, name, len);
dname[len] = 0;
@@ -223,11 +195,10 @@ ll_sa_entry_alloc(struct dentry *parent,
entry->se_qstr.len = len;
entry->se_qstr.name = dname;
- lli = ll_i2info(sai->sai_inode);
+ lli = ll_i2info(sai->sai_dentry->d_inode);
spin_lock(&lli->lli_sa_lock);
- list_add_tail(&entry->se_link, &sai->sai_entries);
INIT_LIST_HEAD(&entry->se_list);
- ll_sa_entry_enhash(sai, entry);
+ sa_rehash(sai, entry);
spin_unlock(&lli->lli_sa_lock);
atomic_inc(&sai->sai_cache_count);
@@ -235,18 +206,29 @@ ll_sa_entry_alloc(struct dentry *parent,
return entry;
}
+/* free sa_entry, which should have been unhashed and not in any list */
+static void sa_free(struct ll_statahead_info *sai, struct sa_entry *entry)
+{
+ CDEBUG(D_READA, "free sa entry %.*s(%p) index %llu\n",
+ entry->se_qstr.len, entry->se_qstr.name, entry,
+ entry->se_index);
+
+ LASSERT(list_empty(&entry->se_list));
+ LASSERT(list_empty(&entry->se_hash));
+
+ kfree(entry);
+ atomic_dec(&sai->sai_cache_count);
+}
+
/*
- * Used by the directory scanner to search entry with name.
- *
- * Only the caller can remove the entry from hash, so it is unnecessary to hold
- * hash lock. It is caller's duty to release the init refcount on the entry, so
- * it is also unnecessary to increase refcount on the entry.
+ * find sa_entry by name, used by directory scanner, lock is not needed because
+ * only scanner can remove the entry from cache.
*/
-static struct ll_sa_entry *
-ll_sa_entry_get_byname(struct ll_statahead_info *sai, const struct qstr *qstr)
+static struct sa_entry *
+sa_get(struct ll_statahead_info *sai, const struct qstr *qstr)
{
- struct ll_sa_entry *entry;
- int i = ll_sa_entry_hash(qstr->hash);
+ struct sa_entry *entry;
+ int i = sa_hash(qstr->hash);
list_for_each_entry(entry, &sai->sai_cache[i], se_hash) {
if (entry->se_qstr.hash == qstr->hash &&
@@ -257,164 +239,126 @@ ll_sa_entry_get_byname(struct ll_statahead_info *sai, const struct qstr *qstr)
return NULL;
}
-/*
- * Used by the async getattr request callback to find entry with index.
- *
- * Inside lli_sa_lock to prevent others to change the list during the search.
- * It needs to increase entry refcount before returning to guarantee that the
- * entry cannot be freed by others.
- */
-static struct ll_sa_entry *
-ll_sa_entry_get_byindex(struct ll_statahead_info *sai, __u64 index)
-{
- struct ll_sa_entry *entry;
-
- list_for_each_entry(entry, &sai->sai_entries, se_link) {
- if (entry->se_index == index) {
- LASSERT(atomic_read(&entry->se_refcount) > 0);
- atomic_inc(&entry->se_refcount);
- return entry;
- }
- if (entry->se_index > index)
- break;
- }
- return NULL;
-}
-
-static void ll_sa_entry_cleanup(struct ll_statahead_info *sai,
- struct ll_sa_entry *entry)
-{
- struct md_enqueue_info *minfo = entry->se_minfo;
- struct ptlrpc_request *req = entry->se_req;
-
- if (minfo) {
- entry->se_minfo = NULL;
- ll_intent_release(&minfo->mi_it);
- iput(minfo->mi_dir);
- kfree(minfo);
- }
-
- if (req) {
- entry->se_req = NULL;
- ptlrpc_req_finished(req);
- }
-}
-
-static void ll_sa_entry_put(struct ll_statahead_info *sai,
- struct ll_sa_entry *entry)
-{
- if (atomic_dec_and_test(&entry->se_refcount)) {
- CDEBUG(D_READA, "free sa entry %.*s(%p) index %llu\n",
- entry->se_qstr.len, entry->se_qstr.name, entry,
- entry->se_index);
-
- LASSERT(list_empty(&entry->se_link));
- LASSERT(list_empty(&entry->se_list));
- LASSERT(list_empty(&entry->se_hash));
-
- ll_sa_entry_cleanup(sai, entry);
- iput(entry->se_inode);
-
- kfree(entry);
- atomic_dec(&sai->sai_cache_count);
- }
-}
-
+/* unhash and unlink sa_entry, and then free it */
static inline void
-do_sa_entry_fini(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+sa_kill(struct ll_statahead_info *sai, struct sa_entry *entry)
{
- struct ll_inode_info *lli = ll_i2info(sai->sai_inode);
+ struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
LASSERT(!list_empty(&entry->se_hash));
- LASSERT(!list_empty(&entry->se_link));
+ LASSERT(!list_empty(&entry->se_list));
+ LASSERT(sa_ready(entry));
- ll_sa_entry_unhash(sai, entry);
+ sa_unhash(sai, entry);
spin_lock(&lli->lli_sa_lock);
- entry->se_stat = SA_ENTRY_DEST;
- list_del_init(&entry->se_link);
- if (likely(!list_empty(&entry->se_list)))
- list_del_init(&entry->se_list);
+ list_del_init(&entry->se_list);
spin_unlock(&lli->lli_sa_lock);
- ll_sa_entry_put(sai, entry);
+ if (entry->se_inode)
+ iput(entry->se_inode);
+
+ sa_free(sai, entry);
}
-/*
- * Delete it from sai_entries_stated list when fini.
- */
+/* called by scanner after use, sa_entry will be killed */
static void
-ll_sa_entry_fini(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+sa_put(struct ll_statahead_info *sai, struct sa_entry *entry)
{
- struct ll_sa_entry *pos, *next;
+ struct sa_entry *tmp, *next;
+
+ if (entry && entry->se_state == SA_ENTRY_SUCC) {
+ struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode);
+
+ sai->sai_hit++;
+ sai->sai_consecutive_miss = 0;
+ sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max);
+ } else {
+ sai->sai_miss++;
+ sai->sai_consecutive_miss++;
+ }
if (entry)
- do_sa_entry_fini(sai, entry);
+ sa_kill(sai, entry);
- /* drop old entry, only 'scanner' process does this, no need to lock */
- list_for_each_entry_safe(pos, next, &sai->sai_entries, se_link) {
- if (!is_omitted_entry(sai, pos->se_index))
+ /*
+ * kill old completed entries, only scanner process does this, no need
+ * to lock
+ */
+ list_for_each_entry_safe(tmp, next, &sai->sai_entries, se_list) {
+ if (!is_omitted_entry(sai, tmp->se_index))
break;
- do_sa_entry_fini(sai, pos);
+ sa_kill(sai, tmp);
}
+
+ wake_up(&sai->sai_thread.t_ctl_waitq);
}
/*
- * Inside lli_sa_lock.
+ * update state and insert entry into sai_entries sorted by index, return true
+ * if scanner is waiting on this entry.
*/
-static void
-do_sa_entry_to_stated(struct ll_statahead_info *sai,
- struct ll_sa_entry *entry, enum se_stat stat)
+static bool
+__sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret)
{
- struct ll_sa_entry *se;
- struct list_head *pos = &sai->sai_entries_stated;
+ struct list_head *pos = &sai->sai_entries;
+ __u64 index = entry->se_index;
+ struct sa_entry *se;
- if (!list_empty(&entry->se_list))
- list_del_init(&entry->se_list);
+ LASSERT(!sa_ready(entry));
+ LASSERT(list_empty(&entry->se_list));
- list_for_each_entry_reverse(se, &sai->sai_entries_stated, se_list) {
+ list_for_each_entry_reverse(se, &sai->sai_entries, se_list) {
if (se->se_index < entry->se_index) {
pos = &se->se_list;
break;
}
}
-
list_add(&entry->se_list, pos);
- entry->se_stat = stat;
+ entry->se_state = ret < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC;
+
+ return (index == sai->sai_index_wait);
}
/*
- * Move entry to sai_entries_stated and sort with the index.
- * \retval 1 -- entry to be destroyed.
- * \retval 0 -- entry is inserted into stated list.
+ * release resources used in async stat RPC, update entry state and wake up the
+ * scanner process if it is waiting on this entry.
*/
-static int
-ll_sa_entry_to_stated(struct ll_statahead_info *sai,
- struct ll_sa_entry *entry, enum se_stat stat)
+static void
+sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret)
{
- struct ll_inode_info *lli = ll_i2info(sai->sai_inode);
- int ret = 1;
+ struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
+ struct md_enqueue_info *minfo = entry->se_minfo;
+ struct ptlrpc_request *req = entry->se_req;
+ bool wakeup;
+
+ /* release resources used in RPC */
+ if (minfo) {
+ entry->se_minfo = NULL;
+ ll_intent_release(&minfo->mi_it);
+ iput(minfo->mi_dir);
+ kfree(minfo);
+ }
- ll_sa_entry_cleanup(sai, entry);
+ if (req) {
+ entry->se_req = NULL;
+ ptlrpc_req_finished(req);
+ }
spin_lock(&lli->lli_sa_lock);
- if (likely(entry->se_stat != SA_ENTRY_DEST)) {
- do_sa_entry_to_stated(sai, entry, stat);
- ret = 0;
- }
+ wakeup = __sa_make_ready(sai, entry, ret);
spin_unlock(&lli->lli_sa_lock);
- return ret;
+ if (wakeup)
+ wake_up(&sai->sai_waitq);
}
-/*
- * Insert inode into the list of sai_entries_agl.
- */
+/* Insert inode into the list of sai_agls. */
static void ll_agl_add(struct ll_statahead_info *sai,
struct inode *inode, int index)
{
struct ll_inode_info *child = ll_i2info(inode);
- struct ll_inode_info *parent = ll_i2info(sai->sai_inode);
+ struct ll_inode_info *parent = ll_i2info(sai->sai_dentry->d_inode);
int added = 0;
spin_lock(&child->lli_agl_lock);
@@ -426,9 +370,9 @@ static void ll_agl_add(struct ll_statahead_info *sai,
igrab(inode);
spin_lock(&parent->lli_agl_lock);
- if (list_empty(&sai->sai_entries_agl))
+ if (list_empty(&sai->sai_agls))
added = 1;
- list_add_tail(&child->lli_agl_list, &sai->sai_entries_agl);
+ list_add_tail(&child->lli_agl_list, &sai->sai_agls);
spin_unlock(&parent->lli_agl_lock);
} else {
spin_unlock(&child->lli_agl_lock);
@@ -438,8 +382,10 @@ static void ll_agl_add(struct ll_statahead_info *sai,
wake_up(&sai->sai_agl_thread.t_ctl_waitq);
}
-static struct ll_statahead_info *ll_sai_alloc(void)
+/* allocate sai */
+static struct ll_statahead_info *ll_sai_alloc(struct dentry *dentry)
{
+ struct ll_inode_info *lli = ll_i2info(dentry->d_inode);
struct ll_statahead_info *sai;
int i;
@@ -447,24 +393,18 @@ static struct ll_statahead_info *ll_sai_alloc(void)
if (!sai)
return NULL;
+ sai->sai_dentry = dget(dentry);
atomic_set(&sai->sai_refcount, 1);
- spin_lock(&sai_generation_lock);
- sai->sai_generation = ++sai_generation;
- if (unlikely(sai_generation == 0))
- sai->sai_generation = ++sai_generation;
- spin_unlock(&sai_generation_lock);
-
sai->sai_max = LL_SA_RPC_MIN;
sai->sai_index = 1;
init_waitqueue_head(&sai->sai_waitq);
init_waitqueue_head(&sai->sai_thread.t_ctl_waitq);
init_waitqueue_head(&sai->sai_agl_thread.t_ctl_waitq);
+ INIT_LIST_HEAD(&sai->sai_interim_entries);
INIT_LIST_HEAD(&sai->sai_entries);
- INIT_LIST_HEAD(&sai->sai_entries_received);
- INIT_LIST_HEAD(&sai->sai_entries_stated);
- INIT_LIST_HEAD(&sai->sai_entries_agl);
+ INIT_LIST_HEAD(&sai->sai_agls);
for (i = 0; i < LL_SA_CACHE_SIZE; i++) {
INIT_LIST_HEAD(&sai->sai_cache[i]);
@@ -472,63 +412,74 @@ static struct ll_statahead_info *ll_sai_alloc(void)
}
atomic_set(&sai->sai_cache_count, 0);
+ spin_lock(&sai_generation_lock);
+ lli->lli_sa_generation = ++sai_generation;
+ if (unlikely(!sai_generation))
+ lli->lli_sa_generation = ++sai_generation;
+ spin_unlock(&sai_generation_lock);
+
return sai;
}
-static inline struct ll_statahead_info *
-ll_sai_get(struct ll_statahead_info *sai)
+/* free sai */
+static inline void ll_sai_free(struct ll_statahead_info *sai)
{
- atomic_inc(&sai->sai_refcount);
+ LASSERT(sai->sai_dentry);
+ dput(sai->sai_dentry);
+ kfree(sai);
+}
+
+/*
+ * take refcount of sai if sai for @dir exists, which means statahead is on for
+ * this directory.
+ */
+static inline struct ll_statahead_info *ll_sai_get(struct inode *dir)
+{
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct ll_statahead_info *sai = NULL;
+
+ spin_lock(&lli->lli_sa_lock);
+ sai = lli->lli_sai;
+ if (sai)
+ atomic_inc(&sai->sai_refcount);
+ spin_unlock(&lli->lli_sa_lock);
+
return sai;
}
+/*
+ * put sai refcount after use, if refcount reaches zero, free sai and sa_entries
+ * attached to it.
+ */
static void ll_sai_put(struct ll_statahead_info *sai)
{
- struct inode *inode = sai->sai_inode;
- struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_sa_lock)) {
- struct ll_sa_entry *entry, *next;
-
- if (unlikely(atomic_read(&sai->sai_refcount) > 0)) {
- /* It is race case, the interpret callback just hold
- * a reference count
- */
- spin_unlock(&lli->lli_sa_lock);
- return;
- }
-
- LASSERT(!lli->lli_opendir_key);
- LASSERT(thread_is_stopped(&sai->sai_thread));
- LASSERT(thread_is_stopped(&sai->sai_agl_thread));
+ struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode);
+ struct sa_entry *entry, *next;
lli->lli_sai = NULL;
- lli->lli_opendir_pid = 0;
spin_unlock(&lli->lli_sa_lock);
- if (sai->sai_sent > sai->sai_replied)
- CDEBUG(D_READA, "statahead for dir "DFID
- " does not finish: [sent:%llu] [replied:%llu]\n",
- PFID(&lli->lli_fid),
- sai->sai_sent, sai->sai_replied);
+ LASSERT(thread_is_stopped(&sai->sai_thread));
+ LASSERT(thread_is_stopped(&sai->sai_agl_thread));
+ LASSERT(sai->sai_sent == sai->sai_replied);
+ LASSERT(!sa_has_callback(sai));
list_for_each_entry_safe(entry, next, &sai->sai_entries,
- se_link)
- do_sa_entry_fini(sai, entry);
-
- LASSERT(list_empty(&sai->sai_entries));
- LASSERT(list_empty(&sai->sai_entries_received));
- LASSERT(list_empty(&sai->sai_entries_stated));
+ se_list)
+ sa_kill(sai, entry);
LASSERT(atomic_read(&sai->sai_cache_count) == 0);
- LASSERT(list_empty(&sai->sai_entries_agl));
+ LASSERT(list_empty(&sai->sai_agls));
- iput(inode);
- kfree(sai);
+ ll_sai_free(sai);
+ atomic_dec(&sbi->ll_sa_running);
}
}
-/* Do NOT forget to drop inode refcount when into sai_entries_agl. */
+/* Do NOT forget to drop inode refcount when into sai_agls. */
static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai)
{
struct ll_inode_info *lli = ll_i2info(inode);
@@ -588,29 +539,21 @@ static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai)
iput(inode);
}
-static void ll_post_statahead(struct ll_statahead_info *sai)
+/*
+ * prepare inode for sa entry, add it into agl list, now sa_entry is ready
+ * to be used by scanner process.
+ */
+static void sa_instantiate(struct ll_statahead_info *sai,
+ struct sa_entry *entry)
{
- struct inode *dir = sai->sai_inode;
+ struct inode *dir = sai->sai_dentry->d_inode;
struct inode *child;
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_sa_entry *entry;
struct md_enqueue_info *minfo;
struct lookup_intent *it;
struct ptlrpc_request *req;
struct mdt_body *body;
int rc = 0;
- spin_lock(&lli->lli_sa_lock);
- if (unlikely(list_empty(&sai->sai_entries_received))) {
- spin_unlock(&lli->lli_sa_lock);
- return;
- }
- entry = list_entry(sai->sai_entries_received.next,
- struct ll_sa_entry, se_list);
- atomic_inc(&entry->se_refcount);
- list_del_init(&entry->se_list);
- spin_unlock(&lli->lli_sa_lock);
-
LASSERT(entry->se_handle != 0);
minfo = entry->se_minfo;
@@ -632,7 +575,7 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
/* XXX: No fid in reply, this is probably cross-ref case.
* SA can't handle it yet.
*/
- if (body->valid & OBD_MD_MDS) {
+ if (body->mbo_valid & OBD_MD_MDS) {
rc = -EAGAIN;
goto out;
}
@@ -641,7 +584,7 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
* revalidate.
*/
/* unlinked and re-created with the same name */
- if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->fid1))) {
+ if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->mbo_fid1))) {
entry->se_inode = NULL;
iput(child);
child = NULL;
@@ -659,8 +602,9 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
if (rc)
goto out;
- CDEBUG(D_DLMTRACE, "%s: setting l_data to inode "DFID"%p\n",
+ CDEBUG(D_READA, "%s: setting %.*s" DFID " l_data to inode %p\n",
ll_get_fsname(child->i_sb, NULL, 0),
+ entry->se_qstr.len, entry->se_qstr.name,
PFID(ll_inode2fid(child)), child);
ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL);
@@ -670,34 +614,75 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
ll_agl_add(sai, child, entry->se_index);
out:
- /* The "ll_sa_entry_to_stated()" will drop related ldlm ibits lock
- * reference count by calling "ll_intent_drop_lock()" in spite of the
- * above operations failed or not. Do not worry about calling
- * "ll_intent_drop_lock()" more than once.
+ /*
+ * sa_make_ready() will drop ldlm ibits lock refcount by calling
+ * ll_intent_drop_lock() in spite of failures. Do not worry about
+ * calling ll_intent_drop_lock() more than once.
*/
- rc = ll_sa_entry_to_stated(sai, entry,
- rc < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC);
- if (rc == 0 && entry->se_index == sai->sai_index_wait)
- wake_up(&sai->sai_waitq);
- ll_sa_entry_put(sai, entry);
+ sa_make_ready(sai, entry, rc);
}
+/* once there are async stat replies, instantiate sa_entry from replies */
+static void sa_handle_callback(struct ll_statahead_info *sai)
+{
+ struct ll_inode_info *lli;
+
+ lli = ll_i2info(sai->sai_dentry->d_inode);
+
+ while (sa_has_callback(sai)) {
+ struct sa_entry *entry;
+
+ spin_lock(&lli->lli_sa_lock);
+ if (unlikely(!sa_has_callback(sai))) {
+ spin_unlock(&lli->lli_sa_lock);
+ break;
+ }
+ entry = list_entry(sai->sai_interim_entries.next,
+ struct sa_entry, se_list);
+ list_del_init(&entry->se_list);
+ spin_unlock(&lli->lli_sa_lock);
+
+ sa_instantiate(sai, entry);
+ }
+}
+
+/*
+ * callback for async stat; because this is called in ptlrpcd context, we only
+ * put sa_entry in the sai_interim_entries list, and let sa_handle_callback()
+ * prepare the inode and instantiate the sa_entry later.
+ */
static int ll_statahead_interpret(struct ptlrpc_request *req,
struct md_enqueue_info *minfo, int rc)
{
struct lookup_intent *it = &minfo->mi_it;
struct inode *dir = minfo->mi_dir;
struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = NULL;
- struct ll_sa_entry *entry;
- __u64 handle = 0;
- int wakeup;
+ struct ll_statahead_info *sai = lli->lli_sai;
+ struct sa_entry *entry = (struct sa_entry *)minfo->mi_cbdata;
+ __u64 handle = 0;
+ bool wakeup;
if (it_disposition(it, DISP_LOOKUP_NEG))
rc = -ENOENT;
- if (rc == 0) {
- /* release ibits lock ASAP to avoid deadlock when statahead
+ /*
+ * because statahead thread will wait for all inflight RPC to finish,
+ * sai should be always valid, no need to refcount
+ */
+ LASSERT(sai);
+ LASSERT(!thread_is_stopped(&sai->sai_thread));
+ LASSERT(entry);
+
+ CDEBUG(D_READA, "sa_entry %.*s rc %d\n",
+ entry->se_qstr.len, entry->se_qstr.name, rc);
+
+ if (rc) {
+ ll_intent_release(it);
+ iput(dir);
+ kfree(minfo);
+ } else {
+ /*
+ * release ibits lock ASAP to avoid deadlock when statahead
* thread enqueues lock on parent in readdir and another
* process enqueues lock on child with parent lock held, eg.
* unlink.
@@ -707,65 +692,32 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
}
spin_lock(&lli->lli_sa_lock);
- /* stale entry */
- if (unlikely(!lli->lli_sai ||
- lli->lli_sai->sai_generation != minfo->mi_generation)) {
- spin_unlock(&lli->lli_sa_lock);
- rc = -ESTALE;
- goto out;
+ if (rc) {
+ wakeup = __sa_make_ready(sai, entry, rc);
} else {
- sai = ll_sai_get(lli->lli_sai);
- if (unlikely(!thread_is_running(&sai->sai_thread))) {
- sai->sai_replied++;
- spin_unlock(&lli->lli_sa_lock);
- rc = -EBADFD;
- goto out;
- }
-
- entry = ll_sa_entry_get_byindex(sai, minfo->mi_cbdata);
- if (!entry) {
- sai->sai_replied++;
- spin_unlock(&lli->lli_sa_lock);
- rc = -EIDRM;
- goto out;
- }
-
- if (rc != 0) {
- do_sa_entry_to_stated(sai, entry, SA_ENTRY_INVA);
- wakeup = (entry->se_index == sai->sai_index_wait);
- } else {
- entry->se_minfo = minfo;
- entry->se_req = ptlrpc_request_addref(req);
- /* Release the async ibits lock ASAP to avoid deadlock
- * when statahead thread tries to enqueue lock on parent
- * for readpage and other tries to enqueue lock on child
- * with parent's lock held, for example: unlink.
- */
- entry->se_handle = handle;
- wakeup = list_empty(&sai->sai_entries_received);
- list_add_tail(&entry->se_list,
- &sai->sai_entries_received);
- }
- sai->sai_replied++;
- spin_unlock(&lli->lli_sa_lock);
-
- ll_sa_entry_put(sai, entry);
- if (wakeup)
- wake_up(&sai->sai_thread.t_ctl_waitq);
+ entry->se_minfo = minfo;
+ entry->se_req = ptlrpc_request_addref(req);
+ /*
+ * Release the async ibits lock ASAP to avoid deadlock
+ * when statahead thread tries to enqueue lock on parent
+ * for readpage and other tries to enqueue lock on child
+ * with parent's lock held, for example: unlink.
+ */
+ entry->se_handle = handle;
+ wakeup = !sa_has_callback(sai);
+ list_add_tail(&entry->se_list, &sai->sai_interim_entries);
}
+ sai->sai_replied++;
+
+ if (wakeup)
+ wake_up(&sai->sai_thread.t_ctl_waitq);
+ spin_unlock(&lli->lli_sa_lock);
-out:
- if (rc != 0) {
- ll_intent_release(it);
- iput(dir);
- kfree(minfo);
- }
- if (sai)
- ll_sai_put(sai);
return rc;
}
-static void sa_args_fini(struct md_enqueue_info *minfo,
+/* finish async stat RPC arguments */
+static void sa_fini_data(struct md_enqueue_info *minfo,
struct ldlm_enqueue_info *einfo)
{
LASSERT(minfo && einfo);
@@ -777,12 +729,11 @@ static void sa_args_fini(struct md_enqueue_info *minfo,
/**
* prepare arguments for async stat RPC.
*/
-static int sa_args_init(struct inode *dir, struct inode *child,
- struct ll_sa_entry *entry, struct md_enqueue_info **pmi,
+static int sa_prep_data(struct inode *dir, struct inode *child,
+ struct sa_entry *entry, struct md_enqueue_info **pmi,
struct ldlm_enqueue_info **pei)
{
const struct qstr *qstr = &entry->se_qstr;
- struct ll_inode_info *lli = ll_i2info(dir);
struct md_enqueue_info *minfo;
struct ldlm_enqueue_info *einfo;
struct md_op_data *op_data;
@@ -808,8 +759,7 @@ static int sa_args_init(struct inode *dir, struct inode *child,
minfo->mi_it.it_op = IT_GETATTR;
minfo->mi_dir = igrab(dir);
minfo->mi_cb = ll_statahead_interpret;
- minfo->mi_generation = lli->lli_sai->sai_generation;
- minfo->mi_cbdata = entry->se_index;
+ minfo->mi_cbdata = entry;
einfo->ei_type = LDLM_IBITS;
einfo->ei_mode = it_to_lock_mode(&minfo->mi_it);
@@ -824,31 +774,33 @@ static int sa_args_init(struct inode *dir, struct inode *child,
return 0;
}
-static int do_sa_lookup(struct inode *dir, struct ll_sa_entry *entry)
+/* async stat for file not found in dcache */
+static int sa_lookup(struct inode *dir, struct sa_entry *entry)
{
struct md_enqueue_info *minfo;
struct ldlm_enqueue_info *einfo;
int rc;
- rc = sa_args_init(dir, NULL, entry, &minfo, &einfo);
+ rc = sa_prep_data(dir, NULL, entry, &minfo, &einfo);
if (rc)
return rc;
rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo);
- if (rc < 0)
- sa_args_fini(minfo, einfo);
+ if (rc)
+ sa_fini_data(minfo, einfo);
return rc;
}
/**
- * similar to ll_revalidate_it().
- * \retval 1 -- dentry valid
- * \retval 0 -- will send stat-ahead request
- * \retval others -- prepare stat-ahead request failed
+ * async stat for file found in dcache, similar to .revalidate
+ *
+ * \retval 1 dentry valid, no RPC sent
+ * \retval 0 dentry invalid, will send async stat RPC
+ * \retval negative number upon error
*/
-static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
- struct dentry *dentry)
+static int sa_revalidate(struct inode *dir, struct sa_entry *entry,
+ struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
struct lookup_intent it = { .it_op = IT_GETATTR,
@@ -872,7 +824,7 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
return 1;
}
- rc = sa_args_init(dir, inode, entry, &minfo, &einfo);
+ rc = sa_prep_data(dir, inode, entry, &minfo, &einfo);
if (rc) {
entry->se_inode = NULL;
iput(inode);
@@ -880,56 +832,50 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
}
rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo);
- if (rc < 0) {
+ if (rc) {
entry->se_inode = NULL;
iput(inode);
- sa_args_fini(minfo, einfo);
+ sa_fini_data(minfo, einfo);
}
return rc;
}
-static void ll_statahead_one(struct dentry *parent, const char *entry_name,
- int entry_name_len)
+/* async stat for file with @name */
+static void sa_statahead(struct dentry *parent, const char *name, int len)
{
struct inode *dir = d_inode(parent);
struct ll_inode_info *lli = ll_i2info(dir);
struct ll_statahead_info *sai = lli->lli_sai;
struct dentry *dentry = NULL;
- struct ll_sa_entry *entry;
+ struct sa_entry *entry;
int rc;
- int rc1;
- entry = ll_sa_entry_alloc(parent, sai, sai->sai_index, entry_name,
- entry_name_len);
+ entry = sa_alloc(parent, sai, sai->sai_index, name, len);
if (IS_ERR(entry))
return;
dentry = d_lookup(parent, &entry->se_qstr);
if (!dentry) {
- rc = do_sa_lookup(dir, entry);
+ rc = sa_lookup(dir, entry);
} else {
- rc = do_sa_revalidate(dir, entry, dentry);
+ rc = sa_revalidate(dir, entry, dentry);
if (rc == 1 && agl_should_run(sai, d_inode(dentry)))
ll_agl_add(sai, d_inode(dentry), entry->se_index);
+ }
+ if (dentry)
dput(dentry);
- }
- if (rc) {
- rc1 = ll_sa_entry_to_stated(sai, entry,
- rc < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC);
- if (rc1 == 0 && entry->se_index == sai->sai_index_wait)
- wake_up(&sai->sai_waitq);
- } else {
+ if (rc)
+ sa_make_ready(sai, entry, rc);
+ else
sai->sai_sent++;
- }
sai->sai_index++;
- /* drop one refcount on entry by ll_sa_entry_alloc */
- ll_sa_entry_put(sai, entry);
}
+/* async glimpse (agl) thread main function */
static int ll_agl_thread(void *arg)
{
struct dentry *parent = arg;
@@ -937,10 +883,12 @@ static int ll_agl_thread(void *arg)
struct ll_inode_info *plli = ll_i2info(dir);
struct ll_inode_info *clli;
struct ll_sb_info *sbi = ll_i2sbi(dir);
- struct ll_statahead_info *sai = ll_sai_get(plli->lli_sai);
- struct ptlrpc_thread *thread = &sai->sai_agl_thread;
+ struct ll_statahead_info *sai;
+ struct ptlrpc_thread *thread;
struct l_wait_info lwi = { 0 };
+ sai = ll_sai_get(dir);
+ thread = &sai->sai_agl_thread;
thread->t_pid = current_pid();
CDEBUG(D_READA, "agl thread started: sai %p, parent %pd\n",
sai, parent);
@@ -959,7 +907,7 @@ static int ll_agl_thread(void *arg)
while (1) {
l_wait_event(thread->t_ctl_waitq,
- !list_empty(&sai->sai_entries_agl) ||
+ !list_empty(&sai->sai_agls) ||
!thread_is_running(thread),
&lwi);
@@ -970,8 +918,8 @@ static int ll_agl_thread(void *arg)
/* The statahead thread maybe help to process AGL entries,
* so check whether list empty again.
*/
- if (!list_empty(&sai->sai_entries_agl)) {
- clli = list_entry(sai->sai_entries_agl.next,
+ if (!list_empty(&sai->sai_agls)) {
+ clli = list_entry(sai->sai_agls.next,
struct ll_inode_info, lli_agl_list);
list_del_init(&clli->lli_agl_list);
spin_unlock(&plli->lli_agl_lock);
@@ -983,8 +931,8 @@ static int ll_agl_thread(void *arg)
spin_lock(&plli->lli_agl_lock);
sai->sai_agl_valid = 0;
- while (!list_empty(&sai->sai_entries_agl)) {
- clli = list_entry(sai->sai_entries_agl.next,
+ while (!list_empty(&sai->sai_agls)) {
+ clli = list_entry(sai->sai_agls.next,
struct ll_inode_info, lli_agl_list);
list_del_init(&clli->lli_agl_list);
spin_unlock(&plli->lli_agl_lock);
@@ -1001,6 +949,7 @@ static int ll_agl_thread(void *arg)
return 0;
}
+/* start agl thread */
static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
{
struct ptlrpc_thread *thread = &sai->sai_agl_thread;
@@ -1025,58 +974,71 @@ static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
&lwi);
}
+/* statahead thread main function */
static int ll_statahead_thread(void *arg)
{
struct dentry *parent = arg;
struct inode *dir = d_inode(parent);
- struct ll_inode_info *plli = ll_i2info(dir);
- struct ll_inode_info *clli;
+ struct ll_inode_info *lli = ll_i2info(dir);
struct ll_sb_info *sbi = ll_i2sbi(dir);
- struct ll_statahead_info *sai = ll_sai_get(plli->lli_sai);
- struct ptlrpc_thread *thread = &sai->sai_thread;
- struct ptlrpc_thread *agl_thread = &sai->sai_agl_thread;
- struct page *page;
+ struct ll_statahead_info *sai;
+ struct ptlrpc_thread *sa_thread;
+ struct ptlrpc_thread *agl_thread;
+ struct page *page = NULL;
__u64 pos = 0;
int first = 0;
int rc = 0;
- struct ll_dir_chain chain;
+ struct md_op_data *op_data;
struct l_wait_info lwi = { 0 };
- thread->t_pid = current_pid();
+ sai = ll_sai_get(dir);
+ sa_thread = &sai->sai_thread;
+ agl_thread = &sai->sai_agl_thread;
+ sa_thread->t_pid = current_pid();
CDEBUG(D_READA, "statahead thread starting: sai %p, parent %pd\n",
sai, parent);
+ op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
+ LUSTRE_OPC_ANY, dir);
+ if (IS_ERR(op_data)) {
+ rc = PTR_ERR(op_data);
+ goto out;
+ }
+
+ op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
+
if (sbi->ll_flags & LL_SBI_AGL_ENABLED)
ll_start_agl(parent, sai);
atomic_inc(&sbi->ll_sa_total);
- spin_lock(&plli->lli_sa_lock);
- if (thread_is_init(thread))
+ spin_lock(&lli->lli_sa_lock);
+ if (thread_is_init(sa_thread))
/* If someone else has changed the thread state
* (e.g. already changed to SVC_STOPPING), we can't just
* blindly overwrite that setting.
*/
- thread_set_flags(thread, SVC_RUNNING);
- spin_unlock(&plli->lli_sa_lock);
- wake_up(&thread->t_ctl_waitq);
-
- ll_dir_chain_init(&chain);
- page = ll_get_dir_page(dir, pos, &chain);
+ thread_set_flags(sa_thread, SVC_RUNNING);
+ spin_unlock(&lli->lli_sa_lock);
+ wake_up(&sa_thread->t_ctl_waitq);
- while (1) {
+ while (pos != MDS_DIR_END_OFF && thread_is_running(sa_thread)) {
struct lu_dirpage *dp;
struct lu_dirent *ent;
+ sai->sai_in_readpage = 1;
+ page = ll_get_dir_page(dir, op_data, pos);
+ sai->sai_in_readpage = 0;
if (IS_ERR(page)) {
rc = PTR_ERR(page);
- CDEBUG(D_READA, "error reading dir "DFID" at %llu/%llu: [rc %d] [parent %u]\n",
+ CDEBUG(D_READA, "error reading dir "DFID" at %llu/%llu: opendir_pid = %u: rc = %d\n",
PFID(ll_inode2fid(dir)), pos, sai->sai_index,
- rc, plli->lli_opendir_pid);
- goto out;
+ lli->lli_opendir_pid, rc);
+ break;
}
dp = page_address(page);
- for (ent = lu_dirent_start(dp); ent;
+ for (ent = lu_dirent_start(dp);
+ ent && thread_is_running(sa_thread) && !sa_low_hit(sai);
ent = lu_dirent_next(ent)) {
__u64 hash;
int namelen;
@@ -1123,123 +1085,79 @@ static int ll_statahead_thread(void *arg)
if (unlikely(++first == 1))
continue;
-keep_it:
- l_wait_event(thread->t_ctl_waitq,
- !sa_sent_full(sai) ||
- !list_empty(&sai->sai_entries_received) ||
- !list_empty(&sai->sai_entries_agl) ||
- !thread_is_running(thread),
- &lwi);
-
-interpret_it:
- while (!list_empty(&sai->sai_entries_received))
- ll_post_statahead(sai);
-
- if (unlikely(!thread_is_running(thread))) {
- ll_release_page(page, 0);
- rc = 0;
- goto out;
- }
+ /* wait for spare statahead window */
+ do {
+ l_wait_event(sa_thread->t_ctl_waitq,
+ !sa_sent_full(sai) ||
+ sa_has_callback(sai) ||
+ !list_empty(&sai->sai_agls) ||
+ !thread_is_running(sa_thread),
+ &lwi);
+ sa_handle_callback(sai);
- /* If no window for metadata statahead, but there are
- * some AGL entries to be triggered, then try to help
- * to process the AGL entries.
- */
- if (sa_sent_full(sai)) {
- spin_lock(&plli->lli_agl_lock);
- while (!list_empty(&sai->sai_entries_agl)) {
- clli = list_entry(sai->sai_entries_agl.next,
+ spin_lock(&lli->lli_agl_lock);
+ while (sa_sent_full(sai) &&
+ !agl_list_empty(sai)) {
+ struct ll_inode_info *clli;
+
+ clli = list_entry(sai->sai_agls.next,
struct ll_inode_info, lli_agl_list);
list_del_init(&clli->lli_agl_list);
- spin_unlock(&plli->lli_agl_lock);
+ spin_unlock(&lli->lli_agl_lock);
+
ll_agl_trigger(&clli->lli_vfs_inode,
sai);
- if (!list_empty(&sai->sai_entries_received))
- goto interpret_it;
-
- if (unlikely(
- !thread_is_running(thread))) {
- ll_release_page(page, 0);
- rc = 0;
- goto out;
- }
-
- if (!sa_sent_full(sai))
- goto do_it;
-
- spin_lock(&plli->lli_agl_lock);
+ spin_lock(&lli->lli_agl_lock);
}
- spin_unlock(&plli->lli_agl_lock);
+ spin_unlock(&lli->lli_agl_lock);
+ } while (sa_sent_full(sai) &&
+ thread_is_running(sa_thread));
- goto keep_it;
- }
-
-do_it:
- ll_statahead_one(parent, name, namelen);
+ sa_statahead(parent, name, namelen);
}
- pos = le64_to_cpu(dp->ldp_hash_end);
- if (pos == MDS_DIR_END_OFF) {
- /*
- * End of directory reached.
- */
- ll_release_page(page, 0);
- while (1) {
- l_wait_event(thread->t_ctl_waitq,
- !list_empty(&sai->sai_entries_received) ||
- sai->sai_sent == sai->sai_replied ||
- !thread_is_running(thread),
- &lwi);
- while (!list_empty(&sai->sai_entries_received))
- ll_post_statahead(sai);
+ pos = le64_to_cpu(dp->ldp_hash_end);
+ ll_release_page(dir, page,
+ le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
- if (unlikely(!thread_is_running(thread))) {
- rc = 0;
- goto out;
- }
+ if (sa_low_hit(sai)) {
+ rc = -EFAULT;
+ atomic_inc(&sbi->ll_sa_wrong);
+ CDEBUG(D_READA, "Statahead for dir "DFID" hit ratio too low: hit/miss %llu/%llu, sent/replied %llu/%llu, stopping statahead thread: pid %d\n",
+ PFID(&lli->lli_fid), sai->sai_hit,
+ sai->sai_miss, sai->sai_sent,
+ sai->sai_replied, current_pid());
+ break;
+ }
+ }
+ ll_finish_md_op_data(op_data);
- if (sai->sai_sent == sai->sai_replied &&
- list_empty(&sai->sai_entries_received))
- break;
- }
+ if (rc < 0) {
+ spin_lock(&lli->lli_sa_lock);
+ thread_set_flags(sa_thread, SVC_STOPPING);
+ lli->lli_sa_enabled = 0;
+ spin_unlock(&lli->lli_sa_lock);
+ }
- spin_lock(&plli->lli_agl_lock);
- while (!list_empty(&sai->sai_entries_agl) &&
- thread_is_running(thread)) {
- clli = list_entry(sai->sai_entries_agl.next,
- struct ll_inode_info, lli_agl_list);
- list_del_init(&clli->lli_agl_list);
- spin_unlock(&plli->lli_agl_lock);
- ll_agl_trigger(&clli->lli_vfs_inode, sai);
- spin_lock(&plli->lli_agl_lock);
- }
- spin_unlock(&plli->lli_agl_lock);
+ /*
+ * statahead is finished, but statahead entries need to be cached, wait
+ * for file release to stop me.
+ */
+ while (thread_is_running(sa_thread)) {
+ l_wait_event(sa_thread->t_ctl_waitq,
+ sa_has_callback(sai) ||
+ !agl_list_empty(sai) ||
+ !thread_is_running(sa_thread),
+ &lwi);
- rc = 0;
- goto out;
- } else if (1) {
- /*
- * chain is exhausted.
- * Normal case: continue to the next page.
- */
- ll_release_page(page, le32_to_cpu(dp->ldp_flags) &
- LDF_COLLIDE);
- page = ll_get_dir_page(dir, pos, &chain);
- } else {
- LASSERT(le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
- ll_release_page(page, 1);
- /*
- * go into overflow page.
- */
- }
+ sa_handle_callback(sai);
}
-
out:
if (sai->sai_agl_valid) {
- spin_lock(&plli->lli_agl_lock);
+ spin_lock(&lli->lli_agl_lock);
thread_set_flags(agl_thread, SVC_STOPPING);
- spin_unlock(&plli->lli_agl_lock);
+ spin_unlock(&lli->lli_agl_lock);
wake_up(&agl_thread->t_ctl_waitq);
CDEBUG(D_READA, "stop agl thread: sai %p pid %u\n",
@@ -1249,84 +1167,93 @@ out:
&lwi);
} else {
/* Set agl_thread flags anyway. */
- thread_set_flags(&sai->sai_agl_thread, SVC_STOPPED);
+ thread_set_flags(agl_thread, SVC_STOPPED);
}
- ll_dir_chain_fini(&chain);
- spin_lock(&plli->lli_sa_lock);
- if (!list_empty(&sai->sai_entries_received)) {
- thread_set_flags(thread, SVC_STOPPING);
- spin_unlock(&plli->lli_sa_lock);
-
- /* To release the resources held by received entries. */
- while (!list_empty(&sai->sai_entries_received))
- ll_post_statahead(sai);
- spin_lock(&plli->lli_sa_lock);
+ /*
+ * wait for inflight statahead RPCs to finish, and then we can free sai
+ * safely because statahead RPC will access sai data
+ */
+ while (sai->sai_sent != sai->sai_replied) {
+ /* in case we're not woken up, timeout wait */
+ lwi = LWI_TIMEOUT(msecs_to_jiffies(MSEC_PER_SEC >> 3),
+ NULL, NULL);
+ l_wait_event(sa_thread->t_ctl_waitq,
+ sai->sai_sent == sai->sai_replied, &lwi);
}
- thread_set_flags(thread, SVC_STOPPED);
- spin_unlock(&plli->lli_sa_lock);
- wake_up(&sai->sai_waitq);
- wake_up(&thread->t_ctl_waitq);
- ll_sai_put(sai);
- dput(parent);
+
+ /* release resources held by statahead RPCs */
+ sa_handle_callback(sai);
+
+ spin_lock(&lli->lli_sa_lock);
+ thread_set_flags(sa_thread, SVC_STOPPED);
+ spin_unlock(&lli->lli_sa_lock);
+
CDEBUG(D_READA, "statahead thread stopped: sai %p, parent %pd\n",
sai, parent);
+
+ wake_up(&sai->sai_waitq);
+ wake_up(&sa_thread->t_ctl_waitq);
+ ll_sai_put(sai);
+
return rc;
}
-/**
- * called in ll_file_release().
- */
-void ll_stop_statahead(struct inode *dir, void *key)
+/* authorize opened dir handle @key to statahead */
+void ll_authorize_statahead(struct inode *dir, void *key)
{
struct ll_inode_info *lli = ll_i2info(dir);
- if (unlikely(!key))
- return;
-
spin_lock(&lli->lli_sa_lock);
- if (lli->lli_opendir_key != key || lli->lli_opendir_pid == 0) {
- spin_unlock(&lli->lli_sa_lock);
- return;
+ if (!lli->lli_opendir_key && !lli->lli_sai) {
+ /*
+ * if lli_sai is not NULL, it means previous statahead is not
+ * finished yet, we'd better not start a new statahead for now.
+ */
+ LASSERT(!lli->lli_opendir_pid);
+ lli->lli_opendir_key = key;
+ lli->lli_opendir_pid = current_pid();
+ lli->lli_sa_enabled = 1;
}
+ spin_unlock(&lli->lli_sa_lock);
+}
- lli->lli_opendir_key = NULL;
-
- if (lli->lli_sai) {
- struct l_wait_info lwi = { 0 };
- struct ptlrpc_thread *thread = &lli->lli_sai->sai_thread;
+/*
+ * deauthorize opened dir handle @key to statahead, but statahead thread may
+ * still be running, notify it to quit.
+ */
+void ll_deauthorize_statahead(struct inode *dir, void *key)
+{
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct ll_statahead_info *sai;
- if (!thread_is_stopped(thread)) {
- thread_set_flags(thread, SVC_STOPPING);
- spin_unlock(&lli->lli_sa_lock);
- wake_up(&thread->t_ctl_waitq);
+ LASSERT(lli->lli_opendir_key == key);
+ LASSERT(lli->lli_opendir_pid);
- CDEBUG(D_READA, "stop statahead thread: sai %p pid %u\n",
- lli->lli_sai, (unsigned int)thread->t_pid);
- l_wait_event(thread->t_ctl_waitq,
- thread_is_stopped(thread),
- &lwi);
- } else {
- spin_unlock(&lli->lli_sa_lock);
- }
+ CDEBUG(D_READA, "deauthorize statahead for "DFID"\n",
+ PFID(&lli->lli_fid));
+ spin_lock(&lli->lli_sa_lock);
+ lli->lli_opendir_key = NULL;
+ lli->lli_opendir_pid = 0;
+ lli->lli_sa_enabled = 0;
+ sai = lli->lli_sai;
+ if (sai && thread_is_running(&sai->sai_thread)) {
/*
- * Put the ref which was held when first statahead_enter.
- * It maybe not the last ref for some statahead requests
- * maybe inflight.
+ * statahead thread may not quit yet because it needs to cache
+ * entries, now it's time to tell it to quit.
*/
- ll_sai_put(lli->lli_sai);
- } else {
- lli->lli_opendir_pid = 0;
- spin_unlock(&lli->lli_sa_lock);
+ thread_set_flags(&sai->sai_thread, SVC_STOPPING);
+ wake_up(&sai->sai_thread.t_ctl_waitq);
}
+ spin_unlock(&lli->lli_sa_lock);
}
enum {
/**
* not first dirent, or is "."
*/
- LS_NONE_FIRST_DE = 0,
+ LS_NOT_FIRST_DE = 0,
/**
* the first non-hidden dirent
*/
@@ -1337,17 +1264,26 @@ enum {
LS_FIRST_DOT_DE
};
+/* file is first dirent under @dir */
static int is_first_dirent(struct inode *dir, struct dentry *dentry)
{
- struct ll_dir_chain chain;
const struct qstr *target = &dentry->d_name;
+ struct md_op_data *op_data;
struct page *page;
__u64 pos = 0;
int dot_de;
- int rc = LS_NONE_FIRST_DE;
+ int rc = LS_NOT_FIRST_DE;
- ll_dir_chain_init(&chain);
- page = ll_get_dir_page(dir, pos, &chain);
+ op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
+ LUSTRE_OPC_ANY, dir);
+ if (IS_ERR(op_data))
+ return PTR_ERR(op_data);
+ /**
+ * FIXME choose the start offset of the readdir
+ */
+ op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
+
+ page = ll_get_dir_page(dir, op_data, pos);
while (1) {
struct lu_dirpage *dp;
@@ -1357,9 +1293,10 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
struct ll_inode_info *lli = ll_i2info(dir);
rc = PTR_ERR(page);
- CERROR("error reading dir "DFID" at %llu: [rc %d] [parent %u]\n",
+ CERROR("%s: error reading dir "DFID" at %llu: opendir_pid = %u : rc = %d\n",
+ ll_get_fsname(dir->i_sb, NULL, 0),
PFID(ll_inode2fid(dir)), pos,
- rc, lli->lli_opendir_pid);
+ lli->lli_opendir_pid, rc);
break;
}
@@ -1411,13 +1348,13 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
if (target->len != namelen ||
memcmp(target->name, name, namelen) != 0)
- rc = LS_NONE_FIRST_DE;
+ rc = LS_NOT_FIRST_DE;
else if (!dot_de)
rc = LS_FIRST_DE;
else
rc = LS_FIRST_DOT_DE;
- ll_release_page(page, 0);
+ ll_release_page(dir, page, false);
goto out;
}
pos = le64_to_cpu(dp->ldp_hash_end);
@@ -1425,261 +1362,228 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
/*
* End of directory reached.
*/
- ll_release_page(page, 0);
- break;
- } else if (1) {
+ ll_release_page(dir, page, false);
+ goto out;
+ } else {
/*
* chain is exhausted
* Normal case: continue to the next page.
*/
- ll_release_page(page, le32_to_cpu(dp->ldp_flags) &
- LDF_COLLIDE);
- page = ll_get_dir_page(dir, pos, &chain);
- } else {
- /*
- * go into overflow page.
- */
- LASSERT(le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
- ll_release_page(page, 1);
+ ll_release_page(dir, page,
+ le32_to_cpu(dp->ldp_flags) &
+ LDF_COLLIDE);
+ page = ll_get_dir_page(dir, op_data, pos);
}
}
-
out:
- ll_dir_chain_fini(&chain);
+ ll_finish_md_op_data(op_data);
return rc;
}
-static void
-ll_sai_unplug(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
-{
- struct ptlrpc_thread *thread = &sai->sai_thread;
- struct ll_sb_info *sbi = ll_i2sbi(sai->sai_inode);
- int hit;
-
- if (entry && entry->se_stat == SA_ENTRY_SUCC)
- hit = 1;
- else
- hit = 0;
-
- ll_sa_entry_fini(sai, entry);
- if (hit) {
- sai->sai_hit++;
- sai->sai_consecutive_miss = 0;
- sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max);
- } else {
- struct ll_inode_info *lli = ll_i2info(sai->sai_inode);
-
- sai->sai_miss++;
- sai->sai_consecutive_miss++;
- if (sa_low_hit(sai) && thread_is_running(thread)) {
- atomic_inc(&sbi->ll_sa_wrong);
- CDEBUG(D_READA, "Statahead for dir " DFID " hit ratio too low: hit/miss %llu/%llu, sent/replied %llu/%llu, stopping statahead thread\n",
- PFID(&lli->lli_fid), sai->sai_hit,
- sai->sai_miss, sai->sai_sent,
- sai->sai_replied);
- spin_lock(&lli->lli_sa_lock);
- if (!thread_is_stopped(thread))
- thread_set_flags(thread, SVC_STOPPING);
- spin_unlock(&lli->lli_sa_lock);
- }
- }
-
- if (!thread_is_stopped(thread))
- wake_up(&thread->t_ctl_waitq);
-}
-
/**
- * Start statahead thread if this is the first dir entry.
- * Otherwise if a thread is started already, wait it until it is ahead of me.
- * \retval 1 -- find entry with lock in cache, the caller needs to do
- * nothing.
- * \retval 0 -- find entry in cache, but without lock, the caller needs
- * refresh from MDS.
- * \retval others -- the caller need to process as non-statahead.
+ * revalidate @dentryp from statahead cache
+ *
+ * \param[in] dir parent directory
+ * \param[in] sai sai structure
+ * \param[out] dentryp pointer to dentry which will be revalidated
+ * \param[in] unplug unplug statahead window only (normally for negative
+ * dentry)
+ * \retval 1 on success, dentry is saved in @dentryp
+ * \retval 0 if revalidation failed (no proper lock on client)
+ * \retval negative number upon error
*/
-int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
- int only_unplug)
+static int revalidate_statahead_dentry(struct inode *dir,
+ struct ll_statahead_info *sai,
+ struct dentry **dentryp,
+ bool unplug)
{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct dentry *parent;
- struct ll_sa_entry *entry;
- struct ptlrpc_thread *thread;
- struct l_wait_info lwi = { 0 };
- struct task_struct *task;
- int rc = 0;
- struct ll_inode_info *plli;
+ struct sa_entry *entry = NULL;
+ struct l_wait_info lwi = { 0 };
+ struct ll_dentry_data *ldd;
+ struct ll_inode_info *lli;
+ int rc = 0;
- LASSERT(lli->lli_opendir_pid == current_pid());
+ if ((*dentryp)->d_name.name[0] == '.') {
+ if (sai->sai_ls_all ||
+ sai->sai_miss_hidden >= sai->sai_skip_hidden) {
+ /*
+ * Hidden dentry is the first one, or statahead
+ * thread does not skip so many hidden dentries
+ * before "sai_ls_all" enabled as below.
+ */
+ } else {
+ if (!sai->sai_ls_all)
+ /*
+ * It may be that the hidden dentry is not
+ * the first one, so "sai_ls_all" was not
+ * set and "ls -al" missed. Enable
+ * "sai_ls_all" for such a case.
+ */
+ sai->sai_ls_all = 1;
- if (sai) {
- thread = &sai->sai_thread;
- if (unlikely(thread_is_stopped(thread) &&
- list_empty(&sai->sai_entries_stated))) {
- /* to release resource */
- ll_stop_statahead(dir, lli->lli_opendir_key);
+ /*
+ * Such "getattr" has been skipped before
+ * "sai_ls_all" enabled as above.
+ */
+ sai->sai_miss_hidden++;
return -EAGAIN;
}
+ }
- if ((*dentryp)->d_name.name[0] == '.') {
- if (sai->sai_ls_all ||
- sai->sai_miss_hidden >= sai->sai_skip_hidden) {
- /*
- * Hidden dentry is the first one, or statahead
- * thread does not skip so many hidden dentries
- * before "sai_ls_all" enabled as below.
- */
- } else {
- if (!sai->sai_ls_all)
- /*
- * It maybe because hidden dentry is not
- * the first one, "sai_ls_all" was not
- * set, then "ls -al" missed. Enable
- * "sai_ls_all" for such case.
- */
- sai->sai_ls_all = 1;
+ if (unplug) {
+ rc = 1;
+ goto out_unplug;
+ }
- /*
- * Such "getattr" has been skipped before
- * "sai_ls_all" enabled as above.
- */
- sai->sai_miss_hidden++;
- return -EAGAIN;
- }
- }
+ entry = sa_get(sai, &(*dentryp)->d_name);
+ if (!entry) {
+ rc = -EAGAIN;
+ goto out_unplug;
+ }
- entry = ll_sa_entry_get_byname(sai, &(*dentryp)->d_name);
- if (!entry || only_unplug) {
- ll_sai_unplug(sai, entry);
- return entry ? 1 : -EAGAIN;
- }
+ /* if statahead is busy in readdir, help it do post-work */
+ if (!sa_ready(entry) && sai->sai_in_readpage)
+ sa_handle_callback(sai);
- if (!ll_sa_entry_stated(entry)) {
- sai->sai_index_wait = entry->se_index;
- lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(30), NULL,
- LWI_ON_SIGNAL_NOOP, NULL);
- rc = l_wait_event(sai->sai_waitq,
- ll_sa_entry_stated(entry) ||
- thread_is_stopped(thread),
- &lwi);
- if (rc < 0) {
- ll_sai_unplug(sai, entry);
- return -EAGAIN;
- }
+ if (!sa_ready(entry)) {
+ sai->sai_index_wait = entry->se_index;
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(30), NULL,
+ LWI_ON_SIGNAL_NOOP, NULL);
+ rc = l_wait_event(sai->sai_waitq, sa_ready(entry), &lwi);
+ if (rc < 0) {
+ /*
+ * entry may not be ready, so it may be used by inflight
+ * statahead RPC, don't free it.
+ */
+ entry = NULL;
+ rc = -EAGAIN;
+ goto out_unplug;
}
+ }
- if (entry->se_stat == SA_ENTRY_SUCC && entry->se_inode) {
- struct inode *inode = entry->se_inode;
- struct lookup_intent it = { .it_op = IT_GETATTR,
- .it_lock_handle =
- entry->se_handle };
- __u64 bits;
-
- rc = md_revalidate_lock(ll_i2mdexp(dir), &it,
- ll_inode2fid(inode), &bits);
- if (rc == 1) {
- if (!d_inode(*dentryp)) {
- struct dentry *alias;
-
- alias = ll_splice_alias(inode,
- *dentryp);
- if (IS_ERR(alias)) {
- ll_sai_unplug(sai, entry);
- return PTR_ERR(alias);
- }
- *dentryp = alias;
- } else if (d_inode(*dentryp) != inode) {
- /* revalidate, but inode is recreated */
- CDEBUG(D_READA, "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n",
- ll_get_fsname(d_inode(*dentryp)->i_sb, NULL, 0),
- *dentryp,
- PFID(ll_inode2fid(d_inode(*dentryp))),
- PFID(ll_inode2fid(inode)));
- ll_sai_unplug(sai, entry);
- return -ESTALE;
- } else {
- iput(inode);
+ if (entry->se_state == SA_ENTRY_SUCC && entry->se_inode) {
+ struct inode *inode = entry->se_inode;
+ struct lookup_intent it = { .it_op = IT_GETATTR,
+ .it_lock_handle = entry->se_handle };
+ __u64 bits;
+
+ rc = md_revalidate_lock(ll_i2mdexp(dir), &it,
+ ll_inode2fid(inode), &bits);
+ if (rc == 1) {
+ if (!(*dentryp)->d_inode) {
+ struct dentry *alias;
+
+ alias = ll_splice_alias(inode, *dentryp);
+ if (IS_ERR(alias)) {
+ rc = PTR_ERR(alias);
+ goto out_unplug;
}
+ *dentryp = alias;
+ /**
+ * statahead prepared this inode, transfer inode
+ * refcount from sa_entry to dentry
+ */
entry->se_inode = NULL;
-
- if ((bits & MDS_INODELOCK_LOOKUP) &&
- d_lustre_invalid(*dentryp))
- d_lustre_revalidate(*dentryp);
- ll_intent_release(&it);
+ } else if ((*dentryp)->d_inode != inode) {
+ /* revalidate, but inode is recreated */
+ CDEBUG(D_READA,
+ "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n",
+ ll_get_fsname((*dentryp)->d_inode->i_sb,
+ NULL, 0),
+ *dentryp,
+ PFID(ll_inode2fid((*dentryp)->d_inode)),
+ PFID(ll_inode2fid(inode)));
+ rc = -ESTALE;
+ goto out_unplug;
}
- }
- ll_sai_unplug(sai, entry);
- return rc;
+ if ((bits & MDS_INODELOCK_LOOKUP) &&
+ d_lustre_invalid(*dentryp))
+ d_lustre_revalidate(*dentryp);
+ ll_intent_release(&it);
+ }
}
+out_unplug:
+ /*
+ * statahead cached sa_entry can be used only once, and will be killed
+ * right after use, so if lookup/revalidate accessed statahead cache,
+ * set dentry ldd_sa_generation to parent lli_sa_generation, later if we
+ * stat this file again, we know we've done statahead before, see
+ * dentry_may_statahead().
+ */
+ ldd = ll_d2d(*dentryp);
+ lli = ll_i2info(dir);
+ /* ldd can be NULL if llite lookup failed. */
+ if (ldd)
+ ldd->lld_sa_generation = lli->lli_sa_generation;
+ sa_put(sai, entry);
+ return rc;
+}
+
+/**
+ * start statahead thread
+ *
+ * \param[in] dir parent directory
+ * \param[in] dentry dentry that triggers statahead, normally the first
+ * dirent under @dir
+ * \retval -EAGAIN on success, because when this function is
+ * called, it's already in lookup call, so client should
+ * do it itself instead of waiting for statahead thread
+ * to do it asynchronously.
+ * \retval negative number upon error
+ */
+static int start_statahead_thread(struct inode *dir, struct dentry *dentry)
+{
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct ll_statahead_info *sai = NULL;
+ struct l_wait_info lwi = { 0 };
+ struct ptlrpc_thread *thread;
+ struct task_struct *task;
+ struct dentry *parent = dentry->d_parent;
+ int rc;
/* I am the "lli_opendir_pid" owner, only me can set "lli_sai". */
- rc = is_first_dirent(dir, *dentryp);
- if (rc == LS_NONE_FIRST_DE) {
+ rc = is_first_dirent(dir, dentry);
+ if (rc == LS_NOT_FIRST_DE) {
/* It is not "ls -{a}l" operation, no need statahead for it. */
- rc = -EAGAIN;
+ rc = -EFAULT;
goto out;
}
- sai = ll_sai_alloc();
+ sai = ll_sai_alloc(parent);
if (!sai) {
rc = -ENOMEM;
goto out;
}
sai->sai_ls_all = (rc == LS_FIRST_DOT_DE);
- sai->sai_inode = igrab(dir);
- if (unlikely(!sai->sai_inode)) {
- CWARN("Do not start stat ahead on dying inode "DFID"\n",
- PFID(&lli->lli_fid));
- rc = -ESTALE;
- goto out;
- }
-
- /* get parent reference count here, and put it in ll_statahead_thread */
- parent = dget((*dentryp)->d_parent);
- if (unlikely(sai->sai_inode != d_inode(parent))) {
- struct ll_inode_info *nlli = ll_i2info(d_inode(parent));
-
- CWARN("Race condition, someone changed %pd just now: old parent "DFID", new parent "DFID"\n",
- *dentryp,
- PFID(&lli->lli_fid), PFID(&nlli->lli_fid));
- dput(parent);
- iput(sai->sai_inode);
- rc = -EAGAIN;
+ /*
+ * if current lli_opendir_key was deauthorized, or dir re-opened by
+ * another process, don't start statahead, otherwise the newly spawned
+ * statahead thread won't be notified to quit.
+ */
+ spin_lock(&lli->lli_sa_lock);
+ if (unlikely(lli->lli_sai || lli->lli_opendir_key ||
+ lli->lli_opendir_pid != current->pid)) {
+ spin_unlock(&lli->lli_sa_lock);
+ rc = -EPERM;
goto out;
}
+ lli->lli_sai = sai;
+ spin_unlock(&lli->lli_sa_lock);
- CDEBUG(D_READA, "start statahead thread: sai %p, parent %pd\n",
- sai, parent);
+ atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_running);
- /* The sai buffer already has one reference taken at allocation time,
- * but as soon as we expose the sai by attaching it to the lli that
- * default reference can be dropped by another thread calling
- * ll_stop_statahead. We need to take a local reference to protect
- * the sai buffer while we intend to access it.
- */
- ll_sai_get(sai);
- lli->lli_sai = sai;
+ CDEBUG(D_READA, "start statahead thread: [pid %d] [parent %pd]\n",
+ current_pid(), parent);
- plli = ll_i2info(d_inode(parent));
task = kthread_run(ll_statahead_thread, parent, "ll_sa_%u",
- plli->lli_opendir_pid);
+ lli->lli_opendir_pid);
thread = &sai->sai_thread;
if (IS_ERR(task)) {
rc = PTR_ERR(task);
- CERROR("can't start ll_sa thread, rc: %d\n", rc);
- dput(parent);
- lli->lli_opendir_key = NULL;
- thread_set_flags(thread, SVC_STOPPED);
- thread_set_flags(&sai->sai_agl_thread, SVC_STOPPED);
- /* Drop both our own local reference and the default
- * reference from allocation time.
- */
- ll_sai_put(sai);
- ll_sai_put(sai);
- LASSERT(!lli->lli_sai);
- return -EAGAIN;
+ CERROR("can't start ll_sa thread, rc : %d\n", rc);
+ goto out;
}
l_wait_event(thread->t_ctl_waitq,
@@ -1694,10 +1598,47 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
return -EAGAIN;
out:
- kfree(sai);
+ /*
+ * once starting the statahead thread has failed, disable statahead
+ * so that subsequent stat calls won't waste time trying it.
+ */
spin_lock(&lli->lli_sa_lock);
- lli->lli_opendir_key = NULL;
- lli->lli_opendir_pid = 0;
+ lli->lli_sa_enabled = 0;
+ lli->lli_sai = NULL;
spin_unlock(&lli->lli_sa_lock);
+ if (sai)
+ ll_sai_free(sai);
return rc;
}
+
+/**
+ * statahead entry function, this is called when client getattr on a file, it
+ * will start statahead thread if this is the first dir entry, else revalidate
+ * dentry from statahead cache.
+ *
+ * \param[in] dir parent directory
+ * \param[out] dentryp dentry to getattr
+ * \param[in] unplug unplug statahead window only (normally for negative
+ * dentry)
+ * \retval 1 on success
+ * \retval 0 revalidation from statahead cache failed, caller needs
+ * to getattr from server directly
+ * \retval negative number on error, caller often ignores this and
+ * then getattr from server
+ */
+int ll_statahead(struct inode *dir, struct dentry **dentryp, bool unplug)
+{
+ struct ll_statahead_info *sai;
+
+ sai = ll_sai_get(dir);
+ if (sai) {
+ int rc;
+
+ rc = revalidate_statahead_dentry(dir, sai, dentryp, unplug);
+ CDEBUG(D_READA, "revalidate statahead %pd: %d.\n",
+ *dentryp, rc);
+ ll_sai_put(sai);
+ return rc;
+ }
+ return start_statahead_thread(dir, *dentryp);
+}
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
index 3dd7e0eb0b54..106cd00910a7 100644
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ b/drivers/staging/lustre/lustre/llite/super25.c
@@ -34,7 +34,6 @@
#include <linux/module.h>
#include <linux/types.h>
-#include "../include/lustre_lite.h"
#include "../include/lustre_ha.h"
#include "../include/lustre_dlm.h"
#include <linux/init.h>
@@ -83,8 +82,6 @@ struct super_operations lustre_super_operations = {
};
MODULE_ALIAS_FS("lustre");
-void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg));
-
static int __init lustre_init(void)
{
lnet_process_id_t lnet_id;
@@ -102,8 +99,8 @@ static int __init lustre_init(void)
rc = -ENOMEM;
ll_inode_cachep = kmem_cache_create("lustre_inode_cache",
- sizeof(struct ll_inode_info),
- 0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
+ sizeof(struct ll_inode_info), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
NULL);
if (!ll_inode_cachep)
goto out_cache;
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
index 8c8bdfe1ad71..f8bc7ed59646 100644
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -35,7 +35,6 @@
#include <linux/stat.h>
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/lustre_lite.h"
#include "llite_internal.h"
static int ll_readlink_internal(struct inode *inode,
@@ -80,17 +79,17 @@ static int ll_readlink_internal(struct inode *inode,
}
body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
- if ((body->valid & OBD_MD_LINKNAME) == 0) {
+ if ((body->mbo_valid & OBD_MD_LINKNAME) == 0) {
CERROR("OBD_MD_LINKNAME not set on reply\n");
rc = -EPROTO;
goto failed;
}
LASSERT(symlen != 0);
- if (body->eadatasize != symlen) {
+ if (body->mbo_eadatasize != symlen) {
CERROR("%s: inode "DFID": symlink length %d not expected %d\n",
ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), body->eadatasize - 1,
+ PFID(ll_inode2fid(inode)), body->mbo_eadatasize - 1,
symlen - 1);
rc = -EPROTO;
goto failed;
@@ -155,8 +154,8 @@ const struct inode_operations ll_fast_symlink_inode_operations = {
.get_link = ll_get_link,
.getattr = ll_getattr,
.permission = ll_inode_permission,
- .setxattr = ll_setxattr,
- .getxattr = ll_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = ll_listxattr,
- .removexattr = ll_removexattr,
+ .removexattr = generic_removexattr,
};
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
index e623216e962d..8aa8ecc09a48 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -38,7 +38,6 @@
#define DEBUG_SUBSYSTEM S_LLITE
#include "../include/obd.h"
-#include "../include/lustre_lite.h"
#include "llite_internal.h"
#include "vvp_internal.h"
@@ -368,12 +367,6 @@ int cl_sb_fini(struct super_block *sb)
CERROR("Cannot cleanup cl-stack due to memory shortage.\n");
result = PTR_ERR(env);
}
- /*
- * If mount failed (sbi->ll_cl == NULL), and this there are no other
- * mounts, stop device types manually (this usually happens
- * automatically when last device is destroyed).
- */
- lu_types_stop();
return result;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
index 79fc428461ed..5802da81cd0e 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h
@@ -217,11 +217,12 @@ struct vvp_object {
struct list_head vob_pending_list;
/**
- * Access this counter is protected by inode->i_sem. Now that
- * the lifetime of transient pages must be covered by inode sem,
- * we don't need to hold any lock..
+ * Number of transient pages. This is no longer protected by i_sem,
+ * and needs to be atomic. This is not actually used for anything,
+ * and can probably be removed.
*/
- int vob_transient_pages;
+ atomic_t vob_transient_pages;
+
/**
* Number of outstanding mmaps on this file.
*
@@ -247,9 +248,9 @@ struct vvp_object {
*/
struct vvp_page {
struct cl_page_slice vpg_cl;
- int vpg_defer_uptodate;
- int vpg_ra_used;
- int vpg_write_queued;
+ unsigned int vpg_defer_uptodate:1,
+ vpg_ra_used:1,
+ vpg_write_queued:1;
/**
* Non-empty iff this page is already counted in
* vvp_object::vob_pending_list. This list is only used as a flag,
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 94916dcc6caa..2ab450359b6d 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -38,7 +38,6 @@
#define DEBUG_SUBSYSTEM S_LLITE
#include "../include/obd.h"
-#include "../include/lustre_lite.h"
#include "llite_internal.h"
#include "vvp_internal.h"
@@ -628,7 +627,7 @@ static int vvp_io_setattr_time(const struct lu_env *env,
attr->cat_mtime = io->u.ci_setattr.sa_attr.lvb_mtime;
valid |= CAT_MTIME;
}
- result = cl_object_attr_set(env, obj, attr, valid);
+ result = cl_object_attr_update(env, obj, attr, valid);
cl_object_attr_unlock(obj);
return result;
@@ -821,7 +820,7 @@ static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
cl_page_disown(env, io, page);
/* held in ll_cl_init() */
- lu_ref_del(&page->cp_reference, "cl_io", io);
+ lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io));
cl_page_put(env, page);
}
@@ -959,10 +958,30 @@ static int vvp_io_write_start(const struct lu_env *env,
CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
- if (!vio->vui_iter) /* from a temp io in ll_cl_init(). */
+ if (!vio->vui_iter) {
+ /* from a temp io in ll_cl_init(). */
result = 0;
- else
- result = generic_file_write_iter(vio->vui_iocb, vio->vui_iter);
+ } else {
+ /*
+ * When using the locked AIO function (generic_file_aio_write())
+ * testing has shown the inode mutex to be a limiting factor
+ * with multi-threaded single shared file performance. To get
+ * around this, we now use the lockless version. To maintain
+ * consistency, proper locking to protect against writes,
+ * truncates, etc. is handled in the higher layers of lustre.
+ */
+ bool lock_node = !IS_NOSEC(inode);
+
+ if (lock_node)
+ inode_lock(inode);
+ result = __generic_file_write_iter(vio->vui_iocb,
+ vio->vui_iter);
+ if (lock_node)
+ inode_unlock(inode);
+
+ if (result > 0 || result == -EIOCBQUEUED)
+ result = generic_write_sync(vio->vui_iocb, result);
+ }
if (result > 0) {
result = vvp_io_write_commit(env, io);
diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c
index 64be0c9df35b..07eb26cc43f5 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_lock.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_lock.c
@@ -37,7 +37,6 @@
#define DEBUG_SUBSYSTEM S_LLITE
#include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
#include "vvp_internal.h"
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
index 2c520b0bf6ca..b57195d15674 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_object.c
@@ -39,7 +39,6 @@
#include "../../include/linux/libcfs/libcfs.h"
#include "../include/obd.h"
-#include "../include/lustre_lite.h"
#include "llite_internal.h"
#include "vvp_internal.h"
@@ -68,8 +67,8 @@ static int vvp_object_print(const struct lu_env *env, void *cookie,
(*p)(env, cookie, "(%s %d %d) inode: %p ",
list_empty(&obj->vob_pending_list) ? "-" : "+",
- obj->vob_transient_pages, atomic_read(&obj->vob_mmap_cnt),
- inode);
+ atomic_read(&obj->vob_transient_pages),
+ atomic_read(&obj->vob_mmap_cnt), inode);
if (inode) {
lli = ll_i2info(inode);
(*p)(env, cookie, "%lu/%u %o %u %d %p "DFID,
@@ -102,8 +101,8 @@ static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
return 0; /* layers below have to fill in the rest */
}
-static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned valid)
+static int vvp_attr_update(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned int valid)
{
struct inode *inode = vvp_object_inode(obj);
@@ -120,7 +119,7 @@ static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
if (0 && valid & CAT_SIZE)
i_size_write(inode, attr->cat_size);
/* not currently necessary */
- if (0 && valid & (CAT_UID|CAT_GID|CAT_SIZE))
+ if (0 && valid & (CAT_UID | CAT_GID | CAT_SIZE))
mark_inode_dirty(inode);
return 0;
}
@@ -210,7 +209,7 @@ static const struct cl_object_operations vvp_ops = {
.coo_lock_init = vvp_lock_init,
.coo_io_init = vvp_io_init,
.coo_attr_get = vvp_attr_get,
- .coo_attr_set = vvp_attr_set,
+ .coo_attr_update = vvp_attr_update,
.coo_conf_set = vvp_conf_set,
.coo_prune = vvp_prune,
.coo_glimpse = vvp_object_glimpse
@@ -221,7 +220,7 @@ static int vvp_object_init0(const struct lu_env *env,
const struct cl_object_conf *conf)
{
vob->vob_inode = conf->coc_inode;
- vob->vob_transient_pages = 0;
+ atomic_set(&vob->vob_transient_pages, 0);
cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page));
return 0;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 2e566d90bb94..5d79efc1aafe 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -44,8 +44,6 @@
#include <linux/page-flags.h>
#include <linux/pagemap.h>
-#include "../include/lustre_lite.h"
-
#include "llite_internal.h"
#include "vvp_internal.h"
@@ -249,7 +247,7 @@ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret
set_bit(AS_EIO, &inode->i_mapping->flags);
if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
- obj->vob_discard_page_warned == 0) {
+ obj->vob_discard_page_warned == 0) {
obj->vob_discard_page_warned = 1;
ll_dirty_page_discard_warn(vmpage, ioret);
}
@@ -444,18 +442,10 @@ static int vvp_transient_page_prep(const struct lu_env *env,
return 0;
}
-static void vvp_transient_page_verify(const struct cl_page *page)
-{
- struct inode *inode = vvp_object_inode(page->cp_obj);
-
- LASSERT(!inode_trylock(inode));
-}
-
static int vvp_transient_page_own(const struct lu_env *env,
const struct cl_page_slice *slice,
struct cl_io *unused, int nonblock)
{
- vvp_transient_page_verify(slice->cpl_page);
return 0;
}
@@ -463,21 +453,18 @@ static void vvp_transient_page_assume(const struct lu_env *env,
const struct cl_page_slice *slice,
struct cl_io *unused)
{
- vvp_transient_page_verify(slice->cpl_page);
}
static void vvp_transient_page_unassume(const struct lu_env *env,
const struct cl_page_slice *slice,
struct cl_io *unused)
{
- vvp_transient_page_verify(slice->cpl_page);
}
static void vvp_transient_page_disown(const struct lu_env *env,
const struct cl_page_slice *slice,
struct cl_io *unused)
{
- vvp_transient_page_verify(slice->cpl_page);
}
static void vvp_transient_page_discard(const struct lu_env *env,
@@ -486,8 +473,6 @@ static void vvp_transient_page_discard(const struct lu_env *env,
{
struct cl_page *page = slice->cpl_page;
- vvp_transient_page_verify(slice->cpl_page);
-
/*
* For transient pages, remove it from the radix tree.
*/
@@ -511,7 +496,6 @@ vvp_transient_page_completion(const struct lu_env *env,
const struct cl_page_slice *slice,
int ioret)
{
- vvp_transient_page_verify(slice->cpl_page);
}
static void vvp_transient_page_fini(const struct lu_env *env,
@@ -522,8 +506,7 @@ static void vvp_transient_page_fini(const struct lu_env *env,
struct vvp_object *clobj = cl2vvp(clp->cp_obj);
vvp_page_fini_common(vpg);
- LASSERT(!inode_trylock(clobj->vob_inode));
- clobj->vob_transient_pages--;
+ atomic_dec(&clobj->vob_transient_pages);
}
static const struct cl_page_operations vvp_transient_page_ops = {
@@ -549,7 +532,7 @@ static const struct cl_page_operations vvp_transient_page_ops = {
};
int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
+ struct cl_page *page, pgoff_t index)
{
struct vvp_page *vpg = cl_object_page_slice(obj, page);
struct page *vmpage = page->cp_vmpage;
@@ -570,10 +553,9 @@ int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
} else {
struct vvp_object *clobj = cl2vvp(obj);
- LASSERT(!inode_trylock(clobj->vob_inode));
cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
&vvp_transient_page_ops);
- clobj->vob_transient_pages++;
+ atomic_inc(&clobj->vob_transient_pages);
}
return 0;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c
index 9fe9d6c0a7d1..e3f4c790d646 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_req.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_req.c
@@ -32,7 +32,6 @@
#include "../include/cl_object.h"
#include "../include/obd.h"
#include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
#include "llite_internal.h"
#include "vvp_internal.h"
@@ -83,8 +82,10 @@ static void vvp_req_attr_set(const struct lu_env *env,
}
obdo_from_inode(oa, inode, valid_flags & flags);
obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
+ if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
+ oa->o_parent_oid++;
memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid,
- JOBSTATS_JOBID_SIZE);
+ LUSTRE_JOBID_SIZE);
}
static void vvp_req_completion(const struct lu_env *env,
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index 98303cf85815..e070adb7a3cc 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -38,21 +38,12 @@
#define DEBUG_SUBSYSTEM S_LLITE
#include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
#include "../include/lustre_dlm.h"
#include "../include/lustre_ver.h"
#include "../include/lustre_eacl.h"
#include "llite_internal.h"
-#define XATTR_USER_T (1)
-#define XATTR_TRUSTED_T (2)
-#define XATTR_SECURITY_T (3)
-#define XATTR_ACL_ACCESS_T (4)
-#define XATTR_ACL_DEFAULT_T (5)
-#define XATTR_LUSTRE_T (6)
-#define XATTR_OTHER_T (7)
-
static
int get_xattr_type(const char *name)
{
@@ -99,46 +90,57 @@ int xattr_type_filter(struct ll_sb_info *sbi, int xattr_type)
return 0;
}
-static
-int ll_setxattr_common(struct inode *inode, const char *name,
- const void *value, size_t size,
- int flags, __u64 valid)
+static int
+ll_xattr_set_common(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value, size_t size,
+ int flags)
{
+ char fullname[strlen(handler->prefix) + strlen(name) + 1];
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ptlrpc_request *req = NULL;
- int xattr_type, rc;
const char *pv = value;
+ __u64 valid;
+ int rc;
+
+ if (flags == XATTR_REPLACE) {
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
+ valid = OBD_MD_FLXATTRRM;
+ } else {
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
+ valid = OBD_MD_FLXATTR;
+ }
- xattr_type = get_xattr_type(name);
- rc = xattr_type_filter(sbi, xattr_type);
+ rc = xattr_type_filter(sbi, handler->flags);
if (rc)
return rc;
- if ((xattr_type == XATTR_ACL_ACCESS_T ||
- xattr_type == XATTR_ACL_DEFAULT_T) &&
+ if ((handler->flags == XATTR_ACL_ACCESS_T ||
+ handler->flags == XATTR_ACL_DEFAULT_T) &&
!inode_owner_or_capable(inode))
return -EPERM;
/* b10667: ignore lustre special xattr for now */
- if ((xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0) ||
- (xattr_type == XATTR_LUSTRE_T && strcmp(name, "lustre.lov") == 0))
+ if ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) ||
+ (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov")))
return 0;
/* b15587: ignore security.capability xattr for now */
- if ((xattr_type == XATTR_SECURITY_T &&
- strcmp(name, "security.capability") == 0))
+ if ((handler->flags == XATTR_SECURITY_T &&
+ !strcmp(name, "capability")))
return 0;
/* LU-549: Disable security.selinux when selinux is disabled */
- if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
- strcmp(name, "security.selinux") == 0)
+ if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
+ strcmp(name, "selinux") == 0)
return -EOPNOTSUPP;
+ sprintf(fullname, "%s%s\n", handler->prefix, name);
rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
- valid, name, pv, size, 0, flags,
+ valid, fullname, pv, size, 0, flags,
ll_i2suppgid(inode), &req);
if (rc) {
- if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
+ if (rc == -EOPNOTSUPP && handler->flags == XATTR_USER_T) {
LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
sbi->ll_flags &= ~LL_SBI_USER_XATTR;
}
@@ -149,8 +151,10 @@ int ll_setxattr_common(struct inode *inode, const char *name,
return 0;
}
-int ll_setxattr(struct dentry *dentry, struct inode *inode,
- const char *name, const void *value, size_t size, int flags)
+static int ll_xattr_set(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value, size_t size,
+ int flags)
{
LASSERT(inode);
LASSERT(name);
@@ -158,20 +162,24 @@ int ll_setxattr(struct dentry *dentry, struct inode *inode,
CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
PFID(ll_inode2fid(inode)), inode, name);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
-
- if ((strncmp(name, XATTR_TRUSTED_PREFIX,
- sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0 &&
- strcmp(name + sizeof(XATTR_TRUSTED_PREFIX) - 1, "lov") == 0) ||
- (strncmp(name, XATTR_LUSTRE_PREFIX,
- sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
- strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
+ if (!strcmp(name, "lov")) {
struct lov_user_md *lump = (struct lov_user_md *)value;
+ int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
+ LPROC_LL_SETXATTR;
int rc = 0;
+ ll_stats_ops_tally(ll_i2sbi(inode), op_type, 1);
+
if (size != 0 && size < sizeof(struct lov_user_md))
return -EINVAL;
+ /*
+ * It is possible to set an xattr to a "" value of zero size.
+ * For this case we are going to treat it as a removal.
+ */
+ if (!size && lump)
+ lump = NULL;
+
/* Attributes that are saved via getxattr will always have
* the stripe_offset as 0. Instead, the MDS should be
* allowed to pick the starting OST index. b=17846
@@ -181,12 +189,15 @@ int ll_setxattr(struct dentry *dentry, struct inode *inode,
if (lump && S_ISREG(inode->i_mode)) {
__u64 it_flags = FMODE_WRITE;
- int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ?
- sizeof(*lump) : sizeof(struct lov_user_md_v3);
+ int lum_size;
+
+ lum_size = ll_lov_user_md_size(lump);
+ if (lum_size < 0 || size < lum_size)
+ return 0; /* b=10667: ignore error */
rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags,
lump, lum_size);
- /* b10667: rc always be 0 here for now */
+ /* b=10667: rc always be 0 here for now */
rc = 0;
} else if (S_ISDIR(inode->i_mode)) {
rc = ll_dir_setstripe(inode, lump, 0);
@@ -194,92 +205,27 @@ int ll_setxattr(struct dentry *dentry, struct inode *inode,
return rc;
- } else if (strcmp(name, XATTR_NAME_LMA) == 0 ||
- strcmp(name, XATTR_NAME_LINK) == 0)
+ } else if (!strcmp(name, "lma") || !strcmp(name, "link")) {
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
return 0;
+ }
- return ll_setxattr_common(inode, name, value, size, flags,
- OBD_MD_FLXATTR);
-}
-
-int ll_removexattr(struct dentry *dentry, const char *name)
-{
- struct inode *inode = d_inode(dentry);
-
- LASSERT(inode);
- LASSERT(name);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
- PFID(ll_inode2fid(inode)), inode, name);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
- return ll_setxattr_common(inode, name, NULL, 0, 0,
- OBD_MD_FLXATTRRM);
+ return ll_xattr_set_common(handler, dentry, inode, name, value, size,
+ flags);
}
-static
-int ll_getxattr_common(struct inode *inode, const char *name,
- void *buffer, size_t size, __u64 valid)
+int
+ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
+ size_t size, __u64 valid)
{
+ struct ll_inode_info *lli = ll_i2info(inode);
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ptlrpc_request *req = NULL;
struct mdt_body *body;
- int xattr_type, rc;
void *xdata;
- struct ll_inode_info *lli = ll_i2info(inode);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- /* listxattr have slightly different behavior from of ext3:
- * without 'user_xattr' ext3 will list all xattr names but
- * filtered out "^user..*"; we list them all for simplicity.
- */
- if (!name) {
- xattr_type = XATTR_OTHER_T;
- goto do_getxattr;
- }
+ int rc;
- xattr_type = get_xattr_type(name);
- rc = xattr_type_filter(sbi, xattr_type);
- if (rc)
- return rc;
-
- /* b15587: ignore security.capability xattr for now */
- if ((xattr_type == XATTR_SECURITY_T &&
- strcmp(name, "security.capability") == 0))
- return -ENODATA;
-
- /* LU-549: Disable security.selinux when selinux is disabled */
- if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
- strcmp(name, "security.selinux") == 0)
- return -EOPNOTSUPP;
-
-#ifdef CONFIG_FS_POSIX_ACL
- /* posix acl is under protection of LOOKUP lock. when calling to this,
- * we just have path resolution to the target inode, so we have great
- * chance that cached ACL is uptodate.
- */
- if (xattr_type == XATTR_ACL_ACCESS_T) {
- struct posix_acl *acl;
-
- spin_lock(&lli->lli_lock);
- acl = posix_acl_dup(lli->lli_posix_acl);
- spin_unlock(&lli->lli_lock);
-
- if (!acl)
- return -ENODATA;
-
- rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
- return rc;
- }
- if (xattr_type == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
- return -ENODATA;
-#endif
-
-do_getxattr:
- if (sbi->ll_xattr_cache_enabled && xattr_type != XATTR_ACL_ACCESS_T) {
+ if (sbi->ll_xattr_cache_enabled && type != XATTR_ACL_ACCESS_T) {
rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
if (rc == -EAGAIN)
goto getxattr_nocache;
@@ -311,36 +257,36 @@ getxattr_nocache:
/* only detect the xattr size */
if (size == 0) {
- rc = body->eadatasize;
+ rc = body->mbo_eadatasize;
goto out;
}
- if (size < body->eadatasize) {
+ if (size < body->mbo_eadatasize) {
CERROR("server bug: replied size %u > %u\n",
- body->eadatasize, (int)size);
+ body->mbo_eadatasize, (int)size);
rc = -ERANGE;
goto out;
}
- if (body->eadatasize == 0) {
+ if (body->mbo_eadatasize == 0) {
rc = -ENODATA;
goto out;
}
/* do not need swab xattr data */
xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
- body->eadatasize);
+ body->mbo_eadatasize);
if (!xdata) {
rc = -EFAULT;
goto out;
}
- memcpy(buffer, xdata, body->eadatasize);
- rc = body->eadatasize;
+ memcpy(buffer, xdata, body->mbo_eadatasize);
+ rc = body->mbo_eadatasize;
}
out_xattr:
- if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
+ if (rc == -EOPNOTSUPP && type == XATTR_USER_T) {
LCONSOLE_INFO(
"%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n",
ll_get_fsname(inode->i_sb, NULL, 0), rc);
@@ -351,8 +297,65 @@ out:
return rc;
}
-ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
+/*
+ * Generic ->get() callback shared by the llite xattr handlers.
+ *
+ * @handler: handler that matched; ->prefix and ->flags (XATTR_*_T) are used
+ * @dentry:  unused here
+ * @inode:   inode whose attribute is read
+ * @name:    attribute name with the handler prefix already stripped
+ * @buffer:  destination for the value
+ * @size:    size of @buffer
+ *
+ * Returns the result of posix_acl_to_xattr() for the access ACL, the result
+ * of ll_xattr_list() for everything else, or a negative errno when the
+ * request is filtered out before reaching either of them.
+ */
+static int ll_xattr_get_common(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ /* room for "<prefix><name>" plus the terminating NUL, nothing else */
+ char fullname[strlen(handler->prefix) + strlen(name) + 1];
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+#ifdef CONFIG_FS_POSIX_ACL
+ struct ll_inode_info *lli = ll_i2info(inode);
+#endif
+ int rc;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
+
+ ll_stats_ops_tally(sbi, LPROC_LL_GETXATTR, 1);
+
+ rc = xattr_type_filter(sbi, handler->flags);
+ if (rc)
+ return rc;
+
+ /* b15587: ignore security.capability xattr for now */
+ if ((handler->flags == XATTR_SECURITY_T && !strcmp(name, "capability")))
+ return -ENODATA;
+
+ /* LU-549: Disable security.selinux when selinux is disabled */
+ if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
+ !strcmp(name, "selinux"))
+ return -EOPNOTSUPP;
+
+#ifdef CONFIG_FS_POSIX_ACL
+ /* posix acl is under protection of LOOKUP lock. when calling to this,
+ * we just have path resolution to the target inode, so we have great
+ * chance that cached ACL is uptodate.
+ */
+ if (handler->flags == XATTR_ACL_ACCESS_T) {
+ struct posix_acl *acl;
+
+ /* take our own reference so the ACL can be used unlocked */
+ spin_lock(&lli->lli_lock);
+ acl = posix_acl_dup(lli->lli_posix_acl);
+ spin_unlock(&lli->lli_lock);
+
+ if (!acl)
+ return -ENODATA;
+
+ rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
+ posix_acl_release(acl);
+ return rc;
+ }
+ if (handler->flags == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
+ return -ENODATA;
+#endif
+ /*
+ * Rebuild the full attribute name. No trailing newline: it would
+ * write one byte past fullname[] and alter the name that is looked up.
+ */
+ snprintf(fullname, sizeof(fullname), "%s%s", handler->prefix, name);
+ return ll_xattr_list(inode, fullname, handler->flags, buffer, size,
+ OBD_MD_FLXATTR);
+}
+
+static int ll_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
{
LASSERT(inode);
LASSERT(name);
@@ -360,36 +363,23 @@ ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
PFID(ll_inode2fid(inode)), inode, name);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
-
- if ((strncmp(name, XATTR_TRUSTED_PREFIX,
- sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0 &&
- strcmp(name + sizeof(XATTR_TRUSTED_PREFIX) - 1, "lov") == 0) ||
- (strncmp(name, XATTR_LUSTRE_PREFIX,
- sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
- strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
+ if (!strcmp(name, "lov")) {
struct lov_stripe_md *lsm;
struct lov_user_md *lump;
struct lov_mds_md *lmm = NULL;
struct ptlrpc_request *request = NULL;
int rc = 0, lmmsize = 0;
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
+
if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
return -ENODATA;
- if (size == 0 && S_ISDIR(inode->i_mode)) {
- /* XXX directory EA is fix for now, optimize to save
- * RPC transfer
- */
- rc = sizeof(struct lov_user_md);
- goto out;
- }
-
lsm = ccc_inode_lsm_get(inode);
if (!lsm) {
if (S_ISDIR(inode->i_mode)) {
- rc = ll_dir_getstripe(inode, &lmm,
- &lmmsize, &request);
+ rc = ll_dir_getstripe(inode, (void **)&lmm,
+ &lmmsize, &request, 0);
} else {
rc = -ENODATA;
}
@@ -439,7 +429,7 @@ out:
return rc;
}
- return ll_getxattr_common(inode, name, buffer, size, OBD_MD_FLXATTR);
+ return ll_xattr_get_common(handler, dentry, inode, name, buffer, size);
}
ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -457,7 +447,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);
- rc = ll_getxattr_common(inode, NULL, buffer, size, OBD_MD_FLXATTRLS);
+ rc = ll_xattr_list(inode, NULL, XATTR_OTHER_T, buffer, size,
+ OBD_MD_FLXATTRLS);
if (rc < 0)
goto out;
@@ -488,7 +479,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
if (!ll_i2info(inode)->lli_has_smd)
rc2 = -1;
} else if (S_ISDIR(inode->i_mode)) {
- rc2 = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
+ rc2 = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize,
+ &request, 0);
}
if (rc2 < 0) {
@@ -518,3 +510,57 @@ out:
return rc;
}
+
+/*
+ * Per-namespace xattr handlers. Each one binds a name prefix to an
+ * XATTR_*_T type (stored in ->flags) and to the get/set callbacks.
+ */
+
+/* "user." namespace: handled entirely by the common get/set paths */
+static const struct xattr_handler ll_user_xattr_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .flags = XATTR_USER_T,
+ .get = ll_xattr_get_common,
+ .set = ll_xattr_set_common,
+};
+
+/*
+ * "trusted." namespace: goes through ll_xattr_get()/ll_xattr_set(), which
+ * special-case the "lov" attribute before falling back to the common paths.
+ */
+static const struct xattr_handler ll_trusted_xattr_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .flags = XATTR_TRUSTED_T,
+ .get = ll_xattr_get,
+ .set = ll_xattr_set,
+};
+
+/* "security." namespace: common get/set paths */
+static const struct xattr_handler ll_security_xattr_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .flags = XATTR_SECURITY_T,
+ .get = ll_xattr_get_common,
+ .set = ll_xattr_set_common,
+};
+
+/*
+ * POSIX access ACL.
+ * NOTE(review): a complete attribute name is supplied through ->prefix
+ * here - confirm this matches what the xattr core expects.
+ */
+static const struct xattr_handler ll_acl_access_xattr_handler = {
+ .prefix = XATTR_NAME_POSIX_ACL_ACCESS,
+ .flags = XATTR_ACL_ACCESS_T,
+ .get = ll_xattr_get_common,
+ .set = ll_xattr_set_common,
+};
+
+/* POSIX default ACL; same ->prefix remark as for the access ACL applies */
+static const struct xattr_handler ll_acl_default_xattr_handler = {
+ .prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
+ .flags = XATTR_ACL_DEFAULT_T,
+ .get = ll_xattr_get_common,
+ .set = ll_xattr_set_common,
+};
+
+/* "lustre." namespace: same callbacks as the "trusted." handler */
+static const struct xattr_handler ll_lustre_xattr_handler = {
+ .prefix = XATTR_LUSTRE_PREFIX,
+ .flags = XATTR_LUSTRE_T,
+ .get = ll_xattr_get,
+ .set = ll_xattr_set,
+};
+
+/*
+ * NULL-terminated list of all handlers defined above. The two ACL entries
+ * are only present when CONFIG_FS_POSIX_ACL is set.
+ */
+const struct xattr_handler *ll_xattr_handlers[] = {
+ &ll_user_xattr_handler,
+ &ll_trusted_xattr_handler,
+ &ll_security_xattr_handler,
+#ifdef CONFIG_FS_POSIX_ACL
+ &ll_acl_access_xattr_handler,
+ &ll_acl_default_xattr_handler,
+#endif
+ &ll_lustre_xattr_handler,
+ NULL,
+};
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
index 8089da8143d9..50a19a40bd4e 100644
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c
@@ -13,7 +13,6 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
#include "../include/lustre_dlm.h"
#include "../include/lustre_ver.h"
#include "llite_internal.h"
@@ -270,10 +269,12 @@ static int ll_xattr_find_get_lock(struct inode *inode,
struct lustre_handle lockh = { 0 };
struct md_op_data *op_data;
struct ll_inode_info *lli = ll_i2info(inode);
- struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS,
- .ei_mode = it_to_lock_mode(oit),
- .ei_cb_bl = ll_md_blocking_ast,
- .ei_cb_cp = ldlm_completion_ast };
+ struct ldlm_enqueue_info einfo = {
+ .ei_type = LDLM_IBITS,
+ .ei_mode = it_to_lock_mode(oit),
+ .ei_cb_bl = &ll_md_blocking_ast,
+ .ei_cb_cp = &ldlm_completion_ast,
+ };
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct obd_export *exp = sbi->ll_md_exp;
int rc;
@@ -304,7 +305,7 @@ static int ll_xattr_find_get_lock(struct inode *inode,
op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS;
- rc = md_enqueue(exp, &einfo, oit, op_data, &lockh, NULL, 0, NULL, 0);
+ rc = md_enqueue(exp, &einfo, NULL, oit, op_data, &lockh, 0);
ll_finish_md_op_data(op_data);
if (rc < 0) {
@@ -380,25 +381,25 @@ static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit)
}
/* do not need swab xattr data */
xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
- body->eadatasize);
+ body->mbo_eadatasize);
xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS,
- body->aclsize);
+ body->mbo_aclsize);
xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS,
- body->max_mdsize * sizeof(__u32));
+ body->mbo_max_mdsize * sizeof(__u32));
if (!xdata || !xval || !xsizes) {
CERROR("wrong setxattr reply\n");
rc = -EPROTO;
goto out_destroy;
}
- xtail = xdata + body->eadatasize;
- xvtail = xval + body->aclsize;
+ xtail = xdata + body->mbo_eadatasize;
+ xvtail = xval + body->mbo_aclsize;
CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail);
ll_xattr_cache_init(lli);
- for (i = 0; i < body->max_mdsize; i++) {
+ for (i = 0; i < body->mbo_max_mdsize; i++) {
CDEBUG(D_CACHE, "caching [%s]=%.*s\n", xdata, *xsizes, xval);
/* Perform consistency checks: attr names and vals in pill */
if (!memchr(xdata, 0, xtail - xdata)) {
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_fld.c b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
index a3d170aa6fd2..a5265f9b5797 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_fld.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
@@ -47,18 +47,20 @@
#include "../include/lprocfs_status.h"
#include "lmv_internal.h"
-int lmv_fld_lookup(struct lmv_obd *lmv,
- const struct lu_fid *fid,
- u32 *mds)
+int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, u32 *mds)
{
+ struct obd_device *obd = lmv2obd_dev(lmv);
int rc;
- /* FIXME: Currently ZFS still use local seq for ROOT unfortunately, and
+ /*
+ * FIXME: Currently ZFS still use local seq for ROOT unfortunately, and
* this fid_is_local check should be removed once LU-2240 is fixed
*/
- LASSERTF((fid_seq_in_fldb(fid_seq(fid)) ||
- fid_seq_is_local_file(fid_seq(fid))) &&
- fid_is_sane(fid), DFID" is insane!\n", PFID(fid));
+ if (!fid_is_sane(fid) || !(fid_seq_in_fldb(fid_seq(fid)) ||
+ fid_seq_is_local_file(fid_seq(fid)))) {
+ CERROR("%s: invalid FID " DFID "\n", obd->obd_name, PFID(fid));
+ return -EINVAL;
+ }
rc = fld_client_lookup(&lmv->lmv_fld, fid_seq(fid), mds,
LU_SEQ_RANGE_MDT, NULL);
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
index 2f58fdab8d1e..9f4e826bb0af 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_intent.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
@@ -43,13 +43,13 @@
#include "../include/lustre_lib.h"
#include "../include/lustre_net.h"
#include "../include/lustre_dlm.h"
+#include "../include/lustre_mdc.h"
#include "../include/obd_class.h"
#include "../include/lprocfs_status.h"
#include "lmv_internal.h"
-static int lmv_intent_remote(struct obd_export *exp, void *lmm,
- int lmmsize, struct lookup_intent *it,
- const struct lu_fid *parent_fid, int flags,
+static int lmv_intent_remote(struct obd_export *exp, struct lookup_intent *it,
+ const struct lu_fid *parent_fid,
struct ptlrpc_request **reqp,
ldlm_blocking_callback cb_blocking,
__u64 extra_lock_flags)
@@ -68,7 +68,7 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
if (!body)
return -EPROTO;
- LASSERT((body->valid & OBD_MD_MDS));
+ LASSERT((body->mbo_valid & OBD_MD_MDS));
/*
* Unfortunately, we have to lie to MDC/MDS to retrieve
@@ -87,9 +87,9 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
it->it_request = NULL;
}
- LASSERT(fid_is_sane(&body->fid1));
+ LASSERT(fid_is_sane(&body->mbo_fid1));
- tgt = lmv_find_target(lmv, &body->fid1);
+ tgt = lmv_find_target(lmv, &body->mbo_fid1);
if (IS_ERR(tgt)) {
rc = PTR_ERR(tgt);
goto out;
@@ -101,7 +101,7 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
goto out;
}
- op_data->op_fid1 = body->fid1;
+ op_data->op_fid1 = body->mbo_fid1;
/* Sent the parent FID to the remote MDT */
if (parent_fid) {
/* The parent fid is only for remote open to
@@ -110,18 +110,14 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
*/
LASSERT(it->it_op & IT_OPEN);
op_data->op_fid2 = *parent_fid;
- /* Add object FID to op_fid3, in case it needs to check stale
- * (M_CHECK_STALE), see mdc_finish_intent_lock
- */
- op_data->op_fid3 = body->fid1;
}
op_data->op_bias = MDS_CROSS_REF;
- CDEBUG(D_INODE, "REMOTE_INTENT with fid="DFID" -> mds #%d\n",
- PFID(&body->fid1), tgt->ltd_idx);
+ CDEBUG(D_INODE, "REMOTE_INTENT with fid=" DFID " -> mds #%u\n",
+ PFID(&body->mbo_fid1), tgt->ltd_idx);
- rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
- flags, &req, cb_blocking, extra_lock_flags);
+ rc = md_intent_lock(tgt->ltd_exp, op_data, it, &req, cb_blocking,
+ extra_lock_flags);
if (rc)
goto out_free_op_data;
@@ -136,8 +132,10 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
it->it_remote_lock_mode = it->it_lock_mode;
}
- it->it_lock_handle = plock.cookie;
- it->it_lock_mode = pmode;
+ if (pmode) {
+ it->it_lock_handle = plock.cookie;
+ it->it_lock_mode = pmode;
+ }
out_free_op_data:
kfree(op_data);
@@ -150,13 +148,126 @@ out:
return rc;
}
+/*
+ * Revalidate every stripe (slave) of a striped directory.
+ *
+ * For each of the lsm->lsm_md_stripe_count entries an IT_GETATTR intent is
+ * issued with md_intent_lock() on the target returned by lmv_locate_mds().
+ * When that call returns > 0 without a request the slave is taken as still
+ * valid; otherwise size, blocks, nlink and a/c/mtime of the stripe's lmo_root
+ * inode are refreshed from the mdt_body in the reply.
+ *
+ * @exp:              LMV export
+ * @lsm:              stripe layout to walk
+ * @cb_blocking:      blocking callback handed to md_intent_lock()
+ * @extra_lock_flags: extra lock flags handed to md_intent_lock()
+ *
+ * Returns 0 on success, -ENOMEM if op_data cannot be allocated, -EIO if a
+ * stripe reports fewer than 2 links, or the error from lmv_locate_mds() /
+ * md_intent_lock().
+ */
+int lmv_revalidate_slaves(struct obd_export *exp,
+ const struct lmv_stripe_md *lsm,
+ ldlm_blocking_callback cb_blocking,
+ int extra_lock_flags)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct ptlrpc_request *req = NULL;
+ struct mdt_body *body;
+ struct md_op_data *op_data;
+ int rc = 0, i;
+
+ /**
+ * revalidate slaves has some problems, temporarily return,
+ * we may not need that
+ *
+ * NOTE(review): no early return is visible in this function, so the
+ * remark above looks stale - confirm.
+ */
+ op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
+ if (!op_data)
+ return -ENOMEM;
+
+ /**
+ * Loop over the stripe information, check validity and update them
+ * from MDS if needed.
+ */
+ for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
+ struct lookup_intent it = { .it_op = IT_GETATTR };
+ struct lustre_handle *lockh = NULL;
+ struct lmv_tgt_desc *tgt = NULL;
+ struct inode *inode;
+ struct lu_fid fid;
+
+ fid = lsm->lsm_md_oinfo[i].lmo_fid;
+ inode = lsm->lsm_md_oinfo[i].lmo_root;
+
+ /*
+ * Prepare op_data for revalidating. Note that @fid2 should be
+ * defined otherwise it will go to server and take new lock
+ * which is not needed here.
+ */
+ memset(op_data, 0, sizeof(*op_data));
+ op_data->op_fid1 = fid;
+ op_data->op_fid2 = fid;
+
+ tgt = lmv_locate_mds(lmv, op_data, &fid);
+ if (IS_ERR(tgt)) {
+ rc = PTR_ERR(tgt);
+ goto cleanup;
+ }
+
+ CDEBUG(D_INODE, "Revalidate slave " DFID " -> mds #%u\n",
+ PFID(&fid), tgt->ltd_idx);
+
+ /* drop the reply left over from the previous stripe */
+ if (req) {
+ ptlrpc_req_finished(req);
+ req = NULL;
+ }
+
+ rc = md_intent_lock(tgt->ltd_exp, op_data, &it, &req,
+ cb_blocking, extra_lock_flags);
+ if (rc < 0)
+ goto cleanup;
+
+ /* access the intent's lock cookie through a lustre_handle */
+ lockh = (struct lustre_handle *)&it.it_lock_handle;
+ if (rc > 0 && !req) {
+ /* slave inode is still valid */
+ CDEBUG(D_INODE, "slave "DFID" is still valid.\n",
+ PFID(&fid));
+ rc = 0;
+ } else {
+ /* refresh slave from server */
+ body = req_capsule_server_get(&req->rq_pill,
+ &RMF_MDT_BODY);
+ LASSERT(body);
+
+ /* fewer than 2 links is reported as a corrupt stripe */
+ if (unlikely(body->mbo_nlink < 2)) {
+ CERROR("%s: nlink %d < 2 corrupt stripe %d "DFID":" DFID"\n",
+ obd->obd_name, body->mbo_nlink, i,
+ PFID(&lsm->lsm_md_oinfo[i].lmo_fid),
+ PFID(&lsm->lsm_md_oinfo[0].lmo_fid));
+
+ /* release the lock reference before bailing out */
+ if (it.it_lock_mode && lockh) {
+ ldlm_lock_decref(lockh, it.it_lock_mode);
+ it.it_lock_mode = 0;
+ }
+
+ rc = -EIO;
+ goto cleanup;
+ }
+
+ i_size_write(inode, body->mbo_size);
+ inode->i_blocks = body->mbo_blocks;
+ set_nlink(inode, body->mbo_nlink);
+ LTIME_S(inode->i_atime) = body->mbo_atime;
+ LTIME_S(inode->i_ctime) = body->mbo_ctime;
+ LTIME_S(inode->i_mtime) = body->mbo_mtime;
+ }
+
+ md_set_lock_data(tgt->ltd_exp, lockh, inode, NULL);
+
+ /* drop the reference taken by the intent, if a lock was granted */
+ if (it.it_lock_mode && lockh) {
+ ldlm_lock_decref(lockh, it.it_lock_mode);
+ it.it_lock_mode = 0;
+ }
+ }
+
+cleanup:
+ /* release the last reply (if any) and the scratch op_data */
+ if (req)
+ ptlrpc_req_finished(req);
+
+ kfree(op_data);
+ return rc;
+}
+
/*
* IT_OPEN is intended to open (and create, possible) an object. Parent (pid)
* may be split dir.
*/
static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
- void *lmm, int lmmsize, struct lookup_intent *it,
- int flags, struct ptlrpc_request **reqp,
+ struct lookup_intent *it,
+ struct ptlrpc_request **reqp,
ldlm_blocking_callback cb_blocking,
__u64 extra_lock_flags)
{
@@ -166,35 +277,55 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
struct mdt_body *body;
int rc;
- tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
+ if (it->it_flags & MDS_OPEN_BY_FID) {
+ LASSERT(fid_is_sane(&op_data->op_fid2));
+
+ /*
+ * for striped directory, we can't know parent stripe fid
+ * without name, but we can set it to child fid, and MDT
+ * will obtain it from linkea in open in such case.
+ */
+ if (op_data->op_mea1)
+ op_data->op_fid1 = op_data->op_fid2;
+
+ tgt = lmv_find_target(lmv, &op_data->op_fid2);
+ if (IS_ERR(tgt))
+ return PTR_ERR(tgt);
+
+ op_data->op_mds = tgt->ltd_idx;
+ } else {
+ LASSERT(fid_is_sane(&op_data->op_fid1));
+ LASSERT(fid_is_zero(&op_data->op_fid2));
+ LASSERT(op_data->op_name);
+
+ tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ return PTR_ERR(tgt);
+ }
/* If it is ready to open the file by FID, do not need
* allocate FID at all, otherwise it will confuse MDT
*/
- if ((it->it_op & IT_CREAT) &&
- !(it->it_flags & MDS_OPEN_BY_FID)) {
+ if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) {
/*
- * For open with IT_CREATE and for IT_CREATE cases allocate new
- * fid and setup FLD for it.
+ * For lookup(IT_CREATE) cases allocate new fid and setup FLD
+ * for it.
*/
- op_data->op_fid3 = op_data->op_fid2;
- rc = lmv_fid_alloc(exp, &op_data->op_fid2, op_data);
+ rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
if (rc != 0)
return rc;
}
- CDEBUG(D_INODE, "OPEN_INTENT with fid1=" DFID ", fid2=" DFID ", name='%s' -> mds #%d\n",
+ CDEBUG(D_INODE, "OPEN_INTENT with fid1=" DFID ", fid2=" DFID ", name='%s' -> mds #%u\n",
PFID(&op_data->op_fid1),
PFID(&op_data->op_fid2), op_data->op_name, tgt->ltd_idx);
- rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it, flags,
- reqp, cb_blocking, extra_lock_flags);
+ rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
+ extra_lock_flags);
if (rc != 0)
return rc;
/*
- * Nothing is found, do not access body->fid1 as it is zero and thus
+ * Nothing is found, do not access body->mbo_fid1 as it is zero and thus
* pointless.
*/
if ((it->it_disposition & DISP_LOOKUP_NEG) &&
@@ -205,31 +336,17 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
if (!body)
return -EPROTO;
- /*
- * Not cross-ref case, just get out of here.
- */
- if (likely(!(body->valid & OBD_MD_MDS)))
- return 0;
- /*
- * Okay, MDS has returned success. Probably name has been resolved in
- * remote inode.
- */
- rc = lmv_intent_remote(exp, lmm, lmmsize, it, &op_data->op_fid1, flags,
- reqp, cb_blocking, extra_lock_flags);
- if (rc != 0) {
- LASSERT(rc < 0);
- /*
- * This is possible, that some userspace application will try to
- * open file as directory and we will have -ENOTDIR here. As
- * this is normal situation, we should not print error here,
- * only debug info.
- */
- CDEBUG(D_INODE, "Can't handle remote %s: dir " DFID "(" DFID "):%*s: %d\n",
- LL_IT2STR(it), PFID(&op_data->op_fid2),
- PFID(&op_data->op_fid1), op_data->op_namelen,
- op_data->op_name, rc);
- return rc;
+ /* Not cross-ref case, just get out of here. */
+ if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
+ rc = lmv_intent_remote(exp, it, &op_data->op_fid1, reqp,
+ cb_blocking, extra_lock_flags);
+ if (rc != 0)
+ return rc;
+
+ body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
+ if (!body)
+ return -EPROTO;
}
return rc;
@@ -240,37 +357,102 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
*/
static int lmv_intent_lookup(struct obd_export *exp,
struct md_op_data *op_data,
- void *lmm, int lmmsize, struct lookup_intent *it,
- int flags, struct ptlrpc_request **reqp,
+ struct lookup_intent *it,
+ struct ptlrpc_request **reqp,
ldlm_blocking_callback cb_blocking,
__u64 extra_lock_flags)
{
+ struct lmv_stripe_md *lsm = op_data->op_mea1;
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *tgt = NULL;
struct mdt_body *body;
int rc = 0;
+ /*
+ * If it returns ERR_PTR(-EBADFD) then it is an unknown hash type
+ * it will try all stripes to locate the object
+ */
tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
- if (IS_ERR(tgt))
+ if (IS_ERR(tgt) && (PTR_ERR(tgt) != -EBADFD))
return PTR_ERR(tgt);
+ /*
+ * Both migrating dir and unknown hash dir need to try
+ * all of sub-stripes
+ */
+ if (lsm && !lmv_is_known_hash_type(lsm->lsm_md_hash_type)) {
+ struct lmv_oinfo *oinfo = &lsm->lsm_md_oinfo[0];
+
+ op_data->op_fid1 = oinfo->lmo_fid;
+ op_data->op_mds = oinfo->lmo_mds;
+ tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
+ if (IS_ERR(tgt))
+ return PTR_ERR(tgt);
+ }
+
if (!fid_is_sane(&op_data->op_fid2))
fid_zero(&op_data->op_fid2);
- CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID
- ", name='%s' -> mds #%d\n", PFID(&op_data->op_fid1),
- PFID(&op_data->op_fid2),
+ CDEBUG(D_INODE, "LOOKUP_INTENT with fid1=" DFID ", fid2=" DFID ", name='%s' -> mds #%u lsm=%p lsm_magic=%x\n",
+ PFID(&op_data->op_fid1), PFID(&op_data->op_fid2),
op_data->op_name ? op_data->op_name : "<NULL>",
- tgt->ltd_idx);
+ tgt->ltd_idx, lsm, !lsm ? -1 : lsm->lsm_md_magic);
op_data->op_bias &= ~MDS_CROSS_REF;
- rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
- flags, reqp, cb_blocking, extra_lock_flags);
+ rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
+ extra_lock_flags);
+ if (rc < 0)
+ return rc;
- if (rc < 0 || !*reqp)
+ if (!*reqp) {
+ /*
+ * If RPC happens, lsm information will be revalidated
+ * during update_inode process (see ll_update_lsm_md)
+ */
+ if (op_data->op_mea2) {
+ rc = lmv_revalidate_slaves(exp, op_data->op_mea2,
+ cb_blocking,
+ extra_lock_flags);
+ if (rc != 0)
+ return rc;
+ }
return rc;
+ } else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm &&
+ lmv_need_try_all_stripes(lsm)) {
+ /*
+ * For migrating and unknown hash type directory, it will
+ * try to target the entry on other stripes
+ */
+ int stripe_index;
+
+ for (stripe_index = 1;
+ stripe_index < lsm->lsm_md_stripe_count &&
+ it_disposition(it, DISP_LOOKUP_NEG); stripe_index++) {
+ struct lmv_oinfo *oinfo;
+
+ /* release the previous request */
+ ptlrpc_req_finished(*reqp);
+ it->it_request = NULL;
+ *reqp = NULL;
+
+ oinfo = &lsm->lsm_md_oinfo[stripe_index];
+ tgt = lmv_find_target(lmv, &oinfo->lmo_fid);
+ if (IS_ERR(tgt))
+ return PTR_ERR(tgt);
+
+ CDEBUG(D_INODE, "Try other stripes " DFID"\n",
+ PFID(&oinfo->lmo_fid));
+
+ op_data->op_fid1 = oinfo->lmo_fid;
+ it->it_disposition &= ~DISP_ENQ_COMPLETE;
+ rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp,
+ cb_blocking, extra_lock_flags);
+ if (rc)
+ return rc;
+ }
+ }
/*
* MDS has returned success. Probably name has been resolved in
@@ -279,19 +461,23 @@ static int lmv_intent_lookup(struct obd_export *exp,
body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
if (!body)
return -EPROTO;
- /* Not cross-ref case, just get out of here. */
- if (likely(!(body->valid & OBD_MD_MDS)))
- return 0;
- rc = lmv_intent_remote(exp, lmm, lmmsize, it, NULL, flags, reqp,
- cb_blocking, extra_lock_flags);
+ /* Not cross-ref case, just get out of here. */
+ if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
+ rc = lmv_intent_remote(exp, it, NULL, reqp, cb_blocking,
+ extra_lock_flags);
+ if (rc != 0)
+ return rc;
+ body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
+ if (!body)
+ return -EPROTO;
+ }
return rc;
}
int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
- void *lmm, int lmmsize, struct lookup_intent *it,
- int flags, struct ptlrpc_request **reqp,
+ struct lookup_intent *it, struct ptlrpc_request **reqp,
ldlm_blocking_callback cb_blocking,
__u64 extra_lock_flags)
{
@@ -300,8 +486,9 @@ int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
LASSERT(fid_is_sane(&op_data->op_fid1));
- CDEBUG(D_INODE, "INTENT LOCK '%s' for '%*s' on "DFID"\n",
- LL_IT2STR(it), op_data->op_namelen, op_data->op_name,
+ CDEBUG(D_INODE, "INTENT LOCK '%s' for "DFID" '%*s' on "DFID"\n",
+ LL_IT2STR(it), PFID(&op_data->op_fid2),
+ (int)op_data->op_namelen, op_data->op_name,
PFID(&op_data->op_fid1));
rc = lmv_check_connect(obd);
@@ -309,14 +496,34 @@ int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
return rc;
if (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_LAYOUT))
- rc = lmv_intent_lookup(exp, op_data, lmm, lmmsize, it,
- flags, reqp, cb_blocking,
+ rc = lmv_intent_lookup(exp, op_data, it, reqp, cb_blocking,
extra_lock_flags);
else if (it->it_op & IT_OPEN)
- rc = lmv_intent_open(exp, op_data, lmm, lmmsize, it,
- flags, reqp, cb_blocking,
+ rc = lmv_intent_open(exp, op_data, it, reqp, cb_blocking,
extra_lock_flags);
else
LBUG();
+
+ if (rc < 0) {
+ struct lustre_handle lock_handle;
+
+ if (it->it_lock_mode) {
+ lock_handle.cookie = it->it_lock_handle;
+ ldlm_lock_decref(&lock_handle, it->it_lock_mode);
+ }
+
+ it->it_lock_handle = 0;
+ it->it_lock_mode = 0;
+
+ if (it->it_remote_lock_mode) {
+ lock_handle.cookie = it->it_remote_lock_handle;
+ ldlm_lock_decref(&lock_handle,
+ it->it_remote_lock_mode);
+ }
+
+ it->it_remote_lock_handle = 0;
+ it->it_remote_lock_mode = 0;
+ }
+
return rc;
}
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
index 0beafc49b8d2..52b03745ac19 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h
+++ b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
@@ -35,6 +35,7 @@
#include "../include/lustre/lustre_idl.h"
#include "../include/obd.h"
+#include "../include/lustre_lmv.h"
#define LMV_MAX_TGT_COUNT 128
@@ -44,77 +45,116 @@
int lmv_check_connect(struct obd_device *obd);
int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
- void *lmm, int lmmsize, struct lookup_intent *it,
- int flags, struct ptlrpc_request **reqp,
+ struct lookup_intent *it, struct ptlrpc_request **reqp,
ldlm_blocking_callback cb_blocking,
__u64 extra_lock_flags);
int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, u32 *mds);
int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds);
-int lmv_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
- struct md_op_data *op_data);
+int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
+ struct lu_fid *fid, struct md_op_data *op_data);
-static inline struct lmv_stripe_md *lmv_get_mea(struct ptlrpc_request *req)
-{
- struct mdt_body *body;
- struct lmv_stripe_md *mea;
-
- LASSERT(req);
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- if (!body || !S_ISDIR(body->mode) || !body->eadatasize)
- return NULL;
+int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
+ const union lmv_mds_md *lmm, int stripe_count);
- mea = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD,
- body->eadatasize);
- if (mea->mea_count == 0)
- return NULL;
- if (mea->mea_magic != MEA_MAGIC_LAST_CHAR &&
- mea->mea_magic != MEA_MAGIC_ALL_CHARS &&
- mea->mea_magic != MEA_MAGIC_HASH_SEGMENT)
- return NULL;
+int lmv_revalidate_slaves(struct obd_export *exp,
+ const struct lmv_stripe_md *lsm,
+ ldlm_blocking_callback cb_blocking,
+ int extra_lock_flags);
- return mea;
-}
-
-static inline int lmv_get_easize(struct lmv_obd *lmv)
+static inline struct obd_device *lmv2obd_dev(struct lmv_obd *lmv)
{
- return sizeof(struct lmv_stripe_md) +
- lmv->desc.ld_tgt_count *
- sizeof(struct lu_fid);
+ return container_of0(lmv, struct obd_device, u.lmv);
}
static inline struct lmv_tgt_desc *
-lmv_get_target(struct lmv_obd *lmv, u32 mds)
+lmv_get_target(struct lmv_obd *lmv, u32 mdt_idx, int *index)
{
- int count = lmv->desc.ld_tgt_count;
int i;
- for (i = 0; i < count; i++) {
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
if (!lmv->tgts[i])
continue;
- if (lmv->tgts[i]->ltd_idx == mds)
+ if (lmv->tgts[i]->ltd_idx == mdt_idx) {
+ if (index)
+ *index = i;
return lmv->tgts[i];
+ }
}
return ERR_PTR(-ENODEV);
}
-static inline struct lmv_tgt_desc *
-lmv_find_target(struct lmv_obd *lmv, const struct lu_fid *fid)
+static inline int
+lmv_find_target_index(struct lmv_obd *lmv, const struct lu_fid *fid)
{
- u32 mds = 0;
- int rc;
+ struct lmv_tgt_desc *ltd;
+ u32 mdt_idx = 0;
+ int index = 0;
if (lmv->desc.ld_tgt_count > 1) {
- rc = lmv_fld_lookup(lmv, fid, &mds);
- if (rc)
- return ERR_PTR(rc);
+ int rc;
+
+ rc = lmv_fld_lookup(lmv, fid, &mdt_idx);
+ if (rc < 0)
+ return rc;
}
- return lmv_get_target(lmv, mds);
+ ltd = lmv_get_target(lmv, mdt_idx, &index);
+ if (IS_ERR(ltd))
+ return PTR_ERR(ltd);
+
+ return index;
+}
+
+static inline struct lmv_tgt_desc *
+lmv_find_target(struct lmv_obd *lmv, const struct lu_fid *fid)
+{
+ int index;
+
+ index = lmv_find_target_index(lmv, fid);
+ if (index < 0)
+ return ERR_PTR(index);
+
+ return lmv->tgts[index];
+}
+
+static inline int lmv_stripe_md_size(int stripe_count)
+{
+ struct lmv_stripe_md *lsm;
+
+ return sizeof(*lsm) + stripe_count * sizeof(lsm->lsm_md_oinfo[0]);
+}
+
+int lmv_name_to_stripe_index(enum lmv_hash_type hashtype,
+ unsigned int max_mdt_index,
+ const char *name, int namelen);
+
+static inline const struct lmv_oinfo *
+lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name,
+ int namelen)
+{
+ int stripe_index;
+
+ stripe_index = lmv_name_to_stripe_index(lsm->lsm_md_hash_type,
+ lsm->lsm_md_stripe_count,
+ name, namelen);
+ if (stripe_index < 0)
+ return ERR_PTR(stripe_index);
+
+ LASSERTF(stripe_index < lsm->lsm_md_stripe_count,
+ "stripe_index = %d, stripe_count = %d hash_type = %x name = %.*s\n",
+ stripe_index, lsm->lsm_md_stripe_count,
+ lsm->lsm_md_hash_type, namelen, name);
+
+ return &lsm->lsm_md_oinfo[stripe_index];
+}
+
+static inline bool lmv_need_try_all_stripes(const struct lmv_stripe_md *lsm)
+{
+ return !lmv_is_known_hash_type(lsm->lsm_md_hash_type) ||
+ lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION;
}
struct lmv_tgt_desc
@@ -123,6 +163,6 @@ struct lmv_tgt_desc
/* lproc_lmv.c */
void lprocfs_lmv_init_vars(struct lprocfs_static_vars *lvars);
-extern struct file_operations lmv_proc_target_fops;
+extern const struct file_operations lmv_proc_target_fops;
#endif
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index 0e1588a43187..7dbb2b946acf 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -43,12 +43,13 @@
#include "../include/lustre/lustre_idl.h"
#include "../include/obd_support.h"
-#include "../include/lustre_lib.h"
#include "../include/lustre_net.h"
#include "../include/obd_class.h"
+#include "../include/lustre_lmv.h"
#include "../include/lprocfs_status.h"
-#include "../include/lustre_lite.h"
+#include "../include/cl_object.h"
#include "../include/lustre_fid.h"
+#include "../include/lustre/lustre_ioctl.h"
#include "../include/lustre_kernelcomm.h"
#include "lmv_internal.h"
@@ -70,12 +71,12 @@ static void lmv_activate_target(struct lmv_obd *lmv,
* -ENOTCONN: The UUID is found, but the target connection is bad (!)
* -EBADF : The UUID is found, but the OBD of the wrong type (!)
*/
-static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
+static int lmv_set_mdc_active(struct lmv_obd *lmv, const struct obd_uuid *uuid,
int activate)
{
struct lmv_tgt_desc *uninitialized_var(tgt);
struct obd_device *obd;
- int i;
+ u32 i;
int rc = 0;
CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
@@ -244,36 +245,12 @@ static int lmv_connect(const struct lu_env *env,
return rc;
}
-static void lmv_set_timeouts(struct obd_device *obd)
-{
- struct lmv_obd *lmv;
- int i;
-
- lmv = &obd->u.lmv;
- if (lmv->server_timeout == 0)
- return;
-
- if (lmv->connected == 0)
- return;
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- struct lmv_tgt_desc *tgt = lmv->tgts[i];
-
- tgt = lmv->tgts[i];
- if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
- continue;
-
- obd_set_info_async(NULL, tgt->ltd_exp, sizeof(KEY_INTERMDS),
- KEY_INTERMDS, 0, NULL, NULL);
- }
-}
-
-static int lmv_init_ea_size(struct obd_export *exp, int easize,
- int def_easize, int cookiesize, int def_cookiesize)
+static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize,
+ u32 cookiesize, u32 def_cookiesize)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
- int i;
+ u32 i;
int rc = 0;
int change = 0;
@@ -420,6 +397,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
{
struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *tgt;
+ int orig_tgt_count = 0;
int rc = 0;
CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index);
@@ -489,14 +467,17 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
tgt->ltd_uuid = *uuidp;
tgt->ltd_active = 0;
lmv->tgts[index] = tgt;
- if (index >= lmv->desc.ld_tgt_count)
+ if (index >= lmv->desc.ld_tgt_count) {
+ orig_tgt_count = lmv->desc.ld_tgt_count;
lmv->desc.ld_tgt_count = index + 1;
+ }
if (lmv->connected) {
rc = lmv_connect_mdc(obd, tgt);
if (rc) {
spin_lock(&lmv->lmv_lock);
- lmv->desc.ld_tgt_count--;
+ if (lmv->desc.ld_tgt_count == index + 1)
+ lmv->desc.ld_tgt_count = orig_tgt_count;
memset(tgt, 0, sizeof(*tgt));
spin_unlock(&lmv->lmv_lock);
} else {
@@ -514,7 +495,7 @@ int lmv_check_connect(struct obd_device *obd)
{
struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *tgt;
- int i;
+ u32 i;
int rc;
int easize;
@@ -554,10 +535,9 @@ int lmv_check_connect(struct obd_device *obd)
goto out_disc;
}
- lmv_set_timeouts(obd);
class_export_put(lmv->exp);
lmv->connected = 1;
- easize = lmv_get_easize(lmv);
+ easize = lmv_mds_md_size(lmv->desc.ld_tgt_count, LMV_MAGIC);
lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0);
mutex_unlock(&lmv->lmv_init_mutex);
return 0;
@@ -629,7 +609,7 @@ static int lmv_disconnect(struct obd_export *exp)
struct obd_device *obd = class_exp2obd(exp);
struct lmv_obd *lmv = &obd->u.lmv;
int rc;
- int i;
+ u32 i;
if (!lmv->tgts)
goto out_local;
@@ -758,7 +738,7 @@ static int lmv_hsm_req_count(struct lmv_obd *lmv,
const struct hsm_user_request *hur,
const struct lmv_tgt_desc *tgt_mds)
{
- int i, nr = 0;
+ u32 i, nr = 0;
struct lmv_tgt_desc *curr_tgt;
/* count how many requests must be sent to the given target */
@@ -885,10 +865,8 @@ static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len,
rc = libcfs_kkuc_group_add(filp, lk->lk_uid, lk->lk_group,
&kcd, sizeof(kcd));
- if (rc) {
- if (filp)
- fput(filp);
- }
+ if (rc)
+ fput(filp);
return rc;
}
@@ -899,10 +877,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
struct obd_device *obddev = class_exp2obd(exp);
struct lmv_obd *lmv = &obddev->u.lmv;
struct lmv_tgt_desc *tgt = NULL;
- int i = 0;
+ u32 i = 0;
int rc = 0;
int set = 0;
- int count = lmv->desc.ld_tgt_count;
+ u32 count = lmv->desc.ld_tgt_count;
if (count == 0)
return -ENOTTY;
@@ -1011,6 +989,21 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
break;
}
+ case LL_IOC_FID2MDTIDX: {
+ struct lu_fid *fid = karg;
+ int mdt_index;
+
+ rc = lmv_fld_lookup(lmv, fid, &mdt_index);
+ if (rc)
+ return rc;
+
+ /*
+ * Note: this is from llite(see ll_dir_ioctl()), @uarg does not
+ * point to user space memory for FID2MDTIDX.
+ */
+ *(__u32 *)uarg = mdt_index;
+ break;
+ }
case OBD_IOC_FID2PATH: {
rc = lmv_fid2path(exp, len, karg, uarg);
break;
@@ -1169,32 +1162,37 @@ static int lmv_placement_policy(struct obd_device *obd,
return 0;
}
+ if (op_data->op_default_stripe_offset != -1) {
+ *mds = op_data->op_default_stripe_offset;
+ return 0;
+ }
+
/**
* If stripe_offset is provided during setdirstripe
* (setdirstripe -i xx), xx MDS will be chosen.
*/
- if (op_data->op_cli_flags & CLI_SET_MEA) {
+ if (op_data->op_cli_flags & CLI_SET_MEA && op_data->op_data) {
struct lmv_user_md *lum;
- lum = (struct lmv_user_md *)op_data->op_data;
- if (lum->lum_type == LMV_STRIPE_TYPE &&
- lum->lum_stripe_offset != -1) {
- if (lum->lum_stripe_offset >= lmv->desc.ld_tgt_count) {
- CERROR("%s: Stripe_offset %d > MDT count %d: rc = %d\n",
- obd->obd_name,
- lum->lum_stripe_offset,
- lmv->desc.ld_tgt_count, -ERANGE);
- return -ERANGE;
- }
- *mds = lum->lum_stripe_offset;
- return 0;
+ lum = op_data->op_data;
+ if (le32_to_cpu(lum->lum_stripe_offset) != (__u32)-1) {
+ *mds = le32_to_cpu(lum->lum_stripe_offset);
+ } else {
+ /*
+ * -1 means default, which will be in the same MDT with
+ * the stripe
+ */
+ *mds = op_data->op_mds;
+ lum->lum_stripe_offset = cpu_to_le32(op_data->op_mds);
}
+ } else {
+ /*
+ * Allocate new fid on target according to operation type and
+ * parent home mds.
+ */
+ *mds = op_data->op_mds;
}
- /* Allocate new fid on target according to operation type and parent
- * home mds.
- */
- *mds = op_data->op_mds;
return 0;
}
@@ -1203,7 +1201,7 @@ int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds)
struct lmv_tgt_desc *tgt;
int rc;
- tgt = lmv_get_target(lmv, mds);
+ tgt = lmv_get_target(lmv, mds, NULL);
if (IS_ERR(tgt))
return PTR_ERR(tgt);
@@ -1221,7 +1219,7 @@ int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds)
/*
* Asking underlaying tgt layer to allocate new fid.
*/
- rc = obd_fid_alloc(tgt->ltd_exp, fid, NULL);
+ rc = obd_fid_alloc(NULL, tgt->ltd_exp, fid, NULL);
if (rc > 0) {
LASSERT(fid_is_sane(fid));
rc = 0;
@@ -1232,8 +1230,8 @@ out:
return rc;
}
-int lmv_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
- struct md_op_data *op_data)
+int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
+ struct lu_fid *fid, struct md_op_data *op_data)
{
struct obd_device *obd = class_exp2obd(exp);
struct lmv_obd *lmv = &obd->u.lmv;
@@ -1278,10 +1276,10 @@ static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
return -EINVAL;
}
- lmv->tgts = kcalloc(32, sizeof(*lmv->tgts), GFP_NOFS);
+ lmv->tgts_size = 32U;
+ lmv->tgts = kcalloc(lmv->tgts_size, sizeof(*lmv->tgts), GFP_NOFS);
if (!lmv->tgts)
return -ENOMEM;
- lmv->tgts_size = 32;
obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid);
lmv->desc.ld_tgt_count = 0;
@@ -1354,7 +1352,7 @@ static int lmv_process_config(struct obd_device *obd, u32 len, void *buf)
obd_str2uuid(&obd_uuid, lustre_cfg_buf(lcfg, 1));
- if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) {
+ if (sscanf(lustre_cfg_buf(lcfg, 2), "%u", &index) != 1) {
rc = -EINVAL;
goto out;
}
@@ -1380,7 +1378,7 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
struct lmv_obd *lmv = &obd->u.lmv;
struct obd_statfs *temp;
int rc = 0;
- int i;
+ u32 i;
rc = lmv_check_connect(obd);
if (rc)
@@ -1522,7 +1520,7 @@ static int lmv_null_inode(struct obd_export *exp, const struct lu_fid *fid)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
- int i;
+ u32 i;
int rc;
rc = lmv_check_connect(obd);
@@ -1545,36 +1543,6 @@ static int lmv_null_inode(struct obd_export *exp, const struct lu_fid *fid)
return 0;
}
-static int lmv_find_cbdata(struct obd_export *exp, const struct lu_fid *fid,
- ldlm_iterator_t it, void *data)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- int i;
- int rc;
-
- rc = lmv_check_connect(obd);
- if (rc)
- return rc;
-
- CDEBUG(D_INODE, "CBDATA for "DFID"\n", PFID(fid));
-
- /*
- * With DNE every object can have two locks in different namespaces:
- * lookup lock in space of MDT storing direntry and update/open lock in
- * space of MDT storing inode.
- */
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp)
- continue;
- rc = md_find_cbdata(lmv->tgts[i]->ltd_exp, fid, it, data);
- if (rc)
- return rc;
- }
-
- return rc;
-}
-
static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
struct md_open_data *mod, struct ptlrpc_request **request)
{
@@ -1596,25 +1564,116 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
return rc;
}
-struct lmv_tgt_desc
-*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
- struct lu_fid *fid)
+/**
+ * Choosing the MDT by name or FID in @op_data.
+ * For non-striped directory, it will locate MDT by fid.
+ * For striped-directory, it will locate MDT by name. And also
+ * it will reset op_fid1 with the FID of the chosen stripe.
+ **/
+static struct lmv_tgt_desc *
+lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
+ const char *name, int namelen, struct lu_fid *fid,
+ u32 *mds)
+{
+ const struct lmv_oinfo *oinfo;
+ struct lmv_tgt_desc *tgt;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_NAME_HASH)) {
+ if (cfs_fail_val >= lsm->lsm_md_stripe_count)
+ return ERR_PTR(-EBADF);
+ oinfo = &lsm->lsm_md_oinfo[cfs_fail_val];
+ } else {
+ oinfo = lsm_name_to_stripe_info(lsm, name, namelen);
+ if (IS_ERR(oinfo))
+ return ERR_CAST(oinfo);
+ }
+
+ if (fid)
+ *fid = oinfo->lmo_fid;
+ if (mds)
+ *mds = oinfo->lmo_mds;
+
+ tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
+
+ CDEBUG(D_INFO, "locate on mds %u " DFID "\n", oinfo->lmo_mds,
+ PFID(&oinfo->lmo_fid));
+ return tgt;
+}
+
+/**
+ * Locate mds by fid or name
+ *
+ * For striped directory (lsm != NULL), it will locate the stripe
+ * by name hash (see lsm_name_to_stripe_info()). Note: if the hash_type
+ * is unknown, it will return -EBADFD, and lmv_intent_lookup might need
+ * to walk through all of the stripes to locate the entry.
+ *
+ * For normal directory, it will locate MDS by FID directly.
+ * \param[in] lmv LMV device
+ * \param[in] op_data client MD stack parameters, name, namelen
+ * mds_num etc.
+ * \param[in] fid object FID used to locate MDS.
+ *
+ * retval pointer to the lmv_tgt_desc if succeed.
+ * ERR_PTR(errno) if failed.
+ */
+struct lmv_tgt_desc*
+lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
+ struct lu_fid *fid)
{
+ struct lmv_stripe_md *lsm = op_data->op_mea1;
struct lmv_tgt_desc *tgt;
- tgt = lmv_find_target(lmv, fid);
- if (IS_ERR(tgt))
+ /*
+ * During creating VOLATILE file, it should honor the mdt
+ * index if the file under striped dir is being restored, see
+ * ct_restore().
+ */
+ if (op_data->op_bias & MDS_CREATE_VOLATILE &&
+ (int)op_data->op_mds != -1 && lsm) {
+ int i;
+
+ tgt = lmv_get_target(lmv, op_data->op_mds, NULL);
+ if (IS_ERR(tgt))
+ return tgt;
+
+ /* refill the right parent fid */
+ for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
+ struct lmv_oinfo *oinfo;
+
+ oinfo = &lsm->lsm_md_oinfo[i];
+ if (oinfo->lmo_mds == op_data->op_mds) {
+ *fid = oinfo->lmo_fid;
+ break;
+ }
+ }
+
+ /* Hmm, can not find the stripe by mdt_index(op_mds) */
+ if (i == lsm->lsm_md_stripe_count)
+ tgt = ERR_PTR(-EINVAL);
+
return tgt;
+ }
- op_data->op_mds = tgt->ltd_idx;
+ if (!lsm || !op_data->op_namelen) {
+ tgt = lmv_find_target(lmv, fid);
+ if (IS_ERR(tgt))
+ return tgt;
- return tgt;
+ op_data->op_mds = tgt->ltd_idx;
+
+ return tgt;
+ }
+
+ return lmv_locate_target_for_name(lmv, lsm, op_data->op_name,
+ op_data->op_namelen, fid,
+ &op_data->op_mds);
}
static int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
- const void *data, int datalen, int mode, __u32 uid,
- __u32 gid, cfs_cap_t cap_effective, __u64 rdev,
- struct ptlrpc_request **request)
+ const void *data, size_t datalen, umode_t mode,
+ uid_t uid, gid_t gid, cfs_cap_t cap_effective,
+ __u64 rdev, struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
@@ -1632,13 +1691,30 @@ static int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
if (IS_ERR(tgt))
return PTR_ERR(tgt);
- rc = lmv_fid_alloc(exp, &op_data->op_fid2, op_data);
+ CDEBUG(D_INODE, "CREATE name '%.*s' on "DFID" -> mds #%x\n",
+ (int)op_data->op_namelen, op_data->op_name,
+ PFID(&op_data->op_fid1), op_data->op_mds);
+
+ rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
if (rc)
return rc;
- CDEBUG(D_INODE, "CREATE '%*s' on "DFID" -> mds #%x\n",
- op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
- op_data->op_mds);
+ if (exp_connect_flags(exp) & OBD_CONNECT_DIR_STRIPE) {
+ /*
+ * Send the create request to the MDT where the object
+ * will be located
+ */
+ tgt = lmv_find_target(lmv, &op_data->op_fid2);
+ if (IS_ERR(tgt))
+ return PTR_ERR(tgt);
+
+ op_data->op_mds = tgt->ltd_idx;
+ } else {
+ CDEBUG(D_CONFIG, "Server doesn't support striped dirs\n");
+ }
+
+ CDEBUG(D_INODE, "CREATE obj "DFID" -> mds #%x\n",
+ PFID(&op_data->op_fid1), op_data->op_mds);
op_data->op_flags |= MF_MDC_CANCEL_FID1;
rc = md_create(tgt->ltd_exp, op_data, data, datalen, mode, uid, gid,
@@ -1674,70 +1750,10 @@ static int lmv_done_writing(struct obd_export *exp,
}
static int
-lmv_enqueue_remote(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
- struct lookup_intent *it, struct md_op_data *op_data,
- struct lustre_handle *lockh, void *lmm, int lmmsize,
- __u64 extra_lock_flags)
-{
- struct ptlrpc_request *req = it->it_request;
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lustre_handle plock;
- struct lmv_tgt_desc *tgt;
- struct md_op_data *rdata;
- struct lu_fid fid1;
- struct mdt_body *body;
- int rc = 0;
- int pmode;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- if (!(body->valid & OBD_MD_MDS))
- return 0;
-
- CDEBUG(D_INODE, "REMOTE_ENQUEUE '%s' on "DFID" -> "DFID"\n",
- LL_IT2STR(it), PFID(&op_data->op_fid1), PFID(&body->fid1));
-
- /*
- * We got LOOKUP lock, but we really need attrs.
- */
- pmode = it->it_lock_mode;
- LASSERT(pmode != 0);
- memcpy(&plock, lockh, sizeof(plock));
- it->it_lock_mode = 0;
- it->it_request = NULL;
- fid1 = body->fid1;
-
- ptlrpc_req_finished(req);
-
- tgt = lmv_find_target(lmv, &fid1);
- if (IS_ERR(tgt)) {
- rc = PTR_ERR(tgt);
- goto out;
- }
-
- rdata = kzalloc(sizeof(*rdata), GFP_NOFS);
- if (!rdata) {
- rc = -ENOMEM;
- goto out;
- }
-
- rdata->op_fid1 = fid1;
- rdata->op_bias = MDS_CROSS_REF;
-
- rc = md_enqueue(tgt->ltd_exp, einfo, it, rdata, lockh,
- lmm, lmmsize, NULL, extra_lock_flags);
- kfree(rdata);
-out:
- ldlm_lock_decref(&plock, pmode);
- return rc;
-}
-
-static int
lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
+ const ldlm_policy_data_t *policy,
struct lookup_intent *it, struct md_op_data *op_data,
- struct lustre_handle *lockh, void *lmm, int lmmsize,
- struct ptlrpc_request **req, __u64 extra_lock_flags)
+ struct lustre_handle *lockh, __u64 extra_lock_flags)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
@@ -1755,22 +1771,18 @@ lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
if (IS_ERR(tgt))
return PTR_ERR(tgt);
- CDEBUG(D_INODE, "ENQUEUE '%s' on "DFID" -> mds #%d\n",
+ CDEBUG(D_INODE, "ENQUEUE '%s' on " DFID " -> mds #%u\n",
LL_IT2STR(it), PFID(&op_data->op_fid1), tgt->ltd_idx);
- rc = md_enqueue(tgt->ltd_exp, einfo, it, op_data, lockh,
- lmm, lmmsize, req, extra_lock_flags);
+ rc = md_enqueue(tgt->ltd_exp, einfo, policy, it, op_data, lockh,
+ extra_lock_flags);
- if (rc == 0 && it && it->it_op == IT_OPEN) {
- rc = lmv_enqueue_remote(exp, einfo, it, op_data, lockh,
- lmm, lmmsize, extra_lock_flags);
- }
return rc;
}
static int
lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
+ struct ptlrpc_request **preq)
{
struct ptlrpc_request *req = NULL;
struct obd_device *obd = exp->exp_obd;
@@ -1787,26 +1799,25 @@ lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
if (IS_ERR(tgt))
return PTR_ERR(tgt);
- CDEBUG(D_INODE, "GETATTR_NAME for %*s on "DFID" -> mds #%d\n",
- op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
- tgt->ltd_idx);
+ CDEBUG(D_INODE, "GETATTR_NAME for %*s on " DFID " -> mds #%u\n",
+ (int)op_data->op_namelen, op_data->op_name,
+ PFID(&op_data->op_fid1), tgt->ltd_idx);
- rc = md_getattr_name(tgt->ltd_exp, op_data, request);
+ rc = md_getattr_name(tgt->ltd_exp, op_data, preq);
if (rc != 0)
return rc;
- body = req_capsule_server_get(&(*request)->rq_pill,
- &RMF_MDT_BODY);
-
- if (body->valid & OBD_MD_MDS) {
- struct lu_fid rid = body->fid1;
+ body = req_capsule_server_get(&(*preq)->rq_pill, &RMF_MDT_BODY);
+ if (body->mbo_valid & OBD_MD_MDS) {
+ struct lu_fid rid = body->mbo_fid1;
CDEBUG(D_INODE, "Request attrs for "DFID"\n",
PFID(&rid));
tgt = lmv_find_target(lmv, &rid);
if (IS_ERR(tgt)) {
- ptlrpc_req_finished(*request);
+ ptlrpc_req_finished(*preq);
+ *preq = NULL;
return PTR_ERR(tgt);
}
@@ -1815,8 +1826,8 @@ lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
op_data->op_namelen = 0;
op_data->op_name = NULL;
rc = md_getattr_name(tgt->ltd_exp, op_data, &req);
- ptlrpc_req_finished(*request);
- *request = req;
+ ptlrpc_req_finished(*preq);
+ *preq = req;
}
return rc;
@@ -1829,23 +1840,24 @@ lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
fl == MF_MDC_CANCEL_FID4 ? &op_data->op_fid4 : \
NULL)
-static int lmv_early_cancel(struct obd_export *exp, struct md_op_data *op_data,
- int op_tgt, enum ldlm_mode mode, int bits,
- int flag)
+static int lmv_early_cancel(struct obd_export *exp, struct lmv_tgt_desc *tgt,
+ struct md_op_data *op_data, int op_tgt,
+ enum ldlm_mode mode, int bits, int flag)
{
struct lu_fid *fid = md_op_data_fid(op_data, flag);
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
ldlm_policy_data_t policy = { {0} };
int rc = 0;
if (!fid_is_sane(fid))
return 0;
- tgt = lmv_find_target(lmv, fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
+ if (!tgt) {
+ tgt = lmv_find_target(lmv, fid);
+ if (IS_ERR(tgt))
+ return PTR_ERR(tgt);
+ }
if (tgt->ltd_idx != op_tgt) {
CDEBUG(D_INODE, "EARLY_CANCEL on "DFID"\n", PFID(fid));
@@ -1882,12 +1894,24 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
LASSERT(op_data->op_namelen != 0);
CDEBUG(D_INODE, "LINK "DFID":%*s to "DFID"\n",
- PFID(&op_data->op_fid2), op_data->op_namelen,
+ PFID(&op_data->op_fid2), (int)op_data->op_namelen,
op_data->op_name, PFID(&op_data->op_fid1));
op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
op_data->op_cap = cfs_curproc_cap_pack();
+ if (op_data->op_mea2) {
+ struct lmv_stripe_md *lsm = op_data->op_mea2;
+ const struct lmv_oinfo *oinfo;
+
+ oinfo = lsm_name_to_stripe_info(lsm, op_data->op_name,
+ op_data->op_namelen);
+ if (IS_ERR(oinfo))
+ return PTR_ERR(oinfo);
+
+ op_data->op_fid2 = oinfo->lmo_fid;
+ }
+
tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
if (IS_ERR(tgt))
return PTR_ERR(tgt);
@@ -1896,7 +1920,7 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
* Cancel UPDATE lock on child (fid1).
*/
op_data->op_flags |= MF_MDC_CANCEL_FID2;
- rc = lmv_early_cancel(exp, op_data, tgt->ltd_idx, LCK_EX,
+ rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX,
MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1);
if (rc != 0)
return rc;
@@ -1907,20 +1931,22 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
}
static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
- const char *old, int oldlen, const char *new, int newlen,
+ const char *old, size_t oldlen,
+ const char *new, size_t newlen,
struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *src_tgt;
- struct lmv_tgt_desc *tgt_tgt;
int rc;
LASSERT(oldlen != 0);
- CDEBUG(D_INODE, "RENAME %*s in "DFID" to %*s in "DFID"\n",
- oldlen, old, PFID(&op_data->op_fid1),
- newlen, new, PFID(&op_data->op_fid2));
+ CDEBUG(D_INODE, "RENAME %.*s in "DFID":%d to %.*s in "DFID":%d\n",
+ (int)oldlen, old, PFID(&op_data->op_fid1),
+ op_data->op_mea1 ? op_data->op_mea1->lsm_md_stripe_count : 0,
+ (int)newlen, new, PFID(&op_data->op_fid2),
+ op_data->op_mea2 ? op_data->op_mea2->lsm_md_stripe_count : 0);
rc = lmv_check_connect(obd);
if (rc)
@@ -1929,13 +1955,60 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
op_data->op_cap = cfs_curproc_cap_pack();
- src_tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+
+ if (op_data->op_cli_flags & CLI_MIGRATE) {
+ LASSERTF(fid_is_sane(&op_data->op_fid3), "invalid FID "DFID"\n",
+ PFID(&op_data->op_fid3));
+
+ if (op_data->op_mea1) {
+ struct lmv_stripe_md *lsm = op_data->op_mea1;
+ struct lmv_tgt_desc *tmp;
+
+ /* Fix the parent fid for striped dir */
+ tmp = lmv_locate_target_for_name(lmv, lsm, old,
+ oldlen,
+ &op_data->op_fid1,
+ NULL);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+ }
+
+ rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
+ if (rc)
+ return rc;
+ src_tgt = lmv_find_target(lmv, &op_data->op_fid3);
+ } else {
+ if (op_data->op_mea1) {
+ struct lmv_stripe_md *lsm = op_data->op_mea1;
+
+ src_tgt = lmv_locate_target_for_name(lmv, lsm, old,
+ oldlen,
+ &op_data->op_fid1,
+ &op_data->op_mds);
+ if (IS_ERR(src_tgt))
+ return PTR_ERR(src_tgt);
+ } else {
+ src_tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(src_tgt))
+ return PTR_ERR(src_tgt);
+
+ op_data->op_mds = src_tgt->ltd_idx;
+ }
+
+ if (op_data->op_mea2) {
+ struct lmv_stripe_md *lsm = op_data->op_mea2;
+ const struct lmv_oinfo *oinfo;
+
+ oinfo = lsm_name_to_stripe_info(lsm, new, newlen);
+ if (IS_ERR(oinfo))
+ return PTR_ERR(oinfo);
+
+ op_data->op_fid2 = oinfo->lmo_fid;
+ }
+ }
if (IS_ERR(src_tgt))
return PTR_ERR(src_tgt);
- tgt_tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
- if (IS_ERR(tgt_tgt))
- return PTR_ERR(tgt_tgt);
/*
* LOOKUP lock on src child (fid3) should also be cancelled for
* src_tgt in mdc_rename.
@@ -1946,35 +2019,53 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
* Cancel UPDATE locks on tgt parent (fid2), tgt_tgt is its
* own target.
*/
- rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx,
+ rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
LCK_EX, MDS_INODELOCK_UPDATE,
MF_MDC_CANCEL_FID2);
-
+ if (rc)
+ return rc;
/*
- * Cancel LOOKUP locks on tgt child (fid4) for parent tgt_tgt.
+ * Cancel LOOKUP locks on source child (fid3) for parent tgt_tgt.
*/
- if (rc == 0) {
- rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx,
+ if (fid_is_sane(&op_data->op_fid3)) {
+ struct lmv_tgt_desc *tgt;
+
+ tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ return PTR_ERR(tgt);
+
+ /* Cancel LOOKUP lock on its parent */
+ rc = lmv_early_cancel(exp, tgt, op_data, src_tgt->ltd_idx,
LCK_EX, MDS_INODELOCK_LOOKUP,
- MF_MDC_CANCEL_FID4);
+ MF_MDC_CANCEL_FID3);
+ if (rc)
+ return rc;
+
+ rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
+ LCK_EX, MDS_INODELOCK_FULL,
+ MF_MDC_CANCEL_FID3);
+ if (rc)
+ return rc;
}
/*
* Cancel all the locks on tgt child (fid4).
*/
- if (rc == 0)
- rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx,
+ if (fid_is_sane(&op_data->op_fid4))
+ rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
LCK_EX, MDS_INODELOCK_FULL,
MF_MDC_CANCEL_FID4);
- if (rc == 0)
- rc = md_rename(src_tgt->ltd_exp, op_data, old, oldlen,
- new, newlen, request);
+ CDEBUG(D_INODE, DFID":m%d to "DFID"\n", PFID(&op_data->op_fid1),
+ op_data->op_mds, PFID(&op_data->op_fid2));
+
+ rc = md_rename(src_tgt->ltd_exp, op_data, old, oldlen,
+ new, newlen, request);
return rc;
}
static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, int ealen, void *ea2, int ea2len,
+ void *ea, size_t ealen, void *ea2, size_t ea2len,
struct ptlrpc_request **request,
struct md_open_data **mod)
{
@@ -2021,169 +2112,419 @@ static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid,
return rc;
}
-/*
- * Adjust a set of pages, each page containing an array of lu_dirpages,
- * so that each page can be used as a single logical lu_dirpage.
+/**
+ * Get current minimum entry from striped directory
*
- * A lu_dirpage is laid out as follows, where s = ldp_hash_start,
- * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a
- * struct lu_dirent. It has size up to LU_PAGE_SIZE. The ldp_hash_end
- * value is used as a cookie to request the next lu_dirpage in a
- * directory listing that spans multiple pages (two in this example):
- * ________
- * | |
- * .|--------v------- -----.
- * |s|e|f|p|ent|ent| ... |ent|
- * '--|-------------- -----' Each CFS_PAGE contains a single
- * '------. lu_dirpage.
- * .---------v------- -----.
- * |s|e|f|p|ent| 0 | ... | 0 |
- * '----------------- -----'
+ * This function searches all of the sub-stripes for the dir entry whose
+ * hash value is the closest (>=) to @hash_offset. It is only called for
+ * striped directories.
*
- * However, on hosts where the native VM page size (PAGE_SIZE) is
- * larger than LU_PAGE_SIZE, a single host page may contain multiple
- * lu_dirpages. After reading the lu_dirpages from the MDS, the
- * ldp_hash_end of the first lu_dirpage refers to the one immediately
- * after it in the same CFS_PAGE (arrows simplified for brevity, but
- * in general e0==s1, e1==s2, etc.):
+ * \param[in] exp export of LMV
+ * \param[in] op_data parameters transferred between client MD stacks;
+ * stripe information is included in this
+ * parameter
+ * \param[in] cb_op ldlm callback being used in enqueue in
+ * mdc_read_page
+ * \param[in] hash_offset the hash value, which is used to locate
+ * the minimum (closest) dir entry
+ * \param[in,out] stripe_offset the caller uses this to indicate the stripe
+ * index of the last entry, to avoid hash conflicts
+ * between stripes. It is also used to
+ * return the stripe index of the current dir entry.
+ * \param[in,out] entp the minimum entry; it is also used
+ * to pass in the last dir entry to resolve
+ * hash conflicts
*
- * .-------------------- -----.
- * |s0|e0|f0|p|ent|ent| ... |ent|
- * |---v---------------- -----|
- * |s1|e1|f1|p|ent|ent| ... |ent|
- * |---v---------------- -----| Here, each CFS_PAGE contains
- * ... multiple lu_dirpages.
- * |---v---------------- -----|
- * |s'|e'|f'|p|ent|ent| ... |ent|
- * '---|---------------- -----'
- * v
- * .----------------------------.
- * | next CFS_PAGE |
+ * \param[out] ppage the page which holds the minimum entry
*
- * This structure is transformed into a single logical lu_dirpage as follows:
+ * \retval = 0 get the entry successfully
+ * negative errno (< 0) does not get the entry
+ */
+static int lmv_get_min_striped_entry(struct obd_export *exp,
+ struct md_op_data *op_data,
+ struct md_callback *cb_op,
+ __u64 hash_offset, int *stripe_offset,
+ struct lu_dirent **entp,
+ struct page **ppage)
+{
+ struct lmv_stripe_md *lsm = op_data->op_mea1;
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lu_dirent *min_ent = NULL;
+ struct page *min_page = NULL;
+ struct lmv_tgt_desc *tgt;
+ int stripe_count;
+ int min_idx = 0;
+ int rc = 0;
+ int i;
+
+ stripe_count = lsm->lsm_md_stripe_count;
+ for (i = 0; i < stripe_count; i++) {
+ __u64 stripe_hash = hash_offset;
+ struct lu_dirent *ent = NULL;
+ struct page *page = NULL;
+ struct lu_dirpage *dp;
+
+ tgt = lmv_get_target(lmv, lsm->lsm_md_oinfo[i].lmo_mds, NULL);
+ if (IS_ERR(tgt)) {
+ rc = PTR_ERR(tgt);
+ goto out;
+ }
+
+ /*
+ * op_data will be shared by each stripe, so we need to
+ * reset these values for each stripe
+ */
+ op_data->op_fid1 = lsm->lsm_md_oinfo[i].lmo_fid;
+ op_data->op_fid2 = lsm->lsm_md_oinfo[i].lmo_fid;
+ op_data->op_data = lsm->lsm_md_oinfo[i].lmo_root;
+next:
+ rc = md_read_page(tgt->ltd_exp, op_data, cb_op, stripe_hash,
+ &page);
+ if (rc)
+ goto out;
+
+ dp = page_address(page);
+ for (ent = lu_dirent_start(dp); ent;
+ ent = lu_dirent_next(ent)) {
+ /* Skip dummy entry */
+ if (!le16_to_cpu(ent->lde_namelen))
+ continue;
+
+ if (le64_to_cpu(ent->lde_hash) < hash_offset)
+ continue;
+
+ if (le64_to_cpu(ent->lde_hash) == hash_offset &&
+ (*entp == ent || i < *stripe_offset))
+ continue;
+
+ /* skip . and .. for other stripes */
+ if (i && (!strncmp(ent->lde_name, ".",
+ le16_to_cpu(ent->lde_namelen)) ||
+ !strncmp(ent->lde_name, "..",
+ le16_to_cpu(ent->lde_namelen))))
+ continue;
+ break;
+ }
+
+ if (!ent) {
+ stripe_hash = le64_to_cpu(dp->ldp_hash_end);
+
+ kunmap(page);
+ put_page(page);
+ page = NULL;
+
+ /*
+ * reach the end of current stripe, go to next stripe
+ */
+ if (stripe_hash == MDS_DIR_END_OFF)
+ continue;
+ else
+ goto next;
+ }
+
+ if (min_ent) {
+ if (le64_to_cpu(min_ent->lde_hash) >
+ le64_to_cpu(ent->lde_hash)) {
+ min_ent = ent;
+ kunmap(min_page);
+ put_page(min_page);
+ min_idx = i;
+ min_page = page;
+ } else {
+ kunmap(page);
+ put_page(page);
+ page = NULL;
+ }
+ } else {
+ min_ent = ent;
+ min_page = page;
+ min_idx = i;
+ }
+ }
+
+out:
+ if (*ppage) {
+ kunmap(*ppage);
+ put_page(*ppage);
+ }
+ *stripe_offset = min_idx;
+ *entp = min_ent;
+ *ppage = min_page;
+ return rc;
+}
+
+/**
+ * Build dir entry page from a striped directory
*
- * - Replace e0 with e' so the request for the next lu_dirpage gets the page
- * labeled 'next CFS_PAGE'.
+ * This function fills one page with entries from a striped directory,
+ * starting at @offset. It repeatedly reads entries from all of the stripes
+ * and picks the one closest to the current offset. A few notes:
+ * 1. skip . and .. for non-zero stripes, because there can only be one .
+ * and .. in a directory.
+ * 2. op_data will be shared by all of stripes, instead of allocating new
+ * one, so need to restore before reusing.
+ * 3. release the entry page if that is not being chosen.
*
- * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether
- * a hash collision with the next page exists.
+ * \param[in] exp obd export refer to LMV
+ * \param[in] op_data hold those MD parameters of read_entry
+ * \param[in] cb_op ldlm callback being used in enqueue in mdc_read_entry
+ * \param[out] ldp the entry being read
+ * \param[out] ppage the page holding the entry. Note: because the entry
+ * will be accessed in the upper layer, we need to hold the
+ * page until the entry is no longer in use, see
+ * ll_dir_entry_next.
*
- * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span
- * to the first entry of the next lu_dirpage.
+ * retval =0 if get entry successfully
+ * <0 cannot get entry
*/
-#if PAGE_SIZE > LU_PAGE_SIZE
-static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs)
-{
- int i;
+static int lmv_read_striped_page(struct obd_export *exp,
+ struct md_op_data *op_data,
+ struct md_callback *cb_op,
+ __u64 offset, struct page **ppage)
+{
+ struct inode *master_inode = op_data->op_data;
+ struct lu_fid master_fid = op_data->op_fid1;
+ struct obd_device *obd = exp->exp_obd;
+ __u64 hash_offset = offset;
+ struct page *min_ent_page = NULL;
+ struct page *ent_page = NULL;
+ struct lu_dirent *min_ent = NULL;
+ struct lu_dirent *last_ent;
+ struct lu_dirent *ent;
+ struct lu_dirpage *dp;
+ size_t left_bytes;
+ int ent_idx = 0;
+ void *area;
+ int rc;
- for (i = 0; i < ncfspgs; i++) {
- struct lu_dirpage *dp = kmap(pages[i]);
- struct lu_dirpage *first = dp;
- struct lu_dirent *end_dirent = NULL;
- struct lu_dirent *ent;
- __u64 hash_end = dp->ldp_hash_end;
- __u32 flags = dp->ldp_flags;
-
- while (--nlupgs > 0) {
- ent = lu_dirent_start(dp);
- for (end_dirent = ent; ent;
- end_dirent = ent, ent = lu_dirent_next(ent))
- ;
-
- /* Advance dp to next lu_dirpage. */
- dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
-
- /* Check if we've reached the end of the CFS_PAGE. */
- if (!((unsigned long)dp & ~PAGE_MASK))
- break;
+ rc = lmv_check_connect(obd);
+ if (rc)
+ return rc;
- /* Save the hash and flags of this lu_dirpage. */
- hash_end = dp->ldp_hash_end;
- flags = dp->ldp_flags;
+ /*
+ * Allocate a page and read entries from all of stripes and fill
+ * the page by hash order
+ */
+ ent_page = alloc_page(GFP_KERNEL);
+ if (!ent_page)
+ return -ENOMEM;
- /* Check if lu_dirpage contains no entries. */
- if (!end_dirent)
- break;
+ /* Initialize the entry page */
+ dp = kmap(ent_page);
+ memset(dp, 0, sizeof(*dp));
+ dp->ldp_hash_start = cpu_to_le64(offset);
+ dp->ldp_flags |= LDF_COLLIDE;
+
+ area = dp + 1;
+ left_bytes = PAGE_SIZE - sizeof(*dp);
+ ent = area;
+ last_ent = ent;
+ do {
+ __u16 ent_size;
+
+ /* Find the minimum entry from all sub-stripes */
+ rc = lmv_get_min_striped_entry(exp, op_data, cb_op, hash_offset,
+ &ent_idx, &min_ent,
+ &min_ent_page);
+ if (rc)
+ goto out;
- /* Enlarge the end entry lde_reclen from 0 to
- * first entry of next lu_dirpage.
- */
- LASSERT(le16_to_cpu(end_dirent->lde_reclen) == 0);
- end_dirent->lde_reclen =
- cpu_to_le16((char *)(dp->ldp_entries) -
- (char *)end_dirent);
+ /*
+ * If it cannot get the minimum entry, it means it has already reached
+ * the end of this directory
+ */
+ if (!min_ent) {
+ last_ent->lde_reclen = 0;
+ hash_offset = MDS_DIR_END_OFF;
+ goto out;
+ }
+
+ ent_size = le16_to_cpu(min_ent->lde_reclen);
+
+ /*
+ * the last entry's lde_reclen is 0, but it might not be
+ * the last entry of this temporary entry page
+ */
+ if (!ent_size)
+ ent_size = lu_dirent_calc_size(
+ le16_to_cpu(min_ent->lde_namelen),
+ le32_to_cpu(min_ent->lde_attrs));
+ if (ent_size > left_bytes) {
+ last_ent->lde_reclen = cpu_to_le16(0);
+ hash_offset = le64_to_cpu(min_ent->lde_hash);
+ goto out;
}
- first->ldp_hash_end = hash_end;
- first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE);
- first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE);
+ memcpy(ent, min_ent, ent_size);
+
+ /*
+ * Replace . with master FID and Replace .. with the parent FID
+ * of master object
+ */
+ if (!strncmp(ent->lde_name, ".",
+ le16_to_cpu(ent->lde_namelen)) &&
+ le16_to_cpu(ent->lde_namelen) == 1)
+ fid_cpu_to_le(&ent->lde_fid, &master_fid);
+ else if (!strncmp(ent->lde_name, "..",
+ le16_to_cpu(ent->lde_namelen)) &&
+ le16_to_cpu(ent->lde_namelen) == 2)
+ fid_cpu_to_le(&ent->lde_fid, &op_data->op_fid3);
+
+ left_bytes -= ent_size;
+ ent->lde_reclen = cpu_to_le16(ent_size);
+ last_ent = ent;
+ ent = (void *)ent + ent_size;
+ hash_offset = le64_to_cpu(min_ent->lde_hash);
+ if (hash_offset == MDS_DIR_END_OFF) {
+ last_ent->lde_reclen = 0;
+ break;
+ }
+ } while (1);
+out:
+ if (min_ent_page) {
+ kunmap(min_ent_page);
+ put_page(min_ent_page);
+ }
- kunmap(pages[i]);
+ if (unlikely(rc)) {
+ __free_page(ent_page);
+ ent_page = NULL;
+ } else {
+ if (ent == area)
+ dp->ldp_flags |= LDF_EMPTY;
+ dp->ldp_flags = cpu_to_le32(dp->ldp_flags);
+ dp->ldp_hash_end = cpu_to_le64(hash_offset);
}
- LASSERTF(nlupgs == 0, "left = %d", nlupgs);
+
+ /*
+ * We do not want to allocate md_op_data during each
+ * dir entry reading, so op_data will be shared by every stripe,
+ * then we need to restore it back to original value before
+ * return to the upper layer
+ */
+ op_data->op_fid1 = master_fid;
+ op_data->op_fid2 = master_fid;
+ op_data->op_data = master_inode;
+
+ *ppage = ent_page;
+
+ return rc;
}
-#else
-#define lmv_adjust_dirpages(pages, ncfspgs, nlupgs) do {} while (0)
-#endif /* PAGE_SIZE > LU_PAGE_SIZE */
-static int lmv_readpage(struct obd_export *exp, struct md_op_data *op_data,
- struct page **pages, struct ptlrpc_request **request)
+static int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data,
+ struct md_callback *cb_op, __u64 offset,
+ struct page **ppage)
{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- __u64 offset = op_data->op_offset;
- int rc;
- int ncfspgs; /* pages read in PAGE_SIZE */
- int nlupgs; /* pages read in LU_PAGE_SIZE */
- struct lmv_tgt_desc *tgt;
+ struct lmv_stripe_md *lsm = op_data->op_mea1;
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
rc = lmv_check_connect(obd);
if (rc)
return rc;
- CDEBUG(D_INODE, "READPAGE at %#llx from "DFID"\n",
- offset, PFID(&op_data->op_fid1));
+ if (unlikely(lsm)) {
+ rc = lmv_read_striped_page(exp, op_data, cb_op, offset, ppage);
+ return rc;
+ }
tgt = lmv_find_target(lmv, &op_data->op_fid1);
if (IS_ERR(tgt))
return PTR_ERR(tgt);
- rc = md_readpage(tgt->ltd_exp, op_data, pages, request);
- if (rc != 0)
- return rc;
-
- ncfspgs = ((*request)->rq_bulk->bd_nob_transferred + PAGE_SIZE - 1)
- >> PAGE_SHIFT;
- nlupgs = (*request)->rq_bulk->bd_nob_transferred >> LU_PAGE_SHIFT;
- LASSERT(!((*request)->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK));
- LASSERT(ncfspgs > 0 && ncfspgs <= op_data->op_npages);
-
- CDEBUG(D_INODE, "read %d(%d)/%d pages\n", ncfspgs, nlupgs,
- op_data->op_npages);
-
- lmv_adjust_dirpages(pages, ncfspgs, nlupgs);
+ rc = md_read_page(tgt->ltd_exp, op_data, cb_op, offset, ppage);
return rc;
}
+/**
+ * Unlink a file/directory
+ *
+ * Unlink a file or directory under the parent dir. The unlink request
+ * usually will be sent to the MDT where the child is located, but if
+ * the client does not have the child FID then request will be sent to the
+ * MDT where the parent is located.
+ *
+ * If the parent is a striped directory then it also needs to locate which
+ * stripe the name of the child is located, and replace the parent FID
+ * (@op->op_fid1) with the stripe FID. Note: if the stripe is unknown,
+ * it will walk through all of the sub-stripes until the child is finally
+ * unlinked.
+ *
+ * \param[in] exp export refer to LMV
+ * \param[in] op_data different parameters transferred between client
+ * MD stacks, name, namelen, FIDs etc.
+ * op_fid1 is the parent FID, op_fid2 is the child
+ * FID.
+ * \param[out] request point to the request of unlink.
+ *
+ * retval 0 if succeed
+ * negative errno if failed.
+ */
static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
struct ptlrpc_request **request)
{
- struct obd_device *obd = exp->exp_obd;
+ struct lmv_stripe_md *lsm = op_data->op_mea1;
+ struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *parent_tgt = NULL;
struct lmv_tgt_desc *tgt = NULL;
struct mdt_body *body;
+ int stripe_index = 0;
int rc;
rc = lmv_check_connect(obd);
if (rc)
return rc;
-retry:
+retry_unlink:
+ /* For striped dir, we need to locate the parent as well */
+ if (lsm) {
+ struct lmv_tgt_desc *tmp;
+
+ LASSERT(op_data->op_name && op_data->op_namelen);
+
+ tmp = lmv_locate_target_for_name(lmv, lsm,
+ op_data->op_name,
+ op_data->op_namelen,
+ &op_data->op_fid1,
+ &op_data->op_mds);
+
+ /*
+ * -EBADFD means unknown hash type; we might
+ * need to try all sub-stripes here
+ */
+ if (IS_ERR(tmp) && PTR_ERR(tmp) != -EBADFD)
+ return PTR_ERR(tmp);
+
+ /*
+ * Note: both migrating dir and unknown hash dir need to
+ * try all of the sub-stripes, so we need to start searching the
+ * name from stripe 0, but migrating dir is already handled
+ * inside lmv_locate_target_for_name(), so we only check
+ * unknown hash type directory here
+ */
+ if (!lmv_is_known_hash_type(lsm->lsm_md_hash_type)) {
+ struct lmv_oinfo *oinfo;
+
+ oinfo = &lsm->lsm_md_oinfo[stripe_index];
+
+ op_data->op_fid1 = oinfo->lmo_fid;
+ op_data->op_mds = oinfo->lmo_mds;
+ }
+ }
+
+try_next_stripe:
/* Send unlink requests to the MDT where the child is located */
if (likely(!fid_is_zero(&op_data->op_fid2)))
- tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
+ tgt = lmv_find_target(lmv, &op_data->op_fid2);
+ else if (lsm)
+ tgt = lmv_get_target(lmv, op_data->op_mds, NULL);
else
tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+
if (IS_ERR(tgt))
return PTR_ERR(tgt);
@@ -2203,29 +2544,57 @@ retry:
/*
* Cancel FULL locks on child (fid3).
*/
- rc = lmv_early_cancel(exp, op_data, tgt->ltd_idx, LCK_EX,
- MDS_INODELOCK_FULL, MF_MDC_CANCEL_FID3);
+ parent_tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(parent_tgt))
+ return PTR_ERR(parent_tgt);
+ if (parent_tgt != tgt) {
+ rc = lmv_early_cancel(exp, parent_tgt, op_data, tgt->ltd_idx,
+ LCK_EX, MDS_INODELOCK_LOOKUP,
+ MF_MDC_CANCEL_FID3);
+ }
+
+ rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX,
+ MDS_INODELOCK_FULL, MF_MDC_CANCEL_FID3);
if (rc != 0)
return rc;
- CDEBUG(D_INODE, "unlink with fid="DFID"/"DFID" -> mds #%d\n",
+ CDEBUG(D_INODE, "unlink with fid=" DFID "/" DFID " -> mds #%u\n",
PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx);
rc = md_unlink(tgt->ltd_exp, op_data, request);
- if (rc != 0 && rc != -EREMOTE)
+ if (rc != 0 && rc != -EREMOTE && rc != -ENOENT)
return rc;
+ /* Try next stripe if it is needed. */
+ if (rc == -ENOENT && lsm && lmv_need_try_all_stripes(lsm)) {
+ struct lmv_oinfo *oinfo;
+
+ stripe_index++;
+ if (stripe_index >= lsm->lsm_md_stripe_count)
+ return rc;
+
+ oinfo = &lsm->lsm_md_oinfo[stripe_index];
+
+ op_data->op_fid1 = oinfo->lmo_fid;
+ op_data->op_mds = oinfo->lmo_mds;
+
+ ptlrpc_req_finished(*request);
+ *request = NULL;
+
+ goto try_next_stripe;
+ }
+
body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
if (!body)
return -EPROTO;
/* Not cross-ref case, just get out of here. */
- if (likely(!(body->valid & OBD_MD_MDS)))
- return 0;
+ if (likely(!(body->mbo_valid & OBD_MD_MDS)))
+ return rc;
CDEBUG(D_INODE, "%s: try unlink to another MDT for "DFID"\n",
- exp->exp_obd->obd_name, PFID(&body->fid1));
+ exp->exp_obd->obd_name, PFID(&body->mbo_fid1));
/* This is a remote object, try remote MDT, Note: it may
* try more than 1 time here, Considering following case
@@ -2247,11 +2616,11 @@ retry:
* In theory, it might try unlimited time here, but it should
* be very rare case.
*/
- op_data->op_fid2 = body->fid1;
+ op_data->op_fid2 = body->mbo_fid1;
ptlrpc_req_finished(*request);
*request = NULL;
- goto retry;
+ goto retry_unlink;
}
static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
@@ -2274,6 +2643,22 @@ static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
return 0;
}
+/**
+ * Get by key a value associated with a LMV device.
+ *
+ * Dispatch request to lower-layer devices as needed.
+ *
+ * \param[in] env execution environment for this thread
+ * \param[in] exp export for the LMV device
+ * \param[in] keylen length of key identifier
+ * \param[in] key identifier of key to get value for
+ * \param[in] vallen size of \a val
+ * \param[out] val pointer to storage location for value
+ * \param[in] lsm optional striping metadata of object
+ *
+ * \retval 0 on success
+ * \retval negative negated errno on failure
+ */
static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
__u32 keylen, void *key, __u32 *vallen, void *val,
struct lov_stripe_md *lsm)
@@ -2337,6 +2722,22 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
return -EINVAL;
}
+/**
+ * Asynchronously set by key a value associated with a LMV device.
+ *
+ * Dispatch request to lower-layer devices as needed.
+ *
+ * \param[in] env execution environment for this thread
+ * \param[in] exp export for the LMV device
+ * \param[in] keylen length of key identifier
+ * \param[in] key identifier of key to store value for
+ * \param[in] vallen size of value to store
+ * \param[in] val pointer to data to be stored
+ * \param[in] set optional list of related ptlrpc requests
+ *
+ * \retval 0 on success
+ * \retval negative negated errno on failure
+ */
static int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp,
u32 keylen, void *key, u32 vallen,
void *val, struct ptlrpc_request_set *set)
@@ -2354,7 +2755,8 @@ static int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp,
}
lmv = &obd->u.lmv;
- if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX)) {
+ if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX) ||
+ KEY_IS(KEY_DEFAULT_EASIZE)) {
int i, err = 0;
for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
@@ -2375,105 +2777,247 @@ static int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp,
return -EINVAL;
}
-static int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
- struct lov_stripe_md *lsm)
+static int lmv_pack_md_v1(const struct lmv_stripe_md *lsm,
+ struct lmv_mds_md_v1 *lmm1)
{
- struct obd_device *obd = class_exp2obd(exp);
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_stripe_md *meap;
- struct lmv_stripe_md *lsmp;
- int mea_size;
- int i;
+ int cplen;
+ int i;
+
+ lmm1->lmv_magic = cpu_to_le32(lsm->lsm_md_magic);
+ lmm1->lmv_stripe_count = cpu_to_le32(lsm->lsm_md_stripe_count);
+ lmm1->lmv_master_mdt_index = cpu_to_le32(lsm->lsm_md_master_mdt_index);
+ lmm1->lmv_hash_type = cpu_to_le32(lsm->lsm_md_hash_type);
+ cplen = strlcpy(lmm1->lmv_pool_name, lsm->lsm_md_pool_name,
+ sizeof(lmm1->lmv_pool_name));
+ if (cplen >= sizeof(lmm1->lmv_pool_name))
+ return -E2BIG;
+
+ for (i = 0; i < lsm->lsm_md_stripe_count; i++)
+ fid_cpu_to_le(&lmm1->lmv_stripe_fids[i],
+ &lsm->lsm_md_oinfo[i].lmo_fid);
+ return 0;
+}
- mea_size = lmv_get_easize(lmv);
- if (!lmmp)
- return mea_size;
+static int
+lmv_pack_md(union lmv_mds_md **lmmp, const struct lmv_stripe_md *lsm,
+ int stripe_count)
+{
+ int lmm_size = 0, rc = 0;
+ bool allocated = false;
+ LASSERT(lmmp);
+
+ /* Free lmm */
if (*lmmp && !lsm) {
+ int stripe_cnt;
+
+ stripe_cnt = lmv_mds_md_stripe_count_get(*lmmp);
+ lmm_size = lmv_mds_md_size(stripe_cnt,
+ le32_to_cpu((*lmmp)->lmv_magic));
+ if (!lmm_size)
+ return -EINVAL;
kvfree(*lmmp);
*lmmp = NULL;
return 0;
}
+ /* Alloc lmm */
+ if (!*lmmp && !lsm) {
+ lmm_size = lmv_mds_md_size(stripe_count, LMV_MAGIC);
+ LASSERT(lmm_size > 0);
+ *lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS);
+ if (!*lmmp)
+ return -ENOMEM;
+ lmv_mds_md_stripe_count_set(*lmmp, stripe_count);
+ (*lmmp)->lmv_magic = cpu_to_le32(LMV_MAGIC);
+ return lmm_size;
+ }
+
+ /* pack lmm */
+ LASSERT(lsm);
+ lmm_size = lmv_mds_md_size(lsm->lsm_md_stripe_count,
+ lsm->lsm_md_magic);
if (!*lmmp) {
- *lmmp = libcfs_kvzalloc(mea_size, GFP_NOFS);
+ *lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS);
if (!*lmmp)
return -ENOMEM;
+ allocated = true;
}
- if (!lsm)
- return mea_size;
+ switch (lsm->lsm_md_magic) {
+ case LMV_MAGIC_V1:
+ rc = lmv_pack_md_v1(lsm, &(*lmmp)->lmv_md_v1);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
- lsmp = (struct lmv_stripe_md *)lsm;
- meap = (struct lmv_stripe_md *)*lmmp;
+ if (rc && allocated) {
+ kvfree(*lmmp);
+ *lmmp = NULL;
+ }
- if (lsmp->mea_magic != MEA_MAGIC_LAST_CHAR &&
- lsmp->mea_magic != MEA_MAGIC_ALL_CHARS)
- return -EINVAL;
+ return lmm_size;
+}
- meap->mea_magic = cpu_to_le32(lsmp->mea_magic);
- meap->mea_count = cpu_to_le32(lsmp->mea_count);
- meap->mea_master = cpu_to_le32(lsmp->mea_master);
+static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm,
+ const struct lmv_mds_md_v1 *lmm1)
+{
+ struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+ int stripe_count;
+ int rc = 0;
+ int cplen;
+ int i;
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- meap->mea_ids[i] = lsmp->mea_ids[i];
- fid_cpu_to_le(&meap->mea_ids[i], &lsmp->mea_ids[i]);
+ lsm->lsm_md_magic = le32_to_cpu(lmm1->lmv_magic);
+ lsm->lsm_md_stripe_count = le32_to_cpu(lmm1->lmv_stripe_count);
+ lsm->lsm_md_master_mdt_index = le32_to_cpu(lmm1->lmv_master_mdt_index);
+ if (OBD_FAIL_CHECK(OBD_FAIL_UNKNOWN_LMV_STRIPE))
+ lsm->lsm_md_hash_type = LMV_HASH_TYPE_UNKNOWN;
+ else
+ lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type);
+ lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version);
+ cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name,
+ sizeof(lsm->lsm_md_pool_name));
+
+ if (cplen >= sizeof(lsm->lsm_md_pool_name))
+ return -E2BIG;
+
+ CDEBUG(D_INFO, "unpack lsm count %d, master %d hash_type %d layout_version %d\n",
+ lsm->lsm_md_stripe_count, lsm->lsm_md_master_mdt_index,
+ lsm->lsm_md_hash_type, lsm->lsm_md_layout_version);
+
+ stripe_count = le32_to_cpu(lmm1->lmv_stripe_count);
+ for (i = 0; i < stripe_count; i++) {
+ fid_le_to_cpu(&lsm->lsm_md_oinfo[i].lmo_fid,
+ &lmm1->lmv_stripe_fids[i]);
+ rc = lmv_fld_lookup(lmv, &lsm->lsm_md_oinfo[i].lmo_fid,
+ &lsm->lsm_md_oinfo[i].lmo_mds);
+ if (rc)
+ return rc;
+ CDEBUG(D_INFO, "unpack fid #%d "DFID"\n", i,
+ PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
}
- return mea_size;
+ return rc;
}
-static int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
- struct lov_mds_md *lmm, int lmm_size)
+int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
+ const union lmv_mds_md *lmm, int stripe_count)
{
- struct obd_device *obd = class_exp2obd(exp);
- struct lmv_stripe_md **tmea = (struct lmv_stripe_md **)lsmp;
- struct lmv_stripe_md *mea = (struct lmv_stripe_md *)lmm;
- struct lmv_obd *lmv = &obd->u.lmv;
- int mea_size;
- int i;
- __u32 magic;
+ struct lmv_stripe_md *lsm;
+ bool allocated = false;
+ int lsm_size, rc;
- mea_size = lmv_get_easize(lmv);
- if (!lsmp)
- return mea_size;
+ LASSERT(lsmp);
- if (*lsmp && !lmm) {
- kvfree(*tmea);
+ lsm = *lsmp;
+ /* Free memmd */
+ if (lsm && !lmm) {
+ int i;
+
+ for (i = 1; i < lsm->lsm_md_stripe_count; i++) {
+ /*
+ * For migrating inode, the master stripe and master
+ * object will be the same, so do not need iput, see
+ * ll_update_lsm_md
+ */
+ if (!(lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION &&
+ !i) && lsm->lsm_md_oinfo[i].lmo_root)
+ iput(lsm->lsm_md_oinfo[i].lmo_root);
+ }
+
+ kvfree(lsm);
*lsmp = NULL;
return 0;
}
- LASSERT(mea_size == lmm_size);
+ /* Alloc memmd */
+ if (!lsm && !lmm) {
+ lsm_size = lmv_stripe_md_size(stripe_count);
+ lsm = libcfs_kvzalloc(lsm_size, GFP_NOFS);
+ if (!lsm)
+ return -ENOMEM;
+ lsm->lsm_md_stripe_count = stripe_count;
+ *lsmp = lsm;
+ return 0;
+ }
- *tmea = libcfs_kvzalloc(mea_size, GFP_NOFS);
- if (!*tmea)
- return -ENOMEM;
+ if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_STRIPE)
+ return -EPERM;
- if (!lmm)
- return mea_size;
+ /* Unpack memmd */
+ if (le32_to_cpu(lmm->lmv_magic) != LMV_MAGIC_V1 &&
+ le32_to_cpu(lmm->lmv_magic) != LMV_USER_MAGIC) {
+ CERROR("%s: invalid lmv magic %x: rc = %d\n",
+ exp->exp_obd->obd_name, le32_to_cpu(lmm->lmv_magic),
+ -EIO);
+ return -EIO;
+ }
- if (mea->mea_magic == MEA_MAGIC_LAST_CHAR ||
- mea->mea_magic == MEA_MAGIC_ALL_CHARS ||
- mea->mea_magic == MEA_MAGIC_HASH_SEGMENT) {
- magic = le32_to_cpu(mea->mea_magic);
- } else {
- /*
- * Old mea is not handled here.
+ if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_V1)
+ lsm_size = lmv_stripe_md_size(lmv_mds_md_stripe_count_get(lmm));
+ else
+ /**
+ * Unpack default dirstripe(lmv_user_md) to lmv_stripe_md,
+ * stripecount should be 0 then.
*/
- CERROR("Old not supportable EA is found\n");
- LBUG();
+ lsm_size = lmv_stripe_md_size(0);
+
+ if (!lsm) {
+ lsm = libcfs_kvzalloc(lsm_size, GFP_NOFS);
+ if (!lsm)
+ return -ENOMEM;
+ allocated = true;
+ *lsmp = lsm;
+ }
+
+ switch (le32_to_cpu(lmm->lmv_magic)) {
+ case LMV_MAGIC_V1:
+ rc = lmv_unpack_md_v1(exp, lsm, &lmm->lmv_md_v1);
+ break;
+ default:
+ CERROR("%s: unrecognized magic %x\n", exp->exp_obd->obd_name,
+ le32_to_cpu(lmm->lmv_magic));
+ rc = -EINVAL;
+ break;
}
- (*tmea)->mea_magic = magic;
- (*tmea)->mea_count = le32_to_cpu(mea->mea_count);
- (*tmea)->mea_master = le32_to_cpu(mea->mea_master);
+ if (rc && allocated) {
+ kvfree(lsm);
+ *lsmp = NULL;
+ lsm_size = rc;
+ }
+ return lsm_size;
+}
+EXPORT_SYMBOL(lmv_unpack_md);
+
+static int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
+ struct lov_mds_md *lmm, int disk_len)
+{
+ return lmv_unpack_md(exp, (struct lmv_stripe_md **)lsmp,
+ (union lmv_mds_md *)lmm, disk_len);
+}
+
+static int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
+ struct lov_stripe_md *lsm)
+{
+ const struct lmv_stripe_md *lmv = (struct lmv_stripe_md *)lsm;
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv_obd = &obd->u.lmv;
+ int stripe_count;
- for (i = 0; i < (*tmea)->mea_count; i++) {
- (*tmea)->mea_ids[i] = mea->mea_ids[i];
- fid_le_to_cpu(&(*tmea)->mea_ids[i], &(*tmea)->mea_ids[i]);
+ if (!lmmp) {
+ if (lsm)
+ stripe_count = lmv->lsm_md_stripe_count;
+ else
+ stripe_count = lmv_obd->desc.ld_tgt_count;
+
+ return lmv_mds_md_size(stripe_count, LMV_MAGIC_V1);
}
- return mea_size;
+
+ return lmv_pack_md((union lmv_mds_md **)lmmp, lmv, 0);
}
static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
@@ -2484,7 +3028,7 @@ static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
struct lmv_obd *lmv = &obd->u.lmv;
int rc = 0;
int err;
- int i;
+ u32 i;
LASSERT(fid);
@@ -2502,8 +3046,9 @@ static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
return rc;
}
-static int lmv_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
- __u64 *bits)
+static int lmv_set_lock_data(struct obd_export *exp,
+ const struct lustre_handle *lockh,
+ void *data, __u64 *bits)
{
struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
struct lmv_tgt_desc *tgt = lmv->tgts[0];
@@ -2526,24 +3071,32 @@ static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags,
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
enum ldlm_mode rc;
- int i;
+ int tgt;
+ u32 i;
CDEBUG(D_INODE, "Lock match for "DFID"\n", PFID(fid));
/*
- * With CMD every object can have two locks in different namespaces:
- * lookup lock in space of mds storing direntry and update/open lock in
- * space of mds storing inode. Thus we check all targets, not only that
- * one fid was created in.
+ * With DNE every object can have two locks in different namespaces:
+ * lookup lock in space of MDT storing direntry and update/open lock in
+ * space of MDT storing inode. Try the MDT that the FID maps to first,
+ * since this can be easily found, and only try others if that fails.
*/
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- struct lmv_tgt_desc *tgt = lmv->tgts[i];
+ for (i = 0, tgt = lmv_find_target_index(lmv, fid);
+ i < lmv->desc.ld_tgt_count;
+ i++, tgt = (tgt + 1) % lmv->desc.ld_tgt_count) {
+ if (tgt < 0) {
+ CDEBUG(D_HA, "%s: "DFID" is inaccessible: rc = %d\n",
+ obd->obd_name, PFID(fid), tgt);
+ tgt = 0;
+ }
- if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
+ if (!lmv->tgts[tgt] || !lmv->tgts[tgt]->ltd_exp ||
+ !lmv->tgts[tgt]->ltd_active)
continue;
- rc = md_lock_match(tgt->ltd_exp, flags, fid, type, policy, mode,
- lockh);
+ rc = md_lock_match(lmv->tgts[tgt]->ltd_exp, flags, fid,
+ type, policy, mode, lockh);
if (rc)
return rc;
}
@@ -2571,8 +3124,10 @@ static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *tgt = lmv->tgts[0];
- if (md->mea)
- obd_free_memmd(exp, (void *)&md->mea);
+ if (md->lmv) {
+ lmv_free_memmd(md->lmv);
+ md->lmv = NULL;
+ }
if (!tgt || !tgt->ltd_exp)
return -EINVAL;
return md_free_lustre_md(tgt->ltd_exp, md);
@@ -2621,7 +3176,7 @@ static int lmv_intent_getattr_async(struct obd_export *exp,
if (rc)
return rc;
- tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
if (IS_ERR(tgt))
return PTR_ERR(tgt);
@@ -2649,6 +3204,23 @@ static int lmv_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
return rc;
}
+static int
+lmv_get_fid_from_lsm(struct obd_export *exp,
+ const struct lmv_stripe_md *lsm,
+ const char *name, int namelen, struct lu_fid *fid)
+{
+ const struct lmv_oinfo *oinfo;
+
+ LASSERT(lsm);
+ oinfo = lsm_name_to_stripe_info(lsm, name, namelen);
+ if (IS_ERR(oinfo))
+ return PTR_ERR(oinfo);
+
+ *fid = oinfo->lmo_fid;
+
+ return 0;
+}
+
/**
* For lmv, only need to send request to master MDT, and the master MDT will
* process with other slave MDTs. The only exception is Q_GETOQUOTA for which
@@ -2660,8 +3232,9 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
struct obd_device *obd = class_exp2obd(exp);
struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *tgt = lmv->tgts[0];
- int rc = 0, i;
+ int rc = 0;
__u64 curspace = 0, curinodes = 0;
+ u32 i;
if (!tgt || !tgt->ltd_exp || !tgt->ltd_active ||
!lmv->desc.ld_tgt_count) {
@@ -2704,7 +3277,8 @@ static int lmv_quotacheck(struct obd_device *unused, struct obd_export *exp,
struct obd_device *obd = class_exp2obd(exp);
struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *tgt;
- int i, rc = 0;
+ int rc = 0;
+ u32 i;
for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
int err;
@@ -2723,6 +3297,47 @@ static int lmv_quotacheck(struct obd_device *unused, struct obd_export *exp,
return rc;
}
+static int lmv_merge_attr(struct obd_export *exp,
+ const struct lmv_stripe_md *lsm,
+ struct cl_attr *attr,
+ ldlm_blocking_callback cb_blocking)
+{
+ int rc, i;
+
+ rc = lmv_revalidate_slaves(exp, lsm, cb_blocking, 0);
+ if (rc < 0)
+ return rc;
+
+ for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
+ struct inode *inode = lsm->lsm_md_oinfo[i].lmo_root;
+
+ CDEBUG(D_INFO, ""DFID" size %llu, blocks %llu nlink %u, atime %lu ctime %lu, mtime %lu.\n",
+ PFID(&lsm->lsm_md_oinfo[i].lmo_fid),
+ i_size_read(inode), (unsigned long long)inode->i_blocks,
+ inode->i_nlink, LTIME_S(inode->i_atime),
+ LTIME_S(inode->i_ctime), LTIME_S(inode->i_mtime));
+
+ /* for a slave stripe, subtract the two links counted for . and .. */
+ if (i)
+ attr->cat_nlink += inode->i_nlink - 2;
+ else
+ attr->cat_nlink = inode->i_nlink;
+
+ attr->cat_size += i_size_read(inode);
+ attr->cat_blocks += inode->i_blocks;
+
+ if (attr->cat_atime < LTIME_S(inode->i_atime))
+ attr->cat_atime = LTIME_S(inode->i_atime);
+
+ if (attr->cat_ctime < LTIME_S(inode->i_ctime))
+ attr->cat_ctime = LTIME_S(inode->i_ctime);
+
+ if (attr->cat_mtime < LTIME_S(inode->i_mtime))
+ attr->cat_mtime = LTIME_S(inode->i_mtime);
+ }
+ return 0;
+}
+
static struct obd_ops lmv_obd_ops = {
.owner = THIS_MODULE,
.setup = lmv_setup,
@@ -2746,7 +3361,6 @@ static struct obd_ops lmv_obd_ops = {
static struct md_ops lmv_md_ops = {
.getstatus = lmv_getstatus,
.null_inode = lmv_null_inode,
- .find_cbdata = lmv_find_cbdata,
.close = lmv_close,
.create = lmv_create,
.done_writing = lmv_done_writing,
@@ -2760,7 +3374,7 @@ static struct md_ops lmv_md_ops = {
.setattr = lmv_setattr,
.setxattr = lmv_setxattr,
.sync = lmv_sync,
- .readpage = lmv_readpage,
+ .read_page = lmv_read_page,
.unlink = lmv_unlink,
.init_ea_size = lmv_init_ea_size,
.cancel_unused = lmv_cancel_unused,
@@ -2768,10 +3382,12 @@ static struct md_ops lmv_md_ops = {
.lock_match = lmv_lock_match,
.get_lustre_md = lmv_get_lustre_md,
.free_lustre_md = lmv_free_lustre_md,
+ .merge_attr = lmv_merge_attr,
.set_open_replay_data = lmv_set_open_replay_data,
.clear_open_replay_data = lmv_clear_open_replay_data,
.intent_getattr_async = lmv_intent_getattr_async,
- .revalidate_lock = lmv_revalidate_lock
+ .revalidate_lock = lmv_revalidate_lock,
+ .get_fid_from_lsm = lmv_get_fid_from_lsm,
};
static int __init lmv_init(void)
diff --git a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
index c29c361eb0cc..20bbdfc21d15 100644
--- a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
+++ b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
@@ -169,7 +169,7 @@ static int lmv_tgt_seq_show(struct seq_file *p, void *v)
if (!tgt)
return 0;
- seq_printf(p, "%d: %s %sACTIVE\n",
+ seq_printf(p, "%u: %s %sACTIVE\n",
tgt->ltd_idx, tgt->ltd_uuid.uuid,
tgt->ltd_active ? "" : "IN");
return 0;
@@ -202,7 +202,7 @@ static struct lprocfs_vars lprocfs_lmv_obd_vars[] = {
{ NULL }
};
-struct file_operations lmv_proc_target_fops = {
+const struct file_operations lmv_proc_target_fops = {
.owner = THIS_MODULE,
.open = lmv_target_seq_open,
.read = seq_read,
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
index 9740568d9521..4d2b7d303fea 100644
--- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
@@ -289,8 +289,8 @@ struct lov_lock {
};
struct lov_page {
- struct cl_page_slice lps_cl;
- int lps_invalid;
+ struct cl_page_slice lps_cl;
+ unsigned int lps_stripe; /* stripe index */
};
/*
@@ -556,6 +556,8 @@ struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
struct lovsub_lock *sub);
struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
const struct cl_page_slice *slice);
+
+struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov);
int lov_page_stripe(const struct cl_page *page);
#define lov_foreach_target(lov, var) \
@@ -742,11 +744,15 @@ static inline struct lov_thread_info *lov_env_info(const struct lu_env *env)
static inline struct lov_layout_raid0 *lov_r0(struct lov_object *lov)
{
LASSERT(lov->lo_type == LLT_RAID0);
- LASSERT(lov->lo_lsm->lsm_wire.lw_magic == LOV_MAGIC ||
- lov->lo_lsm->lsm_wire.lw_magic == LOV_MAGIC_V3);
+ LASSERT(lov->lo_lsm->lsm_magic == LOV_MAGIC ||
+ lov->lo_lsm->lsm_magic == LOV_MAGIC_V3);
return &lov->u.raid0;
}
+/* lov_pack.c */
+int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
+ struct lov_user_md __user *lump);
+
/** @} lov */
#endif
diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c
index b1f260d43bc7..056ae2ed88e8 100644
--- a/drivers/staging/lustre/lustre/lov/lov_dev.c
+++ b/drivers/staging/lustre/lustre/lov/lov_dev.c
@@ -516,6 +516,5 @@ struct lu_device_type lov_device_type = {
.ldt_ops = &lov_device_type_ops,
.ldt_ctx_tags = LCT_CL_THREAD
};
-EXPORT_SYMBOL(lov_device_type);
/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c
index 5053dead17bb..214c561767e0 100644
--- a/drivers/staging/lustre/lustre/lov/lov_ea.c
+++ b/drivers/staging/lustre/lustre/lov/lov_ea.c
@@ -66,7 +66,8 @@ static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
}
if (lmm->lmm_stripe_size == 0 ||
- (le32_to_cpu(lmm->lmm_stripe_size)&(LOV_MIN_STRIPE_SIZE-1)) != 0) {
+ (le32_to_cpu(lmm->lmm_stripe_size) &
+ (LOV_MIN_STRIPE_SIZE - 1)) != 0) {
CERROR("bad stripe size %u\n",
le32_to_cpu(lmm->lmm_stripe_size));
lov_dump_lmm_common(D_WARNING, lmm);
@@ -146,21 +147,15 @@ lsm_stripe_by_offset_plain(struct lov_stripe_md *lsm, int *stripeno,
*swidth = (u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
}
-static int lsm_destroy_plain(struct lov_stripe_md *lsm, struct obdo *oa,
- struct obd_export *md_exp)
-{
- return 0;
-}
-
/* Find minimum stripe maxbytes value. For inactive or
- * reconnecting targets use LUSTRE_STRIPE_MAXBYTES.
+ * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES.
*/
static void lov_tgt_maxbytes(struct lov_tgt_desc *tgt, __u64 *stripe_maxbytes)
{
struct obd_import *imp = tgt->ltd_obd->u.cli.cl_import;
if (!imp || !tgt->ltd_active) {
- *stripe_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+ *stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
return;
}
@@ -171,7 +166,7 @@ static void lov_tgt_maxbytes(struct lov_tgt_desc *tgt, __u64 *stripe_maxbytes)
if (*stripe_maxbytes > imp->imp_connect_data.ocd_maxbytes)
*stripe_maxbytes = imp->imp_connect_data.ocd_maxbytes;
} else {
- *stripe_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+ *stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
}
spin_unlock(&imp->imp_lock);
}
@@ -245,7 +240,6 @@ static int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm,
const struct lsm_operations lsm_v1_ops = {
.lsm_free = lsm_free_plain,
- .lsm_destroy = lsm_destroy_plain,
.lsm_stripe_by_index = lsm_stripe_by_index_plain,
.lsm_stripe_by_offset = lsm_stripe_by_offset_plain,
.lsm_lmm_verify = lsm_lmm_verify_v1,
@@ -335,7 +329,6 @@ static int lsm_unpackmd_v3(struct lov_obd *lov, struct lov_stripe_md *lsm,
const struct lsm_operations lsm_v3_ops = {
.lsm_free = lsm_free_plain,
- .lsm_destroy = lsm_destroy_plain,
.lsm_stripe_by_index = lsm_stripe_by_index_plain,
.lsm_stripe_by_offset = lsm_stripe_by_offset_plain,
.lsm_lmm_verify = lsm_lmm_verify_v3,
diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h
index 12bd511e8988..07e5ede3e952 100644
--- a/drivers/staging/lustre/lustre/lov/lov_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_internal.h
@@ -134,8 +134,6 @@ static inline void lov_put_reqset(struct lov_request_set *set)
/* lov_merge.c */
void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
struct lov_stripe_md *lsm, int stripeno, int *set);
-int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
- u64 size, int shrink);
int lov_merge_lvb_kms(struct lov_stripe_md *lsm,
struct ost_lvb *lvb, __u64 *kms_place);
@@ -157,11 +155,6 @@ int lov_update_common_set(struct lov_request_set *set,
int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
struct lov_request_set **reqset);
int lov_fini_getattr_set(struct lov_request_set *set);
-int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
- struct obdo *src_oa, struct lov_stripe_md *lsm,
- struct obd_trans_info *oti,
- struct lov_request_set **reqset);
-int lov_fini_destroy_set(struct lov_request_set *set);
int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
struct obd_trans_info *oti,
struct lov_request_set **reqset);
@@ -197,8 +190,6 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmm,
struct lov_stripe_md *lsm);
int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
struct lov_mds_md *lmm, int lmm_bytes);
-int lov_getstripe(struct obd_export *exp,
- struct lov_stripe_md *lsm, struct lov_user_md __user *lump);
int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count,
int pattern, int magic);
int lov_free_memmd(struct lov_stripe_md **lsmp);
diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c
index 84032a510254..d10157985ed9 100644
--- a/drivers/staging/lustre/lustre/lov/lov_io.c
+++ b/drivers/staging/lustre/lustre/lov/lov_io.c
@@ -87,6 +87,9 @@ static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio,
case CIT_SETATTR: {
io->u.ci_setattr.sa_attr = parent->u.ci_setattr.sa_attr;
io->u.ci_setattr.sa_valid = parent->u.ci_setattr.sa_valid;
+ io->u.ci_setattr.sa_stripe_index = stripe;
+ io->u.ci_setattr.sa_parent_fid =
+ parent->u.ci_setattr.sa_parent_fid;
if (cl_io_is_trunc(io)) {
loff_t new_size = parent->u.ci_setattr.sa_attr.lvb_size;
@@ -244,14 +247,12 @@ void lov_sub_put(struct lov_io_sub *sub)
int lov_page_stripe(const struct cl_page *page)
{
- struct lovsub_object *subobj;
const struct cl_page_slice *slice;
- slice = cl_page_at(page, &lovsub_device_type);
+ slice = cl_page_at(page, &lov_device_type);
LASSERT(slice->cpl_obj);
- subobj = cl2lovsub(slice->cpl_obj);
- return subobj->lso_index;
+ return cl2lov_page(slice)->lps_stripe;
}
struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
@@ -298,8 +299,8 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
return result;
}
-static void lov_io_slice_init(struct lov_io *lio,
- struct lov_object *obj, struct cl_io *io)
+static int lov_io_slice_init(struct lov_io *lio, struct lov_object *obj,
+ struct cl_io *io)
{
io->ci_result = 0;
lio->lis_object = obj;
@@ -314,6 +315,15 @@ static void lov_io_slice_init(struct lov_io *lio,
lio->lis_io_endpos = lio->lis_endpos;
if (cl_io_is_append(io)) {
LASSERT(io->ci_type == CIT_WRITE);
+
+ /*
+ * If there is a LOV EA hole, then we may not be able to
+ * locate the current file-tail exactly.
+ */
+ if (unlikely(obj->lo_lsm->lsm_pattern &
+ LOV_PATTERN_F_HOLE))
+ return -EIO;
+
lio->lis_pos = 0;
lio->lis_endpos = OBD_OBJECT_EOF;
}
@@ -349,6 +359,7 @@ static void lov_io_slice_init(struct lov_io *lio,
default:
LBUG();
}
+ return 0;
}
static void lov_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
@@ -870,7 +881,7 @@ int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj,
struct lov_object *lov = cl2lov(obj);
INIT_LIST_HEAD(&lio->lis_active);
- lov_io_slice_init(lio, lov, io);
+ io->ci_result = lov_io_slice_init(lio, lov, io);
if (io->ci_result == 0) {
io->ci_result = lov_io_subio_init(env, lio, io);
if (io->ci_result == 0) {
diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c
index b9c90865fdfc..674af106b50b 100644
--- a/drivers/staging/lustre/lustre/lov/lov_merge.c
+++ b/drivers/staging/lustre/lustre/lov/lov_merge.c
@@ -105,45 +105,6 @@ int lov_merge_lvb_kms(struct lov_stripe_md *lsm,
return rc;
}
-/* Must be called under the lov_stripe_lock() */
-int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
- u64 size, int shrink)
-{
- struct lov_oinfo *loi;
- int stripe = 0;
- __u64 kms;
-
- assert_spin_locked(&lsm->lsm_lock);
- LASSERT(lsm->lsm_lock_owner == current_pid());
-
- if (shrink) {
- for (; stripe < lsm->lsm_stripe_count; stripe++) {
- struct lov_oinfo *loi = lsm->lsm_oinfo[stripe];
-
- kms = lov_size_to_stripe(lsm, size, stripe);
- CDEBUG(D_INODE,
- "stripe %d KMS %sing %llu->%llu\n",
- stripe, kms > loi->loi_kms ? "increase":"shrink",
- loi->loi_kms, kms);
- loi->loi_lvb.lvb_size = kms;
- loi_kms_set(loi, loi->loi_lvb.lvb_size);
- }
- return 0;
- }
-
- if (size > 0)
- stripe = lov_stripe_number(lsm, size - 1);
- kms = lov_size_to_stripe(lsm, size, stripe);
- loi = lsm->lsm_oinfo[stripe];
-
- CDEBUG(D_INODE, "stripe %d KMS %sincreasing %llu->%llu\n",
- stripe, kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms);
- if (kms > loi->loi_kms)
- loi_kms_set(loi, kms);
-
- return 0;
-}
-
void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
struct lov_stripe_md *lsm, int stripeno, int *set)
{
diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c
index 9b92d5522edb..b23016f7ec26 100644
--- a/drivers/staging/lustre/lustre/lov/lov_obd.c
+++ b/drivers/staging/lustre/lustre/lov/lov_obd.c
@@ -41,6 +41,7 @@
#include "../../include/linux/libcfs/libcfs.h"
#include "../include/obd_support.h"
+#include "../include/lustre/lustre_ioctl.h"
#include "../include/lustre_lib.h"
#include "../include/lustre_net.h"
#include "../include/lustre/lustre_idl.h"
@@ -940,7 +941,7 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg,
}
case LCFG_PARAM: {
struct lprocfs_static_vars lvars = { NULL };
- struct lov_desc *desc = &(obd->u.lov.desc);
+ struct lov_desc *desc = &obd->u.lov.desc;
if (!desc) {
rc = -EINVAL;
@@ -971,92 +972,6 @@ out:
return rc;
}
-static int lov_recreate(struct obd_export *exp, struct obdo *src_oa,
- struct lov_stripe_md **ea, struct obd_trans_info *oti)
-{
- struct lov_stripe_md *obj_mdp, *lsm;
- struct lov_obd *lov = &exp->exp_obd->u.lov;
- unsigned ost_idx;
- int rc, i;
-
- LASSERT(src_oa->o_valid & OBD_MD_FLFLAGS &&
- src_oa->o_flags & OBD_FL_RECREATE_OBJS);
-
- obj_mdp = kzalloc(sizeof(*obj_mdp), GFP_NOFS);
- if (!obj_mdp)
- return -ENOMEM;
-
- ost_idx = src_oa->o_nlink;
- lsm = *ea;
- if (!lsm) {
- rc = -EINVAL;
- goto out;
- }
- if (ost_idx >= lov->desc.ld_tgt_count ||
- !lov->lov_tgts[ost_idx]) {
- rc = -EINVAL;
- goto out;
- }
-
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- struct lov_oinfo *loi = lsm->lsm_oinfo[i];
-
- if (lov_oinfo_is_dummy(loi))
- continue;
-
- if (loi->loi_ost_idx == ost_idx) {
- if (ostid_id(&loi->loi_oi) != ostid_id(&src_oa->o_oi)) {
- rc = -EINVAL;
- goto out;
- }
- break;
- }
- }
- if (i == lsm->lsm_stripe_count) {
- rc = -EINVAL;
- goto out;
- }
-
- rc = obd_create(NULL, lov->lov_tgts[ost_idx]->ltd_exp,
- src_oa, &obj_mdp, oti);
-out:
- kfree(obj_mdp);
- return rc;
-}
-
-/* the LOV expects oa->o_id to be set to the LOV object id */
-static int lov_create(const struct lu_env *env, struct obd_export *exp,
- struct obdo *src_oa, struct lov_stripe_md **ea,
- struct obd_trans_info *oti)
-{
- struct lov_obd *lov;
- int rc = 0;
-
- LASSERT(ea);
- if (!exp)
- return -EINVAL;
-
- if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
- src_oa->o_flags == OBD_FL_DELORPHAN) {
- /* should be used with LOV anymore */
- LBUG();
- }
-
- lov = &exp->exp_obd->u.lov;
- if (!lov->desc.ld_active_tgt_count)
- return -EIO;
-
- obd_getref(exp->exp_obd);
- /* Recreate a specific object id at the given OST index */
- if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
- (src_oa->o_flags & OBD_FL_RECREATE_OBJS)) {
- rc = lov_recreate(exp, src_oa, ea, oti);
- }
-
- obd_putref(exp->exp_obd);
- return rc;
-}
-
#define ASSERT_LSM_MAGIC(lsmp) \
do { \
LASSERT((lsmp)); \
@@ -1065,59 +980,6 @@ do { \
"%p->lsm_magic=%x\n", (lsmp), (lsmp)->lsm_magic); \
} while (0)
-static int lov_destroy(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct lov_stripe_md *lsm,
- struct obd_trans_info *oti, struct obd_export *md_exp)
-{
- struct lov_request_set *set;
- struct obd_info oinfo;
- struct lov_request *req;
- struct lov_obd *lov;
- int rc = 0, err = 0;
-
- ASSERT_LSM_MAGIC(lsm);
-
- if (!exp || !exp->exp_obd)
- return -ENODEV;
-
- if (oa->o_valid & OBD_MD_FLCOOKIE) {
- LASSERT(oti);
- LASSERT(oti->oti_logcookies);
- }
-
- lov = &exp->exp_obd->u.lov;
- obd_getref(exp->exp_obd);
- rc = lov_prep_destroy_set(exp, &oinfo, oa, lsm, oti, &set);
- if (rc)
- goto out;
-
- list_for_each_entry(req, &set->set_list, rq_link) {
- if (oa->o_valid & OBD_MD_FLCOOKIE)
- oti->oti_logcookies = set->set_cookies + req->rq_stripe;
-
- err = obd_destroy(env, lov->lov_tgts[req->rq_idx]->ltd_exp,
- req->rq_oi.oi_oa, NULL, oti, NULL);
- err = lov_update_common_set(set, req, err);
- if (err) {
- CERROR("%s: destroying objid "DOSTID" subobj "
- DOSTID" on OST idx %d: rc = %d\n",
- exp->exp_obd->obd_name, POSTID(&oa->o_oi),
- POSTID(&req->rq_oi.oi_oa->o_oi),
- req->rq_idx, err);
- if (!rc)
- rc = err;
- }
- }
-
- if (rc == 0)
- rc = lsm_op_find(lsm->lsm_magic)->lsm_destroy(lsm, oa, md_exp);
-
- err = lov_fini_destroy_set(set);
-out:
- obd_putref(exp->exp_obd);
- return rc ? rc : err;
-}
-
static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
void *data, int rc)
{
@@ -1267,46 +1129,6 @@ static int lov_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
return 0;
}
-/* find any ldlm lock of the inode in lov
- * return 0 not find
- * 1 find one
- * < 0 error
- */
-static int lov_find_cbdata(struct obd_export *exp,
- struct lov_stripe_md *lsm, ldlm_iterator_t it,
- void *data)
-{
- struct lov_obd *lov;
- int rc = 0, i;
-
- ASSERT_LSM_MAGIC(lsm);
-
- if (!exp || !exp->exp_obd)
- return -ENODEV;
-
- lov = &exp->exp_obd->u.lov;
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- struct lov_stripe_md submd;
- struct lov_oinfo *loi = lsm->lsm_oinfo[i];
-
- if (lov_oinfo_is_dummy(loi))
- continue;
-
- if (!lov->lov_tgts[loi->loi_ost_idx]) {
- CDEBUG(D_HA, "lov idx %d NULL\n", loi->loi_ost_idx);
- continue;
- }
-
- submd.lsm_oi = loi->loi_oi;
- submd.lsm_stripe_count = 0;
- rc = obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp,
- &submd, it, data);
- if (rc != 0)
- return rc;
- }
- return rc;
-}
-
int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc)
{
struct lov_request_set *lovset = (struct lov_request_set *)data;
@@ -1460,7 +1282,7 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
}
desc = (struct lov_desc *)data->ioc_inlbuf1;
- memcpy(desc, &(lov->desc), sizeof(*desc));
+ memcpy(desc, &lov->desc, sizeof(*desc));
uuidp = (struct obd_uuid *)data->ioc_inlbuf2;
genp = (__u32 *)data->ioc_inlbuf3;
@@ -1477,9 +1299,6 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
obd_ioctl_freedata(buf, len);
break;
}
- case LL_IOC_LOV_GETSTRIPE:
- rc = lov_getstripe(exp, karg, uarg);
- break;
case OBD_IOC_QUOTACTL: {
struct if_quotactl *qctl = karg;
struct lov_tgt_desc *tgt = NULL;
@@ -1726,6 +1545,8 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
u64 fm_start, fm_end, fm_length, fm_end_offset;
u64 curr_loc;
int current_extent = 0, rc = 0, i;
+ /* Whether we have collected enough extents */
+ bool enough = false;
int ost_eof = 0; /* EOF for object */
int ost_done = 0; /* done with required mapping for this OST? */
int last_stripe;
@@ -1860,7 +1681,7 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
lun_start += len_mapped_single_call;
fm_local->fm_length = req_fm_len - len_mapped_single_call;
req_fm_len = fm_local->fm_length;
- fm_local->fm_extent_count = count_local;
+ fm_local->fm_extent_count = enough ? 1 : count_local;
fm_local->fm_mapped_extents = 0;
fm_local->fm_flags = fiemap->fm_flags;
@@ -1908,6 +1729,12 @@ inactive_tgt:
goto finish;
}
break;
+ } else if (enough) {
+ /*
+ * We've collected enough extents and there are
+ * more extents after them.
+ */
+ goto finish;
}
/* If we just need num of extents then go to next device */
@@ -1916,8 +1743,9 @@ inactive_tgt:
break;
}
- len_mapped_single_call = lcl_fm_ext[ext_count-1].fe_logical -
- lun_start + lcl_fm_ext[ext_count - 1].fe_length;
+ len_mapped_single_call =
+ lcl_fm_ext[ext_count - 1].fe_logical -
+ lun_start + lcl_fm_ext[ext_count - 1].fe_length;
/* Have we finished mapping on this device? */
if (req_fm_len <= len_mapped_single_call)
@@ -1926,14 +1754,15 @@ inactive_tgt:
/* Clear the EXTENT_LAST flag which can be present on
* last extent
*/
- if (lcl_fm_ext[ext_count-1].fe_flags & FIEMAP_EXTENT_LAST)
+ if (lcl_fm_ext[ext_count - 1].fe_flags &
+ FIEMAP_EXTENT_LAST)
lcl_fm_ext[ext_count - 1].fe_flags &=
~FIEMAP_EXTENT_LAST;
curr_loc = lov_stripe_size(lsm,
- lcl_fm_ext[ext_count - 1].fe_logical+
- lcl_fm_ext[ext_count - 1].fe_length,
- cur_stripe);
+ lcl_fm_ext[ext_count - 1].fe_logical +
+ lcl_fm_ext[ext_count - 1].fe_length,
+ cur_stripe);
if (curr_loc >= fm_key->oa.o_size)
ost_eof = 1;
@@ -1945,7 +1774,7 @@ inactive_tgt:
/* Ran out of available extents? */
if (current_extent >= fiemap->fm_extent_count)
- goto finish;
+ enough = true;
} while (ost_done == 0 && ost_eof == 0);
if (cur_stripe_wrap == last_stripe)
@@ -1985,73 +1814,14 @@ static int lov_get_info(const struct lu_env *env, struct obd_export *exp,
{
struct obd_device *obddev = class_exp2obd(exp);
struct lov_obd *lov = &obddev->u.lov;
- int i, rc;
+ int rc;
if (!vallen || !val)
return -EFAULT;
obd_getref(obddev);
- if (KEY_IS(KEY_LOCK_TO_STRIPE)) {
- struct {
- char name[16];
- struct ldlm_lock *lock;
- } *data = key;
- struct ldlm_res_id *res_id = &data->lock->l_resource->lr_name;
- struct lov_oinfo *loi;
- __u32 *stripe = val;
-
- if (*vallen < sizeof(*stripe)) {
- rc = -EFAULT;
- goto out;
- }
- *vallen = sizeof(*stripe);
-
- /* XXX This is another one of those bits that will need to
- * change if we ever actually support nested LOVs. It uses
- * the lock's export to find out which stripe it is.
- */
- /* XXX - it's assumed all the locks for deleted OSTs have
- * been cancelled. Also, the export for deleted OSTs will
- * be NULL and won't match the lock's export.
- */
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- loi = lsm->lsm_oinfo[i];
- if (lov_oinfo_is_dummy(loi))
- continue;
-
- if (!lov->lov_tgts[loi->loi_ost_idx])
- continue;
- if (lov->lov_tgts[loi->loi_ost_idx]->ltd_exp ==
- data->lock->l_conn_export &&
- ostid_res_name_eq(&loi->loi_oi, res_id)) {
- *stripe = i;
- rc = 0;
- goto out;
- }
- }
- LDLM_ERROR(data->lock, "lock on inode without such object");
- dump_lsm(D_ERROR, lsm);
- rc = -ENXIO;
- goto out;
- } else if (KEY_IS(KEY_LAST_ID)) {
- struct obd_id_info *info = val;
- __u32 size = sizeof(u64);
- struct lov_tgt_desc *tgt;
-
- LASSERT(*vallen == sizeof(struct obd_id_info));
- tgt = lov->lov_tgts[info->idx];
-
- if (!tgt || !tgt->ltd_active) {
- rc = -ESRCH;
- goto out;
- }
-
- rc = obd_get_info(env, tgt->ltd_exp, keylen, key,
- &size, info->data, NULL);
- rc = 0;
- goto out;
- } else if (KEY_IS(KEY_LOVDESC)) {
+ if (KEY_IS(KEY_LOVDESC)) {
struct lov_desc *desc_ret = val;
*desc_ret = lov->desc;
@@ -2060,22 +1830,6 @@ static int lov_get_info(const struct lu_env *env, struct obd_export *exp,
} else if (KEY_IS(KEY_FIEMAP)) {
rc = lov_fiemap(lov, keylen, key, vallen, val, lsm);
goto out;
- } else if (KEY_IS(KEY_CONNECT_FLAG)) {
- struct lov_tgt_desc *tgt;
- __u64 ost_idx = *((__u64 *)val);
-
- LASSERT(*vallen == sizeof(__u64));
- LASSERT(ost_idx < lov->desc.ld_tgt_count);
- tgt = lov->lov_tgts[ost_idx];
-
- if (!tgt || !tgt->ltd_exp) {
- rc = -ESRCH;
- goto out;
- }
-
- *((__u64 *)val) = exp_connect_flags(tgt->ltd_exp);
- rc = 0;
- goto out;
} else if (KEY_IS(KEY_TGT_COUNT)) {
*((int *)val) = lov->desc.ld_tgt_count;
rc = 0;
@@ -2098,8 +1852,7 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
u32 count;
int i, rc = 0, err;
struct lov_tgt_desc *tgt;
- unsigned int incr = 0, check_uuid = 0, do_inactive = 0, no_set = 0;
- unsigned int next_id = 0, mds_con = 0;
+ int do_inactive = 0, no_set = 0;
if (!set) {
no_set = 1;
@@ -2111,18 +1864,8 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
obd_getref(obddev);
count = lov->desc.ld_tgt_count;
- if (KEY_IS(KEY_NEXT_ID)) {
- count = vallen / sizeof(struct obd_id_info);
- vallen = sizeof(u64);
- incr = sizeof(struct obd_id_info);
- do_inactive = 1;
- next_id = 1;
- } else if (KEY_IS(KEY_CHECKSUM)) {
+ if (KEY_IS(KEY_CHECKSUM)) {
do_inactive = 1;
- } else if (KEY_IS(KEY_EVICT_BY_NID)) {
- /* use defaults: do_inactive = incr = 0; */
- } else if (KEY_IS(KEY_MDS_CONN)) {
- mds_con = 1;
} else if (KEY_IS(KEY_CACHE_SET)) {
LASSERT(!lov->lov_cache);
lov->lov_cache = val;
@@ -2130,11 +1873,9 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
cl_cache_incref(lov->lov_cache);
}
- for (i = 0; i < count; i++, val = (char *)val + incr) {
- if (next_id)
- tgt = lov->lov_tgts[((struct obd_id_info *)val)->idx];
- else
- tgt = lov->lov_tgts[i];
+ for (i = 0; i < count; i++) {
+ tgt = lov->lov_tgts[i];
+
/* OST was disconnected */
if (!tgt || !tgt->ltd_exp)
continue;
@@ -2143,34 +1884,8 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
if (!tgt->ltd_active && !do_inactive)
continue;
- if (mds_con) {
- struct mds_group_info *mgi;
-
- LASSERT(vallen == sizeof(*mgi));
- mgi = (struct mds_group_info *)val;
-
- /* Only want a specific OSC */
- if (mgi->uuid && !obd_uuid_equals(mgi->uuid,
- &tgt->ltd_uuid))
- continue;
-
- err = obd_set_info_async(env, tgt->ltd_exp,
- keylen, key, sizeof(int),
- &mgi->group, set);
- } else if (next_id) {
- err = obd_set_info_async(env, tgt->ltd_exp,
- keylen, key, vallen,
- ((struct obd_id_info *)val)->data, set);
- } else {
- /* Only want a specific OSC */
- if (check_uuid &&
- !obd_uuid_equals(val, &tgt->ltd_uuid))
- continue;
-
- err = obd_set_info_async(env, tgt->ltd_exp,
- keylen, key, vallen, val, set);
- }
-
+ err = obd_set_info_async(env, tgt->ltd_exp, keylen, key,
+ vallen, val, set);
if (!rc)
rc = err;
}
@@ -2318,12 +2033,8 @@ static struct obd_ops lov_obd_ops = {
.statfs_async = lov_statfs_async,
.packmd = lov_packmd,
.unpackmd = lov_unpackmd,
- .create = lov_create,
- .destroy = lov_destroy,
.getattr_async = lov_getattr_async,
.setattr_async = lov_setattr_async,
- .adjust_kms = lov_adjust_kms,
- .find_cbdata = lov_find_cbdata,
.iocontrol = lov_iocontrol,
.get_info = lov_get_info,
.set_info_async = lov_set_info_async,
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
index f9621b0fd469..52f736338887 100644
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -75,6 +75,13 @@ struct lov_layout_operations {
static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov);
+void lov_lsm_put(struct cl_object *unused, struct lov_stripe_md *lsm)
+{
+ if (lsm)
+ lov_free_memmd(&lsm);
+}
+EXPORT_SYMBOL(lov_lsm_put);
+
/*****************************************************************************
*
* Lov object layout operations.
@@ -195,6 +202,10 @@ static int lov_page_slice_fixup(struct lov_object *lov,
struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
struct cl_object *o;
+ if (!stripe)
+ return hdr->coh_page_bufsize - lov->lo_cl.co_slice_off -
+ cfs_size_round(sizeof(struct lov_page));
+
cl_object_for_each(o, stripe)
o->co_slice_off += hdr->coh_page_bufsize;
@@ -224,6 +235,7 @@ static int lov_init_raid0(const struct lu_env *env,
LASSERT(!lov->lo_lsm);
lov->lo_lsm = lsm_addref(lsm);
+ lov->lo_layout_invalid = true;
r0->lo_nr = lsm->lsm_stripe_count;
LASSERT(r0->lo_nr <= lov_targets_nr(dev));
@@ -719,6 +731,10 @@ static int lov_layout_change(const struct lu_env *unused,
LASSERT(atomic_read(&lov->lo_active_ios) == 0);
lov->lo_type = LLT_EMPTY;
+ /* page bufsize fixup */
+ cl_object_header(&lov->lo_cl)->coh_page_bufsize -=
+ lov_page_slice_fixup(lov, NULL);
+
result = new_ops->llo_init(env,
lu2lov_dev(lov->lo_cl.co_lu.lo_dev),
lov, conf, state);
@@ -878,8 +894,8 @@ static int lov_attr_get(const struct lu_env *env, struct cl_object *obj,
return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_getattr, env, obj, attr);
}
-static int lov_attr_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned valid)
+static int lov_attr_update(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned int valid)
{
/*
* No dispatch is required here, as no layout implements this.
@@ -895,13 +911,30 @@ int lov_lock_init(const struct lu_env *env, struct cl_object *obj,
io);
}
+static int lov_object_getstripe(const struct lu_env *env, struct cl_object *obj,
+ struct lov_user_md __user *lum)
+{
+ struct lov_object *lov = cl2lov(obj);
+ struct lov_stripe_md *lsm;
+ int rc = 0;
+
+ lsm = lov_lsm_addref(lov);
+ if (!lsm)
+ return -ENODATA;
+
+ rc = lov_getstripe(cl2lov(obj), lsm, lum);
+ lov_lsm_put(obj, lsm);
+ return rc;
+}
+
static const struct cl_object_operations lov_ops = {
.coo_page_init = lov_page_init,
.coo_lock_init = lov_lock_init,
.coo_io_init = lov_io_init,
.coo_attr_get = lov_attr_get,
- .coo_attr_set = lov_attr_set,
- .coo_conf_set = lov_conf_set
+ .coo_attr_update = lov_attr_update,
+ .coo_conf_set = lov_conf_set,
+ .coo_getstripe = lov_object_getstripe
};
static const struct lu_object_operations lov_lu_obj_ops = {
@@ -938,7 +971,7 @@ struct lu_object *lov_object_alloc(const struct lu_env *env,
return obj;
}
-static struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov)
+struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov)
{
struct lov_stripe_md *lsm = NULL;
@@ -969,13 +1002,6 @@ struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj)
}
EXPORT_SYMBOL(lov_lsm_get);
-void lov_lsm_put(struct cl_object *unused, struct lov_stripe_md *lsm)
-{
- if (lsm)
- lov_free_memmd(&lsm);
-}
-EXPORT_SYMBOL(lov_lsm_put);
-
int lov_read_and_clear_async_rc(struct cl_object *clob)
{
struct lu_object *luobj;
diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c
index 869ef41b13ca..be6e9857ce2a 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pack.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pack.c
@@ -45,6 +45,7 @@
#include "../include/lustre/lustre_user.h"
#include "lov_internal.h"
+#include "lov_cl_internal.h"
void lov_dump_lmm_common(int level, void *lmmp)
{
@@ -104,11 +105,9 @@ void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm)
* LOVs properly. For now lov_mds_md_size() just assumes one u64
* per stripe.
*/
-int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
- struct lov_stripe_md *lsm)
+int lov_obd_packmd(struct lov_obd *lov, struct lov_mds_md **lmmp,
+ struct lov_stripe_md *lsm)
{
- struct obd_device *obd = class_exp2obd(exp);
- struct lov_obd *lov = &obd->u.lov;
struct lov_mds_md_v1 *lmmv1;
struct lov_mds_md_v3 *lmmv3;
__u16 stripe_count;
@@ -148,16 +147,11 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
stripe_count = 0;
}
} else {
- /* No need to allocate more than maximum supported stripes.
- * Anyway, this is pretty inaccurate since ld_tgt_count now
- * represents max index and we should rely on the actual number
- * of OSTs instead
+ /*
+ * Calculate the maximum easize from the currently active targets,
+ * which is exactly the maximum easize that LOV will see.
*/
- stripe_count = lov_mds_md_max_stripe_count(
- lov->lov_ocd.ocd_max_easize, lmm_magic);
-
- if (stripe_count > lov->desc.ld_tgt_count)
- stripe_count = lov->desc.ld_tgt_count;
+ stripe_count = lov->desc.ld_active_tgt_count;
}
/* XXX LOV STACKING call into osc for sizes */
@@ -225,6 +219,15 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
return lmm_size;
}
+int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
+ struct lov_stripe_md *lsm)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lov_obd *lov = &obd->u.lov;
+
+ return lov_obd_packmd(lov, lmmp, lsm);
+}
+
/* Find the max stripecount we should use */
__u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count)
{
@@ -284,7 +287,7 @@ int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count,
spin_lock_init(&(*lsmp)->lsm_lock);
(*lsmp)->lsm_magic = magic;
(*lsmp)->lsm_stripe_count = stripe_count;
- (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+ (*lsmp)->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES * stripe_count;
(*lsmp)->lsm_pattern = pattern;
(*lsmp)->lsm_pool_name[0] = '\0';
(*lsmp)->lsm_layout_gen = 0;
@@ -372,16 +375,17 @@ int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
* the maximum number of OST indices which will fit in the user buffer.
* lmm_magic must be LOV_USER_MAGIC.
*/
-int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
+int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
struct lov_user_md __user *lump)
{
/*
* XXX huge struct allocated on stack.
*/
/* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
+ struct lov_obd *lov;
struct lov_user_md_v3 lum;
struct lov_mds_md *lmmk = NULL;
- int rc, lmm_size;
+ int rc, lmmk_size, lmm_size;
int lum_size;
mm_segment_t seg;
@@ -401,12 +405,13 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
lum_size = sizeof(struct lov_user_md_v1);
if (copy_from_user(&lum, lump, lum_size)) {
rc = -EFAULT;
- goto out_set;
+ goto out;
}
- if ((lum.lmm_magic != LOV_USER_MAGIC) &&
- (lum.lmm_magic != LOV_USER_MAGIC_V3)) {
+ if (lum.lmm_magic != LOV_USER_MAGIC_V1 &&
+ lum.lmm_magic != LOV_USER_MAGIC_V3 &&
+ lum.lmm_magic != LOV_USER_MAGIC_SPECIFIC) {
rc = -EINVAL;
- goto out_set;
+ goto out;
}
if (lum.lmm_stripe_count &&
@@ -415,11 +420,13 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
lum.lmm_stripe_count = lsm->lsm_stripe_count;
rc = copy_to_user(lump, &lum, lum_size);
rc = -EOVERFLOW;
- goto out_set;
+ goto out;
}
- rc = lov_packmd(exp, &lmmk, lsm);
+ lov = lu2lov_dev(obj->lo_cl.co_lu.lo_dev)->ld_lov;
+ rc = lov_obd_packmd(lov, &lmmk, lsm);
if (rc < 0)
- goto out_set;
+ goto out;
+ lmmk_size = rc;
lmm_size = rc;
rc = 0;
@@ -455,7 +462,7 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
lmm_size = lum_size;
} else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
rc = -EOVERFLOW;
- goto out_set;
+ goto out_free;
}
/*
* Have a difference between lov_mds_md & lov_user_md.
@@ -468,8 +475,9 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
if (copy_to_user(lump, lmmk, lmm_size))
rc = -EFAULT;
- obd_free_diskmd(exp, &lmmk);
-out_set:
+out_free:
+ kfree(lmmk);
+out:
set_fs(seg);
return rc;
}
diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c
index c17026f14896..00bfabad78eb 100644
--- a/drivers/staging/lustre/lustre/lov/lov_page.c
+++ b/drivers/staging/lustre/lustre/lov/lov_page.c
@@ -65,7 +65,9 @@ static int lov_raid0_page_is_under_lock(const struct lu_env *env,
pgoff_t index = *max_index;
unsigned int pps; /* pages per stripe */
- CDEBUG(D_READA, "*max_index = %lu, nr = %d\n", index, r0->lo_nr);
+ CDEBUG(D_READA, DFID "*max_index = %lu, nr = %d\n",
+ PFID(lu_object_fid(lov2lu(loo))), index, r0->lo_nr);
+
if (index == 0) /* the page is not covered by any lock */
return 0;
@@ -80,7 +82,12 @@ static int lov_raid0_page_is_under_lock(const struct lu_env *env,
/* calculate the end of current stripe */
pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT;
- index = ((slice->cpl_index + pps) & ~(pps - 1)) - 1;
+ index = slice->cpl_index + pps - slice->cpl_index % pps - 1;
+
+ CDEBUG(D_READA, DFID "*max_index = %lu, index = %lu, pps = %u, stripe_size = %u, stripe no = %u, page index = %lu\n",
+ PFID(lu_object_fid(lov2lu(loo))), *max_index, index, pps,
+ loo->lo_lsm->lsm_stripe_size, lov_page_stripe(slice->cpl_page),
+ slice->cpl_index);
/* never exceed the end of the stripe */
*max_index = min_t(pgoff_t, *max_index, index);
@@ -122,6 +129,7 @@ int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
rc = lov_stripe_offset(loo->lo_lsm, offset, stripe, &suboff);
LASSERT(rc == 0);
+ lpg->lps_stripe = stripe;
cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_raid0_page_ops);
sub = lov_sub_get(env, lio, stripe);
diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c
index 4c2d21729589..f8c8a361ef79 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pool.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pool.c
@@ -61,7 +61,7 @@ void lov_pool_putref(struct pool_desc *pool)
LASSERT(hlist_unhashed(&pool->pool_hash));
LASSERT(list_empty(&pool->pool_list));
LASSERT(!pool->pool_debugfs_entry);
- lov_ost_pool_free(&(pool->pool_obds));
+ lov_ost_pool_free(&pool->pool_obds);
kfree(pool);
}
}
@@ -92,7 +92,7 @@ static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key, unsigned m
for (i = 0; i < LOV_MAXPOOLNAME; i++) {
if (poolname[i] == '\0')
break;
- result = (result << 4)^(result >> 28) ^ poolname[i];
+ result = (result << 4) ^ (result >> 28) ^ poolname[i];
}
return (result % mask);
}
@@ -260,7 +260,7 @@ static int pool_proc_show(struct seq_file *s, void *v)
tgt = pool_tgt(iter->pool, iter->idx);
up_read(&pool_tgt_rw_sem(iter->pool));
if (tgt)
- seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
+ seq_printf(s, "%s\n", obd_uuid2str(&tgt->ltd_uuid));
return 0;
}
@@ -400,7 +400,7 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
struct pool_desc *new_pool;
int rc;
- lov = &(obd->u.lov);
+ lov = &obd->u.lov;
if (strlen(poolname) > LOV_MAXPOOLNAME)
return -ENAMETOOLONG;
@@ -471,7 +471,7 @@ int lov_pool_del(struct obd_device *obd, char *poolname)
struct lov_obd *lov;
struct pool_desc *pool;
- lov = &(obd->u.lov);
+ lov = &obd->u.lov;
/* lookup and kill hash reference */
pool = cfs_hash_del_key(lov->lov_pools_hash_body, poolname);
@@ -503,7 +503,7 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
unsigned int lov_idx;
int rc;
- lov = &(obd->u.lov);
+ lov = &obd->u.lov;
pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
if (!pool)
@@ -517,7 +517,7 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
if (!lov->lov_tgts[lov_idx])
continue;
if (obd_uuid_equals(&ost_uuid,
- &(lov->lov_tgts[lov_idx]->ltd_uuid)))
+ &lov->lov_tgts[lov_idx]->ltd_uuid))
break;
}
/* test if ost found in lov */
@@ -547,7 +547,7 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
unsigned int lov_idx;
int rc = 0;
- lov = &(obd->u.lov);
+ lov = &obd->u.lov;
pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
if (!pool)
@@ -562,7 +562,7 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
continue;
if (obd_uuid_equals(&ost_uuid,
- &(lov->lov_tgts[lov_idx]->ltd_uuid)))
+ &lov->lov_tgts[lov_idx]->ltd_uuid))
break;
}
diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c
index 4099b51f826e..09dcaf484c89 100644
--- a/drivers/staging/lustre/lustre/lov/lov_request.c
+++ b/drivers/staging/lustre/lustre/lov/lov_request.c
@@ -325,84 +325,6 @@ out_set:
return rc;
}
-int lov_fini_destroy_set(struct lov_request_set *set)
-{
- if (!set)
- return 0;
- LASSERT(set->set_exp);
- if (atomic_read(&set->set_completes)) {
- /* FIXME update qos data here */
- }
-
- lov_put_reqset(set);
-
- return 0;
-}
-
-int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
- struct obdo *src_oa, struct lov_stripe_md *lsm,
- struct obd_trans_info *oti,
- struct lov_request_set **reqset)
-{
- struct lov_request_set *set;
- struct lov_obd *lov = &exp->exp_obd->u.lov;
- int rc = 0, i;
-
- set = kzalloc(sizeof(*set), GFP_NOFS);
- if (!set)
- return -ENOMEM;
- lov_init_set(set);
-
- set->set_exp = exp;
- set->set_oi = oinfo;
- set->set_oi->oi_md = lsm;
- set->set_oi->oi_oa = src_oa;
- if (oti && src_oa->o_valid & OBD_MD_FLCOOKIE)
- set->set_cookies = oti->oti_logcookies;
-
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- struct lov_oinfo *loi;
- struct lov_request *req;
-
- loi = lsm->lsm_oinfo[i];
- if (lov_oinfo_is_dummy(loi))
- continue;
-
- if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- continue;
- }
-
- req = kzalloc(sizeof(*req), GFP_NOFS);
- if (!req) {
- rc = -ENOMEM;
- goto out_set;
- }
-
- req->rq_stripe = i;
- req->rq_idx = loi->loi_ost_idx;
-
- req->rq_oi.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
- if (!req->rq_oi.oi_oa) {
- kfree(req);
- rc = -ENOMEM;
- goto out_set;
- }
- memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
- req->rq_oi.oi_oa->o_oi = loi->loi_oi;
- lov_set_add_req(req, set);
- }
- if (!set->set_count) {
- rc = -EIO;
- goto out_set;
- }
- *reqset = set;
- return rc;
-out_set:
- lov_fini_destroy_set(set);
- return rc;
-}
-
int lov_fini_setattr_set(struct lov_request_set *set)
{
int rc = 0;
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c
index fb2f2660b3e9..a2bac7a3b71b 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_object.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_object.c
@@ -98,8 +98,8 @@ static int lovsub_object_print(const struct lu_env *env, void *cookie,
return (*p)(env, cookie, "[%d]", los->lso_index);
}
-static int lovsub_attr_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned valid)
+static int lovsub_attr_update(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned int valid)
{
struct lov_object *lov = cl2lovsub(obj)->lso_super;
@@ -119,7 +119,7 @@ static int lovsub_object_glimpse(const struct lu_env *env,
static const struct cl_object_operations lovsub_ops = {
.coo_page_init = lovsub_page_init,
.coo_lock_init = lovsub_lock_init,
- .coo_attr_set = lovsub_attr_set,
+ .coo_attr_update = lovsub_attr_update,
.coo_glimpse = lovsub_object_glimpse
};
diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
index 98d15fb247bc..fca9450de57c 100644
--- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
+++ b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
@@ -43,11 +43,10 @@ static ssize_t max_rpcs_in_flight_show(struct kobject *kobj,
int len;
struct obd_device *dev = container_of(kobj, struct obd_device,
obd_kobj);
- struct client_obd *cli = &dev->u.cli;
+ __u32 max;
- spin_lock(&cli->cl_loi_list_lock);
- len = sprintf(buf, "%u\n", cli->cl_max_rpcs_in_flight);
- spin_unlock(&cli->cl_loi_list_lock);
+ max = obd_get_max_rpcs_in_flight(&dev->u.cli);
+ len = sprintf(buf, "%u\n", max);
return len;
}
@@ -59,7 +58,6 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
{
struct obd_device *dev = container_of(kobj, struct obd_device,
obd_kobj);
- struct client_obd *cli = &dev->u.cli;
int rc;
unsigned long val;
@@ -67,12 +65,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
if (rc)
return rc;
- if (val < 1 || val > MDC_MAX_RIF_MAX)
- return -ERANGE;
-
- spin_lock(&cli->cl_loi_list_lock);
- cli->cl_max_rpcs_in_flight = val;
- spin_unlock(&cli->cl_loi_list_lock);
+ rc = obd_set_max_rpcs_in_flight(&dev->u.cli, val);
+ if (rc)
+ count = rc;
return count;
}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
index 58f2841cabe4..f446c1c2584b 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_internal.h
+++ b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
@@ -34,63 +34,57 @@
#define _MDC_INTERNAL_H
#include "../include/lustre_mdc.h"
-#include "../include/lustre_mds.h"
void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars);
void mdc_pack_body(struct ptlrpc_request *req, const struct lu_fid *fid,
- __u64 valid, int ea_size, __u32 suppgid, int flags);
-void mdc_is_subdir_pack(struct ptlrpc_request *req, const struct lu_fid *pfid,
- const struct lu_fid *cfid, int flags);
+ __u64 valid, size_t ea_size, __u32 suppgid, u32 flags);
void mdc_swap_layouts_pack(struct ptlrpc_request *req,
struct md_op_data *op_data);
-void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, __u32 size,
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, size_t size,
const struct lu_fid *fid);
-void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags,
- struct md_op_data *data, int ea_size);
+void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags,
+ struct md_op_data *data, size_t ea_size);
void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- void *ea, int ealen, void *ea2, int ea2len);
+ void *ea, size_t ealen, void *ea2, size_t ea2len);
void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- const void *data, int datalen, __u32 mode, __u32 uid,
- __u32 gid, cfs_cap_t capability, __u64 rdev);
+ const void *data, size_t datalen, umode_t mode, uid_t uid,
+ gid_t gid, cfs_cap_t capability, __u64 rdev);
void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- __u32 mode, __u64 rdev, __u64 flags, const void *data,
- int datalen);
+ umode_t mode, __u64 rdev, __u64 flags, const void *data,
+ size_t datalen);
void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- const char *old, int oldlen, const char *new, int newlen);
+ const char *old, size_t oldlen,
+ const char *new, size_t newlen);
void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
-int mdc_enter_request(struct client_obd *cli);
-void mdc_exit_request(struct client_obd *cli);
/* mdc/mdc_locks.c */
int mdc_set_lock_data(struct obd_export *exp,
- __u64 *lockh, void *data, __u64 *bits);
+ const struct lustre_handle *lockh,
+ void *data, __u64 *bits);
int mdc_null_inode(struct obd_export *exp, const struct lu_fid *fid);
-int mdc_find_cbdata(struct obd_export *exp, const struct lu_fid *fid,
- ldlm_iterator_t it, void *data);
-
int mdc_intent_lock(struct obd_export *exp,
- struct md_op_data *,
- void *lmm, int lmmsize,
- struct lookup_intent *, int,
+ struct md_op_data *op_data,
+ struct lookup_intent *it,
struct ptlrpc_request **reqp,
ldlm_blocking_callback cb_blocking,
__u64 extra_lock_flags);
+
int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
+ const ldlm_policy_data_t *policy,
struct lookup_intent *it, struct md_op_data *op_data,
- struct lustre_handle *lockh, void *lmm, int lmmsize,
- struct ptlrpc_request **req, __u64 extra_lock_flags);
+ struct lustre_handle *lockh, __u64 extra_lock_flags);
int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid,
struct list_head *cancels, enum ldlm_mode mode,
__u64 bits);
/* mdc/mdc_request.c */
-int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
- struct md_op_data *op_data);
+int mdc_fid_alloc(const struct lu_env *env, struct obd_export *exp,
+ struct lu_fid *fid, struct md_op_data *op_data);
struct obd_client_handle;
int mdc_set_open_replay_data(struct obd_export *exp,
@@ -101,16 +95,17 @@ void mdc_commit_open(struct ptlrpc_request *req);
void mdc_replay_open(struct ptlrpc_request *req);
int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
- const void *data, int datalen, int mode, __u32 uid, __u32 gid,
- cfs_cap_t capability, __u64 rdev,
+ const void *data, size_t datalen, umode_t mode, uid_t uid,
+ gid_t gid, cfs_cap_t capability, __u64 rdev,
struct ptlrpc_request **request);
int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
struct ptlrpc_request **request);
int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
- const char *old, int oldlen, const char *new, int newlen,
+ const char *old, size_t oldlen,
+ const char *new, size_t newlen,
struct ptlrpc_request **request);
int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, int ealen, void *ea2, int ea2len,
+ void *ea, size_t ealen, void *ea2, size_t ea2len,
struct ptlrpc_request **request, struct md_open_data **mod);
int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
struct ptlrpc_request **request);
@@ -138,4 +133,12 @@ static inline int mdc_prep_elc_req(struct obd_export *exp,
count);
}
+static inline unsigned long hash_x_index(__u64 hash, int hash64)
+{
+ if (BITS_PER_LONG == 32 && hash64)
+ hash >>= 32;
+ /* save hash 0 with hash 1 */
+ return ~0UL - (hash + !hash);
+}
+
#endif
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
index 143bd7628572..aac7e04873e2 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
@@ -37,27 +37,12 @@
static void __mdc_pack_body(struct mdt_body *b, __u32 suppgid)
{
- b->suppgid = suppgid;
- b->uid = from_kuid(&init_user_ns, current_uid());
- b->gid = from_kgid(&init_user_ns, current_gid());
- b->fsuid = from_kuid(&init_user_ns, current_fsuid());
- b->fsgid = from_kgid(&init_user_ns, current_fsgid());
- b->capability = cfs_curproc_cap_pack();
-}
-
-void mdc_is_subdir_pack(struct ptlrpc_request *req, const struct lu_fid *pfid,
- const struct lu_fid *cfid, int flags)
-{
- struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
- &RMF_MDT_BODY);
-
- if (pfid) {
- b->fid1 = *pfid;
- b->valid = OBD_MD_FLID;
- }
- if (cfid)
- b->fid2 = *cfid;
- b->flags = flags;
+ b->mbo_suppgid = suppgid;
+ b->mbo_uid = from_kuid(&init_user_ns, current_uid());
+ b->mbo_gid = from_kgid(&init_user_ns, current_gid());
+ b->mbo_fsuid = from_kuid(&init_user_ns, current_fsuid());
+ b->mbo_fsgid = from_kgid(&init_user_ns, current_fsgid());
+ b->mbo_capability = cfs_curproc_cap_pack();
}
void mdc_swap_layouts_pack(struct ptlrpc_request *req,
@@ -67,43 +52,74 @@ void mdc_swap_layouts_pack(struct ptlrpc_request *req,
&RMF_MDT_BODY);
__mdc_pack_body(b, op_data->op_suppgids[0]);
- b->fid1 = op_data->op_fid1;
- b->fid2 = op_data->op_fid2;
- b->valid |= OBD_MD_FLID;
+ b->mbo_fid1 = op_data->op_fid1;
+ b->mbo_fid2 = op_data->op_fid2;
+ b->mbo_valid |= OBD_MD_FLID;
}
void mdc_pack_body(struct ptlrpc_request *req, const struct lu_fid *fid,
- __u64 valid, int ea_size, __u32 suppgid, int flags)
+ __u64 valid, size_t ea_size, __u32 suppgid, u32 flags)
{
struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
&RMF_MDT_BODY);
- b->valid = valid;
- b->eadatasize = ea_size;
- b->flags = flags;
+ b->mbo_valid = valid;
+ b->mbo_eadatasize = ea_size;
+ b->mbo_flags = flags;
__mdc_pack_body(b, suppgid);
if (fid) {
- b->fid1 = *fid;
- b->valid |= OBD_MD_FLID;
+ b->mbo_fid1 = *fid;
+ b->mbo_valid |= OBD_MD_FLID;
}
}
-void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff,
- __u32 size, const struct lu_fid *fid)
+/**
+ * Pack a name (path component) into a request
+ *
+ * \param[in] req request
+ * \param[in] field request field (usually RMF_NAME)
+ * \param[in] name path component
+ * \param[in] name_len length of path component
+ *
+ * \a field must be present in \a req and of size \a name_len + 1.
+ *
+ * \a name must be '\0' terminated of length \a name_len and represent
+ * a single path component (not contain '/').
+ */
+static void mdc_pack_name(struct ptlrpc_request *req,
+ const struct req_msg_field *field,
+ const char *name, size_t name_len)
+{
+ size_t buf_size;
+ size_t cpy_len;
+ char *buf;
+
+ buf = req_capsule_client_get(&req->rq_pill, field);
+ buf_size = req_capsule_get_size(&req->rq_pill, field, RCL_CLIENT);
+
+ LASSERT(name && name_len && buf && buf_size == name_len + 1);
+
+ cpy_len = strlcpy(buf, name, buf_size);
+
+ LASSERT(cpy_len == name_len && lu_name_is_valid_2(buf, cpy_len));
+}
+
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, size_t size,
+ const struct lu_fid *fid)
{
struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
&RMF_MDT_BODY);
- b->fid1 = *fid;
- b->valid |= OBD_MD_FLID;
- b->size = pgoff; /* !! */
- b->nlink = size; /* !! */
+ b->mbo_fid1 = *fid;
+ b->mbo_valid |= OBD_MD_FLID;
+ b->mbo_size = pgoff; /* !! */
+ b->mbo_nlink = size; /* !! */
__mdc_pack_body(b, -1);
- b->mode = LUDA_FID | LUDA_TYPE;
+ b->mbo_mode = LUDA_FID | LUDA_TYPE;
}
/* packing of MDS records */
void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- const void *data, int datalen, __u32 mode,
- __u32 uid, __u32 gid, cfs_cap_t cap_effective, __u64 rdev)
+ const void *data, size_t datalen, umode_t mode,
+ uid_t uid, gid_t gid, cfs_cap_t cap_effective, __u64 rdev)
{
struct mdt_rec_create *rec;
char *tmp;
@@ -130,22 +146,17 @@ void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
rec->cr_bias = op_data->op_bias;
rec->cr_umask = current_umask();
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
- LOGL0(op_data->op_name, op_data->op_namelen, tmp);
-
+ mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
if (data) {
tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
memcpy(tmp, data, datalen);
}
}
-static __u64 mds_pack_open_flags(__u64 flags, __u32 mode)
+static inline __u64 mds_pack_open_flags(__u64 flags)
{
__u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE |
- MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |
- MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |
- MDS_OPEN_BY_FID | MDS_OPEN_LEASE |
- MDS_OPEN_RELEASE));
+ MDS_OPEN_FL_INTERNAL));
if (flags & O_CREAT)
cr_flags |= MDS_OPEN_CREAT;
if (flags & O_EXCL)
@@ -171,8 +182,8 @@ static __u64 mds_pack_open_flags(__u64 flags, __u32 mode)
/* packing of MDS records */
void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- __u32 mode, __u64 rdev, __u64 flags, const void *lmm,
- int lmmlen)
+ umode_t mode, __u64 rdev, __u64 flags, const void *lmm,
+ size_t lmmlen)
{
struct mdt_rec_create *rec;
char *tmp;
@@ -190,7 +201,7 @@ void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
rec->cr_fid2 = op_data->op_fid2;
rec->cr_mode = mode;
- cr_flags = mds_pack_open_flags(flags, mode);
+ cr_flags = mds_pack_open_flags(flags);
rec->cr_rdev = rdev;
rec->cr_time = op_data->op_mod_time;
rec->cr_suppgid1 = op_data->op_suppgids[0];
@@ -200,8 +211,9 @@ void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
rec->cr_old_handle = op_data->op_handle;
if (op_data->op_name) {
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
- LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+ mdc_pack_name(req, &RMF_NAME, op_data->op_name,
+ op_data->op_namelen);
+
if (op_data->op_bias & MDS_CREATE_VOLATILE)
cr_flags |= MDS_OPEN_VOLATILE;
}
@@ -295,7 +307,7 @@ static void mdc_ioepoch_pack(struct mdt_ioepoch *epoch,
}
void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- void *ea, int ealen, void *ea2, int ea2len)
+ void *ea, size_t ealen, void *ea2, size_t ea2len)
{
struct mdt_rec_setattr *rec;
struct mdt_ioepoch *epoch;
@@ -316,7 +328,7 @@ void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
lum = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
if (!ea) { /* Remove LOV EA */
- lum->lmm_magic = LOV_USER_MAGIC_V1;
+ lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V1);
lum->lmm_stripe_size = 0;
lum->lmm_stripe_count = 0;
lum->lmm_stripe_offset = (typeof(lum->lmm_stripe_offset))(-1);
@@ -334,7 +346,6 @@ void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
{
struct mdt_rec_unlink *rec;
- char *tmp;
CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_unlink));
rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
@@ -352,15 +363,12 @@ void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
rec->ul_time = op_data->op_mod_time;
rec->ul_bias = op_data->op_bias;
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
- LASSERT(tmp);
- LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+ mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
}
void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
{
struct mdt_rec_link *rec;
- char *tmp;
CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_link));
rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
@@ -376,20 +384,21 @@ void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
rec->lk_time = op_data->op_mod_time;
rec->lk_bias = op_data->op_bias;
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
- LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+ mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
}
void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- const char *old, int oldlen, const char *new, int newlen)
+ const char *old, size_t oldlen,
+ const char *new, size_t newlen)
{
struct mdt_rec_rename *rec;
- char *tmp;
CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_rename));
rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
/* XXX do something about time, uid, gid */
+ rec->rn_opcode = op_data->op_cli_flags & CLI_MIGRATE ?
+ REINT_MIGRATE : REINT_RENAME;
rec->rn_opcode = REINT_RENAME;
rec->rn_fsuid = op_data->op_fsuid;
rec->rn_fsgid = op_data->op_fsgid;
@@ -402,39 +411,34 @@ void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
rec->rn_mode = op_data->op_mode;
rec->rn_bias = op_data->op_bias;
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
- LOGL0(old, oldlen, tmp);
+ mdc_pack_name(req, &RMF_NAME, old, oldlen);
- if (new) {
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_SYMTGT);
- LOGL0(new, newlen, tmp);
- }
+ if (new)
+ mdc_pack_name(req, &RMF_SYMTGT, new, newlen);
}
-void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags,
- struct md_op_data *op_data, int ea_size)
+void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags,
+ struct md_op_data *op_data, size_t ea_size)
{
struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
&RMF_MDT_BODY);
- b->valid = valid;
+ b->mbo_valid = valid;
if (op_data->op_bias & MDS_CHECK_SPLIT)
- b->valid |= OBD_MD_FLCKSPLIT;
+ b->mbo_valid |= OBD_MD_FLCKSPLIT;
if (op_data->op_bias & MDS_CROSS_REF)
- b->valid |= OBD_MD_FLCROSSREF;
- b->eadatasize = ea_size;
- b->flags = flags;
+ b->mbo_valid |= OBD_MD_FLCROSSREF;
+ b->mbo_eadatasize = ea_size;
+ b->mbo_flags = flags;
__mdc_pack_body(b, op_data->op_suppgids[0]);
- b->fid1 = op_data->op_fid1;
- b->fid2 = op_data->op_fid2;
- b->valid |= OBD_MD_FLID;
-
- if (op_data->op_name) {
- char *tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+ b->mbo_fid1 = op_data->op_fid1;
+ b->mbo_fid2 = op_data->op_fid2;
+ b->mbo_valid |= OBD_MD_FLID;
- LOGL0(op_data->op_name, op_data->op_namelen, tmp);
- }
+ if (op_data->op_name)
+ mdc_pack_name(req, &RMF_NAME, op_data->op_name,
+ op_data->op_namelen);
}
static void mdc_hsm_release_pack(struct ptlrpc_request *req,
@@ -482,67 +486,3 @@ void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
mdc_ioepoch_pack(epoch, op_data);
mdc_hsm_release_pack(req, op_data);
}
-
-static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
-{
- int rc;
-
- spin_lock(&cli->cl_loi_list_lock);
- rc = list_empty(&mcw->mcw_entry);
- spin_unlock(&cli->cl_loi_list_lock);
- return rc;
-};
-
-/* We record requests in flight in cli->cl_r_in_flight here.
- * There is only one write rpc possible in mdc anyway. If this to change
- * in the future - the code may need to be revisited.
- */
-int mdc_enter_request(struct client_obd *cli)
-{
- int rc = 0;
- struct mdc_cache_waiter mcw;
- struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
-
- spin_lock(&cli->cl_loi_list_lock);
- if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
- list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
- init_waitqueue_head(&mcw.mcw_waitq);
- spin_unlock(&cli->cl_loi_list_lock);
- rc = l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw),
- &lwi);
- if (rc) {
- spin_lock(&cli->cl_loi_list_lock);
- if (list_empty(&mcw.mcw_entry))
- cli->cl_r_in_flight--;
- list_del_init(&mcw.mcw_entry);
- spin_unlock(&cli->cl_loi_list_lock);
- }
- } else {
- cli->cl_r_in_flight++;
- spin_unlock(&cli->cl_loi_list_lock);
- }
- return rc;
-}
-
-void mdc_exit_request(struct client_obd *cli)
-{
- struct list_head *l, *tmp;
- struct mdc_cache_waiter *mcw;
-
- spin_lock(&cli->cl_loi_list_lock);
- cli->cl_r_in_flight--;
- list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
- if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
- /* No free request slots anymore */
- break;
- }
-
- mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry);
- list_del_init(&mcw->mcw_entry);
- cli->cl_r_in_flight++;
- wake_up(&mcw->mcw_waitq);
- }
- /* Empty waiting list? Decrease reqs in-flight number */
-
- spin_unlock(&cli->cl_loi_list_lock);
-}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index f48b58423307..f1f6c082fa42 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -93,8 +93,8 @@ int it_open_error(int phase, struct lookup_intent *it)
EXPORT_SYMBOL(it_open_error);
/* this must be called on a lockh that is known to have a referenced lock */
-int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
- __u64 *bits)
+int mdc_set_lock_data(struct obd_export *exp, const struct lustre_handle *lockh,
+ void *data, __u64 *bits)
{
struct ldlm_lock *lock;
struct inode *new_inode = data;
@@ -102,10 +102,10 @@ int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
if (bits)
*bits = 0;
- if (!*lockh)
+ if (!lustre_handle_is_used(lockh))
return 0;
- lock = ldlm_handle2lock((struct lustre_handle *)lockh);
+ lock = ldlm_handle2lock(lockh);
LASSERT(lock);
lock_res_and_lock(lock);
@@ -174,7 +174,7 @@ int mdc_null_inode(struct obd_export *exp,
fid_build_reg_res_name(fid, &res_id);
res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
- if (!res)
+ if (IS_ERR(res))
return 0;
lock_res(res);
@@ -185,28 +185,6 @@ int mdc_null_inode(struct obd_export *exp,
return 0;
}
-/* find any ldlm lock of the inode in mdc
- * return 0 not find
- * 1 find one
- * < 0 error
- */
-int mdc_find_cbdata(struct obd_export *exp,
- const struct lu_fid *fid,
- ldlm_iterator_t it, void *data)
-{
- struct ldlm_res_id res_id;
- int rc = 0;
-
- fid_build_reg_res_name((struct lu_fid *)fid, &res_id);
- rc = ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id,
- it, data);
- if (rc == LDLM_ITER_STOP)
- return 1;
- else if (rc == LDLM_ITER_CONTINUE)
- return 0;
- return rc;
-}
-
static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc)
{
/* Don't hold error requests for replay. */
@@ -240,24 +218,24 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
/* FIXME: remove this explicit offset. */
rc = sptlrpc_cli_enlarge_reqbuf(req, DLM_INTENT_REC_OFF + 4,
- body->eadatasize);
+ body->mbo_eadatasize);
if (rc) {
CERROR("Can't enlarge segment %d size to %d\n",
- DLM_INTENT_REC_OFF + 4, body->eadatasize);
- body->valid &= ~OBD_MD_FLEASIZE;
- body->eadatasize = 0;
+ DLM_INTENT_REC_OFF + 4, body->mbo_eadatasize);
+ body->mbo_valid &= ~OBD_MD_FLEASIZE;
+ body->mbo_eadatasize = 0;
}
}
-static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp,
- struct lookup_intent *it,
- struct md_op_data *op_data,
- void *lmm, int lmmsize,
- void *cb_data)
+static struct ptlrpc_request *
+mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it,
+ struct md_op_data *op_data)
{
struct ptlrpc_request *req;
struct obd_device *obddev = class_exp2obd(exp);
struct ldlm_intent *lit;
+ const void *lmm = op_data->op_data;
+ u32 lmmsize = op_data->op_data_size;
LIST_HEAD(cancels);
int count = 0;
int mode;
@@ -274,7 +252,7 @@ static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp,
else
mode = LCK_PR;
} else {
- if (it->it_flags & (FMODE_WRITE|MDS_OPEN_TRUNC))
+ if (it->it_flags & (FMODE_WRITE | MDS_OPEN_TRUNC))
mode = LCK_CW;
else if (it->it_flags & __FMODE_EXEC)
mode = LCK_PR;
@@ -325,6 +303,9 @@ static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp,
mdc_open_pack(req, op_data, it->it_create_mode, 0, it->it_flags, lmm,
lmmsize);
+ req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+ obddev->u.cli.cl_max_mds_easize);
+
ptlrpc_request_set_replen(req);
return req;
}
@@ -336,7 +317,8 @@ mdc_intent_getxattr_pack(struct obd_export *exp,
{
struct ptlrpc_request *req;
struct ldlm_intent *lit;
- int rc, count = 0, maxdata;
+ int rc, count = 0;
+ u32 maxdata;
LIST_HEAD(cancels);
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
@@ -421,7 +403,7 @@ static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp,
OBD_MD_MEA | OBD_MD_FLACL;
struct ldlm_intent *lit;
int rc;
- int easize;
+ u32 easize;
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
&RQF_LDLM_INTENT_GETATTR);
@@ -526,7 +508,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
struct ldlm_reply *lockrep;
struct ldlm_lock *lock;
void *lvb_data = NULL;
- int lvb_len = 0;
+ u32 lvb_len = 0;
LASSERT(rc >= 0);
/* Similarly, if we're going to replay this request, we don't want to
@@ -605,7 +587,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
mdc_set_open_replay_data(NULL, NULL, it);
}
- if ((body->valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) {
+ if ((body->mbo_valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) {
void *eadata;
mdc_update_max_ea_from_body(exp, body);
@@ -615,7 +597,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
* Eventually, obd_unpackmd() will check the contents.
*/
eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
- body->eadatasize);
+ body->mbo_eadatasize);
if (!eadata)
return -EPROTO;
@@ -623,7 +605,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
* lock
*/
lvb_data = eadata;
- lvb_len = body->eadatasize;
+ lvb_len = body->mbo_eadatasize;
/*
* We save the reply LOV EA in case we have to replay a
@@ -639,20 +621,20 @@ static int mdc_finish_enqueue(struct obd_export *exp,
if (req_capsule_get_size(pill, &RMF_EADATA,
RCL_CLIENT) <
- body->eadatasize)
+ body->mbo_eadatasize)
mdc_realloc_openmsg(req, body);
else
req_capsule_shrink(pill, &RMF_EADATA,
- body->eadatasize,
+ body->mbo_eadatasize,
RCL_CLIENT);
req_capsule_set_size(pill, &RMF_EADATA,
RCL_CLIENT,
- body->eadatasize);
+ body->mbo_eadatasize);
lmm = req_capsule_client_get(pill, &RMF_EADATA);
if (lmm)
- memcpy(lmm, eadata, body->eadatasize);
+ memcpy(lmm, eadata, body->mbo_eadatasize);
}
}
} else if (it->it_op & IT_LAYOUT) {
@@ -662,7 +644,8 @@ static int mdc_finish_enqueue(struct obd_export *exp,
lvb_len = req_capsule_get_size(pill, &RMF_DLM_LVB, RCL_SERVER);
if (lvb_len > 0) {
lvb_data = req_capsule_server_sized_get(pill,
- &RMF_DLM_LVB, lvb_len);
+ &RMF_DLM_LVB,
+ lvb_len);
if (!lvb_data)
return -EPROTO;
}
@@ -705,9 +688,9 @@ static int mdc_finish_enqueue(struct obd_export *exp,
* we don't know in advance the file type.
*/
int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
+ const ldlm_policy_data_t *policy,
struct lookup_intent *it, struct md_op_data *op_data,
- struct lustre_handle *lockh, void *lmm, int lmmsize,
- struct ptlrpc_request **reqp, u64 extra_lock_flags)
+ struct lustre_handle *lockh, u64 extra_lock_flags)
{
static const ldlm_policy_data_t lookup_policy = {
.l_inodebits = { MDS_INODELOCK_LOOKUP }
@@ -721,9 +704,8 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
static const ldlm_policy_data_t getxattr_policy = {
.l_inodebits = { MDS_INODELOCK_XATTR }
};
- ldlm_policy_data_t const *policy = &lookup_policy;
struct obd_device *obddev = class_exp2obd(exp);
- struct ptlrpc_request *req;
+ struct ptlrpc_request *req = NULL;
u64 flags, saved_flags = extra_lock_flags;
struct ldlm_res_id res_id;
int generation, resends = 0;
@@ -733,40 +715,32 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
LASSERTF(!it || einfo->ei_type == LDLM_IBITS, "lock type %d\n",
einfo->ei_type);
-
fid_build_reg_res_name(&op_data->op_fid1, &res_id);
if (it) {
+ LASSERT(!policy);
+
saved_flags |= LDLM_FL_HAS_INTENT;
- if (it->it_op & (IT_UNLINK | IT_GETATTR | IT_READDIR))
+ if (it->it_op & (IT_OPEN | IT_UNLINK | IT_GETATTR | IT_READDIR))
policy = &update_policy;
else if (it->it_op & IT_LAYOUT)
policy = &layout_policy;
else if (it->it_op & (IT_GETXATTR | IT_SETXATTR))
policy = &getxattr_policy;
+ else
+ policy = &lookup_policy;
}
- LASSERT(!reqp);
-
generation = obddev->u.cli.cl_import->imp_generation;
resend:
flags = saved_flags;
if (!it) {
- /* The only way right now is FLOCK, in this case we hide flock
- * policy as lmm, but lmmsize is 0
- */
- LASSERT(lmm && lmmsize == 0);
+ /* The only way right now is FLOCK. */
LASSERTF(einfo->ei_type == LDLM_FLOCK, "lock type %d\n",
einfo->ei_type);
- policy = lmm;
res_id.name[3] = LDLM_FLOCK;
- req = NULL;
} else if (it->it_op & IT_OPEN) {
- req = mdc_intent_open_pack(exp, it, op_data, lmm, lmmsize,
- einfo->ei_cbdata);
- policy = &update_policy;
- einfo->ei_cbdata = NULL;
- lmm = NULL;
+ req = mdc_intent_open_pack(exp, it, op_data);
} else if (it->it_op & IT_UNLINK) {
req = mdc_intent_unlink_pack(exp, it, op_data);
} else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
@@ -806,7 +780,7 @@ resend:
*/
if (it) {
mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
- rc = mdc_enter_request(&obddev->u.cli);
+ rc = obd_get_request_slot(&obddev->u.cli);
if (rc != 0) {
mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
mdc_clear_replay_flag(req, 0);
@@ -834,13 +808,12 @@ resend:
return rc;
}
- mdc_exit_request(&obddev->u.cli);
+ obd_put_request_slot(&obddev->u.cli);
mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
if (rc < 0) {
- CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
- "%s: ldlm_cli_enqueue failed: rc = %d\n",
- obddev->obd_name, rc);
+ CDEBUG(D_INFO, "%s: ldlm_cli_enqueue failed: rc = %d\n",
+ obddev->obd_name, rc);
mdc_clear_replay_flag(req, rc);
ptlrpc_req_finished(req);
@@ -903,6 +876,9 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
LASSERT(request != LP_POISON);
LASSERT(request->rq_repmsg != LP_POISON);
+ if (it->it_op & IT_READDIR)
+ return 0;
+
if (!it_disposition(it, DISP_IT_EXECD)) {
/* The server failed before it even started executing the
* intent, i.e. because it couldn't unpack the request.
@@ -917,27 +893,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
mdt_body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
LASSERT(mdt_body); /* mdc_enqueue checked */
- /* If we were revalidating a fid/name pair, mark the intent in
- * case we fail and get called again from lookup
- */
- if (fid_is_sane(&op_data->op_fid2) &&
- it->it_create_mode & M_CHECK_STALE &&
- it->it_op != IT_GETATTR) {
- /* Also: did we find the same inode? */
- /* sever can return one of two fids:
- * op_fid2 - new allocated fid - if file is created.
- * op_fid3 - existent fid - if file only open.
- * op_fid3 is saved in lmv_intent_open
- */
- if ((!lu_fid_eq(&op_data->op_fid2, &mdt_body->fid1)) &&
- (!lu_fid_eq(&op_data->op_fid3, &mdt_body->fid1))) {
- CDEBUG(D_DENTRY, "Found stale data "DFID"("DFID")/"DFID
- "\n", PFID(&op_data->op_fid2),
- PFID(&op_data->op_fid2), PFID(&mdt_body->fid1));
- return -ESTALE;
- }
- }
-
rc = it_open_error(DISP_LOOKUP_EXECD, it);
if (rc)
return rc;
@@ -980,10 +935,10 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
LDLM_DEBUG(lock, "matching against this");
- LASSERTF(fid_res_name_eq(&mdt_body->fid1,
+ LASSERTF(fid_res_name_eq(&mdt_body->mbo_fid1,
&lock->l_resource->lr_name),
"Lock res_id: "DLDLMRES", fid: "DFID"\n",
- PLDLMRES(lock->l_resource), PFID(&mdt_body->fid1));
+ PLDLMRES(lock->l_resource), PFID(&mdt_body->mbo_fid1));
LDLM_LOCK_PUT(lock);
memcpy(&old_lock, lockh, sizeof(*lockh));
@@ -998,8 +953,8 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
}
CDEBUG(D_DENTRY,
"D_IT dentry %.*s intent: %s status %d disp %x rc %d\n",
- op_data->op_namelen, op_data->op_name, ldlm_it2str(it->it_op),
- it->it_status, it->it_disposition, rc);
+ (int)op_data->op_namelen, op_data->op_name,
+ ldlm_it2str(it->it_op), it->it_status, it->it_disposition, rc);
return rc;
}
@@ -1042,6 +997,9 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
MDS_INODELOCK_LOOKUP |
MDS_INODELOCK_PERM;
break;
+ case IT_READDIR:
+ policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
+ break;
case IT_LAYOUT:
policy.l_inodebits.bits = MDS_INODELOCK_LAYOUT;
break;
@@ -1095,10 +1053,8 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
* child lookup.
*/
int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
- void *lmm, int lmmsize, struct lookup_intent *it,
- int lookup_flags, struct ptlrpc_request **reqp,
- ldlm_blocking_callback cb_blocking,
- __u64 extra_lock_flags)
+ struct lookup_intent *it, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking, __u64 extra_lock_flags)
{
struct ldlm_enqueue_info einfo = {
.ei_type = LDLM_IBITS,
@@ -1112,14 +1068,14 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
LASSERT(it);
CDEBUG(D_DLMTRACE, "(name: %.*s,"DFID") in obj "DFID
- ", intent: %s flags %#Lo\n", op_data->op_namelen,
+ ", intent: %s flags %#Lo\n", (int)op_data->op_namelen,
op_data->op_name, PFID(&op_data->op_fid2),
PFID(&op_data->op_fid1), ldlm_it2str(it->it_op),
it->it_flags);
lockh.cookie = 0;
if (fid_is_sane(&op_data->op_fid2) &&
- (it->it_op & (IT_LOOKUP | IT_GETATTR))) {
+ (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_READDIR))) {
/* We could just return 1 immediately, but since we should only
* be called in revalidate_it if we already have a lock, let's
* verify that.
@@ -1135,13 +1091,13 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
/* For case if upper layer did not alloc fid, do it now. */
if (!fid_is_sane(&op_data->op_fid2) && it->it_op & IT_CREAT) {
- rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
+ rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
if (rc < 0) {
CERROR("Can't alloc new fid, rc %d\n", rc);
return rc;
}
}
- rc = mdc_enqueue(exp, &einfo, it, op_data, &lockh, lmm, lmmsize, NULL,
+ rc = mdc_enqueue(exp, &einfo, NULL, it, op_data, &lockh,
extra_lock_flags);
if (rc < 0)
return rc;
@@ -1170,7 +1126,7 @@ static int mdc_intent_getattr_async_interpret(const struct lu_env *env,
obddev = class_exp2obd(exp);
- mdc_exit_request(&obddev->u.cli);
+ obd_put_request_slot(&obddev->u.cli);
if (OBD_FAIL_CHECK(OBD_FAIL_MDC_GETATTR_ENQUEUE))
rc = -ETIMEDOUT;
@@ -1222,15 +1178,15 @@ int mdc_intent_getattr_async(struct obd_export *exp,
CDEBUG(D_DLMTRACE,
"name: %.*s in inode " DFID ", intent: %s flags %#Lo\n",
- op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
- ldlm_it2str(it->it_op), it->it_flags);
+ (int)op_data->op_namelen, op_data->op_name,
+ PFID(&op_data->op_fid1), ldlm_it2str(it->it_op), it->it_flags);
fid_build_reg_res_name(&op_data->op_fid1, &res_id);
req = mdc_intent_getattr_pack(exp, it, op_data);
if (IS_ERR(req))
return PTR_ERR(req);
- rc = mdc_enter_request(&obddev->u.cli);
+ rc = obd_get_request_slot(&obddev->u.cli);
if (rc != 0) {
ptlrpc_req_finished(req);
return rc;
@@ -1239,7 +1195,7 @@ int mdc_intent_getattr_async(struct obd_export *exp,
rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, &policy, &flags, NULL,
0, LVB_T_NONE, &minfo->mi_lockh, 1);
if (rc < 0) {
- mdc_exit_request(&obddev->u.cli);
+ obd_put_request_slot(&obddev->u.cli);
ptlrpc_req_finished(req);
return rc;
}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
index 5dba2c813857..c921e471fa27 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
@@ -86,7 +86,7 @@ int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid,
fid_build_reg_res_name(fid, &res_id);
res = ldlm_resource_get(exp->exp_obd->obd_namespace,
NULL, &res_id, 0, 0);
- if (!res)
+ if (IS_ERR(res))
return 0;
LDLM_RESOURCE_ADDREF(res);
/* Initialize ibits lock policy. */
@@ -99,7 +99,7 @@ int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid,
}
int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, int ealen, void *ea2, int ea2len,
+ void *ea, size_t ealen, void *ea2, size_t ea2len,
struct ptlrpc_request **request, struct md_open_data **mod)
{
LIST_HEAD(cancels);
@@ -110,11 +110,10 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
__u64 bits;
bits = MDS_INODELOCK_UPDATE;
- if (op_data->op_attr.ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID))
+ if (op_data->op_attr.ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
bits |= MDS_INODELOCK_LOOKUP;
if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
- (fid_is_sane(&op_data->op_fid1)) &&
- !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ (fid_is_sane(&op_data->op_fid1)))
count = mdc_resource_get_unused(exp, &op_data->op_fid1,
&cancels, LCK_EX, bits);
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
@@ -177,8 +176,8 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- epoch->handle = body->handle;
- epoch->ioepoch = body->ioepoch;
+ epoch->handle = body->mbo_handle;
+ epoch->ioepoch = body->mbo_ioepoch;
req->rq_replay_cb = mdc_replay_open;
/** bug 3633, open may be committed and estale answer is not error */
} else if (rc == -ESTALE && (op_data->op_flags & MF_SOM_CHANGE)) {
@@ -197,9 +196,9 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
}
int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
- const void *data, int datalen, int mode, __u32 uid, __u32 gid,
- cfs_cap_t cap_effective, __u64 rdev,
- struct ptlrpc_request **request)
+ const void *data, size_t datalen, umode_t mode,
+ uid_t uid, gid_t gid, cfs_cap_t cap_effective,
+ __u64 rdev, struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
int level, rc;
@@ -214,11 +213,9 @@ int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
* mdc_fid_alloc() may return errno 1 in case of switch to new
* sequence, handle this.
*/
- rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
- if (rc < 0) {
- CERROR("Can't alloc new fid, rc %d\n", rc);
+ rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
+ if (rc < 0)
return rc;
- }
}
rebuild:
@@ -307,14 +304,12 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
LASSERT(!req);
if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
- (fid_is_sane(&op_data->op_fid1)) &&
- !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ (fid_is_sane(&op_data->op_fid1)))
count = mdc_resource_get_unused(exp, &op_data->op_fid1,
&cancels, LCK_EX,
MDS_INODELOCK_UPDATE);
if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
- (fid_is_sane(&op_data->op_fid3)) &&
- !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ (fid_is_sane(&op_data->op_fid3)))
count += mdc_resource_get_unused(exp, &op_data->op_fid3,
&cancels, LCK_EX,
MDS_INODELOCK_FULL);
@@ -394,7 +389,7 @@ int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
}
int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
- const char *old, int oldlen, const char *new, int newlen,
+ const char *old, size_t oldlen, const char *new, size_t newlen,
struct ptlrpc_request **request)
{
LIST_HEAD(cancels);
@@ -431,7 +426,8 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
}
req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, oldlen + 1);
- req_capsule_set_size(&req->rq_pill, &RMF_SYMTGT, RCL_CLIENT, newlen+1);
+ req_capsule_set_size(&req->rq_pill, &RMF_SYMTGT, RCL_CLIENT,
+ newlen + 1);
rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
if (rc) {
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index 542801f04b0d..f56ea643f9bf 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -39,7 +39,9 @@
# include <linux/utsname.h>
#include "../include/lustre_acl.h"
+#include "../include/lustre/lustre_ioctl.h"
#include "../include/obd_class.h"
+#include "../include/lustre_lmv.h"
#include "../include/lustre_fid.h"
#include "../include/lprocfs_status.h"
#include "../include/lustre_param.h"
@@ -57,16 +59,16 @@ static inline int mdc_queue_wait(struct ptlrpc_request *req)
struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
int rc;
- /* mdc_enter_request() ensures that this client has no more
+ /* obd_get_request_slot() ensures that this client has no more
* than cl_max_rpcs_in_flight RPCs simultaneously inf light
* against an MDT.
*/
- rc = mdc_enter_request(cli);
+ rc = obd_get_request_slot(cli);
if (rc != 0)
return rc;
rc = ptlrpc_queue_wait(req);
- mdc_exit_request(cli);
+ obd_put_request_slot(cli);
return rc;
}
@@ -98,7 +100,7 @@ static int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid)
goto out;
}
- *rootfid = body->fid1;
+ *rootfid = body->mbo_fid1;
CDEBUG(D_NET,
"root fid="DFID", last_committed=%llu\n",
PFID(rootfid),
@@ -136,12 +138,12 @@ static int mdc_getattr_common(struct obd_export *exp,
if (!body)
return -EPROTO;
- CDEBUG(D_NET, "mode: %o\n", body->mode);
+ CDEBUG(D_NET, "mode: %o\n", body->mbo_mode);
mdc_update_max_ea_from_body(exp, body);
- if (body->eadatasize != 0) {
+ if (body->mbo_eadatasize != 0) {
eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
- body->eadatasize);
+ body->mbo_eadatasize);
if (!eadata)
return -EPROTO;
}
@@ -230,32 +232,6 @@ static int mdc_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
return rc;
}
-static int mdc_is_subdir(struct obd_export *exp,
- const struct lu_fid *pfid,
- const struct lu_fid *cfid,
- struct ptlrpc_request **request)
-{
- struct ptlrpc_request *req;
- int rc;
-
- *request = NULL;
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
- &RQF_MDS_IS_SUBDIR, LUSTRE_MDS_VERSION,
- MDS_IS_SUBDIR);
- if (!req)
- return -ENOMEM;
-
- mdc_is_subdir_pack(req, pfid, cfid, 0);
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
- if (rc && rc != -EREMOTE)
- ptlrpc_req_finished(req);
- else
- *request = req;
- return rc;
-}
-
static int mdc_xattr_common(struct obd_export *exp,
const struct req_format *fmt,
const struct lu_fid *fid,
@@ -397,15 +373,15 @@ static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md)
void *buf;
int rc;
- if (!body->aclsize)
+ if (!body->mbo_aclsize)
return 0;
- buf = req_capsule_server_sized_get(pill, &RMF_ACL, body->aclsize);
+ buf = req_capsule_server_sized_get(pill, &RMF_ACL, body->mbo_aclsize);
if (!buf)
return -EPROTO;
- acl = posix_acl_from_xattr(&init_user_ns, buf, body->aclsize);
+ acl = posix_acl_from_xattr(&init_user_ns, buf, body->mbo_aclsize);
if (!acl)
return 0;
@@ -443,24 +419,24 @@ static int mdc_get_lustre_md(struct obd_export *exp,
md->body = req_capsule_server_get(pill, &RMF_MDT_BODY);
- if (md->body->valid & OBD_MD_FLEASIZE) {
+ if (md->body->mbo_valid & OBD_MD_FLEASIZE) {
int lmmsize;
struct lov_mds_md *lmm;
- if (!S_ISREG(md->body->mode)) {
+ if (!S_ISREG(md->body->mbo_mode)) {
CDEBUG(D_INFO,
"OBD_MD_FLEASIZE set, should be a regular file, but is not\n");
rc = -EPROTO;
goto out;
}
- if (md->body->eadatasize == 0) {
+ if (md->body->mbo_eadatasize == 0) {
CDEBUG(D_INFO,
"OBD_MD_FLEASIZE set, but eadatasize 0\n");
rc = -EPROTO;
goto out;
}
- lmmsize = md->body->eadatasize;
+ lmmsize = md->body->mbo_eadatasize;
lmm = req_capsule_server_sized_get(pill, &RMF_MDT_MD, lmmsize);
if (!lmm) {
rc = -EPROTO;
@@ -471,7 +447,7 @@ static int mdc_get_lustre_md(struct obd_export *exp,
if (rc < 0)
goto out;
- if (rc < sizeof(*md->lsm)) {
+ if (rc < (typeof(rc))sizeof(*md->lsm)) {
CDEBUG(D_INFO,
"lsm size too small: rc < sizeof (*md->lsm) (%d < %d)\n",
rc, (int)sizeof(*md->lsm));
@@ -479,24 +455,24 @@ static int mdc_get_lustre_md(struct obd_export *exp,
goto out;
}
- } else if (md->body->valid & OBD_MD_FLDIREA) {
+ } else if (md->body->mbo_valid & OBD_MD_FLDIREA) {
int lmvsize;
struct lov_mds_md *lmv;
- if (!S_ISDIR(md->body->mode)) {
+ if (!S_ISDIR(md->body->mbo_mode)) {
CDEBUG(D_INFO,
"OBD_MD_FLDIREA set, should be a directory, but is not\n");
rc = -EPROTO;
goto out;
}
- if (md->body->eadatasize == 0) {
+ if (md->body->mbo_eadatasize == 0) {
CDEBUG(D_INFO,
"OBD_MD_FLDIREA is set, but eadatasize 0\n");
return -EPROTO;
}
- if (md->body->valid & OBD_MD_MEA) {
- lmvsize = md->body->eadatasize;
+ if (md->body->mbo_valid & OBD_MD_MEA) {
+ lmvsize = md->body->mbo_eadatasize;
lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
lmvsize);
if (!lmv) {
@@ -504,15 +480,15 @@ static int mdc_get_lustre_md(struct obd_export *exp,
goto out;
}
- rc = obd_unpackmd(md_exp, (void *)&md->mea, lmv,
+ rc = obd_unpackmd(md_exp, (void *)&md->lmv, lmv,
lmvsize);
if (rc < 0)
goto out;
- if (rc < sizeof(*md->mea)) {
+ if (rc < (typeof(rc))sizeof(*md->lmv)) {
CDEBUG(D_INFO,
- "size too small: rc < sizeof(*md->mea) (%d < %d)\n",
- rc, (int)sizeof(*md->mea));
+ "size too small: rc < sizeof(*md->lmv) (%d < %d)\n",
+ rc, (int)sizeof(*md->lmv));
rc = -EPROTO;
goto out;
}
@@ -520,12 +496,12 @@ static int mdc_get_lustre_md(struct obd_export *exp,
}
rc = 0;
- if (md->body->valid & OBD_MD_FLACL) {
+ if (md->body->mbo_valid & OBD_MD_FLACL) {
/* for ACL, it's possible that FLACL is set but aclsize is zero.
* only when aclsize != 0 there's an actual segment for ACL
* in reply buffer.
*/
- if (md->body->aclsize) {
+ if (md->body->mbo_aclsize) {
rc = mdc_unpack_acl(req, md);
if (rc)
goto out;
@@ -580,9 +556,9 @@ void mdc_replay_open(struct ptlrpc_request *req)
file_fh = &och->och_fh;
CDEBUG(D_HA, "updating handle from %#llx to %#llx\n",
- file_fh->cookie, body->handle.cookie);
+ file_fh->cookie, body->mbo_handle.cookie);
old = *file_fh;
- *file_fh = body->handle;
+ *file_fh = body->mbo_handle;
}
close_req = mod->mod_close_req;
if (close_req) {
@@ -597,7 +573,7 @@ void mdc_replay_open(struct ptlrpc_request *req)
if (och)
LASSERT(!memcmp(&old, &epoch->handle, sizeof(old)));
DEBUG_REQ(D_HA, close_req, "updating close body with new fh");
- epoch->handle = body->handle;
+ epoch->handle = body->mbo_handle;
}
}
@@ -679,11 +655,11 @@ int mdc_set_open_replay_data(struct obd_export *exp,
spin_unlock(&open_req->rq_lock);
}
- rec->cr_fid2 = body->fid1;
- rec->cr_ioepoch = body->ioepoch;
- rec->cr_old_handle.cookie = body->handle.cookie;
+ rec->cr_fid2 = body->mbo_fid1;
+ rec->cr_ioepoch = body->mbo_ioepoch;
+ rec->cr_old_handle.cookie = body->mbo_handle.cookie;
open_req->rq_replay_cb = mdc_replay_open;
- if (!fid_is_sane(&body->fid1)) {
+ if (!fid_is_sane(&body->mbo_fid1)) {
DEBUG_REQ(D_ERROR, open_req,
"Saving replay request with insane fid");
LBUG();
@@ -701,9 +677,15 @@ static void mdc_free_open(struct md_open_data *mod)
imp_connect_disp_stripe(mod->mod_open_req->rq_import))
committed = 1;
- LASSERT(mod->mod_open_req->rq_replay == 0);
-
- DEBUG_REQ(D_RPCTRACE, mod->mod_open_req, "free open request\n");
+ /*
+ * No reason to assert here if the open request has
+ * rq_replay == 1. It means that mdc_close failed, and
+ * close request wasn't sent. It is not fatal to client.
+ * The worst thing is eviction if the client gets open lock
+ */
+ DEBUG_REQ(D_RPCTRACE, mod->mod_open_req,
+ "free open request rq_replay = %d\n",
+ mod->mod_open_req->rq_replay);
ptlrpc_request_committed(mod->mod_open_req, committed);
if (mod->mod_close_req)
@@ -744,7 +726,7 @@ static void mdc_close_handle_reply(struct ptlrpc_request *req,
epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
epoch->flags |= MF_SOM_AU;
- if (repbody->valid & OBD_MD_FLGETATTRLOCK)
+ if (repbody->mbo_valid & OBD_MD_FLGETATTRLOCK)
op_data->op_flags |= MF_GETATTR_LOCK;
}
}
@@ -763,7 +745,7 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
req_fmt = &RQF_MDS_RELEASE_CLOSE;
/* allocate a FID for volatile file */
- rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
+ rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
if (rc < 0) {
CERROR("%s: "DFID" failed to allocate FID: %d\n",
obd->obd_name, PFID(&op_data->op_fid1), rc);
@@ -773,22 +755,10 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
}
*request = NULL;
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), req_fmt);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_CLOSE);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- /* To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
- * portal whose threads are not taking any DLM locks and are therefore
- * always progressing
- */
- req->rq_request_portal = MDS_READPAGE_PORTAL;
- ptlrpc_at_set_req_timeout(req);
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDC_CLOSE))
+ req = NULL;
+ else
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), req_fmt);
/* Ensure that this close's handle is fixed up during replay. */
if (likely(mod)) {
@@ -809,6 +779,29 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
CDEBUG(D_HA,
"couldn't find open req; expecting close error\n");
}
+ if (!req) {
+ /*
+ * TODO: repeat close after errors
+ */
+ CWARN("%s: close of FID "DFID" failed, file reference will be dropped when this client unmounts or is evicted\n",
+ obd->obd_name, PFID(&op_data->op_fid1));
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_CLOSE);
+ if (rc) {
+ ptlrpc_request_free(req);
+ goto out;
+ }
+
+ /*
+ * To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
+ * portal whose threads are not taking any DLM locks and are therefore
+ * always progressing
+ */
+ req->rq_request_portal = MDS_READPAGE_PORTAL;
+ ptlrpc_at_set_req_timeout(req);
mdc_close_pack(req, op_data);
@@ -854,6 +847,7 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
}
}
+out:
if (mod) {
if (rc != 0)
mod->mod_close_req = NULL;
@@ -936,16 +930,17 @@ static int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
return rc;
}
-static int mdc_readpage(struct obd_export *exp, struct md_op_data *op_data,
- struct page **pages, struct ptlrpc_request **request)
+static int mdc_getpage(struct obd_export *exp, const struct lu_fid *fid,
+ u64 offset, struct page **pages, int npages,
+ struct ptlrpc_request **request)
{
- struct ptlrpc_request *req;
struct ptlrpc_bulk_desc *desc;
- int i;
- wait_queue_head_t waitq;
- int resends = 0;
- struct l_wait_info lwi;
- int rc;
+ struct ptlrpc_request *req;
+ wait_queue_head_t waitq;
+ struct l_wait_info lwi;
+ int resends = 0;
+ int rc;
+ int i;
*request = NULL;
init_waitqueue_head(&waitq);
@@ -964,7 +959,7 @@ restart_bulk:
req->rq_request_portal = MDS_READPAGE_PORTAL;
ptlrpc_at_set_req_timeout(req);
- desc = ptlrpc_prep_bulk_imp(req, op_data->op_npages, 1, BULK_PUT_SINK,
+ desc = ptlrpc_prep_bulk_imp(req, npages, 1, BULK_PUT_SINK,
MDS_BULK_PORTAL);
if (!desc) {
ptlrpc_request_free(req);
@@ -972,12 +967,10 @@ restart_bulk:
}
/* NB req now owns desc and will free it when it gets freed */
- for (i = 0; i < op_data->op_npages; i++)
+ for (i = 0; i < npages; i++)
ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE);
- mdc_readdir_pack(req, op_data->op_offset,
- PAGE_SIZE * op_data->op_npages,
- &op_data->op_fid1);
+ mdc_readdir_pack(req, offset, PAGE_SIZE * npages, fid);
ptlrpc_request_set_replen(req);
rc = ptlrpc_queue_wait(req);
@@ -988,11 +981,12 @@ restart_bulk:
resends++;
if (!client_should_resend(resends, &exp->exp_obd->u.cli)) {
- CERROR("too many resend retries, returning error\n");
+ CERROR("%s: too many resend retries: rc = %d\n",
+ exp->exp_obd->obd_name, -EIO);
return -EIO;
}
- lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends),
- NULL, NULL, NULL);
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL,
+ NULL);
l_wait_event(waitq, 0, &lwi);
goto restart_bulk;
@@ -1006,9 +1000,9 @@ restart_bulk:
}
if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) {
- CERROR("Unexpected # bytes transferred: %d (%ld expected)\n",
- req->rq_bulk->bd_nob_transferred,
- PAGE_SIZE * op_data->op_npages);
+ CERROR("%s: unexpected bytes transferred: %d (%ld expected)\n",
+ exp->exp_obd->obd_name, req->rq_bulk->bd_nob_transferred,
+ PAGE_SIZE * npages);
ptlrpc_req_finished(req);
return -EPROTO;
}
@@ -1017,6 +1011,453 @@ restart_bulk:
return 0;
}
+/*
+ * Drop one reference on @page. When @remove is non-zero the page is first
+ * truncated out of its mapping (if it still has one) under the page lock.
+ */
+static void mdc_release_page(struct page *page, int remove)
+{
+	if (!remove) {
+		put_page(page);
+		return;
+	}
+
+	lock_page(page);
+	if (likely(page->mapping))
+		truncate_complete_page(page->mapping, page);
+	unlock_page(page);
+
+	put_page(page);
+}
+
+/*
+ * Look in @mapping for a cached directory page that can serve *hash.
+ *
+ * The first page-cache entry at or after the index derived from *hash is
+ * examined. If it is up to date, its hash range is stored in *start / *end.
+ * On 32-bit kernels with 64-bit hashes, *start, *end and *hash are reduced
+ * to their upper 32 bits.
+ *
+ * Returns:
+ *  - the page, with a reference held and still kmap()ed, when it is kept
+ *    for *hash;
+ *  - NULL when nothing usable is cached (no entry, *hash beyond the page's
+ *    end, or *hash equal to the end of a non-degenerate range);
+ *  - ERR_PTR(-EIO) when the page found is not up to date.
+ */
+static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
+				    __u64 *start, __u64 *end, int hash64)
+{
+	/*
+	 * Complement of hash is used as an index so that
+	 * radix_tree_gang_lookup() can be used to find a page with starting
+	 * hash _smaller_ than one we are looking for.
+	 */
+	unsigned long offset = hash_x_index(*hash, hash64);
+	struct page *page;
+	int found;
+
+	spin_lock_irq(&mapping->tree_lock);
+	found = radix_tree_gang_lookup(&mapping->page_tree,
+				       (void **)&page, offset, 1);
+	if (found > 0 && !radix_tree_exceptional_entry(page)) {
+		struct lu_dirpage *dp;
+
+		get_page(page);
+		spin_unlock_irq(&mapping->tree_lock);
+		/*
+		 * In contrast to find_lock_page() we are sure that directory
+		 * page cannot be truncated (while DLM lock is held) and,
+		 * hence, can avoid restart.
+		 *
+		 * In fact, page cannot be locked here at all, because
+		 * mdc_read_page_remote does synchronous io.
+		 */
+		wait_on_page_locked(page);
+		if (PageUptodate(page)) {
+			dp = kmap(page);
+			if (BITS_PER_LONG == 32 && hash64) {
+				*start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+				*end = le64_to_cpu(dp->ldp_hash_end) >> 32;
+				*hash = *hash >> 32;
+			} else {
+				*start = le64_to_cpu(dp->ldp_hash_start);
+				*end = le64_to_cpu(dp->ldp_hash_end);
+			}
+			if (unlikely(*start == 1 && *hash == 0))
+				*hash = *start;
+			else
+				LASSERTF(*start <= *hash, "start = %#llx,end = %#llx,hash = %#llx\n",
+					 *start, *end, *hash);
+			CDEBUG(D_VFSTRACE, "offset %lx [%#llx %#llx], hash %#llx\n",
+			       offset, *start, *end, *hash);
+			if (*hash > *end) {
+				kunmap(page);
+				mdc_release_page(page, 0);
+				page = NULL;
+			} else if (*end != *start && *hash == *end) {
+				/*
+				 * upon hash collision, remove this page,
+				 * otherwise put page reference, and
+				 * mdc_read_page_remote() will issue RPC to
+				 * fetch the page we want.
+				 *
+				 * The flag is sampled before kunmap(): dp
+				 * must not be dereferenced once the mapping
+				 * has been dropped.
+				 */
+				int collide = le32_to_cpu(dp->ldp_flags) &
+					      LDF_COLLIDE;
+
+				kunmap(page);
+				mdc_release_page(page, collide);
+				page = NULL;
+			}
+			/* otherwise the page is returned still kmap()ed */
+		} else {
+			put_page(page);
+			page = ERR_PTR(-EIO);
+		}
+	} else {
+		spin_unlock_irq(&mapping->tree_lock);
+		page = NULL;
+	}
+	return page;
+}
+
+/*
+ * Adjust a set of pages, each page containing an array of lu_dirpages,
+ * so that each page can be used as a single logical lu_dirpage.
+ *
+ * A lu_dirpage is laid out as follows, where s = ldp_hash_start,
+ * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a
+ * struct lu_dirent. It has size up to LU_PAGE_SIZE. The ldp_hash_end
+ * value is used as a cookie to request the next lu_dirpage in a
+ * directory listing that spans multiple pages (two in this example):
+ * ________
+ * | |
+ * .|--------v------- -----.
+ * |s|e|f|p|ent|ent| ... |ent|
+ * '--|-------------- -----' Each PAGE contains a single
+ * '------. lu_dirpage.
+ * .---------v------- -----.
+ * |s|e|f|p|ent| 0 | ... | 0 |
+ * '----------------- -----'
+ *
+ * However, on hosts where the native VM page size (PAGE_SIZE) is
+ * larger than LU_PAGE_SIZE, a single host page may contain multiple
+ * lu_dirpages. After reading the lu_dirpages from the MDS, the
+ * ldp_hash_end of the first lu_dirpage refers to the one immediately
+ * after it in the same PAGE (arrows simplified for brevity, but
+ * in general e0==s1, e1==s2, etc.):
+ *
+ * .-------------------- -----.
+ * |s0|e0|f0|p|ent|ent| ... |ent|
+ * |---v---------------- -----|
+ * |s1|e1|f1|p|ent|ent| ... |ent|
+ * |---v---------------- -----| Here, each PAGE contains
+ * ... multiple lu_dirpages.
+ * |---v---------------- -----|
+ * |s'|e'|f'|p|ent|ent| ... |ent|
+ * '---|---------------- -----'
+ * v
+ * .----------------------------.
+ * | next PAGE |
+ *
+ * This structure is transformed into a single logical lu_dirpage as follows:
+ *
+ * - Replace e0 with e' so the request for the next lu_dirpage gets the page
+ * labeled 'next PAGE'.
+ *
+ * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether
+ * a hash collision with the next page exists.
+ *
+ * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span
+ * to the first entry of the next lu_dirpage.
+ */
+#if PAGE_SIZE > LU_PAGE_SIZE
+/*
+ * Turn each of the @cfs_pgs host pages in @pages, which together hold
+ * @lu_pgs lu_dirpages, into a single logical lu_dirpage: chain the entry
+ * lists of consecutive lu_dirpages within a host page and propagate the
+ * last lu_dirpage's hash_end and LDF_COLLIDE flag into the first header.
+ */
+static void mdc_adjust_dirpages(struct page **pages, int cfs_pgs, int lu_pgs)
+{
+	int i;
+
+	for (i = 0; i < cfs_pgs; i++) {
+		struct lu_dirpage *dp = kmap(pages[i]);
+		/* hash_end and flags are kept in CPU byte order */
+		__u64 hash_end = le64_to_cpu(dp->ldp_hash_end);
+		__u32 flags = le32_to_cpu(dp->ldp_flags);
+		struct lu_dirpage *first = dp;
+		struct lu_dirent *end_dirent = NULL;
+		struct lu_dirent *ent;
+
+		while (--lu_pgs > 0) {
+			/* Walk to the last entry of this lu_dirpage. */
+			ent = lu_dirent_start(dp);
+			for (end_dirent = ent; ent;
+			     end_dirent = ent, ent = lu_dirent_next(ent));
+
+			/* Advance dp to next lu_dirpage. */
+			dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
+
+			/* Check if we've reached the end of the CFS_PAGE. */
+			if (!((unsigned long)dp & ~PAGE_MASK))
+				break;
+
+			/* Save the hash and flags of this lu_dirpage. */
+			hash_end = le64_to_cpu(dp->ldp_hash_end);
+			flags = le32_to_cpu(dp->ldp_flags);
+
+			/* Check if lu_dirpage contains no entries. */
+			if (!end_dirent)
+				break;
+
+			/*
+			 * Enlarge the end entry lde_reclen from 0 to
+			 * first entry of next lu_dirpage.
+			 */
+			LASSERT(!le16_to_cpu(end_dirent->lde_reclen));
+			end_dirent->lde_reclen =
+				cpu_to_le16((char *)(dp->ldp_entries) -
+					    (char *)end_dirent);
+		}
+
+		/*
+		 * The header fields are little-endian on the wire; convert
+		 * the CPU-order values back before storing them, otherwise
+		 * big-endian hosts would write a byte-swapped hash_end and
+		 * test the wrong flag bit.
+		 */
+		first->ldp_hash_end = cpu_to_le64(hash_end);
+		first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE);
+		first->ldp_flags |= cpu_to_le32(flags & LDF_COLLIDE);
+
+		kunmap(pages[i]);
+	}
+	LASSERTF(lu_pgs == 0, "left = %d", lu_pgs);
+}
+#else
+/* One lu_dirpage per host page: nothing to adjust. */
+#define mdc_adjust_dirpages(pages, cfs_pgs, lu_pgs) do {} while (0)
+#endif /* PAGE_SIZE > LU_PAGE_SIZE */
+
+/*
+ * Parameters for readdir page: handed by mdc_read_page() to the
+ * mdc_read_page_remote() filler through read_cache_page().
+ */
+struct readpage_param {
+ struct md_op_data *rp_mod; /* op_data of the read_page request */
+ __u64 rp_off; /* hash offset to read from */
+ int rp_hash64; /* non-zero when CLI_HASH64 is set in op_cli_flags */
+ struct obd_export *rp_exp; /* export the RPC is sent through */
+ /* NOTE(review): not assigned or read in the code visible here - confirm */
+ struct md_callback *rp_cb;
+};
+
+/**
+ * Read pages from server.
+ *
+ * Page in MDS_READPAGE RPC is packed in LU_PAGE_SIZE, and each page contains
+ * a header lu_dirpage which describes the start/end hash, and whether this
+ * page is empty (contains no dir entry) or hash collide with next page.
+ * After client receives reply, several pages will be integrated into dir page
+ * in PAGE_SIZE (if PAGE_SIZE greater than LU_PAGE_SIZE), and the
+ * lu_dirpage for this integrated page will be adjusted.
+ *
+ * Used as the filler callback of read_cache_page() in mdc_read_page().
+ *
+ * \param[in] data	struct readpage_param describing the request
+ * \param[in] page0	page to fill; it is unlocked here before returning
+ *
+ * \retval the result of mdc_getpage(): 0 on success, negative errno otherwise
+ **/
+static int mdc_read_page_remote(void *data, struct page *page0)
+{
+ struct readpage_param *rp = data;
+ struct page **page_pool;
+ struct page *page;
+ struct lu_dirpage *dp;
+ int rd_pgs = 0; /* number of pages read actually */
+ int npages;
+ struct md_op_data *op_data = rp->rp_mod;
+ struct ptlrpc_request *req;
+ int max_pages = op_data->op_max_pages;
+ struct inode *inode;
+ struct lu_fid *fid;
+ int i;
+ int rc;
+
+ LASSERT(max_pages > 0 && max_pages <= PTLRPC_MAX_BRW_PAGES);
+ inode = op_data->op_data;
+ fid = &op_data->op_fid1;
+ LASSERT(inode);
+
+ /*
+ * If the page array cannot be allocated, fall back to reading page0
+ * alone, using page0's own address as a one-element array.
+ */
+ page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
+ if (page_pool) {
+ page_pool[0] = page0;
+ } else {
+ page_pool = &page0;
+ max_pages = 1;
+ }
+
+ /*
+ * Allocate the additional pages; on the first allocation failure stop
+ * and issue the read with the npages obtained so far.
+ */
+ for (npages = 1; npages < max_pages; npages++) {
+ page = page_cache_alloc_cold(inode->i_mapping);
+ if (!page)
+ break;
+ page_pool[npages] = page;
+ }
+
+ rc = mdc_getpage(rp->rp_exp, fid, rp->rp_off, page_pool, npages, &req);
+ if (!rc) {
+ int lu_pgs = req->rq_bulk->bd_nob_transferred;
+
+ /*
+ * rd_pgs: host pages touched by the transfer (rounded up);
+ * lu_pgs: number of LU_PAGE_SIZE units transferred.
+ */
+ rd_pgs = (req->rq_bulk->bd_nob_transferred +
+ PAGE_SIZE - 1) >> PAGE_SHIFT;
+ lu_pgs >>= LU_PAGE_SHIFT;
+ LASSERT(!(req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK));
+
+ CDEBUG(D_INODE, "read %d(%d) pages\n", rd_pgs, lu_pgs);
+
+ mdc_adjust_dirpages(page_pool, rd_pgs, lu_pgs);
+
+ SetPageUptodate(page0);
+ }
+
+ unlock_page(page0);
+ ptlrpc_req_finished(req);
+ CDEBUG(D_CACHE, "read %d/%d pages\n", rd_pgs, npages);
+ /*
+ * Handle the additional pages: those that received data are marked
+ * up to date and inserted into the page cache at the index derived
+ * from their starting hash; the others are simply released.
+ */
+ for (i = 1; i < npages; i++) {
+ unsigned long offset;
+ __u64 hash;
+ int ret;
+
+ page = page_pool[i];
+
+ if (rc < 0 || i >= rd_pgs) {
+ put_page(page);
+ continue;
+ }
+
+ SetPageUptodate(page);
+
+ dp = kmap(page);
+ hash = le64_to_cpu(dp->ldp_hash_start);
+ kunmap(page);
+
+ offset = hash_x_index(hash, rp->rp_hash64);
+
+ prefetchw(&page->flags);
+ ret = add_to_page_cache_lru(page, inode->i_mapping, offset,
+ GFP_KERNEL);
+ if (!ret)
+ unlock_page(page);
+ else
+ CDEBUG(D_VFSTRACE, "page %lu add to page cache failed: rc = %d\n",
+ offset, ret);
+ /* drop our own reference whether or not the insert succeeded */
+ put_page(page);
+ }
+
+ /* only free the array if it was really allocated above */
+ if (page_pool != &page0)
+ kfree(page_pool);
+
+ return rc;
+}
+
+/**
+ * Read dir page from cache first, if it can not find it, read it from
+ * server and add into the cache.
+ *
+ * An IT_READDIR intent lock is taken for the duration of the call and its
+ * reference is dropped before returning. On success the page stored in
+ * \a ppage is referenced and left kmap()ed.
+ * NOTE(review): presumably the caller must kunmap() and release it - confirm.
+ *
+ * \param[in] exp MDC export
+ * \param[in] op_data client MD stack parameters, transferring parameters
+ * between different layers on client MD stack.
+ * \param[in] cb_op callback required for ldlm lock enqueue during
+ * read page
+ * \param[in] hash_offset the hash offset of the page to be read
+ * \param[in] ppage the page to be read
+ *
+ * retval = 0 get the page successfully
+ * errno(<0) get the page failed
+ */
+static int mdc_read_page(struct obd_export *exp, struct md_op_data *op_data,
+ struct md_callback *cb_op, __u64 hash_offset,
+ struct page **ppage)
+{
+ struct lookup_intent it = { .it_op = IT_READDIR };
+ struct page *page;
+ struct inode *dir = op_data->op_data;
+ struct address_space *mapping;
+ struct lu_dirpage *dp;
+ __u64 start = 0;
+ __u64 end = 0;
+ struct lustre_handle lockh;
+ struct ptlrpc_request *enq_req = NULL;
+ struct readpage_param rp_param;
+ int rc;
+
+ *ppage = NULL;
+
+ LASSERT(dir);
+ mapping = dir->i_mapping;
+
+ rc = mdc_intent_lock(exp, op_data, &it, &enq_req,
+ cb_op->md_blocking_ast, 0);
+ /* the enqueue request itself is not needed once the lock call returns */
+ if (enq_req)
+ ptlrpc_req_finished(enq_req);
+
+ if (rc < 0) {
+ CERROR("%s: "DFID" lock enqueue fails: rc = %d\n",
+ exp->exp_obd->obd_name, PFID(&op_data->op_fid1), rc);
+ return rc;
+ }
+
+ rc = 0;
+ lockh.cookie = it.it_lock_handle;
+ mdc_set_lock_data(exp, &lockh, dir, NULL);
+
+ /* First try the page cache; mdc_page_locate() may adjust rp_off. */
+ rp_param.rp_off = hash_offset;
+ rp_param.rp_hash64 = op_data->op_cli_flags & CLI_HASH64;
+ page = mdc_page_locate(mapping, &rp_param.rp_off, &start, &end,
+ rp_param.rp_hash64);
+ if (IS_ERR(page)) {
+ CDEBUG(D_INFO, "%s: dir page locate: " DFID " at %llu: rc %ld\n",
+ exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+ rp_param.rp_off, PTR_ERR(page));
+ rc = PTR_ERR(page);
+ goto out_unlock;
+ } else if (page) {
+ /*
+ * XXX nikita: not entirely correct handling of a corner case:
+ * suppose hash chain of entries with hash value HASH crosses
+ * border between pages P0 and P1. First both P0 and P1 are
+ * cached, seekdir() is called for some entry from the P0 part
+ * of the chain. Later P0 goes out of cache. telldir(HASH)
+ * happens and finds P1, as it starts with matching hash
+ * value. Remaining entries from P0 part of the chain are
+ * skipped. (Is that really a bug?)
+ *
+ * Possible solutions: 0. don't cache P1 in such case, handle
+ * it as an "overflow" page. 1. invalidate all pages at
+ * once. 2. use HASH|1 as an index for P1.
+ */
+ goto hash_collision;
+ }
+
+ /* Cache miss: have read_cache_page() fill the page from the server. */
+ rp_param.rp_exp = exp;
+ rp_param.rp_mod = op_data;
+ page = read_cache_page(mapping,
+ hash_x_index(rp_param.rp_off,
+ rp_param.rp_hash64),
+ mdc_read_page_remote, &rp_param);
+ if (IS_ERR(page)) {
+ CERROR("%s: read cache page: "DFID" at %llu: rc %ld\n",
+ exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+ rp_param.rp_off, PTR_ERR(page));
+ rc = PTR_ERR(page);
+ goto out_unlock;
+ }
+
+ wait_on_page_locked(page);
+ /* map the page now so that every path to "fail" can kunmap() it */
+ (void)kmap(page);
+ if (!PageUptodate(page)) {
+ CERROR("%s: page not updated: "DFID" at %llu: rc %d\n",
+ exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+ rp_param.rp_off, -5);
+ goto fail;
+ }
+ if (!PageChecked(page))
+ SetPageChecked(page);
+ if (PageError(page)) {
+ CERROR("%s: page error: "DFID" at %llu: rc %d\n",
+ exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+ rp_param.rp_off, -5);
+ goto fail;
+ }
+
+hash_collision:
+ /* the page is kmap()ed on both paths that reach this label */
+ dp = page_address(page);
+ if (BITS_PER_LONG == 32 && rp_param.rp_hash64) {
+ start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+ end = le64_to_cpu(dp->ldp_hash_end) >> 32;
+ rp_param.rp_off = hash_offset >> 32;
+ } else {
+ start = le64_to_cpu(dp->ldp_hash_start);
+ end = le64_to_cpu(dp->ldp_hash_end);
+ rp_param.rp_off = hash_offset;
+ }
+ if (end == start) {
+ LASSERT(start == rp_param.rp_off);
+ CWARN("Page-wide hash collision: %#lx\n", (unsigned long)end);
+#if BITS_PER_LONG == 32
+ CWARN("Real page-wide hash collision at [%llu %llu] with hash %llu\n",
+ le64_to_cpu(dp->ldp_hash_start),
+ le64_to_cpu(dp->ldp_hash_end), hash_offset);
+#endif
+ /*
+ * Fetch whole overflow chain...
+ *
+ * XXX not yet.
+ */
+ goto fail;
+ }
+ *ppage = page;
+out_unlock:
+ ldlm_lock_decref(&lockh, it.it_lock_mode);
+ return rc;
+fail:
+ /* unmap the page, remove it from the cache and report -EIO */
+ kunmap(page);
+ mdc_release_page(page, 1);
+ rc = -EIO;
+ goto out_unlock;
+}
+
static int mdc_statfs(const struct lu_env *env,
struct obd_export *exp, struct obd_statfs *osfs,
__u64 max_age, __u32 flags)
@@ -1401,7 +1842,7 @@ out:
return rc;
}
-static struct kuc_hdr *changelog_kuc_hdr(char *buf, int len, int flags)
+static struct kuc_hdr *changelog_kuc_hdr(char *buf, size_t len, u32 flags)
{
struct kuc_hdr *lh = (struct kuc_hdr *)buf;
@@ -1415,40 +1856,44 @@ static struct kuc_hdr *changelog_kuc_hdr(char *buf, int len, int flags)
return lh;
}
-#define D_CHANGELOG 0
-
struct changelog_show {
__u64 cs_startrec;
- __u32 cs_flags;
+ enum changelog_send_flag cs_flags;
struct file *cs_fp;
char *cs_buf;
struct obd_device *cs_obd;
};
+/* Return the obd_name of the obd device referenced by @cs. */
+static inline char *cs_obd_name(struct changelog_show *cs)
+{
+	struct obd_device *obd = cs->cs_obd;
+
+	return obd->obd_name;
+}
+
static int changelog_kkuc_cb(const struct lu_env *env, struct llog_handle *llh,
struct llog_rec_hdr *hdr, void *data)
{
struct changelog_show *cs = data;
struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr;
struct kuc_hdr *lh;
- int len, rc;
+ size_t len;
+ int rc;
if (rec->cr_hdr.lrh_type != CHANGELOG_REC) {
rc = -EINVAL;
CERROR("%s: not a changelog rec %x/%d: rc = %d\n",
- cs->cs_obd->obd_name, rec->cr_hdr.lrh_type,
+ cs_obd_name(cs), rec->cr_hdr.lrh_type,
rec->cr.cr_type, rc);
return rc;
}
if (rec->cr.cr_index < cs->cs_startrec) {
/* Skip entries earlier than what we are interested in */
- CDEBUG(D_CHANGELOG, "rec=%llu start=%llu\n",
+ CDEBUG(D_HSM, "rec=%llu start=%llu\n",
rec->cr.cr_index, cs->cs_startrec);
return 0;
}
- CDEBUG(D_CHANGELOG, "%llu %02d%-5s %llu 0x%x t="DFID" p="DFID
+ CDEBUG(D_HSM, "%llu %02d%-5s %llu 0x%x t=" DFID " p=" DFID
" %.*s\n", rec->cr.cr_index, rec->cr.cr_type,
changelog_type2str(rec->cr.cr_type), rec->cr.cr_time,
rec->cr.cr_flags & CLF_FLAGMASK,
@@ -1462,20 +1907,21 @@ static int changelog_kkuc_cb(const struct lu_env *env, struct llog_handle *llh,
memcpy(lh + 1, &rec->cr, len - sizeof(*lh));
rc = libcfs_kkuc_msg_put(cs->cs_fp, lh);
- CDEBUG(D_CHANGELOG, "kucmsg fp %p len %d rc %d\n", cs->cs_fp, len, rc);
+ CDEBUG(D_HSM, "kucmsg fp %p len %zu rc %d\n", cs->cs_fp, len, rc);
return rc;
}
static int mdc_changelog_send_thread(void *csdata)
{
+ enum llog_flag flags = LLOG_F_IS_CAT;
struct changelog_show *cs = csdata;
struct llog_ctxt *ctxt = NULL;
struct llog_handle *llh = NULL;
struct kuc_hdr *kuch;
int rc;
- CDEBUG(D_CHANGELOG, "changelog to fp=%p start %llu\n",
+ CDEBUG(D_HSM, "changelog to fp=%p start %llu\n",
cs->cs_fp, cs->cs_startrec);
cs->cs_buf = kzalloc(KUC_CHANGELOG_MSG_MAXSIZE, GFP_NOFS);
@@ -1494,10 +1940,14 @@ static int mdc_changelog_send_thread(void *csdata)
LLOG_OPEN_EXISTS);
if (rc) {
CERROR("%s: fail to open changelog catalog: rc = %d\n",
- cs->cs_obd->obd_name, rc);
+ cs_obd_name(cs), rc);
goto out;
}
- rc = llog_init_handle(NULL, llh, LLOG_F_IS_CAT, NULL);
+
+ if (cs->cs_flags & CHANGELOG_FLAG_JOBID)
+ flags |= LLOG_F_EXT_JOBID;
+
+ rc = llog_init_handle(NULL, llh, flags, NULL);
if (rc) {
CERROR("llog_init_handle failed %d\n", rc);
goto out;
@@ -1550,12 +2000,12 @@ static int mdc_ioc_changelog_send(struct obd_device *obd,
if (IS_ERR(task)) {
rc = PTR_ERR(task);
CERROR("%s: can't start changelog thread: rc = %d\n",
- obd->obd_name, rc);
+ cs_obd_name(cs), rc);
kfree(cs);
} else {
rc = 0;
- CDEBUG(D_CHANGELOG, "%s: started changelog thread\n",
- obd->obd_name);
+ CDEBUG(D_HSM, "%s: started changelog thread\n",
+ cs_obd_name(cs));
}
CERROR("Failed to start changelog thread: %d\n", rc);
@@ -1669,9 +2119,11 @@ static int mdc_ioc_swap_layouts(struct obd_export *exp,
* with the request RPC to avoid extra RPC round trips
*/
count = mdc_resource_get_unused(exp, &op_data->op_fid1, &cancels,
- LCK_CR, MDS_INODELOCK_LAYOUT);
+ LCK_CR, MDS_INODELOCK_LAYOUT |
+ MDS_INODELOCK_XATTR);
count += mdc_resource_get_unused(exp, &op_data->op_fid2, &cancels,
- LCK_CR, MDS_INODELOCK_LAYOUT);
+ LCK_CR, MDS_INODELOCK_LAYOUT |
+ MDS_INODELOCK_XATTR);
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
&RQF_MDS_SWAP_LAYOUTS);
@@ -1917,7 +2369,7 @@ static void lustre_swab_hai(struct hsm_action_item *h)
static void lustre_swab_hal(struct hsm_action_list *h)
{
struct hsm_action_item *hai;
- int i;
+ u32 i;
__swab32s(&h->hal_version);
__swab32s(&h->hal_count);
@@ -1966,14 +2418,14 @@ static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
* @param val KUC message (kuc_hdr + hsm_action_list)
* @param len total length of message
*/
-static int mdc_hsm_copytool_send(int len, void *val)
+static int mdc_hsm_copytool_send(size_t len, void *val)
{
struct kuc_hdr *lh = (struct kuc_hdr *)val;
struct hsm_action_list *hal = (struct hsm_action_list *)(lh + 1);
if (len < sizeof(*lh) + sizeof(*hal)) {
- CERROR("Short HSM message %d < %d\n", len,
- (int)(sizeof(*lh) + sizeof(*hal)));
+ CERROR("Short HSM message %zu < %zu\n", len,
+ sizeof(*lh) + sizeof(*hal));
return -EPROTO;
}
if (lh->kuc_magic == __swab16(KUC_MAGIC)) {
@@ -2044,9 +2496,8 @@ static int mdc_set_info_async(const struct lu_env *env,
}
spin_unlock(&imp->imp_lock);
- rc = do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION,
- keylen, key, vallen, val, set);
- return rc;
+ return do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION,
+ keylen, key, vallen, val, set);
}
if (KEY_IS(KEY_SPTLRPC_CONF)) {
sptlrpc_conf_client_adapt(exp->exp_obd);
@@ -2065,6 +2516,12 @@ static int mdc_set_info_async(const struct lu_env *env,
rc = mdc_hsm_copytool_send(vallen, val);
return rc;
}
+ if (KEY_IS(KEY_DEFAULT_EASIZE)) {
+ u32 *default_easize = val;
+
+ exp->exp_obd->u.cli.cl_default_mds_easize = *default_easize;
+ return 0;
+ }
CERROR("Unknown key %s\n", (char *)key);
return -EINVAL;
@@ -2077,18 +2534,18 @@ static int mdc_get_info(const struct lu_env *env, struct obd_export *exp,
int rc = -EINVAL;
if (KEY_IS(KEY_MAX_EASIZE)) {
- int mdsize, *max_easize;
+ u32 mdsize, *max_easize;
if (*vallen != sizeof(int))
return -EINVAL;
- mdsize = *(int *)val;
+ mdsize = *(u32 *)val;
if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize)
exp->exp_obd->u.cli.cl_max_mds_easize = mdsize;
max_easize = val;
*max_easize = exp->exp_obd->u.cli.cl_max_mds_easize;
return 0;
} else if (KEY_IS(KEY_DEFAULT_EASIZE)) {
- int *default_easize;
+ u32 *default_easize;
if (*vallen != sizeof(int))
return -EINVAL;
@@ -2105,7 +2562,7 @@ static int mdc_get_info(const struct lu_env *env, struct obd_export *exp,
*data = imp->imp_connect_data;
return 0;
} else if (KEY_IS(KEY_TGT_COUNT)) {
- *((int *)val) = 1;
+ *((u32 *)val) = 1;
return 0;
}
@@ -2199,13 +2656,13 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp,
return rc;
}
-int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
- struct md_op_data *op_data)
+int mdc_fid_alloc(const struct lu_env *env, struct obd_export *exp,
+ struct lu_fid *fid, struct md_op_data *op_data)
{
struct client_obd *cli = &exp->exp_obd->u.cli;
struct lu_client_seq *seq = cli->cl_seq;
- return seq_client_alloc_fid(NULL, seq, fid);
+ return seq_client_alloc_fid(env, seq, fid);
}
static struct obd_uuid *mdc_get_uuid(struct obd_export *exp)
@@ -2333,8 +2790,8 @@ err_rpc_lock:
* a large number of stripes is possible. If a larger reply buffer is
* required it will be reallocated in the ptlrpc layer due to overflow.
*/
-static int mdc_init_ea_size(struct obd_export *exp, int easize,
- int def_easize, int cookiesize, int def_cookiesize)
+static int mdc_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize,
+ u32 cookiesize, u32 def_cookiesize)
{
struct obd_device *obd = exp->exp_obd;
struct client_obd *cli = &obd->u.cli;
@@ -2430,7 +2887,6 @@ static struct obd_ops mdc_obd_ops = {
static struct md_ops mdc_md_ops = {
.getstatus = mdc_getstatus,
.null_inode = mdc_null_inode,
- .find_cbdata = mdc_find_cbdata,
.close = mdc_close,
.create = mdc_create,
.done_writing = mdc_done_writing,
@@ -2439,13 +2895,12 @@ static struct md_ops mdc_md_ops = {
.getattr_name = mdc_getattr_name,
.intent_lock = mdc_intent_lock,
.link = mdc_link,
- .is_subdir = mdc_is_subdir,
.rename = mdc_rename,
.setattr = mdc_setattr,
.setxattr = mdc_setxattr,
.getxattr = mdc_getxattr,
.sync = mdc_sync,
- .readpage = mdc_readpage,
+ .read_page = mdc_read_page,
.unlink = mdc_unlink,
.cancel_unused = mdc_cancel_unused,
.init_ea_size = mdc_init_ea_size,
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index 9d0bd4745865..23374cae5133 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -549,8 +549,9 @@ static int mgc_requeue_thread(void *data)
* caused the lock revocation to finish its setup, plus some
* random so everyone doesn't try to reconnect at once.
*/
- to = MGC_TIMEOUT_MIN_SECONDS * HZ;
- to += rand * HZ / 100; /* rand is centi-seconds */
+ to = msecs_to_jiffies(MGC_TIMEOUT_MIN_SECONDS * MSEC_PER_SEC);
+ /* rand is centi-seconds */
+ to += msecs_to_jiffies(rand * MSEC_PER_SEC / 100);
lwi = LWI_TIMEOUT(to, NULL, NULL);
l_wait_event(rq_waitq, rq_state & (RQ_STOP | RQ_PRECLEANUP),
&lwi);
@@ -1158,7 +1159,7 @@ static int mgc_apply_recover_logs(struct obd_device *mgc,
while (datalen > 0) {
int entry_len = sizeof(*entry);
- int is_ost;
+ int is_ost, i;
struct obd_device *obd;
char *obdname;
char *cname;
@@ -1264,11 +1265,17 @@ static int mgc_apply_recover_logs(struct obd_device *mgc,
continue;
}
- /* TODO: iterate all nids to find one */
+ /* iterate all nids to find one */
/* find uuid by nid */
- rc = client_import_find_conn(obd->u.cli.cl_import,
- entry->u.nids[0],
- (struct obd_uuid *)uuid);
+ rc = -ENOENT;
+ /* try each NID of this entry in turn, not just the first one */
+ for (i = 0; i < entry->mne_nid_count; i++) {
+ rc = client_import_find_conn(obd->u.cli.cl_import,
+ entry->u.nids[i],
+ (struct obd_uuid *)uuid);
+ if (!rc)
+ break;
+ }
+
up_read(&obd->u.cli.cl_sem);
if (rc < 0) {
CERROR("mgc: cannot find uuid by nid %s\n",
@@ -1428,14 +1435,12 @@ again:
}
mne_swab = !!ptlrpc_rep_need_swab(req);
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
+#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE
/* This import flag means the server did an extra swab of IR MNE
* records (fixed in LU-1252), reverse it here if needed. LU-1644
*/
if (unlikely(req->rq_import->imp_need_mne_swab))
mne_swab = !mne_swab;
-#else
-#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
#endif
for (i = 0; i < nrpages && ealen > 0; i++) {
@@ -1740,8 +1745,6 @@ static struct obd_ops mgc_obd_ops = {
.del_conn = client_import_del_conn,
.connect = client_connect_import,
.disconnect = client_disconnect_export,
- /* .enqueue = mgc_enqueue, */
- /* .iocontrol = mgc_iocontrol, */
.set_info_async = mgc_set_info_async,
.get_info = mgc_get_info,
.import_event = mgc_import_event,
diff --git a/drivers/staging/lustre/lustre/obdclass/Makefile b/drivers/staging/lustre/lustre/obdclass/Makefile
index df7e47f35a66..b42e109b30e0 100644
--- a/drivers/staging/lustre/lustre/obdclass/Makefile
+++ b/drivers/staging/lustre/lustre/obdclass/Makefile
@@ -3,6 +3,6 @@ obj-$(CONFIG_LUSTRE_FS) += obdclass.o
obdclass-y := linux/linux-module.o linux/linux-obdo.o linux/linux-sysctl.o \
llog.o llog_cat.o llog_obd.o llog_swab.o class_obd.o debug.o \
genops.o uuid.o lprocfs_status.o lprocfs_counters.o \
- lustre_handles.o lustre_peer.o statfs_pack.o \
+ lustre_handles.o lustre_peer.o statfs_pack.o linkea.o \
obdo.o obd_config.o obd_mount.o lu_object.o lu_ref.o \
cl_object.o cl_page.o cl_lock.o cl_io.o kernelcomm.o
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c
index e72f1fc00a13..bc4b7b6b9a20 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_io.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c
@@ -73,7 +73,6 @@ int cl_io_is_going(const struct lu_env *env)
{
return cl_env_info(env)->clt_current_io != NULL;
}
-EXPORT_SYMBOL(cl_io_is_going);
/**
* cl_io invariant that holds at all times when exported cl_io_*() functions
@@ -859,9 +858,6 @@ void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page)
LASSERT(page->cp_owner);
LINVRNT(plist->pl_owner == current);
- lockdep_off();
- mutex_lock(&page->cp_mutex);
- lockdep_on();
LASSERT(list_empty(&page->cp_batch));
list_add_tail(&page->cp_batch, &plist->pl_pages);
++plist->pl_nr;
@@ -877,12 +873,10 @@ void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
struct cl_page *page)
{
LASSERT(plist->pl_nr > 0);
+ LASSERT(cl_page_is_vmlocked(env, page));
LINVRNT(plist->pl_owner == current);
list_del_init(&page->cp_batch);
- lockdep_off();
- mutex_unlock(&page->cp_mutex);
- lockdep_on();
--plist->pl_nr;
lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist);
cl_page_put(env, page);
@@ -941,8 +935,6 @@ void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head)
}
EXPORT_SYMBOL(cl_page_list_splice);
-void cl_page_disown0(const struct lu_env *env,
- struct cl_io *io, struct cl_page *pg);
/**
* Disowns pages in a queue.
@@ -959,9 +951,6 @@ void cl_page_list_disown(const struct lu_env *env,
LASSERT(plist->pl_nr > 0);
list_del_init(&page->cp_batch);
- lockdep_off();
- mutex_unlock(&page->cp_mutex);
- lockdep_on();
--plist->pl_nr;
/*
* cl_page_disown0 rather than usual cl_page_disown() is used,
@@ -1221,7 +1210,7 @@ void cl_req_page_add(const struct lu_env *env,
{
struct cl_object *obj;
struct cl_req_obj *rqo;
- int i;
+ unsigned int i;
LASSERT(list_empty(&page->cp_flight));
LASSERT(!page->cp_req);
@@ -1268,7 +1257,7 @@ EXPORT_SYMBOL(cl_req_page_done);
*/
int cl_req_prep(const struct lu_env *env, struct cl_req *req)
{
- int i;
+ unsigned int i;
int result;
const struct cl_req_slice *slice;
@@ -1301,7 +1290,7 @@ void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
{
const struct cl_req_slice *slice;
struct cl_page *page;
- int i;
+ unsigned int i;
LASSERT(!list_empty(&req->crq_pages));
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c
index 91a5806d0239..3199dd4a3b72 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c
@@ -163,7 +163,7 @@ static spinlock_t *cl_object_attr_guard(struct cl_object *o)
*
* Prevents data-attributes from changing, until lock is released by
* cl_object_attr_unlock(). This has to be called before calls to
- * cl_object_attr_get(), cl_object_attr_set().
+ * cl_object_attr_get(), cl_object_attr_update().
*/
void cl_object_attr_lock(struct cl_object *o)
__acquires(cl_object_attr_guard(o))
@@ -217,11 +217,11 @@ EXPORT_SYMBOL(cl_object_attr_get);
* Updates data-attributes of an object \a obj.
*
* Only attributes, mentioned in a validness bit-mask \a v are
- * updated. Calls cl_object_operations::coo_attr_set() on every layer, bottom
- * to top.
+ * updated. Calls cl_object_operations::coo_attr_update() on every layer,
+ * bottom to top.
*/
-int cl_object_attr_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned v)
+int cl_object_attr_update(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned int v)
{
struct lu_object_header *top;
int result;
@@ -231,8 +231,9 @@ int cl_object_attr_set(const struct lu_env *env, struct cl_object *obj,
top = obj->co_lu.lo_header;
result = 0;
list_for_each_entry_reverse(obj, &top->loh_layers, co_lu.lo_linkage) {
- if (obj->co_ops->coo_attr_set) {
- result = obj->co_ops->coo_attr_set(env, obj, attr, v);
+ if (obj->co_ops->coo_attr_update) {
+ result = obj->co_ops->coo_attr_update(env, obj, attr,
+ v);
if (result != 0) {
if (result > 0)
result = 0;
@@ -242,7 +243,7 @@ int cl_object_attr_set(const struct lu_env *env, struct cl_object *obj,
}
return result;
}
-EXPORT_SYMBOL(cl_object_attr_set);
+EXPORT_SYMBOL(cl_object_attr_update);
/**
* Notifies layers (bottom-to-top) that glimpse AST was received.
@@ -321,6 +322,27 @@ int cl_object_prune(const struct lu_env *env, struct cl_object *obj)
EXPORT_SYMBOL(cl_object_prune);
/**
+ * Get stripe information of this object.
+ */
+int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
+			struct lov_user_md __user *uarg)
+{
+	struct lu_object_header *hdr = obj->co_lu.lo_header;
+	int rc;
+
+	/*
+	 * Ask each layer that implements coo_getstripe in turn; the first
+	 * non-zero result ends the walk and is returned to the caller.
+	 */
+	list_for_each_entry(obj, &hdr->loh_layers, co_lu.lo_linkage) {
+		if (!obj->co_ops->coo_getstripe)
+			continue;
+
+		rc = obj->co_ops->coo_getstripe(env, obj, uarg);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(cl_object_getstripe);
+
+/**
* Helper function removing all object locks, and marking object for
* deletion. All object pages must have been deleted at this point.
*
@@ -377,7 +399,7 @@ static void cl_env_percpu_refill(void);
*/
int cl_site_init(struct cl_site *s, struct cl_device *d)
{
- int i;
+ size_t i;
int result;
result = lu_site_init(&s->cs_lu, &d->cd_lu_dev);
@@ -411,7 +433,7 @@ static struct cache_stats cl_env_stats = {
*/
int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
{
- int i;
+ size_t i;
static const char *pstate[] = {
[CPS_CACHED] = "c",
[CPS_OWNED] = "o",
@@ -1000,7 +1022,7 @@ static int cl_env_percpu_init(void)
* thus we must uninitialize up to i, the rest are undefined.
*/
for (j = 0; j < i; j++) {
- cle = &cl_env_percpu[i];
+ cle = &cl_env_percpu[j];
lu_context_exit(&cle->ce_ses);
lu_context_fini(&cle->ce_ses);
lu_env_fini(&cle->ce_lu);
@@ -1126,7 +1148,7 @@ static void *cl_key_init(const struct lu_context *ctx,
info = cl0_key_init(ctx, key);
if (!IS_ERR(info)) {
- int i;
+ size_t i;
for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
lu_ref_init(&info->clt_counters[i].ctc_locks_locked);
@@ -1138,7 +1160,7 @@ static void cl_key_fini(const struct lu_context *ctx,
struct lu_context_key *key, void *data)
{
struct cl_thread_info *info;
- int i;
+ size_t i;
info = data;
for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
@@ -1150,7 +1172,7 @@ static void cl_key_exit(const struct lu_context *ctx,
struct lu_context_key *key, void *data)
{
struct cl_thread_info *info = data;
- int i;
+ size_t i;
for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i) {
LASSERT(info->clt_counters[i].ctc_nr_held == 0);
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c
index db2dc6b39073..63973ba096da 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_page.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c
@@ -151,7 +151,6 @@ struct cl_page *cl_page_alloc(const struct lu_env *env,
INIT_LIST_HEAD(&page->cp_layers);
INIT_LIST_HEAD(&page->cp_batch);
INIT_LIST_HEAD(&page->cp_flight);
- mutex_init(&page->cp_mutex);
lu_ref_init(&page->cp_reference);
head = o->co_lu.lo_header;
list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
@@ -171,7 +170,6 @@ struct cl_page *cl_page_alloc(const struct lu_env *env,
}
return page;
}
-EXPORT_SYMBOL(cl_page_alloc);
/**
* Returns a cl_page with index \a idx at the object \a o, and associated with
@@ -229,11 +227,6 @@ EXPORT_SYMBOL(cl_page_find);
static inline int cl_page_invariant(const struct cl_page *pg)
{
- /*
- * Page invariant is protected by a VM lock.
- */
- LINVRNT(cl_page_is_vmlocked(NULL, pg));
-
return cl_page_in_use_noref(pg);
}
@@ -478,7 +471,6 @@ static void cl_page_owner_clear(struct cl_page *page)
LASSERT(page->cp_owner->ci_owned_nr > 0);
page->cp_owner->ci_owned_nr--;
page->cp_owner = NULL;
- page->cp_task = NULL;
}
}
@@ -562,7 +554,6 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
PASSERT(env, pg, !pg->cp_owner);
PASSERT(env, pg, !pg->cp_req);
pg->cp_owner = cl_io_top(io);
- pg->cp_task = current;
cl_page_owner_set(pg);
if (pg->cp_state != CPS_FREEING) {
cl_page_state_set(env, pg, CPS_OWNED);
@@ -619,7 +610,6 @@ void cl_page_assume(const struct lu_env *env,
cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
PASSERT(env, pg, !pg->cp_owner);
pg->cp_owner = cl_io_top(io);
- pg->cp_task = current;
cl_page_owner_set(pg);
cl_page_state_set(env, pg, CPS_OWNED);
}
@@ -860,10 +850,6 @@ void cl_page_completion(const struct lu_env *env,
PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
- if (crt == CRT_READ && ioret == 0) {
- PASSERT(env, pg, !(pg->cp_flags & CPF_READ_COMPLETED));
- pg->cp_flags |= CPF_READ_COMPLETED;
- }
cl_page_state_set(env, pg, CPS_CACHED);
if (crt >= CRT_NR)
@@ -872,7 +858,6 @@ void cl_page_completion(const struct lu_env *env,
(const struct lu_env *,
const struct cl_page_slice *, int), ioret);
if (anchor) {
- LASSERT(cl_page_is_vmlocked(env, pg));
LASSERT(pg->cp_sync_io == anchor);
pg->cp_sync_io = NULL;
}
@@ -989,10 +974,10 @@ void cl_page_header_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_page *pg)
{
(*printer)(env, cookie,
- "page@%p[%d %p %d %d %d %p %p %#x]\n",
+ "page@%p[%d %p %d %d %p %p]\n",
pg, atomic_read(&pg->cp_ref), pg->cp_obj,
- pg->cp_state, pg->cp_error, pg->cp_type,
- pg->cp_owner, pg->cp_req, pg->cp_flags);
+ pg->cp_state, pg->cp_type,
+ pg->cp_owner, pg->cp_req);
}
EXPORT_SYMBOL(cl_page_header_print);
@@ -1020,7 +1005,6 @@ int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
(const struct lu_env *,
const struct cl_page_slice *));
}
-EXPORT_SYMBOL(cl_page_cancel);
/**
* Converts a byte offset within object \a obj into a page index.
@@ -1046,9 +1030,9 @@ pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
}
EXPORT_SYMBOL(cl_index);
-int cl_page_size(const struct cl_object *obj)
+size_t cl_page_size(const struct cl_object *obj)
{
- return 1 << PAGE_SHIFT;
+ return 1UL << PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_page_size);
@@ -1087,11 +1071,11 @@ struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
/* Initialize cache data */
atomic_set(&cache->ccc_users, 1);
cache->ccc_lru_max = lru_page_max;
- atomic_set(&cache->ccc_lru_left, lru_page_max);
+ atomic_long_set(&cache->ccc_lru_left, lru_page_max);
spin_lock_init(&cache->ccc_lru_lock);
INIT_LIST_HEAD(&cache->ccc_lru);
- atomic_set(&cache->ccc_unstable_nr, 0);
+ atomic_long_set(&cache->ccc_unstable_nr, 0);
init_waitqueue_head(&cache->ccc_unstable_waitq);
return cache;
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
index d9d2a1952b8b..76e1ee83a723 100644
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -40,10 +40,10 @@
#include "../include/lprocfs_status.h"
#include <linux/list.h>
#include "../include/cl_object.h"
+#include "../include/lustre/lustre_ioctl.h"
#include "llog_internal.h"
struct obd_device *obd_devs[MAX_OBD_DEVICES];
-EXPORT_SYMBOL(obd_devs);
struct list_head obd_types;
DEFINE_RWLOCK(obd_dev_lock);
@@ -54,11 +54,9 @@ unsigned int obd_dump_on_timeout;
EXPORT_SYMBOL(obd_dump_on_timeout);
unsigned int obd_dump_on_eviction;
EXPORT_SYMBOL(obd_dump_on_eviction);
-unsigned int obd_max_dirty_pages = 256;
+unsigned long obd_max_dirty_pages;
EXPORT_SYMBOL(obd_max_dirty_pages);
-atomic_t obd_unstable_pages;
-EXPORT_SYMBOL(obd_unstable_pages);
-atomic_t obd_dirty_pages;
+atomic_long_t obd_dirty_pages;
EXPORT_SYMBOL(obd_dirty_pages);
unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */
EXPORT_SYMBOL(obd_timeout);
@@ -76,13 +74,11 @@ EXPORT_SYMBOL(at_early_margin);
int at_extra = 30;
EXPORT_SYMBOL(at_extra);
-atomic_t obd_dirty_transit_pages;
+atomic_long_t obd_dirty_transit_pages;
EXPORT_SYMBOL(obd_dirty_transit_pages);
char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
-EXPORT_SYMBOL(obd_jobid_var);
-
-char obd_jobid_node[JOBSTATS_JOBID_SIZE + 1];
+char obd_jobid_node[LUSTRE_JOBID_SIZE + 1];
/* Get jobid of current process from stored variable or calculate
* it from pid and user_id.
@@ -93,14 +89,14 @@ char obd_jobid_node[JOBSTATS_JOBID_SIZE + 1];
*/
int lustre_get_jobid(char *jobid)
{
- memset(jobid, 0, JOBSTATS_JOBID_SIZE);
+ memset(jobid, 0, LUSTRE_JOBID_SIZE);
/* Jobstats isn't enabled */
if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
return 0;
/* Use process name + fsuid as jobid */
if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
- snprintf(jobid, JOBSTATS_JOBID_SIZE, "%s.%u",
+ snprintf(jobid, LUSTRE_JOBID_SIZE, "%s.%u",
current_comm(),
from_kuid(&init_user_ns, current_fsuid()));
return 0;
@@ -116,19 +112,6 @@ int lustre_get_jobid(char *jobid)
}
EXPORT_SYMBOL(lustre_get_jobid);
-static inline void obd_data2conn(struct lustre_handle *conn,
- struct obd_ioctl_data *data)
-{
- memset(conn, 0, sizeof(*conn));
- conn->cookie = data->ioc_cookie;
-}
-
-static inline void obd_conn2data(struct obd_ioctl_data *data,
- struct lustre_handle *conn)
-{
- data->ioc_cookie = conn->cookie;
-}
-
static int class_resolve_dev_name(__u32 len, const char *name)
{
int rc;
@@ -287,13 +270,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
goto out;
}
- case OBD_IOC_CLOSE_UUID: {
- CDEBUG(D_IOCTL, "closing all connections to uuid %s (NOOP)\n",
- data->ioc_inlbuf1);
- err = 0;
- goto out;
- }
-
case OBD_IOC_GETDEVICE: {
int index = data->ioc_count;
char *status, *str;
@@ -467,15 +443,10 @@ static int obd_init_checks(void)
return ret;
}
-extern int class_procfs_init(void);
-extern int class_procfs_clean(void);
-
static int __init obdclass_init(void)
{
int i, err;
- int lustre_register_fs(void);
-
LCONSOLE_INFO("Lustre: Build Version: " LUSTRE_VERSION_STRING "\n");
spin_lock_init(&obd_types_lock);
@@ -542,23 +513,9 @@ static int __init obdclass_init(void)
static void obdclass_exit(void)
{
- int i;
-
- int lustre_unregister_fs(void);
-
lustre_unregister_fs();
misc_deregister(&obd_psdev);
- for (i = 0; i < class_devno_max(); i++) {
- struct obd_device *obd = class_num2obd(i);
-
- if (obd && obd->obd_set_up &&
- OBT(obd) && OBP(obd, detach)) {
- /* XXX should this call generic detach otherwise? */
- LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
- OBP(obd, detach)(obd);
- }
- }
llog_info_fini();
cl_global_fini();
lu_global_fini();
diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c
index 8acf67239fa8..0bd4ad20aba7 100644
--- a/drivers/staging/lustre/lustre/obdclass/debug.c
+++ b/drivers/staging/lustre/lustre/obdclass/debug.c
@@ -48,10 +48,10 @@ int block_debug_setup(void *addr, int len, __u64 off, __u64 id)
LASSERT(addr);
put_unaligned_le64(off, addr);
- put_unaligned_le64(id, addr+LPDS);
+ put_unaligned_le64(id, addr + LPDS);
addr += len - LPDS - LPDS;
put_unaligned_le64(off, addr);
- put_unaligned_le64(id, addr+LPDS);
+ put_unaligned_le64(id, addr + LPDS);
return 0;
}
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
index 99c2da632b51..cf8bb2a2f40b 100644
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -133,7 +133,6 @@ void class_put_type(struct obd_type *type)
module_put(type->typ_dt_ops->owner);
spin_unlock(&type->obd_type_lock);
}
-EXPORT_SYMBOL(class_put_type);
#define CLASS_MAX_NAME 1024
@@ -166,10 +165,10 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
!type->typ_name)
goto failed;
- *(type->typ_dt_ops) = *dt_ops;
+ *type->typ_dt_ops = *dt_ops;
/* md_ops is optional */
if (md_ops)
- *(type->typ_md_ops) = *md_ops;
+ *type->typ_md_ops = *md_ops;
strcpy(type->typ_name, name);
spin_lock_init(&type->obd_type_lock);
@@ -391,7 +390,6 @@ int class_name2dev(const char *name)
return -1;
}
-EXPORT_SYMBOL(class_name2dev);
struct obd_device *class_name2obd(const char *name)
{
@@ -421,7 +419,6 @@ int class_uuid2dev(struct obd_uuid *uuid)
return -1;
}
-EXPORT_SYMBOL(class_uuid2dev);
/**
* Get obd device from ::obd_devs[]
@@ -450,7 +447,6 @@ struct obd_device *class_num2obd(int num)
return obd;
}
-EXPORT_SYMBOL(class_num2obd);
/* Search for a client OBD connected to tgt_uuid. If grp_uuid is
* specified, then only the client with that uuid is returned,
@@ -509,7 +505,7 @@ struct obd_device *class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
continue;
if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) {
if (next)
- *next = i+1;
+ *next = i + 1;
read_unlock(&obd_dev_lock);
return obd;
}
@@ -618,7 +614,7 @@ struct obd_export *class_conn2export(struct lustre_handle *conn)
}
CDEBUG(D_INFO, "looking for export cookie %#llx\n", conn->cookie);
- export = class_handle2object(conn->cookie);
+ export = class_handle2object(conn->cookie, NULL);
return export;
}
EXPORT_SYMBOL(class_conn2export);
@@ -817,7 +813,6 @@ void class_unlink_export(struct obd_export *exp)
spin_unlock(&exp->exp_obd->obd_dev_lock);
class_export_put(exp);
}
-EXPORT_SYMBOL(class_unlink_export);
/* Import management functions */
static void class_import_destroy(struct obd_import *imp)
@@ -973,7 +968,6 @@ void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
lock, exp, lock->l_exp_refs_nr);
spin_unlock(&exp->exp_locks_list_guard);
}
-EXPORT_SYMBOL(__class_export_add_lock_ref);
void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
{
@@ -991,7 +985,6 @@ void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
lock, exp, lock->l_exp_refs_nr);
spin_unlock(&exp->exp_locks_list_guard);
}
-EXPORT_SYMBOL(__class_export_del_lock_ref);
#endif
/* A connection defines an export context in which preallocation can
@@ -1100,7 +1093,6 @@ EXPORT_SYMBOL(class_fail_export);
#if LUSTRE_TRACKS_LOCK_EXP_REFS
void (*class_export_dump_hook)(struct obd_export *) = NULL;
-EXPORT_SYMBOL(class_export_dump_hook);
#endif
/* Total amount of zombies to be destroyed */
@@ -1312,3 +1304,135 @@ void obd_zombie_impexp_stop(void)
obd_zombie_impexp_notify();
wait_for_completion(&obd_zombie_stop);
}
+
+struct obd_request_slot_waiter {
+ struct list_head orsw_entry;
+ wait_queue_head_t orsw_waitq;
+ bool orsw_signaled;
+};
+
+static bool obd_request_slot_avail(struct client_obd *cli,
+ struct obd_request_slot_waiter *orsw)
+{
+ bool avail;
+
+ spin_lock(&cli->cl_loi_list_lock);
+ avail = !!list_empty(&orsw->orsw_entry);
+ spin_unlock(&cli->cl_loi_list_lock);
+
+ return avail;
+};
+
+/*
+ * For network flow control, the RPC sponsor needs to acquire a credit
+ * before sending the RPC. The credits count for a connection is defined
+ * by the "cl_max_rpcs_in_flight". If all the credits are occupied, then
+ * the subsequent RPC sponsors need to wait until others released their
+ * credits, or the administrator increased the "cl_max_rpcs_in_flight".
+ */
+int obd_get_request_slot(struct client_obd *cli)
+{
+ struct obd_request_slot_waiter orsw;
+ struct l_wait_info lwi;
+ int rc;
+
+ spin_lock(&cli->cl_loi_list_lock);
+ if (cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight) {
+ cli->cl_r_in_flight++;
+ spin_unlock(&cli->cl_loi_list_lock);
+ return 0;
+ }
+
+ init_waitqueue_head(&orsw.orsw_waitq);
+ list_add_tail(&orsw.orsw_entry, &cli->cl_loi_read_list);
+ orsw.orsw_signaled = false;
+ spin_unlock(&cli->cl_loi_list_lock);
+
+ lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ rc = l_wait_event(orsw.orsw_waitq,
+ obd_request_slot_avail(cli, &orsw) ||
+ orsw.orsw_signaled,
+ &lwi);
+
+ /*
+ * Here, we must take the lock so that the on-stack 'orsw' does not go
+ * away while someone else (such as obd_put_request_slot) is using it.
+ */
+ spin_lock(&cli->cl_loi_list_lock);
+ if (rc) {
+ if (!orsw.orsw_signaled) {
+ if (list_empty(&orsw.orsw_entry))
+ cli->cl_r_in_flight--;
+ else
+ list_del(&orsw.orsw_entry);
+ }
+ }
+
+ if (orsw.orsw_signaled) {
+ LASSERT(list_empty(&orsw.orsw_entry));
+
+ rc = -EINTR;
+ }
+ spin_unlock(&cli->cl_loi_list_lock);
+
+ return rc;
+}
+EXPORT_SYMBOL(obd_get_request_slot);
+
+void obd_put_request_slot(struct client_obd *cli)
+{
+ struct obd_request_slot_waiter *orsw;
+
+ spin_lock(&cli->cl_loi_list_lock);
+ cli->cl_r_in_flight--;
+
+ /* If there is a free slot, wake up the first waiter. */
+ if (!list_empty(&cli->cl_loi_read_list) &&
+ likely(cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight)) {
+ orsw = list_entry(cli->cl_loi_read_list.next,
+ struct obd_request_slot_waiter, orsw_entry);
+ list_del_init(&orsw->orsw_entry);
+ cli->cl_r_in_flight++;
+ wake_up(&orsw->orsw_waitq);
+ }
+ spin_unlock(&cli->cl_loi_list_lock);
+}
+EXPORT_SYMBOL(obd_put_request_slot);
+
+__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli)
+{
+ return cli->cl_max_rpcs_in_flight;
+}
+EXPORT_SYMBOL(obd_get_max_rpcs_in_flight);
+
+int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max)
+{
+ struct obd_request_slot_waiter *orsw;
+ __u32 old;
+ int diff;
+ int i;
+
+ if (max > OBD_MAX_RIF_MAX || max < 1)
+ return -ERANGE;
+
+ spin_lock(&cli->cl_loi_list_lock);
+ old = cli->cl_max_rpcs_in_flight;
+ cli->cl_max_rpcs_in_flight = max;
+ diff = max - old;
+
+ /* If max_rpcs_in_flight was increased, wake up some waiters. */
+ for (i = 0; i < diff; i++) {
+ if (list_empty(&cli->cl_loi_read_list))
+ break;
+
+ orsw = list_entry(cli->cl_loi_read_list.next,
+ struct obd_request_slot_waiter, orsw_entry);
+ list_del_init(&orsw->orsw_entry);
+ cli->cl_r_in_flight++;
+ wake_up(&orsw->orsw_waitq);
+ }
+ spin_unlock(&cli->cl_loi_list_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(obd_set_max_rpcs_in_flight);
diff --git a/drivers/staging/lustre/lustre/obdclass/linkea.c b/drivers/staging/lustre/lustre/obdclass/linkea.c
new file mode 100644
index 000000000000..0b1d2f0a422c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/linkea.c
@@ -0,0 +1,201 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, 2014, Intel Corporation.
+ * Use is subject to license terms.
+ *
+ * Author: Di Wang <di.wang@intel.com>
+ */
+
+#include "../include/lustre/lustre_idl.h"
+#include "../include/obd.h"
+#include "../include/lustre_linkea.h"
+
+int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf)
+{
+ ldata->ld_buf = lu_buf_check_and_alloc(buf, PAGE_SIZE);
+ if (!ldata->ld_buf->lb_buf)
+ return -ENOMEM;
+ ldata->ld_leh = ldata->ld_buf->lb_buf;
+ ldata->ld_leh->leh_magic = LINK_EA_MAGIC;
+ ldata->ld_leh->leh_len = sizeof(struct link_ea_header);
+ ldata->ld_leh->leh_reccount = 0;
+ return 0;
+}
+EXPORT_SYMBOL(linkea_data_new);
+
+int linkea_init(struct linkea_data *ldata)
+{
+ struct link_ea_header *leh;
+
+ LASSERT(ldata->ld_buf);
+ leh = ldata->ld_buf->lb_buf;
+ if (leh->leh_magic == __swab32(LINK_EA_MAGIC)) {
+ leh->leh_magic = LINK_EA_MAGIC;
+ leh->leh_reccount = __swab32(leh->leh_reccount);
+ leh->leh_len = __swab64(leh->leh_len);
+ /* entries are swabbed by linkea_entry_unpack */
+ }
+ if (leh->leh_magic != LINK_EA_MAGIC)
+ return -EINVAL;
+ if (leh->leh_reccount == 0)
+ return -ENODATA;
+
+ ldata->ld_leh = leh;
+ return 0;
+}
+EXPORT_SYMBOL(linkea_init);
+
+/**
+ * Pack a link_ea_entry.
+ * All elements are stored as chars to avoid alignment issues.
+ * Numbers are always big-endian
+ * \retval record length
+ */
+int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname,
+ const struct lu_fid *pfid)
+{
+ struct lu_fid tmpfid;
+ int reclen;
+
+ tmpfid = *pfid;
+ if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_CRASH))
+ tmpfid.f_ver = ~0;
+ fid_cpu_to_be(&tmpfid, &tmpfid);
+ memcpy(&lee->lee_parent_fid, &tmpfid, sizeof(tmpfid));
+ memcpy(lee->lee_name, lname->ln_name, lname->ln_namelen);
+ reclen = sizeof(struct link_ea_entry) + lname->ln_namelen;
+
+ lee->lee_reclen[0] = (reclen >> 8) & 0xff;
+ lee->lee_reclen[1] = reclen & 0xff;
+ return reclen;
+}
+EXPORT_SYMBOL(linkea_entry_pack);
+
+void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen,
+ struct lu_name *lname, struct lu_fid *pfid)
+{
+ *reclen = (lee->lee_reclen[0] << 8) | lee->lee_reclen[1];
+ memcpy(pfid, &lee->lee_parent_fid, sizeof(*pfid));
+ fid_be_to_cpu(pfid, pfid);
+ if (lname) {
+ lname->ln_name = lee->lee_name;
+ lname->ln_namelen = *reclen - sizeof(struct link_ea_entry);
+ }
+}
+EXPORT_SYMBOL(linkea_entry_unpack);
+
+/**
+ * Add a record to the end of link ea buf
+ **/
+int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
+ const struct lu_fid *pfid)
+{
+ LASSERT(ldata->ld_leh);
+
+ if (!lname || !pfid)
+ return -EINVAL;
+
+ ldata->ld_reclen = lname->ln_namelen + sizeof(struct link_ea_entry);
+ if (ldata->ld_leh->leh_len + ldata->ld_reclen >
+ ldata->ld_buf->lb_len) {
+ if (lu_buf_check_and_grow(ldata->ld_buf,
+ ldata->ld_leh->leh_len +
+ ldata->ld_reclen) < 0)
+ return -ENOMEM;
+ }
+
+ ldata->ld_leh = ldata->ld_buf->lb_buf;
+ ldata->ld_lee = ldata->ld_buf->lb_buf + ldata->ld_leh->leh_len;
+ ldata->ld_reclen = linkea_entry_pack(ldata->ld_lee, lname, pfid);
+ ldata->ld_leh->leh_len += ldata->ld_reclen;
+ ldata->ld_leh->leh_reccount++;
+ CDEBUG(D_INODE, "New link_ea name '" DFID ":%.*s' is added\n",
+ PFID(pfid), lname->ln_namelen, lname->ln_name);
+ return 0;
+}
+EXPORT_SYMBOL(linkea_add_buf);
+
+/** Delete the current record from the link ea buf */
+void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname)
+{
+ LASSERT(ldata->ld_leh && ldata->ld_lee);
+
+ ldata->ld_leh->leh_reccount--;
+ ldata->ld_leh->leh_len -= ldata->ld_reclen;
+ memmove(ldata->ld_lee, (char *)ldata->ld_lee + ldata->ld_reclen,
+ (char *)ldata->ld_leh + ldata->ld_leh->leh_len -
+ (char *)ldata->ld_lee);
+ CDEBUG(D_INODE, "Old link_ea name '%.*s' is removed\n",
+ lname->ln_namelen, lname->ln_name);
+
+ if ((char *)ldata->ld_lee >= ((char *)ldata->ld_leh +
+ ldata->ld_leh->leh_len))
+ ldata->ld_lee = NULL;
+}
+EXPORT_SYMBOL(linkea_del_buf);
+
+/**
+ * Check if such a link exists in linkEA.
+ *
+ * \param ldata link data the search to be done on
+ * \param lname name in the parent's directory entry pointing to this object
+ * \param pfid parent fid the link to be found for
+ *
+ * \retval 0 success
+ * \retval -ENOENT link does not exist
+ * \retval -ve on error
+ */
+int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname,
+ const struct lu_fid *pfid)
+{
+ struct lu_name tmpname;
+ struct lu_fid tmpfid;
+ int count;
+
+ LASSERT(ldata->ld_leh);
+
+ /* link #0 */
+ ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1);
+
+ for (count = 0; count < ldata->ld_leh->leh_reccount; count++) {
+ linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen,
+ &tmpname, &tmpfid);
+ if (tmpname.ln_namelen == lname->ln_namelen &&
+ lu_fid_eq(&tmpfid, pfid) &&
+ (strncmp(tmpname.ln_name, lname->ln_name,
+ tmpname.ln_namelen) == 0))
+ break;
+ ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee +
+ ldata->ld_reclen);
+ }
+
+ if (count == ldata->ld_leh->leh_reccount) {
+ CDEBUG(D_INODE, "Old link_ea name '%.*s' not found\n",
+ lname->ln_namelen, lname->ln_name);
+ ldata->ld_lee = NULL;
+ ldata->ld_reclen = 0;
+ return -ENOENT;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(linkea_links_find);
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
index 33342bfcc90e..be09e04b042f 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
@@ -65,6 +65,7 @@
#include "../../include/obd_support.h"
#include "../../include/obd_class.h"
#include "../../include/lprocfs_status.h"
+#include "../../include/lustre/lustre_ioctl.h"
#include "../../include/lustre_ver.h"
/* buffer MUST be at least the size of obd_ioctl_hdr */
@@ -157,7 +158,6 @@ int obd_ioctl_popdata(void __user *arg, void *data, int len)
err = copy_to_user(arg, data, len) ? -EFAULT : 0;
return err;
}
-EXPORT_SYMBOL(obd_ioctl_popdata);
/* opening /dev/obd */
static int obd_class_open(struct inode *inode, struct file *file)
@@ -191,7 +191,7 @@ static long obd_class_ioctl(struct file *filp, unsigned int cmd,
}
/* declare character device */
-static struct file_operations obd_psdev_fops = {
+static const struct file_operations obd_psdev_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = obd_class_ioctl, /* unlocked_ioctl */
.open = obd_class_open, /* open */
@@ -291,7 +291,7 @@ static ssize_t jobid_name_store(struct kobject *kobj, struct attribute *attr,
const char *buffer,
size_t count)
{
- if (!count || count > JOBSTATS_JOBID_SIZE)
+ if (!count || count > LUSTRE_JOBID_SIZE)
return -EINVAL;
memcpy(obd_jobid_node, buffer, count);
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
index c6cc6a7666e3..41b77a30feb3 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
@@ -44,7 +44,7 @@
#include <linux/fs.h>
-void obdo_refresh_inode(struct inode *dst, struct obdo *src, u32 valid)
+void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid)
{
valid &= src->o_valid;
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
index 8f70dd2686f9..e6c785afceba 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
@@ -45,6 +45,7 @@
#include "../../include/obd_support.h"
#include "../../include/lprocfs_status.h"
+#include "../../include/obd_class.h"
struct static_lustre_uintvalue_attr {
struct {
@@ -95,8 +96,8 @@ LUSTRE_STATIC_UINT_ATTR(timeout, &obd_timeout);
static ssize_t max_dirty_mb_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
- return sprintf(buf, "%ul\n",
- obd_max_dirty_pages / (1 << (20 - PAGE_SHIFT)));
+ return sprintf(buf, "%lu\n",
+ obd_max_dirty_pages / (1 << (20 - PAGE_SHIFT)));
}
static ssize_t max_dirty_mb_store(struct kobject *kobj, struct attribute *attr,
diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c
index 1784ca063428..43797f106745 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog.c
@@ -80,7 +80,7 @@ static void llog_free_handle(struct llog_handle *loghandle)
LASSERT(list_empty(&loghandle->u.phd.phd_entry));
else if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
LASSERT(list_empty(&loghandle->u.chd.chd_head));
- LASSERT(sizeof(*(loghandle->lgh_hdr)) == LLOG_CHUNK_SIZE);
+ LASSERT(sizeof(*loghandle->lgh_hdr) == LLOG_CHUNK_SIZE);
kfree(loghandle->lgh_hdr);
out:
kfree(loghandle);
@@ -137,6 +137,7 @@ static int llog_read_header(const struct lu_env *env,
int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
int flags, struct obd_uuid *uuid)
{
+ enum llog_flag fmt = flags & LLOG_F_EXT_MASK;
struct llog_log_hdr *llh;
int rc;
@@ -194,6 +195,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
rc = -EINVAL;
}
+ llh->llh_flags |= fmt;
out:
if (rc) {
kfree(llh);
@@ -233,6 +235,10 @@ static int llog_process_thread(void *arg)
else
last_index = LLOG_BITMAP_BYTES * 8 - 1;
+ /* Record is not in this buffer. */
+ if (index > last_index)
+ goto out;
+
while (rc == 0) {
struct llog_rec_hdr *rec;
@@ -262,7 +268,7 @@ repeat:
*/
for (rec = (struct llog_rec_hdr *)buf;
(char *)rec < buf + LLOG_CHUNK_SIZE;
- rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)) {
+ rec = llog_rec_hdr_next(rec)) {
CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
rec, rec->lrh_type);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_cat.c b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
index a82a2950295a..ce8e2f6f002a 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_cat.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
@@ -63,11 +63,13 @@ static int llog_cat_id2handle(const struct lu_env *env,
struct llog_logid *logid)
{
struct llog_handle *loghandle;
+ enum llog_flag fmt;
int rc = 0;
if (!cathandle)
return -EBADF;
+ fmt = cathandle->lgh_hdr->llh_flags & LLOG_F_EXT_MASK;
down_write(&cathandle->lgh_lock);
list_for_each_entry(loghandle, &cathandle->u.chd.chd_head,
u.phd.phd_entry) {
@@ -99,7 +101,7 @@ static int llog_cat_id2handle(const struct lu_env *env,
return rc;
}
- rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
+ rc = llog_init_handle(env, loghandle, fmt | LLOG_F_IS_PLAIN, NULL);
if (rc < 0) {
llog_close(env, loghandle);
loghandle = NULL;
@@ -107,7 +109,7 @@ static int llog_cat_id2handle(const struct lu_env *env,
}
down_write(&cathandle->lgh_lock);
- list_add(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head);
+ list_add_tail(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head);
up_write(&cathandle->lgh_lock);
loghandle->u.phd.phd_cat_handle = cathandle;
@@ -123,7 +125,6 @@ out:
int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle)
{
struct llog_handle *loghandle, *n;
- int rc;
list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head,
u.phd.phd_entry) {
@@ -134,8 +135,7 @@ int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle)
/* if handle was stored in ctxt, remove it too */
if (cathandle->lgh_ctxt->loc_handle == cathandle)
cathandle->lgh_ctxt->loc_handle = NULL;
- rc = llog_close(env, cathandle);
- return rc;
+ return llog_close(env, cathandle);
}
EXPORT_SYMBOL(llog_cat_close);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_internal.h b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
index f7949525d952..21a93c73756a 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_internal.h
+++ b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
@@ -70,4 +70,9 @@ int llog_process_or_fork(const struct lu_env *env,
llog_cb_t cb, void *data, void *catdata, bool fork);
int llog_cat_cleanup(const struct lu_env *env, struct llog_handle *cathandle,
struct llog_handle *loghandle, int index);
+
+static inline struct llog_rec_hdr *llog_rec_hdr_next(struct llog_rec_hdr *rec)
+{
+ return (struct llog_rec_hdr *)((char *)rec + rec->lrh_len);
+}
#endif
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
index 6ace7e097859..a4277d684614 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
@@ -210,7 +210,6 @@ LU_KEY_INIT_FINI(llog, struct llog_thread_info);
/* context key: llog_thread_key */
LU_CONTEXT_KEY_DEFINE(llog, LCT_MD_THREAD | LCT_MG_THREAD | LCT_LOCAL);
LU_KEY_INIT_GENERIC(llog);
-EXPORT_SYMBOL(llog_thread_key);
int llog_info_init(void)
{
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_swab.c b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
index f7b9b190350c..8c4c1b3f1b45 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_swab.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
@@ -172,20 +172,23 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec)
__swab64s(&cr->cr.cr_time);
lustre_swab_lu_fid(&cr->cr.cr_tfid);
lustre_swab_lu_fid(&cr->cr.cr_pfid);
- if (CHANGELOG_REC_EXTENDED(&cr->cr)) {
- struct llog_changelog_ext_rec *ext =
- (struct llog_changelog_ext_rec *)rec;
-
- lustre_swab_lu_fid(&ext->cr.cr_sfid);
- lustre_swab_lu_fid(&ext->cr.cr_spfid);
- tail = &ext->cr_tail;
- } else {
- tail = &cr->cr_tail;
+ if (cr->cr.cr_flags & CLF_RENAME) {
+ struct changelog_ext_rename *rnm =
+ changelog_rec_rename(&cr->cr);
+
+ lustre_swab_lu_fid(&rnm->cr_sfid);
+ lustre_swab_lu_fid(&rnm->cr_spfid);
}
- tail = (struct llog_rec_tail *)((char *)tail +
+ /*
+ * Because the tail follows a variable-length structure we need
+ * to compute its location at runtime
+ */
+ tail = (struct llog_rec_tail *)((char *)&cr->cr +
+ changelog_rec_size(&cr->cr) +
cr->cr.cr_namelen);
break;
}
+
case CHANGELOG_USER_REC:
{
struct llog_changelog_user_rec *cur =
@@ -224,6 +227,7 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec)
__swab32s(&lsr->lsr_uid_h);
__swab32s(&lsr->lsr_gid);
__swab32s(&lsr->lsr_gid_h);
+ __swab64s(&lsr->lsr_valid);
tail = &lsr->lsr_tail;
break;
}
@@ -343,7 +347,6 @@ void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg)
print_lustre_cfg(lcfg);
}
-EXPORT_SYMBOL(lustre_swab_lustre_cfg);
/* used only for compatibility with old on-disk cfg_marker data */
struct cfg_marker32 {
@@ -403,4 +406,3 @@ void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size)
__swab64s(&marker->cm_canceltime);
}
}
-EXPORT_SYMBOL(lustre_swab_cfg_marker);
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
index 279b625f1afe..852a5acfefab 100644
--- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
+++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
@@ -96,6 +96,12 @@ static const char * const obd_connect_names[] = {
"pingless",
"flock_deadlock",
"disp_stripe",
+ "open_by_fid",
+ "lfsck",
+ "unknown",
+ "unlink_close",
+ "unknown",
+ "dir_stripe",
"unknown",
NULL
};
@@ -309,7 +315,7 @@ struct dentry *ldebugfs_add_simple(struct dentry *root,
}
EXPORT_SYMBOL_GPL(ldebugfs_add_simple);
-static struct file_operations lprocfs_generic_fops = { };
+static const struct file_operations lprocfs_generic_fops = { };
int ldebugfs_add_vars(struct dentry *parent,
struct lprocfs_vars *list,
@@ -615,7 +621,6 @@ void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
}
-EXPORT_SYMBOL(lprocfs_stats_collect);
/**
* Append a space separated list of current set flags to str.
@@ -1043,7 +1048,6 @@ int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid)
}
return rc;
}
-EXPORT_SYMBOL(lprocfs_stats_alloc_one);
struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
enum lprocfs_stats_flags flags)
@@ -1547,6 +1551,146 @@ void lprocfs_oh_clear(struct obd_histogram *oh)
}
EXPORT_SYMBOL(lprocfs_oh_clear);
+int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count,
+ struct root_squash_info *squash, char *name)
+{
+ char kernbuf[64], *tmp, *errmsg;
+ unsigned long uid, gid;
+ int rc;
+
+ if (count >= sizeof(kernbuf)) {
+ errmsg = "string too long";
+ rc = -EINVAL;
+ goto failed_noprint;
+ }
+ if (copy_from_user(kernbuf, buffer, count)) {
+ errmsg = "bad address";
+ rc = -EFAULT;
+ goto failed_noprint;
+ }
+ kernbuf[count] = '\0';
+
+ /* look for uid gid separator */
+ tmp = strchr(kernbuf, ':');
+ if (!tmp) {
+ errmsg = "needs uid:gid format";
+ rc = -EINVAL;
+ goto failed;
+ }
+ *tmp = '\0';
+ tmp++;
+
+ /* parse uid */
+ if (kstrtoul(kernbuf, 0, &uid) != 0) {
+ errmsg = "bad uid";
+ rc = -EINVAL;
+ goto failed;
+ }
+ /* parse gid */
+ if (kstrtoul(tmp, 0, &gid) != 0) {
+ errmsg = "bad gid";
+ rc = -EINVAL;
+ goto failed;
+ }
+
+ squash->rsi_uid = uid;
+ squash->rsi_gid = gid;
+
+ LCONSOLE_INFO("%s: root_squash is set to %u:%u\n",
+ name, squash->rsi_uid, squash->rsi_gid);
+ return count;
+
+failed:
+ if (tmp) {
+ tmp--;
+ *tmp = ':';
+ }
+ CWARN("%s: failed to set root_squash to \"%s\", %s, rc = %d\n",
+ name, kernbuf, errmsg, rc);
+ return rc;
+failed_noprint:
+ CWARN("%s: failed to set root_squash due to %s, rc = %d\n",
+ name, errmsg, rc);
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_wr_root_squash);
+
+int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count,
+ struct root_squash_info *squash, char *name)
+{
+ char *kernbuf = NULL, *errmsg;
+ struct list_head tmp;
+ int len = count;
+ int rc;
+
+ if (count > 4096) {
+ errmsg = "string too long";
+ rc = -EINVAL;
+ goto failed;
+ }
+
+ kernbuf = kzalloc(count + 1, GFP_NOFS);
+ if (!kernbuf) {
+ errmsg = "no memory";
+ rc = -ENOMEM;
+ goto failed;
+ }
+
+ if (copy_from_user(kernbuf, buffer, count)) {
+ errmsg = "bad address";
+ rc = -EFAULT;
+ goto failed;
+ }
+ kernbuf[count] = '\0';
+
+ if (count > 0 && kernbuf[count - 1] == '\n')
+ len = count - 1;
+
+ if ((len == 4 && !strncmp(kernbuf, "NONE", len)) ||
+ (len == 5 && !strncmp(kernbuf, "clear", len))) {
+ /* "NONE" or "clear" is a special case: empty the nosquash NID list */
+ down_write(&squash->rsi_sem);
+ if (!list_empty(&squash->rsi_nosquash_nids))
+ cfs_free_nidlist(&squash->rsi_nosquash_nids);
+ up_write(&squash->rsi_sem);
+ LCONSOLE_INFO("%s: nosquash_nids is cleared\n", name);
+ kfree(kernbuf);
+ return count;
+ }
+
+ INIT_LIST_HEAD(&tmp);
+ if (cfs_parse_nidlist(kernbuf, count, &tmp) <= 0) {
+ errmsg = "can't parse";
+ rc = -EINVAL;
+ goto failed;
+ }
+ LCONSOLE_INFO("%s: nosquash_nids set to %s\n",
+ name, kernbuf);
+ kfree(kernbuf);
+ kernbuf = NULL;
+
+ down_write(&squash->rsi_sem);
+ if (!list_empty(&squash->rsi_nosquash_nids))
+ cfs_free_nidlist(&squash->rsi_nosquash_nids);
+ list_splice(&tmp, &squash->rsi_nosquash_nids);
+ up_write(&squash->rsi_sem);
+
+ return count;
+
+failed:
+ if (kernbuf) {
+ CWARN("%s: failed to set nosquash_nids to \"%s\", %s rc = %d\n",
+ name, kernbuf, errmsg, rc);
+ kfree(kernbuf);
+ kernbuf = NULL;
+ } else {
+ CWARN("%s: failed to set nosquash_nids due to %s rc = %d\n",
+ name, errmsg, rc);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_wr_nosquash_nids);
+
static ssize_t lustre_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 9b03059f34d6..054e567e6c8d 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -55,6 +55,34 @@
#include "../include/lu_ref.h"
#include <linux/list.h>
+enum {
+ LU_CACHE_PERCENT_MAX = 50,
+ LU_CACHE_PERCENT_DEFAULT = 20
+};
+
+#define LU_CACHE_NR_MAX_ADJUST 128
+#define LU_CACHE_NR_UNLIMITED -1
+#define LU_CACHE_NR_DEFAULT LU_CACHE_NR_UNLIMITED
+#define LU_CACHE_NR_LDISKFS_LIMIT LU_CACHE_NR_UNLIMITED
+#define LU_CACHE_NR_ZFS_LIMIT 256
+
+#define LU_SITE_BITS_MIN 12
+#define LU_SITE_BITS_MAX 24
+/**
+ * total 256 buckets; we don't want too many buckets because they would:
+ * - consume too much memory
+ * - make the LRU lists unbalanced
+ */
+#define LU_SITE_BKT_BITS 8
+
+static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
+module_param(lu_cache_percent, int, 0644);
+MODULE_PARM_DESC(lu_cache_percent, "Percentage of memory to be used as lu_object cache");
+
+static long lu_cache_nr = LU_CACHE_NR_DEFAULT;
+module_param(lu_cache_nr, long, 0644);
+MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache");
+
static void lu_object_free(const struct lu_env *env, struct lu_object *o);
static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
@@ -310,10 +338,10 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
struct cfs_hash_bd bd2;
struct list_head dispose;
int did_sth;
- int start;
+ unsigned int start;
int count;
int bnr;
- int i;
+ unsigned int i;
if (OBD_FAIL_CHECK(OBD_FAIL_OBD_NO_LRU))
return 0;
@@ -324,8 +352,13 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
* the dispose list, removing them from LRU and hash table.
*/
start = s->ls_purge_start;
- bnr = (nr == ~0) ? -1 : nr / CFS_HASH_NBKT(s->ls_obj_hash) + 1;
+ bnr = (nr == ~0) ? -1 : nr / (int)CFS_HASH_NBKT(s->ls_obj_hash) + 1;
again:
+ /*
+ * It doesn't make any sense to run purge threads in parallel; that can
+ * only bring us trouble. See LU-5331.
+ */
+ mutex_lock(&s->ls_purge_mutex);
did_sth = 0;
cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
if (i < start)
@@ -371,6 +404,7 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
if (nr == 0)
break;
}
+ mutex_unlock(&s->ls_purge_mutex);
if (nr != 0 && did_sth && start != 0) {
start = 0; /* restart from the first bucket */
@@ -573,6 +607,27 @@ static struct lu_object *lu_object_find(const struct lu_env *env,
return lu_object_find_at(env, dev->ld_site->ls_top_dev, f, conf);
}
+/*
+ * Limit the lu_object cache to a maximum of lu_cache_nr objects. Because
+ * the calculation of the number of objects to reclaim is not covered by
+ * a lock, each call purges at most LU_CACHE_NR_MAX_ADJUST objects.
+ * This ensures that many concurrent threads will not accidentally purge
+ * the entire cache.
+ */
+static void lu_object_limit(const struct lu_env *env, struct lu_device *dev)
+{
+ __u64 size, nr;
+
+ if (lu_cache_nr == LU_CACHE_NR_UNLIMITED)
+ return;
+
+ size = cfs_hash_size_get(dev->ld_site->ls_obj_hash);
+ nr = (__u64)lu_cache_nr;
+ if (size > nr)
+ lu_site_purge(env, dev->ld_site,
+ min_t(__u64, size - nr, LU_CACHE_NR_MAX_ADJUST));
+}
+
static struct lu_object *lu_object_new(const struct lu_env *env,
struct lu_device *dev,
const struct lu_fid *f,
@@ -590,6 +645,9 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
cfs_hash_bd_unlock(hs, &bd, 1);
+
+ lu_object_limit(env, dev);
+
return o;
}
@@ -656,6 +714,9 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
if (likely(PTR_ERR(shadow) == -ENOENT)) {
cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
cfs_hash_bd_unlock(hs, &bd, 1);
+
+ lu_object_limit(env, dev);
+
return o;
}
@@ -706,13 +767,15 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env,
struct lu_object *obj;
top = lu_object_find(env, dev, f, conf);
- if (!IS_ERR(top)) {
- obj = lu_object_locate(top->lo_header, dev->ld_type);
- if (!obj)
- lu_object_put(env, top);
- } else {
- obj = top;
+ if (IS_ERR(top))
+ return top;
+
+ obj = lu_object_locate(top->lo_header, dev->ld_type);
+ if (unlikely(!obj)) {
+ lu_object_put(env, top);
+ obj = ERR_PTR(-ENOENT);
}
+
return obj;
}
EXPORT_SYMBOL(lu_object_find_slice);
@@ -726,34 +789,31 @@ int lu_device_type_init(struct lu_device_type *ldt)
{
int result = 0;
+ atomic_set(&ldt->ldt_device_nr, 0);
INIT_LIST_HEAD(&ldt->ldt_linkage);
if (ldt->ldt_ops->ldto_init)
result = ldt->ldt_ops->ldto_init(ldt);
- if (result == 0)
+
+ if (!result) {
+ spin_lock(&obd_types_lock);
list_add(&ldt->ldt_linkage, &lu_device_types);
+ spin_unlock(&obd_types_lock);
+ }
+
return result;
}
EXPORT_SYMBOL(lu_device_type_init);
void lu_device_type_fini(struct lu_device_type *ldt)
{
+ spin_lock(&obd_types_lock);
list_del_init(&ldt->ldt_linkage);
+ spin_unlock(&obd_types_lock);
if (ldt->ldt_ops->ldto_fini)
ldt->ldt_ops->ldto_fini(ldt);
}
EXPORT_SYMBOL(lu_device_type_fini);
-void lu_types_stop(void)
-{
- struct lu_device_type *ldt;
-
- list_for_each_entry(ldt, &lu_device_types, ldt_linkage) {
- if (ldt->ldt_device_nr == 0 && ldt->ldt_ops->ldto_stop)
- ldt->ldt_ops->ldto_stop(ldt);
- }
-}
-EXPORT_SYMBOL(lu_types_stop);
-
/**
* Global list of all sites on this node
*/
@@ -808,22 +868,14 @@ void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
}
EXPORT_SYMBOL(lu_site_print);
-enum {
- LU_CACHE_PERCENT_MAX = 50,
- LU_CACHE_PERCENT_DEFAULT = 20
-};
-
-static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
-module_param(lu_cache_percent, int, 0644);
-MODULE_PARM_DESC(lu_cache_percent, "Percentage of memory to be used as lu_object cache");
-
/**
* Return desired hash table order.
*/
-static int lu_htable_order(void)
+static unsigned long lu_htable_order(struct lu_device *top)
{
+ unsigned long bits_max = LU_SITE_BITS_MAX;
unsigned long cache_size;
- int bits;
+ unsigned long bits;
/*
* Calculate hash table size, assuming that we want reasonable
@@ -854,7 +906,7 @@ static int lu_htable_order(void)
for (bits = 1; (1 << bits) < cache_size; ++bits) {
;
}
- return bits;
+ return clamp_t(typeof(bits), bits, LU_SITE_BITS_MIN, bits_max);
}
static unsigned lu_obj_hop_hash(struct cfs_hash *hs,
@@ -930,28 +982,18 @@ static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d)
/**
* Initialize site \a s, with \a d as the top level device.
*/
-#define LU_SITE_BITS_MIN 12
-#define LU_SITE_BITS_MAX 19
-/**
- * total 256 buckets, we don't want too many buckets because:
- * - consume too much memory
- * - avoid unbalanced LRU list
- */
-#define LU_SITE_BKT_BITS 8
-
int lu_site_init(struct lu_site *s, struct lu_device *top)
{
struct lu_site_bkt_data *bkt;
struct cfs_hash_bd bd;
+ unsigned long bits;
+ unsigned long i;
char name[16];
- int bits;
- int i;
memset(s, 0, sizeof(*s));
- bits = lu_htable_order();
- snprintf(name, 16, "lu_site_%s", top->ld_type->ldt_name);
- for (bits = min(max(LU_SITE_BITS_MIN, bits), LU_SITE_BITS_MAX);
- bits >= LU_SITE_BITS_MIN; bits--) {
+ mutex_init(&s->ls_purge_mutex);
+ snprintf(name, sizeof(name), "lu_site_%s", top->ld_type->ldt_name);
+ for (bits = lu_htable_order(top); bits >= LU_SITE_BITS_MIN; bits--) {
s->ls_obj_hash = cfs_hash_create(name, bits, bits,
bits - LU_SITE_BKT_BITS,
sizeof(*bkt), 0, 0,
@@ -959,13 +1001,14 @@ int lu_site_init(struct lu_site *s, struct lu_device *top)
CFS_HASH_SPIN_BKTLOCK |
CFS_HASH_NO_ITEMREF |
CFS_HASH_DEPTH |
- CFS_HASH_ASSERT_EMPTY);
+ CFS_HASH_ASSERT_EMPTY |
+ CFS_HASH_COUNTER);
if (s->ls_obj_hash)
break;
}
if (!s->ls_obj_hash) {
- CERROR("failed to create lu_site hash with bits: %d\n", bits);
+ CERROR("failed to create lu_site hash with bits: %lu\n", bits);
return -ENOMEM;
}
@@ -1082,8 +1125,10 @@ EXPORT_SYMBOL(lu_device_put);
*/
int lu_device_init(struct lu_device *d, struct lu_device_type *t)
{
- if (t->ldt_device_nr++ == 0 && t->ldt_ops->ldto_start)
+ if (atomic_inc_return(&t->ldt_device_nr) == 1 &&
+ t->ldt_ops->ldto_start)
t->ldt_ops->ldto_start(t);
+
memset(d, 0, sizeof(*d));
atomic_set(&d->ld_ref, 0);
d->ld_type = t;
@@ -1098,9 +1143,8 @@ EXPORT_SYMBOL(lu_device_init);
*/
void lu_device_fini(struct lu_device *d)
{
- struct lu_device_type *t;
+ struct lu_device_type *t = d->ld_type;
- t = d->ld_type;
if (d->ld_obd) {
d->ld_obd->obd_lu_dev = NULL;
d->ld_obd = NULL;
@@ -1109,8 +1153,10 @@ void lu_device_fini(struct lu_device *d)
lu_ref_fini(&d->ld_reference);
LASSERTF(atomic_read(&d->ld_ref) == 0,
"Refcount is %u\n", atomic_read(&d->ld_ref));
- LASSERT(t->ldt_device_nr > 0);
- if (--t->ldt_device_nr == 0 && t->ldt_ops->ldto_stop)
+ LASSERT(atomic_read(&t->ldt_device_nr) > 0);
+
+ if (atomic_dec_and_test(&t->ldt_device_nr) &&
+ t->ldt_ops->ldto_stop)
t->ldt_ops->ldto_stop(t);
}
EXPORT_SYMBOL(lu_device_fini);
@@ -1254,7 +1300,6 @@ void lu_stack_fini(const struct lu_env *env, struct lu_device *top)
}
}
}
-EXPORT_SYMBOL(lu_stack_fini);
enum {
/**
@@ -1281,7 +1326,7 @@ static unsigned key_set_version;
int lu_context_key_register(struct lu_context_key *key)
{
int result;
- int i;
+ unsigned int i;
LASSERT(key->lct_init);
LASSERT(key->lct_fini);
@@ -1476,18 +1521,16 @@ void lu_context_key_quiesce(struct lu_context_key *key)
++key_set_version;
}
}
-EXPORT_SYMBOL(lu_context_key_quiesce);
void lu_context_key_revive(struct lu_context_key *key)
{
key->lct_tags &= ~LCT_QUIESCENT;
++key_set_version;
}
-EXPORT_SYMBOL(lu_context_key_revive);
static void keys_fini(struct lu_context *ctx)
{
- int i;
+ unsigned int i;
if (!ctx->lc_value)
return;
@@ -1501,7 +1544,7 @@ static void keys_fini(struct lu_context *ctx)
static int keys_fill(struct lu_context *ctx)
{
- int i;
+ unsigned int i;
LINVRNT(ctx->lc_value);
for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
@@ -1614,7 +1657,7 @@ EXPORT_SYMBOL(lu_context_enter);
*/
void lu_context_exit(struct lu_context *ctx)
{
- int i;
+ unsigned int i;
LINVRNT(ctx->lc_state == LCS_ENTERED);
ctx->lc_state = LCS_LEFT;
@@ -1642,7 +1685,6 @@ int lu_context_refill(struct lu_context *ctx)
{
return likely(ctx->lc_version == key_set_version) ? 0 : keys_fill(ctx);
}
-EXPORT_SYMBOL(lu_context_refill);
/**
* lu_ctx_tags/lu_ses_tags will be updated if there are new types of
@@ -1696,7 +1738,7 @@ static void lu_site_stats_get(struct cfs_hash *hs,
struct lu_site_stats *stats, int populated)
{
struct cfs_hash_bd bd;
- int i;
+ unsigned int i;
cfs_hash_for_each_bucket(hs, &bd, i) {
struct lu_site_bkt_data *bkt = cfs_hash_bd_extra_get(hs, &bd);
@@ -1940,3 +1982,73 @@ void lu_kmem_fini(struct lu_kmem_descr *caches)
}
}
EXPORT_SYMBOL(lu_kmem_fini);
+
+void lu_buf_free(struct lu_buf *buf)
+{
+ LASSERT(buf);
+ if (buf->lb_buf) {
+ LASSERT(buf->lb_len > 0);
+ kvfree(buf->lb_buf);
+ buf->lb_buf = NULL;
+ buf->lb_len = 0;
+ }
+}
+EXPORT_SYMBOL(lu_buf_free);
+
+void lu_buf_alloc(struct lu_buf *buf, size_t size)
+{
+ LASSERT(buf);
+ LASSERT(!buf->lb_buf);
+ LASSERT(!buf->lb_len);
+ buf->lb_buf = libcfs_kvzalloc(size, GFP_NOFS);
+ if (likely(buf->lb_buf))
+ buf->lb_len = size;
+}
+EXPORT_SYMBOL(lu_buf_alloc);
+
+void lu_buf_realloc(struct lu_buf *buf, size_t size)
+{
+ lu_buf_free(buf);
+ lu_buf_alloc(buf, size);
+}
+EXPORT_SYMBOL(lu_buf_realloc);
+
+struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len)
+{
+ if (!buf->lb_buf && !buf->lb_len)
+ lu_buf_alloc(buf, len);
+
+ if ((len > buf->lb_len) && buf->lb_buf)
+ lu_buf_realloc(buf, len);
+
+ return buf;
+}
+EXPORT_SYMBOL(lu_buf_check_and_alloc);
+
+/**
+ * Increase the size of the \a buf to at least \a len bytes.
+ * Preserves old data in the buffer.
+ * The old buffer remains unchanged on error.
+ * \retval 0 on success, or -ENOMEM if the allocation fails
+ */
+int lu_buf_check_and_grow(struct lu_buf *buf, size_t len)
+{
+ char *ptr;
+
+ if (len <= buf->lb_len)
+ return 0;
+
+ ptr = libcfs_kvzalloc(len, GFP_NOFS);
+ if (!ptr)
+ return -ENOMEM;
+
+ /* Copy the old contents into the new buf, then free the old buf */
+ if (buf->lb_buf) {
+ memcpy(ptr, buf->lb_buf, buf->lb_len);
+ kvfree(buf->lb_buf);
+ }
+
+ buf->lb_buf = ptr;
+ buf->lb_len = len;
+ return 0;
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
index 082f530c527c..c9445e5ec271 100644
--- a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
@@ -130,7 +130,7 @@ void class_handle_unhash(struct portals_handle *h)
}
EXPORT_SYMBOL(class_handle_unhash);
-void *class_handle2object(__u64 cookie)
+void *class_handle2object(__u64 cookie, const void *owner)
{
struct handle_bucket *bucket;
struct portals_handle *h;
@@ -145,7 +145,7 @@ void *class_handle2object(__u64 cookie)
rcu_read_lock();
list_for_each_entry_rcu(h, &bucket->head, h_link) {
- if (h->h_cookie != cookie)
+ if (h->h_cookie != cookie || h->h_owner != owner)
continue;
spin_lock(&h->h_lock);
@@ -164,8 +164,11 @@ EXPORT_SYMBOL(class_handle2object);
void class_handle_free_cb(struct rcu_head *rcu)
{
- struct portals_handle *h = RCU2HANDLE(rcu);
- void *ptr = (void *)(unsigned long)h->h_cookie;
+ struct portals_handle *h;
+ void *ptr;
+
+ h = container_of(rcu, struct portals_handle, h_rcu);
+ ptr = (void *)(unsigned long)h->h_cookie;
if (h->h_ops->hop_free)
h->h_ops->hop_free(ptr, h->h_size);
@@ -214,7 +217,7 @@ static int cleanup_all_handles(void)
struct portals_handle *h;
spin_lock(&handle_hash[i].lock);
- list_for_each_entry_rcu(h, &(handle_hash[i].head), h_link) {
+ list_for_each_entry_rcu(h, &handle_hash[i].head, h_link) {
CERROR("force clean handle %#llx addr %p ops %p\n",
h->h_cookie, h, h->h_ops);
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
index 5974a9bf77c0..ffa740aa861c 100644
--- a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
@@ -139,7 +139,6 @@ int class_add_uuid(const char *uuid, __u64 nid)
}
return 0;
}
-EXPORT_SYMBOL(class_add_uuid);
/* Delete the nids for one uuid if specified, otherwise delete all */
int class_del_uuid(const char *uuid)
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c
index 0eab1236501b..bbed1b72d52e 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_config.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c
@@ -37,6 +37,7 @@
#define DEBUG_SUBSYSTEM S_CLASS
#include "../include/obd_class.h"
#include <linux/string.h>
+#include "../include/lustre/lustre_ioctl.h"
#include "../include/lustre_log.h"
#include "../include/lprocfs_status.h"
#include "../include/lustre_param.h"
@@ -237,7 +238,7 @@ static int class_attach(struct lustre_cfg *lcfg)
/* recovery data */
init_waitqueue_head(&obd->obd_evict_inprogress_waitq);
- llog_group_init(&obd->obd_olg, FID_SEQ_LLOG);
+ llog_group_init(&obd->obd_olg);
obd->obd_conn_inprogress = 0;
@@ -250,15 +251,6 @@ static int class_attach(struct lustre_cfg *lcfg)
}
memcpy(obd->obd_uuid.uuid, uuid, len);
- /* do the attach */
- if (OBP(obd, attach)) {
- rc = OBP(obd, attach)(obd, sizeof(*lcfg), lcfg);
- if (rc) {
- rc = -EINVAL;
- goto out;
- }
- }
-
/* Detach drops this */
spin_lock(&obd->obd_dev_lock);
atomic_set(&obd->obd_refcount, 1);
@@ -422,17 +414,12 @@ static int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
}
/* Leave this on forever */
obd->obd_stopping = 1;
-
- /* wait for already-arrived-connections to finish. */
- while (obd->obd_conn_inprogress > 0) {
- spin_unlock(&obd->obd_dev_lock);
-
- cond_resched();
-
- spin_lock(&obd->obd_dev_lock);
- }
spin_unlock(&obd->obd_dev_lock);
+ while (obd->obd_conn_inprogress > 0)
+ yield();
+ smp_rmb();
+
if (lcfg->lcfg_bufcount >= 2 && LUSTRE_CFG_BUFLEN(lcfg, 1) > 0) {
for (flag = lustre_cfg_string(lcfg, 1); *flag != 0; flag++)
switch (*flag) {
@@ -526,11 +513,6 @@ void class_decref(struct obd_device *obd, const char *scope, const void *source)
CERROR("Cleanup %s returned %d\n",
obd->obd_name, err);
}
- if (OBP(obd, detach)) {
- err = OBP(obd, detach)(obd);
- if (err)
- CERROR("Detach returned %d\n", err);
- }
class_release_dev(obd);
}
}
@@ -756,7 +738,7 @@ static int process_param2_config(struct lustre_cfg *lcfg)
}
start = ktime_get();
- rc = call_usermodehelper(argv[0], argv, NULL, 1);
+ rc = call_usermodehelper(argv[0], argv, NULL, UMH_WAIT_PROC);
end = ktime_get();
if (rc < 0) {
@@ -1026,7 +1008,7 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
oldfs = get_fs();
set_fs(KERNEL_DS);
- rc = (var->fops->write)(&fakefile, sval,
+ rc = var->fops->write(&fakefile, sval,
vallen, NULL);
set_fs(oldfs);
}
@@ -1060,8 +1042,6 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
}
EXPORT_SYMBOL(class_process_proc_param);
-extern int lustre_check_exclusion(struct super_block *sb, char *svname);
-
/** Parse a configuration llog, doing various manipulations on them
* for various reasons, (modifications for compatibility, skip obsolete
* records, change uuids, etc), then class_process_config() resulting
@@ -1317,33 +1297,33 @@ static int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf,
if (rc < 0)
return rc;
- ptr += snprintf(ptr, end-ptr, "cmd=%05x ", lcfg->lcfg_command);
+ ptr += snprintf(ptr, end - ptr, "cmd=%05x ", lcfg->lcfg_command);
if (lcfg->lcfg_flags)
- ptr += snprintf(ptr, end-ptr, "flags=%#08x ",
+ ptr += snprintf(ptr, end - ptr, "flags=%#08x ",
lcfg->lcfg_flags);
if (lcfg->lcfg_num)
- ptr += snprintf(ptr, end-ptr, "num=%#08x ", lcfg->lcfg_num);
+ ptr += snprintf(ptr, end - ptr, "num=%#08x ", lcfg->lcfg_num);
if (lcfg->lcfg_nid) {
char nidstr[LNET_NIDSTR_SIZE];
libcfs_nid2str_r(lcfg->lcfg_nid, nidstr, sizeof(nidstr));
- ptr += snprintf(ptr, end-ptr, "nid=%s(%#llx)\n ",
+ ptr += snprintf(ptr, end - ptr, "nid=%s(%#llx)\n ",
nidstr, lcfg->lcfg_nid);
}
if (lcfg->lcfg_command == LCFG_MARKER) {
struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
- ptr += snprintf(ptr, end-ptr, "marker=%d(%#x)%s '%s'",
+ ptr += snprintf(ptr, end - ptr, "marker=%d(%#x)%s '%s'",
marker->cm_step, marker->cm_flags,
marker->cm_tgtname, marker->cm_comment);
} else {
int i;
for (i = 0; i < lcfg->lcfg_bufcount; i++) {
- ptr += snprintf(ptr, end-ptr, "%d:%s ", i,
+ ptr += snprintf(ptr, end - ptr, "%d:%s ", i,
lustre_cfg_string(lcfg, i));
}
}
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
index aa84a50e9904..0d3a3b05a637 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
@@ -37,11 +37,11 @@
*/
#define DEBUG_SUBSYSTEM S_CLASS
-#define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
+#define D_MOUNT (D_SUPER | D_CONFIG/*|D_WARNING */)
#define PRINT_CMD CDEBUG
#include "../include/obd.h"
-#include "../include/linux/lustre_compat25.h"
+#include "../include/lustre_compat.h"
#include "../include/obd_class.h"
#include "../include/lustre/lustre_user.h"
#include "../include/lustre_log.h"
@@ -68,7 +68,7 @@ static void (*kill_super_cb)(struct super_block *sb);
* this log, and is added to the mgc's list of logs to follow.
*/
int lustre_process_log(struct super_block *sb, char *logname,
- struct config_llog_instance *cfg)
+ struct config_llog_instance *cfg)
{
struct lustre_cfg *lcfg;
struct lustre_cfg_bufs *bufs;
@@ -384,17 +384,15 @@ int lustre_start_mgc(struct super_block *sb)
OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
OBD_CONNECT_LVB_TYPE;
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
+#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE
data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
-#else
-#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
#endif
if (lmd_is_client(lsi->lsi_lmd) &&
lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
data->ocd_version = LUSTRE_VERSION_CODE;
- rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
+ rc = obd_connect(NULL, &exp, obd, &obd->obd_uuid, data, NULL);
if (rc) {
CERROR("connect failed %d\n", rc);
goto out;
@@ -670,7 +668,6 @@ int lustre_common_put_super(struct super_block *sb)
}
/* Drop a ref to the mounted disk */
lustre_put_lsi(sb);
- lu_types_stop();
return rc;
}
EXPORT_SYMBOL(lustre_common_put_super);
@@ -731,7 +728,7 @@ int lustre_check_exclusion(struct super_block *sb, char *svname)
static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
{
const char *s1 = ptr, *s2;
- __u32 index, *exclude_list;
+ __u32 index = 0, *exclude_list;
int rc = 0, devmax;
/* The shortest an ost name can be is 8 chars: -OST0000.
@@ -758,7 +755,7 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
exclude_list[lmd->lmd_exclude_count++] = index;
else
CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
- (uint)(s2-s1), s1, rc);
+ (uint)(s2 - s1), s1, rc);
s1 = s2;
/* now we are pointing at ':' (next exclude)
* or ',' (end of excludes)
@@ -880,7 +877,7 @@ static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
*/
static int lmd_parse(char *options, struct lustre_mount_data *lmd)
{
- char *s1, *s2, *devname = NULL;
+ char *s1, *s2, *s3, *devname = NULL;
struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
int rc = 0;
@@ -913,6 +910,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
/* Skip whitespace and extra commas */
while (*s1 == ' ' || *s1 == ',')
s1++;
+ s3 = s1;
/* Client options are parsed in ll_options: eg. flock,
* user_xattr, acl
@@ -970,6 +968,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
rc = lmd_parse_mgssec(lmd, s1 + 7);
if (rc)
goto invalid;
+ s3 = s2;
clear++;
/* ost exclusion list */
} else if (strncmp(s1, "exclude=", 8) == 0) {
@@ -990,10 +989,19 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
size_t length, params_length;
char *tail = strchr(s1 + 6, ',');
- if (!tail)
+ if (!tail) {
length = strlen(s1);
- else
- length = tail - s1;
+ } else {
+ lnet_nid_t nid;
+ char *param_str = tail + 1;
+ int supplementary = 1;
+
+ while (!class_parse_nid_quiet(param_str, &nid,
+ &param_str)) {
+ supplementary = 0;
+ }
+ length = param_str - s1 - supplementary;
+ }
length -= 6;
params_length = strlen(lmd->lmd_params);
if (params_length + length + 1 >= LMD_PARAMS_MAXLEN)
@@ -1001,6 +1009,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
strncat(lmd->lmd_params, s1 + 6, length);
lmd->lmd_params[params_length + length] = '\0';
strlcat(lmd->lmd_params, " ", LMD_PARAMS_MAXLEN);
+ s3 = s1 + 6 + length;
clear++;
} else if (strncmp(s1, "osd=", 4) == 0) {
rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
@@ -1097,7 +1106,7 @@ static int lustre_fill_super(struct super_block *sb, void *data, int silent)
struct lustre_sb_info *lsi;
int rc;
- CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
+ CDEBUG(D_MOUNT | D_VFSTRACE, "VFS Op: sb %p\n", sb);
lsi = lustre_init_lsi(sb);
if (!lsi)
@@ -1133,7 +1142,7 @@ static int lustre_fill_super(struct super_block *sb, void *data, int silent)
} else {
rc = lustre_start_mgc(sb);
if (rc) {
- lustre_put_lsi(sb);
+ lustre_common_put_super(sb);
goto out;
}
/* Connect and start */
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c
index 8583a4a8c206..79104a66da96 100644
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -112,7 +112,7 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, u32 valid)
}
EXPORT_SYMBOL(obdo_from_inode);
-void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj)
+void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj)
{
ioobj->ioo_oid = oa->o_oi;
if (unlikely(!(oa->o_valid & OBD_MD_FLGROUP)))
@@ -125,7 +125,8 @@ void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj)
}
EXPORT_SYMBOL(obdo_to_ioobj);
-static void iattr_from_obdo(struct iattr *attr, struct obdo *oa, u32 valid)
+static void iattr_from_obdo(struct iattr *attr, const struct obdo *oa,
+ u32 valid)
{
valid &= oa->o_valid;
@@ -152,12 +153,14 @@ static void iattr_from_obdo(struct iattr *attr, struct obdo *oa, u32 valid)
}
#if 0 /* you shouldn't be able to change a file's type with setattr */
if (valid & OBD_MD_FLTYPE) {
- attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
+ attr->ia_mode = (attr->ia_mode & ~S_IFMT) |
+ (oa->o_mode & S_IFMT);
attr->ia_valid |= ATTR_MODE;
}
#endif
if (valid & OBD_MD_FLMODE) {
- attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT);
+ attr->ia_mode = (attr->ia_mode & S_IFMT) |
+ (oa->o_mode & ~S_IFMT);
attr->ia_valid |= ATTR_MODE;
if (!in_group_p(make_kgid(&init_user_ns, oa->o_gid)) &&
!capable(CFS_CAP_FSETID))
@@ -173,7 +176,7 @@ static void iattr_from_obdo(struct iattr *attr, struct obdo *oa, u32 valid)
}
}
-void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, u32 valid)
+void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid)
{
iattr_from_obdo(&op_data->op_attr, oa, valid);
if (valid & OBD_MD_FLBLOCKS) {
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index 5b29c4a44fe5..505582ff4d1e 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -41,6 +41,7 @@
#include "../include/cl_object.h"
#include "../include/lustre_fid.h"
#include "../include/lustre_acl.h"
+#include "../include/lustre/lustre_ioctl.h"
#include "../include/lustre_net.h"
#include "echo_internal.h"
@@ -64,14 +65,14 @@ struct echo_object {
struct echo_device *eo_dev;
struct list_head eo_obj_chain;
- struct lov_stripe_md *eo_lsm;
+ struct lov_oinfo *eo_oinfo;
atomic_t eo_npages;
int eo_deleted;
};
struct echo_object_conf {
struct cl_object_conf eoc_cl;
- struct lov_stripe_md **eoc_md;
+ struct lov_oinfo **eoc_oinfo;
};
struct echo_page {
@@ -152,9 +153,6 @@ struct echo_object_conf *cl2echo_conf(const struct cl_object_conf *c)
}
/** @} echo_helpers */
-
-static struct echo_object *cl_echo_object_find(struct echo_device *d,
- struct lov_stripe_md **lsm);
static int cl_echo_object_put(struct echo_object *eco);
static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
struct page **pages, int npages, int async);
@@ -413,10 +411,13 @@ static int echo_object_init(const struct lu_env *env, struct lu_object *obj,
cconf = lu2cl_conf(conf);
econf = cl2echo_conf(cconf);
- LASSERT(econf->eoc_md);
- eco->eo_lsm = *econf->eoc_md;
- /* clear the lsm pointer so that it won't get freed. */
- *econf->eoc_md = NULL;
+ LASSERT(econf->eoc_oinfo);
+ /*
+ * Transfer the oinfo pointer to eco so that it won't be
+ * freed.
+ */
+ eco->eo_oinfo = *econf->eoc_oinfo;
+ *econf->eoc_oinfo = NULL;
eco->eo_dev = ed;
atomic_set(&eco->eo_npages, 0);
@@ -429,52 +430,6 @@ static int echo_object_init(const struct lu_env *env, struct lu_object *obj,
return 0;
}
-/* taken from osc_unpackmd() */
-static int echo_alloc_memmd(struct echo_device *ed,
- struct lov_stripe_md **lsmp)
-{
- int lsm_size;
-
- /* If export is lov/osc then use their obd method */
- if (ed->ed_next)
- return obd_alloc_memmd(ed->ed_ec->ec_exp, lsmp);
- /* OFD has no unpackmd method, do everything here */
- lsm_size = lov_stripe_md_size(1);
-
- LASSERT(!*lsmp);
- *lsmp = kzalloc(lsm_size, GFP_NOFS);
- if (!*lsmp)
- return -ENOMEM;
-
- (*lsmp)->lsm_oinfo[0] = kzalloc(sizeof(struct lov_oinfo), GFP_NOFS);
- if (!(*lsmp)->lsm_oinfo[0]) {
- kfree(*lsmp);
- return -ENOMEM;
- }
-
- loi_init((*lsmp)->lsm_oinfo[0]);
- (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
- ostid_set_seq_echo(&(*lsmp)->lsm_oi);
-
- return lsm_size;
-}
-
-static int echo_free_memmd(struct echo_device *ed, struct lov_stripe_md **lsmp)
-{
- int lsm_size;
-
- /* If export is lov/osc then use their obd method */
- if (ed->ed_next)
- return obd_free_memmd(ed->ed_ec->ec_exp, lsmp);
- /* OFD has no unpackmd method, do everything here */
- lsm_size = lov_stripe_md_size(1);
-
- kfree((*lsmp)->lsm_oinfo[0]);
- kfree(*lsmp);
- *lsmp = NULL;
- return 0;
-}
-
static void echo_object_free(const struct lu_env *env, struct lu_object *obj)
{
struct echo_object *eco = cl2echo_obj(lu2cl(obj));
@@ -489,8 +444,7 @@ static void echo_object_free(const struct lu_env *env, struct lu_object *obj)
lu_object_fini(obj);
lu_object_header_fini(obj->lo_header);
- if (eco->eo_lsm)
- echo_free_memmd(eco->eo_dev, &eco->eo_lsm);
+ kfree(eco->eo_oinfo);
kmem_cache_free(echo_object_kmem, eco);
}
@@ -864,25 +818,21 @@ static struct lu_device_type echo_device_type = {
*/
/* Interfaces to echo client obd device */
-static struct echo_object *cl_echo_object_find(struct echo_device *d,
- struct lov_stripe_md **lsmp)
+static struct echo_object *
+cl_echo_object_find(struct echo_device *d, const struct ost_id *oi)
{
struct lu_env *env;
struct echo_thread_info *info;
struct echo_object_conf *conf;
- struct lov_stripe_md *lsm;
+ struct lov_oinfo *oinfo = NULL;
struct echo_object *eco;
struct cl_object *obj;
struct lu_fid *fid;
int refcheck;
int rc;
- LASSERT(lsmp);
- lsm = *lsmp;
- LASSERT(lsm);
- LASSERTF(ostid_id(&lsm->lsm_oi) != 0, DOSTID"\n", POSTID(&lsm->lsm_oi));
- LASSERTF(ostid_seq(&lsm->lsm_oi) == FID_SEQ_ECHO, DOSTID"\n",
- POSTID(&lsm->lsm_oi));
+ LASSERTF(ostid_id(oi), DOSTID "\n", POSTID(oi));
+ LASSERTF(ostid_seq(oi) == FID_SEQ_ECHO, DOSTID "\n", POSTID(oi));
/* Never return an object if the obd is to be freed. */
if (echo_dev2cl(d)->cd_lu_dev.ld_obd->obd_stopping)
@@ -895,16 +845,24 @@ static struct echo_object *cl_echo_object_find(struct echo_device *d,
info = echo_env_info(env);
conf = &info->eti_conf;
if (d->ed_next) {
- struct lov_oinfo *oinfo = lsm->lsm_oinfo[0];
+ oinfo = kzalloc(sizeof(*oinfo), GFP_NOFS);
+ if (!oinfo) {
+ eco = ERR_PTR(-ENOMEM);
+ goto out;
+ }
- LASSERT(oinfo);
- oinfo->loi_oi = lsm->lsm_oi;
+ oinfo->loi_oi = *oi;
conf->eoc_cl.u.coc_oinfo = oinfo;
}
- conf->eoc_md = lsmp;
+
+ /*
+ * If echo_object_init() is successful then ownership of oinfo
+ * is transferred to the object.
+ */
+ conf->eoc_oinfo = &oinfo;
fid = &info->eti_fid;
- rc = ostid_to_fid(fid, &lsm->lsm_oi, 0);
+ rc = ostid_to_fid(fid, (struct ost_id *)oi, 0);
if (rc != 0) {
eco = ERR_PTR(rc);
goto out;
@@ -927,6 +885,7 @@ static struct echo_object *cl_echo_object_find(struct echo_device *d,
}
out:
+ kfree(oinfo);
cl_env_put(env, &refcheck);
return eco;
}
@@ -1051,7 +1010,7 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
struct cl_io *io;
struct cl_page *clp;
struct lustre_handle lh = { 0 };
- int page_size = cl_page_size(obj);
+ size_t page_size = cl_page_size(obj);
int refcheck;
int rc;
int i;
@@ -1145,7 +1104,6 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
{
struct echo_object *eco;
struct echo_client_obd *ec = ed->ed_ec;
- struct lov_stripe_md *lsm = NULL;
int rc;
int created = 0;
@@ -1156,30 +1114,19 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
return -EINVAL;
}
- rc = echo_alloc_memmd(ed, &lsm);
- if (rc < 0) {
- CERROR("Cannot allocate md: rc = %d\n", rc);
- goto failed;
- }
-
- /* setup object ID here */
- lsm->lsm_oi = oa->o_oi;
+ if (!ostid_id(&oa->o_oi))
+ ostid_set_id(&oa->o_oi, ++last_object_id);
- if (ostid_id(&lsm->lsm_oi) == 0)
- ostid_set_id(&lsm->lsm_oi, ++last_object_id);
-
- rc = obd_create(env, ec->ec_exp, oa, &lsm, oti);
+ rc = obd_create(env, ec->ec_exp, oa, oti);
if (rc != 0) {
CERROR("Cannot create objects: rc = %d\n", rc);
goto failed;
}
created = 1;
- /* See what object ID we were given */
- oa->o_oi = lsm->lsm_oi;
oa->o_valid |= OBD_MD_FLID;
- eco = cl_echo_object_find(ed, &lsm);
+ eco = cl_echo_object_find(ed, &oa->o_oi);
if (IS_ERR(eco)) {
rc = PTR_ERR(eco);
goto failed;
@@ -1190,9 +1137,7 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
failed:
if (created && rc)
- obd_destroy(env, ec->ec_exp, oa, lsm, oti, NULL);
- if (lsm)
- echo_free_memmd(ed, &lsm);
+ obd_destroy(env, ec->ec_exp, oa, oti);
if (rc)
CERROR("create object failed with: rc = %d\n", rc);
return rc;
@@ -1201,32 +1146,21 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
static int echo_get_object(struct echo_object **ecop, struct echo_device *ed,
struct obdo *oa)
{
- struct lov_stripe_md *lsm = NULL;
struct echo_object *eco;
int rc;
- if ((oa->o_valid & OBD_MD_FLID) == 0 || ostid_id(&oa->o_oi) == 0) {
- /* disallow use of object id 0 */
- CERROR("No valid oid\n");
+ if (!(oa->o_valid & OBD_MD_FLID) || !(oa->o_valid & OBD_MD_FLGROUP) ||
+ !ostid_id(&oa->o_oi)) {
+ CERROR("invalid oid " DOSTID "\n", POSTID(&oa->o_oi));
return -EINVAL;
}
- rc = echo_alloc_memmd(ed, &lsm);
- if (rc < 0)
- return rc;
-
- lsm->lsm_oi = oa->o_oi;
- if (!(oa->o_valid & OBD_MD_FLGROUP))
- ostid_set_seq_echo(&lsm->lsm_oi);
-
rc = 0;
- eco = cl_echo_object_find(ed, &lsm);
+ eco = cl_echo_object_find(ed, &oa->o_oi);
if (!IS_ERR(eco))
*ecop = eco;
else
rc = PTR_ERR(eco);
- if (lsm)
- echo_free_memmd(ed, &lsm);
return rc;
}
@@ -1436,13 +1370,12 @@ static int echo_client_prep_commit(const struct lu_env *env,
npages = tot_pages;
for (i = 0; i < npages; i++, off += PAGE_SIZE) {
- rnb[i].offset = off;
- rnb[i].len = PAGE_SIZE;
- rnb[i].flags = brw_flags;
+ rnb[i].rnb_offset = off;
+ rnb[i].rnb_len = PAGE_SIZE;
+ rnb[i].rnb_flags = brw_flags;
}
ioo.ioo_bufcnt = npages;
- oti->oti_transno = 0;
lpages = npages;
ret = obd_preprw(env, rw, exp, oa, 1, &ioo, rnb, &lpages,
@@ -1452,14 +1385,14 @@ static int echo_client_prep_commit(const struct lu_env *env,
LASSERT(lpages == npages);
for (i = 0; i < lpages; i++) {
- struct page *page = lnb[i].page;
+ struct page *page = lnb[i].lnb_page;
/* read past eof? */
- if (!page && lnb[i].rc == 0)
+ if (!page && lnb[i].lnb_rc == 0)
continue;
if (async)
- lnb[i].flags |= OBD_BRW_ASYNC;
+ lnb[i].lnb_flags |= OBD_BRW_ASYNC;
if (ostid_id(&oa->o_oi) == ECHO_PERSISTENT_OBJID ||
(oa->o_valid & OBD_MD_FLFLAGS) == 0 ||
@@ -1469,13 +1402,13 @@ static int echo_client_prep_commit(const struct lu_env *env,
if (rw == OBD_BRW_WRITE)
echo_client_page_debug_setup(page, rw,
ostid_id(&oa->o_oi),
- rnb[i].offset,
- rnb[i].len);
+ rnb[i].rnb_offset,
+ rnb[i].rnb_len);
else
echo_client_page_debug_check(page,
ostid_id(&oa->o_oi),
- rnb[i].offset,
- rnb[i].len);
+ rnb[i].rnb_offset,
+ rnb[i].rnb_len);
}
ret = obd_commitrw(env, rw, exp, oa, 1, &ioo,
@@ -1613,8 +1546,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
rc = echo_get_object(&eco, ed, oa);
if (rc == 0) {
- rc = obd_destroy(env, ec->ec_exp, oa, NULL,
- &dummy_oti, NULL);
+ rc = obd_destroy(env, ec->ec_exp, oa, &dummy_oti);
if (rc == 0)
eco->eo_deleted = 1;
echo_put_object(eco);
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_internal.h b/drivers/staging/lustre/lustre/obdecho/echo_internal.h
index f5034a253f6d..966414fd5424 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_internal.h
+++ b/drivers/staging/lustre/lustre/obdecho/echo_internal.h
@@ -33,9 +33,9 @@
/* The persistent object (i.e. actually stores stuff!) */
#define ECHO_PERSISTENT_OBJID 1ULL
-#define ECHO_PERSISTENT_SIZE ((__u64)(1<<20))
+#define ECHO_PERSISTENT_SIZE ((__u64)(1 << 20))
/* block size to use for data verification */
-#define OBD_ECHO_BLOCK_SIZE (4<<10)
+#define OBD_ECHO_BLOCK_SIZE (4 << 10)
#endif
diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c
index 7e83d395b998..f0062d44ee03 100644
--- a/drivers/staging/lustre/lustre/osc/lproc_osc.c
+++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c
@@ -119,6 +119,7 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
spin_lock(&cli->cl_loi_list_lock);
cli->cl_max_rpcs_in_flight = val;
+ client_adjust_max_dirty(cli);
spin_unlock(&cli->cl_loi_list_lock);
return count;
@@ -136,10 +137,10 @@ static ssize_t max_dirty_mb_show(struct kobject *kobj,
int mult;
spin_lock(&cli->cl_loi_list_lock);
- val = cli->cl_dirty_max;
+ val = cli->cl_dirty_max_pages;
spin_unlock(&cli->cl_loi_list_lock);
- mult = 1 << 20;
+ mult = 1 << (20 - PAGE_SHIFT);
return lprocfs_read_frac_helper(buf, PAGE_SIZE, val, mult);
}
@@ -166,7 +167,7 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj,
return -ERANGE;
spin_lock(&cli->cl_loi_list_lock);
- cli->cl_dirty_max = (u32)(pages_number << PAGE_SHIFT);
+ cli->cl_dirty_max_pages = pages_number;
osc_wake_cache_waiters(cli);
spin_unlock(&cli->cl_loi_list_lock);
@@ -181,11 +182,11 @@ static int osc_cached_mb_seq_show(struct seq_file *m, void *v)
int shift = 20 - PAGE_SHIFT;
seq_printf(m,
- "used_mb: %d\n"
- "busy_cnt: %d\n",
- (atomic_read(&cli->cl_lru_in_list) +
- atomic_read(&cli->cl_lru_busy)) >> shift,
- atomic_read(&cli->cl_lru_busy));
+ "used_mb: %ld\n"
+ "busy_cnt: %ld\n",
+ (atomic_long_read(&cli->cl_lru_in_list) +
+ atomic_long_read(&cli->cl_lru_busy)) >> shift,
+ atomic_long_read(&cli->cl_lru_busy));
return 0;
}
@@ -197,8 +198,10 @@ static ssize_t osc_cached_mb_seq_write(struct file *file,
{
struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
struct client_obd *cli = &dev->u.cli;
- int pages_number, mult, rc;
+ long pages_number, rc;
char kernbuf[128];
+ int mult;
+ u64 val;
if (count >= sizeof(kernbuf))
return -EINVAL;
@@ -210,14 +213,18 @@ static ssize_t osc_cached_mb_seq_write(struct file *file,
mult = 1 << (20 - PAGE_SHIFT);
buffer += lprocfs_find_named_value(kernbuf, "used_mb:", &count) -
kernbuf;
- rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+ rc = lprocfs_write_frac_u64_helper(buffer, count, &val, mult);
if (rc)
return rc;
+ if (val > LONG_MAX)
+ return -ERANGE;
+ pages_number = (long)val;
+
if (pages_number < 0)
return -ERANGE;
- rc = atomic_read(&cli->cl_lru_in_list) - pages_number;
+ rc = atomic_long_read(&cli->cl_lru_in_list) - pages_number;
if (rc > 0) {
struct lu_env *env;
int refcheck;
@@ -244,7 +251,7 @@ static ssize_t cur_dirty_bytes_show(struct kobject *kobj,
int len;
spin_lock(&cli->cl_loi_list_lock);
- len = sprintf(buf, "%lu\n", cli->cl_dirty);
+ len = sprintf(buf, "%lu\n", cli->cl_dirty_pages << PAGE_SHIFT);
spin_unlock(&cli->cl_loi_list_lock);
return len;
@@ -583,6 +590,7 @@ static ssize_t max_pages_per_rpc_store(struct kobject *kobj,
}
spin_lock(&cli->cl_loi_list_lock);
cli->cl_max_pages_per_rpc = val;
+ client_adjust_max_dirty(cli);
spin_unlock(&cli->cl_loi_list_lock);
return count;
@@ -596,13 +604,14 @@ static ssize_t unstable_stats_show(struct kobject *kobj,
struct obd_device *dev = container_of(kobj, struct obd_device,
obd_kobj);
struct client_obd *cli = &dev->u.cli;
- int pages, mb;
+ long pages;
+ int mb;
- pages = atomic_read(&cli->cl_unstable_count);
+ pages = atomic_long_read(&cli->cl_unstable_count);
mb = (pages * PAGE_SIZE) >> 20;
- return sprintf(buf, "unstable_pages: %8d\n"
- "unstable_mb: %8d\n", pages, mb);
+ return sprintf(buf, "unstable_pages: %20ld\n"
+ "unstable_mb: %10d\n", pages, mb);
}
LUSTRE_RO_ATTR(unstable_stats);
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index d011135802d5..4bbe219add98 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -44,7 +44,7 @@ static int extent_debug; /* set it to be true for more debug */
static void osc_update_pending(struct osc_object *obj, int cmd, int delta);
static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
- int state);
+ enum osc_extent_state state);
static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
struct osc_async_page *oap, int sent, int rc);
static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap,
@@ -177,7 +177,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
{
struct osc_object *obj = ext->oe_obj;
struct osc_async_page *oap;
- int page_count;
+ size_t page_count;
int rc = 0;
if (!osc_object_is_locked(obj)) {
@@ -632,7 +632,7 @@ static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2)
*/
static struct osc_extent *osc_extent_find(const struct lu_env *env,
struct osc_object *obj, pgoff_t index,
- int *grants)
+ unsigned int *grants)
{
struct client_obd *cli = osc_cli(obj);
struct osc_lock *olck;
@@ -643,10 +643,10 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env,
struct osc_extent *found = NULL;
pgoff_t chunk;
pgoff_t max_end;
- int max_pages; /* max_pages_per_rpc */
- int chunksize;
+ unsigned int max_pages; /* max_pages_per_rpc */
+ unsigned int chunksize;
int ppc_bits; /* pages per chunk bits */
- int chunk_mask;
+ pgoff_t chunk_mask;
int rc;
cur = osc_extent_alloc(obj);
@@ -700,8 +700,8 @@ restart:
if (!ext)
ext = first_extent(obj);
while (ext) {
- loff_t ext_chk_start = ext->oe_start >> ppc_bits;
- loff_t ext_chk_end = ext->oe_end >> ppc_bits;
+ pgoff_t ext_chk_start = ext->oe_start >> ppc_bits;
+ pgoff_t ext_chk_end = ext->oe_end >> ppc_bits;
LASSERT(sanity_check_nolock(ext) == 0);
if (chunk > ext_chk_end + 1)
@@ -913,7 +913,7 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
return 0;
}
-static int extent_wait_cb(struct osc_extent *ext, int state)
+static int extent_wait_cb(struct osc_extent *ext, enum osc_extent_state state)
{
int ret;
@@ -928,7 +928,7 @@ static int extent_wait_cb(struct osc_extent *ext, int state)
* Wait for the extent's state to become @state.
*/
static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
- int state)
+ enum osc_extent_state state)
{
struct osc_object *obj = ext->oe_obj;
struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL,
@@ -958,8 +958,8 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), &lwi);
if (rc == -ETIMEDOUT) {
OSC_EXTENT_DUMP(D_ERROR, ext,
- "%s: wait ext to %d timedout, recovery in progress?\n",
- osc_export(obj)->exp_obd->obd_name, state);
+ "%s: wait ext to %u timedout, recovery in progress?\n",
+ osc_export(obj)->exp_obd->obd_name, state);
lwi = LWI_INTR(NULL, NULL);
rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state),
@@ -1099,7 +1099,7 @@ static int osc_extent_make_ready(const struct lu_env *env,
struct osc_async_page *oap;
struct osc_async_page *last = NULL;
struct osc_object *obj = ext->oe_obj;
- int page_count = 0;
+ unsigned int page_count = 0;
int rc;
/* we're going to grab page lock, so object lock must not be taken. */
@@ -1140,9 +1140,11 @@ static int osc_extent_make_ready(const struct lu_env *env,
* the size of file.
*/
if (!(last->oap_async_flags & ASYNC_COUNT_STABLE)) {
- last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
- LASSERT(last->oap_count > 0);
- LASSERT(last->oap_page_off + last->oap_count <= PAGE_SIZE);
+ int last_oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
+
+ LASSERT(last_oap_count > 0);
+ LASSERT(last->oap_page_off + last_oap_count <= PAGE_SIZE);
+ last->oap_count = last_oap_count;
spin_lock(&last->oap_lock);
last->oap_async_flags |= ASYNC_COUNT_STABLE;
spin_unlock(&last->oap_lock);
@@ -1174,7 +1176,8 @@ static int osc_extent_make_ready(const struct lu_env *env,
* called to expand the extent for the same IO. To expand the extent, the
* page index must be in the same or next chunk of ext->oe_end.
*/
-static int osc_extent_expand(struct osc_extent *ext, pgoff_t index, int *grants)
+static int osc_extent_expand(struct osc_extent *ext, pgoff_t index,
+ unsigned int *grants)
{
struct osc_object *obj = ext->oe_obj;
struct client_obd *cli = osc_cli(obj);
@@ -1183,7 +1186,7 @@ static int osc_extent_expand(struct osc_extent *ext, pgoff_t index, int *grants)
pgoff_t chunk = index >> ppc_bits;
pgoff_t end_chunk;
pgoff_t end_index;
- int chunksize = 1 << cli->cl_chunkbits;
+ unsigned int chunksize = 1 << cli->cl_chunkbits;
int rc = 0;
LASSERT(ext->oe_max_end >= index && ext->oe_start <= index);
@@ -1361,7 +1364,7 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
if (rc == 0 && srvlock) {
struct lu_device *ld = opg->ops_cl.cpl_obj->co_lu.lo_dev;
struct osc_stats *stats = &lu2osc_dev(ld)->od_stats;
- int bytes = oap->oap_count;
+ size_t bytes = oap->oap_count;
if (crt == CRT_READ)
stats->os_lockless_reads += bytes;
@@ -1383,18 +1386,16 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do { \
struct client_obd *__tmp = (cli); \
- CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d " \
- "unstable_pages: %d/%d dropped: %ld avail: %ld, " \
- "reserved: %ld, flight: %d } lru {in list: %d, " \
- "left: %d, waiters: %d }" fmt, \
+ CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %ld/%lu " \
+ "dropped: %ld avail: %ld, reserved: %ld, flight: %d }" \
+ "lru {in list: %ld, left: %ld, waiters: %d }" fmt "\n", \
__tmp->cl_import->imp_obd->obd_name, \
- __tmp->cl_dirty, __tmp->cl_dirty_max, \
- atomic_read(&obd_dirty_pages), obd_max_dirty_pages, \
- atomic_read(&obd_unstable_pages), obd_max_dirty_pages, \
+ __tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages, \
+ atomic_long_read(&obd_dirty_pages), obd_max_dirty_pages, \
__tmp->cl_lost_grant, __tmp->cl_avail_grant, \
__tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \
- atomic_read(&__tmp->cl_lru_in_list), \
- atomic_read(&__tmp->cl_lru_busy), \
+ atomic_long_read(&__tmp->cl_lru_in_list), \
+ atomic_long_read(&__tmp->cl_lru_busy), \
atomic_read(&__tmp->cl_lru_shrinkers), ##args); \
} while (0)
@@ -1404,8 +1405,8 @@ static void osc_consume_write_grant(struct client_obd *cli,
{
assert_spin_locked(&cli->cl_loi_list_lock);
LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
- atomic_inc(&obd_dirty_pages);
- cli->cl_dirty += PAGE_SIZE;
+ atomic_long_inc(&obd_dirty_pages);
+ cli->cl_dirty_pages++;
pga->flag |= OBD_BRW_FROM_GRANT;
CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n",
PAGE_SIZE, pga, pga->pg);
@@ -1424,12 +1425,12 @@ static void osc_release_write_grant(struct client_obd *cli,
}
pga->flag &= ~OBD_BRW_FROM_GRANT;
- atomic_dec(&obd_dirty_pages);
- cli->cl_dirty -= PAGE_SIZE;
+ atomic_long_dec(&obd_dirty_pages);
+ cli->cl_dirty_pages--;
if (pga->flag & OBD_BRW_NOCACHE) {
pga->flag &= ~OBD_BRW_NOCACHE;
- atomic_dec(&obd_dirty_transit_pages);
- cli->cl_dirty_transit -= PAGE_SIZE;
+ atomic_long_dec(&obd_dirty_transit_pages);
+ cli->cl_dirty_transit--;
}
}
@@ -1494,11 +1495,11 @@ static void osc_unreserve_grant(struct client_obd *cli,
static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
unsigned int lost_grant)
{
- int grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
+ unsigned long grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
spin_lock(&cli->cl_loi_list_lock);
- atomic_sub(nr_pages, &obd_dirty_pages);
- cli->cl_dirty -= nr_pages << PAGE_SHIFT;
+ atomic_long_sub(nr_pages, &obd_dirty_pages);
+ cli->cl_dirty_pages -= nr_pages;
cli->cl_lost_grant += lost_grant;
if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) {
/* borrow some grant from truncate to avoid the case that
@@ -1511,7 +1512,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
spin_unlock(&cli->cl_loi_list_lock);
CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n",
lost_grant, cli->cl_lost_grant,
- cli->cl_avail_grant, cli->cl_dirty);
+ cli->cl_avail_grant, cli->cl_dirty_pages << PAGE_SHIFT);
}
/**
@@ -1535,19 +1536,18 @@ static int osc_enter_cache_try(struct client_obd *cli,
{
int rc;
- OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
+ OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes);
rc = osc_reserve_grant(cli, bytes);
if (rc < 0)
return 0;
- if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max &&
- atomic_read(&obd_unstable_pages) + 1 +
- atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) {
+ if (cli->cl_dirty_pages <= cli->cl_dirty_max_pages &&
+ atomic_long_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
osc_consume_write_grant(cli, &oap->oap_brw_page);
if (transient) {
- cli->cl_dirty_transit += PAGE_SIZE;
- atomic_inc(&obd_dirty_transit_pages);
+ cli->cl_dirty_transit++;
+ atomic_long_inc(&obd_dirty_transit_pages);
oap->oap_brw_flags |= OBD_BRW_NOCACHE;
}
rc = 1;
@@ -1581,11 +1581,13 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
struct osc_object *osc = oap->oap_obj;
struct lov_oinfo *loi = osc->oo_oinfo;
struct osc_cache_waiter ocw;
- struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL,
- LWI_ON_SIGNAL_NOOP, NULL);
+ struct l_wait_info lwi;
int rc = -EDQUOT;
- OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(AT_OFF ? obd_timeout : at_max),
+ NULL, LWI_ON_SIGNAL_NOOP, NULL);
+
+ OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes);
spin_lock(&cli->cl_loi_list_lock);
@@ -1593,14 +1595,16 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
* of queued writes and create a discontiguous rpc stream
*/
if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) ||
- cli->cl_dirty_max < PAGE_SIZE ||
- cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync) {
+ !cli->cl_dirty_max_pages || cli->cl_ar.ar_force_sync ||
+ loi->loi_ar.ar_force_sync) {
+ OSC_DUMP_GRANT(D_CACHE, cli, "forced sync i/o\n");
rc = -EDQUOT;
goto out;
}
/* Hopefully normal case - cache space and write credits available */
if (osc_enter_cache_try(cli, oap, bytes, 0)) {
+ OSC_DUMP_GRANT(D_CACHE, cli, "granted from cache\n");
rc = 0;
goto out;
}
@@ -1615,7 +1619,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
init_waitqueue_head(&ocw.ocw_waitq);
ocw.ocw_oap = oap;
ocw.ocw_grant = bytes;
- while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) {
+ while (cli->cl_dirty_pages > 0 || cli->cl_w_in_flight > 0) {
list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters);
ocw.ocw_rc = 0;
spin_unlock(&cli->cl_loi_list_lock);
@@ -1629,32 +1633,49 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
spin_lock(&cli->cl_loi_list_lock);
- /* l_wait_event is interrupted by signal, or timed out */
if (rc < 0) {
- if (rc == -ETIMEDOUT) {
- OSC_DUMP_GRANT(D_ERROR, cli,
- "try to reserve %d.\n", bytes);
- osc_extent_tree_dump(D_ERROR, osc);
- rc = -EDQUOT;
- }
-
+ /* l_wait_event is interrupted by signal, or timed out */
list_del_init(&ocw.ocw_entry);
- goto out;
+ break;
}
-
LASSERT(list_empty(&ocw.ocw_entry));
rc = ocw.ocw_rc;
if (rc != -EDQUOT)
- goto out;
+ break;
if (osc_enter_cache_try(cli, oap, bytes, 0)) {
rc = 0;
- goto out;
+ break;
}
}
+
+ switch (rc) {
+ case 0:
+ OSC_DUMP_GRANT(D_CACHE, cli, "finally got grant space\n");
+ break;
+ case -ETIMEDOUT:
+ OSC_DUMP_GRANT(D_CACHE, cli,
+ "timeout, fall back to sync i/o\n");
+ osc_extent_tree_dump(D_CACHE, osc);
+ /* fall back to synchronous I/O */
+ rc = -EDQUOT;
+ break;
+ case -EINTR:
+ /* Ensures restartability - LU-3581 */
+ OSC_DUMP_GRANT(D_CACHE, cli, "interrupted\n");
+ rc = -ERESTARTSYS;
+ break;
+ case -EDQUOT:
+ OSC_DUMP_GRANT(D_CACHE, cli,
+ "no grant space, fall back to sync i/o\n");
+ break;
+ default:
+ CDEBUG(D_CACHE, "%s: event for cache space @ %p never arrived due to %d, fall back to sync i/o\n",
+ cli->cl_import->imp_obd->obd_name, &ocw, rc);
+ break;
+ }
out:
spin_unlock(&cli->cl_loi_list_lock);
- OSC_DUMP_GRANT(D_CACHE, cli, "returned %d.\n", rc);
return rc;
}
@@ -1670,19 +1691,17 @@ void osc_wake_cache_waiters(struct client_obd *cli)
ocw->ocw_rc = -EDQUOT;
/* we can't dirty more */
- if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) ||
- (atomic_read(&obd_unstable_pages) + 1 +
- atomic_read(&obd_dirty_pages) > obd_max_dirty_pages)) {
- CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n",
- cli->cl_dirty,
- cli->cl_dirty_max, obd_max_dirty_pages);
+ if ((cli->cl_dirty_pages > cli->cl_dirty_max_pages) ||
+ (atomic_long_read(&obd_dirty_pages) + 1 >
+ obd_max_dirty_pages)) {
+ CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %ld\n",
+ cli->cl_dirty_pages, cli->cl_dirty_max_pages,
+ obd_max_dirty_pages);
goto wakeup;
}
- ocw->ocw_rc = 0;
- if (!osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant, 0))
- ocw->ocw_rc = -EDQUOT;
-
+ if (osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant, 0))
+ ocw->ocw_rc = 0;
wakeup:
CDEBUG(D_CACHE, "wake up %p for oap %p, avail grant %ld, %d\n",
ocw, ocw->ocw_oap, cli->cl_avail_grant, ocw->ocw_rc);
@@ -1843,97 +1862,6 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
ar->ar_force_sync = 0;
}
-/**
- * Performs "unstable" page accounting. This function balances the
- * increment operations performed in osc_inc_unstable_pages. It is
- * registered as the RPC request callback, and is executed when the
- * bulk RPC is committed on the server. Thus at this point, the pages
- * involved in the bulk transfer are no longer considered unstable.
- */
-void osc_dec_unstable_pages(struct ptlrpc_request *req)
-{
- struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
- struct ptlrpc_bulk_desc *desc = req->rq_bulk;
- int page_count = desc->bd_iov_count;
- int i;
-
- /* No unstable page tracking */
- if (!cli->cl_cache)
- return;
-
- LASSERT(page_count >= 0);
-
- for (i = 0; i < page_count; i++)
- dec_node_page_state(desc->bd_iov[i].kiov_page,
- NR_UNSTABLE_NFS);
-
- atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
- LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
-
- atomic_sub(page_count, &cli->cl_unstable_count);
- LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
-
- atomic_sub(page_count, &obd_unstable_pages);
- LASSERT(atomic_read(&obd_unstable_pages) >= 0);
-
- spin_lock(&req->rq_lock);
- req->rq_committed = 1;
- req->rq_unstable = 0;
- spin_unlock(&req->rq_lock);
-
- wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
-}
-
-/* "unstable" page accounting. See: osc_dec_unstable_pages. */
-void osc_inc_unstable_pages(struct ptlrpc_request *req)
-{
- struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
- struct ptlrpc_bulk_desc *desc = req->rq_bulk;
- long page_count = desc->bd_iov_count;
- int i;
-
- /* No unstable page tracking */
- if (!cli->cl_cache)
- return;
-
- LASSERT(page_count >= 0);
-
- for (i = 0; i < page_count; i++)
- inc_node_page_state(desc->bd_iov[i].kiov_page,
- NR_UNSTABLE_NFS);
-
- LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
- atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
-
- LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
- atomic_add(page_count, &cli->cl_unstable_count);
-
- LASSERT(atomic_read(&obd_unstable_pages) >= 0);
- atomic_add(page_count, &obd_unstable_pages);
-
- spin_lock(&req->rq_lock);
-
- /*
- * If the request has already been committed (i.e. brw_commit
- * called via rq_commit_cb), we need to undo the unstable page
- * increments we just performed because rq_commit_cb wont be
- * called again. Otherwise, just set the commit callback so the
- * unstable page accounting is properly updated when the request
- * is committed
- */
- if (req->rq_committed) {
- /* Drop lock before calling osc_dec_unstable_pages */
- spin_unlock(&req->rq_lock);
- osc_dec_unstable_pages(req);
- spin_lock(&req->rq_lock);
- } else {
- req->rq_unstable = 1;
- req->rq_commit_cb = osc_dec_unstable_pages;
- }
-
- spin_unlock(&req->rq_lock);
-}
-
/* this must be called holding the loi list lock to give coverage to exit_cache,
* async_flag maintenance, and oap_request
*/
@@ -1945,9 +1873,6 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
__u64 xid = 0;
if (oap->oap_request) {
- if (!rc)
- osc_inc_unstable_pages(oap->oap_request);
-
xid = ptlrpc_req_xid(oap->oap_request);
ptlrpc_req_finished(oap->oap_request);
oap->oap_request = NULL;
@@ -1979,7 +1904,7 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
*/
static int try_to_add_extent_for_io(struct client_obd *cli,
struct osc_extent *ext, struct list_head *rpclist,
- int *pc, unsigned int *max_pages)
+ unsigned int *pc, unsigned int *max_pages)
{
struct osc_extent *tmp;
struct osc_async_page *oap = list_first_entry(&ext->oe_pages,
@@ -2032,12 +1957,13 @@ static int try_to_add_extent_for_io(struct client_obd *cli,
* 5. Traverse the extent tree from the 1st extent;
* 6. Above steps exit if there is no space in this RPC.
*/
-static int get_write_extents(struct osc_object *obj, struct list_head *rpclist)
+static unsigned int get_write_extents(struct osc_object *obj,
+ struct list_head *rpclist)
{
struct client_obd *cli = osc_cli(obj);
struct osc_extent *ext;
struct osc_extent *temp;
- int page_count = 0;
+ unsigned int page_count = 0;
unsigned int max_pages = cli->cl_max_pages_per_rpc;
LASSERT(osc_object_is_locked(obj));
@@ -2175,7 +2101,7 @@ osc_send_read_rpc(const struct lu_env *env, struct client_obd *cli,
struct osc_extent *ext;
struct osc_extent *next;
LIST_HEAD(rpclist);
- int page_count = 0;
+ unsigned int page_count = 0;
unsigned int max_pages = cli->cl_max_pages_per_rpc;
int rc = 0;
@@ -2390,7 +2316,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
struct client_obd *cli = oap->oap_cli;
struct osc_object *osc = oap->oap_obj;
pgoff_t index;
- int grants = 0;
+ unsigned int grants = 0, tmp;
int brw_flags = OBD_BRW_ASYNC;
int cmd = OBD_BRW_WRITE;
int need_release = 0;
@@ -2434,9 +2360,6 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
return rc;
}
- if (osc_over_unstable_soft_limit(cli))
- brw_flags |= OBD_BRW_SOFT_SYNC;
-
oap->oap_cmd = cmd;
oap->oap_page_off = ops->ops_from;
oap->oap_count = ops->ops_to - ops->ops_from;
@@ -2476,7 +2399,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
grants = 0;
need_release = 1;
} else if (ext->oe_end < index) {
- int tmp = grants;
+ tmp = grants;
/* try to expand this extent */
rc = osc_extent_expand(ext, index, &tmp);
if (rc < 0) {
@@ -2501,7 +2424,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
}
if (!ext) {
- int tmp = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
+ tmp = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
/* try to find new extent to cover this page */
LASSERT(!oio->oi_active);
@@ -2645,7 +2568,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
goto out;
spin_lock(&oap->oap_lock);
- oap->oap_async_flags |= ASYNC_READY|ASYNC_URGENT;
+ oap->oap_async_flags |= ASYNC_READY | ASYNC_URGENT;
spin_unlock(&oap->oap_lock);
if (memory_pressure_get())
diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
index c8c3f1ca77be..9c8de15c309c 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
@@ -64,7 +64,7 @@ struct osc_io {
/** true if this io is lockless. */
unsigned int oi_lockless;
/** how many LRU pages are reserved for this IO */
- int oi_lru_reserved;
+ unsigned long oi_lru_reserved;
/** active extents, we know how many bytes is going to be written,
* so having an active extent will prevent it from being fragmented
@@ -389,7 +389,7 @@ extern struct lu_device_type osc_device_type;
extern struct lu_context_key osc_key;
extern struct lu_context_key osc_session_key;
-#define OSC_FLAGS (ASYNC_URGENT|ASYNC_READY)
+#define OSC_FLAGS (ASYNC_URGENT | ASYNC_READY)
int osc_lock_init(const struct lu_env *env,
struct cl_object *obj, struct cl_lock *lock,
@@ -608,7 +608,7 @@ struct osc_extent {
/** link list of osc_object's oo_{hp|urgent|locking}_exts. */
struct list_head oe_link;
/** state of this extent */
- unsigned int oe_state;
+ enum osc_extent_state oe_state;
/** flags for this extent. */
unsigned int oe_intree:1,
/** 0 is write, 1 is read */
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
index 7a27f0961955..67fe0a254991 100644
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -71,7 +71,6 @@ struct osc_async_page {
struct client_obd *oap_cli;
struct osc_object *oap_obj;
- struct ldlm_lock *oap_ldlm_lock;
spinlock_t oap_lock;
};
@@ -134,9 +133,9 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg);
int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
struct list_head *ext_list, int cmd);
-int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
- int target, bool force);
-int osc_lru_reclaim(struct client_obd *cli);
+long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+ long target, bool force);
+long osc_lru_reclaim(struct client_obd *cli);
unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
@@ -198,7 +197,7 @@ int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
void osc_inc_unstable_pages(struct ptlrpc_request *req);
void osc_dec_unstable_pages(struct ptlrpc_request *req);
-int osc_over_unstable_soft_limit(struct client_obd *cli);
+bool osc_over_unstable_soft_limit(struct client_obd *cli);
struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
struct osc_object *obj, pgoff_t index,
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c
index 6e3dcd38913f..8a559cbcdd0c 100644
--- a/drivers/staging/lustre/lustre/osc/osc_io.c
+++ b/drivers/staging/lustre/lustre/osc/osc_io.c
@@ -109,11 +109,11 @@ static int osc_io_submit(const struct lu_env *env,
struct cl_page_list *qin = &queue->c2_qin;
struct cl_page_list *qout = &queue->c2_qout;
- int queued = 0;
+ unsigned int queued = 0;
int result = 0;
int cmd;
int brw_flags;
- int max_pages;
+ unsigned int max_pages;
LASSERT(qin->pl_nr > 0);
@@ -163,14 +163,19 @@ static int osc_io_submit(const struct lu_env *env,
continue;
}
- cl_page_list_move(qout, qin, page);
spin_lock(&oap->oap_lock);
- oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY;
+ oap->oap_async_flags = ASYNC_URGENT | ASYNC_READY;
oap->oap_async_flags |= ASYNC_COUNT_STABLE;
spin_unlock(&oap->oap_lock);
osc_page_submit(env, opg, crt, brw_flags);
list_add_tail(&oap->oap_pending_item, &list);
+
+ if (page->cp_sync_io)
+ cl_page_list_move(qout, qin, page);
+ else /* async IO */
+ cl_page_list_del(env, qin, page);
+
if (++queued == max_pages) {
queued = 0;
result = osc_queue_sync_pages(env, osc, &list, cmd,
@@ -195,7 +200,7 @@ static int osc_io_submit(const struct lu_env *env,
* Expand stripe KMS if necessary.
*/
static void osc_page_touch_at(const struct lu_env *env,
- struct cl_object *obj, pgoff_t idx, unsigned to)
+ struct cl_object *obj, pgoff_t idx, size_t to)
{
struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
struct cl_attr *attr = &osc_env_info(env)->oti_attr;
@@ -228,7 +233,7 @@ static void osc_page_touch_at(const struct lu_env *env,
attr->cat_size = kms;
valid |= CAT_SIZE;
}
- cl_object_attr_set(env, obj, attr, valid);
+ cl_object_attr_update(env, obj, attr, valid);
cl_object_attr_unlock(obj);
}
@@ -314,8 +319,8 @@ static int osc_io_rw_iter_init(const struct lu_env *env,
struct osc_object *osc = cl2osc(ios->cis_obj);
struct client_obd *cli = osc_cli(osc);
unsigned long c;
- unsigned int npages;
- unsigned int max_pages;
+ unsigned long npages;
+ unsigned long max_pages;
if (cl_io_is_append(io))
return 0;
@@ -328,15 +333,15 @@ static int osc_io_rw_iter_init(const struct lu_env *env,
if (npages > max_pages)
npages = max_pages;
- c = atomic_read(cli->cl_lru_left);
+ c = atomic_long_read(cli->cl_lru_left);
if (c < npages && osc_lru_reclaim(cli) > 0)
- c = atomic_read(cli->cl_lru_left);
+ c = atomic_long_read(cli->cl_lru_left);
while (c >= npages) {
- if (c == atomic_cmpxchg(cli->cl_lru_left, c, c - npages)) {
+ if (c == atomic_long_cmpxchg(cli->cl_lru_left, c, c - npages)) {
oio->oi_lru_reserved = npages;
break;
}
- c = atomic_read(cli->cl_lru_left);
+ c = atomic_long_read(cli->cl_lru_left);
}
return 0;
@@ -350,7 +355,7 @@ static void osc_io_rw_iter_fini(const struct lu_env *env,
struct client_obd *cli = osc_cli(osc);
if (oio->oi_lru_reserved > 0) {
- atomic_add(oio->oi_lru_reserved, cli->cl_lru_left);
+ atomic_long_add(oio->oi_lru_reserved, cli->cl_lru_left);
oio->oi_lru_reserved = 0;
}
oio->oi_write_osclock = NULL;
@@ -364,7 +369,7 @@ static int osc_io_fault_start(const struct lu_env *env,
io = ios->cis_io;
fio = &io->u.ci_fault;
- CDEBUG(D_INFO, "%lu %d %d\n",
+ CDEBUG(D_INFO, "%lu %d %zu\n",
fio->ft_index, fio->ft_writable, fio->ft_nob);
/*
* If mapping is writeable, adjust kms to cover this page,
@@ -471,18 +476,21 @@ static int osc_io_setattr_start(const struct lu_env *env,
attr->cat_ctime = lvb->lvb_ctime;
cl_valid |= CAT_CTIME;
}
- result = cl_object_attr_set(env, obj, attr, cl_valid);
+ result = cl_object_attr_update(env, obj, attr,
+ cl_valid);
}
cl_object_attr_unlock(obj);
}
memset(oa, 0, sizeof(*oa));
if (result == 0) {
oa->o_oi = loi->loi_oi;
+ obdo_set_parent_fid(oa, io->u.ci_setattr.sa_parent_fid);
+ oa->o_stripe_idx = io->u.ci_setattr.sa_stripe_index;
oa->o_mtime = attr->cat_mtime;
oa->o_atime = attr->cat_atime;
oa->o_ctime = attr->cat_ctime;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME |
- OBD_MD_FLCTIME | OBD_MD_FLMTIME;
+ oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME |
+ OBD_MD_FLCTIME | OBD_MD_FLMTIME;
if (ia_valid & ATTR_SIZE) {
oa->o_size = size;
oa->o_blocks = OBD_OBJECT_EOF;
@@ -559,7 +567,7 @@ static int osc_io_read_start(const struct lu_env *env,
if (!slice->cis_io->ci_noatime) {
cl_object_attr_lock(obj);
attr->cat_atime = ktime_get_real_seconds();
- rc = cl_object_attr_set(env, obj, attr, CAT_ATIME);
+ rc = cl_object_attr_update(env, obj, attr, CAT_ATIME);
cl_object_attr_unlock(obj);
}
return rc;
@@ -576,7 +584,7 @@ static int osc_io_write_start(const struct lu_env *env,
cl_object_attr_lock(obj);
attr->cat_ctime = ktime_get_real_seconds();
attr->cat_mtime = attr->cat_ctime;
- rc = cl_object_attr_set(env, obj, attr, CAT_MTIME | CAT_CTIME);
+ rc = cl_object_attr_update(env, obj, attr, CAT_MTIME | CAT_CTIME);
cl_object_attr_unlock(obj);
return rc;
diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c
index 717d3ffb6789..39a8a5851603 100644
--- a/drivers/staging/lustre/lustre/osc/osc_lock.c
+++ b/drivers/staging/lustre/lustre/osc/osc_lock.c
@@ -222,7 +222,7 @@ static void osc_lock_lvb_update(const struct lu_env *env,
ldlm_lock_allow_match_locked(dlmlock);
}
- cl_object_attr_set(env, obj, attr, valid);
+ cl_object_attr_update(env, obj, attr, valid);
cl_object_attr_unlock(obj);
}
@@ -467,7 +467,7 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env,
*/
attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
- cl_object_attr_set(env, obj, attr, CAT_KMS);
+ cl_object_attr_update(env, obj, attr, CAT_KMS);
cl_object_attr_unlock(obj);
unlock_res_and_lock(dlmlock);
diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c
index d211d1905e83..aae3a2d4243f 100644
--- a/drivers/staging/lustre/lustre/osc/osc_object.c
+++ b/drivers/staging/lustre/lustre/osc/osc_object.c
@@ -159,8 +159,8 @@ static int osc_attr_get(const struct lu_env *env, struct cl_object *obj,
return 0;
}
-static int osc_attr_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned valid)
+static int osc_attr_update(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned int valid)
{
struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo;
struct ost_lvb *lvb = &oinfo->loi_lvb;
@@ -195,7 +195,6 @@ static int osc_object_glimpse(const struct lu_env *env,
static int osc_object_ast_clear(struct ldlm_lock *lock, void *data)
{
- LASSERT(lock->l_granted_mode == lock->l_req_mode);
if (lock->l_ast_data == data)
lock->l_ast_data = NULL;
return LDLM_ITER_CONTINUE;
@@ -262,7 +261,7 @@ static const struct cl_object_operations osc_ops = {
.coo_lock_init = osc_lock_init,
.coo_io_init = osc_io_init,
.coo_attr_get = osc_attr_get,
- .coo_attr_set = osc_attr_set,
+ .coo_attr_update = osc_attr_update,
.coo_glimpse = osc_object_glimpse,
.coo_prune = osc_object_prune
};
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
index 355f496a2093..2a7a70aa9e80 100644
--- a/drivers/staging/lustre/lustre/osc/osc_page.c
+++ b/drivers/staging/lustre/lustre/osc/osc_page.c
@@ -323,32 +323,6 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
return result;
}
-int osc_over_unstable_soft_limit(struct client_obd *cli)
-{
- long obd_upages, obd_dpages, osc_upages;
-
- /* Can't check cli->cl_unstable_count, therefore, no soft limit */
- if (!cli)
- return 0;
-
- obd_upages = atomic_read(&obd_unstable_pages);
- obd_dpages = atomic_read(&obd_dirty_pages);
-
- osc_upages = atomic_read(&cli->cl_unstable_count);
-
- /*
- * obd_max_dirty_pages is the max number of (dirty + unstable)
- * pages allowed at any given time. To simulate an unstable page
- * only limit, we subtract the current number of dirty pages
- * from this max. This difference is roughly the amount of pages
- * currently available for unstable pages. Thus, the soft limit
- * is half of that difference. Check osc_upages to ensure we don't
- * set SOFT_SYNC for OSCs without any outstanding unstable pages.
- */
- return osc_upages &&
- obd_upages >= (obd_max_dirty_pages - obd_dpages) / 2;
-}
-
/**
* Helper function called by osc_io_submit() for every page in an immediate
* transfer (i.e., transferred synchronously).
@@ -368,9 +342,6 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
oap->oap_count = opg->ops_to - opg->ops_from;
oap->oap_brw_flags = brw_flags | OBD_BRW_SYNC;
- if (osc_over_unstable_soft_limit(oap->oap_cli))
- oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
-
if (capable(CFS_CAP_SYS_RESOURCE)) {
oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
oap->oap_cmd |= OBD_BRW_NOQUOTA;
@@ -409,7 +380,7 @@ static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */
static int osc_cache_too_much(struct client_obd *cli)
{
struct cl_client_cache *cache = cli->cl_cache;
- int pages = atomic_read(&cli->cl_lru_in_list);
+ long pages = atomic_long_read(&cli->cl_lru_in_list);
unsigned long budget;
budget = cache->ccc_lru_max / (atomic_read(&cache->ccc_users) - 2);
@@ -417,7 +388,7 @@ static int osc_cache_too_much(struct client_obd *cli)
/* if it's going to run out LRU slots, we should free some, but not
* too much to maintain fairness among OSCs.
*/
- if (atomic_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) {
+ if (atomic_long_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) {
if (pages >= budget)
return lru_shrink_max;
else if (pages >= budget / 2)
@@ -444,7 +415,7 @@ void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
{
LIST_HEAD(lru);
struct osc_async_page *oap;
- int npages = 0;
+ long npages = 0;
list_for_each_entry(oap, plist, oap_pending_item) {
struct osc_page *opg = oap2osc_page(oap);
@@ -460,8 +431,8 @@ void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
if (npages > 0) {
spin_lock(&cli->cl_lru_list_lock);
list_splice_tail(&lru, &cli->cl_lru_list);
- atomic_sub(npages, &cli->cl_lru_busy);
- atomic_add(npages, &cli->cl_lru_in_list);
+ atomic_long_sub(npages, &cli->cl_lru_busy);
+ atomic_long_add(npages, &cli->cl_lru_in_list);
spin_unlock(&cli->cl_lru_list_lock);
/* XXX: May set force to be true for better performance */
@@ -472,9 +443,9 @@ void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
static void __osc_lru_del(struct client_obd *cli, struct osc_page *opg)
{
- LASSERT(atomic_read(&cli->cl_lru_in_list) > 0);
+ LASSERT(atomic_long_read(&cli->cl_lru_in_list) > 0);
list_del_init(&opg->ops_lru);
- atomic_dec(&cli->cl_lru_in_list);
+ atomic_long_dec(&cli->cl_lru_in_list);
}
/**
@@ -488,12 +459,12 @@ static void osc_lru_del(struct client_obd *cli, struct osc_page *opg)
if (!list_empty(&opg->ops_lru)) {
__osc_lru_del(cli, opg);
} else {
- LASSERT(atomic_read(&cli->cl_lru_busy) > 0);
- atomic_dec(&cli->cl_lru_busy);
+ LASSERT(atomic_long_read(&cli->cl_lru_busy) > 0);
+ atomic_long_dec(&cli->cl_lru_busy);
}
spin_unlock(&cli->cl_lru_list_lock);
- atomic_inc(cli->cl_lru_left);
+ atomic_long_inc(cli->cl_lru_left);
/* this is a great place to release more LRU pages if
* this osc occupies too many LRU pages and kernel is
* stealing one of them.
@@ -518,7 +489,7 @@ static void osc_lru_use(struct client_obd *cli, struct osc_page *opg)
spin_lock(&cli->cl_lru_list_lock);
__osc_lru_del(cli, opg);
spin_unlock(&cli->cl_lru_list_lock);
- atomic_inc(&cli->cl_lru_busy);
+ atomic_long_inc(&cli->cl_lru_busy);
}
}
@@ -540,10 +511,32 @@ static void discard_pagevec(const struct lu_env *env, struct cl_io *io,
}
/**
+ * Return true if a cl_page is busy, i.e. it's in use and can't be released.
+ *
+ * If unstable accounting (ccc_unstable_check) is on, bulk transfer may hold
+ * one refcount for recovery, so the vmpage refcount is checked as well;
+ * otherwise the cl_page could be destroyed while its vmpage can't be reused.
+ */
+static inline bool lru_page_busy(struct client_obd *cli, struct cl_page *page)
+{
+ if (cl_page_in_use_noref(page))
+ return true;
+
+ if (cli->cl_cache->ccc_unstable_check) {
+ struct page *vmpage = cl_page_vmpage(page);
+
+ /* vmpage has two known users: cl_page and VM page cache */
+ if (page_count(vmpage) - page_mapcount(vmpage) > 2)
+ return true;
+ }
+ return false;
+}
+
+/**
* Drop @target of pages from LRU at most.
*/
-int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
- int target, bool force)
+long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+ long target, bool force)
{
struct cl_io *io;
struct cl_object *clobj = NULL;
@@ -551,12 +544,12 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
struct osc_page *opg;
struct osc_page *temp;
int maxscan = 0;
- int count = 0;
+ long count = 0;
int index = 0;
int rc = 0;
- LASSERT(atomic_read(&cli->cl_lru_in_list) >= 0);
- if (atomic_read(&cli->cl_lru_in_list) == 0 || target <= 0)
+ LASSERT(atomic_long_read(&cli->cl_lru_in_list) >= 0);
+ if (atomic_long_read(&cli->cl_lru_in_list) == 0 || target <= 0)
return 0;
if (!force) {
@@ -575,7 +568,7 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
io = &osc_env_info(env)->oti_io;
spin_lock(&cli->cl_lru_list_lock);
- maxscan = min(target << 1, atomic_read(&cli->cl_lru_in_list));
+ maxscan = min(target << 1, atomic_long_read(&cli->cl_lru_in_list));
list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) {
struct cl_page *page;
bool will_free = false;
@@ -584,7 +577,7 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
break;
page = opg->ops_cl.cpl_page;
- if (cl_page_in_use_noref(page)) {
+ if (lru_page_busy(cli, page)) {
list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
continue;
}
@@ -620,7 +613,7 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
}
if (cl_page_own_try(env, io, page) == 0) {
- if (!cl_page_in_use_noref(page)) {
+ if (!lru_page_busy(cli, page)) {
/* remove it from lru list earlier to avoid
* lock contention
*/
@@ -663,24 +656,19 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
atomic_dec(&cli->cl_lru_shrinkers);
if (count > 0) {
- atomic_add(count, cli->cl_lru_left);
+ atomic_long_add(count, cli->cl_lru_left);
wake_up_all(&osc_lru_waitq);
}
return count > 0 ? count : rc;
}
-static inline int max_to_shrink(struct client_obd *cli)
-{
- return min(atomic_read(&cli->cl_lru_in_list) >> 1, lru_shrink_max);
-}
-
-int osc_lru_reclaim(struct client_obd *cli)
+long osc_lru_reclaim(struct client_obd *cli)
{
struct cl_env_nest nest;
struct lu_env *env;
struct cl_client_cache *cache = cli->cl_cache;
int max_scans;
- int rc = 0;
+ long rc = 0;
LASSERT(cache);
@@ -693,15 +681,15 @@ int osc_lru_reclaim(struct client_obd *cli)
if (rc == -EBUSY)
rc = 0;
- CDEBUG(D_CACHE, "%s: Free %d pages from own LRU: %p.\n",
+ CDEBUG(D_CACHE, "%s: Free %ld pages from own LRU: %p.\n",
cli->cl_import->imp_obd->obd_name, rc, cli);
goto out;
}
- CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %d, busy: %d.\n",
+ CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %ld, busy: %ld.\n",
cli->cl_import->imp_obd->obd_name, cli,
- atomic_read(&cli->cl_lru_in_list),
- atomic_read(&cli->cl_lru_busy));
+ atomic_long_read(&cli->cl_lru_in_list),
+ atomic_long_read(&cli->cl_lru_busy));
/* Reclaim LRU slots from other client_obd as it can't free enough
* from its own. This should rarely happen.
@@ -717,10 +705,10 @@ int osc_lru_reclaim(struct client_obd *cli)
cli = list_entry(cache->ccc_lru.next, struct client_obd,
cl_lru_osc);
- CDEBUG(D_CACHE, "%s: cli %p LRU pages: %d, busy: %d.\n",
+ CDEBUG(D_CACHE, "%s: cli %p LRU pages: %ld, busy: %ld.\n",
cli->cl_import->imp_obd->obd_name, cli,
- atomic_read(&cli->cl_lru_in_list),
- atomic_read(&cli->cl_lru_busy));
+ atomic_long_read(&cli->cl_lru_in_list),
+ atomic_long_read(&cli->cl_lru_busy));
list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
if (osc_cache_too_much(cli) > 0) {
@@ -737,11 +725,18 @@ int osc_lru_reclaim(struct client_obd *cli)
out:
cl_env_nested_put(&nest, env);
- CDEBUG(D_CACHE, "%s: cli %p freed %d pages.\n",
+ CDEBUG(D_CACHE, "%s: cli %p freed %ld pages.\n",
cli->cl_import->imp_obd->obd_name, cli, rc);
return rc;
}
+/**
+ * osc_lru_reserve() is called to reserve an LRU slot for a cl_page.
+ *
+ * Usually the LRU slots are reserved in osc_io_rw_iter_init().
+ * Only in the case that the LRU slots are in extreme shortage, it should
+ * have reserved enough slots for an IO.
+ */
static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
struct osc_page *opg)
{
@@ -758,8 +753,8 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
goto out;
}
- LASSERT(atomic_read(cli->cl_lru_left) >= 0);
- while (!atomic_add_unless(cli->cl_lru_left, -1, 0)) {
+ LASSERT(atomic_long_read(cli->cl_lru_left) >= 0);
+ while (!atomic_long_add_unless(cli->cl_lru_left, -1, 0)) {
/* run out of LRU spaces, try to drop some by itself */
rc = osc_lru_reclaim(cli);
if (rc < 0)
@@ -770,7 +765,7 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
cond_resched();
rc = l_wait_event(osc_lru_waitq,
- atomic_read(cli->cl_lru_left) > 0,
+ atomic_long_read(cli->cl_lru_left) > 0,
&lwi);
if (rc < 0)
@@ -779,7 +774,7 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
out:
if (rc >= 0) {
- atomic_inc(&cli->cl_lru_busy);
+ atomic_long_inc(&cli->cl_lru_busy);
opg->ops_in_lru = 1;
rc = 0;
}
@@ -787,4 +782,151 @@ out:
return rc;
}
+/**
+ * Apply "unstable" page accounting for every page of a bulk descriptor.
+ *
+ * Atomic operations are expensive, so consecutive pages that belong to the
+ * same node (pgdat) are counted together and applied with a single
+ * mod_node_page_state() call on that node. In practice this works well
+ * because the pages in the same RPC are likely to come from the same node.
+ *
+ * \param desc   bulk descriptor whose bd_iov pages are accounted
+ * \param factor multiplier applied to each per-node page count; the callers
+ *               in this file pass 1 to add and -1 to subtract
+ */
+static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc,
+ int factor)
+{
+ int page_count = desc->bd_iov_count;
+ pg_data_t *last = NULL;
+ int count = 0;
+ int i;
+
+ for (i = 0; i < page_count; i++) {
+ pg_data_t *pgdat = page_pgdat(desc->bd_iov[i].bv_page);
+
+ if (likely(pgdat == last)) {
+ ++count;
+ continue;
+ }
+
+ if (count > 0) {
+ /*
+ * The run counted so far belongs to the previous node
+ * (last), not to the node of the current page.
+ */
+ mod_node_page_state(last, NR_UNSTABLE_NFS,
+ factor * count);
+ count = 0;
+ }
+ last = pgdat;
+ ++count;
+ }
+ /* flush the trailing run */
+ if (count > 0)
+ mod_node_page_state(last, NR_UNSTABLE_NFS, factor * count);
+}
+
+/* Add the pages of @desc to the NR_UNSTABLE_NFS accounting (factor 1). */
+static inline void add_unstable_page_accounting(struct ptlrpc_bulk_desc *desc)
+{
+ unstable_page_accounting(desc, 1);
+}
+
+/* Remove the pages of @desc from the NR_UNSTABLE_NFS accounting (factor -1). */
+static inline void dec_unstable_page_accounting(struct ptlrpc_bulk_desc *desc)
+{
+ unstable_page_accounting(desc, -1);
+}
+
+/**
+ * Performs "unstable" page accounting. This function balances the
+ * increment operations performed in osc_inc_unstable_pages. It is
+ * registered as the RPC request callback, and is executed when the
+ * bulk RPC is committed on the server. Thus at this point, the pages
+ * involved in the bulk transfer are no longer considered unstable.
+ *
+ * If this function is called, the request should have been committed
+ * or req->rq_unstable must have been set; this implies that the unstable
+ * statistics have already been added.
+ */
+void osc_dec_unstable_pages(struct ptlrpc_request *req)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+ int page_count = desc->bd_iov_count;
+ long unstable_count;
+
+ LASSERT(page_count >= 0);
+ dec_unstable_page_accounting(desc);
+
+ /* counter of this client_obd */
+ unstable_count = atomic_long_sub_return(page_count,
+ &cli->cl_unstable_count);
+ LASSERT(unstable_count >= 0);
+
+ /* counter kept in cl_cache; wake waiters once it drops to zero */
+ unstable_count = atomic_long_sub_return(page_count,
+ &cli->cl_cache->ccc_unstable_nr);
+ LASSERT(unstable_count >= 0);
+ if (!unstable_count)
+ wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
+
+ /* queue the LRU work if osc_cache_too_much() reports excess pages */
+ if (osc_cache_too_much(cli))
+ (void)ptlrpcd_queue_work(cli->cl_lru_work);
+}
+
+/**
+ * "unstable" page accounting. See: osc_dec_unstable_pages.
+ *
+ * Adds the pages of req->rq_bulk to the unstable counters and marks the
+ * request with rq_unstable, unless unstable page tracking is disabled or
+ * the request has already been committed.
+ */
+void osc_inc_unstable_pages(struct ptlrpc_request *req)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+ long page_count = desc->bd_iov_count;
+
+ /* No unstable page tracking */
+ if (!cli->cl_cache || !cli->cl_cache->ccc_unstable_check)
+ return;
+
+ add_unstable_page_accounting(desc);
+ atomic_long_add(page_count, &cli->cl_unstable_count);
+ atomic_long_add(page_count, &cli->cl_cache->ccc_unstable_nr);
+
+ /*
+ * If the request has already been committed (i.e. brw_commit
+ * called via rq_commit_cb), we need to undo the unstable page
+ * increments we just performed because rq_commit_cb won't be
+ * called again.
+ */
+ spin_lock(&req->rq_lock);
+ if (unlikely(req->rq_committed)) {
+ /* drop rq_lock before calling osc_dec_unstable_pages() */
+ spin_unlock(&req->rq_lock);
+
+ osc_dec_unstable_pages(req);
+ } else {
+ req->rq_unstable = 1;
+ spin_unlock(&req->rq_lock);
+ }
+}
+
+/**
+ * Check whether a SOFT_SYNC flag should be piggybacked to the OST from
+ * this OSC.
+ * This function will be called by every BRW RPC so it's critical
+ * to make this function fast.
+ *
+ * \param cli client_obd to check
+ *
+ * \retval true  both unstable page thresholds below are exceeded
+ * \retval false unstable page tracking is disabled or a threshold is not met
+ */
+bool osc_over_unstable_soft_limit(struct client_obd *cli)
+{
+ long unstable_nr, osc_unstable_count;
+
+ /* Unstable page tracking is disabled, therefore no soft limit */
+ if (!cli->cl_cache || !cli->cl_cache->ccc_unstable_check)
+ return false;
+
+ osc_unstable_count = atomic_long_read(&cli->cl_unstable_count);
+ unstable_nr = atomic_long_read(&cli->cl_cache->ccc_unstable_nr);
+
+ /* both counters are signed long, hence %ld */
+ CDEBUG(D_CACHE,
+ "%s: cli: %p unstable pages: %ld, osc unstable pages: %ld\n",
+ cli->cl_import->imp_obd->obd_name, cli,
+ unstable_nr, osc_unstable_count);
+
+ /*
+ * If the unstable pages of the whole cache exceed 25% of ccc_lru_max
+ * AND this OSC has more than one full RPC window of unstable pages
+ * (cl_max_pages_per_rpc * cl_max_rpcs_in_flight), it's a good chance
+ * to piggyback a SOFT_SYNC flag.
+ * Please notice that the OST won't respond immediately to the
+ * SOFT_SYNC request so active OSCs will have more chance to carry
+ * the flag, this is reasonable.
+ */
+ return unstable_nr > cli->cl_cache->ccc_lru_max >> 2 &&
+ osc_unstable_count > cli->cl_max_pages_per_rpc *
+ cli->cl_max_rpcs_in_flight;
+}
+
/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
index 536b868ff776..749781f022e2 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -41,6 +41,7 @@
#include "../include/lustre_ha.h"
#include "../include/lprocfs_status.h"
+#include "../include/lustre/lustre_ioctl.h"
#include "../include/lustre_debug.h"
#include "../include/lustre_param.h"
#include "../include/lustre_fid.h"
@@ -102,36 +103,6 @@ static void osc_release_ppga(struct brw_page **ppga, u32 count);
static int brw_interpret(const struct lu_env *env,
struct ptlrpc_request *req, void *data, int rc);
-/* Pack OSC object metadata for disk storage (LE byte order). */
-static int osc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
- struct lov_stripe_md *lsm)
-{
- int lmm_size;
-
- lmm_size = sizeof(**lmmp);
- if (!lmmp)
- return lmm_size;
-
- if (*lmmp && !lsm) {
- kfree(*lmmp);
- *lmmp = NULL;
- return 0;
- } else if (unlikely(lsm && ostid_id(&lsm->lsm_oi) == 0)) {
- return -EBADF;
- }
-
- if (!*lmmp) {
- *lmmp = kzalloc(lmm_size, GFP_NOFS);
- if (!*lmmp)
- return -ENOMEM;
- }
-
- if (lsm)
- ostid_cpu_to_le(&lsm->lsm_oi, &(*lmmp)->lmm_oi);
-
- return lmm_size;
-}
-
/* Unpack OSC object metadata from disk storage (LE byte order). */
static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
struct lov_mds_md *lmm, int lmm_bytes)
@@ -189,7 +160,7 @@ static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
(imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES))
(*lsmp)->lsm_maxbytes = imp->imp_connect_data.ocd_maxbytes;
else
- (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+ (*lsmp)->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
return lsm_size;
}
@@ -427,24 +398,16 @@ static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
oinfo->oi_cb_up, oinfo, rqset);
}
-static int osc_real_create(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md **ea,
- struct obd_trans_info *oti)
+static int osc_create(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct obd_trans_info *oti)
{
struct ptlrpc_request *req;
struct ost_body *body;
- struct lov_stripe_md *lsm;
int rc;
LASSERT(oa);
- LASSERT(ea);
-
- lsm = *ea;
- if (!lsm) {
- rc = obd_alloc_memmd(exp, &lsm);
- if (rc < 0)
- return rc;
- }
+ LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+ LASSERT(fid_seq_is_echo(ostid_seq(&oa->o_oi)));
req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_CREATE);
if (!req) {
@@ -490,21 +453,10 @@ static int osc_real_create(struct obd_export *exp, struct obdo *oa,
oa->o_blksize = cli_brw_size(exp->exp_obd);
oa->o_valid |= OBD_MD_FLBLKSZ;
- /* XXX LOV STACKING: the lsm that is passed to us from LOV does not
- * have valid lsm_oinfo data structs, so don't go touching that.
- * This needs to be fixed in a big way.
- */
- lsm->lsm_oi = oa->o_oi;
- *ea = lsm;
-
- if (oti) {
- oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg);
-
- if (oa->o_valid & OBD_MD_FLCOOKIE) {
- if (!oti->oti_logcookies)
- oti_alloc_cookies(oti, 1);
- *oti->oti_logcookies = oa->o_lcookie;
- }
+ if (oti && oa->o_valid & OBD_MD_FLCOOKIE) {
+ if (!oti->oti_logcookies)
+ oti->oti_logcookies = &oti->oti_onecookie;
+ *oti->oti_logcookies = oa->o_lcookie;
}
CDEBUG(D_HA, "transno: %lld\n",
@@ -512,8 +464,6 @@ static int osc_real_create(struct obd_export *exp, struct obdo *oa,
out_req:
ptlrpc_req_finished(req);
out:
- if (rc && !*ea)
- obd_free_memmd(exp, &lsm);
return rc;
}
@@ -649,7 +599,7 @@ static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
ostid_build_res_name(&oa->o_oi, &res_id);
res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
- if (!res)
+ if (IS_ERR(res))
return 0;
LDLM_RESOURCE_ADDREF(res);
@@ -689,30 +639,6 @@ static int osc_can_send_destroy(struct client_obd *cli)
return 0;
}
-static int osc_create(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct lov_stripe_md **ea,
- struct obd_trans_info *oti)
-{
- int rc = 0;
-
- LASSERT(oa);
- LASSERT(ea);
- LASSERT(oa->o_valid & OBD_MD_FLGROUP);
-
- if ((oa->o_valid & OBD_MD_FLFLAGS) &&
- oa->o_flags == OBD_FL_RECREATE_OBJS) {
- return osc_real_create(exp, oa, ea, oti);
- }
-
- if (!fid_seq_is_mdt(ostid_seq(&oa->o_oi)))
- return osc_real_create(exp, oa, ea, oti);
-
- /* we should not get here anymore */
- LBUG();
-
- return rc;
-}
-
/* Destroy requests can be async always on the client, and we don't even really
* care about the return code since the client cannot do anything at all about
* a destroy failure.
@@ -725,8 +651,7 @@ static int osc_create(const struct lu_env *env, struct obd_export *exp,
* cookies to the MDS after committing destroy transactions.
*/
static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct lov_stripe_md *ea,
- struct obd_trans_info *oti, struct obd_export *md_export)
+ struct obdo *oa, struct obd_trans_info *oti)
{
struct client_obd *cli = &exp->exp_obd->u.cli;
struct ptlrpc_request *req;
@@ -794,42 +719,44 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
long writing_bytes)
{
- u32 bits = OBD_MD_FLBLOCKS|OBD_MD_FLGRANT;
+ u32 bits = OBD_MD_FLBLOCKS | OBD_MD_FLGRANT;
LASSERT(!(oa->o_valid & bits));
oa->o_valid |= bits;
spin_lock(&cli->cl_loi_list_lock);
- oa->o_dirty = cli->cl_dirty;
- if (unlikely(cli->cl_dirty - cli->cl_dirty_transit >
- cli->cl_dirty_max)) {
+ oa->o_dirty = cli->cl_dirty_pages << PAGE_SHIFT;
+ if (unlikely(cli->cl_dirty_pages - cli->cl_dirty_transit >
+ cli->cl_dirty_max_pages)) {
CERROR("dirty %lu - %lu > dirty_max %lu\n",
- cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
+ cli->cl_dirty_pages, cli->cl_dirty_transit,
+ cli->cl_dirty_max_pages);
oa->o_undirty = 0;
- } else if (unlikely(atomic_read(&obd_unstable_pages) +
- atomic_read(&obd_dirty_pages) -
- atomic_read(&obd_dirty_transit_pages) >
- (long)(obd_max_dirty_pages + 1))) {
+ } else if (unlikely(atomic_long_read(&obd_dirty_pages) -
+ atomic_long_read(&obd_dirty_transit_pages) >
+ (obd_max_dirty_pages + 1))) {
/* The atomic_read() allowing the atomic_inc() are
* not covered by a lock thus they may safely race and trip
* this CERROR() unless we add in a small fudge factor (+1).
*/
- CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n",
+ CERROR("%s: dirty %ld + %ld > system dirty_max %lu\n",
cli->cl_import->imp_obd->obd_name,
- atomic_read(&obd_unstable_pages),
- atomic_read(&obd_dirty_pages),
- atomic_read(&obd_dirty_transit_pages),
+ atomic_long_read(&obd_dirty_pages),
+ atomic_long_read(&obd_dirty_transit_pages),
obd_max_dirty_pages);
oa->o_undirty = 0;
- } else if (unlikely(cli->cl_dirty_max - cli->cl_dirty > 0x7fffffff)) {
+ } else if (unlikely(cli->cl_dirty_max_pages - cli->cl_dirty_pages >
+ 0x7fffffff)) {
CERROR("dirty %lu - dirty_max %lu too big???\n",
- cli->cl_dirty, cli->cl_dirty_max);
+ cli->cl_dirty_pages, cli->cl_dirty_max_pages);
oa->o_undirty = 0;
} else {
- long max_in_flight = (cli->cl_max_pages_per_rpc <<
- PAGE_SHIFT)*
- (cli->cl_max_rpcs_in_flight + 1);
- oa->o_undirty = max(cli->cl_dirty_max, max_in_flight);
+ unsigned long max_in_flight;
+
+ max_in_flight = (cli->cl_max_pages_per_rpc << PAGE_SHIFT) *
+ (cli->cl_max_rpcs_in_flight + 1);
+ oa->o_undirty = max(cli->cl_dirty_max_pages << PAGE_SHIFT,
+ max_in_flight);
}
oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant;
oa->o_dropped = cli->cl_lost_grant;
@@ -1029,22 +956,24 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
{
/*
* ocd_grant is the total grant amount we're expect to hold: if we've
- * been evicted, it's the new avail_grant amount, cl_dirty will drop
- * to 0 as inflight RPCs fail out; otherwise, it's avail_grant + dirty.
+ * been evicted, it's the new avail_grant amount, cl_dirty_pages will
+ * drop to 0 as inflight RPCs fail out; otherwise, it's avail_grant +
+ * dirty.
*
* race is tolerable here: if we're evicted, but imp_state already
- * left EVICTED state, then cl_dirty must be 0 already.
+ * left EVICTED state, then cl_dirty_pages must be 0 already.
*/
spin_lock(&cli->cl_loi_list_lock);
if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
cli->cl_avail_grant = ocd->ocd_grant;
else
- cli->cl_avail_grant = ocd->ocd_grant - cli->cl_dirty;
+ cli->cl_avail_grant = ocd->ocd_grant -
+ (cli->cl_dirty_pages << PAGE_SHIFT);
if (cli->cl_avail_grant < 0) {
CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n",
cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant,
- ocd->ocd_grant, cli->cl_dirty);
+ ocd->ocd_grant, cli->cl_dirty_pages << PAGE_SHIFT);
/* workaround for servers which do not have the patch from
* LU-2679
*/
@@ -1181,7 +1110,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
}
while (nob > 0 && pg_count > 0) {
- int count = pga[i]->count > nob ? nob : pga[i]->count;
+ unsigned int count = pga[i]->count > nob ? nob : pga[i]->count;
/* corrupt the data before we compute the checksum, to
* simulate an OST->client data error
@@ -1191,7 +1120,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
unsigned char *ptr = kmap(pga[i]->pg);
int off = pga[i]->off & ~PAGE_MASK;
- memcpy(ptr + off, "bad1", min(4, nob));
+ memcpy(ptr + off, "bad1", min_t(typeof(nob), 4, nob));
kunmap(pga[i]->pg);
}
cfs_crypto_hash_update_page(hdesc, pga[i]->pg,
@@ -1335,11 +1264,11 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
if (i > 0 && can_merge_pages(pg_prev, pg)) {
niobuf--;
- niobuf->len += pg->count;
+ niobuf->rnb_len += pg->count;
} else {
- niobuf->offset = pg->off;
- niobuf->len = pg->count;
- niobuf->flags = pg->flag;
+ niobuf->rnb_offset = pg->off;
+ niobuf->rnb_len = pg->count;
+ niobuf->rnb_flags = pg->flag;
}
pg_prev = pg;
}
@@ -1418,6 +1347,11 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
INIT_LIST_HEAD(&aa->aa_oaps);
*reqp = req;
+ niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE);
+ CDEBUG(D_RPCTRACE, "brw rpc %p - object " DOSTID " offset %lld<>%lld\n",
+ req, POSTID(&oa->o_oi), niobuf[0].rnb_offset,
+ niobuf[niocount - 1].rnb_offset + niobuf[niocount - 1].rnb_len);
+
return 0;
out:
@@ -1463,7 +1397,8 @@ static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0,
oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0,
POSTID(&oa->o_oi), pga[0]->off,
- pga[page_count-1]->off + pga[page_count-1]->count - 1);
+ pga[page_count - 1]->off +
+ pga[page_count - 1]->count - 1);
CERROR("original client csum %x (type %x), server csum %x (type %x), client csum now %x\n",
client_cksum, client_cksum_type,
server_cksum, cksum_type, new_cksum);
@@ -1565,7 +1500,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
char *router = "";
enum cksum_type cksum_type;
- cksum_type = cksum_type_unpack(body->oa.o_valid&OBD_MD_FLFLAGS ?
+ cksum_type = cksum_type_unpack(body->oa.o_valid &
+ OBD_MD_FLFLAGS ?
body->oa.o_flags : 0);
client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
aa->aa_ppga, OST_READ,
@@ -1794,7 +1730,8 @@ static int brw_interpret(const struct lu_env *env,
if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) {
struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
- loff_t last_off = last->oap_count + last->oap_obj_off;
+ loff_t last_off = last->oap_count + last->oap_obj_off +
+ last->oap_page_off;
/* Change file size if this is an out of quota or
* direct IO write and it extends the file size
@@ -1812,11 +1749,14 @@ static int brw_interpret(const struct lu_env *env,
}
if (valid != 0)
- cl_object_attr_set(env, obj, attr, valid);
+ cl_object_attr_update(env, obj, attr, valid);
cl_object_attr_unlock(obj);
}
kmem_cache_free(obdo_cachep, aa->aa_oa);
+ if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE && rc == 0)
+ osc_inc_unstable_pages(req);
+
list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
list_del_init(&ext->oe_link);
osc_extent_finish(env, ext, 1, rc);
@@ -1847,21 +1787,21 @@ static int brw_interpret(const struct lu_env *env,
static void brw_commit(struct ptlrpc_request *req)
{
- spin_lock(&req->rq_lock);
/*
* If osc_inc_unstable_pages (via osc_extent_finish) races with
* this called via the rq_commit_cb, I need to ensure
* osc_dec_unstable_pages is still called. Otherwise unstable
* pages may be leaked.
*/
- if (req->rq_unstable) {
+ spin_lock(&req->rq_lock);
+ if (unlikely(req->rq_unstable)) {
+ req->rq_unstable = 0;
spin_unlock(&req->rq_lock);
osc_dec_unstable_pages(req);
- spin_lock(&req->rq_lock);
} else {
req->rq_committed = 1;
+ spin_unlock(&req->rq_lock);
}
- spin_unlock(&req->rq_lock);
}
/**
@@ -1881,13 +1821,13 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
struct osc_async_page *tmp;
struct cl_req *clerq = NULL;
enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ;
- struct ldlm_lock *lock = NULL;
struct cl_req_attr *crattr = NULL;
u64 starting_offset = OBD_OBJECT_EOF;
u64 ending_offset = 0;
int mpflag = 0;
int mem_tight = 0;
int page_count = 0;
+ bool soft_sync = false;
int i;
int rc;
struct ost_body *body;
@@ -1915,6 +1855,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
}
}
+ soft_sync = osc_over_unstable_soft_limit(cli);
if (mem_tight)
mpflag = cfs_memory_pressure_get_and_set();
@@ -1947,10 +1888,11 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
rc = PTR_ERR(clerq);
goto out;
}
- lock = oap->oap_ldlm_lock;
}
if (mem_tight)
oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
+ if (soft_sync)
+ oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
pga[i] = &oap->oap_brw_page;
pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
@@ -1964,10 +1906,6 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
LASSERT(clerq);
crattr->cra_oa = oa;
cl_req_attr_set(env, clerq, crattr, ~0ULL);
- if (lock) {
- oa->o_handle = lock->l_remote_handle;
- oa->o_valid |= OBD_MD_FLHANDLE;
- }
rc = cl_req_prep(env, clerq);
if (rc != 0) {
@@ -1998,7 +1936,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
crattr->cra_oa = &body->oa;
cl_req_attr_set(env, clerq, crattr,
- OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME);
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME);
lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid);
@@ -2044,7 +1982,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
}
spin_unlock(&cli->cl_loi_list_lock);
- DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight",
+ DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %ur/%dw in flight",
page_count, aa, cli->cl_r_in_flight,
cli->cl_w_in_flight);
@@ -2116,27 +2054,6 @@ static int osc_set_data_with_check(struct lustre_handle *lockh,
return set;
}
-/* find any ldlm lock of the inode in osc
- * return 0 not find
- * 1 find one
- * < 0 error
- */
-static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
- ldlm_iterator_t replace, void *data)
-{
- struct ldlm_res_id res_id;
- struct obd_device *obd = class_exp2obd(exp);
- int rc = 0;
-
- ostid_build_res_name(&lsm->lsm_oi, &res_id);
- rc = ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data);
- if (rc == LDLM_ITER_STOP)
- return 1;
- if (rc == LDLM_ITER_CONTINUE)
- return 0;
- return rc;
-}
-
static int osc_enqueue_fini(struct ptlrpc_request *req,
osc_enqueue_upcall_f upcall, void *cookie,
struct lustre_handle *lockh, enum ldlm_mode mode,
@@ -2586,71 +2503,6 @@ static int osc_statfs(const struct lu_env *env, struct obd_export *exp,
return rc;
}
-/* Retrieve object striping information.
- *
- * @lmmu is a pointer to an in-core struct with lmm_ost_count indicating
- * the maximum number of OST indices which will fit in the user buffer.
- * lmm_magic must be LOV_MAGIC (we only use 1 slot here).
- */
-static int osc_getstripe(struct lov_stripe_md *lsm,
- struct lov_user_md __user *lump)
-{
- /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
- struct lov_user_md_v3 lum, *lumk;
- struct lov_user_ost_data_v1 *lmm_objects;
- int rc = 0, lum_size;
-
- if (!lsm)
- return -ENODATA;
-
- /* we only need the header part from user space to get lmm_magic and
- * lmm_stripe_count, (the header part is common to v1 and v3)
- */
- lum_size = sizeof(struct lov_user_md_v1);
- if (copy_from_user(&lum, lump, lum_size))
- return -EFAULT;
-
- if ((lum.lmm_magic != LOV_USER_MAGIC_V1) &&
- (lum.lmm_magic != LOV_USER_MAGIC_V3))
- return -EINVAL;
-
- /* lov_user_md_vX and lov_mds_md_vX must have the same size */
- LASSERT(sizeof(struct lov_user_md_v1) == sizeof(struct lov_mds_md_v1));
- LASSERT(sizeof(struct lov_user_md_v3) == sizeof(struct lov_mds_md_v3));
- LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lumk->lmm_objects[0]));
-
- /* we can use lov_mds_md_size() to compute lum_size
- * because lov_user_md_vX and lov_mds_md_vX have the same size
- */
- if (lum.lmm_stripe_count > 0) {
- lum_size = lov_mds_md_size(lum.lmm_stripe_count, lum.lmm_magic);
- lumk = kzalloc(lum_size, GFP_NOFS);
- if (!lumk)
- return -ENOMEM;
-
- if (lum.lmm_magic == LOV_USER_MAGIC_V1)
- lmm_objects =
- &(((struct lov_user_md_v1 *)lumk)->lmm_objects[0]);
- else
- lmm_objects = &(lumk->lmm_objects[0]);
- lmm_objects->l_ost_oi = lsm->lsm_oi;
- } else {
- lum_size = lov_mds_md_size(0, lum.lmm_magic);
- lumk = &lum;
- }
-
- lumk->lmm_oi = lsm->lsm_oi;
- lumk->lmm_stripe_count = 1;
-
- if (copy_to_user(lump, lumk, lum_size))
- rc = -EFAULT;
-
- if (lumk != &lum)
- kfree(lumk);
-
- return rc;
-}
-
static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
void *karg, void __user *uarg)
{
@@ -2664,57 +2516,6 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
return -EINVAL;
}
switch (cmd) {
- case OBD_IOC_LOV_GET_CONFIG: {
- char *buf;
- struct lov_desc *desc;
- struct obd_uuid uuid;
-
- buf = NULL;
- len = 0;
- if (obd_ioctl_getdata(&buf, &len, uarg)) {
- err = -EINVAL;
- goto out;
- }
-
- data = (struct obd_ioctl_data *)buf;
-
- if (sizeof(*desc) > data->ioc_inllen1) {
- obd_ioctl_freedata(buf, len);
- err = -EINVAL;
- goto out;
- }
-
- if (data->ioc_inllen2 < sizeof(uuid)) {
- obd_ioctl_freedata(buf, len);
- err = -EINVAL;
- goto out;
- }
-
- desc = (struct lov_desc *)data->ioc_inlbuf1;
- desc->ld_tgt_count = 1;
- desc->ld_active_tgt_count = 1;
- desc->ld_default_stripe_count = 1;
- desc->ld_default_stripe_size = 0;
- desc->ld_default_stripe_offset = 0;
- desc->ld_pattern = 0;
- memcpy(&desc->ld_uuid, &obd->obd_uuid, sizeof(uuid));
-
- memcpy(data->ioc_inlbuf2, &obd->obd_uuid, sizeof(uuid));
-
- err = copy_to_user(uarg, buf, len);
- if (err)
- err = -EFAULT;
- obd_ioctl_freedata(buf, len);
- goto out;
- }
- case LL_IOC_LOV_SETSTRIPE:
- err = obd_alloc_memmd(exp, karg);
- if (err > 0)
- err = 0;
- goto out;
- case LL_IOC_LOV_GETSTRIPE:
- err = osc_getstripe(karg, uarg);
- goto out;
case OBD_IOC_CLIENT_RECOVER:
err = ptlrpc_recover_import(obd->u.cli.cl_import,
data->ioc_inlbuf1, 0);
@@ -2749,51 +2550,7 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
if (!vallen || !val)
return -EFAULT;
- if (KEY_IS(KEY_LOCK_TO_STRIPE)) {
- __u32 *stripe = val;
- *vallen = sizeof(*stripe);
- *stripe = 0;
- return 0;
- } else if (KEY_IS(KEY_LAST_ID)) {
- struct ptlrpc_request *req;
- u64 *reply;
- char *tmp;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_OST_GET_INFO_LAST_ID);
- if (!req)
- return -ENOMEM;
-
- req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY,
- RCL_CLIENT, keylen);
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
- memcpy(tmp, key, keylen);
-
- req->rq_no_delay = 1;
- req->rq_no_resend = 1;
- ptlrpc_request_set_replen(req);
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out;
-
- reply = req_capsule_server_get(&req->rq_pill, &RMF_OBD_ID);
- if (!reply) {
- rc = -EPROTO;
- goto out;
- }
-
- *((u64 *)val) = *reply;
-out:
- ptlrpc_req_finished(req);
- return rc;
- } else if (KEY_IS(KEY_FIEMAP)) {
+ if (KEY_IS(KEY_FIEMAP)) {
struct ll_fiemap_info_key *fm_key = key;
struct ldlm_res_id res_id;
ldlm_policy_data_t policy;
@@ -2931,11 +2688,11 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
if (KEY_IS(KEY_CACHE_LRU_SHRINK)) {
struct client_obd *cli = &obd->u.cli;
- int nr = atomic_read(&cli->cl_lru_in_list) >> 1;
- int target = *(int *)val;
+ long nr = atomic_long_read(&cli->cl_lru_in_list) >> 1;
+ long target = *(long *)val;
nr = osc_lru_shrink(env, cli, min(nr, target), true);
- *(int *)val -= nr;
+ *(long *)val -= nr;
return 0;
}
@@ -3014,8 +2771,9 @@ static int osc_reconnect(const struct lu_env *env,
long lost_grant;
spin_lock(&cli->cl_loi_list_lock);
- data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?:
- 2 * cli_brw_size(obd);
+ data->ocd_grant = (cli->cl_avail_grant +
+ (cli->cl_dirty_pages << PAGE_SHIFT)) ?:
+ 2 * cli_brw_size(obd);
lost_grant = cli->cl_lost_grant;
cli->cl_lost_grant = 0;
spin_unlock(&cli->cl_loi_list_lock);
@@ -3346,7 +3104,6 @@ static struct obd_ops osc_obd_ops = {
.disconnect = osc_disconnect,
.statfs = osc_statfs,
.statfs_async = osc_statfs_async,
- .packmd = osc_packmd,
.unpackmd = osc_unpackmd,
.create = osc_create,
.destroy = osc_destroy,
@@ -3354,7 +3111,6 @@ static struct obd_ops osc_obd_ops = {
.getattr_async = osc_getattr_async,
.setattr = osc_setattr,
.setattr_async = osc_setattr_async,
- .find_cbdata = osc_find_cbdata,
.iocontrol = osc_iocontrol,
.get_info = osc_get_info,
.set_info_async = osc_set_info_async,
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
index d4463d7c81d2..8c51d51a678b 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -45,6 +45,7 @@
static int ptlrpc_send_new_req(struct ptlrpc_request *req);
static int ptlrpcd_check_work(struct ptlrpc_request *req);
+static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async);
/**
* Initialize passed in client structure \a cl.
@@ -89,7 +90,6 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
return c;
}
-EXPORT_SYMBOL(ptlrpc_uuid_to_connection);
/**
* Allocate and initialize new bulk descriptor on the sender.
@@ -202,7 +202,7 @@ void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc, int unpin)
if (unpin) {
for (i = 0; i < desc->bd_iov_count; i++)
- put_page(desc->bd_iov[i].kiov_page);
+ put_page(desc->bd_iov[i].bv_page);
}
kfree(desc);
@@ -283,8 +283,8 @@ int ptlrpc_at_get_net_latency(struct ptlrpc_request *req)
}
/* Adjust expected network latency */
-static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
- unsigned int service_time)
+void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
+ unsigned int service_time)
{
unsigned int nl, oldnl;
struct imp_at *at;
@@ -364,31 +364,37 @@ static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req)
}
rc = unpack_reply(early_req);
- if (rc == 0) {
- /* Expecting to increase the service time estimate here */
- ptlrpc_at_adj_service(req,
- lustre_msg_get_timeout(early_req->rq_repmsg));
- ptlrpc_at_adj_net_latency(req,
- lustre_msg_get_service_time(early_req->rq_repmsg));
- }
-
- sptlrpc_cli_finish_early_reply(early_req);
-
- if (rc != 0) {
+ if (rc) {
+ sptlrpc_cli_finish_early_reply(early_req);
spin_lock(&req->rq_lock);
return rc;
}
- /* Adjust the local timeout for this req */
- ptlrpc_at_set_req_timeout(req);
+ /*
+ * Use the new timeout value only to adjust the local value for this
+ * request; don't include it in at_history. It is not yet clear why
+ * the service time increased or whether it should be counted, e.g.
+ * it may be a recovery case or some error on the server. The real
+ * reply will add the new data if it is worth adding.
+ */
+ req->rq_timeout = lustre_msg_get_timeout(early_req->rq_repmsg);
+ lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
+
+ /* Network latency can be adjusted, it is pure network delays */
+ ptlrpc_at_adj_net_latency(req,
+ lustre_msg_get_service_time(early_req->rq_repmsg));
+
+ sptlrpc_cli_finish_early_reply(early_req);
spin_lock(&req->rq_lock);
olddl = req->rq_deadline;
/*
- * server assumes it now has rq_timeout from when it sent the
- * early reply, so client should give it at least that long.
+ * server assumes it now has rq_timeout from when the request
+ * arrived, so the client should give it at least that long.
+ * Since we don't know the arrival time, we'll use the original
+ * sent time.
*/
- req->rq_deadline = ktime_get_real_seconds() + req->rq_timeout +
+ req->rq_deadline = req->rq_sent + req->rq_timeout +
ptlrpc_at_get_net_latency(req);
DEBUG_REQ(D_ADAPTTO, req,
@@ -884,7 +890,6 @@ struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
return set;
}
-EXPORT_SYMBOL(ptlrpc_prep_fcset);
/**
* Wind down and free request set structure previously allocated with
@@ -1004,7 +1009,6 @@ void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
}
}
-EXPORT_SYMBOL(ptlrpc_set_add_new_req);
/**
* Based on the current state of the import, determine if the request
@@ -1035,8 +1039,8 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
*status = -EIO;
} else if (ptlrpc_send_limit_expired(req)) {
/* probably doesn't need to be a D_ERROR after initial testing */
- DEBUG_REQ(D_ERROR, req, "send limit expired ");
- *status = -EIO;
+ DEBUG_REQ(D_HA, req, "send limit expired ");
+ *status = -ETIMEDOUT;
} else if (req->rq_send_state == LUSTRE_IMP_CONNECTING &&
imp->imp_state == LUSTRE_IMP_CONNECTING) {
/* allow CONNECT even if import is invalid */
@@ -1073,36 +1077,42 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
}
/**
- * Decide if the error message regarding provided request \a req
- * should be printed to the console or not.
- * Makes it's decision on request status and other properties.
- * Returns 1 to print error on the system console or 0 if not.
+ * Decide if the error message should be printed to the console or not.
+ * Makes its decision based on request type, status, and failure frequency.
+ *
+ * \param[in] req request that failed and may need a console message
+ *
+ * \retval false if no message should be printed
+ * \retval true if console message should be printed
*/
-static int ptlrpc_console_allow(struct ptlrpc_request *req)
+static bool ptlrpc_console_allow(struct ptlrpc_request *req)
{
__u32 opc;
- int err;
LASSERT(req->rq_reqmsg);
opc = lustre_msg_get_opc(req->rq_reqmsg);
- /*
- * Suppress particular reconnect errors which are to be expected. No
- * errors are suppressed for the initial connection on an import
- */
- if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) &&
- (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) {
+ /* Suppress particular reconnect errors which are to be expected. */
+ if (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT) {
+ int err;
+
/* Suppress timed out reconnect requests */
- if (req->rq_timedout)
- return 0;
+ if (lustre_handle_is_used(&req->rq_import->imp_remote_handle) ||
+ req->rq_timedout)
+ return false;
- /* Suppress unavailable/again reconnect requests */
+ /*
+ * Suppress most unavailable/again reconnect requests, but
+ * print occasionally so it is clear client is trying to
+ * connect to a server where no target is running.
+ */
err = lustre_msg_get_status(req->rq_repmsg);
- if (err == -ENODEV || err == -EAGAIN)
- return 0;
+ if ((err == -ENODEV || err == -EAGAIN) &&
+ req->rq_import->imp_conn_cnt % 30 != 20)
+ return false;
}
- return 1;
+ return true;
}
/**
@@ -1116,14 +1126,14 @@ static int ptlrpc_check_status(struct ptlrpc_request *req)
err = lustre_msg_get_status(req->rq_repmsg);
if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) {
struct obd_import *imp = req->rq_import;
+ lnet_nid_t nid = imp->imp_connection->c_peer.nid;
__u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
if (ptlrpc_console_allow(req))
- LCONSOLE_ERROR_MSG(0x011, "%s: Communicating with %s, operation %s failed with %d.\n",
+ LCONSOLE_ERROR_MSG(0x011, "%s: operation %s to node %s failed: rc = %d\n",
imp->imp_obd->obd_name,
- libcfs_nid2str(
- imp->imp_connection->c_peer.nid),
- ll_opcode2str(opc), err);
+ ll_opcode2str(opc),
+ libcfs_nid2str(nid), err);
return err < 0 ? err : -EINVAL;
}
@@ -1280,7 +1290,7 @@ static int after_reply(struct ptlrpc_request *req)
* some reason. Try to reconnect, and if that fails, punt to
* the upcall.
*/
- if (ll_rpc_recoverable_error(rc)) {
+ if (ptlrpc_recoverable_error(rc)) {
if (req->rq_send_state != LUSTRE_IMP_FULL ||
imp->imp_obd->obd_no_recov || imp->imp_dlm_fake) {
return rc;
@@ -1628,8 +1638,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
req->rq_waiting || req->rq_wait_ctx) {
int status;
- if (!ptlrpc_unregister_reply(req, 1))
+ if (!ptlrpc_unregister_reply(req, 1)) {
+ ptlrpc_unregister_bulk(req, 1);
continue;
+ }
spin_lock(&imp->imp_lock);
if (ptlrpc_import_delay_req(imp, req,
@@ -1995,7 +2007,6 @@ int ptlrpc_expired_set(void *data)
*/
return 1;
}
-EXPORT_SYMBOL(ptlrpc_expired_set);
/**
* Sets rq_intr flag in \a req under spinlock.
@@ -2012,7 +2023,7 @@ EXPORT_SYMBOL(ptlrpc_mark_interrupted);
* Interrupts (sets interrupted flag) all uncompleted requests in
* a set \a data. Callback for l_wait_event for interruptible waits.
*/
-void ptlrpc_interrupted_set(void *data)
+static void ptlrpc_interrupted_set(void *data)
{
struct ptlrpc_request_set *set = data;
struct list_head *tmp;
@@ -2030,7 +2041,6 @@ void ptlrpc_interrupted_set(void *data)
ptlrpc_mark_interrupted(req);
}
}
-EXPORT_SYMBOL(ptlrpc_interrupted_set);
/**
* Get the smallest timeout in the set; this does NOT set a timeout.
@@ -2074,7 +2084,6 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
}
return timeout;
}
-EXPORT_SYMBOL(ptlrpc_set_next_timeout);
/**
* Send all unset request from the set and then wait until all
@@ -2325,7 +2334,7 @@ EXPORT_SYMBOL(ptlrpc_req_xid);
* The request owner (i.e. the thread doing the I/O) must call...
* Returns 0 on success or 1 if unregistering cannot be made.
*/
-int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
+static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
{
int rc;
wait_queue_head_t *wq;
@@ -2390,7 +2399,6 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
}
return 0;
}
-EXPORT_SYMBOL(ptlrpc_unregister_reply);
static void ptlrpc_free_request(struct ptlrpc_request *req)
{
@@ -2451,7 +2459,8 @@ void ptlrpc_free_committed(struct obd_import *imp)
imp->imp_obd->obd_name, imp->imp_peer_committed_transno,
imp->imp_generation);
- if (imp->imp_generation != imp->imp_last_generation_checked)
+ if (imp->imp_generation != imp->imp_last_generation_checked ||
+ !imp->imp_last_transno_checked)
skip_committed_list = false;
imp->imp_last_transno_checked = imp->imp_peer_committed_transno;
@@ -2499,6 +2508,9 @@ free_req:
if (req->rq_import_generation < imp->imp_generation) {
DEBUG_REQ(D_RPCTRACE, req, "free stale open request");
ptlrpc_free_request(req);
+ } else if (!req->rq_replay) {
+ DEBUG_REQ(D_RPCTRACE, req, "free closed open request");
+ ptlrpc_free_request(req);
}
}
}
@@ -2541,7 +2553,6 @@ void ptlrpc_resend_req(struct ptlrpc_request *req)
ptlrpc_client_wake_req(req);
spin_unlock(&req->rq_lock);
}
-EXPORT_SYMBOL(ptlrpc_resend_req);
/**
* Grab additional reference on a request \a req
@@ -2610,7 +2621,6 @@ void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
list_add(&req->rq_replay_list, &imp->imp_replay_list);
}
-EXPORT_SYMBOL(ptlrpc_retain_replayable_request);
/**
* Send request and wait until it completes.
@@ -2783,7 +2793,6 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
ptlrpcd_add_req(req);
return 0;
}
-EXPORT_SYMBOL(ptlrpc_replay_req);
/**
* Aborts all in-flight request on import \a imp sending and delayed lists
@@ -2843,7 +2852,6 @@ void ptlrpc_abort_inflight(struct obd_import *imp)
spin_unlock(&imp->imp_lock);
}
-EXPORT_SYMBOL(ptlrpc_abort_inflight);
/**
* Abort all uncompleted requests in request set \a set
@@ -2929,7 +2937,6 @@ __u64 ptlrpc_next_xid(void)
return next;
}
-EXPORT_SYMBOL(ptlrpc_next_xid);
/**
* Get a glimpse at what next xid value might have been.
diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c
index 177a379da9fa..7b020d60c9e5 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/connection.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c
@@ -82,7 +82,6 @@ out:
libcfs_nid2str(conn->c_peer.nid));
return conn;
}
-EXPORT_SYMBOL(ptlrpc_connection_get);
int ptlrpc_connection_put(struct ptlrpc_connection *conn)
{
@@ -118,7 +117,6 @@ int ptlrpc_connection_put(struct ptlrpc_connection *conn)
return rc;
}
-EXPORT_SYMBOL(ptlrpc_connection_put);
struct ptlrpc_connection *
ptlrpc_connection_addref(struct ptlrpc_connection *conn)
@@ -130,7 +128,6 @@ ptlrpc_connection_addref(struct ptlrpc_connection *conn)
return conn;
}
-EXPORT_SYMBOL(ptlrpc_connection_addref);
int ptlrpc_connection_init(void)
{
@@ -146,13 +143,11 @@ int ptlrpc_connection_init(void)
return 0;
}
-EXPORT_SYMBOL(ptlrpc_connection_init);
void ptlrpc_connection_fini(void)
{
cfs_hash_putref(conn_hash);
}
-EXPORT_SYMBOL(ptlrpc_connection_fini);
/*
* Hash operations for net_peer<->connection
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
index b1ce72511509..283dfb296d35 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/events.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/events.c
@@ -543,7 +543,7 @@ static int ptlrpc_ni_init(void)
rc = LNetNIInit(pid);
if (rc < 0) {
CDEBUG(D_NET, "Can't init network interface: %d\n", rc);
- return -ENOENT;
+ return rc;
}
/* CAVEAT EMPTOR: how we process portals events is _radically_
@@ -561,7 +561,7 @@ static int ptlrpc_ni_init(void)
CERROR("Failed to allocate event queue: %d\n", rc);
LNetNIFini();
- return -ENOMEM;
+ return rc;
}
int ptlrpc_init_portals(void)
@@ -570,7 +570,7 @@ int ptlrpc_init_portals(void)
if (rc != 0) {
CERROR("network initialisation failed\n");
- return -EIO;
+ return rc;
}
rc = ptlrpcd_addref();
if (rc == 0)
diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
index 3292e6ea0102..a23d0a05b574 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/import.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/import.c
@@ -307,7 +307,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
*/
lwi = LWI_TIMEOUT_INTERVAL(
cfs_timeout_cap(cfs_time_seconds(timeout)),
- (timeout > 1)?cfs_time_seconds(1):cfs_time_seconds(1)/2,
+ (timeout > 1) ? cfs_time_seconds(1) :
+ cfs_time_seconds(1) / 2,
NULL, NULL);
rc = l_wait_event(imp->imp_recovery_waitq,
(atomic_read(&imp->imp_inflight) == 0),
@@ -424,7 +425,6 @@ void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
ptlrpc_pinger_force(imp);
}
}
-EXPORT_SYMBOL(ptlrpc_fail_import);
int ptlrpc_reconnect_import(struct obd_import *imp)
{
@@ -698,7 +698,8 @@ int ptlrpc_connect_import(struct obd_import *imp)
request->rq_send_state = LUSTRE_IMP_CONNECTING;
/* Allow a slightly larger reply for future growth compatibility */
req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
- sizeof(struct obd_connect_data)+16*sizeof(__u64));
+ sizeof(struct obd_connect_data) +
+ 16 * sizeof(__u64));
ptlrpc_request_set_replen(request);
request->rq_interpret_reply = ptlrpc_connect_interpret;
@@ -750,6 +751,153 @@ static int ptlrpc_busy_reconnect(int rc)
return (rc == -EBUSY) || (rc == -EAGAIN);
}
+static int ptlrpc_connect_set_flags(struct obd_import *imp,
+ struct obd_connect_data *ocd,
+ u64 old_connect_flags,
+ struct obd_export *exp, int init_connect)
+{
+ struct client_obd *cli = &imp->imp_obd->u.cli;
+ static bool warned;
+
+ if ((imp->imp_connect_flags_orig & OBD_CONNECT_IBITS) &&
+ !(ocd->ocd_connect_flags & OBD_CONNECT_IBITS)) {
+ LCONSOLE_WARN("%s: MDS %s does not support ibits lock, either very old or invalid: requested %#llx, replied %#llx\n",
+ imp->imp_obd->obd_name,
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_connect_flags_orig,
+ ocd->ocd_connect_flags);
+ return -EPROTO;
+ }
+
+ spin_lock(&imp->imp_lock);
+ list_del(&imp->imp_conn_current->oic_item);
+ list_add(&imp->imp_conn_current->oic_item, &imp->imp_conn_list);
+ imp->imp_last_success_conn = imp->imp_conn_current->oic_last_attempt;
+
+ spin_unlock(&imp->imp_lock);
+
+ if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+ (ocd->ocd_version > LUSTRE_VERSION_CODE +
+ LUSTRE_VERSION_OFFSET_WARN ||
+ ocd->ocd_version < LUSTRE_VERSION_CODE -
+ LUSTRE_VERSION_OFFSET_WARN)) {
+ /*
+ * Sigh, some compilers do not like #ifdef in the middle
+ * of macro arguments
+ */
+ const char *older = "older than client. Consider upgrading server";
+ const char *newer = "newer than client. Consider recompiling application";
+
+ LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n",
+ obd2cli_tgt(imp->imp_obd),
+ OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+ OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+ OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+ OBD_OCD_VERSION_FIX(ocd->ocd_version),
+ ocd->ocd_version > LUSTRE_VERSION_CODE ?
+ newer : older, LUSTRE_VERSION_STRING);
+ warned = true;
+ }
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
+ /*
+ * Check if server has LU-1252 fix applied to not always swab
+ * the IR MNE entries. Do this only once per connection. This
+ * fixup is version-limited, because we don't want to carry the
+ * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we
+ * need interop with unpatched 2.2 servers. For newer servers,
+ * the client will do MNE swabbing only as needed. LU-1644
+ */
+ if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+ !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) &&
+ OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 &&
+ OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 &&
+ OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 &&
+ !strcmp(imp->imp_obd->obd_type->typ_name,
+ LUSTRE_MGC_NAME)))
+ imp->imp_need_mne_swab = 1;
+ else /* clear if server was upgraded since last connect */
+ imp->imp_need_mne_swab = 0;
+#endif
+
+ if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
+ /*
+ * We sent to the server ocd_cksum_types with bits set
+ * for algorithms we understand. The server masked off
+ * the checksum types it doesn't support
+ */
+ if (!(ocd->ocd_cksum_types & cksum_types_supported_client())) {
+ LCONSOLE_WARN("The negotiation of the checksum algorithm to use with server %s failed (%x/%x), disabling checksums\n",
+ obd2cli_tgt(imp->imp_obd),
+ ocd->ocd_cksum_types,
+ cksum_types_supported_client());
+ cli->cl_checksum = 0;
+ cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
+ } else {
+ cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
+ }
+ } else {
+ /*
+ * The server does not support OBD_CONNECT_CKSUM.
+ * Enforce ADLER for backward compatibility
+ */
+ cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
+ }
+ cli->cl_cksum_type = cksum_type_select(cli->cl_supp_cksum_types);
+
+ if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
+ cli->cl_max_pages_per_rpc =
+ min(ocd->ocd_brw_size >> PAGE_SHIFT,
+ cli->cl_max_pages_per_rpc);
+ else if (imp->imp_connect_op == MDS_CONNECT ||
+ imp->imp_connect_op == MGS_CONNECT)
+ cli->cl_max_pages_per_rpc = 1;
+
+ LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
+ (cli->cl_max_pages_per_rpc > 0));
+
+ client_adjust_max_dirty(cli);
+
+ /*
+ * Reset ns_connect_flags only for initial connect. It might be
+ * changed while using the FS, and resetting it on reconnect
+ * would lose user settings made earlier, such as disabling
+ * lru_resize, etc.
+ */
+ if (old_connect_flags != exp_connect_flags(exp) || init_connect) {
+ CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server flags: %#llx\n",
+ imp->imp_obd->obd_name, ocd->ocd_connect_flags);
+ imp->imp_obd->obd_namespace->ns_connect_flags =
+ ocd->ocd_connect_flags;
+ imp->imp_obd->obd_namespace->ns_orig_connect_flags =
+ ocd->ocd_connect_flags;
+ }
+
+ if ((ocd->ocd_connect_flags & OBD_CONNECT_AT) &&
+ (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+ /*
+ * We need a per-message support flag, because
+ * a. we don't know if the incoming connect reply
+ * supports AT or not (in reply_in_callback)
+ * until we unpack it.
+ * b. failovered server means export and flags are gone
+ * (in ptlrpc_send_reply).
+ * Can only be set when we know AT is supported at
+ * both ends
+ */
+ imp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
+ else
+ imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+
+ if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) &&
+ (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+ imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
+ else
+ imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+
+ return 0;
+}
+
/**
* interpret_reply callback for connect RPCs.
* Looks into returned status of connect operation and decides
@@ -762,7 +910,6 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
{
struct ptlrpc_connect_async_args *aa = data;
struct obd_import *imp = request->rq_import;
- struct client_obd *cli = &imp->imp_obd->u.cli;
struct lustre_handle old_hdl;
__u64 old_connect_flags;
int msg_flags;
@@ -842,7 +989,21 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
old_connect_flags = exp_connect_flags(exp);
exp->exp_connect_data = *ocd;
imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
+
+ /*
+ * The net statistics are not valid anymore after (re-)connect,
+ * because they may reflect other routing, etc.
+ */
+ at_init(&imp->imp_at.iat_net_latency, 0, 0);
+ ptlrpc_at_adj_net_latency(request,
+ lustre_msg_get_service_time(request->rq_repmsg));
+
+ /* Import flags should be updated before waking import at FULL state */
+ rc = ptlrpc_connect_set_flags(imp, ocd, old_connect_flags, exp,
+ aa->pcaa_initial_connect);
class_export_put(exp);
+ if (rc)
+ goto out;
obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD);
@@ -987,151 +1148,13 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
finish:
rc = ptlrpc_import_recovery_state_machine(imp);
- if (rc != 0) {
- if (rc == -ENOTCONN) {
- CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n",
- obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid);
- ptlrpc_connect_import(imp);
- imp->imp_connect_tried = 1;
- return 0;
- }
- } else {
- static bool warned;
-
- spin_lock(&imp->imp_lock);
- list_del(&imp->imp_conn_current->oic_item);
- list_add(&imp->imp_conn_current->oic_item, &imp->imp_conn_list);
- imp->imp_last_success_conn =
- imp->imp_conn_current->oic_last_attempt;
-
- spin_unlock(&imp->imp_lock);
-
- if ((imp->imp_connect_flags_orig & OBD_CONNECT_IBITS) &&
- !(ocd->ocd_connect_flags & OBD_CONNECT_IBITS)) {
- LCONSOLE_WARN("%s: MDS %s does not support ibits lock, either very old or invalid: requested %llx, replied %llx\n",
- imp->imp_obd->obd_name,
- imp->imp_connection->c_remote_uuid.uuid,
- imp->imp_connect_flags_orig,
- ocd->ocd_connect_flags);
- rc = -EPROTO;
- goto out;
- }
-
- if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
- (ocd->ocd_version > LUSTRE_VERSION_CODE +
- LUSTRE_VERSION_OFFSET_WARN ||
- ocd->ocd_version < LUSTRE_VERSION_CODE -
- LUSTRE_VERSION_OFFSET_WARN)) {
- /* Sigh, some compilers do not like #ifdef in the middle
- * of macro arguments
- */
- const char *older = "older than client. Consider upgrading server";
- const char *newer = "newer than client. Consider recompiling application";
-
- LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n",
- obd2cli_tgt(imp->imp_obd),
- OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
- OBD_OCD_VERSION_MINOR(ocd->ocd_version),
- OBD_OCD_VERSION_PATCH(ocd->ocd_version),
- OBD_OCD_VERSION_FIX(ocd->ocd_version),
- ocd->ocd_version > LUSTRE_VERSION_CODE ?
- newer : older, LUSTRE_VERSION_STRING);
- warned = true;
- }
-
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
- /* Check if server has LU-1252 fix applied to not always swab
- * the IR MNE entries. Do this only once per connection. This
- * fixup is version-limited, because we don't want to carry the
- * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we
- * need interop with unpatched 2.2 servers. For newer servers,
- * the client will do MNE swabbing only as needed. LU-1644
- */
- if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
- !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) &&
- OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 &&
- OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 &&
- OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 &&
- strcmp(imp->imp_obd->obd_type->typ_name,
- LUSTRE_MGC_NAME) == 0))
- imp->imp_need_mne_swab = 1;
- else /* clear if server was upgraded since last connect */
- imp->imp_need_mne_swab = 0;
-#else
-#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
-#endif
-
- if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
- /* We sent to the server ocd_cksum_types with bits set
- * for algorithms we understand. The server masked off
- * the checksum types it doesn't support
- */
- if ((ocd->ocd_cksum_types &
- cksum_types_supported_client()) == 0) {
- LCONSOLE_WARN("The negotiation of the checksum algorithm to use with server %s failed (%x/%x), disabling checksums\n",
- obd2cli_tgt(imp->imp_obd),
- ocd->ocd_cksum_types,
- cksum_types_supported_client());
- cli->cl_checksum = 0;
- cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
- } else {
- cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
- }
- } else {
- /* The server does not support OBD_CONNECT_CKSUM.
- * Enforce ADLER for backward compatibility
- */
- cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
- }
- cli->cl_cksum_type = cksum_type_select(cli->cl_supp_cksum_types);
-
- if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
- cli->cl_max_pages_per_rpc =
- min(ocd->ocd_brw_size >> PAGE_SHIFT,
- cli->cl_max_pages_per_rpc);
- else if (imp->imp_connect_op == MDS_CONNECT ||
- imp->imp_connect_op == MGS_CONNECT)
- cli->cl_max_pages_per_rpc = 1;
-
- /* Reset ns_connect_flags only for initial connect. It might be
- * changed in while using FS and if we reset it in reconnect
- * this leads to losing user settings done before such as
- * disable lru_resize, etc.
- */
- if (old_connect_flags != exp_connect_flags(exp) ||
- aa->pcaa_initial_connect) {
- CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server flags: %#llx\n",
- imp->imp_obd->obd_name, ocd->ocd_connect_flags);
- imp->imp_obd->obd_namespace->ns_connect_flags =
- ocd->ocd_connect_flags;
- imp->imp_obd->obd_namespace->ns_orig_connect_flags =
- ocd->ocd_connect_flags;
- }
-
- if ((ocd->ocd_connect_flags & OBD_CONNECT_AT) &&
- (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
- /* We need a per-message support flag, because
- * a. we don't know if the incoming connect reply
- * supports AT or not (in reply_in_callback)
- * until we unpack it.
- * b. failovered server means export and flags are gone
- * (in ptlrpc_send_reply).
- * Can only be set when we know AT is supported at
- * both ends
- */
- imp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
- else
- imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
-
- if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) &&
- (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
- imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
- else
- imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
-
- LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
- (cli->cl_max_pages_per_rpc > 0));
+ if (rc == -ENOTCONN) {
+ CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+ ptlrpc_connect_import(imp);
+ imp->imp_connect_tried = 1;
+ return 0;
}
out:
@@ -1497,10 +1520,13 @@ EXPORT_SYMBOL(ptlrpc_disconnect_import);
/* Adaptive Timeout utils */
extern unsigned int at_min, at_max, at_history;
-/* Bin into timeslices using AT_BINS bins.
- * This gives us a max of the last binlimit*AT_BINS secs without the storage,
- * but still smoothing out a return to normalcy from a slow response.
- * (E.g. remember the maximum latency in each minute of the last 4 minutes.)
+/*
+ * Update at_current with the specified value (bounded by at_min and at_max),
+ * as well as the AT history "bins".
+ * - Bin into timeslices using AT_BINS bins.
+ * - This gives us a max of the last at_history seconds without the storage,
+ * but still smoothing out a return to normalcy from a slow response.
+ * - (E.g. remember the maximum latency in each minute of the last 4 minutes.)
*/
int at_measured(struct adaptive_timeout *at, unsigned int val)
{
diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c
index ab5d85174245..839ef3e80c1a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/layout.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c
@@ -667,11 +667,8 @@ static struct req_format *req_formats[] = {
&RQF_MDS_SYNC,
&RQF_MDS_CLOSE,
&RQF_MDS_RELEASE_CLOSE,
- &RQF_MDS_PIN,
- &RQF_MDS_UNPIN,
&RQF_MDS_READPAGE,
&RQF_MDS_WRITEPAGE,
- &RQF_MDS_IS_SUBDIR,
&RQF_MDS_DONE_WRITING,
&RQF_MDS_REINT,
&RQF_MDS_REINT_CREATE,
@@ -1116,9 +1113,9 @@ EXPORT_SYMBOL(RMF_SWAP_LAYOUTS);
struct req_format {
const char *rf_name;
- int rf_idx;
+ size_t rf_idx;
struct {
- int nr;
+ size_t nr;
const struct req_msg_field **d;
} rf_fields[RCL_NR];
};
@@ -1389,15 +1386,6 @@ struct req_format RQF_MDS_RELEASE_CLOSE =
mdt_release_close_client, mds_last_unlink_server);
EXPORT_SYMBOL(RQF_MDS_RELEASE_CLOSE);
-struct req_format RQF_MDS_PIN =
- DEFINE_REQ_FMT0("MDS_PIN",
- mdt_body_capa, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_PIN);
-
-struct req_format RQF_MDS_UNPIN =
- DEFINE_REQ_FMT0("MDS_UNPIN", mdt_body_only, empty);
-EXPORT_SYMBOL(RQF_MDS_UNPIN);
-
struct req_format RQF_MDS_DONE_WRITING =
DEFINE_REQ_FMT0("MDS_DONE_WRITING",
mdt_close_client, mdt_body_only);
@@ -1448,11 +1436,6 @@ struct req_format RQF_MDS_WRITEPAGE =
mdt_body_capa, mdt_body_only);
EXPORT_SYMBOL(RQF_MDS_WRITEPAGE);
-struct req_format RQF_MDS_IS_SUBDIR =
- DEFINE_REQ_FMT0("MDS_IS_SUBDIR",
- mdt_body_only, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_IS_SUBDIR);
-
struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE =
DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_CREATE",
llog_origin_handle_create_client, llogd_body_only);
@@ -1572,9 +1555,9 @@ EXPORT_SYMBOL(RQF_OST_GET_INFO_FIEMAP);
*/
int req_layout_init(void)
{
- int i;
- int j;
- int k;
+ size_t i;
+ size_t j;
+ size_t k;
struct req_format *rf = NULL;
for (i = 0; i < ARRAY_SIZE(req_formats); ++i) {
@@ -1616,7 +1599,7 @@ EXPORT_SYMBOL(req_layout_fini);
*/
static void req_capsule_init_area(struct req_capsule *pill)
{
- int i;
+ size_t i;
for (i = 0; i < ARRAY_SIZE(pill->rc_area[RCL_CLIENT]); i++) {
pill->rc_area[RCL_CLIENT][i] = -1;
@@ -1667,8 +1650,7 @@ EXPORT_SYMBOL(req_capsule_fini);
static int __req_format_is_sane(const struct req_format *fmt)
{
- return
- 0 <= fmt->rf_idx && fmt->rf_idx < ARRAY_SIZE(req_formats) &&
+ return fmt->rf_idx < ARRAY_SIZE(req_formats) &&
req_formats[fmt->rf_idx] == fmt;
}
@@ -1702,11 +1684,11 @@ EXPORT_SYMBOL(req_capsule_set);
* variable-sized fields. The field sizes come from the declared \a rmf_size
* field of a \a pill's \a rc_fmt's RMF's.
*/
-int req_capsule_filled_sizes(struct req_capsule *pill,
- enum req_location loc)
+size_t req_capsule_filled_sizes(struct req_capsule *pill,
+ enum req_location loc)
{
const struct req_format *fmt = pill->rc_fmt;
- int i;
+ size_t i;
for (i = 0; i < fmt->rf_fields[loc].nr; ++i) {
if (pill->rc_area[loc][i] == -1) {
@@ -1761,11 +1743,11 @@ EXPORT_SYMBOL(req_capsule_server_pack);
* Returns the PTLRPC request or reply (\a loc) buffer offset of a \a pill
* corresponding to the given RMF (\a field).
*/
-static int __req_capsule_offset(const struct req_capsule *pill,
+static u32 __req_capsule_offset(const struct req_capsule *pill,
const struct req_msg_field *field,
enum req_location loc)
{
- int offset;
+ u32 offset;
offset = field->rmf_offset[pill->rc_fmt->rf_idx][loc];
LASSERTF(offset > 0, "%s:%s, off=%d, loc=%d\n", pill->rc_fmt->rf_name,
@@ -1869,10 +1851,10 @@ static void *__req_capsule_get(struct req_capsule *pill,
const struct req_format *fmt;
struct lustre_msg *msg;
void *value;
- int len;
- int offset;
+ u32 len;
+ u32 offset;
- void *(*getter)(struct lustre_msg *m, int n, int minlen);
+ void *(*getter)(struct lustre_msg *m, u32 n, u32 minlen);
static const char *rcl_names[RCL_NR] = {
[RCL_CLIENT] = "client",
@@ -1899,20 +1881,20 @@ static void *__req_capsule_get(struct req_capsule *pill,
*/
len = lustre_msg_buflen(msg, offset);
if ((len % field->rmf_size) != 0) {
- CERROR("%s: array field size mismatch %d modulo %d != 0 (%d)\n",
+ CERROR("%s: array field size mismatch %d modulo %u != 0 (%d)\n",
field->rmf_name, len, field->rmf_size, loc);
return NULL;
}
} else if (pill->rc_area[loc][offset] != -1) {
len = pill->rc_area[loc][offset];
} else {
- len = max(field->rmf_size, 0);
+ len = max_t(typeof(field->rmf_size), field->rmf_size, 0);
}
value = getter(msg, offset, len);
if (!value) {
DEBUG_REQ(D_ERROR, pill->rc_req,
- "Wrong buffer for field `%s' (%d of %d) in format `%s': %d vs. %d (%s)\n",
+ "Wrong buffer for field `%s' (%u of %u) in format `%s': %u vs. %u (%s)\n",
field->rmf_name, offset, lustre_msg_bufcount(msg),
fmt->rf_name, lustre_msg_buflen(msg, offset), len,
rcl_names[loc]);
@@ -1958,7 +1940,7 @@ EXPORT_SYMBOL(req_capsule_client_swab_get);
*/
void *req_capsule_client_sized_get(struct req_capsule *pill,
const struct req_msg_field *field,
- int len)
+ u32 len)
{
req_capsule_set_size(pill, field, RCL_CLIENT, len);
return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0);
@@ -1999,7 +1981,7 @@ EXPORT_SYMBOL(req_capsule_server_swab_get);
*/
void *req_capsule_server_sized_get(struct req_capsule *pill,
const struct req_msg_field *field,
- int len)
+ u32 len)
{
req_capsule_set_size(pill, field, RCL_SERVER, len);
return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0);
@@ -2008,7 +1990,7 @@ EXPORT_SYMBOL(req_capsule_server_sized_get);
void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
const struct req_msg_field *field,
- int len, void *swabber)
+ u32 len, void *swabber)
{
req_capsule_set_size(pill, field, RCL_SERVER, len);
return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0);
@@ -2024,23 +2006,25 @@ EXPORT_SYMBOL(req_capsule_server_sized_swab_get);
*/
void req_capsule_set_size(struct req_capsule *pill,
const struct req_msg_field *field,
- enum req_location loc, int size)
+ enum req_location loc, u32 size)
{
LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
- if ((size != field->rmf_size) &&
+ if ((size != (u32)field->rmf_size) &&
(field->rmf_size != -1) &&
!(field->rmf_flags & RMF_F_NO_SIZE_CHECK) &&
(size > 0)) {
+ u32 rmf_size = (u32)field->rmf_size;
+
if ((field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
- (size % field->rmf_size != 0)) {
- CERROR("%s: array field size mismatch %d %% %d != 0 (%d)\n",
- field->rmf_name, size, field->rmf_size, loc);
+ (size % rmf_size != 0)) {
+ CERROR("%s: array field size mismatch %u %% %u != 0 (%d)\n",
+ field->rmf_name, size, rmf_size, loc);
LBUG();
} else if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
- size < field->rmf_size) {
- CERROR("%s: field size mismatch %d != %d (%d)\n",
- field->rmf_name, size, field->rmf_size, loc);
+ size < rmf_size) {
+ CERROR("%s: field size mismatch %u != %u (%d)\n",
+ field->rmf_name, size, rmf_size, loc);
LBUG();
}
}
@@ -2057,7 +2041,7 @@ EXPORT_SYMBOL(req_capsule_set_size);
* actually sets the size in pill.rc_area[loc][offset], but this function
* returns the message buflen[offset], maybe we should use another name.
*/
-int req_capsule_get_size(const struct req_capsule *pill,
+u32 req_capsule_get_size(const struct req_capsule *pill,
const struct req_msg_field *field,
enum req_location loc)
{
@@ -2075,7 +2059,7 @@ EXPORT_SYMBOL(req_capsule_get_size);
*
* See also req_capsule_set_size().
*/
-int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc)
+u32 req_capsule_msg_size(struct req_capsule *pill, enum req_location loc)
{
return lustre_msg_size(pill->rc_req->rq_import->imp_msg_magic,
pill->rc_fmt->rf_fields[loc].nr,
@@ -2090,10 +2074,11 @@ int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc)
* This function should not be used for formats which contain variable size
* fields.
*/
-int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
+u32 req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
enum req_location loc)
{
- int size, i = 0;
+ size_t i = 0;
+ u32 size;
/*
* This function should probably LASSERT() that fmt has no fields with
@@ -2103,7 +2088,7 @@ int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
* we do.
*/
size = lustre_msg_hdr_size(magic, fmt->rf_fields[loc].nr);
- if (size < 0)
+ if (!size)
return size;
for (; i < fmt->rf_fields[loc].nr; ++i)
@@ -2135,7 +2120,7 @@ int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt)
{
int i;
- int j;
+ size_t j;
const struct req_format *old;
@@ -2193,7 +2178,7 @@ static int req_capsule_field_present(const struct req_capsule *pill,
const struct req_msg_field *field,
enum req_location loc)
{
- int offset;
+ u32 offset;
LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
LASSERT(req_capsule_has_field(pill, field, loc));
@@ -2210,12 +2195,11 @@ static int req_capsule_field_present(const struct req_capsule *pill,
*/
void req_capsule_shrink(struct req_capsule *pill,
const struct req_msg_field *field,
- unsigned int newlen,
- enum req_location loc)
+ u32 newlen, enum req_location loc)
{
const struct req_format *fmt;
struct lustre_msg *msg;
- int len;
+ u32 len;
int offset;
fmt = pill->rc_fmt;
@@ -2228,7 +2212,7 @@ void req_capsule_shrink(struct req_capsule *pill,
msg = __req_msg(pill, loc);
len = lustre_msg_buflen(msg, offset);
- LASSERTF(newlen <= len, "%s:%s, oldlen=%d, newlen=%d\n",
+ LASSERTF(newlen <= len, "%s:%s, oldlen=%u, newlen=%u\n",
fmt->rf_name, field->rmf_name, len, newlen);
if (loc == RCL_CLIENT)
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
index bc93b75744e1..9bad57d65db4 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
@@ -191,7 +191,7 @@ ptlrpc_ldebugfs_register(struct dentry *root, char *dir,
LASSERT(!*debugfs_root_ret);
LASSERT(!*stats_ret);
- svc_stats = lprocfs_alloc_stats(EXTRA_MAX_OPCODES+LUSTRE_MAX_OPCODES,
+ svc_stats = lprocfs_alloc_stats(EXTRA_MAX_OPCODES + LUSTRE_MAX_OPCODES,
0);
if (!svc_stats)
return;
@@ -937,7 +937,7 @@ static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter)
static int
ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file)
{
- static struct seq_operations sops = {
+ static const struct seq_operations sops = {
.start = ptlrpc_lprocfs_svc_req_history_start,
.stop = ptlrpc_lprocfs_svc_req_history_stop,
.next = ptlrpc_lprocfs_svc_req_history_next,
diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
index 11ec82545347..9c937398a085 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
@@ -295,7 +295,6 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
}
return 0;
}
-EXPORT_SYMBOL(ptlrpc_unregister_bulk);
static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
{
@@ -398,7 +397,8 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
lustre_msg_set_status(req->rq_repmsg,
ptlrpc_status_hton(req->rq_status));
lustre_msg_set_opc(req->rq_repmsg,
- req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);
+ req->rq_reqmsg ?
+ lustre_msg_get_opc(req->rq_reqmsg) : 0);
target_pack_pool_reply(req);
@@ -433,7 +433,6 @@ out:
ptlrpc_connection_put(conn);
return rc;
}
-EXPORT_SYMBOL(ptlrpc_send_reply);
int ptlrpc_reply(struct ptlrpc_request *req)
{
@@ -441,7 +440,6 @@ int ptlrpc_reply(struct ptlrpc_request *req)
return 0;
return ptlrpc_send_reply(req, 0);
}
-EXPORT_SYMBOL(ptlrpc_reply);
/**
* For request \a req send an error reply back. Create empty
@@ -468,13 +466,11 @@ int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult)
rc = ptlrpc_send_reply(req, may_be_difficult);
return rc;
}
-EXPORT_SYMBOL(ptlrpc_send_error);
int ptlrpc_error(struct ptlrpc_request *req)
{
return ptlrpc_send_error(req, 0);
}
-EXPORT_SYMBOL(ptlrpc_error);
/**
* Send request \a request.
@@ -490,7 +486,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
struct ptlrpc_connection *connection;
lnet_handle_me_t reply_me_h;
lnet_md_t reply_md;
- struct obd_device *obd = request->rq_import->imp_obd;
+ struct obd_import *imp = request->rq_import;
+ struct obd_device *obd = imp->imp_obd;
if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
return 0;
@@ -503,7 +500,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
*/
LASSERT(!request->rq_receiving_reply);
LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
- (request->rq_import->imp_state == LUSTRE_IMP_FULL)));
+ (imp->imp_state == LUSTRE_IMP_FULL)));
if (unlikely(obd && obd->obd_fail)) {
CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
@@ -516,15 +513,22 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
return -ENODEV;
}
- connection = request->rq_import->imp_connection;
+ connection = imp->imp_connection;
lustre_msg_set_handle(request->rq_reqmsg,
- &request->rq_import->imp_remote_handle);
+ &imp->imp_remote_handle);
lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
- lustre_msg_set_conn_cnt(request->rq_reqmsg,
- request->rq_import->imp_conn_cnt);
- lustre_msghdr_set_flags(request->rq_reqmsg,
- request->rq_import->imp_msghdr_flags);
+ lustre_msg_set_conn_cnt(request->rq_reqmsg, imp->imp_conn_cnt);
+ lustre_msghdr_set_flags(request->rq_reqmsg, imp->imp_msghdr_flags);
+
+ /**
+ * When AT is enabled, all requests should have AT_SUPPORT in the
+ * FULL import state when OBD_CONNECT_AT is set
+ */
+ LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
+ (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
+ !(imp->imp_connect_data.ocd_connect_flags &
+ OBD_CONNECT_AT));
if (request->rq_resend)
lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
@@ -628,7 +632,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
ptlrpc_request_addref(request);
if (obd && obd->obd_svc_stats)
lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
- atomic_read(&request->rq_import->imp_inflight));
+ atomic_read(&imp->imp_inflight));
OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
@@ -640,7 +644,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
request->rq_deadline = request->rq_sent + request->rq_timeout +
ptlrpc_at_get_net_latency(request);
- ptlrpc_pinger_sending_on_import(request->rq_import);
+ ptlrpc_pinger_sending_on_import(imp);
DEBUG_REQ(D_INFO, request, "send flg=%x",
lustre_msg_get_flags(request->rq_reqmsg));
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
index b514f18fae50..871768511e8c 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
@@ -50,36 +50,34 @@
#include "ptlrpc_internal.h"
-static inline int lustre_msg_hdr_size_v2(int count)
+static inline u32 lustre_msg_hdr_size_v2(u32 count)
{
return cfs_size_round(offsetof(struct lustre_msg_v2,
lm_buflens[count]));
}
-int lustre_msg_hdr_size(__u32 magic, int count)
+u32 lustre_msg_hdr_size(__u32 magic, u32 count)
{
switch (magic) {
case LUSTRE_MSG_MAGIC_V2:
return lustre_msg_hdr_size_v2(count);
default:
LASSERTF(0, "incorrect message magic: %08x\n", magic);
- return -EINVAL;
+ return 0;
}
}
-EXPORT_SYMBOL(lustre_msg_hdr_size);
void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
- int index)
+ u32 index)
{
if (inout)
lustre_set_req_swabbed(req, index);
else
lustre_set_rep_swabbed(req, index);
}
-EXPORT_SYMBOL(ptlrpc_buf_set_swabbed);
int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
- int index)
+ u32 index)
{
if (inout)
return (ptlrpc_req_need_swab(req) &&
@@ -88,12 +86,11 @@ int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
return (ptlrpc_rep_need_swab(req) &&
!lustre_rep_swabbed(req, index));
}
-EXPORT_SYMBOL(ptlrpc_buf_need_swab);
/* early reply size */
-int lustre_msg_early_size(void)
+u32 lustre_msg_early_size(void)
{
- static int size;
+ static u32 size;
if (!size) {
/* Always reply old ptlrpc_body_v2 to keep interoperability
@@ -111,9 +108,9 @@ int lustre_msg_early_size(void)
}
EXPORT_SYMBOL(lustre_msg_early_size);
-int lustre_msg_size_v2(int count, __u32 *lengths)
+u32 lustre_msg_size_v2(int count, __u32 *lengths)
{
- int size;
+ u32 size;
int i;
size = lustre_msg_hdr_size_v2(count);
@@ -131,7 +128,7 @@ EXPORT_SYMBOL(lustre_msg_size_v2);
* target then the first buffer will be stripped because the ptlrpc
* data is part of the lustre_msg_v1 header. b=14043
*/
-int lustre_msg_size(__u32 magic, int count, __u32 *lens)
+u32 lustre_msg_size(__u32 magic, int count, __u32 *lens)
{
__u32 size[] = { sizeof(struct ptlrpc_body) };
@@ -148,15 +145,14 @@ int lustre_msg_size(__u32 magic, int count, __u32 *lens)
return lustre_msg_size_v2(count, lens);
default:
LASSERTF(0, "incorrect message magic: %08x\n", magic);
- return -EINVAL;
+ return 0;
}
}
-EXPORT_SYMBOL(lustre_msg_size);
/* This is used to determine the size of a buffer that was already packed
* and will correctly handle the different message formats.
*/
-int lustre_packed_msg_size(struct lustre_msg *msg)
+u32 lustre_packed_msg_size(struct lustre_msg *msg)
{
switch (msg->lm_magic) {
case LUSTRE_MSG_MAGIC_V2:
@@ -166,7 +162,6 @@ int lustre_packed_msg_size(struct lustre_msg *msg)
return 0;
}
}
-EXPORT_SYMBOL(lustre_packed_msg_size);
void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens,
char **bufs)
@@ -227,7 +222,6 @@ int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count,
/* only use new format, we don't need to be compatible with 1.4 */
return lustre_pack_request_v2(req, count, lens, bufs);
}
-EXPORT_SYMBOL(lustre_pack_request);
#if RS_DEBUG
LIST_HEAD(ptlrpc_rs_debug_lru);
@@ -369,7 +363,6 @@ int lustre_pack_reply_flags(struct ptlrpc_request *req, int count, __u32 *lens,
lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens));
return rc;
}
-EXPORT_SYMBOL(lustre_pack_reply_flags);
int lustre_pack_reply(struct ptlrpc_request *req, int count, __u32 *lens,
char **bufs)
@@ -378,11 +371,9 @@ int lustre_pack_reply(struct ptlrpc_request *req, int count, __u32 *lens,
}
EXPORT_SYMBOL(lustre_pack_reply);
-void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size)
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, u32 n, u32 min_size)
{
- int i, offset, buflen, bufcount;
-
- LASSERT(n >= 0);
+ u32 i, offset, buflen, bufcount;
bufcount = m->lm_bufcount;
if (unlikely(n >= bufcount)) {
@@ -406,7 +397,7 @@ void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size)
return (char *)m + offset;
}
-void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
+void *lustre_msg_buf(struct lustre_msg *m, u32 n, u32 min_size)
{
switch (m->lm_magic) {
case LUSTRE_MSG_MAGIC_V2:
@@ -419,7 +410,7 @@ void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
}
EXPORT_SYMBOL(lustre_msg_buf);
-static int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, int segment,
+static int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, u32 segment,
unsigned int newlen, int move_data)
{
char *tail = NULL, *newpos;
@@ -493,7 +484,6 @@ void lustre_free_reply_state(struct ptlrpc_reply_state *rs)
sptlrpc_svc_free_rs(rs);
}
-EXPORT_SYMBOL(lustre_free_reply_state);
static int lustre_unpack_msg_v2(struct lustre_msg_v2 *m, int len)
{
@@ -581,7 +571,6 @@ int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len)
}
return rc;
}
-EXPORT_SYMBOL(ptlrpc_unpack_req_msg);
int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len)
{
@@ -594,7 +583,6 @@ int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len)
}
return rc;
}
-EXPORT_SYMBOL(ptlrpc_unpack_rep_msg);
static inline int lustre_unpack_ptlrpc_body_v2(struct ptlrpc_request *req,
const int inout, int offset)
@@ -647,7 +635,7 @@ int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset)
}
}
-static inline int lustre_msg_buflen_v2(struct lustre_msg_v2 *m, int n)
+static inline u32 lustre_msg_buflen_v2(struct lustre_msg_v2 *m, u32 n)
{
if (n >= m->lm_bufcount)
return 0;
@@ -662,14 +650,14 @@ static inline int lustre_msg_buflen_v2(struct lustre_msg_v2 *m, int n)
*
* returns zero for non-existent message indices
*/
-int lustre_msg_buflen(struct lustre_msg *m, int n)
+u32 lustre_msg_buflen(struct lustre_msg *m, u32 n)
{
switch (m->lm_magic) {
case LUSTRE_MSG_MAGIC_V2:
return lustre_msg_buflen_v2(m, n);
default:
CERROR("incorrect message magic: %08x\n", m->lm_magic);
- return -EINVAL;
+ return 0;
}
}
EXPORT_SYMBOL(lustre_msg_buflen);
@@ -677,23 +665,22 @@ EXPORT_SYMBOL(lustre_msg_buflen);
/* NB return the bufcount for lustre_msg_v2 format, so if message is packed
* in V1 format, the result is one bigger. (add struct ptlrpc_body).
*/
-int lustre_msg_bufcount(struct lustre_msg *m)
+u32 lustre_msg_bufcount(struct lustre_msg *m)
{
switch (m->lm_magic) {
case LUSTRE_MSG_MAGIC_V2:
return m->lm_bufcount;
default:
CERROR("incorrect message magic: %08x\n", m->lm_magic);
- return -EINVAL;
+ return 0;
}
}
-EXPORT_SYMBOL(lustre_msg_bufcount);
-char *lustre_msg_string(struct lustre_msg *m, int index, int max_len)
+char *lustre_msg_string(struct lustre_msg *m, u32 index, u32 max_len)
{
/* max_len == 0 means the string should fill the buffer */
char *str;
- int slen, blen;
+ u32 slen, blen;
switch (m->lm_magic) {
case LUSTRE_MSG_MAGIC_V2:
@@ -731,11 +718,10 @@ char *lustre_msg_string(struct lustre_msg *m, int index, int max_len)
return str;
}
-EXPORT_SYMBOL(lustre_msg_string);
/* Wrap up the normal fixed length cases */
-static inline void *__lustre_swab_buf(struct lustre_msg *msg, int index,
- int min_size, void *swabber)
+static inline void *__lustre_swab_buf(struct lustre_msg *msg, u32 index,
+ u32 min_size, void *swabber)
{
void *ptr = NULL;
@@ -804,7 +790,7 @@ __u32 lustre_msg_get_flags(struct lustre_msg *msg)
}
EXPORT_SYMBOL(lustre_msg_get_flags);
-void lustre_msg_add_flags(struct lustre_msg *msg, int flags)
+void lustre_msg_add_flags(struct lustre_msg *msg, u32 flags)
{
switch (msg->lm_magic) {
case LUSTRE_MSG_MAGIC_V2: {
@@ -820,7 +806,7 @@ void lustre_msg_add_flags(struct lustre_msg *msg, int flags)
}
EXPORT_SYMBOL(lustre_msg_add_flags);
-void lustre_msg_set_flags(struct lustre_msg *msg, int flags)
+void lustre_msg_set_flags(struct lustre_msg *msg, u32 flags)
{
switch (msg->lm_magic) {
case LUSTRE_MSG_MAGIC_V2: {
@@ -834,9 +820,8 @@ void lustre_msg_set_flags(struct lustre_msg *msg, int flags)
LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
}
}
-EXPORT_SYMBOL(lustre_msg_set_flags);
-void lustre_msg_clear_flags(struct lustre_msg *msg, int flags)
+void lustre_msg_clear_flags(struct lustre_msg *msg, u32 flags)
{
switch (msg->lm_magic) {
case LUSTRE_MSG_MAGIC_V2: {
@@ -868,9 +853,8 @@ __u32 lustre_msg_get_op_flags(struct lustre_msg *msg)
return 0;
}
}
-EXPORT_SYMBOL(lustre_msg_get_op_flags);
-void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags)
+void lustre_msg_add_op_flags(struct lustre_msg *msg, u32 flags)
{
switch (msg->lm_magic) {
case LUSTRE_MSG_MAGIC_V2: {
@@ -903,7 +887,6 @@ struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg)
return NULL;
}
}
-EXPORT_SYMBOL(lustre_msg_get_handle);
__u32 lustre_msg_get_type(struct lustre_msg *msg)
{
@@ -924,7 +907,7 @@ __u32 lustre_msg_get_type(struct lustre_msg *msg)
}
EXPORT_SYMBOL(lustre_msg_get_type);
-void lustre_msg_add_version(struct lustre_msg *msg, int version)
+void lustre_msg_add_version(struct lustre_msg *msg, u32 version)
{
switch (msg->lm_magic) {
case LUSTRE_MSG_MAGIC_V2: {
@@ -938,7 +921,6 @@ void lustre_msg_add_version(struct lustre_msg *msg, int version)
LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
}
}
-EXPORT_SYMBOL(lustre_msg_add_version);
__u32 lustre_msg_get_opc(struct lustre_msg *msg)
{
@@ -1055,7 +1037,6 @@ __u64 lustre_msg_get_slv(struct lustre_msg *msg)
return -EINVAL;
}
}
-EXPORT_SYMBOL(lustre_msg_get_slv);
void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv)
{
@@ -1075,7 +1056,6 @@ void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv)
return;
}
}
-EXPORT_SYMBOL(lustre_msg_set_slv);
__u32 lustre_msg_get_limit(struct lustre_msg *msg)
{
@@ -1094,7 +1074,6 @@ __u32 lustre_msg_get_limit(struct lustre_msg *msg)
return -EINVAL;
}
}
-EXPORT_SYMBOL(lustre_msg_get_limit);
void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit)
{
@@ -1114,7 +1093,6 @@ void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit)
return;
}
}
-EXPORT_SYMBOL(lustre_msg_set_limit);
__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg)
{
@@ -1145,7 +1123,6 @@ __u32 lustre_msg_get_magic(struct lustre_msg *msg)
return 0;
}
}
-EXPORT_SYMBOL(lustre_msg_get_magic);
__u32 lustre_msg_get_timeout(struct lustre_msg *msg)
{
@@ -1203,8 +1180,9 @@ __u32 lustre_msg_calc_cksum(struct lustre_msg *msg)
unsigned int hsize = 4;
cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, (unsigned char *)pb,
- lustre_msg_buflen(msg, MSG_PTLRPC_BODY_OFF),
- NULL, 0, (unsigned char *)&crc, &hsize);
+ lustre_msg_buflen(msg,
+ MSG_PTLRPC_BODY_OFF),
+ NULL, 0, (unsigned char *)&crc, &hsize);
return crc;
}
default:
@@ -1227,7 +1205,6 @@ void lustre_msg_set_handle(struct lustre_msg *msg, struct lustre_handle *handle)
LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
}
}
-EXPORT_SYMBOL(lustre_msg_set_handle);
void lustre_msg_set_type(struct lustre_msg *msg, __u32 type)
{
@@ -1243,7 +1220,6 @@ void lustre_msg_set_type(struct lustre_msg *msg, __u32 type)
LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
}
}
-EXPORT_SYMBOL(lustre_msg_set_type);
void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc)
{
@@ -1259,7 +1235,6 @@ void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc)
LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
}
}
-EXPORT_SYMBOL(lustre_msg_set_opc);
void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions)
{
@@ -1326,7 +1301,6 @@ void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt)
LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
}
}
-EXPORT_SYMBOL(lustre_msg_set_conn_cnt);
void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout)
{
@@ -1377,7 +1351,7 @@ void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid)
LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
if (jobid)
- memcpy(pb->pb_jobid, jobid, JOBSTATS_JOBID_SIZE);
+ memcpy(pb->pb_jobid, jobid, LUSTRE_JOBID_SIZE);
else if (pb->pb_jobid[0] == '\0')
lustre_get_jobid(pb->pb_jobid);
return;
@@ -1491,7 +1465,6 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *b)
*/
CLASSERT(offsetof(typeof(*b), pb_jobid) != 0);
}
-EXPORT_SYMBOL(lustre_swab_ptlrpc_body);
void lustre_swab_connect(struct obd_connect_data *ocd)
{
@@ -1591,7 +1564,6 @@ void lustre_swab_obd_statfs(struct obd_statfs *os)
CLASSERT(offsetof(typeof(*os), os_spare8) != 0);
CLASSERT(offsetof(typeof(*os), os_spare9) != 0);
}
-EXPORT_SYMBOL(lustre_swab_obd_statfs);
void lustre_swab_obd_ioobj(struct obd_ioobj *ioo)
{
@@ -1599,33 +1571,28 @@ void lustre_swab_obd_ioobj(struct obd_ioobj *ioo)
__swab32s(&ioo->ioo_max_brw);
__swab32s(&ioo->ioo_bufcnt);
}
-EXPORT_SYMBOL(lustre_swab_obd_ioobj);
void lustre_swab_niobuf_remote(struct niobuf_remote *nbr)
{
- __swab64s(&nbr->offset);
- __swab32s(&nbr->len);
- __swab32s(&nbr->flags);
+ __swab64s(&nbr->rnb_offset);
+ __swab32s(&nbr->rnb_len);
+ __swab32s(&nbr->rnb_flags);
}
-EXPORT_SYMBOL(lustre_swab_niobuf_remote);
void lustre_swab_ost_body(struct ost_body *b)
{
lustre_swab_obdo(&b->oa);
}
-EXPORT_SYMBOL(lustre_swab_ost_body);
void lustre_swab_ost_last_id(u64 *id)
{
__swab64s(id);
}
-EXPORT_SYMBOL(lustre_swab_ost_last_id);
void lustre_swab_generic_32s(__u32 *val)
{
__swab32s(val);
}
-EXPORT_SYMBOL(lustre_swab_generic_32s);
void lustre_swab_gl_desc(union ldlm_gl_desc *desc)
{
@@ -1674,37 +1641,36 @@ EXPORT_SYMBOL(lustre_swab_lquota_lvb);
void lustre_swab_mdt_body(struct mdt_body *b)
{
- lustre_swab_lu_fid(&b->fid1);
- lustre_swab_lu_fid(&b->fid2);
+ lustre_swab_lu_fid(&b->mbo_fid1);
+ lustre_swab_lu_fid(&b->mbo_fid2);
/* handle is opaque */
- __swab64s(&b->valid);
- __swab64s(&b->size);
- __swab64s(&b->mtime);
- __swab64s(&b->atime);
- __swab64s(&b->ctime);
- __swab64s(&b->blocks);
- __swab64s(&b->ioepoch);
- __swab64s(&b->t_state);
- __swab32s(&b->fsuid);
- __swab32s(&b->fsgid);
- __swab32s(&b->capability);
- __swab32s(&b->mode);
- __swab32s(&b->uid);
- __swab32s(&b->gid);
- __swab32s(&b->flags);
- __swab32s(&b->rdev);
- __swab32s(&b->nlink);
- CLASSERT(offsetof(typeof(*b), unused2) != 0);
- __swab32s(&b->suppgid);
- __swab32s(&b->eadatasize);
- __swab32s(&b->aclsize);
- __swab32s(&b->max_mdsize);
- __swab32s(&b->max_cookiesize);
- __swab32s(&b->uid_h);
- __swab32s(&b->gid_h);
- CLASSERT(offsetof(typeof(*b), padding_5) != 0);
-}
-EXPORT_SYMBOL(lustre_swab_mdt_body);
+ __swab64s(&b->mbo_valid);
+ __swab64s(&b->mbo_size);
+ __swab64s(&b->mbo_mtime);
+ __swab64s(&b->mbo_atime);
+ __swab64s(&b->mbo_ctime);
+ __swab64s(&b->mbo_blocks);
+ __swab64s(&b->mbo_ioepoch);
+ __swab64s(&b->mbo_t_state);
+ __swab32s(&b->mbo_fsuid);
+ __swab32s(&b->mbo_fsgid);
+ __swab32s(&b->mbo_capability);
+ __swab32s(&b->mbo_mode);
+ __swab32s(&b->mbo_uid);
+ __swab32s(&b->mbo_gid);
+ __swab32s(&b->mbo_flags);
+ __swab32s(&b->mbo_rdev);
+ __swab32s(&b->mbo_nlink);
+ CLASSERT(offsetof(typeof(*b), mbo_unused2) != 0);
+ __swab32s(&b->mbo_suppgid);
+ __swab32s(&b->mbo_eadatasize);
+ __swab32s(&b->mbo_aclsize);
+ __swab32s(&b->mbo_max_mdsize);
+ __swab32s(&b->mbo_max_cookiesize);
+ __swab32s(&b->mbo_uid_h);
+ __swab32s(&b->mbo_gid_h);
+ CLASSERT(offsetof(typeof(*b), mbo_padding_5) != 0);
+}
void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b)
{
@@ -1713,7 +1679,6 @@ void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b)
__swab32s(&b->flags);
CLASSERT(offsetof(typeof(*b), padding) != 0);
}
-EXPORT_SYMBOL(lustre_swab_mdt_ioepoch);
void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
{
@@ -1729,11 +1694,10 @@ void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
for (i = 0; i < MTI_NIDS_MAX; i++)
__swab64s(&mti->mti_nids[i]);
}
-EXPORT_SYMBOL(lustre_swab_mgs_target_info);
void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *entry)
{
- int i;
+ __u8 i;
__swab64s(&entry->mne_version);
__swab32s(&entry->mne_instance);
@@ -1760,14 +1724,12 @@ void lustre_swab_mgs_config_body(struct mgs_config_body *body)
__swab32s(&body->mcb_units);
__swab16s(&body->mcb_type);
}
-EXPORT_SYMBOL(lustre_swab_mgs_config_body);
void lustre_swab_mgs_config_res(struct mgs_config_res *body)
{
__swab64s(&body->mcr_offset);
__swab64s(&body->mcr_size);
}
-EXPORT_SYMBOL(lustre_swab_mgs_config_res);
static void lustre_swab_obd_dqinfo(struct obd_dqinfo *i)
{
@@ -1800,7 +1762,6 @@ void lustre_swab_obd_quotactl(struct obd_quotactl *q)
lustre_swab_obd_dqinfo(&q->qc_dqinfo);
lustre_swab_obd_dqblk(&q->qc_dqblk);
}
-EXPORT_SYMBOL(lustre_swab_obd_quotactl);
void lustre_swab_fid2path(struct getinfo_fid2path *gf)
{
@@ -1822,7 +1783,7 @@ static void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent)
void lustre_swab_fiemap(struct ll_user_fiemap *fiemap)
{
- int i;
+ __u32 i;
__swab64s(&fiemap->fm_start);
__swab64s(&fiemap->fm_length);
@@ -1834,7 +1795,6 @@ void lustre_swab_fiemap(struct ll_user_fiemap *fiemap)
for (i = 0; i < fiemap->fm_mapped_extents; i++)
lustre_swab_fiemap_extent(&fiemap->fm_extents[i]);
}
-EXPORT_SYMBOL(lustre_swab_fiemap);
void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr)
{
@@ -1863,7 +1823,6 @@ void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr)
CLASSERT(offsetof(typeof(*rr), rr_padding_4) != 0);
};
-EXPORT_SYMBOL(lustre_swab_mdt_rec_reint);
void lustre_swab_lov_desc(struct lov_desc *ld)
{
@@ -1878,18 +1837,42 @@ void lustre_swab_lov_desc(struct lov_desc *ld)
}
EXPORT_SYMBOL(lustre_swab_lov_desc);
-static void print_lum(struct lov_user_md *lum)
+/* This structure is always in little-endian */
+static void lustre_swab_lmv_mds_md_v1(struct lmv_mds_md_v1 *lmm1)
+{
+ int i;
+
+ __swab32s(&lmm1->lmv_magic);
+ __swab32s(&lmm1->lmv_stripe_count);
+ __swab32s(&lmm1->lmv_master_mdt_index);
+ __swab32s(&lmm1->lmv_hash_type);
+ __swab32s(&lmm1->lmv_layout_version);
+ for (i = 0; i < lmm1->lmv_stripe_count; i++)
+ lustre_swab_lu_fid(&lmm1->lmv_stripe_fids[i]);
+}
+
+void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm)
+{
+ switch (lmm->lmv_magic) {
+ case LMV_MAGIC_V1:
+ lustre_swab_lmv_mds_md_v1(&lmm->lmv_md_v1);
+ break;
+ default:
+ break;
+ }
+}
+EXPORT_SYMBOL(lustre_swab_lmv_mds_md);
+
+void lustre_swab_lmv_user_md(struct lmv_user_md *lum)
{
- CDEBUG(D_OTHER, "lov_user_md %p:\n", lum);
- CDEBUG(D_OTHER, "\tlmm_magic: %#x\n", lum->lmm_magic);
- CDEBUG(D_OTHER, "\tlmm_pattern: %#x\n", lum->lmm_pattern);
- CDEBUG(D_OTHER, "\tlmm_object_id: %llu\n", lmm_oi_id(&lum->lmm_oi));
- CDEBUG(D_OTHER, "\tlmm_object_gr: %llu\n", lmm_oi_seq(&lum->lmm_oi));
- CDEBUG(D_OTHER, "\tlmm_stripe_size: %#x\n", lum->lmm_stripe_size);
- CDEBUG(D_OTHER, "\tlmm_stripe_count: %#x\n", lum->lmm_stripe_count);
- CDEBUG(D_OTHER, "\tlmm_stripe_offset/lmm_layout_gen: %#x\n",
- lum->lmm_stripe_offset);
+ __swab32s(&lum->lum_magic);
+ __swab32s(&lum->lum_stripe_count);
+ __swab32s(&lum->lum_stripe_offset);
+ __swab32s(&lum->lum_hash_type);
+ __swab32s(&lum->lum_type);
+ CLASSERT(offsetof(typeof(*lum), lum_padding1));
}
+EXPORT_SYMBOL(lustre_swab_lmv_user_md);
static void lustre_swab_lmm_oi(struct ost_id *oi)
{
@@ -1905,7 +1888,6 @@ static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum)
__swab32s(&lum->lmm_stripe_size);
__swab16s(&lum->lmm_stripe_count);
__swab16s(&lum->lmm_stripe_offset);
- print_lum(lum);
}
void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum)
@@ -1941,9 +1923,9 @@ void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
int i;
for (i = 0; i < stripe_count; i++) {
- lustre_swab_ost_id(&(lod[i].l_ost_oi));
- __swab32s(&(lod[i].l_ost_gen));
- __swab32s(&(lod[i].l_ost_idx));
+ lustre_swab_ost_id(&lod[i].l_ost_oi);
+ __swab32s(&lod[i].l_ost_gen);
+ __swab32s(&lod[i].l_ost_idx);
}
}
EXPORT_SYMBOL(lustre_swab_lov_user_md_objects);
@@ -1973,7 +1955,6 @@ void lustre_swab_ldlm_intent(struct ldlm_intent *i)
{
__swab64s(&i->opc);
}
-EXPORT_SYMBOL(lustre_swab_ldlm_intent);
static void lustre_swab_ldlm_resource_desc(struct ldlm_resource_desc *r)
{
@@ -1997,7 +1978,6 @@ void lustre_swab_ldlm_request(struct ldlm_request *rq)
__swab32s(&rq->lock_count);
/* lock_handle[] opaque */
}
-EXPORT_SYMBOL(lustre_swab_ldlm_request);
void lustre_swab_ldlm_reply(struct ldlm_reply *r)
{
@@ -2008,7 +1988,6 @@ void lustre_swab_ldlm_reply(struct ldlm_reply *r)
__swab64s(&r->lock_policy_res1);
__swab64s(&r->lock_policy_res2);
}
-EXPORT_SYMBOL(lustre_swab_ldlm_reply);
/* Dump functions */
void dump_ioo(struct obd_ioobj *ioo)
@@ -2018,14 +1997,12 @@ void dump_ioo(struct obd_ioobj *ioo)
POSTID(&ioo->ioo_oid), ioo->ioo_max_brw,
ioo->ioo_bufcnt);
}
-EXPORT_SYMBOL(dump_ioo);
void dump_rniobuf(struct niobuf_remote *nb)
{
CDEBUG(D_RPCTRACE, "niobuf_remote: offset=%llu, len=%d, flags=%x\n",
- nb->offset, nb->len, nb->flags);
+ nb->rnb_offset, nb->rnb_len, nb->rnb_flags);
}
-EXPORT_SYMBOL(dump_rniobuf);
static void dump_obdo(struct obdo *oa)
{
@@ -2093,13 +2070,11 @@ void dump_ost_body(struct ost_body *ob)
{
dump_obdo(&ob->oa);
}
-EXPORT_SYMBOL(dump_ost_body);
void dump_rcs(__u32 *rc)
{
CDEBUG(D_RPCTRACE, "rmf_rcs: %d\n", *rc);
}
-EXPORT_SYMBOL(dump_rcs);
static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req)
{
@@ -2184,14 +2159,12 @@ void lustre_swab_lustre_capa(struct lustre_capa *c)
__swab32s(&c->lc_timeout);
__swab32s(&c->lc_expiry);
}
-EXPORT_SYMBOL(lustre_swab_lustre_capa);
void lustre_swab_hsm_user_state(struct hsm_user_state *state)
{
__swab32s(&state->hus_states);
__swab32s(&state->hus_archive_id);
}
-EXPORT_SYMBOL(lustre_swab_hsm_user_state);
void lustre_swab_hsm_state_set(struct hsm_state_set *hss)
{
@@ -2214,14 +2187,12 @@ void lustre_swab_hsm_current_action(struct hsm_current_action *action)
__swab32s(&action->hca_action);
lustre_swab_hsm_extent(&action->hca_location);
}
-EXPORT_SYMBOL(lustre_swab_hsm_current_action);
void lustre_swab_hsm_user_item(struct hsm_user_item *hui)
{
lustre_swab_lu_fid(&hui->hui_fid);
lustre_swab_hsm_extent(&hui->hui_extent);
}
-EXPORT_SYMBOL(lustre_swab_hsm_user_item);
void lustre_swab_layout_intent(struct layout_intent *li)
{
@@ -2230,7 +2201,6 @@ void lustre_swab_layout_intent(struct layout_intent *li)
__swab64s(&li->li_start);
__swab64s(&li->li_end);
}
-EXPORT_SYMBOL(lustre_swab_layout_intent);
void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk)
{
@@ -2241,7 +2211,6 @@ void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk)
__swab16s(&hpk->hpk_flags);
__swab16s(&hpk->hpk_errval);
}
-EXPORT_SYMBOL(lustre_swab_hsm_progress_kernel);
void lustre_swab_hsm_request(struct hsm_request *hr)
{
@@ -2251,7 +2220,6 @@ void lustre_swab_hsm_request(struct hsm_request *hr)
__swab32s(&hr->hr_itemcount);
__swab32s(&hr->hr_data_len);
}
-EXPORT_SYMBOL(lustre_swab_hsm_request);
void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl)
{
@@ -2264,4 +2232,3 @@ void lustre_swab_close_data(struct close_data *cd)
lustre_swab_lu_fid(&cd->cd_fid);
__swab64s(&cd->cd_data_version);
}
-EXPORT_SYMBOL(lustre_swab_close_data);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c
index 6c820e944171..5b9fb11c0b6b 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pers.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c
@@ -64,9 +64,9 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
{
lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count];
- kiov->kiov_page = page;
- kiov->kiov_offset = pageoffset;
- kiov->kiov_len = len;
+ kiov->bv_page = page;
+ kiov->bv_offset = pageoffset;
+ kiov->bv_len = len;
desc->bd_iov_count++;
}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
index c0529d808d81..5504fc2363ac 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
@@ -340,7 +340,6 @@ void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
{
ptlrpc_update_next_ping(imp, 0);
}
-EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import);
void ptlrpc_pinger_commit_expected(struct obd_import *imp)
{
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
index a9831fab80f3..f14d193287da 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
@@ -53,6 +53,8 @@ int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait);
int ptlrpcd_start(struct ptlrpcd_ctl *pc);
/* client.c */
+void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
+ unsigned int service_time);
struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
unsigned type, unsigned portal);
int ptlrpc_request_cache_init(void);
@@ -60,6 +62,11 @@ void ptlrpc_request_cache_fini(void);
struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags);
void ptlrpc_request_cache_free(struct ptlrpc_request *req);
void ptlrpc_init_xid(void);
+void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
+ struct ptlrpc_request *req);
+int ptlrpc_expired_set(void *data);
+int ptlrpc_set_next_timeout(struct ptlrpc_request_set *);
+void ptlrpc_resend_req(struct ptlrpc_request *request);
/* events.c */
int ptlrpc_init_portals(void);
@@ -268,7 +275,7 @@ void sptlrpc_conf_fini(void);
int sptlrpc_init(void);
void sptlrpc_fini(void);
-static inline int ll_rpc_recoverable_error(int rc)
+static inline bool ptlrpc_recoverable_error(int rc)
{
return (rc == -ENOTCONN || rc == -ENODEV);
}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
index 0a374b6c2f71..1f55d642aa75 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
@@ -412,7 +412,7 @@ static int ptlrpcd(void *arg)
* an argument, describing its "scope".
*/
rc = lu_context_init(&env.le_ctx,
- LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
+ LCT_CL_THREAD | LCT_REMEMBER | LCT_NOREF);
if (rc == 0) {
rc = lu_context_init(env.le_ses,
LCT_SESSION | LCT_REMEMBER | LCT_NOREF);
@@ -567,7 +567,7 @@ int ptlrpcd_start(struct ptlrpcd_ctl *pc)
* ptlrpcd thread (or a thread-set) has to be given an argument,
* describing its "scope".
*/
- rc = lu_context_init(&pc->pc_env.le_ctx, LCT_CL_THREAD|LCT_REMEMBER);
+ rc = lu_context_init(&pc->pc_env.le_ctx, LCT_CL_THREAD | LCT_REMEMBER);
if (rc != 0)
goto out;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c
index 718b3a8d61c6..405faf0dc9fc 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/recover.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c
@@ -201,7 +201,6 @@ int ptlrpc_resend(struct obd_import *imp)
return 0;
}
-EXPORT_SYMBOL(ptlrpc_resend);
/**
* Go through all requests in delayed list and wake their threads
@@ -221,7 +220,6 @@ void ptlrpc_wake_delayed(struct obd_import *imp)
}
spin_unlock(&imp->imp_lock);
}
-EXPORT_SYMBOL(ptlrpc_wake_delayed);
void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
{
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c
index dbd819fa6b75..5d3995d5c69a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c
@@ -311,6 +311,19 @@ static int import_sec_check_expire(struct obd_import *imp)
return sptlrpc_import_sec_adapt(imp, NULL, NULL);
}
+/**
+ * Get and validate the client side ptlrpc security facilities from
+ * \a imp. There is a race condition on client reconnect when the import is
+ * being destroyed while there are outstanding client bound requests. In
+ * this case do not output any error messages if import security is not
+ * found.
+ *
+ * \param[in] imp obd import associated with client
+ * \param[out] sec client side ptlrpc security
+ *
+ * \retval 0 if security retrieved successfully
+ * \retval -ve errno if there was a problem
+ */
static int import_sec_validate_get(struct obd_import *imp,
struct ptlrpc_sec **sec)
{
@@ -323,9 +336,11 @@ static int import_sec_validate_get(struct obd_import *imp,
}
*sec = sptlrpc_import_sec_ref(imp);
+ /* Only output an error when the import is still active */
if (!*sec) {
- CERROR("import %p (%s) with no sec\n",
- imp, ptlrpc_import_state_name(imp->imp_state));
+ if (list_empty(&imp->imp_zombie_chain))
+ CERROR("import %p (%s) with no sec\n",
+ imp, ptlrpc_import_state_name(imp->imp_state));
return -EACCES;
}
@@ -499,7 +514,7 @@ static int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
newctx, newctx->cc_flags);
set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ);
+ schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC));
} else {
/*
* it's possible newctx == oldctx if we're switching
@@ -718,8 +733,9 @@ again:
req->rq_restart = 0;
spin_unlock(&req->rq_lock);
- lwi = LWI_TIMEOUT_INTR(timeout * HZ, ctx_refresh_timeout,
- ctx_refresh_interrupt, req);
+ lwi = LWI_TIMEOUT_INTR(msecs_to_jiffies(timeout * MSEC_PER_SEC),
+ ctx_refresh_timeout, ctx_refresh_interrupt,
+ req);
rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
/*
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
index 5f4d79718589..b2cc5ea6cb93 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -139,7 +139,7 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
"cache missing: %lu\n"
"low free mark: %lu\n"
"max waitqueue depth: %u\n"
- "max wait time: %ld/%u\n",
+ "max wait time: %ld/%lu\n",
totalram_pages,
PAGES_PER_POOL,
page_pools.epp_max_pages,
@@ -158,7 +158,7 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
page_pools.epp_st_lowfree,
page_pools.epp_st_max_wqlen,
page_pools.epp_st_max_wait,
- HZ);
+ msecs_to_jiffies(MSEC_PER_SEC));
spin_unlock(&page_pools.epp_lock);
@@ -326,12 +326,12 @@ void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
LASSERT(page_pools.epp_pools[p_idx]);
for (i = 0; i < desc->bd_iov_count; i++) {
- LASSERT(desc->bd_enc_iov[i].kiov_page);
+ LASSERT(desc->bd_enc_iov[i].bv_page);
LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
LASSERT(!page_pools.epp_pools[p_idx][g_idx]);
page_pools.epp_pools[p_idx][g_idx] =
- desc->bd_enc_iov[i].kiov_page;
+ desc->bd_enc_iov[i].bv_page;
if (++g_idx == PAGES_PER_POOL) {
p_idx++;
@@ -348,7 +348,6 @@ void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
kfree(desc->bd_enc_iov);
desc->bd_enc_iov = NULL;
}
-EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages);
static inline void enc_pools_alloc(void)
{
@@ -432,12 +431,13 @@ void sptlrpc_enc_pool_fini(void)
if (page_pools.epp_st_access > 0) {
CDEBUG(D_SEC,
- "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%d\n",
+ "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%ld\n",
page_pools.epp_st_max_pages, page_pools.epp_st_grows,
page_pools.epp_st_grow_fails,
page_pools.epp_st_shrinks, page_pools.epp_st_access,
page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
- page_pools.epp_st_max_wait, HZ);
+ page_pools.epp_st_max_wait,
+ msecs_to_jiffies(MSEC_PER_SEC));
}
}
@@ -456,13 +456,11 @@ const char *sptlrpc_get_hash_name(__u8 hash_alg)
{
return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]);
}
-EXPORT_SYMBOL(sptlrpc_get_hash_name);
__u8 sptlrpc_get_hash_alg(const char *algname)
{
return cfs_crypto_hash_alg(algname);
}
-EXPORT_SYMBOL(sptlrpc_get_hash_alg);
int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
{
@@ -522,9 +520,10 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);
for (i = 0; i < desc->bd_iov_count; i++) {
- cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
- desc->bd_iov[i].kiov_offset & ~PAGE_MASK,
- desc->bd_iov[i].kiov_len);
+ cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].bv_page,
+ desc->bd_iov[i].bv_offset &
+ ~PAGE_MASK,
+ desc->bd_iov[i].bv_len);
}
if (hashsize > buflen) {
@@ -542,4 +541,3 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
return err;
}
-EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
index c14035479c5f..2181a85efd49 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
@@ -58,7 +58,6 @@ enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd)
CERROR("unknown target %p(%s)\n", obd, type);
return LUSTRE_SP_ANY;
}
-EXPORT_SYMBOL(sptlrpc_target_sec_part);
/****************************************
* user supplied flavor string parsing *
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
index 9b9801ece582..8ffd000eafac 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
@@ -71,7 +71,6 @@ void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec)
CDEBUG(D_SEC, "added sec %p(%s)\n", sec, sec->ps_policy->sp_name);
}
-EXPORT_SYMBOL(sptlrpc_gc_add_sec);
void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec)
{
@@ -95,7 +94,6 @@ void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec)
CDEBUG(D_SEC, "del sec %p(%s)\n", sec, sec->ps_policy->sp_name);
}
-EXPORT_SYMBOL(sptlrpc_gc_del_sec);
static void sec_process_ctx_list(void)
{
@@ -182,7 +180,8 @@ again:
/* check ctx list again before sleep */
sec_process_ctx_list();
- lwi = LWI_TIMEOUT(SEC_GC_INTERVAL * HZ, NULL, NULL);
+ lwi = LWI_TIMEOUT(msecs_to_jiffies(SEC_GC_INTERVAL * MSEC_PER_SEC),
+ NULL, NULL);
l_wait_event(thread->t_ctl_waitq,
thread_is_stopping(thread) ||
thread_is_signal(thread),
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
index 5c4590b0c521..cd305bcb334a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
@@ -154,13 +154,13 @@ static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
unsigned int off, i;
for (i = 0; i < desc->bd_iov_count; i++) {
- if (desc->bd_iov[i].kiov_len == 0)
+ if (desc->bd_iov[i].bv_len == 0)
continue;
- ptr = kmap(desc->bd_iov[i].kiov_page);
- off = desc->bd_iov[i].kiov_offset & ~PAGE_MASK;
+ ptr = kmap(desc->bd_iov[i].bv_page);
+ off = desc->bd_iov[i].bv_offset & ~PAGE_MASK;
ptr[off] ^= 0x1;
- kunmap(desc->bd_iov[i].kiov_page);
+ kunmap(desc->bd_iov[i].bv_page);
return;
}
}
@@ -249,9 +249,12 @@ int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
unsigned int hsize = 4;
cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
- lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
- lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF),
- NULL, 0, (unsigned char *)&cksum, &hsize);
+ lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF,
+ 0),
+ lustre_msg_buflen(msg,
+ PLAIN_PACK_MSG_OFF),
+ NULL, 0, (unsigned char *)&cksum,
+ &hsize);
if (cksum != msg->lm_cksum) {
CDEBUG(D_SEC,
"early reply checksum mismatch: %08x != %08x\n",
@@ -349,11 +352,11 @@ int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
/* fix the actual data size */
for (i = 0, nob = 0; i < desc->bd_iov_count; i++) {
- if (desc->bd_iov[i].kiov_len + nob > desc->bd_nob_transferred) {
- desc->bd_iov[i].kiov_len =
+ if (desc->bd_iov[i].bv_len + nob > desc->bd_nob_transferred) {
+ desc->bd_iov[i].bv_len =
desc->bd_nob_transferred - nob;
}
- nob += desc->bd_iov[i].kiov_len;
+ nob += desc->bd_iov[i].bv_len;
}
rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
@@ -869,9 +872,12 @@ int plain_authorize(struct ptlrpc_request *req)
unsigned int hsize = 4;
cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
- lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
- lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF),
- NULL, 0, (unsigned char *)&msg->lm_cksum, &hsize);
+ lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF,
+ 0),
+ lustre_msg_buflen(msg,
+ PLAIN_PACK_MSG_OFF),
+ NULL, 0, (unsigned char *)&msg->lm_cksum,
+ &hsize);
req->rq_reply_off = 0;
}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c
index 4788c4940c2a..72f39308eebb 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/service.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/service.c
@@ -1005,6 +1005,10 @@ ptlrpc_at_remove_timed(struct ptlrpc_request *req)
array->paa_count--;
}
+/*
+ * Attempt to extend the request deadline by sending an early reply to the
+ * client.
+ */
static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
{
struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
@@ -1039,24 +1043,26 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
return -ENOSYS;
}
- /* Fake our processing time into the future to ask the clients
- * for some extra amount of time
+ /*
+ * We want to extend the request deadline by at_extra seconds,
+ * so we set our service estimate to reflect how much time has
+ * passed since this request arrived plus an additional
+ * at_extra seconds. The client will calculate the new deadline
+ * based on this service estimate (plus some additional time to
+ * account for network latency). See ptlrpc_at_recv_early_reply
*/
at_measured(&svcpt->scp_at_estimate, at_extra +
ktime_get_real_seconds() - req->rq_arrival_time.tv_sec);
+ newdl = req->rq_arrival_time.tv_sec + at_get(&svcpt->scp_at_estimate);
/* Check to see if we've actually increased the deadline -
* we may be past adaptive_max
*/
- if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
- at_get(&svcpt->scp_at_estimate)) {
+ if (req->rq_deadline >= newdl) {
DEBUG_REQ(D_WARNING, req, "Couldn't add any time (%ld/%lld), not sending early reply\n",
- olddl, req->rq_arrival_time.tv_sec +
- at_get(&svcpt->scp_at_estimate) -
- ktime_get_real_seconds());
+ olddl, newdl - ktime_get_real_seconds());
return -ETIMEDOUT;
}
- newdl = ktime_get_real_seconds() + at_get(&svcpt->scp_at_estimate);
reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
if (!reqcopy)
@@ -1982,11 +1988,12 @@ ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
cond_resched();
l_wait_event_exclusive_head(svcpt->scp_waitq,
- ptlrpc_thread_stopping(thread) ||
- ptlrpc_server_request_incoming(svcpt) ||
- ptlrpc_server_request_pending(svcpt, false) ||
- ptlrpc_rqbd_pending(svcpt) ||
- ptlrpc_at_check(svcpt), &lwi);
+ ptlrpc_thread_stopping(thread) ||
+ ptlrpc_server_request_incoming(svcpt) ||
+ ptlrpc_server_request_pending(svcpt,
+ false) ||
+ ptlrpc_rqbd_pending(svcpt) ||
+ ptlrpc_at_check(svcpt), &lwi);
if (ptlrpc_thread_stopping(thread))
return -EINTR;
@@ -2049,7 +2056,7 @@ static int ptlrpc_main(void *arg)
}
rc = lu_context_init(&env->le_ctx,
- svc->srv_ctx_tags|LCT_REMEMBER|LCT_NOREF);
+ svc->srv_ctx_tags | LCT_REMEMBER | LCT_NOREF);
if (rc)
goto out_srv_fini;
@@ -2349,7 +2356,7 @@ static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt)
while (!list_empty(&zombie)) {
thread = list_entry(zombie.next,
- struct ptlrpc_thread, t_link);
+ struct ptlrpc_thread, t_link);
list_del(&thread->t_link);
kfree(thread);
}
@@ -2398,7 +2405,6 @@ int ptlrpc_start_threads(struct ptlrpc_service *svc)
ptlrpc_stop_all_threads(svc);
return rc;
}
-EXPORT_SYMBOL(ptlrpc_start_threads);
int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait)
{
@@ -2539,8 +2545,8 @@ int ptlrpc_hr_init(void)
LASSERT(hrp->hrp_nthrs > 0);
hrp->hrp_thrs =
kzalloc_node(hrp->hrp_nthrs * sizeof(*hrt), GFP_NOFS,
- cfs_cpt_spread_node(ptlrpc_hr.hr_cpt_table,
- i));
+ cfs_cpt_spread_node(ptlrpc_hr.hr_cpt_table,
+ i));
if (!hrp->hrp_thrs) {
rc = -ENOMEM;
goto out;
@@ -2593,7 +2599,8 @@ static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt)
NULL, NULL);
rc = l_wait_event(svcpt->scp_waitq,
- atomic_read(&svcpt->scp_nreps_difficult) == 0, &lwi);
+ atomic_read(&svcpt->scp_nreps_difficult) == 0,
+ &lwi);
if (rc == 0)
break;
CWARN("Unexpectedly long timeout %s %p\n",
@@ -2639,7 +2646,7 @@ ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc)
* event with its 'unlink' flag set for each posted rqbd
*/
list_for_each_entry(rqbd, &svcpt->scp_rqbd_posted,
- rqbd_list) {
+ rqbd_list) {
rc = LNetMDUnlink(rqbd->rqbd_md_h);
LASSERT(rc == 0 || rc == -ENOENT);
}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
index 6cc2b2edf3fc..e5945e2ccc49 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
@@ -190,28 +190,30 @@ void lustre_assert_wire_constants(void)
(long long)REINT_SETXATTR);
LASSERTF(REINT_RMENTRY == 8, "found %lld\n",
(long long)REINT_RMENTRY);
- LASSERTF(REINT_MAX == 9, "found %lld\n",
+ LASSERTF(REINT_MIGRATE == 9, "found %lld\n",
+ (long long)REINT_MIGRATE);
+ LASSERTF(REINT_MAX == 10, "found %lld\n",
(long long)REINT_MAX);
LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)DISP_IT_EXECD);
+ (unsigned)DISP_IT_EXECD);
LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)DISP_LOOKUP_EXECD);
+ (unsigned)DISP_LOOKUP_EXECD);
LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)DISP_LOOKUP_NEG);
+ (unsigned)DISP_LOOKUP_NEG);
LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n",
- (unsigned)DISP_LOOKUP_POS);
+ (unsigned)DISP_LOOKUP_POS);
LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)DISP_OPEN_CREATE);
+ (unsigned)DISP_OPEN_CREATE);
LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned)DISP_OPEN_OPEN);
+ (unsigned)DISP_OPEN_OPEN);
LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n",
- (unsigned)DISP_ENQ_COMPLETE);
+ (unsigned)DISP_ENQ_COMPLETE);
LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n",
- (unsigned)DISP_ENQ_OPEN_REF);
+ (unsigned)DISP_ENQ_OPEN_REF);
LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n",
- (unsigned)DISP_ENQ_CREATE_REF);
+ (unsigned)DISP_ENQ_CREATE_REF);
LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n",
- (unsigned)DISP_OPEN_LOCK);
+ (unsigned)DISP_OPEN_LOCK);
LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n",
(long long)MDS_STATUS_CONN);
LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n",
@@ -219,55 +221,55 @@ void lustre_assert_wire_constants(void)
LASSERTF(LUSTRE_BFLAG_UNCOMMITTED_WRITES == 1, "found %lld\n",
(long long)LUSTRE_BFLAG_UNCOMMITTED_WRITES);
LASSERTF(MF_SOM_CHANGE == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)MF_SOM_CHANGE);
+ (unsigned)MF_SOM_CHANGE);
LASSERTF(MF_EPOCH_OPEN == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)MF_EPOCH_OPEN);
+ (unsigned)MF_EPOCH_OPEN);
LASSERTF(MF_EPOCH_CLOSE == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)MF_EPOCH_CLOSE);
+ (unsigned)MF_EPOCH_CLOSE);
LASSERTF(MF_MDC_CANCEL_FID1 == 0x00000008UL, "found 0x%.8xUL\n",
- (unsigned)MF_MDC_CANCEL_FID1);
+ (unsigned)MF_MDC_CANCEL_FID1);
LASSERTF(MF_MDC_CANCEL_FID2 == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)MF_MDC_CANCEL_FID2);
+ (unsigned)MF_MDC_CANCEL_FID2);
LASSERTF(MF_MDC_CANCEL_FID3 == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned)MF_MDC_CANCEL_FID3);
+ (unsigned)MF_MDC_CANCEL_FID3);
LASSERTF(MF_MDC_CANCEL_FID4 == 0x00000040UL, "found 0x%.8xUL\n",
- (unsigned)MF_MDC_CANCEL_FID4);
+ (unsigned)MF_MDC_CANCEL_FID4);
LASSERTF(MF_SOM_AU == 0x00000080UL, "found 0x%.8xUL\n",
- (unsigned)MF_SOM_AU);
+ (unsigned)MF_SOM_AU);
LASSERTF(MF_GETATTR_LOCK == 0x00000100UL, "found 0x%.8xUL\n",
- (unsigned)MF_GETATTR_LOCK);
+ (unsigned)MF_GETATTR_LOCK);
LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_MODE);
+ (long long)MDS_ATTR_MODE);
LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_UID);
+ (long long)MDS_ATTR_UID);
LASSERTF(MDS_ATTR_GID == 0x0000000000000004ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_GID);
+ (long long)MDS_ATTR_GID);
LASSERTF(MDS_ATTR_SIZE == 0x0000000000000008ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_SIZE);
+ (long long)MDS_ATTR_SIZE);
LASSERTF(MDS_ATTR_ATIME == 0x0000000000000010ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_ATIME);
+ (long long)MDS_ATTR_ATIME);
LASSERTF(MDS_ATTR_MTIME == 0x0000000000000020ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_MTIME);
+ (long long)MDS_ATTR_MTIME);
LASSERTF(MDS_ATTR_CTIME == 0x0000000000000040ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_CTIME);
+ (long long)MDS_ATTR_CTIME);
LASSERTF(MDS_ATTR_ATIME_SET == 0x0000000000000080ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_ATIME_SET);
+ (long long)MDS_ATTR_ATIME_SET);
LASSERTF(MDS_ATTR_MTIME_SET == 0x0000000000000100ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_MTIME_SET);
+ (long long)MDS_ATTR_MTIME_SET);
LASSERTF(MDS_ATTR_FORCE == 0x0000000000000200ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_FORCE);
+ (long long)MDS_ATTR_FORCE);
LASSERTF(MDS_ATTR_ATTR_FLAG == 0x0000000000000400ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_ATTR_FLAG);
+ (long long)MDS_ATTR_ATTR_FLAG);
LASSERTF(MDS_ATTR_KILL_SUID == 0x0000000000000800ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_KILL_SUID);
+ (long long)MDS_ATTR_KILL_SUID);
LASSERTF(MDS_ATTR_KILL_SGID == 0x0000000000001000ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_KILL_SGID);
+ (long long)MDS_ATTR_KILL_SGID);
LASSERTF(MDS_ATTR_CTIME_SET == 0x0000000000002000ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_CTIME_SET);
+ (long long)MDS_ATTR_CTIME_SET);
LASSERTF(MDS_ATTR_FROM_OPEN == 0x0000000000004000ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_FROM_OPEN);
+ (long long)MDS_ATTR_FROM_OPEN);
LASSERTF(MDS_ATTR_BLOCKS == 0x0000000000008000ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_BLOCKS);
+ (long long)MDS_ATTR_BLOCKS);
LASSERTF(FLD_QUERY == 900, "found %lld\n",
(long long)FLD_QUERY);
LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n",
@@ -418,15 +420,15 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n",
(long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid));
LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)LMAI_RELEASED);
+ (unsigned)LMAI_RELEASED);
LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)LMAC_HSM);
+ (unsigned)LMAC_HSM);
LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)LMAC_SOM);
+ (unsigned)LMAC_SOM);
LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)LMAC_NOT_IN_OI);
+ (unsigned)LMAC_NOT_IN_OI);
LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
- (unsigned)LMAC_FID_ON_OST);
+ (unsigned)LMAC_FID_ON_OST);
/* Checks for struct ost_id */
LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n",
@@ -452,35 +454,35 @@ void lustre_assert_wire_constants(void)
LASSERTF(FID_SEQ_IGIF == 12, "found %lld\n",
(long long)FID_SEQ_IGIF);
LASSERTF(FID_SEQ_IGIF_MAX == 0x00000000ffffffffULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_IGIF_MAX);
+ (long long)FID_SEQ_IGIF_MAX);
LASSERTF(FID_SEQ_IDIF == 0x0000000100000000ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_IDIF);
+ (long long)FID_SEQ_IDIF);
LASSERTF(FID_SEQ_IDIF_MAX == 0x00000001ffffffffULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_IDIF_MAX);
+ (long long)FID_SEQ_IDIF_MAX);
LASSERTF(FID_SEQ_START == 0x0000000200000000ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_START);
+ (long long)FID_SEQ_START);
LASSERTF(FID_SEQ_LOCAL_FILE == 0x0000000200000001ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_LOCAL_FILE);
+ (long long)FID_SEQ_LOCAL_FILE);
LASSERTF(FID_SEQ_DOT_LUSTRE == 0x0000000200000002ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_DOT_LUSTRE);
+ (long long)FID_SEQ_DOT_LUSTRE);
LASSERTF(FID_SEQ_SPECIAL == 0x0000000200000004ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_SPECIAL);
+ (long long)FID_SEQ_SPECIAL);
LASSERTF(FID_SEQ_QUOTA == 0x0000000200000005ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_QUOTA);
+ (long long)FID_SEQ_QUOTA);
LASSERTF(FID_SEQ_QUOTA_GLB == 0x0000000200000006ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_QUOTA_GLB);
+ (long long)FID_SEQ_QUOTA_GLB);
LASSERTF(FID_SEQ_ROOT == 0x0000000200000007ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_ROOT);
+ (long long)FID_SEQ_ROOT);
LASSERTF(FID_SEQ_NORMAL == 0x0000000200000400ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_NORMAL);
+ (long long)FID_SEQ_NORMAL);
LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_LOV_DEFAULT);
+ (long long)FID_SEQ_LOV_DEFAULT);
LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)FID_OID_SPECIAL_BFL);
+ (unsigned)FID_OID_SPECIAL_BFL);
LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)FID_OID_DOT_LUSTRE);
+ (unsigned)FID_OID_DOT_LUSTRE);
LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)FID_OID_DOT_LUSTRE_OBF);
+ (unsigned)FID_OID_DOT_LUSTRE_OBF);
/* Checks for struct lu_dirent */
LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n",
@@ -510,11 +512,11 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n",
(long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0]));
LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)LUDA_FID);
+ (unsigned)LUDA_FID);
LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)LUDA_TYPE);
+ (unsigned)LUDA_TYPE);
LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)LUDA_64BITHASH);
+ (unsigned)LUDA_64BITHASH);
/* Checks for struct luda_type */
LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n",
@@ -602,9 +604,9 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]) == 4, "found %lld\n",
(long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]));
LASSERTF(LUSTRE_MSG_MAGIC_V2 == 0x0BD00BD3, "found 0x%.8x\n",
- LUSTRE_MSG_MAGIC_V2);
+ LUSTRE_MSG_MAGIC_V2);
LASSERTF(LUSTRE_MSG_MAGIC_V2_SWABBED == 0xD30BD00B, "found 0x%.8x\n",
- LUSTRE_MSG_MAGIC_V2_SWABBED);
+ LUSTRE_MSG_MAGIC_V2_SWABBED);
/* Checks for struct ptlrpc_body */
LASSERTF((int)sizeof(struct ptlrpc_body_v3) == 184, "found %lld\n",
@@ -682,7 +684,7 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding));
LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == 32, "found %lld\n",
(long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding));
- CLASSERT(JOBSTATS_JOBID_SIZE == 32);
+ CLASSERT(LUSTRE_JOBID_SIZE == 32);
LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n",
(long long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid));
LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid) == 32, "found %lld\n",
@@ -780,61 +782,61 @@ void lustre_assert_wire_constants(void)
LASSERTF(MSG_PTLRPC_HEADER_OFF == 31, "found %lld\n",
(long long)MSG_PTLRPC_HEADER_OFF);
LASSERTF(PTLRPC_MSG_VERSION == 0x00000003, "found 0x%.8x\n",
- PTLRPC_MSG_VERSION);
+ PTLRPC_MSG_VERSION);
LASSERTF(LUSTRE_VERSION_MASK == 0xffff0000, "found 0x%.8x\n",
- LUSTRE_VERSION_MASK);
+ LUSTRE_VERSION_MASK);
LASSERTF(LUSTRE_OBD_VERSION == 0x00010000, "found 0x%.8x\n",
- LUSTRE_OBD_VERSION);
+ LUSTRE_OBD_VERSION);
LASSERTF(LUSTRE_MDS_VERSION == 0x00020000, "found 0x%.8x\n",
- LUSTRE_MDS_VERSION);
+ LUSTRE_MDS_VERSION);
LASSERTF(LUSTRE_OST_VERSION == 0x00030000, "found 0x%.8x\n",
- LUSTRE_OST_VERSION);
+ LUSTRE_OST_VERSION);
LASSERTF(LUSTRE_DLM_VERSION == 0x00040000, "found 0x%.8x\n",
- LUSTRE_DLM_VERSION);
+ LUSTRE_DLM_VERSION);
LASSERTF(LUSTRE_LOG_VERSION == 0x00050000, "found 0x%.8x\n",
- LUSTRE_LOG_VERSION);
+ LUSTRE_LOG_VERSION);
LASSERTF(LUSTRE_MGS_VERSION == 0x00060000, "found 0x%.8x\n",
- LUSTRE_MGS_VERSION);
+ LUSTRE_MGS_VERSION);
LASSERTF(MSGHDR_AT_SUPPORT == 1, "found %lld\n",
(long long)MSGHDR_AT_SUPPORT);
LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n",
(long long)MSGHDR_CKSUM_INCOMPAT18);
LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n",
- (unsigned)MSG_OP_FLAG_MASK);
+ (unsigned)MSG_OP_FLAG_MASK);
LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n",
(long long)MSG_OP_FLAG_SHIFT);
LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n",
- (unsigned)MSG_GEN_FLAG_MASK);
+ (unsigned)MSG_GEN_FLAG_MASK);
LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)MSG_LAST_REPLAY);
+ (unsigned)MSG_LAST_REPLAY);
LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)MSG_RESENT);
+ (unsigned)MSG_RESENT);
LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)MSG_REPLAY);
+ (unsigned)MSG_REPLAY);
LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)MSG_DELAY_REPLAY);
+ (unsigned)MSG_DELAY_REPLAY);
LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned)MSG_VERSION_REPLAY);
+ (unsigned)MSG_VERSION_REPLAY);
LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n",
- (unsigned)MSG_REQ_REPLAY_DONE);
+ (unsigned)MSG_REQ_REPLAY_DONE);
LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 0x%.8xUL\n",
- (unsigned)MSG_LOCK_REPLAY_DONE);
+ (unsigned)MSG_LOCK_REPLAY_DONE);
LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_RECOVERING);
+ (unsigned)MSG_CONNECT_RECOVERING);
LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_RECONNECT);
+ (unsigned)MSG_CONNECT_RECONNECT);
LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_REPLAYABLE);
+ (unsigned)MSG_CONNECT_REPLAYABLE);
LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_LIBCLIENT);
+ (unsigned)MSG_CONNECT_LIBCLIENT);
LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_INITIAL);
+ (unsigned)MSG_CONNECT_INITIAL);
LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_ASYNC);
+ (unsigned)MSG_CONNECT_ASYNC);
LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_NEXT_VER);
+ (unsigned)MSG_CONNECT_NEXT_VER);
LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_TRANSNO);
+ (unsigned)MSG_CONNECT_TRANSNO);
/* Checks for struct obd_connect_data */
LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n",
@@ -1069,12 +1071,18 @@ void lustre_assert_wire_constants(void)
"found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD);
LASSERTF(OBD_CONNECT_OPEN_BY_FID == 0x20000000000000ULL,
"found 0x%.16llxULL\n", OBD_CONNECT_OPEN_BY_FID);
+ LASSERTF(OBD_CONNECT_LFSCK == 0x40000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LFSCK);
+ LASSERTF(OBD_CONNECT_UNLINK_CLOSE == 0x100000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_UNLINK_CLOSE);
+ LASSERTF(OBD_CONNECT_DIR_STRIPE == 0x400000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_DIR_STRIPE);
LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)OBD_CKSUM_CRC32);
+ (unsigned)OBD_CKSUM_CRC32);
LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)OBD_CKSUM_ADLER);
+ (unsigned)OBD_CKSUM_ADLER);
LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)OBD_CKSUM_CRC32C);
+ (unsigned)OBD_CKSUM_CRC32C);
/* Checks for struct obdo */
LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n",
@@ -1346,7 +1354,7 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct lov_mds_md_v1, lmm_objects[0]));
LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]) == 24, "found %lld\n",
(long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]));
- CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0);
+ CLASSERT(LOV_MAGIC_V1 == (0x0BD10000 | 0x0BD0));
/* Checks for struct lov_mds_md_v3 */
LASSERTF((int)sizeof(struct lov_mds_md_v3) == 48, "found %lld\n",
@@ -1375,7 +1383,7 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct lov_mds_md_v3, lmm_layout_gen));
LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen) == 2, "found %lld\n",
(long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen));
- CLASSERT(LOV_MAXPOOLNAME == 16);
+ CLASSERT(LOV_MAXPOOLNAME == 15);
LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]) == 48, "found %lld\n",
(long long)(int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]));
LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]) == 1, "found %lld\n",
@@ -1384,15 +1392,64 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct lov_mds_md_v3, lmm_objects[0]));
LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]) == 24, "found %lld\n",
(long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]));
- CLASSERT(LOV_MAGIC_V3 == 0x0BD30BD0);
+ CLASSERT(LOV_MAGIC_V3 == (0x0BD30000 | 0x0BD0));
LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)LOV_PATTERN_RAID0);
+ (unsigned)LOV_PATTERN_RAID0);
LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)LOV_PATTERN_RAID1);
+ (unsigned)LOV_PATTERN_RAID1);
LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n",
- (unsigned)LOV_PATTERN_FIRST);
+ (unsigned)LOV_PATTERN_FIRST);
LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n",
- (unsigned)LOV_PATTERN_CMOBD);
+ (unsigned)LOV_PATTERN_CMOBD);
+
+ /* Checks for struct lmv_mds_md_v1 */
+ LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
+ (long long)(int)sizeof(struct lmv_mds_md_v1));
+ LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_magic));
+ LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_magic));
+ LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_stripe_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_stripe_count));
+ LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_count));
+ LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_master_mdt_index) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_master_mdt_index));
+ LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_master_mdt_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_master_mdt_index));
+ LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_hash_type) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_hash_type));
+ LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_hash_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_hash_type));
+ LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_layout_version) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_layout_version));
+ LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_layout_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_layout_version));
+ LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding1) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding1));
+ LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding1));
+ LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding2) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding2));
+ LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding2));
+ LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding3) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding3));
+ LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding3));
+ LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_pool_name[16]) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_pool_name[16]));
+ LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_pool_name[16]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_pool_name[16]));
+ LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_stripe_fids[0]) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_stripe_fids[0]));
+ LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_fids[0]) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_fids[0]));
+ CLASSERT(LMV_MAGIC_V1 == 0x0CD20CD0);
+ CLASSERT(LMV_MAGIC_STRIPE == 0x0CD40CD0);
+ CLASSERT(LMV_HASH_TYPE_MASK == 0x0000ffff);
+ CLASSERT(LMV_HASH_FLAG_MIGRATION == 0x80000000);
+ CLASSERT(LMV_HASH_FLAG_DEAD == 0x40000000);
/* Checks for struct obd_statfs */
LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n",
@@ -1582,53 +1639,53 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_padding) == 4, "found %lld\n",
(long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_padding));
LASSERTF(Q_QUOTACHECK == 0x800100, "found 0x%.8x\n",
- Q_QUOTACHECK);
+ Q_QUOTACHECK);
LASSERTF(Q_INITQUOTA == 0x800101, "found 0x%.8x\n",
- Q_INITQUOTA);
+ Q_INITQUOTA);
LASSERTF(Q_GETOINFO == 0x800102, "found 0x%.8x\n",
- Q_GETOINFO);
+ Q_GETOINFO);
LASSERTF(Q_GETOQUOTA == 0x800103, "found 0x%.8x\n",
- Q_GETOQUOTA);
+ Q_GETOQUOTA);
LASSERTF(Q_FINVALIDATE == 0x800104, "found 0x%.8x\n",
- Q_FINVALIDATE);
+ Q_FINVALIDATE);
/* Checks for struct niobuf_remote */
LASSERTF((int)sizeof(struct niobuf_remote) == 16, "found %lld\n",
(long long)(int)sizeof(struct niobuf_remote));
- LASSERTF((int)offsetof(struct niobuf_remote, offset) == 0, "found %lld\n",
- (long long)(int)offsetof(struct niobuf_remote, offset));
- LASSERTF((int)sizeof(((struct niobuf_remote *)0)->offset) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct niobuf_remote *)0)->offset));
- LASSERTF((int)offsetof(struct niobuf_remote, len) == 8, "found %lld\n",
- (long long)(int)offsetof(struct niobuf_remote, len));
- LASSERTF((int)sizeof(((struct niobuf_remote *)0)->len) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct niobuf_remote *)0)->len));
- LASSERTF((int)offsetof(struct niobuf_remote, flags) == 12, "found %lld\n",
- (long long)(int)offsetof(struct niobuf_remote, flags));
- LASSERTF((int)sizeof(((struct niobuf_remote *)0)->flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct niobuf_remote *)0)->flags));
+ LASSERTF((int)offsetof(struct niobuf_remote, rnb_offset) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct niobuf_remote, rnb_offset));
+ LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_offset) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_offset));
+ LASSERTF((int)offsetof(struct niobuf_remote, rnb_len) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct niobuf_remote, rnb_len));
+ LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_len));
+ LASSERTF((int)offsetof(struct niobuf_remote, rnb_flags) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct niobuf_remote, rnb_flags));
+ LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_flags));
LASSERTF(OBD_BRW_READ == 0x01, "found 0x%.8x\n",
- OBD_BRW_READ);
+ OBD_BRW_READ);
LASSERTF(OBD_BRW_WRITE == 0x02, "found 0x%.8x\n",
- OBD_BRW_WRITE);
+ OBD_BRW_WRITE);
LASSERTF(OBD_BRW_SYNC == 0x08, "found 0x%.8x\n",
- OBD_BRW_SYNC);
+ OBD_BRW_SYNC);
LASSERTF(OBD_BRW_CHECK == 0x10, "found 0x%.8x\n",
- OBD_BRW_CHECK);
+ OBD_BRW_CHECK);
LASSERTF(OBD_BRW_FROM_GRANT == 0x20, "found 0x%.8x\n",
- OBD_BRW_FROM_GRANT);
+ OBD_BRW_FROM_GRANT);
LASSERTF(OBD_BRW_GRANTED == 0x40, "found 0x%.8x\n",
- OBD_BRW_GRANTED);
+ OBD_BRW_GRANTED);
LASSERTF(OBD_BRW_NOCACHE == 0x80, "found 0x%.8x\n",
- OBD_BRW_NOCACHE);
+ OBD_BRW_NOCACHE);
LASSERTF(OBD_BRW_NOQUOTA == 0x100, "found 0x%.8x\n",
- OBD_BRW_NOQUOTA);
+ OBD_BRW_NOQUOTA);
LASSERTF(OBD_BRW_SRVLOCK == 0x200, "found 0x%.8x\n",
- OBD_BRW_SRVLOCK);
+ OBD_BRW_SRVLOCK);
LASSERTF(OBD_BRW_ASYNC == 0x400, "found 0x%.8x\n",
- OBD_BRW_ASYNC);
+ OBD_BRW_ASYNC);
LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n",
- OBD_BRW_MEMALLOC);
+ OBD_BRW_MEMALLOC);
LASSERTF(OBD_BRW_OVER_USRQUOTA == 0x1000, "found 0x%.8x\n",
OBD_BRW_OVER_USRQUOTA);
LASSERTF(OBD_BRW_OVER_GRPQUOTA == 0x2000, "found 0x%.8x\n",
@@ -1663,203 +1720,203 @@ void lustre_assert_wire_constants(void)
/* Checks for struct mdt_body */
LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n",
(long long)(int)sizeof(struct mdt_body));
- LASSERTF((int)offsetof(struct mdt_body, fid1) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, fid1));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->fid1) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->fid1));
- LASSERTF((int)offsetof(struct mdt_body, fid2) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, fid2));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->fid2) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->fid2));
- LASSERTF((int)offsetof(struct mdt_body, handle) == 32, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, handle));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->handle) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->handle));
- LASSERTF((int)offsetof(struct mdt_body, valid) == 40, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, valid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->valid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->valid));
- LASSERTF((int)offsetof(struct mdt_body, size) == 48, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, size));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->size) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->size));
- LASSERTF((int)offsetof(struct mdt_body, mtime) == 56, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mtime));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mtime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mtime));
- LASSERTF((int)offsetof(struct mdt_body, atime) == 64, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, atime));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->atime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->atime));
- LASSERTF((int)offsetof(struct mdt_body, ctime) == 72, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, ctime));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->ctime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->ctime));
- LASSERTF((int)offsetof(struct mdt_body, blocks) == 80, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, blocks));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->blocks) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->blocks));
- LASSERTF((int)offsetof(struct mdt_body, t_state) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, t_state));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->t_state) == 8,
+ LASSERTF((int)offsetof(struct mdt_body, mbo_fid1) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_fid1));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fid1));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_fid2) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_fid2));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fid2));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_handle) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_handle));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_handle));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_valid) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_valid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_valid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_valid));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_size) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_size));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_size));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_mtime) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_mtime));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_mtime));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_atime) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_atime));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_atime));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_ctime) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_ctime));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_ctime));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_blocks) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_blocks));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_blocks));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_t_state) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_t_state));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_t_state) == 8,
"found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->t_state));
- LASSERTF((int)offsetof(struct mdt_body, fsuid) == 104, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, fsuid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->fsuid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->fsuid));
- LASSERTF((int)offsetof(struct mdt_body, fsgid) == 108, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, fsgid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->fsgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->fsgid));
- LASSERTF((int)offsetof(struct mdt_body, capability) == 112, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, capability));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->capability) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->capability));
- LASSERTF((int)offsetof(struct mdt_body, mode) == 116, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mode));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mode));
- LASSERTF((int)offsetof(struct mdt_body, uid) == 120, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, uid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->uid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->uid));
- LASSERTF((int)offsetof(struct mdt_body, gid) == 124, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, gid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->gid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->gid));
- LASSERTF((int)offsetof(struct mdt_body, flags) == 128, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, flags));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->flags));
- LASSERTF((int)offsetof(struct mdt_body, rdev) == 132, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, rdev));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->rdev) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->rdev));
- LASSERTF((int)offsetof(struct mdt_body, nlink) == 136, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, nlink));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->nlink) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->nlink));
- LASSERTF((int)offsetof(struct mdt_body, unused2) == 140, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, unused2));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->unused2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->unused2));
- LASSERTF((int)offsetof(struct mdt_body, suppgid) == 144, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, suppgid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->suppgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->suppgid));
- LASSERTF((int)offsetof(struct mdt_body, eadatasize) == 148, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, eadatasize));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->eadatasize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->eadatasize));
- LASSERTF((int)offsetof(struct mdt_body, aclsize) == 152, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, aclsize));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->aclsize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->aclsize));
- LASSERTF((int)offsetof(struct mdt_body, max_mdsize) == 156, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, max_mdsize));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->max_mdsize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->max_mdsize));
- LASSERTF((int)offsetof(struct mdt_body, max_cookiesize) == 160, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, max_cookiesize));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->max_cookiesize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->max_cookiesize));
- LASSERTF((int)offsetof(struct mdt_body, uid_h) == 164, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, uid_h));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->uid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->uid_h));
- LASSERTF((int)offsetof(struct mdt_body, gid_h) == 168, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, gid_h));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->gid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->gid_h));
- LASSERTF((int)offsetof(struct mdt_body, padding_5) == 172, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, padding_5));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_5) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->padding_5));
- LASSERTF((int)offsetof(struct mdt_body, padding_6) == 176, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, padding_6));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_6) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->padding_6));
- LASSERTF((int)offsetof(struct mdt_body, padding_7) == 184, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, padding_7));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_7) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->padding_7));
- LASSERTF((int)offsetof(struct mdt_body, padding_8) == 192, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, padding_8));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_8) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->padding_8));
- LASSERTF((int)offsetof(struct mdt_body, padding_9) == 200, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, padding_9));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_9) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->padding_9));
- LASSERTF((int)offsetof(struct mdt_body, padding_10) == 208, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, padding_10));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_10) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->padding_10));
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_t_state));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_fsuid) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fsuid));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_fsgid) == 108, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fsgid));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_capability) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_capability));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_capability) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_capability));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_mode));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_mode));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_uid) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_uid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_uid));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_gid) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_gid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_gid));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_flags) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_flags));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_flags));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_rdev) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_rdev));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_rdev) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_rdev));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_nlink) == 136, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_nlink));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_nlink) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_nlink));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_unused2) == 140, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_unused2));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_unused2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_unused2));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_suppgid) == 144, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_suppgid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_suppgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_suppgid));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_eadatasize) == 148, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_eadatasize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_eadatasize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_eadatasize));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_aclsize) == 152, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_aclsize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_aclsize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_aclsize));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_max_mdsize) == 156, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_max_mdsize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_max_cookiesize) == 160, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_max_cookiesize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_cookiesize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_cookiesize));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_uid_h) == 164, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_uid_h));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_uid_h));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_gid_h) == 168, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_gid_h));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_gid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_gid_h));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_padding_5) == 172, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_5));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_padding_6) == 176, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_padding_6));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_6) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_6));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_padding_7) == 184, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_padding_7));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_7) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_7));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_padding_8) == 192, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_padding_8));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_8) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_8));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_padding_9) == 200, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_padding_9));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_9) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_9));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_padding_10) == 208, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_padding_10));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_10) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_10));
LASSERTF(MDS_FMODE_CLOSED == 000000000000UL, "found 0%.11oUL\n",
- MDS_FMODE_CLOSED);
+ MDS_FMODE_CLOSED);
LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n",
- MDS_FMODE_EXEC);
+ MDS_FMODE_EXEC);
LASSERTF(MDS_FMODE_EPOCH == 000001000000UL, "found 0%.11oUL\n",
- MDS_FMODE_EPOCH);
+ MDS_FMODE_EPOCH);
LASSERTF(MDS_FMODE_TRUNC == 000002000000UL, "found 0%.11oUL\n",
- MDS_FMODE_TRUNC);
+ MDS_FMODE_TRUNC);
LASSERTF(MDS_FMODE_SOM == 000004000000UL, "found 0%.11oUL\n",
- MDS_FMODE_SOM);
+ MDS_FMODE_SOM);
LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n",
- MDS_OPEN_CREATED);
+ MDS_OPEN_CREATED);
LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n",
- MDS_OPEN_CROSS);
+ MDS_OPEN_CROSS);
LASSERTF(MDS_OPEN_CREAT == 000000000100UL, "found 0%.11oUL\n",
- MDS_OPEN_CREAT);
+ MDS_OPEN_CREAT);
LASSERTF(MDS_OPEN_EXCL == 000000000200UL, "found 0%.11oUL\n",
- MDS_OPEN_EXCL);
+ MDS_OPEN_EXCL);
LASSERTF(MDS_OPEN_TRUNC == 000000001000UL, "found 0%.11oUL\n",
- MDS_OPEN_TRUNC);
+ MDS_OPEN_TRUNC);
LASSERTF(MDS_OPEN_APPEND == 000000002000UL, "found 0%.11oUL\n",
- MDS_OPEN_APPEND);
+ MDS_OPEN_APPEND);
LASSERTF(MDS_OPEN_SYNC == 000000010000UL, "found 0%.11oUL\n",
- MDS_OPEN_SYNC);
+ MDS_OPEN_SYNC);
LASSERTF(MDS_OPEN_DIRECTORY == 000000200000UL, "found 0%.11oUL\n",
- MDS_OPEN_DIRECTORY);
+ MDS_OPEN_DIRECTORY);
LASSERTF(MDS_OPEN_BY_FID == 000040000000UL, "found 0%.11oUL\n",
- MDS_OPEN_BY_FID);
+ MDS_OPEN_BY_FID);
LASSERTF(MDS_OPEN_DELAY_CREATE == 000100000000UL, "found 0%.11oUL\n",
- MDS_OPEN_DELAY_CREATE);
+ MDS_OPEN_DELAY_CREATE);
LASSERTF(MDS_OPEN_OWNEROVERRIDE == 000200000000UL, "found 0%.11oUL\n",
- MDS_OPEN_OWNEROVERRIDE);
+ MDS_OPEN_OWNEROVERRIDE);
LASSERTF(MDS_OPEN_JOIN_FILE == 000400000000UL, "found 0%.11oUL\n",
- MDS_OPEN_JOIN_FILE);
+ MDS_OPEN_JOIN_FILE);
LASSERTF(MDS_OPEN_LOCK == 004000000000UL, "found 0%.11oUL\n",
- MDS_OPEN_LOCK);
+ MDS_OPEN_LOCK);
LASSERTF(MDS_OPEN_HAS_EA == 010000000000UL, "found 0%.11oUL\n",
- MDS_OPEN_HAS_EA);
+ MDS_OPEN_HAS_EA);
LASSERTF(MDS_OPEN_HAS_OBJS == 020000000000UL, "found 0%.11oUL\n",
- MDS_OPEN_HAS_OBJS);
+ MDS_OPEN_HAS_OBJS);
LASSERTF(MDS_OPEN_NORESTORE == 00000000000100000000000ULL, "found 0%.22lloULL\n",
- (long long)MDS_OPEN_NORESTORE);
+ (long long)MDS_OPEN_NORESTORE);
LASSERTF(MDS_OPEN_NEWSTRIPE == 00000000000200000000000ULL, "found 0%.22lloULL\n",
- (long long)MDS_OPEN_NEWSTRIPE);
+ (long long)MDS_OPEN_NEWSTRIPE);
LASSERTF(MDS_OPEN_VOLATILE == 00000000000400000000000ULL, "found 0%.22lloULL\n",
- (long long)MDS_OPEN_VOLATILE);
+ (long long)MDS_OPEN_VOLATILE);
LASSERTF(LUSTRE_SYNC_FL == 0x00000008, "found 0x%.8x\n",
- LUSTRE_SYNC_FL);
+ LUSTRE_SYNC_FL);
LASSERTF(LUSTRE_IMMUTABLE_FL == 0x00000010, "found 0x%.8x\n",
- LUSTRE_IMMUTABLE_FL);
+ LUSTRE_IMMUTABLE_FL);
LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n",
- LUSTRE_APPEND_FL);
+ LUSTRE_APPEND_FL);
LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n",
- LUSTRE_NOATIME_FL);
+ LUSTRE_NOATIME_FL);
LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n",
- LUSTRE_DIRSYNC_FL);
+ LUSTRE_DIRSYNC_FL);
LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n",
- MDS_INODELOCK_LOOKUP);
+ MDS_INODELOCK_LOOKUP);
LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n",
- MDS_INODELOCK_UPDATE);
+ MDS_INODELOCK_UPDATE);
LASSERTF(MDS_INODELOCK_OPEN == 0x000004, "found 0x%.8x\n",
- MDS_INODELOCK_OPEN);
+ MDS_INODELOCK_OPEN);
LASSERTF(MDS_INODELOCK_LAYOUT == 0x000008, "found 0x%.8x\n",
- MDS_INODELOCK_LAYOUT);
+ MDS_INODELOCK_LAYOUT);
/* Checks for struct mdt_ioepoch */
LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n",
@@ -2617,35 +2674,6 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_uuid) == 40, "found %lld\n",
(long long)(int)sizeof(((struct lmv_desc *)0)->ld_uuid));
- /* Checks for struct lmv_stripe_md */
- LASSERTF((int)sizeof(struct lmv_stripe_md) == 32, "found %lld\n",
- (long long)(int)sizeof(struct lmv_stripe_md));
- LASSERTF((int)offsetof(struct lmv_stripe_md, mea_magic) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lmv_stripe_md, mea_magic));
- LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_magic) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_magic));
- LASSERTF((int)offsetof(struct lmv_stripe_md, mea_count) == 4, "found %lld\n",
- (long long)(int)offsetof(struct lmv_stripe_md, mea_count));
- LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_count));
- LASSERTF((int)offsetof(struct lmv_stripe_md, mea_master) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lmv_stripe_md, mea_master));
- LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_master) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_master));
- LASSERTF((int)offsetof(struct lmv_stripe_md, mea_padding) == 12, "found %lld\n",
- (long long)(int)offsetof(struct lmv_stripe_md, mea_padding));
- LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_padding));
- CLASSERT(LOV_MAXPOOLNAME == 16);
- LASSERTF((int)offsetof(struct lmv_stripe_md, mea_pool_name[16]) == 32, "found %lld\n",
- (long long)(int)offsetof(struct lmv_stripe_md, mea_pool_name[16]));
- LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]));
- LASSERTF((int)offsetof(struct lmv_stripe_md, mea_ids[0]) == 32, "found %lld\n",
- (long long)(int)offsetof(struct lmv_stripe_md, mea_ids[0]));
- LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]));
-
/* Checks for struct lov_desc */
LASSERTF((int)sizeof(struct lov_desc) == 88, "found %lld\n",
(long long)(int)sizeof(struct lov_desc));
@@ -3195,10 +3223,10 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h));
LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n",
(long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h));
- LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_padding) == 48, "found %lld\n",
- (long long)(int)offsetof(struct llog_setattr64_rec, lsr_padding));
- LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_valid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_valid));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid));
LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n",
(long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail));
LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n",
@@ -3272,50 +3300,6 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_pfid) == 16, "found %lld\n",
(long long)(int)sizeof(((struct changelog_rec *)0)->cr_pfid));
- /* Checks for struct changelog_ext_rec */
- LASSERTF((int)sizeof(struct changelog_ext_rec) == 96, "found %lld\n",
- (long long)(int)sizeof(struct changelog_ext_rec));
- LASSERTF((int)offsetof(struct changelog_ext_rec, cr_namelen) == 0, "found %lld\n",
- (long long)(int)offsetof(struct changelog_ext_rec, cr_namelen));
- LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen));
- LASSERTF((int)offsetof(struct changelog_ext_rec, cr_flags) == 2, "found %lld\n",
- (long long)(int)offsetof(struct changelog_ext_rec, cr_flags));
- LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_flags) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_flags));
- LASSERTF((int)offsetof(struct changelog_ext_rec, cr_type) == 4, "found %lld\n",
- (long long)(int)offsetof(struct changelog_ext_rec, cr_type));
- LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_type) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_type));
- LASSERTF((int)offsetof(struct changelog_ext_rec, cr_index) == 8, "found %lld\n",
- (long long)(int)offsetof(struct changelog_ext_rec, cr_index));
- LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_index) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_index));
- LASSERTF((int)offsetof(struct changelog_ext_rec, cr_prev) == 16, "found %lld\n",
- (long long)(int)offsetof(struct changelog_ext_rec, cr_prev));
- LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_prev) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_prev));
- LASSERTF((int)offsetof(struct changelog_ext_rec, cr_time) == 24, "found %lld\n",
- (long long)(int)offsetof(struct changelog_ext_rec, cr_time));
- LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_time) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_time));
- LASSERTF((int)offsetof(struct changelog_ext_rec, cr_tfid) == 32, "found %lld\n",
- (long long)(int)offsetof(struct changelog_ext_rec, cr_tfid));
- LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid));
- LASSERTF((int)offsetof(struct changelog_ext_rec, cr_pfid) == 48, "found %lld\n",
- (long long)(int)offsetof(struct changelog_ext_rec, cr_pfid));
- LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid));
- LASSERTF((int)offsetof(struct changelog_ext_rec, cr_sfid) == 64, "found %lld\n",
- (long long)(int)offsetof(struct changelog_ext_rec, cr_sfid));
- LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid));
- LASSERTF((int)offsetof(struct changelog_ext_rec, cr_spfid) == 80, "found %lld\n",
- (long long)(int)offsetof(struct changelog_ext_rec, cr_spfid));
- LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid));
-
/* Checks for struct changelog_setinfo */
LASSERTF((int)sizeof(struct changelog_setinfo) == 12, "found %lld\n",
(long long)(int)sizeof(struct changelog_setinfo));
@@ -3339,10 +3323,10 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct llog_changelog_rec, cr));
LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr) == 64, "found %lld\n",
(long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr));
- LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tail) == 80, "found %lld\n",
- (long long)(int)offsetof(struct llog_changelog_rec, cr_tail));
- LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tail) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tail));
+ LASSERTF((int)offsetof(struct llog_changelog_rec, cr_do_not_use) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_rec, cr_do_not_use));
+ LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_do_not_use) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_do_not_use));
/* Checks for struct llog_changelog_user_rec */
LASSERTF((int)sizeof(struct llog_changelog_user_rec) == 40, "found %lld\n",
@@ -3506,6 +3490,19 @@ void lustre_assert_wire_constants(void)
CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509);
CLASSERT(LLOG_FIRST_OPC == 501);
CLASSERT(LLOG_LAST_OPC == 510);
+ CLASSERT(LLOG_CONFIG_ORIG_CTXT == 0);
+ CLASSERT(LLOG_CONFIG_REPL_CTXT == 1);
+ CLASSERT(LLOG_MDS_OST_ORIG_CTXT == 2);
+ CLASSERT(LLOG_MDS_OST_REPL_CTXT == 3);
+ CLASSERT(LLOG_SIZE_ORIG_CTXT == 4);
+ CLASSERT(LLOG_SIZE_REPL_CTXT == 5);
+ CLASSERT(LLOG_TEST_ORIG_CTXT == 8);
+ CLASSERT(LLOG_TEST_REPL_CTXT == 9);
+ CLASSERT(LLOG_CHANGELOG_ORIG_CTXT == 12);
+ CLASSERT(LLOG_CHANGELOG_REPL_CTXT == 13);
+ CLASSERT(LLOG_CHANGELOG_USER_ORIG_CTXT == 14);
+ CLASSERT(LLOG_AGENT_ORIG_CTXT == 15);
+ CLASSERT(LLOG_MAX_CTXTS == 16);
/* Checks for struct llogd_conn_body */
LASSERTF((int)sizeof(struct llogd_conn_body) == 40, "found %lld\n",
@@ -3943,9 +3940,9 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct hsm_progress *)0)->padding) == 4, "found %lld\n",
(long long)(int)sizeof(((struct hsm_progress *)0)->padding));
LASSERTF(HP_FLAG_COMPLETED == 0x01, "found 0x%.8x\n",
- HP_FLAG_COMPLETED);
+ HP_FLAG_COMPLETED);
LASSERTF(HP_FLAG_RETRY == 0x02, "found 0x%.8x\n",
- HP_FLAG_RETRY);
+ HP_FLAG_RETRY);
LASSERTF((int)offsetof(struct hsm_copy, hc_data_version) == 0, "found %lld\n",
(long long)(int)offsetof(struct hsm_copy, hc_data_version));
@@ -4100,9 +4097,9 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n",
(long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len));
LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)HSM_FORCE_ACTION);
+ (unsigned)HSM_FORCE_ACTION);
LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)HSM_GHOST_COPY);
+ (unsigned)HSM_GHOST_COPY);
/* Checks for struct hsm_user_request */
LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n",