From 5e4def20381678ba3ce0a4e117f97e378ecd81bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:44 +0000 Subject: Pass mode to wait_on_atomic_t() action funcs and provide default actions Make wait_on_atomic_t() pass the TASK_* mode onto its action function as an extra argument and make it 'unsigned int' throughout. Also, consolidate a bunch of identical action functions into a default function that can do the appropriate thing for the mode. Also, change the argument name in the bit_wait*() function declarations to reflect the fact that it's the mode and not the bit number. [Peter Z gives this a grudging ACK, but thinks that the whole atomic_t wait should be done differently, though he's not immediately sure as to how] Signed-off-by: David Howells Acked-by: Peter Zijlstra cc: Ingo Molnar --- include/linux/wait_bit.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index af0d495430d7..61b39eaf7cad 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -26,6 +26,8 @@ struct wait_bit_queue_entry { { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); +typedef int wait_atomic_t_action_f(atomic_t *counter, unsigned int mode); + void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); @@ -34,7 +36,7 @@ void wake_up_atomic_t(atomic_t *p); int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); int 
out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); +int out_of_line_wait_on_atomic_t(atomic_t *p, wait_atomic_t_action_f action, unsigned int mode); struct wait_queue_head *bit_waitqueue(void *word, int bit); extern void __init wait_bit_init(void); @@ -51,10 +53,11 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync }, \ } -extern int bit_wait(struct wait_bit_key *key, int bit); -extern int bit_wait_io(struct wait_bit_key *key, int bit); -extern int bit_wait_timeout(struct wait_bit_key *key, int bit); -extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit); +extern int bit_wait(struct wait_bit_key *key, int mode); +extern int bit_wait_io(struct wait_bit_key *key, int mode); +extern int bit_wait_timeout(struct wait_bit_key *key, int mode); +extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode); +extern int atomic_t_wait(atomic_t *counter, unsigned int mode); /** * wait_on_bit - wait for a bit to be cleared @@ -251,7 +254,7 @@ wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, * outside of the target 'word'. */ static inline -int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode) +int wait_on_atomic_t(atomic_t *val, wait_atomic_t_action_f action, unsigned mode) { might_sleep(); if (atomic_read(val) == 0) -- cgit v1.2.3 From f044c8847bb61eff5e1e95b6f6bb950e7f4a73a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:45 +0000 Subject: afs: Lay the groundwork for supporting network namespaces Lay the groundwork for supporting network namespaces (netns) to the AFS filesystem by moving various global features to a network-namespace struct (afs_net) and providing an instance of this as a temporary global variable that everything uses via accessor functions for the moment. 
The following changes have been made: (1) Store the netns in the superblock info. This will be obtained from the mounter's nsproxy on a manual mount and inherited from the parent superblock on an automount. (2) The cell list is made per-netns. It can be viewed through /proc/net/afs/cells and also be modified by writing commands to that file. (3) The local workstation cell is set per-ns in /proc/net/afs/rootcell. This is unset by default. (4) The 'rootcell' module parameter, which sets a cell and VL server list, modifies the init net namespace, thereby allowing an AFS root fs to be theoretically used. (5) The volume location lists and the file lock manager are made per-netns. (6) The AF_RXRPC socket and associated I/O bits are made per-ns. The various workqueues remain global for the moment. Changes still to be made: (1) /proc/fs/afs/ should be moved to /proc/net/afs/ and a symlink emplaced from the old name. (2) A per-netns subsys needs to be registered for AFS into which it can store its per-netns data. (3) Rather than the AF_RXRPC socket being opened on module init, it needs to be opened on the creation of a superblock in that netns. (4) The socket needs to be closed when the last superblock using it is destroyed and all outstanding client calls on it have been completed. This prevents a reference loop on the namespace. (5) It is possible that several namespaces will want to use AFS, in which case each one will need its own UDP port. These can either be set through /proc/net/afs/cm_port or the kernel can pick one at random. The init_ns gets 7001 by default. Other issues that need resolving: (1) The DNS keyring needs net-namespacing. (2) Where do upcalls go (eg. DNS request-key upcall)? (3) Need something like open_socket_in_file_ns() syscall so that AFS command line tools attempting to operate on an AFS file/volume have their RPC calls go to the right place. 
Signed-off-by: David Howells --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index aa50113ebe5b..1a6fee974116 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -47,6 +47,7 @@ #define OPENPROM_SUPER_MAGIC 0x9fa1 #define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ #define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */ +#define AFS_FS_MAGIC 0x6B414653 #define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ /* used by file system utilities that -- cgit v1.2.3 From 025db80c9e421efef11f2b83b7f78a11476f06db Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:51 +0000 Subject: afs: Trace the initiation and completion of client calls Add tracepoints to trace the initiation and completion of client calls within the kafs filesystem. The afs_make_vl_call tracepoint watches calls to the volume location database server. The afs_make_fs_call tracepoint watches calls to the file server. The afs_call_done tracepoint watches for call completion. 
Signed-off-by: David Howells --- include/trace/events/afs.h | 142 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8b95c16b7045..dd9d2600ee98 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -30,6 +30,38 @@ enum afs_call_trace { afs_call_trace_work, }; +enum afs_fs_operation { + afs_FS_FetchData = 130, /* AFS Fetch file data */ + afs_FS_FetchStatus = 132, /* AFS Fetch file status */ + afs_FS_StoreData = 133, /* AFS Store file data */ + afs_FS_StoreStatus = 135, /* AFS Store file status */ + afs_FS_RemoveFile = 136, /* AFS Remove a file */ + afs_FS_CreateFile = 137, /* AFS Create a file */ + afs_FS_Rename = 138, /* AFS Rename or move a file or directory */ + afs_FS_Symlink = 139, /* AFS Create a symbolic link */ + afs_FS_Link = 140, /* AFS Create a hard link */ + afs_FS_MakeDir = 141, /* AFS Create a directory */ + afs_FS_RemoveDir = 142, /* AFS Remove a directory */ + afs_FS_GetVolumeInfo = 148, /* AFS Get information about a volume */ + afs_FS_GetVolumeStatus = 149, /* AFS Get volume status information */ + afs_FS_GetRootVolume = 151, /* AFS Get root volume name */ + afs_FS_SetLock = 156, /* AFS Request a file lock */ + afs_FS_ExtendLock = 157, /* AFS Extend a file lock */ + afs_FS_ReleaseLock = 158, /* AFS Release a file lock */ + afs_FS_Lookup = 161, /* AFS lookup file in directory */ + afs_FS_FetchData64 = 65537, /* AFS Fetch file data */ + afs_FS_StoreData64 = 65538, /* AFS Store file data */ + afs_FS_GiveUpAllCallBacks = 65539, /* AFS Give up all our callbacks on a server */ + afs_FS_GetCapabilities = 65540, /* AFS Get FS server capabilities */ +}; + +enum afs_vl_operation { + afs_VL_GetEntryByNameU = 527, /* AFS Get Vol Entry By Name operation ID */ + afs_VL_GetAddrsU = 533, /* AFS Get FS server addresses */ + afs_YFSVL_GetEndpoints = 64002, /* YFS Get FS & Vol server addresses */ + afs_VL_GetCapabilities 
= 65537, /* AFS Get VL server capabilities */ +}; + #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */ /* @@ -42,6 +74,37 @@ enum afs_call_trace { EM(afs_call_trace_wake, "WAKE ") \ E_(afs_call_trace_work, "WORK ") +#define afs_fs_operations \ + EM(afs_FS_FetchData, "FS.FetchData") \ + EM(afs_FS_FetchStatus, "FS.FetchStatus") \ + EM(afs_FS_StoreData, "FS.StoreData") \ + EM(afs_FS_StoreStatus, "FS.StoreStatus") \ + EM(afs_FS_RemoveFile, "FS.RemoveFile") \ + EM(afs_FS_CreateFile, "FS.CreateFile") \ + EM(afs_FS_Rename, "FS.Rename") \ + EM(afs_FS_Symlink, "FS.Symlink") \ + EM(afs_FS_Link, "FS.Link") \ + EM(afs_FS_MakeDir, "FS.MakeDir") \ + EM(afs_FS_RemoveDir, "FS.RemoveDir") \ + EM(afs_FS_GetVolumeInfo, "FS.GetVolumeInfo") \ + EM(afs_FS_GetVolumeStatus, "FS.GetVolumeStatus") \ + EM(afs_FS_GetRootVolume, "FS.GetRootVolume") \ + EM(afs_FS_SetLock, "FS.SetLock") \ + EM(afs_FS_ExtendLock, "FS.ExtendLock") \ + EM(afs_FS_ReleaseLock, "FS.ReleaseLock") \ + EM(afs_FS_Lookup, "FS.Lookup") \ + EM(afs_FS_FetchData64, "FS.FetchData64") \ + EM(afs_FS_StoreData64, "FS.StoreData64") \ + EM(afs_FS_GiveUpAllCallBacks, "FS.GiveUpAllCallBacks") \ + E_(afs_FS_GetCapabilities, "FS.GetCapabilities") + +#define afs_vl_operations \ + EM(afs_VL_GetEntryByNameU, "VL.GetEntryByNameU") \ + EM(afs_VL_GetAddrsU, "VL.GetAddrsU") \ + EM(afs_YFSVL_GetEndpoints, "YFSVL.GetEndpoints") \ + E_(afs_VL_GetCapabilities, "VL.GetCapabilities") + + /* * Export enum symbols via userspace. 
*/ @@ -51,6 +114,8 @@ enum afs_call_trace { #define E_(a, b) TRACE_DEFINE_ENUM(a); afs_call_traces; +afs_fs_operations; +afs_vl_operations; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -178,6 +243,83 @@ TRACE_EVENT(afs_call, __entry->where) ); +TRACE_EVENT(afs_make_fs_call, + TP_PROTO(struct afs_call *call, const struct afs_fid *fid), + + TP_ARGS(call, fid), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(enum afs_fs_operation, op ) + __field_struct(struct afs_fid, fid ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->op = call->operation_ID; + if (fid) { + __entry->fid = *fid; + } else { + __entry->fid.vid = 0; + __entry->fid.vnode = 0; + __entry->fid.unique = 0; + } + ), + + TP_printk("c=%p %06x:%06x:%06x %s", + __entry->call, + __entry->fid.vid, + __entry->fid.vnode, + __entry->fid.unique, + __print_symbolic(__entry->op, afs_fs_operations)) + ); + +TRACE_EVENT(afs_make_vl_call, + TP_PROTO(struct afs_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(enum afs_vl_operation, op ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->op = call->operation_ID; + ), + + TP_printk("c=%p %s", + __entry->call, + __print_symbolic(__entry->op, afs_vl_operations)) + ); + +TRACE_EVENT(afs_call_done, + TP_PROTO(struct afs_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(struct rxrpc_call *, rx_call ) + __field(int, ret ) + __field(u32, abort_code ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->rx_call = call->rxcall; + __entry->ret = call->error; + __entry->abort_code = call->abort_code; + ), + + TP_printk(" c=%p ret=%d ab=%d [%p]", + __entry->call, + __entry->ret, + __entry->abort_code, + __entry->rx_call) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 2c099014a0a456012c1778e80adce839bf956b77 Mon Sep 17 00:00:00 2001 From: David Howells 
Date: Thu, 2 Nov 2017 15:27:51 +0000 Subject: afs: Trace the sending of pages Add a pair of tracepoints to log the sending of pages for an FS.StoreData or FS.StoreData64 operation. Tracepoint afs_send_pages notes each set of pages added to the operation. There may be several of these per operation as we get at most 8 contiguous pages in one go because the bvec we're using is on the stack. Tracepoint afs_sent_pages notes the end of adding data from a whole run of pages to the operation and the completion of the request phase. Signed-off-by: David Howells --- include/trace/events/afs.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index dd9d2600ee98..c780f4c39a53 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -320,6 +320,67 @@ TRACE_EVENT(afs_call_done, __entry->rx_call) ); +TRACE_EVENT(afs_send_pages, + TP_PROTO(struct afs_call *call, struct msghdr *msg, + pgoff_t first, pgoff_t last, unsigned int offset), + + TP_ARGS(call, msg, first, last, offset), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(pgoff_t, first ) + __field(pgoff_t, last ) + __field(unsigned int, nr ) + __field(unsigned int, bytes ) + __field(unsigned int, offset ) + __field(unsigned int, flags ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->last = last; + __entry->nr = msg->msg_iter.nr_segs; + __entry->bytes = msg->msg_iter.count; + __entry->offset = offset; + __entry->flags = msg->msg_flags; + ), + + TP_printk(" c=%p %lx-%lx-%lx b=%x o=%x f=%x", + __entry->call, + __entry->first, __entry->first + __entry->nr - 1, __entry->last, + __entry->bytes, __entry->offset, + __entry->flags) + ); + +TRACE_EVENT(afs_sent_pages, + TP_PROTO(struct afs_call *call, pgoff_t first, pgoff_t last, + pgoff_t cursor, int ret), + + TP_ARGS(call, first, last, cursor, ret), + + TP_STRUCT__entry( + 
__field(struct afs_call *, call ) + __field(pgoff_t, first ) + __field(pgoff_t, last ) + __field(pgoff_t, cursor ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->last = last; + __entry->cursor = cursor; + __entry->ret = ret; + ), + + TP_printk(" c=%p %lx-%lx c=%lx r=%d", + __entry->call, + __entry->first, __entry->last, + __entry->cursor, __entry->ret) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From dab17c1add5c51b68027a9a3861af3a99cb5485a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:52 +0000 Subject: afs: Fix directory read/modify race Because parsing of the directory wasn't being done under any sort of lock, the pages holding the directory content can get invalidated whilst the parsing is ongoing. Further, the directory page check function gets called outside of the page lock, so if the page gets cleared or updated, this may return reports of bad magic numbers in the directory page. Also, the directory may change size whilst checking and parsing are ongoing, so more care needs to be taken here. Fix this by: (1) Perform the page check from the page filling function before we set PageUptodate and drop the page lock. (2) Check for the file having shrunk and the page having been abandoned before checking the page contents. (3) Lock the page whilst parsing it for the directory iterator. Whilst we're at it, add a tracepoint to report check failure. 
Signed-off-by: David Howells --- include/trace/events/afs.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index c780f4c39a53..9cfb7657b72c 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -381,6 +381,27 @@ TRACE_EVENT(afs_sent_pages, __entry->cursor, __entry->ret) ); +TRACE_EVENT(afs_dir_check_failed, + TP_PROTO(struct afs_vnode *vnode, loff_t off, loff_t i_size), + + TP_ARGS(vnode, off, i_size), + + TP_STRUCT__entry( + __field(struct afs_vnode *, vnode ) + __field(loff_t, off ) + __field(loff_t, i_size ) + ), + + TP_fast_assign( + __entry->vnode = vnode; + __entry->off = off; + __entry->i_size = i_size; + ), + + TP_printk("vn=%p %llx/%llx", + __entry->vnode, __entry->off, __entry->i_size) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 13524ab3c6f41bcd257d28644414297bea8282b7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:53 +0000 Subject: afs: Trace page dirty/clean Add a trace event that logs the dirtying and cleaning of pages attached to AFS inodes. Signed-off-by: David Howells --- include/trace/events/afs.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 9cfb7657b72c..5f4e8193932d 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -402,6 +402,45 @@ TRACE_EVENT(afs_dir_check_failed, __entry->vnode, __entry->off, __entry->i_size) ); +/* + * We use page->private to hold the amount of the page that we've written to, + * splitting the field into two parts. However, we need to represent a range + * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. 
+ */ +#if PAGE_SIZE > 32768 +#define AFS_PRIV_MAX 0xffffffff +#define AFS_PRIV_SHIFT 32 +#else +#define AFS_PRIV_MAX 0xffff +#define AFS_PRIV_SHIFT 16 +#endif + +TRACE_EVENT(afs_page_dirty, + TP_PROTO(struct afs_vnode *vnode, const char *where, + pgoff_t page, unsigned long priv), + + TP_ARGS(vnode, where, page, priv), + + TP_STRUCT__entry( + __field(struct afs_vnode *, vnode ) + __field(const char *, where ) + __field(pgoff_t, page ) + __field(unsigned long, priv ) + ), + + TP_fast_assign( + __entry->vnode = vnode; + __entry->where = where; + __entry->page = page; + __entry->priv = priv; + ), + + TP_printk("vn=%p %lx %s %lu-%lu", + __entry->vnode, __entry->page, __entry->where, + __entry->priv & AFS_PRIV_MAX, + __entry->priv >> AFS_PRIV_SHIFT) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 98bf40cd99fcfed0705812b6cbdbb3b441a42970 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:53 +0000 Subject: afs: Protect call->state changes against signals Protect call->state changes against the call being prematurely terminated due to a signal. What can happen is that a signal causes afs_wait_for_call_to_complete() to abort an afs_call because it's not yet complete whilst afs_deliver_to_call() is delivering data to that call. If the data delivery causes the state to change, this may overwrite the state of the afs_call, making it not-yet-complete again - but no further notifications will be forthcoming from AF_RXRPC as the rxrpc call has been aborted and completed, so kAFS will just hang in various places waiting for that call or on page bits that need clearing by that call. A tracepoint to monitor call state changes is also provided. 
Signed-off-by: David Howells --- include/trace/events/afs.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 5f4e8193932d..6b59c63a8e51 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -441,6 +441,36 @@ TRACE_EVENT(afs_page_dirty, __entry->priv >> AFS_PRIV_SHIFT) ); +TRACE_EVENT(afs_call_state, + TP_PROTO(struct afs_call *call, + enum afs_call_state from, + enum afs_call_state to, + int ret, u32 remote_abort), + + TP_ARGS(call, from, to, ret, remote_abort), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(enum afs_call_state, from ) + __field(enum afs_call_state, to ) + __field(int, ret ) + __field(u32, abort ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->from = from; + __entry->to = to; + __entry->ret = ret; + __entry->abort = remote_abort; + ), + + TP_printk("c=%p %u->%u r=%d ab=%d", + __entry->call, + __entry->from, __entry->to, + __entry->ret, __entry->abort) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3