From 1054794198e39103cb986618c4c10ec2252b7089 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 21 Dec 2010 12:02:25 +1100 Subject: xfs: convert log grant ticket queues to list heads The grant write and reserve queues use a roll-your-own double linked list, so convert it to a standard list_head structure and convert all the list traversals to use list_for_each_entry(). We can also get rid of the XLOG_TIC_IN_Q flag as we can use the list_empty() check to tell if the ticket is in a list or not. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 123 +++++++++++++++++++------------------------------------ 1 file changed, 41 insertions(+), 82 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index cee4ab9f8a9e..1b82735471ab 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -95,38 +95,6 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, STATIC int xlog_iclogs_empty(xlog_t *log); - -static void -xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) -{ - if (*qp) { - tic->t_next = (*qp); - tic->t_prev = (*qp)->t_prev; - (*qp)->t_prev->t_next = tic; - (*qp)->t_prev = tic; - } else { - tic->t_prev = tic->t_next = tic; - *qp = tic; - } - - tic->t_flags |= XLOG_TIC_IN_Q; -} - -static void -xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) -{ - if (tic == tic->t_next) { - *qp = NULL; - } else { - *qp = tic->t_next; - tic->t_next->t_prev = tic->t_prev; - tic->t_prev->t_next = tic->t_next; - } - - tic->t_next = tic->t_prev = NULL; - tic->t_flags &= ~XLOG_TIC_IN_Q; -} - static void xlog_grant_sub_space(struct log *log, int bytes) { @@ -724,7 +692,7 @@ xfs_log_move_tail(xfs_mount_t *mp, log->l_tail_lsn = tail_lsn; } - if ((tic = log->l_write_headq)) { + if (!list_empty(&log->l_writeq)) { #ifdef DEBUG if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("Recovery problem"); @@ -732,7 +700,7 @@ xfs_log_move_tail(xfs_mount_t *mp, cycle = log->l_grant_write_cycle; bytes = log->l_grant_write_bytes; free_bytes = xlog_space_left(log, cycle, bytes); - do { + list_for_each_entry(tic, &log->l_writeq, t_queue) { ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); if (free_bytes < tic->t_unit_res && tail_lsn != 1) @@ -740,10 +708,10 @@ xfs_log_move_tail(xfs_mount_t *mp, tail_lsn = 0; free_bytes -= tic->t_unit_res; sv_signal(&tic->t_wait); - tic = tic->t_next; - } while (tic != log->l_write_headq); + } } - if ((tic = log->l_reserve_headq)) { + + if (!list_empty(&log->l_reserveq)) { #ifdef DEBUG if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("Recovery problem"); @@ -751,7 +719,7 @@ xfs_log_move_tail(xfs_mount_t *mp, cycle = log->l_grant_reserve_cycle; bytes = log->l_grant_reserve_bytes; free_bytes = xlog_space_left(log, cycle, bytes); - do { + list_for_each_entry(tic, &log->l_reserveq, t_queue) { if (tic->t_flags & XLOG_TIC_PERM_RESERV) need_bytes = tic->t_unit_res*tic->t_cnt; else @@ -761,8 +729,7 @@ xfs_log_move_tail(xfs_mount_t *mp, tail_lsn = 0; free_bytes -= need_bytes; sv_signal(&tic->t_wait); - tic = tic->t_next; - } while (tic != log->l_reserve_headq); + } } spin_unlock(&log->l_grant_lock); } /* xfs_log_move_tail */ @@ -1053,6 +1020,8 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ log->l_grant_reserve_cycle = 1; log->l_grant_write_cycle = 1; + INIT_LIST_HEAD(&log->l_reserveq); + INIT_LIST_HEAD(&log->l_writeq); error = EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { @@ -2550,8 +2519,8 @@ xlog_grant_log_space(xlog_t *log, 
trace_xfs_log_grant_enter(log, tic); /* something is already sleeping; insert new transaction at end */ - if (log->l_reserve_headq) { - xlog_ins_ticketq(&log->l_reserve_headq, tic); + if (!list_empty(&log->l_reserveq)) { + list_add_tail(&tic->t_queue, &log->l_reserveq); trace_xfs_log_grant_sleep1(log, tic); @@ -2583,8 +2552,8 @@ redo: free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, log->l_grant_reserve_bytes); if (free_bytes < need_bytes) { - if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) - xlog_ins_ticketq(&log->l_reserve_headq, tic); + if (list_empty(&tic->t_queue)) + list_add_tail(&tic->t_queue, &log->l_reserveq); trace_xfs_log_grant_sleep2(log, tic); @@ -2602,8 +2571,9 @@ redo: trace_xfs_log_grant_wake2(log, tic); goto redo; - } else if (tic->t_flags & XLOG_TIC_IN_Q) - xlog_del_ticketq(&log->l_reserve_headq, tic); + } + + list_del_init(&tic->t_queue); /* we've got enough space */ xlog_grant_add_space(log, need_bytes); @@ -2626,9 +2596,7 @@ redo: return 0; error_return: - if (tic->t_flags & XLOG_TIC_IN_Q) - xlog_del_ticketq(&log->l_reserve_headq, tic); - + list_del_init(&tic->t_queue); trace_xfs_log_grant_error(log, tic); /* @@ -2653,7 +2621,6 @@ xlog_regrant_write_log_space(xlog_t *log, xlog_ticket_t *tic) { int free_bytes, need_bytes; - xlog_ticket_t *ntic; #ifdef DEBUG xfs_lsn_t tail_lsn; #endif @@ -2683,22 +2650,23 @@ xlog_regrant_write_log_space(xlog_t *log, * this transaction. */ need_bytes = tic->t_unit_res; - if ((ntic = log->l_write_headq)) { + if (!list_empty(&log->l_writeq)) { + struct xlog_ticket *ntic; free_bytes = xlog_space_left(log, log->l_grant_write_cycle, log->l_grant_write_bytes); - do { + list_for_each_entry(ntic, &log->l_writeq, t_queue) { ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); if (free_bytes < ntic->t_unit_res) break; free_bytes -= ntic->t_unit_res; sv_signal(&ntic->t_wait); - ntic = ntic->t_next; - } while (ntic != log->l_write_headq); + } - if (ntic != log->l_write_headq) { - if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) - xlog_ins_ticketq(&log->l_write_headq, tic); + if (ntic != list_first_entry(&log->l_writeq, + struct xlog_ticket, t_queue)) { + if (list_empty(&tic->t_queue)) + list_add_tail(&tic->t_queue, &log->l_writeq); trace_xfs_log_regrant_write_sleep1(log, tic); @@ -2727,8 +2695,8 @@ redo: free_bytes = xlog_space_left(log, log->l_grant_write_cycle, log->l_grant_write_bytes); if (free_bytes < need_bytes) { - if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) - xlog_ins_ticketq(&log->l_write_headq, tic); + if (list_empty(&tic->t_queue)) + list_add_tail(&tic->t_queue, &log->l_writeq); spin_unlock(&log->l_grant_lock); xlog_grant_push_ail(log->l_mp, need_bytes); spin_lock(&log->l_grant_lock); @@ -2745,8 +2713,9 @@ redo: trace_xfs_log_regrant_write_wake2(log, tic); goto redo; - } else if (tic->t_flags & XLOG_TIC_IN_Q) - xlog_del_ticketq(&log->l_write_headq, tic); + } + + list_del_init(&tic->t_queue); /* we've got enough space */ xlog_grant_add_space_write(log, need_bytes); @@ -2766,9 +2735,7 @@ redo: error_return: - if (tic->t_flags & XLOG_TIC_IN_Q) - xlog_del_ticketq(&log->l_reserve_headq, tic); - + list_del_init(&tic->t_queue); trace_xfs_log_regrant_write_error(log, tic); /* @@ -3435,6 +3402,7 @@ xlog_ticket_alloc( } atomic_set(&tic->t_ref, 1); + INIT_LIST_HEAD(&tic->t_queue); tic->t_unit_res = unit_bytes; tic->t_curr_res = unit_bytes; tic->t_cnt = cnt; @@ -3742,26 +3710,17 @@ xfs_log_force_umount( spin_unlock(&log->l_icloglock); /* - * We don't want anybody waiting for log reservations - * after this. 
That means we have to wake up everybody - queued up on reserve_headq as well as write_headq. - In addition, we make sure in xlog_{re}grant_log_space - that we don't enqueue anything once the SHUTDOWN flag - is set, and this action is protected by the GRANTLOCK. + * We don't want anybody waiting for log reservations after this. That + * means we have to wake up everybody queued up on reserveq as well as + * writeq. In addition, we make sure in xlog_{re}grant_log_space that + * we don't enqueue anything once the SHUTDOWN flag is set, and this + * action is protected by the GRANTLOCK. */ - if ((tic = log->l_reserve_headq)) { - do { - sv_signal(&tic->t_wait); - tic = tic->t_next; - } while (tic != log->l_reserve_headq); - } + list_for_each_entry(tic, &log->l_reserveq, t_queue) + sv_signal(&tic->t_wait); - if ((tic = log->l_write_headq)) { - do { - sv_signal(&tic->t_wait); - tic = tic->t_next; - } while (tic != log->l_write_headq); - } + list_for_each_entry(tic, &log->l_writeq, t_queue) + sv_signal(&tic->t_wait); spin_unlock(&log->l_grant_lock); if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { -- cgit v1.2.3 From 3f336c6fa17c2b3d14b3dd1bd6e64e9cc97b6359 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 21 Dec 2010 12:02:52 +1100 Subject: xfs: factor out common grant head/log tail verification code Factor repeated debug code out of grant head manipulation functions into a separate function. This removes ifdef DEBUG spaghetti from the code and makes the code easier to follow. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 51 ++++++++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 29 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1b82735471ab..99c62855432e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -82,6 +82,7 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, #if defined(DEBUG) STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); STATIC void xlog_verify_grant_head(xlog_t *log, int equals); +STATIC void xlog_verify_grant_tail(struct log *log); STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, int count, boolean_t syncing); STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, @@ -89,6 +90,7 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, #else #define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_head(a,b) +#define xlog_verify_grant_tail(a) #define xlog_verify_iclog(a,b,c,d) #define xlog_verify_tail_lsn(a,b,c) #endif @@ -2503,10 +2505,6 @@ xlog_grant_log_space(xlog_t *log, { int free_bytes; int need_bytes; -#ifdef DEBUG - xfs_lsn_t tail_lsn; -#endif - #ifdef DEBUG if (log->l_flags & XLOG_ACTIVE_RECOVERY) @@ -2577,21 +2575,9 @@ redo: /* we've got enough space */ xlog_grant_add_space(log, need_bytes); -#ifdef DEBUG - tail_lsn = log->l_tail_lsn; - /* - * Check to make sure the grant write head didn't just over lap the - * tail. If the cycles are the same, we can't be overlapping. - * Otherwise, make sure that the cycles differ by exactly one and - * check the byte count.
- */ - if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { - ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); - ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); - } -#endif trace_xfs_log_grant_exit(log, tic); xlog_verify_grant_head(log, 1); + xlog_verify_grant_tail(log); spin_unlock(&log->l_grant_lock); return 0; @@ -2621,9 +2607,6 @@ xlog_regrant_write_log_space(xlog_t *log, xlog_ticket_t *tic) { int free_bytes, need_bytes; -#ifdef DEBUG - xfs_lsn_t tail_lsn; -#endif tic->t_curr_res = tic->t_unit_res; xlog_tic_reset_res(tic); @@ -2719,17 +2702,9 @@ redo: /* we've got enough space */ xlog_grant_add_space_write(log, need_bytes); -#ifdef DEBUG - tail_lsn = log->l_tail_lsn; - if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { - ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); - ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); - } -#endif - trace_xfs_log_regrant_write_exit(log, tic); - xlog_verify_grant_head(log, 1); + xlog_verify_grant_tail(log); spin_unlock(&log->l_grant_lock); return 0; @@ -3465,6 +3440,24 @@ xlog_verify_grant_head(xlog_t *log, int equals) } } /* xlog_verify_grant_head */ +STATIC void +xlog_verify_grant_tail( + struct log *log) +{ + xfs_lsn_t tail_lsn = log->l_tail_lsn; + + /* + * Check to make sure the grant write head didn't just over lap the + * tail. If the cycles are the same, we can't be overlapping. + * Otherwise, make sure that the cycles differ by exactly one and + * check the byte count. + */ + if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { + ASSERT(log->l_grant_write_cycle - 1 == CYCLE_LSN(tail_lsn)); + ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); + } +} + /* check if it will fit */ STATIC void xlog_verify_tail_lsn(xlog_t *log, -- cgit v1.2.3 From 663e496a720a3a9fc08ea70b29724e8906b34e43 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 21 Dec 2010 12:06:05 +1100 Subject: xfs: rework log grant space calculations The log grant space calculations are repeated for both write and reserve grant heads. To make it simpler to convert the calculations to a different algorithm, factor them so both the grant heads use the same calculation functions. Once this is done we can drop the wrappers that are used in only a couple of places to update both grant heads at once as they don't provide any particular value.
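For reference, the arithmetic both grant heads share after this change is a plain modular add/subtract over the log size. A minimal standalone sketch of the pattern the factored helpers implement (the bare logsize/cycle/space parameters here are illustrative; the real functions in the diff below operate on the struct log fields):

static void
grant_add_space(int logsize, int *cycle, int *space, int bytes)
{
	int tmp = logsize - *space;	/* room left before the head wraps */

	if (tmp > bytes)
		*space += bytes;	/* fits without wrapping */
	else {
		*space = bytes - tmp;	/* wrap to the front of the log... */
		(*cycle)++;		/* ...and advance the cycle count */
	}
}

static void
grant_sub_space(int logsize, int *cycle, int *space, int bytes)
{
	*space -= bytes;
	if (*space < 0) {		/* wrapped back past the log start */
		*space += logsize;
		(*cycle)--;
	}
}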
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 95 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 48 insertions(+), 47 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 99c62855432e..9a4b9edad847 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -98,53 +98,34 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, STATIC int xlog_iclogs_empty(xlog_t *log); static void -xlog_grant_sub_space(struct log *log, int bytes) -{ - log->l_grant_write_bytes -= bytes; - if (log->l_grant_write_bytes < 0) { - log->l_grant_write_bytes += log->l_logsize; - log->l_grant_write_cycle--; - } - - log->l_grant_reserve_bytes -= bytes; - if ((log)->l_grant_reserve_bytes < 0) { - log->l_grant_reserve_bytes += log->l_logsize; - log->l_grant_reserve_cycle--; - } - -} - -static void -xlog_grant_add_space_write(struct log *log, int bytes) +xlog_grant_sub_space( + struct log *log, + int *cycle, + int *space, + int bytes) { - int tmp = log->l_logsize - log->l_grant_write_bytes; - if (tmp > bytes) - log->l_grant_write_bytes += bytes; - else { - log->l_grant_write_cycle++; - log->l_grant_write_bytes = bytes - tmp; + *space -= bytes; + if (*space < 0) { + *space += log->l_logsize; + (*cycle)--; } } static void -xlog_grant_add_space_reserve(struct log *log, int bytes) +xlog_grant_add_space( + struct log *log, + int *cycle, + int *space, + int bytes) { - int tmp = log->l_logsize - log->l_grant_reserve_bytes; + int tmp = log->l_logsize - *space; if (tmp > bytes) - log->l_grant_reserve_bytes += bytes; + *space += bytes; else { - log->l_grant_reserve_cycle++; - log->l_grant_reserve_bytes = bytes - tmp; + *space = bytes - tmp; + (*cycle)++; } } - -static inline void -xlog_grant_add_space(struct log *log, int bytes) -{ - xlog_grant_add_space_write(log, bytes); - xlog_grant_add_space_reserve(log, bytes); -} - static void xlog_tic_reset_res(xlog_ticket_t *tic) { @@ -1344,7 +1325,10 @@ xlog_sync(xlog_t *log, /* move grant heads by roundoff in sync */ spin_lock(&log->l_grant_lock); - xlog_grant_add_space(log, roundoff); + xlog_grant_add_space(log, &log->l_grant_reserve_cycle, + &log->l_grant_reserve_bytes, roundoff); + xlog_grant_add_space(log, &log->l_grant_write_cycle, + &log->l_grant_write_bytes, roundoff); spin_unlock(&log->l_grant_lock); /* put cycle number in every block */ @@ -2574,7 +2558,10 @@ redo: list_del_init(&tic->t_queue); /* we've got enough space */ - xlog_grant_add_space(log, need_bytes); + xlog_grant_add_space(log, &log->l_grant_reserve_cycle, + &log->l_grant_reserve_bytes, need_bytes); + xlog_grant_add_space(log, &log->l_grant_write_cycle, + &log->l_grant_write_bytes, need_bytes); trace_xfs_log_grant_exit(log, tic); xlog_verify_grant_head(log, 1); xlog_verify_grant_tail(log); @@ -2701,7 +2688,8 @@ redo: list_del_init(&tic->t_queue); /* we've got enough space */ - xlog_grant_add_space_write(log, need_bytes); + xlog_grant_add_space(log, &log->l_grant_write_cycle, + &log->l_grant_write_bytes, need_bytes); trace_xfs_log_regrant_write_exit(log, tic); xlog_verify_grant_head(log, 1); xlog_verify_grant_tail(log); @@ -2742,7 +2730,12 @@ xlog_regrant_reserve_log_space(xlog_t *log, ticket->t_cnt--; spin_lock(&log->l_grant_lock); - xlog_grant_sub_space(log, ticket->t_curr_res); + xlog_grant_sub_space(log, &log->l_grant_reserve_cycle, + &log->l_grant_reserve_bytes, + ticket->t_curr_res); + xlog_grant_sub_space(log, &log->l_grant_write_cycle, + &log->l_grant_write_bytes, + ticket->t_curr_res); 
ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); @@ -2756,7 +2749,9 @@ xlog_regrant_reserve_log_space(xlog_t *log, return; } - xlog_grant_add_space_reserve(log, ticket->t_unit_res); + xlog_grant_add_space(log, &log->l_grant_reserve_cycle, + &log->l_grant_reserve_bytes, + ticket->t_unit_res); trace_xfs_log_regrant_reserve_exit(log, ticket); @@ -2785,24 +2780,30 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, xlog_ticket_t *ticket) { + int bytes; + if (ticket->t_cnt > 0) ticket->t_cnt--; spin_lock(&log->l_grant_lock); trace_xfs_log_ungrant_enter(log, ticket); - - xlog_grant_sub_space(log, ticket->t_curr_res); - trace_xfs_log_ungrant_sub(log, ticket); - /* If this is a permanent reservation ticket, we may be able to free + /* + * If this is a permanent reservation ticket, we may be able to free * up more space based on the remaining count. */ + bytes = ticket->t_curr_res; if (ticket->t_cnt > 0) { ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); - xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); + bytes += ticket->t_unit_res*ticket->t_cnt; } + xlog_grant_sub_space(log, &log->l_grant_reserve_cycle, + &log->l_grant_reserve_bytes, bytes); + xlog_grant_sub_space(log, &log->l_grant_write_cycle, + &log->l_grant_write_bytes, bytes); + trace_xfs_log_ungrant_exit(log, ticket); xlog_verify_grant_head(log, 1); -- cgit v1.2.3 From a69ed03c24d4a336c23b7116127713d5a8c5ac4d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 21 Dec 2010 12:08:20 +1100 Subject: xfs: combine grant heads into a single 64 bit integer Prepare for switching the grant heads to atomic variables by combining the two 32 bit values that make up the grant head into a single 64 bit variable. Provide wrapper functions to combine and split the grant heads appropriately for calculations and use them as necessary. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 166 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 87 insertions(+), 79 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 9a4b9edad847..6bba8b4b8596 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, xfs_buftarg_t *log_target, xfs_daddr_t blk_offset, int num_bblks); -STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); +STATIC int xlog_space_left(struct log *log, int64_t *head); STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); STATIC void xlog_dealloc_log(xlog_t *log); @@ -100,32 +100,44 @@ STATIC int xlog_iclogs_empty(xlog_t *log); static void xlog_grant_sub_space( struct log *log, - int *cycle, - int *space, + int64_t *head, int bytes) { - *space -= bytes; - if (*space < 0) { - *space += log->l_logsize; - (*cycle)--; + int cycle, space; + + xlog_crack_grant_head(head, &cycle, &space); + + space -= bytes; + if (space < 0) { + space += log->l_logsize; + cycle--; } + + xlog_assign_grant_head(head, cycle, space); } static void xlog_grant_add_space( struct log *log, - int *cycle, - int *space, + int64_t *head, int bytes) { - int tmp = log->l_logsize - *space; + int tmp; + int cycle, space; + + xlog_crack_grant_head(head, &cycle, &space); + + tmp = log->l_logsize - space; if (tmp > bytes) - *space += bytes; + space += bytes; else { - *space = bytes - tmp; - (*cycle)++; + space = bytes - tmp; + cycle++; } + + xlog_assign_grant_head(head, cycle, space); } + static void xlog_tic_reset_res(xlog_ticket_t *tic) { @@ -654,7 +666,7 @@ xfs_log_move_tail(xfs_mount_t *mp, { xlog_ticket_t *tic; xlog_t *log = mp->m_log; - int need_bytes, free_bytes, cycle, bytes; + int need_bytes, free_bytes; if (XLOG_FORCED_SHUTDOWN(log)) return; @@ -680,9 +692,7 @@ xfs_log_move_tail(xfs_mount_t *mp, if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("Recovery problem"); #endif - cycle = log->l_grant_write_cycle; - bytes = log->l_grant_write_bytes; - free_bytes = xlog_space_left(log, cycle, bytes); + free_bytes = xlog_space_left(log, &log->l_grant_write_head); list_for_each_entry(tic, &log->l_writeq, t_queue) { ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); @@ -699,9 +709,7 @@ xfs_log_move_tail(xfs_mount_t *mp, if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("Recovery problem"); #endif - cycle = log->l_grant_reserve_cycle; - bytes = log->l_grant_reserve_bytes; - free_bytes = xlog_space_left(log, cycle, bytes); + free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); list_for_each_entry(tic, &log->l_reserveq, t_queue) { if (tic->t_flags & XLOG_TIC_PERM_RESERV) need_bytes = tic->t_unit_res*tic->t_cnt; @@ -814,21 +822,26 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) * result is that we return the size of the log as the amount of space left. 
*/ STATIC int -xlog_space_left(xlog_t *log, int cycle, int bytes) +xlog_space_left( + struct log *log, + int64_t *head) { - int free_bytes; - int tail_bytes; - int tail_cycle; + int free_bytes; + int tail_bytes; + int tail_cycle; + int head_cycle; + int head_bytes; + xlog_crack_grant_head(head, &head_cycle, &head_bytes); tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn)); tail_cycle = CYCLE_LSN(log->l_tail_lsn); - if ((tail_cycle == cycle) && (bytes >= tail_bytes)) { - free_bytes = log->l_logsize - (bytes - tail_bytes); - } else if ((tail_cycle + 1) < cycle) { + if (tail_cycle == head_cycle && head_bytes >= tail_bytes) + free_bytes = log->l_logsize - (head_bytes - tail_bytes); + else if (tail_cycle + 1 < head_cycle) return 0; - } else if (tail_cycle < cycle) { - ASSERT(tail_cycle == (cycle - 1)); - free_bytes = tail_bytes - bytes; + else if (tail_cycle < head_cycle) { + ASSERT(tail_cycle == (head_cycle - 1)); + free_bytes = tail_bytes - head_bytes; } else { /* * The reservation head is behind the tail. @@ -839,12 +852,12 @@ xlog_space_left(xlog_t *log, int cycle, int bytes) "xlog_space_left: head behind tail\n" " tail_cycle = %d, tail_bytes = %d\n" " GH cycle = %d, GH bytes = %d", - tail_cycle, tail_bytes, cycle, bytes); + tail_cycle, tail_bytes, head_cycle, head_bytes); ASSERT(0); free_bytes = log->l_logsize; } return free_bytes; -} /* xlog_space_left */ +} /* @@ -1001,8 +1014,8 @@ xlog_alloc_log(xfs_mount_t *mp, /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ log->l_last_sync_lsn = log->l_tail_lsn; log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ - log->l_grant_reserve_cycle = 1; - log->l_grant_write_cycle = 1; + xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); + xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); INIT_LIST_HEAD(&log->l_reserveq); INIT_LIST_HEAD(&log->l_writeq); @@ -1190,9 +1203,7 @@ xlog_grant_push_ail(xfs_mount_t *mp, ASSERT(BTOBB(need_bytes) < log->l_logBBsize); spin_lock(&log->l_grant_lock); - free_bytes = xlog_space_left(log, - log->l_grant_reserve_cycle, - log->l_grant_reserve_bytes); + free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); tail_lsn = log->l_tail_lsn; free_blocks = BTOBBT(free_bytes); @@ -1325,10 +1336,8 @@ xlog_sync(xlog_t *log, /* move grant heads by roundoff in sync */ spin_lock(&log->l_grant_lock); - xlog_grant_add_space(log, &log->l_grant_reserve_cycle, - &log->l_grant_reserve_bytes, roundoff); - xlog_grant_add_space(log, &log->l_grant_write_cycle, - &log->l_grant_write_bytes, roundoff); + xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); + xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); spin_unlock(&log->l_grant_lock); /* put cycle number in every block */ @@ -2531,8 +2540,7 @@ redo: if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, - log->l_grant_reserve_bytes); + free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); if (free_bytes < need_bytes) { if (list_empty(&tic->t_queue)) list_add_tail(&tic->t_queue, &log->l_reserveq); @@ -2558,10 +2566,8 @@ redo: list_del_init(&tic->t_queue); /* we've got enough space */ - xlog_grant_add_space(log, &log->l_grant_reserve_cycle, - &log->l_grant_reserve_bytes, need_bytes); - xlog_grant_add_space(log, &log->l_grant_write_cycle, - &log->l_grant_write_bytes, need_bytes); + xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); + xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_grant_exit(log, tic); 
xlog_verify_grant_head(log, 1); xlog_verify_grant_tail(log); @@ -2622,8 +2628,7 @@ xlog_regrant_write_log_space(xlog_t *log, need_bytes = tic->t_unit_res; if (!list_empty(&log->l_writeq)) { struct xlog_ticket *ntic; - free_bytes = xlog_space_left(log, log->l_grant_write_cycle, - log->l_grant_write_bytes); + free_bytes = xlog_space_left(log, &log->l_grant_write_head); list_for_each_entry(ntic, &log->l_writeq, t_queue) { ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); @@ -2662,8 +2667,7 @@ redo: if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - free_bytes = xlog_space_left(log, log->l_grant_write_cycle, - log->l_grant_write_bytes); + free_bytes = xlog_space_left(log, &log->l_grant_write_head); if (free_bytes < need_bytes) { if (list_empty(&tic->t_queue)) list_add_tail(&tic->t_queue, &log->l_writeq); @@ -2688,8 +2692,7 @@ redo: list_del_init(&tic->t_queue); /* we've got enough space */ - xlog_grant_add_space(log, &log->l_grant_write_cycle, - &log->l_grant_write_bytes, need_bytes); + xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_regrant_write_exit(log, tic); xlog_verify_grant_head(log, 1); xlog_verify_grant_tail(log); @@ -2730,12 +2733,10 @@ xlog_regrant_reserve_log_space(xlog_t *log, ticket->t_cnt--; spin_lock(&log->l_grant_lock); - xlog_grant_sub_space(log, &log->l_grant_reserve_cycle, - &log->l_grant_reserve_bytes, - ticket->t_curr_res); - xlog_grant_sub_space(log, &log->l_grant_write_cycle, - &log->l_grant_write_bytes, - ticket->t_curr_res); + xlog_grant_sub_space(log, &log->l_grant_reserve_head, + ticket->t_curr_res); + xlog_grant_sub_space(log, &log->l_grant_write_head, + ticket->t_curr_res); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); @@ -2749,9 +2750,8 @@ xlog_regrant_reserve_log_space(xlog_t *log, return; } - xlog_grant_add_space(log, &log->l_grant_reserve_cycle, - &log->l_grant_reserve_bytes, - ticket->t_unit_res); + xlog_grant_add_space(log, &log->l_grant_reserve_head, + ticket->t_unit_res); trace_xfs_log_regrant_reserve_exit(log, ticket); @@ -2799,10 +2799,8 @@ xlog_ungrant_log_space(xlog_t *log, bytes += ticket->t_unit_res*ticket->t_cnt; } - xlog_grant_sub_space(log, &log->l_grant_reserve_cycle, - &log->l_grant_reserve_bytes, bytes); - xlog_grant_sub_space(log, &log->l_grant_write_cycle, - &log->l_grant_write_bytes, bytes); + xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); + xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); trace_xfs_log_ungrant_exit(log, ticket); @@ -3430,22 +3428,31 @@ xlog_verify_dest_ptr( STATIC void xlog_verify_grant_head(xlog_t *log, int equals) { - if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) { - if (equals) - ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes); - else - ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes); - } else { - ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle); - ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes); - } -} /* xlog_verify_grant_head */ + int reserve_cycle, reserve_space; + int write_cycle, write_space; + + xlog_crack_grant_head(&log->l_grant_reserve_head, + &reserve_cycle, &reserve_space); + xlog_crack_grant_head(&log->l_grant_write_head, + &write_cycle, &write_space); + + if (reserve_cycle == write_cycle) { + if (equals) + ASSERT(reserve_space >= write_space); + else + ASSERT(reserve_space > write_space); + } else { + ASSERT(reserve_cycle - 1 == write_cycle); + ASSERT(write_space >= reserve_space); + } +} STATIC void xlog_verify_grant_tail( struct log *log) { xfs_lsn_t 
tail_lsn = log->l_tail_lsn; + int cycle, space; /* * Check to make sure the grant write head didn't just over lap the @@ -3453,9 +3460,10 @@ xlog_verify_grant_tail( * Otherwise, make sure that the cycles differ by exactly one and * check the byte count. */ - if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { - ASSERT(log->l_grant_write_cycle - 1 == CYCLE_LSN(tail_lsn)); - ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); + xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); + if (CYCLE_LSN(tail_lsn) != cycle) { + ASSERT(cycle - 1 == CYCLE_LSN(tail_lsn)); + ASSERT(space <= BBTOB(BLOCK_LSN(tail_lsn))); } } -- cgit v1.2.3 From eb40a87500ac2f6be7eaf8ebb35610e6d0e60e9a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 21 Dec 2010 12:09:01 +1100 Subject: xfs: use wait queues directly for the log wait queues The log grant queues are one of the few places left using sv_t constructs for waiting. Given we are touching this code, we should convert them to plain wait queues. While there, convert all the other sv_t users in the log code as well. Seeing as this removes the last users of the sv_t type, remove the header file defining the wrapper and the fragments that still reference it. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 64 +++++++++++++++++++++++++------------------------------- 1 file changed, 28 insertions(+), 36 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6bba8b4b8596..cc0504e0bb3b 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -547,8 +547,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (!(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY)) { if (!XLOG_FORCED_SHUTDOWN(log)) { - sv_wait(&iclog->ic_force_wait, PMEM, - &log->l_icloglock, s); + xlog_wait(&iclog->ic_force_wait, + &log->l_icloglock); } else { spin_unlock(&log->l_icloglock); } @@ -588,8 +588,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) || iclog->ic_state == XLOG_STATE_DIRTY || iclog->ic_state == XLOG_STATE_IOERROR) ) { - sv_wait(&iclog->ic_force_wait, PMEM, - &log->l_icloglock, s); + xlog_wait(&iclog->ic_force_wait, + &log->l_icloglock); } else { spin_unlock(&log->l_icloglock); } @@ -700,7 +700,7 @@ xfs_log_move_tail(xfs_mount_t *mp, break; tail_lsn = 0; free_bytes -= tic->t_unit_res; - sv_signal(&tic->t_wait); + wake_up(&tic->t_wait); } } @@ -719,7 +719,7 @@ xfs_log_move_tail(xfs_mount_t *mp, break; tail_lsn = 0; free_bytes -= need_bytes; - sv_signal(&tic->t_wait); + wake_up(&tic->t_wait); } } spin_unlock(&log->l_grant_lock); @@ -1060,7 +1060,7 @@ xlog_alloc_log(xfs_mount_t *mp, spin_lock_init(&log->l_icloglock); spin_lock_init(&log->l_grant_lock); - sv_init(&log->l_flush_wait, 0, "flush_wait"); + init_waitqueue_head(&log->l_flush_wait); /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); @@ -1116,8 +1116,8 @@ xlog_alloc_log(xfs_mount_t *mp, ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); - sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); - sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); + init_waitqueue_head(&iclog->ic_force_wait); + init_waitqueue_head(&iclog->ic_write_wait); iclogp = &iclog->ic_next; } @@ -1132,11 +1132,8 @@ xlog_alloc_log(xfs_mount_t *mp, out_free_iclog: for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { prev_iclog = iclog->ic_next; - if (iclog->ic_bp) { - sv_destroy(&iclog->ic_force_wait); - sv_destroy(&iclog->ic_write_wait); + 
if (iclog->ic_bp) xfs_buf_free(iclog->ic_bp); - } kmem_free(iclog); } spinlock_destroy(&log->l_icloglock); @@ -1453,8 +1450,6 @@ xlog_dealloc_log(xlog_t *log) iclog = log->l_iclog; for (i=0; il_iclog_bufs; i++) { - sv_destroy(&iclog->ic_force_wait); - sv_destroy(&iclog->ic_write_wait); xfs_buf_free(iclog->ic_bp); next_iclog = iclog->ic_next; kmem_free(iclog); @@ -2261,7 +2256,7 @@ xlog_state_do_callback( xlog_state_clean_log(log); /* wake up threads waiting in xfs_log_force() */ - sv_broadcast(&iclog->ic_force_wait); + wake_up_all(&iclog->ic_force_wait); iclog = iclog->ic_next; } while (first_iclog != iclog); @@ -2308,7 +2303,7 @@ xlog_state_do_callback( spin_unlock(&log->l_icloglock); if (wake) - sv_broadcast(&log->l_flush_wait); + wake_up_all(&log->l_flush_wait); } @@ -2359,7 +2354,7 @@ xlog_state_done_syncing( * iclog buffer, we wake them all, one will get to do the * I/O, the others get to wait for the result. */ - sv_broadcast(&iclog->ic_write_wait); + wake_up_all(&iclog->ic_write_wait); spin_unlock(&log->l_icloglock); xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ } /* xlog_state_done_syncing */ @@ -2408,7 +2403,7 @@ restart: XFS_STATS_INC(xs_log_noiclogs); /* Wait for log writes to have flushed */ - sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0); + xlog_wait(&log->l_flush_wait, &log->l_icloglock); goto restart; } @@ -2523,7 +2518,8 @@ xlog_grant_log_space(xlog_t *log, goto error_return; XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); + xlog_wait(&tic->t_wait, &log->l_grant_lock); + /* * If we got an error, and the filesystem is shutting down, * we'll catch it down below. So just continue... @@ -2552,7 +2548,7 @@ redo: spin_lock(&log->l_grant_lock); XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); + xlog_wait(&tic->t_wait, &log->l_grant_lock); spin_lock(&log->l_grant_lock); if (XLOG_FORCED_SHUTDOWN(log)) @@ -2635,7 +2631,7 @@ xlog_regrant_write_log_space(xlog_t *log, if (free_bytes < ntic->t_unit_res) break; free_bytes -= ntic->t_unit_res; - sv_signal(&ntic->t_wait); + wake_up(&ntic->t_wait); } if (ntic != list_first_entry(&log->l_writeq, @@ -2650,8 +2646,7 @@ xlog_regrant_write_log_space(xlog_t *log, spin_lock(&log->l_grant_lock); XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_wait, PINOD|PLTWAIT, - &log->l_grant_lock, s); + xlog_wait(&tic->t_wait, &log->l_grant_lock); /* If we're shutting down, this tic is already * off the queue */ @@ -2677,8 +2672,7 @@ redo: XFS_STATS_INC(xs_sleep_logspace); trace_xfs_log_regrant_write_sleep2(log, tic); - - sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); + xlog_wait(&tic->t_wait, &log->l_grant_lock); /* If we're shutting down, this tic is already off the queue */ spin_lock(&log->l_grant_lock); @@ -3029,7 +3023,7 @@ maybe_sleep: return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); + xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); /* * No need to grab the log lock here since we're * only deciding whether or not to return EIO @@ -3147,8 +3141,8 @@ try_again: XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_prev->ic_write_wait, - PSWP, &log->l_icloglock, s); + xlog_wait(&iclog->ic_prev->ic_write_wait, + &log->l_icloglock); if (log_flushed) *log_flushed = 1; already_slept = 1; @@ -3176,7 +3170,7 @@ try_again: return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); + 
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); /* * No need to grab the log lock here since we're * only deciding whether or not to return EIO @@ -3251,10 +3245,8 @@ xfs_log_ticket_put( xlog_ticket_t *ticket) { ASSERT(atomic_read(&ticket->t_ref) > 0); - if (atomic_dec_and_test(&ticket->t_ref)) { - sv_destroy(&ticket->t_wait); + if (atomic_dec_and_test(&ticket->t_ref)) kmem_zone_free(xfs_log_ticket_zone, ticket); - } } xlog_ticket_t * @@ -3387,7 +3379,7 @@ xlog_ticket_alloc( tic->t_trans_type = 0; if (xflags & XFS_LOG_PERM_RESERV) tic->t_flags |= XLOG_TIC_PERM_RESERV; - sv_init(&tic->t_wait, SV_DEFAULT, "logtick"); + init_waitqueue_head(&tic->t_wait); xlog_tic_reset_res(tic); @@ -3719,10 +3711,10 @@ xfs_log_force_umount( * action is protected by the GRANTLOCK. */ list_for_each_entry(tic, &log->l_reserveq, t_queue) - sv_signal(&tic->t_wait); + wake_up(&tic->t_wait); list_for_each_entry(tic, &log->l_writeq, t_queue) - sv_signal(&tic->t_wait); + wake_up(&tic->t_wait); spin_unlock(&log->l_grant_lock); if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { -- cgit v1.2.3 From 2ced19cbae5448b720919a494606c62095d4f4db Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 21 Dec 2010 12:09:20 +1100 Subject: xfs: make AIL tail pushing independent of the grant lock The xlog_grant_push_ail() currently takes the grant lock internally to sample the tail lsn, last sync lsn and the reserve grant head. Most of the callers already hold the grant lock but have to drop it before calling xlog_grant_push_ail(). This is a left over from when the AIL tail pushing was done in line and hence xlog_grant_push_ail had to drop the grant lock. AIL push is now done in another thread and hence we can safely hold the grant lock over the entire xlog_grant_push_ail call. Push the grant lock outside of xlog_grant_push_ail() to simplify the locking and synchronisation needed for tail pushing. This will reduce traffic on the grant lock by itself, but this is only one step in preparing for the complete removal of the grant lock. While there, clean up the formatting of xlog_grant_push_ail() to match the rest of the XFS code. 
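The visible change for callers is that they no longer drop and retake the grant lock around the push. In the sleep paths of the grant functions below, the sequence goes from:

	/* before: the push required dropping the grant lock */
	spin_unlock(&log->l_grant_lock);
	xlog_grant_push_ail(log->l_mp, need_bytes);
	spin_lock(&log->l_grant_lock);

to simply:

	/* after: the push runs with the grant lock still held */
	xlog_grant_push_ail(log, need_bytes);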
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 111 +++++++++++++++++++++++++++---------------------------- 1 file changed, 54 insertions(+), 57 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index cc0504e0bb3b..1e2020d5a8b6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -70,7 +70,7 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); /* local functions to manipulate grant head */ STATIC int xlog_grant_log_space(xlog_t *log, xlog_ticket_t *xtic); -STATIC void xlog_grant_push_ail(xfs_mount_t *mp, +STATIC void xlog_grant_push_ail(struct log *log, int need_bytes); STATIC void xlog_regrant_reserve_log_space(xlog_t *log, xlog_ticket_t *ticket); @@ -318,7 +318,9 @@ xfs_log_reserve( trace_xfs_log_reserve(log, internal_ticket); - xlog_grant_push_ail(mp, internal_ticket->t_unit_res); + spin_lock(&log->l_grant_lock); + xlog_grant_push_ail(log, internal_ticket->t_unit_res); + spin_unlock(&log->l_grant_lock); retval = xlog_regrant_write_log_space(log, internal_ticket); } else { /* may sleep if need to allocate more tickets */ @@ -332,9 +334,11 @@ xfs_log_reserve( trace_xfs_log_reserve(log, internal_ticket); - xlog_grant_push_ail(mp, + spin_lock(&log->l_grant_lock); + xlog_grant_push_ail(log, (internal_ticket->t_unit_res * internal_ticket->t_cnt)); + spin_unlock(&log->l_grant_lock); retval = xlog_grant_log_space(log, internal_ticket); } @@ -1185,59 +1189,58 @@ xlog_commit_record( * water mark. In this manner, we would be creating a low water mark. */ STATIC void -xlog_grant_push_ail(xfs_mount_t *mp, - int need_bytes) +xlog_grant_push_ail( + struct log *log, + int need_bytes) { - xlog_t *log = mp->m_log; /* pointer to the log */ - xfs_lsn_t tail_lsn; /* lsn of the log tail */ - xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */ - int free_blocks; /* free blocks left to write to */ - int free_bytes; /* free bytes left to write to */ - int threshold_block; /* block in lsn we'd like to be at */ - int threshold_cycle; /* lsn cycle we'd like to be at */ - int free_threshold; - - ASSERT(BTOBB(need_bytes) < log->l_logBBsize); - - spin_lock(&log->l_grant_lock); - free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); - tail_lsn = log->l_tail_lsn; - free_blocks = BTOBBT(free_bytes); - - /* - * Set the threshold for the minimum number of free blocks in the - * log to the maximum of what the caller needs, one quarter of the - * log, and 256 blocks. - */ - free_threshold = BTOBB(need_bytes); - free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); - free_threshold = MAX(free_threshold, 256); - if (free_blocks < free_threshold) { + xfs_lsn_t threshold_lsn = 0; + xfs_lsn_t tail_lsn; + int free_blocks; + int free_bytes; + int threshold_block; + int threshold_cycle; + int free_threshold; + + ASSERT(BTOBB(need_bytes) < log->l_logBBsize); + + tail_lsn = log->l_tail_lsn; + free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); + free_blocks = BTOBBT(free_bytes); + + /* + * Set the threshold for the minimum number of free blocks in the + * log to the maximum of what the caller needs, one quarter of the + * log, and 256 blocks. 
+ */ + free_threshold = BTOBB(need_bytes); + free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); + free_threshold = MAX(free_threshold, 256); + if (free_blocks >= free_threshold) + return; + threshold_block = BLOCK_LSN(tail_lsn) + free_threshold; threshold_cycle = CYCLE_LSN(tail_lsn); if (threshold_block >= log->l_logBBsize) { - threshold_block -= log->l_logBBsize; - threshold_cycle += 1; + threshold_block -= log->l_logBBsize; + threshold_cycle += 1; } - threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); - - /* Don't pass in an lsn greater than the lsn of the last + threshold_lsn = xlog_assign_lsn(threshold_cycle, + threshold_block); + /* + * Don't pass in an lsn greater than the lsn of the last * log record known to be on disk. */ if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) - threshold_lsn = log->l_last_sync_lsn; - } - spin_unlock(&log->l_grant_lock); - - /* - * Get the transaction layer to kick the dirty buffers out to - * disk asynchronously. No point in trying to do this if - * the filesystem is shutting down. - */ - if (threshold_lsn && - !XLOG_FORCED_SHUTDOWN(log)) - xfs_trans_ail_push(log->l_ailp, threshold_lsn); -} /* xlog_grant_push_ail */ + threshold_lsn = log->l_last_sync_lsn; + + /* + * Get the transaction layer to kick the dirty buffers out to + * disk asynchronously. No point in trying to do this if + * the filesystem is shutting down. + */ + if (!XLOG_FORCED_SHUTDOWN(log)) + xfs_trans_ail_push(log->l_ailp, threshold_lsn); +} /* * The bdstrat callback function for log bufs. This gives us a central @@ -2543,9 +2546,7 @@ redo: trace_xfs_log_grant_sleep2(log, tic); - spin_unlock(&log->l_grant_lock); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); + xlog_grant_push_ail(log, need_bytes); XFS_STATS_INC(xs_sleep_logspace); xlog_wait(&tic->t_wait, &log->l_grant_lock); @@ -2641,9 +2642,7 @@ xlog_regrant_write_log_space(xlog_t *log, trace_xfs_log_regrant_write_sleep1(log, tic); - spin_unlock(&log->l_grant_lock); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); + xlog_grant_push_ail(log, need_bytes); XFS_STATS_INC(xs_sleep_logspace); xlog_wait(&tic->t_wait, &log->l_grant_lock); @@ -2666,9 +2665,7 @@ redo: if (free_bytes < need_bytes) { if (list_empty(&tic->t_queue)) list_add_tail(&tic->t_queue, &log->l_writeq); - spin_unlock(&log->l_grant_lock); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); + xlog_grant_push_ail(log, need_bytes); XFS_STATS_INC(xs_sleep_logspace); trace_xfs_log_regrant_write_sleep2(log, tic); -- cgit v1.2.3 From 84f3c683c4d3f36d3c3ed320babd960a332ac458 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 3 Dec 2010 22:11:29 +1100 Subject: xfs: convert l_last_sync_lsn to an atomic variable log->l_last_sync_lsn is updated in only one critical spot - log buffer Io completion - and is protected by the grant lock here. This requires the grant lock to be taken for every log buffer IO completion. Converting the l_last_sync_lsn variable to an atomic64_t means that we do not need to take the grant lock in log buffer IO completion to update it. This also removes the need for explicitly holding a spinlock to read the l_last_sync_lsn on 32 bit platforms. 
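The access pattern this enables shows up throughout the diff below: the 64 bit LSN can be read and updated without taking a lock, even on 32 bit platforms, along the lines of:

	xfs_lsn_t	last_sync;

	/* lockless snapshot of the last sync lsn */
	last_sync = atomic64_read(&log->l_last_sync_lsn);

	/* lockless update from log buffer IO completion */
	atomic64_set(&log->l_last_sync_lsn,
		     be64_to_cpu(iclog->ic_header.h_lsn));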
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 55 +++++++++++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 30 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1e2020d5a8b6..70790eb48336 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -675,12 +675,8 @@ xfs_log_move_tail(xfs_mount_t *mp, if (XLOG_FORCED_SHUTDOWN(log)) return; - if (tail_lsn == 0) { - /* needed since sync_lsn is 64 bits */ - spin_lock(&log->l_icloglock); - tail_lsn = log->l_last_sync_lsn; - spin_unlock(&log->l_icloglock); - } + if (tail_lsn == 0) + tail_lsn = atomic64_read(&log->l_last_sync_lsn); spin_lock(&log->l_grant_lock); @@ -800,11 +796,9 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) tail_lsn = xfs_trans_ail_tail(mp->m_ail); spin_lock(&log->l_grant_lock); - if (tail_lsn != 0) { - log->l_tail_lsn = tail_lsn; - } else { - tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn; - } + if (!tail_lsn) + tail_lsn = atomic64_read(&log->l_last_sync_lsn); + log->l_tail_lsn = tail_lsn; spin_unlock(&log->l_grant_lock); return tail_lsn; @@ -1014,9 +1008,9 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_flags |= XLOG_ACTIVE_RECOVERY; log->l_prev_block = -1; - log->l_tail_lsn = xlog_assign_lsn(1, 0); /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ - log->l_last_sync_lsn = log->l_tail_lsn; + log->l_tail_lsn = xlog_assign_lsn(1, 0); + atomic64_set(&log->l_last_sync_lsn, xlog_assign_lsn(1, 0)); log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); @@ -1194,6 +1188,7 @@ xlog_grant_push_ail( int need_bytes) { xfs_lsn_t threshold_lsn = 0; + xfs_lsn_t last_sync_lsn; xfs_lsn_t tail_lsn; int free_blocks; int free_bytes; @@ -1228,10 +1223,12 @@ xlog_grant_push_ail( threshold_block); /* * Don't pass in an lsn greater than the lsn of the last - * log record known to be on disk. + * log record known to be on disk. Use a snapshot of the last sync lsn + * so that it doesn't change between the compare and the set. */ - if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) - threshold_lsn = log->l_last_sync_lsn; + last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); + if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0) + threshold_lsn = last_sync_lsn; /* * Get the transaction layer to kick the dirty buffers out to @@ -2194,7 +2191,7 @@ xlog_state_do_callback( lowest_lsn = xlog_get_lowest_lsn(log); if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, - be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { + be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { iclog = iclog->ic_next; continue; /* Leave this iclog for * another thread */ @@ -2202,23 +2199,21 @@ xlog_state_do_callback( iclog->ic_state = XLOG_STATE_CALLBACK; - spin_unlock(&log->l_icloglock); - /* l_last_sync_lsn field protected by - * l_grant_lock. Don't worry about iclog's lsn. - * No one else can be here except us. + /* + * update the last_sync_lsn before we drop the + * icloglock to ensure we are the only one that + * can update it. 
*/ - spin_lock(&log->l_grant_lock); - ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, - be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); - log->l_last_sync_lsn = - be64_to_cpu(iclog->ic_header.h_lsn); - spin_unlock(&log->l_grant_lock); + ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), + be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); + atomic64_set(&log->l_last_sync_lsn, + be64_to_cpu(iclog->ic_header.h_lsn)); - } else { - spin_unlock(&log->l_icloglock); + } else ioerrors++; - } + + spin_unlock(&log->l_icloglock); /* * Keep processing entries in the callback list until -- cgit v1.2.3 From 1c3cb9ec07fabf0c0970adc46fd2a1f09c1186dd Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 21 Dec 2010 12:28:39 +1100 Subject: xfs: convert l_tail_lsn to an atomic variable. log->l_tail_lsn is currently protected by the log grant lock. The lock is only needed for serialising readers against writers, so we don't really need the lock if we make the l_tail_lsn variable an atomic. Converting the l_tail_lsn variable to an atomic64_t means we can start to peel back the grant lock from various operations. Also, provide functions to safely crack an atomic LSN variable into it's component pieces and to recombined the components into an atomic variable. Use them where appropriate. This also removes the need for explicitly holding a spinlock to read the l_tail_lsn on 32 bit platforms. Signed-off-by: Dave Chinner --- fs/xfs/xfs_log.c | 56 +++++++++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 31 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 70790eb48336..d118bf804480 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -678,15 +678,11 @@ xfs_log_move_tail(xfs_mount_t *mp, if (tail_lsn == 0) tail_lsn = atomic64_read(&log->l_last_sync_lsn); - spin_lock(&log->l_grant_lock); - - /* Also an invalid lsn. 1 implies that we aren't passing in a valid - * tail_lsn. - */ - if (tail_lsn != 1) { - log->l_tail_lsn = tail_lsn; - } + /* tail_lsn == 1 implies that we weren't passed a valid value. */ + if (tail_lsn != 1) + atomic64_set(&log->l_tail_lsn, tail_lsn); + spin_lock(&log->l_grant_lock); if (!list_empty(&log->l_writeq)) { #ifdef DEBUG if (log->l_flags & XLOG_ACTIVE_RECOVERY) @@ -789,21 +785,19 @@ xfs_log_need_covered(xfs_mount_t *mp) * We may be holding the log iclog lock upon entering this routine. */ xfs_lsn_t -xlog_assign_tail_lsn(xfs_mount_t *mp) +xlog_assign_tail_lsn( + struct xfs_mount *mp) { - xfs_lsn_t tail_lsn; - xlog_t *log = mp->m_log; + xfs_lsn_t tail_lsn; + struct log *log = mp->m_log; tail_lsn = xfs_trans_ail_tail(mp->m_ail); - spin_lock(&log->l_grant_lock); if (!tail_lsn) tail_lsn = atomic64_read(&log->l_last_sync_lsn); - log->l_tail_lsn = tail_lsn; - spin_unlock(&log->l_grant_lock); + atomic64_set(&log->l_tail_lsn, tail_lsn); return tail_lsn; -} /* xlog_assign_tail_lsn */ - +} /* * Return the space in the log between the tail and the head. 
The head @@ -831,8 +825,8 @@ xlog_space_left( int head_bytes; xlog_crack_grant_head(head, &head_cycle, &head_bytes); - tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn)); - tail_cycle = CYCLE_LSN(log->l_tail_lsn); + xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes); + tail_bytes = BBTOB(tail_bytes); if (tail_cycle == head_cycle && head_bytes >= tail_bytes) free_bytes = log->l_logsize - (head_bytes - tail_bytes); else if (tail_cycle + 1 < head_cycle) @@ -1009,8 +1003,8 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_prev_block = -1; /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ - log->l_tail_lsn = xlog_assign_lsn(1, 0); - atomic64_set(&log->l_last_sync_lsn, xlog_assign_lsn(1, 0)); + xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); + xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); @@ -1189,7 +1183,6 @@ xlog_grant_push_ail( { xfs_lsn_t threshold_lsn = 0; xfs_lsn_t last_sync_lsn; - xfs_lsn_t tail_lsn; int free_blocks; int free_bytes; int threshold_block; @@ -1198,7 +1191,6 @@ xlog_grant_push_ail( ASSERT(BTOBB(need_bytes) < log->l_logBBsize); - tail_lsn = log->l_tail_lsn; free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); free_blocks = BTOBBT(free_bytes); @@ -1213,8 +1205,9 @@ xlog_grant_push_ail( if (free_blocks >= free_threshold) return; - threshold_block = BLOCK_LSN(tail_lsn) + free_threshold; - threshold_cycle = CYCLE_LSN(tail_lsn); + xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle, + &threshold_block); + threshold_block += free_threshold; if (threshold_block >= log->l_logBBsize) { threshold_block -= log->l_logBBsize; threshold_cycle += 1; @@ -2828,11 +2821,11 @@ xlog_state_release_iclog( if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { /* update tail before writing to iclog */ - xlog_assign_tail_lsn(log->l_mp); + xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); sync++; iclog->ic_state = XLOG_STATE_SYNCING; - iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); - xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + xlog_verify_tail_lsn(log, iclog, tail_lsn); /* cycle incremented when incrementing curr_block */ } spin_unlock(&log->l_icloglock); @@ -3435,7 +3428,7 @@ STATIC void xlog_verify_grant_tail( struct log *log) { - xfs_lsn_t tail_lsn = log->l_tail_lsn; + int tail_cycle, tail_blocks; int cycle, space; /* @@ -3445,9 +3438,10 @@ xlog_verify_grant_tail( * check the byte count. */ xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); - if (CYCLE_LSN(tail_lsn) != cycle) { - ASSERT(cycle - 1 == CYCLE_LSN(tail_lsn)); - ASSERT(space <= BBTOB(BLOCK_LSN(tail_lsn))); + xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); + if (tail_cycle != cycle) { + ASSERT(cycle - 1 == tail_cycle); + ASSERT(space <= BBTOB(tail_blocks)); } } -- cgit v1.2.3 From c8a09ff8ca2235bccdaea8a52fbd5349646a8ba4 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 4 Dec 2010 00:02:40 +1100 Subject: xfs: convert log grant heads to atomic variables Convert the log grant heads to atomic64_t types in preparation for converting the accounting algorithms to atomic operations. his patch just converts the variables; the algorithmic changes are in a separate patch for clarity. 
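With the heads typed as atomic64_t, the crack helper sketched for the earlier combine patch would first take a single atomic read of the value before splitting it. Again a sketch only; the real helpers live in xfs_log_priv.h:

static inline void
xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
{
	int64_t val = atomic64_read(head);	/* one atomic 64 bit read */

	*cycle = val >> 32;
	*space = val & 0xffffffff;
}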
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index d118bf804480..a1d7d12fc51f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, xfs_buftarg_t *log_target, xfs_daddr_t blk_offset, int num_bblks); -STATIC int xlog_space_left(struct log *log, int64_t *head); +STATIC int xlog_space_left(struct log *log, atomic64_t *head); STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); STATIC void xlog_dealloc_log(xlog_t *log); @@ -100,7 +100,7 @@ STATIC int xlog_iclogs_empty(xlog_t *log); static void xlog_grant_sub_space( struct log *log, - int64_t *head, + atomic64_t *head, int bytes) { int cycle, space; @@ -119,7 +119,7 @@ xlog_grant_sub_space( static void xlog_grant_add_space( struct log *log, - int64_t *head, + atomic64_t *head, int bytes) { int tmp; @@ -816,7 +816,7 @@ xlog_assign_tail_lsn( STATIC int xlog_space_left( struct log *log, - int64_t *head) + atomic64_t *head) { int free_bytes; int tail_bytes; -- cgit v1.2.3 From 3f16b9850743b702380f098ab5e0308cd6af1792 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 21 Dec 2010 12:29:01 +1100 Subject: xfs: introduce new locks for the log grant ticket wait queues The log grant ticket wait queues are currently protected by the log grant lock. However, the queues are functionally independent from each other, and operations on them only require serialisation against other queue operations now that all of the other log variables they use are atomic values. Hence, we can make them independent of the grant lock by introducing new locks just to protect the lists operations. because the lists are independent, we can use a lock per list and ensure that reserve and write head queuing do not contend. To ensure forced shutdowns work correctly in conjunction with the new fast paths, ensure that we check whether the log has been shut down in the grant functions once we hold the relevant spin locks but before we go to sleep. This is needed to co-ordinate correctly with the wakeups that are issued on the ticket queues so we don't leave any processes sleeping on the queues during a shutdown. 
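The lock free fast path described above follows a peek/lock/re-check pattern. A condensed sketch of how the reserve queue is handled (compare xfs_log_move_tail() and xlog_grant_log_space() in the diff below; the write queue is treated the same way under l_grant_write_lock):

	/* unlocked peek: the common case is an empty queue, so skip the lock */
	if (!list_empty_careful(&log->l_reserveq)) {
		spin_lock(&log->l_grant_reserve_lock);
		/* re-check now that the per-queue lock is held */
		if (!list_empty(&log->l_reserveq)) {
			/* wake waiters, or queue this ticket and sleep */
		}
		spin_unlock(&log->l_grant_reserve_lock);
	}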
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 139 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 83 insertions(+), 56 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a1d7d12fc51f..6fcc9d0af524 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -682,12 +682,12 @@ xfs_log_move_tail(xfs_mount_t *mp, if (tail_lsn != 1) atomic64_set(&log->l_tail_lsn, tail_lsn); - spin_lock(&log->l_grant_lock); - if (!list_empty(&log->l_writeq)) { + if (!list_empty_careful(&log->l_writeq)) { #ifdef DEBUG if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("Recovery problem"); #endif + spin_lock(&log->l_grant_write_lock); free_bytes = xlog_space_left(log, &log->l_grant_write_head); list_for_each_entry(tic, &log->l_writeq, t_queue) { ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); @@ -696,15 +696,18 @@ xfs_log_move_tail(xfs_mount_t *mp, break; tail_lsn = 0; free_bytes -= tic->t_unit_res; + trace_xfs_log_regrant_write_wake_up(log, tic); wake_up(&tic->t_wait); } + spin_unlock(&log->l_grant_write_lock); } - if (!list_empty(&log->l_reserveq)) { + if (!list_empty_careful(&log->l_reserveq)) { #ifdef DEBUG if (log->l_flags & XLOG_ACTIVE_RECOVERY) panic("Recovery problem"); #endif + spin_lock(&log->l_grant_reserve_lock); free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); list_for_each_entry(tic, &log->l_reserveq, t_queue) { if (tic->t_flags & XLOG_TIC_PERM_RESERV) @@ -715,11 +718,12 @@ xfs_log_move_tail(xfs_mount_t *mp, break; tail_lsn = 0; free_bytes -= need_bytes; + trace_xfs_log_grant_wake_up(log, tic); wake_up(&tic->t_wait); } + spin_unlock(&log->l_grant_reserve_lock); } - spin_unlock(&log->l_grant_lock); -} /* xfs_log_move_tail */ +} /* * Determine if we have a transaction that has gone to disk @@ -1010,6 +1014,8 @@ xlog_alloc_log(xfs_mount_t *mp, xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); INIT_LIST_HEAD(&log->l_reserveq); INIT_LIST_HEAD(&log->l_writeq); + spin_lock_init(&log->l_grant_reserve_lock); + spin_lock_init(&log->l_grant_write_lock); error = EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { @@ -2477,6 +2483,18 @@ restart: * * Once a ticket gets put onto the reserveq, it will only return after * the needed reservation is satisfied. + * + * This function is structured so that it has a lock free fast path. This is + * necessary because every new transaction reservation will come through this + * path. Hence any lock will be globally hot if we take it unconditionally on + * every pass. + * + * As tickets are only ever moved on and off the reserveq under the + * l_grant_reserve_lock, we only need to take that lock if we are going + * to add the ticket to the queue and sleep. We can avoid taking the lock if the + * ticket was never added to the reserveq because the t_queue list head will be + * empty and we hold the only reference to it so it can safely be checked + * unlocked. */ STATIC int xlog_grant_log_space(xlog_t *log, @@ -2490,13 +2508,20 @@ xlog_grant_log_space(xlog_t *log, panic("grant Recovery problem"); #endif - /* Is there space or do we need to sleep? 
*/ - spin_lock(&log->l_grant_lock); - trace_xfs_log_grant_enter(log, tic); + need_bytes = tic->t_unit_res; + if (tic->t_flags & XFS_LOG_PERM_RESERV) + need_bytes *= tic->t_ocnt; + /* something is already sleeping; insert new transaction at end */ - if (!list_empty(&log->l_reserveq)) { + if (!list_empty_careful(&log->l_reserveq)) { + spin_lock(&log->l_grant_reserve_lock); + /* recheck the queue now we are locked */ + if (list_empty(&log->l_reserveq)) { + spin_unlock(&log->l_grant_reserve_lock); + goto redo; + } list_add_tail(&tic->t_queue, &log->l_reserveq); trace_xfs_log_grant_sleep1(log, tic); @@ -2509,48 +2534,47 @@ xlog_grant_log_space(xlog_t *log, goto error_return; XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_lock); + xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); /* * If we got an error, and the filesystem is shutting down, * we'll catch it down below. So just continue... */ trace_xfs_log_grant_wake1(log, tic); - spin_lock(&log->l_grant_lock); } - if (tic->t_flags & XFS_LOG_PERM_RESERV) - need_bytes = tic->t_unit_res*tic->t_ocnt; - else - need_bytes = tic->t_unit_res; redo: if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; + goto error_return_unlocked; free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); if (free_bytes < need_bytes) { + spin_lock(&log->l_grant_reserve_lock); if (list_empty(&tic->t_queue)) list_add_tail(&tic->t_queue, &log->l_reserveq); trace_xfs_log_grant_sleep2(log, tic); + if (XLOG_FORCED_SHUTDOWN(log)) + goto error_return; + xlog_grant_push_ail(log, need_bytes); XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_lock); - - spin_lock(&log->l_grant_lock); - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; + xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); trace_xfs_log_grant_wake2(log, tic); - goto redo; } - list_del_init(&tic->t_queue); + if (!list_empty(&tic->t_queue)) { + spin_lock(&log->l_grant_reserve_lock); + list_del_init(&tic->t_queue); + spin_unlock(&log->l_grant_reserve_lock); + } /* we've got enough space */ + spin_lock(&log->l_grant_lock); xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_grant_exit(log, tic); @@ -2559,8 +2583,11 @@ redo: spin_unlock(&log->l_grant_lock); return 0; - error_return: +error_return_unlocked: + spin_lock(&log->l_grant_reserve_lock); +error_return: list_del_init(&tic->t_queue); + spin_unlock(&log->l_grant_reserve_lock); trace_xfs_log_grant_error(log, tic); /* @@ -2570,7 +2597,6 @@ redo: */ tic->t_curr_res = 0; tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - spin_unlock(&log->l_grant_lock); return XFS_ERROR(EIO); } /* xlog_grant_log_space */ @@ -2578,7 +2604,8 @@ redo: /* * Replenish the byte reservation required by moving the grant write head. * - * + * Similar to xlog_grant_log_space, the function is structured to have a lock + * free fast path. */ STATIC int xlog_regrant_write_log_space(xlog_t *log, @@ -2597,12 +2624,9 @@ xlog_regrant_write_log_space(xlog_t *log, panic("regrant Recovery problem"); #endif - spin_lock(&log->l_grant_lock); - trace_xfs_log_regrant_write_enter(log, tic); - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; + goto error_return_unlocked; /* If there are other waiters on the queue then give them a * chance at logspace before us. Wake up the first waiters, @@ -2611,8 +2635,10 @@ xlog_regrant_write_log_space(xlog_t *log, * this transaction. 
*/ need_bytes = tic->t_unit_res; - if (!list_empty(&log->l_writeq)) { + if (!list_empty_careful(&log->l_writeq)) { struct xlog_ticket *ntic; + + spin_lock(&log->l_grant_write_lock); free_bytes = xlog_space_left(log, &log->l_grant_write_head); list_for_each_entry(ntic, &log->l_writeq, t_queue) { ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); @@ -2627,50 +2653,48 @@ xlog_regrant_write_log_space(xlog_t *log, struct xlog_ticket, t_queue)) { if (list_empty(&tic->t_queue)) list_add_tail(&tic->t_queue, &log->l_writeq); - trace_xfs_log_regrant_write_sleep1(log, tic); xlog_grant_push_ail(log, need_bytes); XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_lock); - - /* If we're shutting down, this tic is already - * off the queue */ - spin_lock(&log->l_grant_lock); - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; - + xlog_wait(&tic->t_wait, &log->l_grant_write_lock); trace_xfs_log_regrant_write_wake1(log, tic); - } + } else + spin_unlock(&log->l_grant_write_lock); } redo: if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; + goto error_return_unlocked; free_bytes = xlog_space_left(log, &log->l_grant_write_head); if (free_bytes < need_bytes) { + spin_lock(&log->l_grant_write_lock); if (list_empty(&tic->t_queue)) list_add_tail(&tic->t_queue, &log->l_writeq); + + if (XLOG_FORCED_SHUTDOWN(log)) + goto error_return; + xlog_grant_push_ail(log, need_bytes); XFS_STATS_INC(xs_sleep_logspace); trace_xfs_log_regrant_write_sleep2(log, tic); - xlog_wait(&tic->t_wait, &log->l_grant_lock); - - /* If we're shutting down, this tic is already off the queue */ - spin_lock(&log->l_grant_lock); - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; + xlog_wait(&tic->t_wait, &log->l_grant_write_lock); trace_xfs_log_regrant_write_wake2(log, tic); goto redo; } - list_del_init(&tic->t_queue); + if (!list_empty(&tic->t_queue)) { + spin_lock(&log->l_grant_write_lock); + list_del_init(&tic->t_queue); + spin_unlock(&log->l_grant_write_lock); + } /* we've got enough space */ + spin_lock(&log->l_grant_lock); xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_regrant_write_exit(log, tic); xlog_verify_grant_head(log, 1); @@ -2679,8 +2703,11 @@ redo: return 0; + error_return_unlocked: + spin_lock(&log->l_grant_write_lock); error_return: list_del_init(&tic->t_queue); + spin_unlock(&log->l_grant_write_lock); trace_xfs_log_regrant_write_error(log, tic); /* @@ -2690,7 +2717,6 @@ redo: */ tic->t_curr_res = 0; tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - spin_unlock(&log->l_grant_lock); return XFS_ERROR(EIO); } /* xlog_regrant_write_log_space */ @@ -3664,12 +3690,10 @@ xfs_log_force_umount( xlog_cil_force(log); /* - * We must hold both the GRANT lock and the LOG lock, - * before we mark the filesystem SHUTDOWN and wake - * everybody up to tell the bad news. + * mark the filesystem and the as in a shutdown state and wake + * everybody up to tell them the bad news. */ spin_lock(&log->l_icloglock); - spin_lock(&log->l_grant_lock); mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; if (mp->m_sb_bp) XFS_BUF_DONE(mp->m_sb_bp); @@ -3694,14 +3718,17 @@ xfs_log_force_umount( * means we have to wake up everybody queued up on reserveq as well as * writeq. In addition, we make sure in xlog_{re}grant_log_space that * we don't enqueue anything once the SHUTDOWN flag is set, and this - * action is protected by the GRANTLOCK. + * action is protected by the grant locks. 
*/ + spin_lock(&log->l_grant_reserve_lock); list_for_each_entry(tic, &log->l_reserveq, t_queue) wake_up(&tic->t_wait); + spin_unlock(&log->l_grant_reserve_lock); + spin_lock(&log->l_grant_write_lock); list_for_each_entry(tic, &log->l_writeq, t_queue) wake_up(&tic->t_wait); - spin_unlock(&log->l_grant_lock); + spin_unlock(&log->l_grant_write_lock); if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { ASSERT(!logerror); -- cgit v1.2.3 From d0eb2f38b250b7d6c993adf81b0e4ded0565497e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 21 Dec 2010 12:29:14 +1100 Subject: xfs: convert grant head manipulations to lockless algorithm The only thing that the grant lock remains to protect is the grant head manipulations when adding or removing space from the log. These calculations are already based on atomic variables, so we can already update them safely without locks. However, the grant head manpulations require atomic multi-step calculations to be executed, which the algorithms currently don't allow. To make these multi-step calculations atomic, convert the algorithms to compare-and-exchange loops on the atomic variables. That is, we sample the old value, perform the calculation and use atomic64_cmpxchg() to attempt to update the head with the new value. If the head has not changed since we sampled it, it will succeed and we are done. Otherwise, we rerun the calculation again from a new sample of the head. This allows us to remove the grant lock from around all the grant head space manipulations, and that effectively removes the grant lock from the log completely. Hence we can remove the grant lock completely from the log at this point. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 103 ++++++++++++++++++------------------------------------- 1 file changed, 34 insertions(+), 69 deletions(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6fcc9d0af524..0bf24b11d0c4 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -81,7 +81,6 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, #if defined(DEBUG) STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); -STATIC void xlog_verify_grant_head(xlog_t *log, int equals); STATIC void xlog_verify_grant_tail(struct log *log); STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, int count, boolean_t syncing); @@ -89,7 +88,6 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, xfs_lsn_t tail_lsn); #else #define xlog_verify_dest_ptr(a,b) -#define xlog_verify_grant_head(a,b) #define xlog_verify_grant_tail(a) #define xlog_verify_iclog(a,b,c,d) #define xlog_verify_tail_lsn(a,b,c) @@ -103,17 +101,24 @@ xlog_grant_sub_space( atomic64_t *head, int bytes) { - int cycle, space; + int64_t head_val = atomic64_read(head); + int64_t new, old; - xlog_crack_grant_head(head, &cycle, &space); + do { + int cycle, space; - space -= bytes; - if (space < 0) { - space += log->l_logsize; - cycle--; - } + xlog_crack_grant_head_val(head_val, &cycle, &space); - xlog_assign_grant_head(head, cycle, space); + space -= bytes; + if (space < 0) { + space += log->l_logsize; + cycle--; + } + + old = head_val; + new = xlog_assign_grant_head_val(cycle, space); + head_val = atomic64_cmpxchg(head, old, new); + } while (head_val != old); } static void @@ -122,20 +127,27 @@ xlog_grant_add_space( atomic64_t *head, int bytes) { - int tmp; - int cycle, space; + int64_t head_val = atomic64_read(head); + int64_t new, old; - xlog_crack_grant_head(head, &cycle, &space); + do { + int tmp; + int 
cycle, space; - tmp = log->l_logsize - space; - if (tmp > bytes) - space += bytes; - else { - space = bytes - tmp; - cycle++; - } + xlog_crack_grant_head_val(head_val, &cycle, &space); - xlog_assign_grant_head(head, cycle, space); + tmp = log->l_logsize - space; + if (tmp > bytes) + space += bytes; + else { + space = bytes - tmp; + cycle++; + } + + old = head_val; + new = xlog_assign_grant_head_val(cycle, space); + head_val = atomic64_cmpxchg(head, old, new); + } while (head_val != old); } static void @@ -318,9 +330,7 @@ xfs_log_reserve( trace_xfs_log_reserve(log, internal_ticket); - spin_lock(&log->l_grant_lock); xlog_grant_push_ail(log, internal_ticket->t_unit_res); - spin_unlock(&log->l_grant_lock); retval = xlog_regrant_write_log_space(log, internal_ticket); } else { /* may sleep if need to allocate more tickets */ @@ -334,11 +344,9 @@ xfs_log_reserve( trace_xfs_log_reserve(log, internal_ticket); - spin_lock(&log->l_grant_lock); xlog_grant_push_ail(log, (internal_ticket->t_unit_res * internal_ticket->t_cnt)); - spin_unlock(&log->l_grant_lock); retval = xlog_grant_log_space(log, internal_ticket); } @@ -1057,7 +1065,6 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_xbuf = bp; spin_lock_init(&log->l_icloglock); - spin_lock_init(&log->l_grant_lock); init_waitqueue_head(&log->l_flush_wait); /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ @@ -1135,7 +1142,6 @@ out_free_iclog: kmem_free(iclog); } spinlock_destroy(&log->l_icloglock); - spinlock_destroy(&log->l_grant_lock); xfs_buf_free(log->l_xbuf); out_free_log: kmem_free(log); @@ -1331,10 +1337,8 @@ xlog_sync(xlog_t *log, roundoff < BBTOB(1))); /* move grant heads by roundoff in sync */ - spin_lock(&log->l_grant_lock); xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); - spin_unlock(&log->l_grant_lock); /* put cycle number in every block */ xlog_pack_data(log, iclog, roundoff); @@ -1455,7 +1459,6 @@ xlog_dealloc_log(xlog_t *log) iclog = next_iclog; } spinlock_destroy(&log->l_icloglock); - spinlock_destroy(&log->l_grant_lock); xfs_buf_free(log->l_xbuf); log->l_mp->m_log = NULL; @@ -2574,13 +2577,10 @@ redo: } /* we've got enough space */ - spin_lock(&log->l_grant_lock); xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_grant_exit(log, tic); - xlog_verify_grant_head(log, 1); xlog_verify_grant_tail(log); - spin_unlock(&log->l_grant_lock); return 0; error_return_unlocked: @@ -2694,12 +2694,9 @@ redo: } /* we've got enough space */ - spin_lock(&log->l_grant_lock); xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_regrant_write_exit(log, tic); - xlog_verify_grant_head(log, 1); xlog_verify_grant_tail(log); - spin_unlock(&log->l_grant_lock); return 0; @@ -2737,7 +2734,6 @@ xlog_regrant_reserve_log_space(xlog_t *log, if (ticket->t_cnt > 0) ticket->t_cnt--; - spin_lock(&log->l_grant_lock); xlog_grant_sub_space(log, &log->l_grant_reserve_head, ticket->t_curr_res); xlog_grant_sub_space(log, &log->l_grant_write_head, @@ -2747,21 +2743,15 @@ xlog_regrant_reserve_log_space(xlog_t *log, trace_xfs_log_regrant_reserve_sub(log, ticket); - xlog_verify_grant_head(log, 1); - /* just return if we still have some of the pre-reserved space */ - if (ticket->t_cnt > 0) { - spin_unlock(&log->l_grant_lock); + if (ticket->t_cnt > 0) return; - } xlog_grant_add_space(log, &log->l_grant_reserve_head, ticket->t_unit_res); 
trace_xfs_log_regrant_reserve_exit(log, ticket); - xlog_verify_grant_head(log, 0); - spin_unlock(&log->l_grant_lock); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); } /* xlog_regrant_reserve_log_space */ @@ -2790,7 +2780,6 @@ xlog_ungrant_log_space(xlog_t *log, if (ticket->t_cnt > 0) ticket->t_cnt--; - spin_lock(&log->l_grant_lock); trace_xfs_log_ungrant_enter(log, ticket); trace_xfs_log_ungrant_sub(log, ticket); @@ -2809,8 +2798,6 @@ xlog_ungrant_log_space(xlog_t *log, trace_xfs_log_ungrant_exit(log, ticket); - xlog_verify_grant_head(log, 1); - spin_unlock(&log->l_grant_lock); xfs_log_move_tail(log->l_mp, 1); } /* xlog_ungrant_log_space */ @@ -3428,28 +3415,6 @@ xlog_verify_dest_ptr( xlog_panic("xlog_verify_dest_ptr: invalid ptr"); } -STATIC void -xlog_verify_grant_head(xlog_t *log, int equals) -{ - int reserve_cycle, reserve_space; - int write_cycle, write_space; - - xlog_crack_grant_head(&log->l_grant_reserve_head, - &reserve_cycle, &reserve_space); - xlog_crack_grant_head(&log->l_grant_write_head, - &write_cycle, &write_space); - - if (reserve_cycle == write_cycle) { - if (equals) - ASSERT(reserve_space >= write_space); - else - ASSERT(reserve_space > write_space); - } else { - ASSERT(reserve_cycle - 1 == write_cycle); - ASSERT(write_space >= reserve_space); - } -} - STATIC void xlog_verify_grant_tail( struct log *log) -- cgit v1.2.3 From 73efe4a4ddf8eb2b1cc7039e8a66a23a424961af Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 12 Jan 2011 00:35:42 +0000 Subject: xfs: prevent NMI timeouts in cmn_err We currently have a global error message buffer in cmn_err that is protected by a spin lock that disables interrupts. Recently there have been reports of NMI timeouts occurring when the console is being flooded by SCSI error reports due to cmn_err() getting stuck trying to print to the console while holding this lock (i.e. with interrupts disabled). The NMI watchdog is seeing this CPU as non-responding and so is triggering a panic. While the trigger for the reported case is SCSI errors, pretty much anything that spams the kernel log could cause this to occur. Realistically the only reason that we have the intemediate message buffer is to prepend the correct kernel log level prefix to the log message. The only reason we have the lock is to protect the global message buffer and the only reason the message buffer is global is to keep it off the stack. Hence if we can avoid needing a global message buffer we avoid needing the lock, and we can do this with a small amount of cleanup and some preprocessor tricks: 1. clean up xfs_cmn_err() panic mask functionality to avoid needing debug code in xfs_cmn_err() 2. remove the couple of "!" message prefixes that still exist that the existing cmn_err() code steps over. 3. redefine CE_* levels directly to KERN_* 4. redefine cmn_err() and friends to use printk() directly via variable argument length macros. By doing this, we can completely remove the cmn_err() code and the lock that is causing the problems, and rely solely on printk() serialisation to ensure that we don't get garbled messages. A series of followup patches is really needed to clean up all the cmn_err() calls and related messages properly, but that results in a series that is not easily back portable to enterprise kernels. Hence this initial fix is only to address the direct problem in the lowest impact way possible. 
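Points 3 and 4 of the approach above amount to a few lines of preprocessor glue. The sketch below shows the general shape; the CE_* and cmn_err() names come from the commit message, but the exact set of levels and the precise macro bodies are an assumption, not necessarily what the patch adds.

#include <linux/kernel.h>

/* 3. the CE_* levels become plain printk() level prefixes */
#define CE_DEBUG	KERN_DEBUG
#define CE_NOTE		KERN_NOTICE
#define CE_WARN		KERN_WARNING
#define CE_ALERT	KERN_ALERT
#define CE_PANIC	KERN_EMERG

/*
 * 4. cmn_err() becomes a thin variadic macro around printk(): no shared
 * message buffer, no spinlock, no interrupts disabled while the console is
 * written - printk() provides its own serialisation.
 */
#define cmn_err(lvl, fmt, ...)	printk(lvl fmt "\n", ##__VA_ARGS__)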
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_log.c') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0bf24b11d0c4..ae6fef1ff563 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -377,7 +377,7 @@ xfs_log_mount( cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); else { cmn_err(CE_NOTE, - "!Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", + "Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", mp->m_fsname); ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); } -- cgit v1.2.3
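As a footnote to the "convert grant head manipulations to lockless algorithm" commit above, the compare-and-exchange retry loop it describes has the general shape sketched below. The packing of cycle and space into one 64-bit value is a simplified stand-in for xlog_assign_grant_head_val()/xlog_crack_grant_head_val(), and grant_head_add() is an illustrative name, not the kernel function.

#include <linux/atomic.h>
#include <linux/types.h>

/* simplified packing: cycle in the high 32 bits, space (bytes) in the low */
static inline int64_t pack_head(int cycle, int space)
{
	return ((int64_t)cycle << 32) | (u32)space;
}

static inline void unpack_head(int64_t val, int *cycle, int *space)
{
	*cycle = val >> 32;
	*space = (int)(u32)val;
}

/*
 * Move a grant head forward by 'bytes' without holding any lock: sample the
 * head, compute the new value, and let atomic64_cmpxchg() install it only if
 * no other CPU moved the head in the meantime; otherwise retry from the
 * freshly returned value.
 */
static void grant_head_add(atomic64_t *head, int log_size, int bytes)
{
	int64_t cur = atomic64_read(head);
	int64_t old, new;

	do {
		int cycle, space;

		unpack_head(cur, &cycle, &space);

		space += bytes;
		if (space >= log_size) {	/* wrapped past the end of the log */
			space -= log_size;
			cycle++;
		}

		old = cur;
		new = pack_head(cycle, space);
		cur = atomic64_cmpxchg(head, old, new);
	} while (cur != old);
}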