From 05cca7381429e12d66c5b5c8b5c5848055b88bf7 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 26 Jan 2008 17:42:45 +0100 Subject: firewire: fw-sbp2: unsigned int vs. unsigned Standardize on "unsigned int" style. Sort some struct members thematically. Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 19ece9b6d742..f2a9a33b47a1 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -141,15 +141,13 @@ struct sbp2_logical_unit { struct sbp2_target { struct kref kref; struct fw_unit *unit; + struct list_head lu_list; u64 management_agent_address; int directory_id; int node_id; int address_high; - - unsigned workarounds; - struct list_head lu_list; - + unsigned int workarounds; unsigned int mgt_orb_timeout; }; @@ -160,7 +158,7 @@ struct sbp2_target { */ #define SBP2_MIN_LOGIN_ORB_TIMEOUT 5000U /* Timeout in ms */ #define SBP2_MAX_LOGIN_ORB_TIMEOUT 40000U /* Timeout in ms */ -#define SBP2_ORB_TIMEOUT 2000 /* Timeout in ms */ +#define SBP2_ORB_TIMEOUT 2000U /* Timeout in ms */ #define SBP2_ORB_NULL 0x80000000 #define SBP2_MAX_SG_ELEMENT_LENGTH 0xf000 @@ -297,7 +295,7 @@ struct sbp2_command_orb { static const struct { u32 firmware_revision; u32 model; - unsigned workarounds; + unsigned int workarounds; } sbp2_workarounds_table[] = { /* DViCO Momobay CX-1 with TSB42AA9 bridge */ { .firmware_revision = 0x002800, @@ -836,7 +834,7 @@ static void sbp2_init_workarounds(struct sbp2_target *tgt, u32 model, u32 firmware_revision) { int i; - unsigned w = sbp2_param_workarounds; + unsigned int w = sbp2_param_workarounds; if (w) fw_notify("Please notify linux1394-devel@lists.sourceforge.net " @@ -1197,7 +1195,7 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done) struct sbp2_logical_unit *lu = cmd->device->hostdata; struct fw_device *device = fw_device(lu->tgt->unit->device.parent); struct sbp2_command_orb *orb; - unsigned max_payload; + unsigned int max_payload; int retval = SCSI_MLQUEUE_HOST_BUSY; /* -- cgit v1.2.3 From 1b9c12ba2fdf802a23630f70eddb0e821296634e Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 26 Jan 2008 17:43:23 +0100 Subject: firewire: fw-sbp2: fix logout before login retry This fixes a "can't recognize device" kind of bug. If the SCSI INQUIRY failed and hence __scsi_add_device failed due to a bus reset, we tried a logout and then waited for the already scheduled login work to happen. So far so good, but the generation used for the logout was outdated, hence the logout never reached the target. The target might therefore deny the subsequent relogin attempt, which would also leave the target inaccessible. Therefore fetch a fresh device->generation for the logout. Use memory barriers to prevent our plan being foiled by compiler or hardware optimizations. Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index f2a9a33b47a1..a15e3c7d21d3 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -716,7 +716,11 @@ static void sbp2_login(struct work_struct *work) sdev = __scsi_add_device(shost, 0, 0, scsilun_to_int(&eight_bytes_lun), lu); if (IS_ERR(sdev)) { - sbp2_send_management_orb(lu, node_id, generation, + smp_rmb(); /* generation may have changed */ + generation = device->generation; + smp_rmb(); /* node_id must not be older than generation */ + + sbp2_send_management_orb(lu, device->node_id, generation, SBP2_LOGOUT_REQUEST, lu->login_id, NULL); /* * Set this back to sbp2_login so we fall back and -- cgit v1.2.3 From be6f48b0174584c9c415012ca14803c7e941e27e Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 27 Jan 2008 19:14:44 +0100 Subject: firewire: fw-sbp2: don't retry login or reconnect after unplug If a device is being unplugged while fw-sbp2 had a login or reconnect on schedule, it would take about half a minute to shut the fw_unit down: Jan 27 18:34:54 stein firewire_sbp2: logged in to fw2.0 LUN 0000 (0 retries) Jan 27 18:34:59 stein firewire_sbp2: sbp2_scsi_abort Jan 27 18:34:59 stein scsi 25:0:0:0: Device offlined - not ready after error recovery Jan 27 18:35:01 stein firewire_sbp2: orb reply timed out, rcode=0x11 Jan 27 18:35:06 stein firewire_sbp2: orb reply timed out, rcode=0x11 Jan 27 18:35:12 stein firewire_sbp2: orb reply timed out, rcode=0x11 Jan 27 18:35:17 stein firewire_sbp2: orb reply timed out, rcode=0x11 Jan 27 18:35:22 stein firewire_sbp2: orb reply timed out, rcode=0x11 Jan 27 18:35:27 stein firewire_sbp2: orb reply timed out, rcode=0x11 Jan 27 18:35:32 stein firewire_sbp2: orb reply timed out, rcode=0x11 Jan 27 18:35:32 stein firewire_sbp2: failed to login to fw2.0 LUN 0000 Jan 27 18:35:32 stein firewire_sbp2: released fw2.0 After this patch, typically only a few seconds spent in __scsi_add_device remain: Jan 27 19:05:50 stein firewire_sbp2: logged in to fw2.0 LUN 0000 (0 retries) Jan 27 19:05:56 stein firewire_sbp2: sbp2_scsi_abort Jan 27 19:05:56 stein scsi 33:0:0:0: Device offlined - not ready after error recovery Jan 27 19:05:56 stein firewire_sbp2: released fw2.0 The benefit of this is less noise in the syslog. It furthermore avoids a few wasted CPU cycles and needlessly prolonged lifetime of a few driver objects. Signed-off-by: Stefan Richter Signed-off-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index a15e3c7d21d3..72fddf5a12a3 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -499,6 +499,9 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, unsigned int timeout; int retval = -ENOMEM; + if (function == SBP2_LOGOUT_REQUEST && fw_device_is_shutdown(device)) + return 0; + orb = kzalloc(sizeof(*orb), GFP_ATOMIC); if (orb == NULL) return -ENOMEM; @@ -619,16 +622,13 @@ static void sbp2_release_target(struct kref *kref) struct sbp2_logical_unit *lu, *next; struct Scsi_Host *shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); - struct fw_device *device = fw_device(tgt->unit->device.parent); list_for_each_entry_safe(lu, next, &tgt->lu_list, link) { if (lu->sdev) scsi_remove_device(lu->sdev); - if (!fw_device_is_shutdown(device)) - sbp2_send_management_orb(lu, tgt->node_id, - lu->generation, SBP2_LOGOUT_REQUEST, - lu->login_id, NULL); + sbp2_send_management_orb(lu, tgt->node_id, lu->generation, + SBP2_LOGOUT_REQUEST, lu->login_id, NULL); fw_core_remove_address_handler(&lu->address_handler); list_del(&lu->link); @@ -673,6 +673,9 @@ static void sbp2_login(struct work_struct *work) struct sbp2_login_response response; int generation, node_id, local_node_id; + if (fw_device_is_shutdown(device)) + goto out; + generation = device->generation; smp_rmb(); /* node_id must not be older than generation */ node_id = device->node_id; @@ -944,6 +947,9 @@ static void sbp2_reconnect(struct work_struct *work) struct fw_device *device = fw_device(unit->device.parent); int generation, node_id, local_node_id; + if (fw_device_is_shutdown(device)) + goto out; + generation = device->generation; smp_rmb(); /* node_id must not be older than generation */ node_id = device->node_id; -- cgit v1.2.3 From 9220f1946209a5b3335ea2d28f8462695885791b Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 3 Feb 2008 23:04:38 +0100 Subject: firewire: fw-sbp2: add INQUIRY delay workaround Several different SBP-2 bridges accept a login early while the IDE device is still powering up. They are therefore unable to respond to SCSI INQUIRY immediately, and the SCSI core has to retry the INQUIRY. One of these retries is typically successful, and all is well. But in case of Momobay FX-3A, the INQUIRY retries tend to fail entirely. This can usually be avoided by waiting a little while after login before letting the SCSI core send the INQUIRY. The old sbp2 driver handles this more gracefully for as yet unknown reasons (perhaps because it waits for fetch agent resets to complete, unlike fw-sbp2 which quickly proceeds after requesting the agent reset). Therefore the workaround is not as much necessary for sbp2. Signed-off-by: Stefan Richter Signed-off-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 72fddf5a12a3..4a118fbc7b24 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -82,6 +83,9 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device " * Avoids access beyond actual disk limits on devices with an off-by-one bug. * Don't use this with devices which don't have this bug. * + * - delay inquiry + * Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry. + * * - override internal blacklist * Instead of adding to the built-in blacklist, use only the workarounds * specified in the module load parameter. @@ -91,6 +95,8 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device " #define SBP2_WORKAROUND_INQUIRY_36 0x2 #define SBP2_WORKAROUND_MODE_SENSE_8 0x4 #define SBP2_WORKAROUND_FIX_CAPACITY 0x8 +#define SBP2_WORKAROUND_DELAY_INQUIRY 0x10 +#define SBP2_INQUIRY_DELAY 12 #define SBP2_WORKAROUND_OVERRIDE 0x100 static int sbp2_param_workarounds; @@ -100,6 +106,7 @@ MODULE_PARM_DESC(workarounds, "Work around device bugs (default = 0" ", 36 byte inquiry = " __stringify(SBP2_WORKAROUND_INQUIRY_36) ", skip mode page 8 = " __stringify(SBP2_WORKAROUND_MODE_SENSE_8) ", fix capacity = " __stringify(SBP2_WORKAROUND_FIX_CAPACITY) + ", delay inquiry = " __stringify(SBP2_WORKAROUND_DELAY_INQUIRY) ", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE) ", or a combination)"); @@ -303,6 +310,11 @@ static const struct { .workarounds = SBP2_WORKAROUND_INQUIRY_36 | SBP2_WORKAROUND_MODE_SENSE_8, }, + /* DViCO Momobay FX-3A with TSB42AA9A bridge */ { + .firmware_revision = 0x002800, + .model = 0x000000, + .workarounds = SBP2_WORKAROUND_DELAY_INQUIRY, + }, /* Initio bridges, actually only needed for some older ones */ { .firmware_revision = 0x000200, .model = ~0, @@ -712,6 +724,9 @@ static void sbp2_login(struct work_struct *work) PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect); sbp2_agent_reset(lu); + if (lu->tgt->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY) + ssleep(SBP2_INQUIRY_DELAY); + memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun)); eight_bytes_lun.scsi_lun[0] = (lu->lun >> 8) & 0xff; eight_bytes_lun.scsi_lun[1] = lu->lun & 0xff; -- cgit v1.2.3 From e0e60215552d4d40caf581a8d3247203fe948fe7 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 3 Feb 2008 23:08:58 +0100 Subject: firewire: fw-sbp2: wait for completion of fetch agent reset Like the old sbp2 driver, wait for the write transaction to the AGENT_RESET to complete before proceeding (after login, after reconnect, or in SCSI error handling). There is one occasion where AGENT_RESET is written to from atomic context when getting DEAD status for a command ORB. There we still continue without waiting for the transaction to complete because this is more difficult to fix... Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 4a118fbc7b24..32b50f13e7a8 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -603,29 +603,46 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, static void complete_agent_reset_write(struct fw_card *card, int rcode, - void *payload, size_t length, void *data) + void *payload, size_t length, void *done) { - struct fw_transaction *t = data; + complete(done); +} + +static void sbp2_agent_reset(struct sbp2_logical_unit *lu) +{ + struct fw_device *device = fw_device(lu->tgt->unit->device.parent); + DECLARE_COMPLETION_ONSTACK(done); + struct fw_transaction t; + static u32 z; - kfree(t); + fw_send_request(device->card, &t, TCODE_WRITE_QUADLET_REQUEST, + lu->tgt->node_id, lu->generation, device->max_speed, + lu->command_block_agent_address + SBP2_AGENT_RESET, + &z, sizeof(z), complete_agent_reset_write, &done); + wait_for_completion(&done); } -static int sbp2_agent_reset(struct sbp2_logical_unit *lu) +static void +complete_agent_reset_write_no_wait(struct fw_card *card, int rcode, + void *payload, size_t length, void *data) +{ + kfree(data); +} + +static void sbp2_agent_reset_no_wait(struct sbp2_logical_unit *lu) { struct fw_device *device = fw_device(lu->tgt->unit->device.parent); struct fw_transaction *t; - static u32 zero; + static u32 z; - t = kzalloc(sizeof(*t), GFP_ATOMIC); + t = kmalloc(sizeof(*t), GFP_ATOMIC); if (t == NULL) - return -ENOMEM; + return; fw_send_request(device->card, t, TCODE_WRITE_QUADLET_REQUEST, lu->tgt->node_id, lu->generation, device->max_speed, lu->command_block_agent_address + SBP2_AGENT_RESET, - &zero, sizeof(zero), complete_agent_reset_write, t); - - return 0; + &z, sizeof(z), complete_agent_reset_write_no_wait, t); } static void sbp2_release_target(struct kref *kref) @@ -1086,7 +1103,7 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status) if (status != NULL) { if (STATUS_GET_DEAD(*status)) - sbp2_agent_reset(orb->lu); + sbp2_agent_reset_no_wait(orb->lu); switch (STATUS_GET_RESPONSE(*status)) { case SBP2_STATUS_REQUEST_COMPLETE: -- cgit v1.2.3 From 48f18c761c001a66ef1928b42799c717368b1d64 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 3 Feb 2008 23:09:50 +0100 Subject: firewire: fw-sbp2: log bus_id at management request failures for easier readable logs if more than one SBP-2 device is present. Signed-off-by: Stefan Richter Signed-off-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 66 +++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 32b50f13e7a8..077f1c09dad4 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -148,6 +148,7 @@ struct sbp2_logical_unit { struct sbp2_target { struct kref kref; struct fw_unit *unit; + const char *bus_id; struct list_head lu_list; u64 management_agent_address; @@ -566,20 +567,20 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, retval = -EIO; if (sbp2_cancel_orbs(lu) == 0) { - fw_error("orb reply timed out, rcode=0x%02x\n", - orb->base.rcode); + fw_error("%s: orb reply timed out, rcode=0x%02x\n", + lu->tgt->bus_id, orb->base.rcode); goto out; } if (orb->base.rcode != RCODE_COMPLETE) { - fw_error("management write failed, rcode 0x%02x\n", - orb->base.rcode); + fw_error("%s: management write failed, rcode 0x%02x\n", + lu->tgt->bus_id, orb->base.rcode); goto out; } if (STATUS_GET_RESPONSE(orb->status) != 0 || STATUS_GET_SBP_STATUS(orb->status) != 0) { - fw_error("error status: %d:%d\n", + fw_error("%s: error status: %d:%d\n", lu->tgt->bus_id, STATUS_GET_RESPONSE(orb->status), STATUS_GET_SBP_STATUS(orb->status)); goto out; @@ -664,7 +665,7 @@ static void sbp2_release_target(struct kref *kref) kfree(lu); } scsi_remove_host(shost); - fw_notify("released %s\n", tgt->unit->device.bus_id); + fw_notify("released %s\n", tgt->bus_id); put_device(&tgt->unit->device); scsi_host_put(shost); @@ -693,12 +694,11 @@ static void sbp2_login(struct work_struct *work) { struct sbp2_logical_unit *lu = container_of(work, struct sbp2_logical_unit, work.work); - struct Scsi_Host *shost = - container_of((void *)lu->tgt, struct Scsi_Host, hostdata[0]); + struct sbp2_target *tgt = lu->tgt; + struct fw_device *device = fw_device(tgt->unit->device.parent); + struct Scsi_Host *shost; struct scsi_device *sdev; struct scsi_lun eight_bytes_lun; - struct fw_unit *unit = lu->tgt->unit; - struct fw_device *device = fw_device(unit->device.parent); struct sbp2_login_response response; int generation, node_id, local_node_id; @@ -715,14 +715,14 @@ static void sbp2_login(struct work_struct *work) if (lu->retries++ < 5) sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5)); else - fw_error("failed to login to %s LUN %04x\n", - unit->device.bus_id, lu->lun); + fw_error("%s: failed to login to LUN %04x\n", + tgt->bus_id, lu->lun); goto out; } - lu->generation = generation; - lu->tgt->node_id = node_id; - lu->tgt->address_high = local_node_id << 16; + lu->generation = generation; + tgt->node_id = node_id; + tgt->address_high = local_node_id << 16; /* Get command block agent offset and login id. */ lu->command_block_agent_address = @@ -730,8 +730,8 @@ static void sbp2_login(struct work_struct *work) response.command_block_agent.low; lu->login_id = LOGIN_RESPONSE_GET_LOGIN_ID(response); - fw_notify("logged in to %s LUN %04x (%d retries)\n", - unit->device.bus_id, lu->lun, lu->retries); + fw_notify("%s: logged in to LUN %04x (%d retries)\n", + tgt->bus_id, lu->lun, lu->retries); #if 0 /* FIXME: The linux1394 sbp2 does this last step. */ @@ -747,6 +747,7 @@ static void sbp2_login(struct work_struct *work) memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun)); eight_bytes_lun.scsi_lun[0] = (lu->lun >> 8) & 0xff; eight_bytes_lun.scsi_lun[1] = lu->lun & 0xff; + shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); sdev = __scsi_add_device(shost, 0, 0, scsilun_to_int(&eight_bytes_lun), lu); @@ -767,7 +768,7 @@ static void sbp2_login(struct work_struct *work) scsi_device_put(sdev); } out: - sbp2_target_put(lu->tgt); + sbp2_target_put(tgt); } static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) @@ -850,7 +851,7 @@ static int sbp2_scan_unit_dir(struct sbp2_target *tgt, u32 *directory, if (timeout > tgt->mgt_orb_timeout) fw_notify("%s: config rom contains %ds " "management ORB timeout, limiting " - "to %ds\n", tgt->unit->device.bus_id, + "to %ds\n", tgt->bus_id, timeout / 1000, tgt->mgt_orb_timeout / 1000); break; @@ -878,7 +879,7 @@ static void sbp2_init_workarounds(struct sbp2_target *tgt, u32 model, if (w) fw_notify("Please notify linux1394-devel@lists.sourceforge.net " "if you need the workarounds parameter for %s\n", - tgt->unit->device.bus_id); + tgt->bus_id); if (w & SBP2_WORKAROUND_OVERRIDE) goto out; @@ -900,8 +901,7 @@ static void sbp2_init_workarounds(struct sbp2_target *tgt, u32 model, if (w) fw_notify("Workarounds for %s: 0x%x " "(firmware_revision 0x%06x, model_id 0x%06x)\n", - tgt->unit->device.bus_id, - w, firmware_revision, model); + tgt->bus_id, w, firmware_revision, model); tgt->workarounds = w; } @@ -925,6 +925,7 @@ static int sbp2_probe(struct device *dev) tgt->unit = unit; kref_init(&tgt->kref); INIT_LIST_HEAD(&tgt->lu_list); + tgt->bus_id = unit->device.bus_id; if (fw_device_enable_phys_dma(device) < 0) goto fail_shost_put; @@ -975,8 +976,8 @@ static void sbp2_reconnect(struct work_struct *work) { struct sbp2_logical_unit *lu = container_of(work, struct sbp2_logical_unit, work.work); - struct fw_unit *unit = lu->tgt->unit; - struct fw_device *device = fw_device(unit->device.parent); + struct sbp2_target *tgt = lu->tgt; + struct fw_device *device = fw_device(tgt->unit->device.parent); int generation, node_id, local_node_id; if (fw_device_is_shutdown(device)) @@ -991,8 +992,7 @@ static void sbp2_reconnect(struct work_struct *work) SBP2_RECONNECT_REQUEST, lu->login_id, NULL) < 0) { if (lu->retries++ >= 5) { - fw_error("failed to reconnect to %s\n", - unit->device.bus_id); + fw_error("%s: failed to reconnect\n", tgt->bus_id); /* Fall back and try to log in again. */ lu->retries = 0; PREPARE_DELAYED_WORK(&lu->work, sbp2_login); @@ -1001,17 +1001,17 @@ static void sbp2_reconnect(struct work_struct *work) goto out; } - lu->generation = generation; - lu->tgt->node_id = node_id; - lu->tgt->address_high = local_node_id << 16; + lu->generation = generation; + tgt->node_id = node_id; + tgt->address_high = local_node_id << 16; - fw_notify("reconnected to %s LUN %04x (%d retries)\n", - unit->device.bus_id, lu->lun, lu->retries); + fw_notify("%s: reconnected to LUN %04x (%d retries)\n", + tgt->bus_id, lu->lun, lu->retries); sbp2_agent_reset(lu); sbp2_cancel_orbs(lu); out: - sbp2_target_put(lu->tgt); + sbp2_target_put(tgt); } static void sbp2_update(struct fw_unit *unit) @@ -1359,7 +1359,7 @@ static int sbp2_scsi_abort(struct scsi_cmnd *cmd) { struct sbp2_logical_unit *lu = cmd->device->hostdata; - fw_notify("sbp2_scsi_abort\n"); + fw_notify("%s: sbp2_scsi_abort\n", lu->tgt->bus_id); sbp2_agent_reset(lu); sbp2_cancel_orbs(lu); -- cgit v1.2.3 From 0fa6dfdb0a2768541e998a5dab10b368de56c60a Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 3 Feb 2008 23:10:47 +0100 Subject: firewire: fw-sbp2: don't add scsi_device twice When a reconnect failed but re-login succeeded, __scsi_add_device was called again. In those cases, __scsi_add_device succeeded and returned the pointer to the existing scsi_device. fw-sbp2 then continued orderly, except that it missed to call sbp2_cancel_orbs. SCSI core would call fw-sbp2's eh_abort_handler eventually if there had been an outstanding command. This patch avoids the needless lookups and temporary allocations in SCSI core and I/O stall and timeout until eh_abort_handler hits. Also, __scsi_add_device tolerating calls for devices which already exist is undocumented behavior on which we shouldn't rely. Signed-off-by: Stefan Richter Signed-off-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 077f1c09dad4..914170bb50a8 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -741,6 +741,12 @@ static void sbp2_login(struct work_struct *work) PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect); sbp2_agent_reset(lu); + /* This was a re-login. */ + if (lu->sdev) { + sbp2_cancel_orbs(lu); + goto out; + } + if (lu->tgt->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY) ssleep(SBP2_INQUIRY_DELAY); -- cgit v1.2.3 From ce896d95cc7886ae05859c5b409a7b2f3b606ec1 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 3 Feb 2008 23:11:39 +0100 Subject: firewire: fw-sbp2: logout and login after failed reconnect If fw-sbp2 was too late with requesting the reconnect, the target would reject this. In this case, log out before attempting the reconnect. Else several firmwares will deny the re-login because they somehow didn't invalidate the old login. Also, don't retry reconnects in this situation. The retries won't succeed either. These changes improve chances for successful re-login and shorten the period during which the logical unit is inaccessible. Signed-off-by: Stefan Richter Signed-off-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 914170bb50a8..80ab65161750 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -710,6 +710,11 @@ static void sbp2_login(struct work_struct *work) node_id = device->node_id; local_node_id = device->card->node_id; + /* If this is a re-login attempt, log out, or we might be rejected. */ + if (lu->sdev) + sbp2_send_management_orb(lu, device->node_id, generation, + SBP2_LOGOUT_REQUEST, lu->login_id, NULL); + if (sbp2_send_management_orb(lu, node_id, generation, SBP2_LOGIN_REQUEST, lu->lun, &response) < 0) { if (lu->retries++ < 5) @@ -997,9 +1002,17 @@ static void sbp2_reconnect(struct work_struct *work) if (sbp2_send_management_orb(lu, node_id, generation, SBP2_RECONNECT_REQUEST, lu->login_id, NULL) < 0) { - if (lu->retries++ >= 5) { + /* + * If reconnect was impossible even though we are in the + * current generation, fall back and try to log in again. + * + * We could check for "Function rejected" status, but + * looking at the bus generation as simpler and more general. + */ + smp_rmb(); /* get current card generation */ + if (generation == device->card->generation || + lu->retries++ >= 5) { fw_error("%s: failed to reconnect\n", tgt->bus_id); - /* Fall back and try to log in again. */ lu->retries = 0; PREPARE_DELAYED_WORK(&lu->work, sbp2_login); } -- cgit v1.2.3 From 7bb6bf7c8ba0b4ccfecaa00d6faea51b0bd42c8c Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 3 Feb 2008 23:12:17 +0100 Subject: firewire: fw-sbp2: sort includes Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 80ab65161750..323b03bacd29 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -28,15 +28,15 @@ * and many others. */ +#include +#include +#include +#include #include +#include #include #include -#include -#include -#include #include -#include -#include #include #include #include @@ -48,9 +48,9 @@ #include #include -#include "fw-transaction.h" -#include "fw-topology.h" #include "fw-device.h" +#include "fw-topology.h" +#include "fw-transaction.h" /* * So far only bridges from Oxford Semiconductor are known to support -- cgit v1.2.3 From e80de3704ac30ddb7f9a12447a2ecee32ccd7880 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Fri, 15 Feb 2008 21:29:02 +0100 Subject: firewire: fw-sbp2: enforce a retry of __scsi_add_device if bus generation changed fw-sbp2 is unable to reconnect while performing __scsi_add_device because there is only a single workqueue thread context available for both at the moment. This should be fixed eventually. An actual failure of __scsi_add_device is easy to handle, but an incomplete execution of __scsi_add_device with an sdev returned would remain undetected and leave the SBP-2 target unusable. Therefore we use a workaround: If there was a bus reset during __scsi_add_device (i.e. during the SCSI probe), we remove the new sdev immediately, log out, and attempt login and SCSI probe again. Tested-by: Jarod Wilson (earlier version) Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 49 +++++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 14 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 323b03bacd29..6d10934c58f1 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -762,22 +762,43 @@ static void sbp2_login(struct work_struct *work) sdev = __scsi_add_device(shost, 0, 0, scsilun_to_int(&eight_bytes_lun), lu); - if (IS_ERR(sdev)) { - smp_rmb(); /* generation may have changed */ - generation = device->generation; - smp_rmb(); /* node_id must not be older than generation */ + /* + * FIXME: We are unable to perform reconnects while in sbp2_login(). + * Therefore __scsi_add_device() will get into trouble if a bus reset + * happens in parallel. It will either fail or leave us with an + * unusable sdev. As a workaround we check for this and retry the + * whole login and SCSI probing. + */ - sbp2_send_management_orb(lu, device->node_id, generation, - SBP2_LOGOUT_REQUEST, lu->login_id, NULL); - /* - * Set this back to sbp2_login so we fall back and - * retry login on bus reset. - */ - PREPARE_DELAYED_WORK(&lu->work, sbp2_login); - } else { - lu->sdev = sdev; - scsi_device_put(sdev); + /* Reported error during __scsi_add_device() */ + if (IS_ERR(sdev)) + goto out_logout_login; + + scsi_device_put(sdev); + + /* Unreported error during __scsi_add_device() */ + smp_rmb(); /* get current card generation */ + if (generation != device->card->generation) { + scsi_remove_device(sdev); + goto out_logout_login; } + + /* No error during __scsi_add_device() */ + lu->sdev = sdev; + goto out; + + out_logout_login: + smp_rmb(); /* generation may have changed */ + generation = device->generation; + smp_rmb(); /* node_id must not be older than generation */ + + sbp2_send_management_orb(lu, device->node_id, generation, + SBP2_LOGOUT_REQUEST, lu->login_id, NULL); + /* + * If a bus reset happened, sbp2_update will have requeued + * lu->work already. Reset the work from reconnect to login. + */ + PREPARE_DELAYED_WORK(&lu->work, sbp2_login); out: sbp2_target_put(tgt); } -- cgit v1.2.3 From 2e2705bdcb959372d54bf7f79dd9a555ec2adfb4 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 16 Feb 2008 16:37:28 +0100 Subject: firewire: fw-sbp2: (try to) avoid I/O errors during reconnect While fw-sbp2 takes the necessary time to reconnect to a logical unit after bus reset, the SCSI core keeps sending new commands. They are all immediately completed with host busy status, and application clients or filesystems will break quickly. The SCSI device might even be taken offline: http://bugzilla.kernel.org/show_bug.cgi?id=9734 The only remedy seems to be to block the SCSI device until reconnect. Alas the SCSI core has no useful API to block only one logical unit i.e. the scsi_device, therefore we block the entire Scsi_Host. This currently corresponds to an SBP-2 target. In case of targets with multiple logical units, we need to satisfy the dependencies between logical units by carefully tracking the blocking state of the target and its units. We block all logical units of a target as soon as one of them needs to be blocked, and keep them blocked until all of them are ready to be unblocked. Furthermore, as the history of the old sbp2 driver has shown, the scsi_block_requests() API is a minefield with high potential of deadlocks. We therefore take extra measures to keep logical units unblocked during __scsi_add_device() and during shutdown. This avoids I/O errors during reconnect in many but alas not in all cases. There may still be errors after a re-login had to be performed. Also, some bridges have been seen to cease fetching management ORBs if I/O went on up until a bus reset. In these cases, all management ORBs time out after mgt_orb_timeout. The old sbp2 driver is less vulnerable or maybe not vulnerable to this, for as yet unknown reasons. Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 126 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 4 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 6d10934c58f1..ea4811c45512 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -139,6 +139,7 @@ struct sbp2_logical_unit { int generation; int retries; struct delayed_work work; + bool blocked; }; /* @@ -157,6 +158,9 @@ struct sbp2_target { int address_high; unsigned int workarounds; unsigned int mgt_orb_timeout; + + int dont_block; /* counter for each logical unit */ + int blocked; /* ditto */ }; /* @@ -646,6 +650,107 @@ static void sbp2_agent_reset_no_wait(struct sbp2_logical_unit *lu) &z, sizeof(z), complete_agent_reset_write_no_wait, t); } +static void sbp2_set_generation(struct sbp2_logical_unit *lu, int generation) +{ + struct fw_card *card = fw_device(lu->tgt->unit->device.parent)->card; + unsigned long flags; + + /* serialize with comparisons of lu->generation and card->generation */ + spin_lock_irqsave(&card->lock, flags); + lu->generation = generation; + spin_unlock_irqrestore(&card->lock, flags); +} + +static inline void sbp2_allow_block(struct sbp2_logical_unit *lu) +{ + /* + * We may access dont_block without taking card->lock here: + * All callers of sbp2_allow_block() and all callers of sbp2_unblock() + * are currently serialized against each other. + * And a wrong result in sbp2_conditionally_block()'s access of + * dont_block is rather harmless, it simply misses its first chance. + */ + --lu->tgt->dont_block; +} + +/* + * Blocks lu->tgt if all of the following conditions are met: + * - Login, INQUIRY, and high-level SCSI setup of all of the target's + * logical units have been finished (indicated by dont_block == 0). + * - lu->generation is stale. + * + * Note, scsi_block_requests() must be called while holding card->lock, + * otherwise it might foil sbp2_[conditionally_]unblock()'s attempt to + * unblock the target. + */ +static void sbp2_conditionally_block(struct sbp2_logical_unit *lu) +{ + struct sbp2_target *tgt = lu->tgt; + struct fw_card *card = fw_device(tgt->unit->device.parent)->card; + struct Scsi_Host *shost = + container_of((void *)tgt, struct Scsi_Host, hostdata[0]); + unsigned long flags; + + spin_lock_irqsave(&card->lock, flags); + if (!tgt->dont_block && !lu->blocked && + lu->generation != card->generation) { + lu->blocked = true; + if (++tgt->blocked == 1) { + scsi_block_requests(shost); + fw_notify("blocked %s\n", lu->tgt->bus_id); + } + } + spin_unlock_irqrestore(&card->lock, flags); +} + +/* + * Unblocks lu->tgt as soon as all its logical units can be unblocked. + * Note, it is harmless to run scsi_unblock_requests() outside the + * card->lock protected section. On the other hand, running it inside + * the section might clash with shost->host_lock. + */ +static void sbp2_conditionally_unblock(struct sbp2_logical_unit *lu) +{ + struct sbp2_target *tgt = lu->tgt; + struct fw_card *card = fw_device(tgt->unit->device.parent)->card; + struct Scsi_Host *shost = + container_of((void *)tgt, struct Scsi_Host, hostdata[0]); + unsigned long flags; + bool unblock = false; + + spin_lock_irqsave(&card->lock, flags); + if (lu->blocked && lu->generation == card->generation) { + lu->blocked = false; + unblock = --tgt->blocked == 0; + } + spin_unlock_irqrestore(&card->lock, flags); + + if (unblock) { + scsi_unblock_requests(shost); + fw_notify("unblocked %s\n", lu->tgt->bus_id); + } +} + +/* + * Prevents future blocking of tgt and unblocks it. + * Note, it is harmless to run scsi_unblock_requests() outside the + * card->lock protected section. On the other hand, running it inside + * the section might clash with shost->host_lock. + */ +static void sbp2_unblock(struct sbp2_target *tgt) +{ + struct fw_card *card = fw_device(tgt->unit->device.parent)->card; + struct Scsi_Host *shost = + container_of((void *)tgt, struct Scsi_Host, hostdata[0]); + unsigned long flags; + + spin_lock_irqsave(&card->lock, flags); + ++tgt->dont_block; + spin_unlock_irqrestore(&card->lock, flags); + + scsi_unblock_requests(shost); +} + static void sbp2_release_target(struct kref *kref) { struct sbp2_target *tgt = container_of(kref, struct sbp2_target, kref); @@ -653,6 +758,9 @@ static void sbp2_release_target(struct kref *kref) struct Scsi_Host *shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); + /* prevent deadlocks */ + sbp2_unblock(tgt); + list_for_each_entry_safe(lu, next, &tgt->lu_list, link) { if (lu->sdev) scsi_remove_device(lu->sdev); @@ -717,17 +825,20 @@ static void sbp2_login(struct work_struct *work) if (sbp2_send_management_orb(lu, node_id, generation, SBP2_LOGIN_REQUEST, lu->lun, &response) < 0) { - if (lu->retries++ < 5) + if (lu->retries++ < 5) { sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5)); - else + } else { fw_error("%s: failed to login to LUN %04x\n", tgt->bus_id, lu->lun); + /* Let any waiting I/O fail from now on. */ + sbp2_unblock(lu->tgt); + } goto out; } - lu->generation = generation; tgt->node_id = node_id; tgt->address_high = local_node_id << 16; + sbp2_set_generation(lu, generation); /* Get command block agent offset and login id. */ lu->command_block_agent_address = @@ -749,6 +860,7 @@ static void sbp2_login(struct work_struct *work) /* This was a re-login. */ if (lu->sdev) { sbp2_cancel_orbs(lu); + sbp2_conditionally_unblock(lu); goto out; } @@ -785,6 +897,7 @@ static void sbp2_login(struct work_struct *work) /* No error during __scsi_add_device() */ lu->sdev = sdev; + sbp2_allow_block(lu); goto out; out_logout_login: @@ -825,6 +938,8 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) lu->sdev = NULL; lu->lun = lun_entry & 0xffff; lu->retries = 0; + lu->blocked = false; + ++tgt->dont_block; INIT_LIST_HEAD(&lu->orb_list); INIT_DELAYED_WORK(&lu->work, sbp2_login); @@ -1041,15 +1156,16 @@ static void sbp2_reconnect(struct work_struct *work) goto out; } - lu->generation = generation; tgt->node_id = node_id; tgt->address_high = local_node_id << 16; + sbp2_set_generation(lu, generation); fw_notify("%s: reconnected to LUN %04x (%d retries)\n", tgt->bus_id, lu->lun, lu->retries); sbp2_agent_reset(lu); sbp2_cancel_orbs(lu); + sbp2_conditionally_unblock(lu); out: sbp2_target_put(tgt); } @@ -1066,6 +1182,7 @@ static void sbp2_update(struct fw_unit *unit) * Iteration over tgt->lu_list is therefore safe here. */ list_for_each_entry(lu, &tgt->lu_list, link) { + sbp2_conditionally_block(lu); lu->retries = 0; sbp2_queue_work(lu, 0); } @@ -1169,6 +1286,7 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status) * or when sending the write (less likely). */ result = DID_BUS_BUSY << 16; + sbp2_conditionally_block(orb->lu); } dma_unmap_single(device->card->device, orb->base.request_bus, -- cgit v1.2.3 From 5513c5f6f9bd8c8ad3727130910fa288c62526a7 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 17 Feb 2008 14:56:19 +0100 Subject: firewire: fw-sbp2: fix NULL pointer deref. in slave_alloc Fix a kernel bug when running rescan-scsi-bus while a FireWire disk is connected: http://bugzilla.kernel.org/show_bug.cgi?id=10008 Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index ea4811c45512..60ebcb5fe21a 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -1473,6 +1473,10 @@ static int sbp2_scsi_slave_alloc(struct scsi_device *sdev) { struct sbp2_logical_unit *lu = sdev->hostdata; + /* (Re-)Adding logical units via the SCSI stack is not supported. */ + if (!lu) + return -ENOSYS; + sdev->allow_restart = 1; /* -- cgit v1.2.3 From 33f1c6c3529f5f279e2e98e5cca0c5bac152153b Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 19 Feb 2008 09:05:49 +0100 Subject: firewire: fw-sbp2: fix NULL pointer deref. in scsi_remove_device Fix a kernel bug when unplugging an SBP-2 device after having its scsi_device already removed via the "delete" sysfs attribute. Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 60ebcb5fe21a..5259491580fc 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -762,9 +762,10 @@ static void sbp2_release_target(struct kref *kref) sbp2_unblock(tgt); list_for_each_entry_safe(lu, next, &tgt->lu_list, link) { - if (lu->sdev) + if (lu->sdev) { scsi_remove_device(lu->sdev); - + scsi_device_put(lu->sdev); + } sbp2_send_management_orb(lu, tgt->node_id, lu->generation, SBP2_LOGOUT_REQUEST, lu->login_id, NULL); @@ -886,12 +887,11 @@ static void sbp2_login(struct work_struct *work) if (IS_ERR(sdev)) goto out_logout_login; - scsi_device_put(sdev); - /* Unreported error during __scsi_add_device() */ smp_rmb(); /* get current card generation */ if (generation != device->card->generation) { scsi_remove_device(sdev); + scsi_device_put(sdev); goto out_logout_login; } -- cgit v1.2.3 From f8436158b1d76e6842856048f287799468b56eb2 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 26 Feb 2008 23:30:02 +0100 Subject: firewire: fw-sbp2: better fix for NULL pointer dereference in scsi_remove_device Patch "firewire: fw-sbp2: fix NULL pointer deref. in scsi_remove_device" had the unintended effect that firewire-sbp2 could not be unloaded anymore until all SBP-2 devices were unplugged. We now fix the NULL pointer bug by reacquiring a reference to the sdev instead of holding a reference to the sdev (and to the module) all the time. Signed-off-by: Stefan Richter Tested-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 5259491580fc..a093ac329db7 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -122,7 +122,6 @@ static const char sbp2_driver_name[] = "sbp2"; struct sbp2_logical_unit { struct sbp2_target *tgt; struct list_head link; - struct scsi_device *sdev; struct fw_address_handler address_handler; struct list_head orb_list; @@ -139,6 +138,7 @@ struct sbp2_logical_unit { int generation; int retries; struct delayed_work work; + bool has_sdev; bool blocked; }; @@ -751,20 +751,33 @@ static void sbp2_unblock(struct sbp2_target *tgt) scsi_unblock_requests(shost); } +static int sbp2_lun2int(u16 lun) +{ + struct scsi_lun eight_bytes_lun; + + memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun)); + eight_bytes_lun.scsi_lun[0] = (lun >> 8) & 0xff; + eight_bytes_lun.scsi_lun[1] = lun & 0xff; + + return scsilun_to_int(&eight_bytes_lun); +} + static void sbp2_release_target(struct kref *kref) { struct sbp2_target *tgt = container_of(kref, struct sbp2_target, kref); struct sbp2_logical_unit *lu, *next; struct Scsi_Host *shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); + struct scsi_device *sdev; /* prevent deadlocks */ sbp2_unblock(tgt); list_for_each_entry_safe(lu, next, &tgt->lu_list, link) { - if (lu->sdev) { - scsi_remove_device(lu->sdev); - scsi_device_put(lu->sdev); + sdev = scsi_device_lookup(shost, 0, 0, sbp2_lun2int(lu->lun)); + if (sdev) { + scsi_remove_device(sdev); + scsi_device_put(sdev); } sbp2_send_management_orb(lu, tgt->node_id, lu->generation, SBP2_LOGOUT_REQUEST, lu->login_id, NULL); @@ -807,7 +820,6 @@ static void sbp2_login(struct work_struct *work) struct fw_device *device = fw_device(tgt->unit->device.parent); struct Scsi_Host *shost; struct scsi_device *sdev; - struct scsi_lun eight_bytes_lun; struct sbp2_login_response response; int generation, node_id, local_node_id; @@ -820,7 +832,7 @@ static void sbp2_login(struct work_struct *work) local_node_id = device->card->node_id; /* If this is a re-login attempt, log out, or we might be rejected. */ - if (lu->sdev) + if (lu->has_sdev) sbp2_send_management_orb(lu, device->node_id, generation, SBP2_LOGOUT_REQUEST, lu->login_id, NULL); @@ -859,7 +871,7 @@ static void sbp2_login(struct work_struct *work) sbp2_agent_reset(lu); /* This was a re-login. */ - if (lu->sdev) { + if (lu->has_sdev) { sbp2_cancel_orbs(lu); sbp2_conditionally_unblock(lu); goto out; @@ -868,13 +880,8 @@ static void sbp2_login(struct work_struct *work) if (lu->tgt->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY) ssleep(SBP2_INQUIRY_DELAY); - memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun)); - eight_bytes_lun.scsi_lun[0] = (lu->lun >> 8) & 0xff; - eight_bytes_lun.scsi_lun[1] = lu->lun & 0xff; shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); - - sdev = __scsi_add_device(shost, 0, 0, - scsilun_to_int(&eight_bytes_lun), lu); + sdev = __scsi_add_device(shost, 0, 0, sbp2_lun2int(lu->lun), lu); /* * FIXME: We are unable to perform reconnects while in sbp2_login(). * Therefore __scsi_add_device() will get into trouble if a bus reset @@ -896,7 +903,8 @@ static void sbp2_login(struct work_struct *work) } /* No error during __scsi_add_device() */ - lu->sdev = sdev; + lu->has_sdev = true; + scsi_device_put(sdev); sbp2_allow_block(lu); goto out; @@ -934,11 +942,11 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) return -ENOMEM; } - lu->tgt = tgt; - lu->sdev = NULL; - lu->lun = lun_entry & 0xffff; - lu->retries = 0; - lu->blocked = false; + lu->tgt = tgt; + lu->lun = lun_entry & 0xffff; + lu->retries = 0; + lu->has_sdev = false; + lu->blocked = false; ++tgt->dont_block; INIT_LIST_HEAD(&lu->orb_list); INIT_DELAYED_WORK(&lu->work, sbp2_login); -- cgit v1.2.3 From 855c603d61ede7e2810217f15f0d574b4f29c891 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Wed, 27 Feb 2008 22:14:27 +0100 Subject: firewire: fix crash in automatic module unloading "modprobe firewire-ohci; sleep .1; modprobe -r firewire-ohci" used to result in crashes like this: BUG: unable to handle kernel paging request at ffffffff8807b455 IP: [] PGD 203067 PUD 207063 PMD 7c170067 PTE 0 Oops: 0010 [1] PREEMPT SMP CPU 0 Modules linked in: i915 drm cpufreq_ondemand acpi_cpufreq freq_table applesmc input_polldev led_class coretemp hwmon eeprom snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss button thermal processor sg snd_hda_intel snd_pcm snd_timer snd snd_page_alloc sky2 i2c_i801 rtc [last unloaded: crc_itu_t] Pid: 9, comm: events/0 Not tainted 2.6.25-rc2 #3 RIP: 0010:[] [] RSP: 0018:ffff81007dcdde88 EFLAGS: 00010246 RAX: ffff81007dc95040 RBX: ffff81007dee5390 RCX: 0000000000005e13 RDX: 0000000000008c8b RSI: 0000000000000001 RDI: ffff81007dee5388 RBP: ffff81007dc5eb40 R08: 0000000000000002 R09: ffffffff8022d05c R10: ffffffff8023b34c R11: ffffffff8041a353 R12: ffff81007dee5388 R13: ffffffff8807b455 R14: ffffffff80593bc0 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffffffff8055a000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: ffffffff8807b455 CR3: 0000000000201000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process events/0 (pid: 9, threadinfo ffff81007dcdc000, task ffff81007dc95040) Stack: ffffffff8023b396 ffffffff88082524 0000000000000000 ffffffff8807d9ae ffff81007dc5eb40 ffff81007dc9dce0 ffff81007dc5eb40 ffff81007dc5eb80 ffff81007dc9dce0 ffffffffffffffff ffffffff8023be87 0000000000000000 Call Trace: [] ? run_workqueue+0xdf/0x1df [] ? worker_thread+0xd8/0xe3 [] ? autoremove_wake_function+0x0/0x2e [] ? worker_thread+0x0/0xe3 [] ? kthread+0x47/0x74 [] ? trace_hardirqs_on_thunk+0x35/0x3a [] ? child_rip+0xa/0x12 [] ? restore_args+0x0/0x3d [] ? kthreadd+0x14c/0x171 [] ? kthreadd+0x14c/0x171 [] ? kthread+0x0/0x74 [] ? child_rip+0x0/0x12 Code: Bad RIP value. RIP [] RSP CR2: ffffffff8807b455 ---[ end trace c7366c6657fe5bed ]--- Note that this crash happened _after_ firewire-core was unloaded. The shared workqueue tried to run firewire-core's device initialization jobs or similar jobs. The fix makes sure that firewire-ohci and hence firewire-core is not unloaded before all device shutdown jobs have been completed. This is determined by the count of device initializations minus device releases. Also skip useless retries in the node initialization job if the node is to be shut down. Signed-off-by: Stefan Richter Signed-off-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index a093ac329db7..03069a454c07 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -769,6 +769,7 @@ static void sbp2_release_target(struct kref *kref) struct Scsi_Host *shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); struct scsi_device *sdev; + struct fw_device *device = fw_device(tgt->unit->device.parent); /* prevent deadlocks */ sbp2_unblock(tgt); @@ -791,6 +792,7 @@ static void sbp2_release_target(struct kref *kref) put_device(&tgt->unit->device); scsi_host_put(shost); + fw_device_put(device); } static struct workqueue_struct *sbp2_wq; @@ -1088,6 +1090,8 @@ static int sbp2_probe(struct device *dev) if (scsi_add_host(shost, &unit->device) < 0) goto fail_shost_put; + fw_device_get(device); + /* Initialize to values that won't match anything in our table. */ firmware_revision = 0xff000000; model = 0xff000000; -- cgit v1.2.3 From 51f9dbef5be41f3ff6000c874741a3a357f9bad7 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 7 Mar 2008 01:43:01 -0500 Subject: firewire: fw-sbp2: set single-phase retry_limit Per the SBP-2 specification, all SBP-2 target devices must have a BUSY_TIMEOUT register. Per the 1394-1995 specification, the retry_limt portion of the register should be set to 0x0 initially, and set on the target by a logged in initiator (i.e., a Linux host w/firewire controller(s)). Well, as it turns out, lots of devices these days have actually moved on to starting to implement SBP-3 compliance, which says that retry_limit should default to 0xf instead (yes, SBP-3 stomps directly on 1394-1995, oops). Prior to this change, the firewire driver stack didn't touch retry_limit, and any SBP-3 compliant device worked fine, while SBP-2 compliant ones were unable to retransmit when the host returned an ack_busy_X, which resulted in stalled out I/O, eventually causing the SCSI layer to give up and offline the device. The simple fix is for us to set retry_limit to 0xf in the register for all devices (which actually matches what the old ieee1394 stack did). Prior to this change, a hard disk behind an SBP-2 Prolific PL-3507 bridge chip would routinely encounter buffer I/O errors and wind up offlined by the SCSI layer. With this change, I've encountered zero I/O failures moving tens of GB of data around. Signed-off-by: Jarod Wilson Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 03069a454c07..8bce569a7c5a 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -173,6 +173,7 @@ struct sbp2_target { #define SBP2_ORB_TIMEOUT 2000U /* Timeout in ms */ #define SBP2_ORB_NULL 0x80000000 #define SBP2_MAX_SG_ELEMENT_LENGTH 0xf000 +#define SBP2_RETRY_LIMIT 0xf /* 15 retries */ #define SBP2_DIRECTION_TO_MEDIA 0x0 #define SBP2_DIRECTION_FROM_MEDIA 0x1 @@ -812,6 +813,30 @@ static void sbp2_target_put(struct sbp2_target *tgt) kref_put(&tgt->kref, sbp2_release_target); } +static void +complete_set_busy_timeout(struct fw_card *card, int rcode, + void *payload, size_t length, void *done) +{ + complete(done); +} + +static void sbp2_set_busy_timeout(struct sbp2_logical_unit *lu) +{ + struct fw_device *device = fw_device(lu->tgt->unit->device.parent); + DECLARE_COMPLETION_ONSTACK(done); + struct fw_transaction t; + static __be32 busy_timeout; + + /* FIXME: we should try to set dual-phase cycle_limit too */ + busy_timeout = cpu_to_be32(SBP2_RETRY_LIMIT); + + fw_send_request(device->card, &t, TCODE_WRITE_QUADLET_REQUEST, + lu->tgt->node_id, lu->generation, device->max_speed, + CSR_REGISTER_BASE + CSR_BUSY_TIMEOUT, &busy_timeout, + sizeof(busy_timeout), complete_set_busy_timeout, &done); + wait_for_completion(&done); +} + static void sbp2_reconnect(struct work_struct *work); static void sbp2_login(struct work_struct *work) @@ -864,10 +889,8 @@ static void sbp2_login(struct work_struct *work) fw_notify("%s: logged in to LUN %04x (%d retries)\n", tgt->bus_id, lu->lun, lu->retries); -#if 0 - /* FIXME: The linux1394 sbp2 does this last step. */ - sbp2_set_busy_timeout(scsi_id); -#endif + /* set appropriate retry limit(s) in BUSY_TIMEOUT register */ + sbp2_set_busy_timeout(lu); PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect); sbp2_agent_reset(lu); -- cgit v1.2.3 From 2aa9ff7fc5bc41d4b77c2da02086259a86f3d472 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 11 Mar 2008 22:32:03 +0100 Subject: firewire: fw-sbp2: fix for SYM13FW500 bridge (Datafab disk) Fix I/O errors due to SYM13FW500's inability to handle larger request sizes. Reported by Piergiorgio Sartor in https://bugzilla.redhat.com/show_bug.cgi?id=436879 Signed-off-by: Stefan Richter Signed-off-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/firewire/fw-sbp2.c') diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 8bce569a7c5a..62b4e47d0cc0 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -331,6 +331,11 @@ static const struct { .model = ~0, .workarounds = SBP2_WORKAROUND_128K_MAX_TRANS, }, + /* Datafab MD2-FW2 with Symbios/LSILogic SYM13FW500 bridge */ { + .firmware_revision = 0x002600, + .model = ~0, + .workarounds = SBP2_WORKAROUND_128K_MAX_TRANS, + }, /* * There are iPods (2nd gen, 3rd gen) with model_id == 0, but -- cgit v1.2.3