From 33524b70e8f3dd55a4ba78ad81742c7814e7b0ed Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Tue, 21 Nov 2006 10:40:08 -0800 Subject: [SCSI] aacraid: Detect Blinkled at startup Received from Mark Salyzyn: Detecting a BlinkLED at startup is useful for quickly catching adapters that are in a lot of pain (in a BlinkLED assert), rather than waiting several minutes for commands to time out. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/commsup.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/scsi/aacraid/commsup.c') diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 19e42ac07cb2..0fd462a876f9 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -518,6 +518,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, */ unsigned long count = 36000000L; /* 3 minutes */ while (down_trylock(&fibptr->event_wait)) { + int blink; if (--count == 0) { spin_lock_irqsave(q->lock, qflags); q->numpending--; @@ -530,6 +531,14 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, } return -ETIMEDOUT; } + if ((blink = aac_adapter_check_health(dev)) > 0) { + if (wait == -1) { + printk(KERN_ERR "aacraid: aac_fib_send: adapter blinkLED 0x%x.\n" + "Usually a result of a serious unrecoverable hardware problem\n", + blink); + } + return -EFAULT; + } udelay(5); } } else if (down_interruptible(&fibptr->event_wait)) { -- cgit v1.2.3 From d18b448fc2caf0d719bd4bd34fb1856be89c8ef7 Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Tue, 21 Nov 2006 10:40:31 -0800 Subject: [SCSI] aacraid: Abort management FIBs Received from Mark Salyzyn: Add code to abort outstanding management ioctl fibs when the blinkLED recovery is performed. This code is 'clunky' and does not have any real feedback, in that the reset could progress before the user application has gotten its notification of command completion.
We put a schedule() call to delay just the right amount for most cases, because we tried a spin and still managed to find cases where we would spin forever waiting for the management application to acknowledge the impending doom surrounding the cause of the BlinkLED. This will cause an oops in the context of the management application if we proceed too quickly. I view this as the lesser of many evils since currently if there are outstanding management ioctls during a need to reset/recover the adapter, the management application just locks up and waits forever. The best-practices fix for this problem is not going to be simple or easy (at least the fixes I imagine today); and we found a balance between the needs of the driver to proceed, and the locked-up or confused applications that would hold back the driver. I just do not like the idea of a kernel oops in an application to deal with low priority, sluggish or misbehaving applications. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/commsup.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers/scsi/aacraid/commsup.c') diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 0fd462a876f9..4893a6d06a33 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1102,6 +1102,20 @@ static int _aac_reset_adapter(struct aac_dev *aac) goto out; } + /* + * Loop through the fibs, close the synchronous FIBS + */ + for (index = 0; index < (aac->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB); index++) { + struct fib *fib = &aac->fibs[index]; + if (!(fib->hw_fib->header.XferState & cpu_to_le32(NoResponseExpected | Async)) && + (fib->hw_fib->header.XferState & cpu_to_le32(ResponseExpected))) { + unsigned long flagv; + spin_lock_irqsave(&fib->event_lock, flagv); + up(&fib->event_wait); + spin_unlock_irqrestore(&fib->event_lock, flagv); + schedule(); + } + } index = aac->cardtype; /* -- cgit v1.2.3