Re: mpt irq timeout problem after reboot - only if non-verbose booting !?!

2012-10-18 Thread John Baldwin
On Wednesday, October 17, 2012 3:14:52 pm Harald Schmalzbauer (mobil) wrote:
> -Ursprüngliche Nachricht-
> > Von: John Baldwin 
> > An: freebsd-stable@freebsd.org
> > Cc: h.schmalzba...@omnilan.de
> > Gesendet: 17.10.'12,  20:46
> > 
> > On Tuesday, October 16, 2012 5:24:44 am Harald Schmalzbauer wrote:
> >>  Hello,
> >> 
> >> I have 9.1-RC2 running in an ESXi 5.1 guest.
> >> I use 'lsisas' as virtual SCSI-Controller and mpt attaches and finds 1068E.
> >> 
> >> Everything is working fine until the first 'shutdown -r now':
> >> The second boot pauses for ~2 minutes after probing disks and continues
> >> with this error:
> >> mpt0: Timedout requests already complete. Interrupts may not be 
> >> functioning.
> > 
> > To be clear, you only see this at the end of reboot, and the hardware is 
> > fine
> > once the machine is back up?
> .
> 
> Thanks for your attention!
> The timeout occurs after the first 'shutdown -r' while device probing during
> second boot process.  Perhaps this is amd64 specific. Today I had a new i386
> setup which doesn't exhibit this timeout. But it's on different hardware and
> hv-host was 5.0 instead of 5.1. So not really representative...

Hmmm, ok.  In that case my patch is not relevant.  It would only fix that
message occurring during the shutdown.

-- 
John Baldwin
___
freebsd-stable@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"

Re: mpt irq timeout problem after reboot - only if non-verbose booting !?!

2012-10-17 Thread Harald Schmalzbauer
 schrieb John Baldwin am 17.10.2012 19:19 (localtime):
> Are you using any RAID volumes?  The only shutdown handler in mpt that looks
> like it might want interrupts to work is mpt_raid_shutdown().  It needs to use
> polled I/O instead of disabling interrupts I think.  Try this:
>
> Index: mpt_raid.c
> ===
> --- mpt_raid.c(revision 241641)
> +++ mpt_raid.c(working copy)
> @@ -115,7 +115,7 @@ static timeout_t mpt_raid_timer;
>  static void mpt_enable_vol(struct mpt_softc *mpt,
>  struct mpt_raid_volume *mpt_vol, int enable);
>  #endif
> -static void mpt_verify_mwce(struct mpt_softc *, struct mpt_raid_volume *);
> +static void mpt_verify_mwce(struct mpt_softc *, struct mpt_raid_volume *, 
> int);
>  static void mpt_adjust_queue_depth(struct mpt_softc *, struct 
> mpt_raid_volume *,
>  struct cam_path *);
>  #if __FreeBSD_version < 50
> @@ -135,7 +135,7 @@ static void mpt_disk_prt(struct mpt_softc *mpt, st
>  static int mpt_issue_raid_req(struct mpt_softc *mpt,
>  struct mpt_raid_volume *vol, struct mpt_raid_disk *disk, request_t *req,
>  u_int Action, uint32_t ActionDataWord, bus_addr_t addr, bus_size_t len,
> -int write, int wait);
> +int write, int wait, int sleep_ok);
>  
>  static int mpt_refresh_raid_data(struct mpt_softc *mpt);
>  static void mpt_schedule_raid_refresh(struct mpt_softc *mpt);
> @@ -517,7 +517,7 @@ mpt_raid_shutdown(struct mpt_softc *mpt)
>  
>   mpt->raid_mwce_setting = MPT_RAID_MWCE_OFF;
>   RAID_VOL_FOREACH(mpt, mpt_vol) {
> - mpt_verify_mwce(mpt, mpt_vol);
> + mpt_verify_mwce(mpt, mpt_vol, FALSE);
>   }
>  }
>  
> @@ -592,7 +592,7 @@ static int
>  mpt_issue_raid_req(struct mpt_softc *mpt, struct mpt_raid_volume *vol,
>  struct mpt_raid_disk *disk, request_t *req, u_int Action,
>  uint32_t ActionDataWord, bus_addr_t addr, bus_size_t len,
> -int write, int wait)
> +int write, int wait, int sleep_ok)
>  {
>   MSG_RAID_ACTION_REQUEST *rap;
>   SGE_SIMPLE32 *se;
> @@ -623,7 +623,7 @@ mpt_issue_raid_req(struct mpt_softc *mpt, struct m
>  
>   if (wait) {
>   return (mpt_wait_req(mpt, req, REQ_STATE_DONE, REQ_STATE_DONE,
> -  /*sleep_ok*/FALSE, /*time_ms*/2000));
> +  sleep_ok, /*time_ms*/2000));
>   } else {
>   return (0);
>   }
> @@ -763,7 +763,7 @@ mpt_raid_quiesce_disk(struct mpt_softc *mpt, struc
>   MPI_RAID_ACTION_QUIESCE_PHYS_IO,
>   /*ActionData*/0, /*addr*/0,
>   /*len*/0, /*write*/FALSE,
> - /*wait*/FALSE);
> + /*wait*/FALSE, /*sleep_ok*/FALSE);
>   if (rv != 0)
>   return (CAM_REQ_CMP_ERR);
>  
> @@ -882,7 +882,7 @@ mpt_enable_vol(struct mpt_softc *mpt, struct mpt_r
>   enable ? MPI_RAID_ACTION_ENABLE_VOLUME
>  : MPI_RAID_ACTION_DISABLE_VOLUME,
>   /*data*/0, /*addr*/0, /*len*/0,
> - /*write*/FALSE, /*wait*/TRUE);
> + /*write*/FALSE, /*wait*/TRUE, /*sleep_ok*/TRUE);
>   if (rv == ETIMEDOUT) {
>   mpt_vol_prt(mpt, mpt_vol, "mpt_enable_vol: "
>   "%s Volume Timed-out\n",
> @@ -903,7 +903,8 @@ mpt_enable_vol(struct mpt_softc *mpt, struct mpt_r
>  #endif
>  
>  static void
> -mpt_verify_mwce(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol)
> +mpt_verify_mwce(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol,
> +int sleep_ok)
>  {
>   request_t *req;
>   struct mpt_raid_action_result *ar;
> @@ -950,7 +951,7 @@ static void
>   return;
>   }
>  
> - req = mpt_get_request(mpt, /*sleep_ok*/TRUE);
> + req = mpt_get_request(mpt, sleep_ok);
>   if (req == NULL) {
>   mpt_vol_prt(mpt, mpt_vol,
>   "mpt_verify_mwce: Get request failed!\n");
> @@ -965,7 +966,7 @@ static void
>   rv = mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req,
>   MPI_RAID_ACTION_CHANGE_VOLUME_SETTINGS,
>   data, /*addr*/0, /*len*/0,
> - /*write*/FALSE, /*wait*/TRUE);
> + /*write*/FALSE, /*wait*/TRUE, sleep_ok);
>   if (rv == ETIMEDOUT) {
>   mpt_vol_prt(mpt, mpt_vol, "mpt_verify_mwce: "
>   "Write Cache Enable Timed-out\n");
> @@ -1018,7 +1019,8 @@ mpt_verify_resync_rate(struct mpt_softc *mpt, stru
>   rv = mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req,
>   MPI_RAID_ACTION_SET_RESYNC_RATE,
> 

Re: mpt irq timeout problem after reboot - only if non-verbose booting !?!

2012-10-17 Thread Harald Schmalzbauer (mobil)

-Ursprüngliche Nachricht-

Von: John Baldwin 
An: freebsd-stable@freebsd.org
Cc: h.schmalzba...@omnilan.de
Gesendet: 17.10.'12,  20:46

On Tuesday, October 16, 2012 5:24:44 am Harald Schmalzbauer wrote:

 Hello,

I have 9.1-RC2 running in an ESXi 5.1 guest.
I use 'lsisas' as virtual SCSI-Controller and mpt attaches and finds 1068E.

Everything is working fine until the first 'shutdown -r now':
The second boot pauses for ~2 minutes after probing disks and continues
with this error:
mpt0: Timedout requests already complete. Interrupts may not be functioning.


To be clear, you only see this at the end of reboot, and the hardware is fine
once the machine is back up?

.

Thanks for your attention!
The timeout occurs after the first 'shutdown -r' while device probing during 
second boot process.
Perhaps this is amd64 specific. Today I had a new i386 setup which doesn't 
exhibit this timeout. But it's on different hardware and hv-host was 5.0 
instead of 5.1. So not really representative...


Extra printfs affect the timing most likely.

Are you using any RAID volumes?  The only shutdown handler in mpt that looks


The controller is 'virtual' SAS. But there have been reports that also on real HW 
(sasuc8i) the same problem occurs.

I'll try your patch tomorrow morning; timezone shift...

Thanks,

-Harry
___
freebsd-stable@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"

Re: mpt irq timeout problem after reboot - only if non-verbose booting !?!

2012-10-17 Thread John Baldwin
On Tuesday, October 16, 2012 5:24:44 am Harald Schmalzbauer wrote:
>  Hello,
> 
> I have 9.1-RC2 running in an ESXi 5.1 guest.
> I use 'lsisas' as virtual SCSI-Controller and mpt attaches and finds 1068E.
> 
> Everything is working fine until the first 'shutdown -r now':
> The second boot pauses for ~2 minutes after probing disks and continues
> with this error:
> mpt0: Timedout requests already complete. Interrupts may not be functioning.

To be clear, you only see this at the end of reboot, and the hardware is fine
once the machine is back up?

> This problem was also observed with real 1068 hardware:
> http://lists.freebsd.org/pipermail/freebsd-stable/2011-September/063937.html
> 
> When I power off the virtual machine instead of rebooting, the problem
> doesn't occur.
> 
> Accidentally I found a workaround ;-) :
> If I set 'verbose_boot' in loader.conf, the problem vanishes!?!?!?
> 
> Any idea how „verbose_boot“ affects the operation of the mpt driver?

Extra printfs affect the timing most likely.

Are you using any RAID volumes?  The only shutdown handler in mpt that looks
like it might want interrupts to work is mpt_raid_shutdown().  It needs to use
polled I/O instead of disabling interrupts I think.  Try this:

Index: mpt_raid.c
===
--- mpt_raid.c  (revision 241641)
+++ mpt_raid.c  (working copy)
@@ -115,7 +115,7 @@ static timeout_t mpt_raid_timer;
 static void mpt_enable_vol(struct mpt_softc *mpt,
   struct mpt_raid_volume *mpt_vol, int enable);
 #endif
-static void mpt_verify_mwce(struct mpt_softc *, struct mpt_raid_volume *);
+static void mpt_verify_mwce(struct mpt_softc *, struct mpt_raid_volume *, int);
 static void mpt_adjust_queue_depth(struct mpt_softc *, struct mpt_raid_volume 
*,
 struct cam_path *);
 #if __FreeBSD_version < 50
@@ -135,7 +135,7 @@ static void mpt_disk_prt(struct mpt_softc *mpt, st
 static int mpt_issue_raid_req(struct mpt_softc *mpt,
 struct mpt_raid_volume *vol, struct mpt_raid_disk *disk, request_t *req,
 u_int Action, uint32_t ActionDataWord, bus_addr_t addr, bus_size_t len,
-int write, int wait);
+int write, int wait, int sleep_ok);
 
 static int mpt_refresh_raid_data(struct mpt_softc *mpt);
 static void mpt_schedule_raid_refresh(struct mpt_softc *mpt);
@@ -517,7 +517,7 @@ mpt_raid_shutdown(struct mpt_softc *mpt)
 
mpt->raid_mwce_setting = MPT_RAID_MWCE_OFF;
RAID_VOL_FOREACH(mpt, mpt_vol) {
-   mpt_verify_mwce(mpt, mpt_vol);
+   mpt_verify_mwce(mpt, mpt_vol, FALSE);
}
 }
 
@@ -592,7 +592,7 @@ static int
 mpt_issue_raid_req(struct mpt_softc *mpt, struct mpt_raid_volume *vol,
   struct mpt_raid_disk *disk, request_t *req, u_int Action,
   uint32_t ActionDataWord, bus_addr_t addr, bus_size_t len,
-  int write, int wait)
+  int write, int wait, int sleep_ok)
 {
MSG_RAID_ACTION_REQUEST *rap;
SGE_SIMPLE32 *se;
@@ -623,7 +623,7 @@ mpt_issue_raid_req(struct mpt_softc *mpt, struct m
 
if (wait) {
return (mpt_wait_req(mpt, req, REQ_STATE_DONE, REQ_STATE_DONE,
-/*sleep_ok*/FALSE, /*time_ms*/2000));
+sleep_ok, /*time_ms*/2000));
} else {
return (0);
}
@@ -763,7 +763,7 @@ mpt_raid_quiesce_disk(struct mpt_softc *mpt, struc
MPI_RAID_ACTION_QUIESCE_PHYS_IO,
/*ActionData*/0, /*addr*/0,
/*len*/0, /*write*/FALSE,
-   /*wait*/FALSE);
+   /*wait*/FALSE, /*sleep_ok*/FALSE);
if (rv != 0)
return (CAM_REQ_CMP_ERR);
 
@@ -882,7 +882,7 @@ mpt_enable_vol(struct mpt_softc *mpt, struct mpt_r
enable ? MPI_RAID_ACTION_ENABLE_VOLUME
   : MPI_RAID_ACTION_DISABLE_VOLUME,
/*data*/0, /*addr*/0, /*len*/0,
-   /*write*/FALSE, /*wait*/TRUE);
+   /*write*/FALSE, /*wait*/TRUE, /*sleep_ok*/TRUE);
if (rv == ETIMEDOUT) {
mpt_vol_prt(mpt, mpt_vol, "mpt_enable_vol: "
"%s Volume Timed-out\n",
@@ -903,7 +903,8 @@ mpt_enable_vol(struct mpt_softc *mpt, struct mpt_r
 #endif
 
 static void
-mpt_verify_mwce(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol)
+mpt_verify_mwce(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol,
+int sleep_ok)
 {
request_t *req;
struct mpt_raid_action_result *ar;
@@ -950,7 +951,7 @@ static void
return;
}
 
-   req = mpt_get_request(mpt, /*sleep_ok*/TRUE);
+   req = mpt_get_request(mpt, sleep_ok);
if (req == NULL) {
   

mpt irq timeout problem after reboot - only if non-verbose booting !?!

2012-10-16 Thread Harald Schmalzbauer
 Hello,

I have 9.1-RC2 running in an ESXi 5.1 guest.
I use 'lsisas' as virtual SCSI-Controller and mpt attaches and finds 1068E.

Everything is working fine until the first 'shutdown -r now':
The second boot pauses for ~2 minutes after probing disks and continues
with this error:
mpt0: Timedout requests already complete. Interrupts may not be functioning.

This problem was also observed with real 1068 hardware:
http://lists.freebsd.org/pipermail/freebsd-stable/2011-September/063937.html

When I power off the virtual machine instead of rebooting, the problem
doesn't occur.

Accidentally I found a workaround ;-) :
If I set 'verbose_boot' in loader.conf, the problem vanishes!?!?!?

Any idea how „verbose_boot“ affects the operation of the mpt driver?

Thanks,

-Harry
Table 'FACP' at 0xbfefee98
Table 'BOOT' at 0xbfef01fc
Table 'APIC' at 0xbfef0182
APIC: Found table at 0xbfef0182
APIC: Using the MADT enumerator.
MADT: Found CPU APIC ID 0 ACPI ID 0: enabled
SMP: Added CPU 0 (AP)
MADT: Found CPU APIC ID 1 ACPI ID 1: enabled
SMP: Added CPU 1 (AP)
MADT: Found CPU APIC ID 2 ACPI ID 2: enabled
SMP: Added CPU 2 (AP)
MADT: Found CPU APIC ID 3 ACPI ID 3: enabled
SMP: Added CPU 3 (AP)
Copyright (c) 1992-2012 The FreeBSD Project.
Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994
The Regents of the University of California. All rights reserved.
FreeBSD is a registered trademark of The FreeBSD Foundation.
FreeBSD 9.1-RC2 #11 r241503M: Sat Oct 13 15:56:02 CEST 2012

ad...@gundi.vnl.wdn.omnilan.net:/usr/local/share/deploy-tools/obj-amd64/VMWARE/usr/local/share/deploy-tools/RELENG_9_1/src/sys/VMWARE.flint
 amd64
Preloaded elf kernel "/boot/kernel/kernel" at 0x80f95000.
Preloaded elf obj module "/boot/kernel/zfs.ko" at 0x80f95210.
Preloaded elf obj module "/boot/kernel/opensolaris.ko" at 0x80f958b8.
Preloaded /boot/zfs/zpool.cache "/boot/zfs/zpool.cache" at 0x80f95ee8.
Preloaded elf obj module "/boot/kernel/aesni.ko" at 0x80f95f48.
Preloaded elf obj module "/boot/modules/vmxnet3.ko" at 0x80f96570.
Preloaded elf obj module "/boot/kernel/mps.ko" at 0x80f96ae0.
Hypervisor: Origin = "VMwareVMware"
CPU: Intel(R) Xeon(R) CPU E3-1270 V2 @ 3.50GHz (3492.07-MHz K8-class CPU)
  Origin = "GenuineIntel"  Id = 0x306a9  Family = 6  Model = 3a  Stepping = 9
  
Features=0x1fa3fbff
  
Features2=0xfeba2203
  AMD Features=0x28100800
  AMD Features2=0x1
  TSC: P-state invariant
real memory  = 8589934592 (8192 MB)
Physical memory chunk(s):
0x1000 - 0x0009bfff, 634880 bytes (155 pages)
0x0010 - 0x001f, 1048576 bytes (256 pages)
0x00fcb000 - 0xbfed, 3203485696 bytes (782101 pages)
0xbff0 - 0xbfff, 1048576 bytes (256 pages)
0x0001 - 0x00022f11, 5084676096 bytes (1241376 pages)
avail memory = 8234651648 (7853 MB)
INTR: Adding local APIC 0 as a target
Event timer "LAPIC" quality 600
ACPI APIC Table: 
INTR: Adding local APIC 0 as a target
INTR: Adding local APIC 1 as a target
INTR: Adding local APIC 2 as a target
INTR: Adding local APIC 3 as a target
FreeBSD/SMP: Multiprocessor System Detected: 4 CPUs
FreeBSD/SMP: 1 package(s) x 4 core(s)
 cpu0 (BSP): APIC ID:  0
 cpu1 (AP): APIC ID:  1
 cpu2 (AP): APIC ID:  2
 cpu3 (AP): APIC ID:  3
APIC: CPU 0 has ACPI ID 0
APIC: CPU 1 has ACPI ID 1
APIC: CPU 2 has ACPI ID 2
APIC: CPU 3 has ACPI ID 3
x86bios:  IVT 0x00-0x0004ff at 0xfe00
x86bios: SSEG 0x001000-0x001fff at 0xff800023
x86bios: EBDA 0x09f000-0x09 at 0xfe09f000
x86bios:  ROM 0x0a-0x0fefff at 0xfe0a
ULE: setup cpu 0
ULE: setup cpu 1
ULE: setup cpu 2
ULE: setup cpu 3
ACPI: RSDP 0xf6b80 00024 (v02 PTLTD )
ACPI: XSDT 0xbfeeff3c 0005C (v01 INTEL  440BX0604 VMW  01324272)
ACPI: FACP 0xbfefee98 000F4 (v04 INTEL  440BX0604 PTL  000F4240)
ACPI: DSDT 0xbfef0224 0EC74 (v01 PTLTD  Custom   0604 MSFT 0301)
ACPI: FACS 0xbfefffc0 00040
ACPI: BOOT 0xbfef01fc 00028 (v01 PTLTD  $SBFTBL$ 0604  LTP 0001)
ACPI: APIC 0xbfef0182 0007A (v01 PTLTD  ? APIC   0604  LTP )
ACPI: MCFG 0xbfef0146 0003C (v01 PTLTD  $PCITBL$ 0604  LTP 0001)
ACPI: SRAT 0xbfef005e 000E8 (v02 VMWARE MEMPLUG  0604 VMW  0001)
ACPI: HPET 0xbfef0026 00038 (v01 VMWARE VMW HPET 0604 VMW  0001)
ACPI: WAET 0xbfeeffd8 00028 (v01 VMWARE VMW WAET 0604 VMW  0001)
MADT: Found IO APIC ID 4, Interrupt 0 at 0xfec0
ioapic0: Routing external 8259A's -> intpin 0
MADT: Interrupt override: source 0, irq 2
ioapic0: Routing IRQ 0 -> intpin 2
lapic0: Routing NMI -> LINT1
lapic0: LINT1 trigger: edge
lapic0: LINT1 polarity: high
lapic1: Routing NMI -> LINT1
lapic1: LINT1 trigger: edge
lapic1: LINT1 polarity: high
lapic2: Routing NMI -> LINT1
lapic2: LINT1 trigger: edge
lapic2: LINT1 polarity: high
lapic3: Routing NMI -> LINT1
lapic3: LINT1 trigger: edge
lapic3: LINT1 polarity: high
MADT: Forcing active-low