from:"Ivan Mironov"

Re: [PATCH] HID: sony: Support for DS4 clones that do not implement feature report 0x81

2021-02-12 Thread Ivan Mironov

Ignore this patch, I am working on a better one.

On Wed, 2021-01-13 at 22:34 +0500, Ivan Mironov wrote:
> There are clones of DualShock 4 that are very similar to the originals,
> except of 1) they do not support HID feature report 0x81 and 2) they do
> not have any USB Audio interfaces despite they physically have audio
> jack.
> 
> Such controllers are working fine with Linux when connected via
> Bluetooth, but not when connected via USB. Here is how failed USB
> connection attempt looks in log:
> 
>   usb 1-5: New USB device found, idVendor=054c, idProduct=05c4, 
> bcdDevice= 1.00
>   usb 1-5: New USB device strings: Mfr=1, Product=2, SerialNumber=0
>   usb 1-5: Product: Wireless Controller
>   usb 1-5: Manufacturer: Sony Computer Entertainment
>   sony 0003:054C:05C4.0007: failed to retrieve feature report 0x81 with 
> the DualShock 4 MAC address
>   sony 0003:054C:05C4.0007: hidraw6: USB HID v81.11 Gamepad [Sony 
> Computer Entertainment Wireless Controller] on usb-:00:14.0-5/input0
>   sony 0003:054C:05C4.0007: failed to claim input
> 
> This patch adds support of using feature report 0x12 as a fallback for
> Bluetooth MAC address retrieval. Feature report 0x12 also seems to be
> used by DS4Windows[1] for all DS4 controllers.
> 
> [1] 
> https://github.com/Ryochan7/DS4Windows/blob/1b74a4440089f38a24ee2c2483c1d733a0692b8f/DS4Windows/HidLibrary/HidDevice.cs#L479
> 
> Signed-off-by: Ivan Mironov 
> ---
>  drivers/hid/hid-sony.c | 72 ++
>  1 file changed, 52 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
> index e3a557dc9ffd..97df12180e45 100644
> --- a/drivers/hid/hid-sony.c
> +++ b/drivers/hid/hid-sony.c
> @@ -491,6 +491,7 @@ struct motion_output_report_02 {
>  
> 
>  #define DS4_FEATURE_REPORT_0x02_SIZE 37
>  #define DS4_FEATURE_REPORT_0x05_SIZE 41
> +#define DS4_FEATURE_REPORT_0x12_SIZE 16
>  #define DS4_FEATURE_REPORT_0x81_SIZE 7
>  #define DS4_FEATURE_REPORT_0xA3_SIZE 49
>  #define DS4_INPUT_REPORT_0x11_SIZE 78
> @@ -2593,6 +2594,53 @@ static int sony_get_bt_devaddr(struct sony_sc *sc)
>   return 0;
>  }
>  
> 
> +static int sony_get_usb_ds4_devaddr(struct sony_sc *sc)
> +{
> + u8 *buf = NULL;
> + int ret;
> +
> + buf = kmalloc(max(DS4_FEATURE_REPORT_0x12_SIZE, 
> DS4_FEATURE_REPORT_0x81_SIZE), GFP_KERNEL);
> + if (!buf)
> + return -ENOMEM;
> +
> + /*
> +  * The MAC address of a DS4 controller connected via USB can be
> +  * retrieved with feature report 0x81. The address begins at
> +  * offset 1.
> +  */
> + ret = hid_hw_raw_request(sc->hdev, 0x81, buf,
> + DS4_FEATURE_REPORT_0x81_SIZE, HID_FEATURE_REPORT,
> + HID_REQ_GET_REPORT);
> + if (ret == DS4_FEATURE_REPORT_0x81_SIZE) {
> + memcpy(sc->mac_address, [1], sizeof(sc->mac_address));
> + goto out_free;
> + }
> + dbg_hid("%s: hid_hw_raw_request(..., 0x81, ...) returned %d\n", 
> __func__, ret);
> +
> + /*
> +  * Some variants do not implement feature report 0x81 at all.
> +  * Fortunately, feature report 0x12 also contains the MAC address of
> +  * a controller.
> +  */
> + ret = hid_hw_raw_request(sc->hdev, 0x12, buf,
> + DS4_FEATURE_REPORT_0x12_SIZE, HID_FEATURE_REPORT,
> + HID_REQ_GET_REPORT);
> + if (ret == DS4_FEATURE_REPORT_0x12_SIZE) {
> + memcpy(sc->mac_address, [1], sizeof(sc->mac_address));
> + goto out_free;
> + }
> + dbg_hid("%s: hid_hw_raw_request(..., 0x12, ...) returned %d\n", 
> __func__, ret);
> +
> + hid_err(sc->hdev, "failed to retrieve feature reports 0x81 and 0x12 
> with the DualShock 4 MAC address\n");
> + ret = ret < 0 ? ret : -EINVAL;
> +
> +out_free:
> +
> + kfree(buf);
> +
> + return ret;
> +}
> +
>  static int sony_check_add(struct sony_sc *sc)
>  {
>   u8 *buf = NULL;
> @@ -2613,26 +2661,9 @@ static int sony_check_add(struct sony_sc *sc)
>   return 0;
>   }
>   } else if (sc->quirks & (DUALSHOCK4_CONTROLLER_USB | 
> DUALSHOCK4_DONGLE)) {
> - buf = kmalloc(DS4_FEATURE_REPORT_0x81_SIZE, GFP_KERNEL);
> - if (!buf)
> - return -ENOMEM;
> -
> - /*
> -  * The MAC address of a DS4 controller connected via USB can be
> -  * retrieved with feature report 0x81. The address begins at
> -  * offset 1.
> -  */
> -

[PATCH] HID: sony: Support for DS4 clones that do not implement feature report 0x81

2021-01-13 Thread Ivan Mironov

There are clones of DualShock 4 that are very similar to the originals,
except of 1) they do not support HID feature report 0x81 and 2) they do
not have any USB Audio interfaces despite they physically have audio
jack.

Such controllers are working fine with Linux when connected via
Bluetooth, but not when connected via USB. Here is how failed USB
connection attempt looks in log:

usb 1-5: New USB device found, idVendor=054c, idProduct=05c4, 
bcdDevice= 1.00
usb 1-5: New USB device strings: Mfr=1, Product=2, SerialNumber=0
usb 1-5: Product: Wireless Controller
usb 1-5: Manufacturer: Sony Computer Entertainment
sony 0003:054C:05C4.0007: failed to retrieve feature report 0x81 with 
the DualShock 4 MAC address
sony 0003:054C:05C4.0007: hidraw6: USB HID v81.11 Gamepad [Sony 
Computer Entertainment Wireless Controller] on usb-:00:14.0-5/input0
sony 0003:054C:05C4.0007: failed to claim input

This patch adds support of using feature report 0x12 as a fallback for
Bluetooth MAC address retrieval. Feature report 0x12 also seems to be
used by DS4Windows[1] for all DS4 controllers.

[1] 
https://github.com/Ryochan7/DS4Windows/blob/1b74a4440089f38a24ee2c2483c1d733a0692b8f/DS4Windows/HidLibrary/HidDevice.cs#L479

Signed-off-by: Ivan Mironov 
---
 drivers/hid/hid-sony.c | 72 ++
 1 file changed, 52 insertions(+), 20 deletions(-)

diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index e3a557dc9ffd..97df12180e45 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -491,6 +491,7 @@ struct motion_output_report_02 {
 
 #define DS4_FEATURE_REPORT_0x02_SIZE 37
 #define DS4_FEATURE_REPORT_0x05_SIZE 41
+#define DS4_FEATURE_REPORT_0x12_SIZE 16
 #define DS4_FEATURE_REPORT_0x81_SIZE 7
 #define DS4_FEATURE_REPORT_0xA3_SIZE 49
 #define DS4_INPUT_REPORT_0x11_SIZE 78
@@ -2593,6 +2594,53 @@ static int sony_get_bt_devaddr(struct sony_sc *sc)
return 0;
 }
 
+static int sony_get_usb_ds4_devaddr(struct sony_sc *sc)
+{
+   u8 *buf = NULL;
+   int ret;
+
+   buf = kmalloc(max(DS4_FEATURE_REPORT_0x12_SIZE, 
DS4_FEATURE_REPORT_0x81_SIZE), GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   /*
+* The MAC address of a DS4 controller connected via USB can be
+* retrieved with feature report 0x81. The address begins at
+* offset 1.
+*/
+   ret = hid_hw_raw_request(sc->hdev, 0x81, buf,
+   DS4_FEATURE_REPORT_0x81_SIZE, HID_FEATURE_REPORT,
+   HID_REQ_GET_REPORT);
+   if (ret == DS4_FEATURE_REPORT_0x81_SIZE) {
+   memcpy(sc->mac_address, [1], sizeof(sc->mac_address));
+   goto out_free;
+   }
+   dbg_hid("%s: hid_hw_raw_request(..., 0x81, ...) returned %d\n", 
__func__, ret);
+
+   /*
+* Some variants do not implement feature report 0x81 at all.
+* Fortunately, feature report 0x12 also contains the MAC address of
+* a controller.
+*/
+   ret = hid_hw_raw_request(sc->hdev, 0x12, buf,
+   DS4_FEATURE_REPORT_0x12_SIZE, HID_FEATURE_REPORT,
+   HID_REQ_GET_REPORT);
+   if (ret == DS4_FEATURE_REPORT_0x12_SIZE) {
+   memcpy(sc->mac_address, [1], sizeof(sc->mac_address));
+   goto out_free;
+   }
+   dbg_hid("%s: hid_hw_raw_request(..., 0x12, ...) returned %d\n", 
__func__, ret);
+
+   hid_err(sc->hdev, "failed to retrieve feature reports 0x81 and 0x12 
with the DualShock 4 MAC address\n");
+   ret = ret < 0 ? ret : -EINVAL;
+
+out_free:
+
+   kfree(buf);
+
+   return ret;
+}
+
 static int sony_check_add(struct sony_sc *sc)
 {
u8 *buf = NULL;
@@ -2613,26 +2661,9 @@ static int sony_check_add(struct sony_sc *sc)
return 0;
}
} else if (sc->quirks & (DUALSHOCK4_CONTROLLER_USB | 
DUALSHOCK4_DONGLE)) {
-   buf = kmalloc(DS4_FEATURE_REPORT_0x81_SIZE, GFP_KERNEL);
-   if (!buf)
-   return -ENOMEM;
-
-   /*
-* The MAC address of a DS4 controller connected via USB can be
-* retrieved with feature report 0x81. The address begins at
-* offset 1.
-*/
-   ret = hid_hw_raw_request(sc->hdev, 0x81, buf,
-   DS4_FEATURE_REPORT_0x81_SIZE, 
HID_FEATURE_REPORT,
-   HID_REQ_GET_REPORT);
-
-   if (ret != DS4_FEATURE_REPORT_0x81_SIZE) {
-   hid_err(sc->hdev, "failed to retrieve feature report 
0x81 with the DualShock 4 MAC address\n");
-   ret = ret < 0 ? ret : -EINVAL;
-   goto out_free;
-   }
-
-   memcpy(sc->mac_address, [1], sizeof(sc->mac_addr

Re: [PATCH v1] drm/amd/powerplay: Fix NULL dereference in lock_bus() on Vega20 w/o RAS

2020-06-25 Thread Ivan Mironov

Issue still reproduces on latest 5.8.0-rc2+
(8be3a53e18e0e1a98f288f6c7f5e9da3adbe9c49).

[PATCH v1] drm/amd/powerplay: Fix NULL dereference in lock_bus() on Vega20 w/o RAS

2020-06-25 Thread Ivan Mironov

I updated my system with Radeon VII from kernel 5.6 to kernel 5.7, and
following started to happen on each boot:

...
BUG: kernel NULL pointer dereference, address: 0128
...
CPU: 9 PID: 1940 Comm: modprobe Tainted: GE 
5.7.2-200.im0.fc32.x86_64 #1
Hardware name: System manufacturer System Product Name/PRIME X570-P, 
BIOS 1407 04/02/2020
RIP: 0010:lock_bus+0x42/0x60 [amdgpu]
...
Call Trace:
 i2c_smbus_xfer+0x3d/0xf0
 i2c_default_probe+0xf3/0x130
 i2c_detect.isra.0+0xfe/0x2b0
 ? kfree+0xa3/0x200
 ? kobject_uevent_env+0x11f/0x6a0
 ? i2c_detect.isra.0+0x2b0/0x2b0
 __process_new_driver+0x1b/0x20
 bus_for_each_dev+0x64/0x90
 ? 0xc0f34000
 i2c_register_driver+0x73/0xc0
 do_one_initcall+0x46/0x200
 ? _cond_resched+0x16/0x40
 ? kmem_cache_alloc_trace+0x167/0x220
 ? do_init_module+0x23/0x260
 do_init_module+0x5c/0x260
 __do_sys_init_module+0x14f/0x170
 do_syscall_64+0x5b/0xf0
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
...

Error appears when some i2c device driver tries to probe for devices
using adapter registered by `smu_v11_0_i2c_eeprom_control_init()`.
Code supporting this adapter requires `adev->psp.ras.ras` to be not
NULL, which is true only when `amdgpu_ras_init()` detects HW support by
calling `amdgpu_ras_check_supported()`.

Before 9015d60c9ee1, adapter was registered by

-> amdgpu_device_ip_init()
  -> amdgpu_ras_recovery_init()
-> amdgpu_ras_eeprom_init()
  -> smu_v11_0_i2c_eeprom_control_init()

after verifying that `adev->psp.ras.ras` is not NULL in
`amdgpu_ras_recovery_init()`. Currently it is registered
unconditionally by

-> amdgpu_device_ip_init()
  -> pp_sw_init()
-> hwmgr_sw_init()
  -> vega20_smu_init()
-> smu_v11_0_i2c_eeprom_control_init()

Fix simply adds HW support check (ras == NULL => no support) before
calling `smu_v11_0_i2c_eeprom_control_{init,fini}()`.

Please note that there is a chance that similar fix is also required for
CHIP_ARCTURUS. I do not know whether any actual Arcturus hardware without
RAS exist, and whether calling `smu_i2c_eeprom_init()` makes any sense
when there is no HW support.

Cc: sta...@vger.kernel.org
Fixes: 9015d60c9ee1 ("drm/amdgpu: Move EEPROM I2C adapter to amdgpu_device")
Signed-off-by: Ivan Mironov 
Tested-by: Bjorn Nostvold 
---
Changelog:

v1:
  - Added "Tested-by" for another user who used this patch to fix the
same issue.

v0:
  - Patch introduced.
---
 drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
index 2fb97554134f..c2e0fbbccf56 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
@@ -522,9 +522,11 @@ static int vega20_smu_init(struct pp_hwmgr *hwmgr)
priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].version = 0x01;
priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].size = 
sizeof(DpmActivityMonitorCoeffInt_t);
 
-   ret = smu_v11_0_i2c_eeprom_control_init(>pm.smu_i2c);
-   if (ret)
-   goto err4;
+   if (adev->psp.ras.ras) {
+   ret = smu_v11_0_i2c_eeprom_control_init(>pm.smu_i2c);
+   if (ret)
+   goto err4;
+   }
 
return 0;
 
@@ -560,7 +562,8 @@ static int vega20_smu_fini(struct pp_hwmgr *hwmgr)
(struct vega20_smumgr *)(hwmgr->smu_backend);
struct amdgpu_device *adev = hwmgr->adev;
 
-   smu_v11_0_i2c_eeprom_control_fini(>pm.smu_i2c);
+   if (adev->psp.ras.ras)
+   smu_v11_0_i2c_eeprom_control_fini(>pm.smu_i2c);
 
if (priv) {

amdgpu_bo_free_kernel(>smu_tables.entry[TABLE_PPTABLE].handle,
-- 
2.26.2

[PATCH] drm/amd/powerplay: Fix NULL dereference in lock_bus() on Vega20 w/o RAS

2020-06-16 Thread Ivan Mironov

I updated my system with Radeon VII from kernel 5.6 to kernel 5.7, and
following started to happen on each boot:

...
BUG: kernel NULL pointer dereference, address: 0128
...
CPU: 9 PID: 1940 Comm: modprobe Tainted: GE 
5.7.2-200.im0.fc32.x86_64 #1
Hardware name: System manufacturer System Product Name/PRIME X570-P, 
BIOS 1407 04/02/2020
RIP: 0010:lock_bus+0x42/0x60 [amdgpu]
...
Call Trace:
 i2c_smbus_xfer+0x3d/0xf0
 i2c_default_probe+0xf3/0x130
 i2c_detect.isra.0+0xfe/0x2b0
 ? kfree+0xa3/0x200
 ? kobject_uevent_env+0x11f/0x6a0
 ? i2c_detect.isra.0+0x2b0/0x2b0
 __process_new_driver+0x1b/0x20
 bus_for_each_dev+0x64/0x90
 ? 0xc0f34000
 i2c_register_driver+0x73/0xc0
 do_one_initcall+0x46/0x200
 ? _cond_resched+0x16/0x40
 ? kmem_cache_alloc_trace+0x167/0x220
 ? do_init_module+0x23/0x260
 do_init_module+0x5c/0x260
 __do_sys_init_module+0x14f/0x170
 do_syscall_64+0x5b/0xf0
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
...

Error appears when some i2c device driver tries to probe for devices
using adapter registered by `smu_v11_0_i2c_eeprom_control_init()`.
Code supporting this adapter requires `adev->psp.ras.ras` to be not
NULL, which is true only when `amdgpu_ras_init()` detects HW support by
calling `amdgpu_ras_check_supported()`.

Before 9015d60c9ee1, adapter was registered by

-> amdgpu_device_ip_init()
  -> amdgpu_ras_recovery_init()
-> amdgpu_ras_eeprom_init()
  -> smu_v11_0_i2c_eeprom_control_init()

after verifying that `adev->psp.ras.ras` is not NULL in
`amdgpu_ras_recovery_init()`. Currently it is registered
unconditionally by

-> amdgpu_device_ip_init()
  -> pp_sw_init()
-> hwmgr_sw_init()
  -> vega20_smu_init()
-> smu_v11_0_i2c_eeprom_control_init()

Fix simply adds HW support check (ras == NULL => no support) before
calling `smu_v11_0_i2c_eeprom_control_{init,fini}()`.

Please note that there is a chance that similar fix is also required for
CHIP_ARCTURUS. I do not know whether any actual Arcturus hardware without
RAS exist, and whether calling `smu_i2c_eeprom_init()` makes any sense
when there is no HW support.

Cc: sta...@vger.kernel.org
Fixes: 9015d60c9ee1 ("drm/amdgpu: Move EEPROM I2C adapter to amdgpu_device")
Signed-off-by: Ivan Mironov 
---
 drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
index 2fb97554134f..c2e0fbbccf56 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
@@ -522,9 +522,11 @@ static int vega20_smu_init(struct pp_hwmgr *hwmgr)
priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].version = 0x01;
priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].size = 
sizeof(DpmActivityMonitorCoeffInt_t);
 
-   ret = smu_v11_0_i2c_eeprom_control_init(>pm.smu_i2c);
-   if (ret)
-   goto err4;
+   if (adev->psp.ras.ras) {
+   ret = smu_v11_0_i2c_eeprom_control_init(>pm.smu_i2c);
+   if (ret)
+   goto err4;
+   }
 
return 0;
 
@@ -560,7 +562,8 @@ static int vega20_smu_fini(struct pp_hwmgr *hwmgr)
(struct vega20_smumgr *)(hwmgr->smu_backend);
struct amdgpu_device *adev = hwmgr->adev;
 
-   smu_v11_0_i2c_eeprom_control_fini(>pm.smu_i2c);
+   if (adev->psp.ras.ras)
+   smu_v11_0_i2c_eeprom_control_fini(>pm.smu_i2c);
 
if (priv) {

amdgpu_bo_free_kernel(>smu_tables.entry[TABLE_PPTABLE].handle,
-- 
2.26.2

Re: [RFC PATCH 0/2] Fix for the internal card reader and suspend on MacBooks

2019-02-25 Thread Ivan Mironov

On Thu, 2019-02-14 at 22:32 +0500, Ivan Mironov wrote:
> On Thu, 2019-02-14 at 06:40 +0500, Ivan Mironov wrote:
> > Unfortunately, everything broke again after yet another suspend/resume.
> > Currently I'm suspecting that my patch maybe only helps to survive the
> > short suspend, but not the long one.
> > 
> > After this bad suspend/resume, card reader disappeared again. Debug
> > logging was not enabled this time, so not too many information in the
> > dmesg:
> > 
> > [44013.429613] usb 2-4: Disable of device-initiated U1 failed.
> > [44018.549809] usb 2-4: Disable of device-initiated U2 failed.
> > [44024.182043] xhci_hcd :00:14.0: Timeout while waiting for setup 
> > device command
> > [44029.814239] xhci_hcd :00:14.0: Timeout while waiting for setup 
> > device command
> > [44030.022207] usb 2-4: device not accepting address 2, error -62
> > [44035.446526] xhci_hcd :00:14.0: Timeout while waiting for setup 
> > device command
> > [44041.078732] xhci_hcd :00:14.0: Timeout while waiting for setup 
> > device command
> > [44041.286640] usb 2-4: device not accepting address 2, error -62
> > [44046.710928] xhci_hcd :00:14.0: Timeout while waiting for setup 
> > device command
> > [44052.343184] xhci_hcd :00:14.0: Timeout while waiting for setup 
> > device command
> > [44052.551120] usb 2-4: device not accepting address 2, error -62
> > [44057.975369] xhci_hcd :00:14.0: Timeout while waiting for setup 
> > device command
> > [44063.607605] xhci_hcd :00:14.0: Timeout while waiting for setup 
> > device command
> > [44063.815538] usb 2-4: device not accepting address 2, error -62
> > [44063.882505] PM: resume devices took 55.895 seconds
> > [44063.882508] [ cut here ]
> > [44063.882511] Component: resume devices, time: 55895
> > [44063.882530] WARNING: CPU: 1 PID: 10887 at kernel/power/suspend_test.c:55 
> > suspend_test_finish+0x6b/0x70
> > [44063.882531] Modules linked in: vfat fat rfcomm fuse xt_CHECKSUM 
> > ipt_MASQUERADE tun bridge stp llc devlink nf_conntrack_netbios_ns 
> > nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 
> > xt_conntrack ebtable_nat ip6table_nat nf_nat_ipv6 ip6table_mangle 
> > ip6table_raw ip6table_security iptable_nat nf_nat_ipv4 nf_nat 
> > iptable_mangle iptable_raw iptable_security nf_conntrack nf_defrag_ipv6 
> > nf_defrag_ipv4 ip_set nfnetlink ebtable_filter ebtables ip6table_filter 
> > ip6_tables cmac bnep sunrpc nls_utf8 hfsplus joydev iTCO_wdt 
> > iTCO_vendor_support intel_rapl x86_pkg_temp_thermal btusb intel_powerclamp 
> > applesmc btrtl input_polldev brcmfmac btbcm coretemp btintel bluetooth 
> > kvm_intel brcmutil snd_hda_codec_cirrus snd_hda_codec_generic 
> > snd_hda_codec_hdmi intel_cstate snd_hda_intel intel_uncore cfg80211 
> > snd_hda_codec ecdh_generic intel_rapl_perf snd_hda_core snd_hwdep bcm5974 
> > snd_seq snd_seq_device snd_pcm mmc_core rfkill thunderbolt snd_timer snd 
> > mei_me mei soundcore i2c_i801 intel_pch_thermal
> > [44063.882614]  lpc_ich sbs acpi_als kfifo_buf sbshc industrialio 
> > apple_gmux pcc_cpufreq apple_bl binfmt_misc xfs libcrc32c dm_crypt i915 
> > kvmgt mdev vfio kvm irqbypass i2c_algo_bit drm_kms_helper crct10dif_pclmul 
> > crc32_pclmul crc32c_intel drm uas ghash_clmulni_intel usb_storage video 
> > hid_apple
> > [44063.882648] CPU: 1 PID: 10887 Comm: systemd-sleep Not tainted 
> > 4.20.7-200.ivan3.fc29.x86_64 #1
> > [44063.882651] Hardware name: Apple Inc. 
> > MacBookPro11,4/Mac-06F11FD93F0323C5, BIOS MBP114.88Z.0184.B00.1806051659 
> > 06/05/2018
> > [44063.882656] RIP: 0010:suspend_test_finish+0x6b/0x70
> > [44063.882660] Code: 06 69 c2 e8 03 00 00 29 c1 e8 df a3 00 00 81 fd 10 27 
> > 00 00 77 03 5b 5d c3 89 ea 48 89 de 48 c7 c7 e9 bb 0c b3 e8 1f 56 fa ff 
> > <0f> 0b eb e8 90 0f 1f 44 00 00 0f b6 05 49 e5 88 01 c3 0f 1f 00 0f
> > [44063.882663] RSP: :bad682b0fd30 EFLAGS: 00010286
> > [44063.882666] RAX:  RBX: b30cb9c2 RCX: 
> > 0006
> > [44063.882669] RDX: 0007 RSI: 0082 RDI: 
> > 8f70af0568c0
> > [44063.882671] RBP: da57 R08: 0002 R09: 
> > 000207c0
> > [44063.882674] R10: 002394f2f376 R11: 0001cd94 R12: 
> > 
> > [44063.882676] R13: b3254210 R14:  R15: 
> > bad682b0fd60
> > [44063.882681] FS:  7fd487426940() GS:8f70af04() 
> > knlGS:
> > [44063.882683] CS:  0010

Re: [RFC PATCH 1/4] watchdog: hpwdt: Don't disable watchdog on NMI

2019-02-14 Thread Ivan Mironov

On Thu, 2019-02-07 at 18:26 -0700, Jerry Hoemann wrote:
> The name is not the best given its current use, but I'm not sure a
> name change would be allowed.
> 
> However, I will send a patch to update the documentation in Kconfig.
> 
> 

Thanks!

Re: [RFC PATCH 0/2] Fix for the internal card reader and suspend on MacBooks

2019-02-14 Thread Ivan Mironov

On Thu, 2019-02-14 at 06:40 +0500, Ivan Mironov wrote:
> Unfortunately, everything broke again after yet another suspend/resume.
> Currently I'm suspecting that my patch maybe only helps to survive the
> short suspend, but not the long one.
> 
> After this bad suspend/resume, card reader disappeared again. Debug
> logging was not enabled this time, so not too many information in the
> dmesg:
> 
> [44013.429613] usb 2-4: Disable of device-initiated U1 failed.
> [44018.549809] usb 2-4: Disable of device-initiated U2 failed.
> [44024.182043] xhci_hcd :00:14.0: Timeout while waiting for setup device 
> command
> [44029.814239] xhci_hcd :00:14.0: Timeout while waiting for setup device 
> command
> [44030.022207] usb 2-4: device not accepting address 2, error -62
> [44035.446526] xhci_hcd :00:14.0: Timeout while waiting for setup device 
> command
> [44041.078732] xhci_hcd :00:14.0: Timeout while waiting for setup device 
> command
> [44041.286640] usb 2-4: device not accepting address 2, error -62
> [44046.710928] xhci_hcd :00:14.0: Timeout while waiting for setup device 
> command
> [44052.343184] xhci_hcd :00:14.0: Timeout while waiting for setup device 
> command
> [44052.551120] usb 2-4: device not accepting address 2, error -62
> [44057.975369] xhci_hcd :00:14.0: Timeout while waiting for setup device 
> command
> [44063.607605] xhci_hcd :00:14.0: Timeout while waiting for setup device 
> command
> [44063.815538] usb 2-4: device not accepting address 2, error -62
> [44063.882505] PM: resume devices took 55.895 seconds
> [44063.882508] [ cut here ]
> [44063.882511] Component: resume devices, time: 55895
> [44063.882530] WARNING: CPU: 1 PID: 10887 at kernel/power/suspend_test.c:55 
> suspend_test_finish+0x6b/0x70
> [44063.882531] Modules linked in: vfat fat rfcomm fuse xt_CHECKSUM 
> ipt_MASQUERADE tun bridge stp llc devlink nf_conntrack_netbios_ns 
> nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 
> xt_conntrack ebtable_nat ip6table_nat nf_nat_ipv6 ip6table_mangle 
> ip6table_raw ip6table_security iptable_nat nf_nat_ipv4 nf_nat iptable_mangle 
> iptable_raw iptable_security nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 
> ip_set nfnetlink ebtable_filter ebtables ip6table_filter ip6_tables cmac bnep 
> sunrpc nls_utf8 hfsplus joydev iTCO_wdt iTCO_vendor_support intel_rapl 
> x86_pkg_temp_thermal btusb intel_powerclamp applesmc btrtl input_polldev 
> brcmfmac btbcm coretemp btintel bluetooth kvm_intel brcmutil 
> snd_hda_codec_cirrus snd_hda_codec_generic snd_hda_codec_hdmi intel_cstate 
> snd_hda_intel intel_uncore cfg80211 snd_hda_codec ecdh_generic 
> intel_rapl_perf snd_hda_core snd_hwdep bcm5974 snd_seq snd_seq_device snd_pcm 
> mmc_core rfkill thunderbolt snd_timer snd mei_me mei soundcore i2c_i801 
> intel_pch_thermal
> [44063.882614]  lpc_ich sbs acpi_als kfifo_buf sbshc industrialio apple_gmux 
> pcc_cpufreq apple_bl binfmt_misc xfs libcrc32c dm_crypt i915 kvmgt mdev vfio 
> kvm irqbypass i2c_algo_bit drm_kms_helper crct10dif_pclmul crc32_pclmul 
> crc32c_intel drm uas ghash_clmulni_intel usb_storage video hid_apple
> [44063.882648] CPU: 1 PID: 10887 Comm: systemd-sleep Not tainted 
> 4.20.7-200.ivan3.fc29.x86_64 #1
> [44063.882651] Hardware name: Apple Inc. MacBookPro11,4/Mac-06F11FD93F0323C5, 
> BIOS MBP114.88Z.0184.B00.1806051659 06/05/2018
> [44063.882656] RIP: 0010:suspend_test_finish+0x6b/0x70
> [44063.882660] Code: 06 69 c2 e8 03 00 00 29 c1 e8 df a3 00 00 81 fd 10 27 00 
> 00 77 03 5b 5d c3 89 ea 48 89 de 48 c7 c7 e9 bb 0c b3 e8 1f 56 fa ff <0f> 0b 
> eb e8 90 0f 1f 44 00 00 0f b6 05 49 e5 88 01 c3 0f 1f 00 0f
> [44063.882663] RSP: :bad682b0fd30 EFLAGS: 00010286
> [44063.882666] RAX:  RBX: b30cb9c2 RCX: 
> 0006
> [44063.882669] RDX: 0007 RSI: 0082 RDI: 
> 8f70af0568c0
> [44063.882671] RBP: da57 R08: 0002 R09: 
> 000207c0
> [44063.882674] R10: 002394f2f376 R11: 0001cd94 R12: 
> 
> [44063.882676] R13: b3254210 R14:  R15: 
> bad682b0fd60
> [44063.882681] FS:  7fd487426940() GS:8f70af04() 
> knlGS:
> [44063.882683] CS:  0010 DS:  ES:  CR0: 80050033
> [44063.882686] CR2:  CR3: 0003fcc92002 CR4: 
> 001606e0
> [44063.882688] Call Trace:
> [44063.882699]  suspend_devices_and_enter+0x248/0x7f0
> [44063.882706]  pm_suspend.cold.5+0x33c/0x392
> [44063.882711]  state_store+0x80/0xe0
> [44063.882718]  kernfs_fop_write+0x116/0x190
> [44063.882728]  __vfs_write+0x36/0x1a0
> [44063.882736]  ? selinux_file_permission+0xf0/0x130

Re: [RFC PATCH 0/2] Fix for the internal card reader and suspend on MacBooks

2019-02-14 Thread Ivan Mironov

On Thu, 2019-02-14 at 17:03 +0200, Mathias Nyman wrote:
> This card reader prevents second system suspend on latest kernels, see thread:
> https://marc.info/?l=linux-usb=154816680816246=2
> 
> In that case the card reader fails to resume from usb3 U3 suspend state,
> and ends up stuck in USB3 polling state, which now prevents suspend
> 
> Could you try a testpatch (attached) to see if it helps?
> 
> Thanks
> -Mathias

Hi Mathias,

thanks for your patch. I applied it on top of current master (5.0.0-
rc6+ 1f947a7a011fcceb14cb912f5481a53b18f1879a) and tested it.
Unfortunately, without success.

Card reader disappeared after the first suspend/resume cycle. Second
suspend failed.

Complete dmesg is here: 
https://raw.githubusercontent.com/im-0/investigate-card-reader-suspend-problem-on-mbp11.4/master/test-18/dmesg

Re: [RFC PATCH 0/2] Fix for the internal card reader and suspend on MacBooks

2019-02-13 Thread Ivan Mironov

Unfortunately, everything broke again after yet another suspend/resume.
Currently I'm suspecting that my patch maybe only helps to survive the
short suspend, but not the long one.

After this bad suspend/resume, card reader disappeared again. Debug
logging was not enabled this time, so not too many information in the
dmesg:

[44013.429613] usb 2-4: Disable of device-initiated U1 failed.
[44018.549809] usb 2-4: Disable of device-initiated U2 failed.
[44024.182043] xhci_hcd :00:14.0: Timeout while waiting for setup device 
command
[44029.814239] xhci_hcd :00:14.0: Timeout while waiting for setup device 
command
[44030.022207] usb 2-4: device not accepting address 2, error -62
[44035.446526] xhci_hcd :00:14.0: Timeout while waiting for setup device 
command
[44041.078732] xhci_hcd :00:14.0: Timeout while waiting for setup device 
command
[44041.286640] usb 2-4: device not accepting address 2, error -62
[44046.710928] xhci_hcd :00:14.0: Timeout while waiting for setup device 
command
[44052.343184] xhci_hcd :00:14.0: Timeout while waiting for setup device 
command
[44052.551120] usb 2-4: device not accepting address 2, error -62
[44057.975369] xhci_hcd :00:14.0: Timeout while waiting for setup device 
command
[44063.607605] xhci_hcd :00:14.0: Timeout while waiting for setup device 
command
[44063.815538] usb 2-4: device not accepting address 2, error -62
[44063.882505] PM: resume devices took 55.895 seconds
[44063.882508] [ cut here ]
[44063.882511] Component: resume devices, time: 55895
[44063.882530] WARNING: CPU: 1 PID: 10887 at kernel/power/suspend_test.c:55 
suspend_test_finish+0x6b/0x70
[44063.882531] Modules linked in: vfat fat rfcomm fuse xt_CHECKSUM 
ipt_MASQUERADE tun bridge stp llc devlink nf_conntrack_netbios_ns 
nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 
xt_conntrack ebtable_nat ip6table_nat nf_nat_ipv6 ip6table_mangle ip6table_raw 
ip6table_security iptable_nat nf_nat_ipv4 nf_nat iptable_mangle iptable_raw 
iptable_security nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nfnetlink 
ebtable_filter ebtables ip6table_filter ip6_tables cmac bnep sunrpc nls_utf8 
hfsplus joydev iTCO_wdt iTCO_vendor_support intel_rapl x86_pkg_temp_thermal 
btusb intel_powerclamp applesmc btrtl input_polldev brcmfmac btbcm coretemp 
btintel bluetooth kvm_intel brcmutil snd_hda_codec_cirrus snd_hda_codec_generic 
snd_hda_codec_hdmi intel_cstate snd_hda_intel intel_uncore cfg80211 
snd_hda_codec ecdh_generic intel_rapl_perf snd_hda_core snd_hwdep bcm5974 
snd_seq snd_seq_device snd_pcm mmc_core rfkill thunderbolt snd_timer snd mei_me 
mei soundcore i2c_i801 intel_pch_thermal
[44063.882614]  lpc_ich sbs acpi_als kfifo_buf sbshc industrialio apple_gmux 
pcc_cpufreq apple_bl binfmt_misc xfs libcrc32c dm_crypt i915 kvmgt mdev vfio 
kvm irqbypass i2c_algo_bit drm_kms_helper crct10dif_pclmul crc32_pclmul 
crc32c_intel drm uas ghash_clmulni_intel usb_storage video hid_apple
[44063.882648] CPU: 1 PID: 10887 Comm: systemd-sleep Not tainted 
4.20.7-200.ivan3.fc29.x86_64 #1
[44063.882651] Hardware name: Apple Inc. MacBookPro11,4/Mac-06F11FD93F0323C5, 
BIOS MBP114.88Z.0184.B00.1806051659 06/05/2018
[44063.882656] RIP: 0010:suspend_test_finish+0x6b/0x70
[44063.882660] Code: 06 69 c2 e8 03 00 00 29 c1 e8 df a3 00 00 81 fd 10 27 00 
00 77 03 5b 5d c3 89 ea 48 89 de 48 c7 c7 e9 bb 0c b3 e8 1f 56 fa ff <0f> 0b eb 
e8 90 0f 1f 44 00 00 0f b6 05 49 e5 88 01 c3 0f 1f 00 0f
[44063.882663] RSP: :bad682b0fd30 EFLAGS: 00010286
[44063.882666] RAX:  RBX: b30cb9c2 RCX: 0006
[44063.882669] RDX: 0007 RSI: 0082 RDI: 8f70af0568c0
[44063.882671] RBP: da57 R08: 0002 R09: 000207c0
[44063.882674] R10: 002394f2f376 R11: 0001cd94 R12: 
[44063.882676] R13: b3254210 R14:  R15: bad682b0fd60
[44063.882681] FS:  7fd487426940() GS:8f70af04() 
knlGS:
[44063.882683] CS:  0010 DS:  ES:  CR0: 80050033
[44063.882686] CR2:  CR3: 0003fcc92002 CR4: 001606e0
[44063.882688] Call Trace:
[44063.882699]  suspend_devices_and_enter+0x248/0x7f0
[44063.882706]  pm_suspend.cold.5+0x33c/0x392
[44063.882711]  state_store+0x80/0xe0
[44063.882718]  kernfs_fop_write+0x116/0x190
[44063.882728]  __vfs_write+0x36/0x1a0
[44063.882736]  ? selinux_file_permission+0xf0/0x130
[44063.882745]  ? security_file_permission+0x2c/0xb0
[44063.882751]  vfs_write+0xa5/0x1a0
[44063.882758]  ksys_write+0x4f/0xb0
[44063.882767]  do_syscall_64+0x5b/0x160
[44063.882776]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[44063.882781] RIP: 0033:0x7fd48816dff8
[44063.882785] Code: 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 f3 
0f 1e fa 48 8d 05 25 77 0d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 
f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 41 54 49 89 d4 55
[44063.882788] RSP:

[RFC PATCH 2/2] usb: quirks: Add quirk to fix card reader and suspend on MacBooks

2019-02-13 Thread Ivan Mironov

This enabled USB_QUIRK_DISABLE_LINK_ON_SUSPEND for the embedded SD card
reader used in Apple MacBook laptops.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=111201
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202509
Signed-off-by: Ivan Mironov 
---
 drivers/usb/core/quirks.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index c26ba784dc54..818f949ff1b9 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -310,6 +310,10 @@ static const struct usb_device_id usb_quirk_list[] = {
/* appletouch */
{ USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME },
 
+   /* Apple Card Reader */
+   { USB_DEVICE(0x05ac, 0x8406), .driver_info =
+ USB_QUIRK_DISABLE_LINK_ON_SUSPEND },
+
/* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */
{ USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM },
 
-- 
2.20.1

[RFC PATCH 1/2] usb: core: Add support of disabling SS link before system suspend

2019-02-13 Thread Ivan Mironov

Some Apple MacBooks contain internal SD card reader connected to the USB
3.0 bus. Example: MacBook Pro Retina mid 2015, which contains USB card
reader with ID 05ac:8406.

Currently, such card reader works only after a reboot and completely
disappears from system after the first system suspend/resume cycle.
Also, any subsequent attempts to suspend are starting to fail.

There is a known way to circumvent the suspend problem: removing device
using /sys/devices/*/*/usb*/*-*/remove helps. But this unbreaks only
suspend, not the card reader itself (it remains absent).

When trying to fix both suspend and card reader device, I found that the
only working method is to set link state to disabled during suspend
procedure.

This patch adds new quirk for USB devices:
USB_QUIRK_DISABLE_LINK_ON_SUSPEND. When enabled, it changes the usual
suspend procedure for USB 3.0 devices by setting link state to DISABLED
instead of U3. To "resume" from disabled state, new code sets link state
to RX_DETECT and also enables reset for device.

As usual, this quirk may be enabled manually by adding
'usbcore.quirks=$VID:$PID:p' to the kernel parameters.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=111201
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202509
Signed-off-by: Ivan Mironov 
---
 drivers/usb/core/driver.c  |  6 +++
 drivers/usb/core/hub.c | 84 --
 drivers/usb/core/quirks.c  |  3 ++
 include/linux/usb.h|  3 ++
 include/linux/usb/quirks.h |  9 
 5 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 53564386ed57..1a1ee1ba7a63 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1145,6 +1145,10 @@ static int usb_suspend_device(struct usb_device *udev, 
pm_message_t msg)
udev->state == USB_STATE_SUSPENDED)
goto done;
 
+   if (!PMSG_IS_AUTO(msg) &&
+   (udev->quirks & USB_QUIRK_DISABLE_LINK_ON_SUSPEND))
+   udev->disable_link_on_suspend = 1;
+
/* For devices that don't have a driver, we do a generic suspend. */
if (udev->dev.driver)
udriver = to_usb_device_driver(udev->dev.driver);
@@ -1188,6 +1192,8 @@ static int usb_resume_device(struct usb_device *udev, 
pm_message_t msg)
 
  done:
dev_vdbg(>dev, "%s: status %d\n", __func__, status);
+   if (!status)
+   udev->disable_link_on_suspend = 0;
return status;
 }
 
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 1d1e61e980f3..c2e4e23500d3 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3230,8 +3230,22 @@ int usb_port_suspend(struct usb_device *udev, 
pm_message_t msg)
goto err_ltm;
}
 
+   if (udev->disable_link_on_suspend && !hub_is_superspeed(hub->hdev)) {
+   dev_dbg(>dev,
+   "disabling link unsupported on disable_link_on_suspend = 0;
+   }
+
+   if (udev->disable_link_on_suspend) {
+   status = hub_set_port_link_state(hub, port1,
+   USB_SS_PORT_LS_SS_DISABLED);
+   if (status) {
+   dev_dbg(_dev->dev, "can't disable link\n");
+   udev->disable_link_on_suspend = 0;
+   }
+   }
/* see 7.1.7.6 */
-   if (hub_is_superspeed(hub->hdev))
+   else if (hub_is_superspeed(hub->hdev))
status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U3);
 
/*
@@ -3431,6 +3445,32 @@ static int wait_for_connected(struct usb_device *udev,
return status;
 }
 
+static int wait_for_link(struct usb_device *udev,
+   struct usb_hub *hub, int port1,
+   u16 *portchange, u16 *portstatus)
+{
+   int status = 0, delay_ms = 0;
+
+   while (delay_ms < 2000) {
+   status = hub_port_status(hub, port1, portstatus, portchange);
+
+   if (status || ((*portstatus & USB_PORT_STAT_LINK_STATE) ==
+   USB_SS_PORT_LS_POLLING) ||
+   (*portstatus & USB_PORT_STAT_CONNECTION))
+   break;
+
+   msleep(20);
+   delay_ms += 20;
+   }
+   dev_dbg(>dev, "waited %dms for link, status %d\n", delay_ms,
+   status);
+
+   if (delay_ms >= 2000)
+   status = -ENODEV;
+
+   return status;
+}
+
 /*
  * usb_port_resume - re-activate a suspended usb device's upstream port
  * @udev: device to re-activate, not a root hub
@@ -3484,8 +3524,43 @@ int usb_port_resume(struct usb_device *udev, 
pm_message_t msg)
 
usb_lock_port(port_dev);
 
-   /* Skip the initial Clear-Suspend step for a remote wakeup */
status = hub_port_sta

[RFC PATCH 0/2] Fix for the internal card reader and suspend on MacBooks

2019-02-13 Thread Ivan Mironov

Hi all,

There is a known problem on some MacBooks: internal card reader
disappears after the first suspend/resume and all subsequent attempts
to suspend laptop are failing.

This was reported[1][2] and even discussed in the mailing lists[3],
without any real solution. After trying various things[4], including
some existing quirks, I discovered that switching link state to DISABLED
before suspend both fixes the card reader device and allows any subsequent
suspend to succeed.

First patch adds code for this new quirk, and second patch enables this
quirk for card reader device which is used in my macbook.

I'm not really familiar with either USB standards or kernel code to
support them, so this patch series is RFC. I'm especially unsure with the
"resume" part, because I implemented it by trial and error mostly.
However, I'm using kernel with these patches and it works for me.

Also, feel free to suggest other kernel patches or existing quirks or
quirk combinations to fix the same problem.

Oh, and by the way: I've checked schematics of various macbooks available
on the internet. It seems that the actual chip is Genesys Logic GL3219,
probably just with the custom ID. What I found curious, is that USB 2.0
pins of this chip (D+ and D-) are not really connected anywhere, but
instead shorted through the resistor. Could it be possible that this
somehow messes up some logic inside the device, host controller or
linux kernel?

[1] https://bugzilla.kernel.org/show_bug.cgi?id=111201
[2] https://bugzilla.kernel.org/show_bug.cgi?id=202509
[3] https://www.spinics.net/lists/linux-usb/msg164261.html
[4] https://github.com/im-0/investigate-card-reader-suspend-problem-on-mbp11.4

Ivan Mironov (2):
  usb: core: Add support of disabling SS link before system suspend
  usb: quirks: Add quirk to fix card reader and suspend on MacBooks

 drivers/usb/core/driver.c  |  6 +++
 drivers/usb/core/hub.c | 84 --
 drivers/usb/core/quirks.c  |  7 
 include/linux/usb.h|  3 ++
 include/linux/usb/quirks.h |  9 
 5 files changed, 105 insertions(+), 4 deletions(-)

-- 
2.20.1

[PATCH] USB: serial: cp210x: Add ID for Ingenico 3070

2019-02-06 Thread Ivan Mironov

Here is how this device appears in kernel log:

usb 3-1: new full-speed USB device number 18 using xhci_hcd
usb 3-1: New USB device found, idVendor=0b00, idProduct=3070
usb 3-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3
usb 3-1: Product: Ingenico 3070
usb 3-1: Manufacturer: Silicon Labs
usb 3-1: SerialNumber: 0001

Apparently this is a POS terminal with embedded USB-to-Serial converter.

Cc: sta...@vger.kernel.org
Signed-off-by: Ivan Mironov 
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index c0777a374a88..3286ed462fc5 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -61,6 +61,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless 
smartcard reader */
{ USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC 
Device */
{ USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console 
*/
+   { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */
{ USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher 
Acceptor */
{ USB_DEVICE(0x0BED, 0x1101) }, /* MEI series 2000 Combo Acceptor */
{ USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */
-- 
2.20.1

Re: [RFC PATCH 0/4] watchdog: hpwdt: Fix NMI-related behaviour when CONFIG_HPWDT_NMI_DECODING is enabled

2019-02-01 Thread Ivan Mironov

On Tue, 2019-01-15 at 19:22 -0700, Jerry Hoemann wrote:
> On Mon, Jan 14, 2019 at 07:36:13AM +0500, Ivan Mironov wrote:
> > Hi,
> > 
> > I found out that hpwdt alters NMI behaviour unexpectedly if compiled
> > with enabled CONFIG_HPWDT_NMI_DECODING:
> > 
> >  * System starts to panic on any NMI with misleading message.
> 
> hpwdt doesn't start to panic on any NMI.  It starts to panic on:
> 
> 1) NMI_SERR   associated with NMI
> 2) NMI_IO_CHECK   associated with IO errors
> 3) NMI_UNKNOWNNMI unclaimed by all local handlers.
> 
> On Gen10 going forward we plan to restrict to just iLO
> generated NMIs.
> 
> There is a long history on hp/hpe proliant systems where hpwdt
> was handler of general IO errors (at least ones that would cause
> NMI to be generated) and we chose to panic in these situation
> as the errors were generally quite serious.
> 

I would prefer to have this at least configurable by some module
parameter.

> Yes, this has caused some problems in the past as Linux has
> overloaded NMI and some subsystems didn't claim the NMIs that
> they generated (think profiling.)  But, I haven't seen these
> types of problems for several years now.
> 
> The more modern platforms have more robust error handling built
> into them and to linux so going forward we'll restrict hpwdt to a more
> traditional WDT role.  But we're retaining the more conservative
> approach for legacy platforms.
> 

I've seen NMI panic on my old ProLiant BL460c G6 at least once.
hpwdt.ko "handled" this NMI by disabling watchdog before hanging the
system 8). mynmi was equal to zero.

That is why I decided to check the code and try to understand how
exactly it works.

> How would you suggest that the message be enhanced?
> 

Maybe mention that "false positives" are possible and the actual reason
of NMI is not always logged in OA/iLO/etc. logs.

> 
> >  * Watchdog provided by hpwdt is not working after such panic.
> > 
> > Here are the patches that should fix this.
> > 
> > This is an RFC patch series because I am not sure that patches are
> > correct. Questions:
> > 
> >  * Are "mynmi" flags always set on all supported iLO versions when iLO
> >is the source of NMI?
> 
> Unfortunately no.
> 
> hpwdt is a dual purpose driver.  It handles the iLO watchdog timer
> and the "Generate NMI to System" button.  These are closely related
> hardware wise.
> 
> However, some platforms generate NMI for "Generate NMI to System" button but 
> aren't
> signaled via iLO registers.  These will show up as NMI_UNKNOWN, hence while
> hpwdt still claims these.
> 
> There are also some systems that do not set the nmistat bits correctly.
> 
> So as to not break legacy platforms, the use the nmistat bits for
> control will be for Gen10 going forward.
> 

It seems that iLO 2 sets these bits correctly. Bit 1 is set on
pretimout NMI, bit 2 is set on "iLO web button" NMI.

> 
> 
> >  * Is it safe to reset "mynmi" flags to zero if code decides to not panic?
> 
> The reading of the registers is itself destructive (sets to zero)

Could you elaborate what exactly you mean here?

I tried to read nmistat register multiple times using ioread8(), and
every time returned value were the same, with one of mynmi flags set.
Even with mdelay() between calls.

>  but the real
> issue is that some proliant systems lack the ability to acknowledge the NMI so
> only one can ever be received.  So returning is not advisable as no
> further NMI will be generated via this path.  A reset through firmware
> is required to restore the feature.
> 

Yes, I noticed this.

> 
> > Ivan Mironov (4):
> >   watchdog: hpwdt: Don't disable watchdog on NMI
> >   watchdog: hpwdt: Don't panic on foreign NMI
> >   watchdog: hpwdt: Add more information into message
> >   watchdog: hpwdt: Make panic behaviour configurable
> > 
> >  drivers/watchdog/hpwdt.c | 45 ++--
> >  1 file changed, 25 insertions(+), 20 deletions(-)
> > 
> > -- 
> > 2.20.1

By the way, is it possible to implement something like this
(pseudocode):

***
bool handle_unknown_nmi_on_old_systems = true; // module parameter

int nmi_handler()
{
if (mynmi_flags_supported(current_hw)) {
if (mynmi & MYNMI_PRETIMOUT_FLAG) {
if (pretimout) {
hpwdt_stop();
panic("hpwdt pretimout");
return NMI_HANDLED;
} else {
warn("pretimout flag set, but

Re: [RFC PATCH 4/4] watchdog: hpwdt: Make panic behaviour configurable

2019-02-01 Thread Ivan Mironov

On Tue, 2019-01-15 at 19:30 -0700, Jerry Hoemann wrote:
> On Mon, Jan 14, 2019 at 07:36:17AM +0500, Ivan Mironov wrote:
> > This adds an option to not panic on NMI even if it was caused by iLO.
> > 
> > Signed-off-by: Ivan Mironov 
> > ---
> >  drivers/watchdog/hpwdt.c | 19 ---
> >  1 file changed, 16 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
> > index 95d002b5b120..b12858491189 100644
> > --- a/drivers/watchdog/hpwdt.c
> > +++ b/drivers/watchdog/hpwdt.c
> > @@ -37,6 +37,10 @@ static unsigned int soft_margin = DEFAULT_MARGIN;
> > /* in seconds */
> >  static bool nowayout = WATCHDOG_NOWAYOUT;
> >  static bool pretimeout = IS_ENABLED(CONFIG_HPWDT_NMI_DECODING);
> >  
> > +#ifdef CONFIG_HPWDT_NMI_DECODING
> > +static bool panic_on_nmi = true;
> > +#endif /* CONFIG_HPWDT_NMI_DECODING */
> > +
> >  static void __iomem *pci_mem_addr; /* the PCI-memory address */
> >  static unsigned long __iomem *hpwdt_nmistat;
> >  static unsigned long __iomem *hpwdt_timer_reg;
> > @@ -146,7 +150,10 @@ static int hpwdt_set_pretimeout(struct watchdog_device 
> > *wdd, unsigned int req)
> >  
> >  static int hpwdt_my_nmi(void)
> >  {
> > -   return ioread8(hpwdt_nmistat) & 0x6;
> > +   int nmistat = ioread8(hpwdt_nmistat);
> > +
> > +   iowrite8(nmistat & ~0x6, hpwdt_nmistat);
> > +   return nmistat & 0x6;
> 
> This is a read only register.
> 

Oops... At least on my system this code has the desired effect:
subsequent function call returns zero.

Probably it would be better to use additional variable to determine
whether NMI from iLO already happened or not.

> 
> >  }
> >  
> >  /*
> > @@ -168,7 +175,10 @@ static int hpwdt_pretimeout(unsigned int ulReason, 
> > struct pt_regs *regs)
> >  "4. iLO Event Log\n",
> >  mynmi, ulReason, smp_processor_id());
> >  
> > -   nmi_panic(regs, "hpwdt: NMI: Not continuing");
> > +   if (panic_on_nmi)
> > +   nmi_panic(regs, "hpwdt: NMI: Not continuing");
> > +
> > +   pr_emerg("Dazed and confused, but trying to continue\n");
> >  
> 
> The watchdog core provides a way to enable/disable the NMI pretimeout 
> dynamically
> via ioctl.  The pretimeout NMI can also be disabled via module parameter to 
> hpwdt.
> This adds complexity without really adding functionality.
> 

It looks like disabling pretimout will disable panics only for iLO 5
(mine system has iLO 2). Or am I missing something?

> 
> BTW, don't reuse error messages.  Makes it difficult to tell where the error
> originated from.
> 

Would it be better to just return NMI_DONE and thus reuse normal NMI
handling from kernel, with logging and panic/don't panic logic?

> 
> > return NMI_HANDLED;
> >  }
> > @@ -376,6 +386,9 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped 
> > once started (default="
> >  #ifdef CONFIG_HPWDT_NMI_DECODING
> >  module_param(pretimeout, bool, 0);
> >  MODULE_PARM_DESC(pretimeout, "Watchdog pretimeout enabled");
> > -#endif
> > +
> > +module_param(panic_on_nmi, bool, 0);
> > +MODULE_PARM_DESC(panic_on_nmi, "Cause panic on NMI induced by iLO 
> > (default=1)");
> > +#endif /* CONFIG_HPWDT_NMI_DECODING */
> >  
> >  module_pci_driver(hpwdt_driver);
> > -- 
> > 2.20.1

Re: [RFC PATCH 1/4] watchdog: hpwdt: Don't disable watchdog on NMI

2019-02-01 Thread Ivan Mironov

On Tue, 2019-01-15 at 19:27 -0700, Jerry Hoemann wrote:
> On Mon, Jan 14, 2019 at 07:36:14AM +0500, Ivan Mironov wrote:
> > Existing code disables watchdog on NMI right before completely hanging
> > the system.
> > 
> > There are two problems here:
> > 
> >  * First, watchdog is expected to reset the system in a case of such
> >failure, no matter what.
> 
> Documentation/watchdog/watchdog-api.txt
> 
> explicitly allows for pretimeout NMI and generation of kernel crash dumps.
> 
> By removing hpwdt_stop the system will likely fail to crash dump
> as there is only 9 seconds between receipt of a NMI and the iLO
> resetting the system.
> 
> Unfortunately, kdump is not without issues and can also be difficult
> to properly configure either of which can result in failure to dump
> and reset.
> 
> Customers who value availability over kdump collection, the pretimeout
> NMI can be disabled and hardware will not issue the pretimeout NMI
> and will only do reset.
> 
> A middle ground for those who want tombstones but not kdump, would
> be to leave the pretimeout NMI enabled and add "panic=N" to the
> Linux command line.  That way after the panic, the tombstone is
> printed and the system resets after N seconds.
> 
> 

Somehow I missed the whole pretimout thing when reading about the
watchdog API. Thanks for clarification, now code makes much more sense
=).

Still, I do not really understand the point of enabling of kdump
support in hpwdt driver by default while kdump is not enabled by
default.

Also, existing code may call hpwdt_stop() (and thus break watchdog)
even if pretimout is disabled.

Also, "panic=N" option is not providing a way to *not* panic on NMI
unrelated with iLO. This could be circumvented by blacklisting the
hpwdt module entirely, but normal watchdog functionality would be lost
then.

It is possible to rebuild kernel without HPWDT_NMI_DECODING (which is
enabled in Fedora, for example). But it is nearly impossible to come to
this solution without examining the source code, because description of
this option does not mention that it is really about pretimout support
and panics and not about something else...

I would say that current default behavior of hpwd is slightly confusing
in multiple different ways.

> 
> >  * Second, this code has no effect if there are more than one watchdog.
> 
> That is correct.  Hpwdt will not turn off any other WDT.
> 
> I don't see a current method of notifying other watchdogs
> that a given watchdog is going to take the system down.
> 
> The closest I hook see is watchdog_notify_pretimeout, but I don't
> see that notifying other WDT.  Its not clear to me that it should.
> (e.g. the second WDT could be of longer duration and protect against
> kdump hanging. This would need to be thought through.)
> 
> 
> 
> > Signed-off-by: Ivan Mironov 
> > ---
> >  drivers/watchdog/hpwdt.c | 2 --
> >  1 file changed, 2 deletions(-)
> > 
> > diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
> > index ef30c7e9728d..2467e6bc25c2 100644
> > --- a/drivers/watchdog/hpwdt.c
> > +++ b/drivers/watchdog/hpwdt.c
> > @@ -170,8 +170,6 @@ static int hpwdt_pretimeout(unsigned int ulReason, 
> > struct pt_regs *regs)
> > if (ilo5 && !pretimeout && !mynmi)
> > return NMI_DONE;
> >  
> > -   hpwdt_stop();
> > -
> > hex_byte_pack(panic_msg, mynmi);
> > nmi_panic(regs, panic_msg);
> >  
> > -- 
> > 2.20.1

[RFC PATCH 3/4] watchdog: hpwdt: Add more information into message

2019-01-13 Thread Ivan Mironov

Default NMI handling code prints the same into the kernel log.

Signed-off-by: Ivan Mironov 
---
 drivers/watchdog/hpwdt.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index e2958df46c69..95d002b5b120 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -155,19 +155,20 @@ static int hpwdt_my_nmi(void)
 static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs)
 {
unsigned int mynmi = hpwdt_my_nmi();
-   static char panic_msg[] =
-   "00: An NMI occurred. Depending on your system the reason "
-   "for the NMI is logged in any one of the following resources:\n"
-   "1. Integrated Management Log (IML)\n"
-   "2. OA Syslog\n"
-   "3. OA Forward Progress Log\n"
-   "4. iLO Event Log";
 
if (!mynmi)
return NMI_DONE;
 
-   hex_byte_pack(panic_msg, mynmi);
-   nmi_panic(regs, panic_msg);
+   pr_emerg("%02x: An NMI occurred (reason %02x, CPU %d). Depending on "
+"your system the reason for the NMI is logged in any one of "
+"the following resources:\n"
+"1. Integrated Management Log (IML)\n"
+"2. OA Syslog\n"
+"3. OA Forward Progress Log\n"
+"4. iLO Event Log\n",
+mynmi, ulReason, smp_processor_id());
+
+   nmi_panic(regs, "hpwdt: NMI: Not continuing");
 
return NMI_HANDLED;
 }
-- 
2.20.1

[RFC PATCH 1/4] watchdog: hpwdt: Don't disable watchdog on NMI

2019-01-13 Thread Ivan Mironov

Existing code disables watchdog on NMI right before completely hanging
the system.

There are two problems here:

 * First, watchdog is expected to reset the system in a case of such
   failure, no matter what.
 * Second, this code has no effect if there are more than one watchdog.

Signed-off-by: Ivan Mironov 
---
 drivers/watchdog/hpwdt.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index ef30c7e9728d..2467e6bc25c2 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -170,8 +170,6 @@ static int hpwdt_pretimeout(unsigned int ulReason, struct 
pt_regs *regs)
if (ilo5 && !pretimeout && !mynmi)
return NMI_DONE;
 
-   hpwdt_stop();
-
hex_byte_pack(panic_msg, mynmi);
nmi_panic(regs, panic_msg);
 
-- 
2.20.1

[RFC PATCH 2/4] watchdog: hpwdt: Don't panic on foreign NMI

2019-01-13 Thread Ivan Mironov

Currently, hpwdt unconditionally panics on foreign NMIs in some cases.
This goes against the default kernel behaviour (which is configured by
unknown_nmi_panic and panic_on_unrecovered_nmi).

With this patch, hpwdt will simply ignore NMI unless one of "mynmi"
flags is set by iLO.

Signed-off-by: Ivan Mironov 
---
 drivers/watchdog/hpwdt.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 2467e6bc25c2..e2958df46c69 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -33,7 +33,6 @@
 #define DEFAULT_MARGIN 30
 #define PRETIMEOUT_SEC 9
 
-static bool ilo5;
 static unsigned int soft_margin = DEFAULT_MARGIN;  /* in seconds */
 static bool nowayout = WATCHDOG_NOWAYOUT;
 static bool pretimeout = IS_ENABLED(CONFIG_HPWDT_NMI_DECODING);
@@ -164,10 +163,7 @@ static int hpwdt_pretimeout(unsigned int ulReason, struct 
pt_regs *regs)
"3. OA Forward Progress Log\n"
"4. iLO Event Log";
 
-   if (ilo5 && ulReason == NMI_UNKNOWN && !mynmi)
-   return NMI_DONE;
-
-   if (ilo5 && !pretimeout && !mynmi)
+   if (!mynmi)
return NMI_DONE;
 
hex_byte_pack(panic_msg, mynmi);
@@ -332,9 +328,6 @@ static int hpwdt_init_one(struct pci_dev *dev,
dev_info(>dev, "pretimeout: %s.\n",
pretimeout ? "on" : "off");
 
-   if (dev->subsystem_vendor == PCI_VENDOR_ID_HP_3PAR)
-   ilo5 = true;
-
return 0;
 
 error_wd_register:
-- 
2.20.1

[RFC PATCH 0/4] watchdog: hpwdt: Fix NMI-related behaviour when CONFIG_HPWDT_NMI_DECODING is enabled

2019-01-13 Thread Ivan Mironov

Hi,

I found out that hpwdt alters NMI behaviour unexpectedly if compiled
with enabled CONFIG_HPWDT_NMI_DECODING:

 * System starts to panic on any NMI with misleading message.
 * Watchdog provided by hpwdt is not working after such panic.

Here are the patches that should fix this.

This is an RFC patch series because I am not sure that patches are
correct. Questions:

 * Are "mynmi" flags always set on all supported iLO versions when iLO
   is the source of NMI?
 * Is it safe to reset "mynmi" flags to zero if code decides to not panic?

Ivan Mironov (4):
  watchdog: hpwdt: Don't disable watchdog on NMI
  watchdog: hpwdt: Don't panic on foreign NMI
  watchdog: hpwdt: Add more information into message
  watchdog: hpwdt: Make panic behaviour configurable

 drivers/watchdog/hpwdt.c | 45 ++--
 1 file changed, 25 insertions(+), 20 deletions(-)

-- 
2.20.1

[RFC PATCH 4/4] watchdog: hpwdt: Make panic behaviour configurable

2019-01-13 Thread Ivan Mironov

This adds an option to not panic on NMI even if it was caused by iLO.

Signed-off-by: Ivan Mironov 
---
 drivers/watchdog/hpwdt.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 95d002b5b120..b12858491189 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -37,6 +37,10 @@ static unsigned int soft_margin = DEFAULT_MARGIN;/* in 
seconds */
 static bool nowayout = WATCHDOG_NOWAYOUT;
 static bool pretimeout = IS_ENABLED(CONFIG_HPWDT_NMI_DECODING);
 
+#ifdef CONFIG_HPWDT_NMI_DECODING
+static bool panic_on_nmi = true;
+#endif /* CONFIG_HPWDT_NMI_DECODING */
+
 static void __iomem *pci_mem_addr; /* the PCI-memory address */
 static unsigned long __iomem *hpwdt_nmistat;
 static unsigned long __iomem *hpwdt_timer_reg;
@@ -146,7 +150,10 @@ static int hpwdt_set_pretimeout(struct watchdog_device 
*wdd, unsigned int req)
 
 static int hpwdt_my_nmi(void)
 {
-   return ioread8(hpwdt_nmistat) & 0x6;
+   int nmistat = ioread8(hpwdt_nmistat);
+
+   iowrite8(nmistat & ~0x6, hpwdt_nmistat);
+   return nmistat & 0x6;
 }
 
 /*
@@ -168,7 +175,10 @@ static int hpwdt_pretimeout(unsigned int ulReason, struct 
pt_regs *regs)
 "4. iLO Event Log\n",
 mynmi, ulReason, smp_processor_id());
 
-   nmi_panic(regs, "hpwdt: NMI: Not continuing");
+   if (panic_on_nmi)
+   nmi_panic(regs, "hpwdt: NMI: Not continuing");
+
+   pr_emerg("Dazed and confused, but trying to continue\n");
 
return NMI_HANDLED;
 }
@@ -376,6 +386,9 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once 
started (default="
 #ifdef CONFIG_HPWDT_NMI_DECODING
 module_param(pretimeout, bool, 0);
 MODULE_PARM_DESC(pretimeout, "Watchdog pretimeout enabled");
-#endif
+
+module_param(panic_on_nmi, bool, 0);
+MODULE_PARM_DESC(panic_on_nmi, "Cause panic on NMI induced by iLO 
(default=1)");
+#endif /* CONFIG_HPWDT_NMI_DECODING */
 
 module_pci_driver(hpwdt_driver);
-- 
2.20.1

Re: [PATCH v2 2/2] drm/fb-helper: Ignore the value of fb_var_screeninfo.pixclock

2019-01-10 Thread Ivan Mironov

On Wed, 2019-01-09 at 15:52 +, Sasha Levin wrote:
> Hi,
> 
> [This is an automated email]
> 
> This commit has been processed because it contains a "Fixes:" tag,
> fixing commit: 79e539453b34 DRM: i915: add mode setting support.
> 
> The bot has tested the following trees: v4.20.0, v4.19.13, v4.14.91, 
> v4.9.148, v4.4.169, v3.18.131.
> 
> v4.20.0: Failed to apply! Possible dependencies:
> 042bf753842d ("drm/fourcc: Add char_per_block, block_w and block_h in 
> drm_format_info")
> 9c71a6686bfa ("drm: fourcc: Convert drm_format_info kerneldoc to in-line 
> member documentation")
> 
> v4.19.13: Failed to apply! Possible dependencies:
> 042bf753842d ("drm/fourcc: Add char_per_block, block_w and block_h in 
> drm_format_info")
> 9c71a6686bfa ("drm: fourcc: Convert drm_format_info kerneldoc to in-line 
> member documentation")
> c76abab59b3c ("drm: Use horizontal and vertical chroma subsampling factor 
> while calculating offsets in the physical address of framebuffer")
> 
> v4.14.91: Failed to apply! Possible dependencies:
> 042bf753842d ("drm/fourcc: Add char_per_block, block_w and block_h in 
> drm_format_info")
> 4cc4e1b40f3f ("drm/fourcc: Add a alpha field to drm_format_info")
> 9c71a6686bfa ("drm: fourcc: Convert drm_format_info kerneldoc to in-line 
> member documentation")
> c76abab59b3c ("drm: Use horizontal and vertical chroma subsampling factor 
> while calculating offsets in the physical address of framebuffer")
> ce2d54619a10 ("drm/fourcc: Add is_yuv field to drm_format_info to denote 
> if the format is yuv")
> 
> v4.9.148: Failed to apply! Possible dependencies:
> 042bf753842d ("drm/fourcc: Add char_per_block, block_w and block_h in 
> drm_format_info")
> 05fc03217e08 ("drm/mm: Some doc polish")
> 06df8ac682e6 ("drm: kselftest for drm_mm_debug()")
> 14d7f96f90fb ("drm/fb_cma_helper: Add drm_fb_cma_prepare_fb() helper")
> 2bd966d106e3 ("drm: kselftest for drm_mm_replace_node()")
> 2fba0de0a9ec ("drm: kselftest for drm_mm_insert_node_in_range()")
> 393b50f30566 ("drm: kselftest for drm_mm_init()")
> 4636ce93d5b2 ("drm/fb-cma-helper: Add drm_fb_cma_get_gem_addr()")
> 50f0033d1a0f ("drm: Add some kselftests for the DRM range manager (struct 
> drm_mm)")
> 5628648df755 ("drm/fb-cma-helper: Use drm_gem_framebuffer_helper")
> 5705670d0463 ("drm: Track drm_mm allocators and show leaks on shutdown")
> 6259a56ba0e1 ("drm: Add asserts to catch overflow in drm_mm_init() and 
> drm_mm_init_scan()")
> 62a0d98a188c ("drm: allow to use mmuless SoC")
> 72a93e8dd52c ("drm: Take ownership of the dmabuf->obj when exporting")
> 7886692a5804 ("drm: kselftest for drm_mm_insert_node()")
> 900537dc3889 ("drm: kselftest for drm_mm_reserve_node()")
> 940eba2d58a7 ("drm/gem|prime|mm: Use recommened kerneldoc for struct 
> member refs")
> 9a71e277888b ("drm: Extract struct drm_mm_scan from struct drm_mm")
> 9b26f2ed29f8 ("drm: kselftest for drm_mm and alignment")
> b112481bb327 ("drm/cma-helper: simplify setup for drivers with ->dirty 
> callbacks")
> b3ee963fe41d ("drm: Compile time enabling for asserts in drm_mm")
> ba004e39b199 ("drm: Fix kerneldoc for drm_mm_scan_remove_block()")
> c76abab59b3c ("drm: Use horizontal and vertical chroma subsampling factor 
> while calculating offsets in the physical address of framebuffer")
> e6b62714e87c ("drm: Introduce drm_gem_object_{get,put}()")
> 
> v4.4.169: Failed to apply! Possible dependencies:
> 042bf753842d ("drm/fourcc: Add char_per_block, block_w and block_h in 
> drm_format_info")
> 14d7f96f90fb ("drm/fb_cma_helper: Add drm_fb_cma_prepare_fb() helper")
> 199c77179c87 ("drm/fb-cma-helper: Add fb_deferred_io support")
> 1eb83451ba55 ("drm: Pass the user drm_mode_fb_cmd2 as const to 
> .fb_create()")
> 4636ce93d5b2 ("drm/fb-cma-helper: Add drm_fb_cma_get_gem_addr()")
> 5628648df755 ("drm/fb-cma-helper: Use drm_gem_framebuffer_helper")
> 70c0616d5a84 ("drm/fb_cma_helper: remove duplicate const from 
> drm_fb_cma_alloc")
> b112481bb327 ("drm/cma-helper: simplify setup for drivers with ->dirty 
> callbacks")
> c76abab59b3c ("drm: Use horizontal and vertical chroma subsampling factor 
> while calculating offsets in the physical address of framebuffer")
> ce0c57576810 ("drm/fb_cma_helper: Implement fb_mmap callback")
> fdce184609ee ("drm/fb-cma-helper: Use const for drm_framebuffer_funcs 
> argument")
> 
> v3.18.131: Failed to apply! Possible dependencies:
> 042bf753842d ("drm/fourcc: Add char_per_block, block_w and block_h in 
> drm_format_info")
> 14d7f96f90fb ("drm/fb_cma_helper: Add drm_fb_cma_prepare_fb() helper")
> 199c77179c87 ("drm/fb-cma-helper: Add fb_deferred_io support")
> 1a396789f65a ("drm: add Atmel HLCDC Display Controller support")
> 1eb83451ba55 ("drm: Pass the user drm_mode_fb_cmd2 as const to 
> .fb_create()")
> 2a8cb4894540 ("drm/exynos: merge

Re: [PATCH v1 1/2] drm/fb-helper: Bring back workaround for bugs of SDL 1.2

2019-01-07 Thread Ivan Mironov

On Mon, 2019-01-07 at 11:08 +0100, Daniel Vetter wrote:
> > > > @@ -1654,6 +1712,40 @@ int drm_fb_helper_check_var(struct 
> > > > fb_var_screeninfo *var,
> > > > return -EINVAL;
> > > > }
> > > >  
> > > > +   /*
> > > > +* Workaround for SDL 1.2, which is known to be setting all 
> > > > pixel format
> > > > +* fields values to zero in some cases. We treat this situation 
> > > > as a
> > > > +* kind of "use some reasonable autodetected values".
> > > > +*/
> > > > +   if (!var->red.offset && !var->green.offset&&
> > > > +   !var->blue.offset&& !var->transp.offset   &&
> > > > +   !var->red.length && !var->green.length&&
> > > > +   !var->blue.length&& !var->transp.length   &&
> > > > +   !var->red.msb_right  && !var->green.msb_right &&
> > > > +   !var->blue.msb_right && !var->transp.msb_right) {
> > > > +   u8 depth;
> > > > +
> > > > +   /*
> > > > +* There is no way to guess the right value for depth 
> > > > when
> > > > +* bpp is 16 or 32. So we just restore the behaviour 
> > > > previously
> > > > +* introduced here by commit . In fact, this was
> > > > +* implemented even earlier in various device drivers.
> > > > +*/
> > > > +   switch (var->bits_per_pixel) {
> > > > +   case 16:
> > > > +   depth = 15;
> > > > +   break;
> > > > +   case 32:
> > > > +   depth = 24;
> > > > +   break;
> > > > +   default:
> > > > +   depth = var->bits_per_pixel;
> > > > +   break;
> > > > +   }
> > > > +
> > > > +   drm_fb_helper_fill_pixel_fmt(var, depth);
> > > 
> > > Please use fb->format->depth here instead of guessing.
> > > -Daniel
> > 
> > I do not think that this is the right way.
> > 
> > Without guessing, if SDL1 makes a request with bits_per_pixel == 16
> > (for example) and current fb->format->depth == 24, ioctl() will succeed
> > while actual pixel format will remain the same. As a result, SDL1 will
> > display garbage because of invalid pixel format.
> > 
> > Or am I missing something here?
> 
> This is supposed to be the case where SDL didn't set any of this stuff.
> Guess is definitely not what we want to do, we should use the actual
> depth, which is stored in fb->format->depth. Older code did guess, but we
> fixed that, and shouldn't reintroduce that guess game.
> -Daniel
> 

Done. See the v2 patch series.

[PATCH v2 1/2] drm/fb-helper: Partially bring back workaround for bugs of SDL 1.2

2019-01-07 Thread Ivan Mironov

SDL 1.2 sets all fields related to the pixel format to zero in some
cases[1]. Prior to commit db05c48197759 ("drm: fb-helper: Reject all
pixel format changing requests"), there was an unintentional workaround
for this that existed for more than a decade. First in device-specific DRM
drivers, then here in drm_fb_helper.c.

Previous code containing this workaround just ignores pixel format fields
from userspace code. Not a good thing either, as this way, driver may
silently use pixel format different from what client actually requested,
and this in turn will lead to displaying garbage on the screen. I think
that returning EINVAL to userspace in this particular case is the right
option, so I decided to left code from problematic commit untouched
instead of just reverting it entirely.

Here is the steps required to reproduce this problem exactly:
1) Compile fceux[2] with SDL 1.2.15 and without GTK or OpenGL
   support. SDL should be compiled with fbdev support (which is
   on by default).
2) Create /etc/fb.modes with following contents (values seems
   not used, and just required to trigger problematic code in
   SDL):

mode "test"
geometry 1 1 1 1 1
timings 1 1 1 1 1 1 1
endmode

3) Create ~/.fceux/fceux.cfg with following contents:

SDL.Hotkeys.Quit = 27
SDL.DoubleBuffering = 1

4) Ensure that screen resolution is at least 1280x960 (e.g.
   append "video=Virtual-1:1280x960-32" to the kernel cmdline
   for qemu/QXL).

5) Try to run fceux on VT with some ROM file[3]:

# ./fceux color_test.nes

[1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c,
FB_SetVideoMode()
[2] http://www.fceux.com
[3] Example ROM: 
https://github.com/bokuweb/rustynes/blob/master/roms/color_test.nes

Reported-by: saahriktu 
Suggested-by: saahriktu 
Cc: sta...@vger.kernel.org
Fixes: db05c48197759 ("drm: fb-helper: Reject all pixel format changing 
requests")
Signed-off-by: Ivan Mironov 
---
 drivers/gpu/drm/drm_fb_helper.c | 142 
 1 file changed, 89 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index d3af098b0922..ed7e91423258 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1621,6 +1621,64 @@ static bool drm_fb_pixel_format_equal(const struct 
fb_var_screeninfo *var_1,
   var_1->transp.msb_right == var_2->transp.msb_right;
 }
 
+static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var,
+u8 depth)
+{
+   switch (depth) {
+   case 8:
+   var->red.offset = 0;
+   var->green.offset = 0;
+   var->blue.offset = 0;
+   var->red.length = 8; /* 8bit DAC */
+   var->green.length = 8;
+   var->blue.length = 8;
+   var->transp.offset = 0;
+   var->transp.length = 0;
+   break;
+   case 15:
+   var->red.offset = 10;
+   var->green.offset = 5;
+   var->blue.offset = 0;
+   var->red.length = 5;
+   var->green.length = 5;
+   var->blue.length = 5;
+   var->transp.offset = 15;
+   var->transp.length = 1;
+   break;
+   case 16:
+   var->red.offset = 11;
+   var->green.offset = 5;
+   var->blue.offset = 0;
+   var->red.length = 5;
+   var->green.length = 6;
+   var->blue.length = 5;
+   var->transp.offset = 0;
+   break;
+   case 24:
+   var->red.offset = 16;
+   var->green.offset = 8;
+   var->blue.offset = 0;
+   var->red.length = 8;
+   var->green.length = 8;
+   var->blue.length = 8;
+   var->transp.offset = 0;
+   var->transp.length = 0;
+   break;
+   case 32:
+   var->red.offset = 16;
+   var->green.offset = 8;
+   var->blue.offset = 0;
+   var->red.length = 8;
+   var->green.length = 8;
+   var->blue.length = 8;
+   var->transp.offset = 24;
+   var->transp.length = 8;
+   break;
+   default:
+   break;
+   }
+}
+
 /**
  * drm_fb_helper_check_var - implementation for _ops.fb_check_var
  * @var: screeninfo to check
@@ -1654,6 +1712,36 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo 
*var,
return -EINVAL;
}
 
+   /*
+* Workaround for SDL 1.2, which is known to b

[PATCH v2 2/2] drm/fb-helper: Ignore the value of fb_var_screeninfo.pixclock

2019-01-07 Thread Ivan Mironov

Strict requirement of pixclock to be zero breaks support of SDL 1.2
which contains hardcoded table of supported video modes with non-zero
pixclock values[1].

To better understand which pixclock values are considered valid and how
driver should handle these values, I briefly examined few existing fbdev
drivers and documentation in Documentation/fb/. And it looks like there
are no strict rules on that and actual behaviour varies:

* some drivers treat (pixclock == 0) as "use defaults" (uvesafb.c);
* some treat (pixclock == 0) as invalid value which leads to
  -EINVAL (clps711x-fb.c);
* some pass converted pixclock value to hardware (uvesafb.c);
* some are trying to find nearest value from predefined table
  (vga16fb.c, video_gx.c).

Given this, I believe that it should be safe to just ignore this value if
changing is not supported. It seems that any portable fbdev application
which was not written only for one specific device working under one
specific kernel version should not rely on any particular behaviour of
pixclock anyway.

However, while enabling SDL1 applications to work out of the box when
there is no /etc/fb.modes with valid settings, this change affects the
video mode choosing logic in SDL. Depending on current screen
resolution, contents of /etc/fb.modes and resolution requested by
application, this may lead to user-visible difference (not always):
image will be displayed in a right way, but it will be aligned to the
left instead of center. There is no "right behaviour" here as well, as
emulated fbdev, opposing to old fbdev drivers, simply ignores any
requsts of video mode changes with resolutions smaller than current.

The easiest way to reproduce this problem is to install sdl-sopwith[2],
remove /etc/fb.modes file if it exists, and then try to run sopwith
from console without X. At least in Fedora 29, sopwith may be simply
installed from standard repositories.

[1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c, vesa_timings
[2] http://sdl-sopwith.sourceforge.net/

Signed-off-by: Ivan Mironov 
Cc: sta...@vger.kernel.org
Fixes: 79e539453b34e ("DRM: i915: add mode setting support")
Fixes: 771fe6b912fca ("drm/radeon: introduce kernel modesetting for radeon 
hardware")
Fixes: 785b93ef8c309 ("drm/kms: move driver specific fb common code to helper 
functions (v2)")
---
 drivers/gpu/drm/drm_fb_helper.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index ed7e91423258..2d4c2b38508e 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1690,9 +1690,14 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo 
*var,
struct drm_fb_helper *fb_helper = info->par;
struct drm_framebuffer *fb = fb_helper->fb;
 
-   if (var->pixclock != 0 || in_dbg_master())
+   if (in_dbg_master())
return -EINVAL;
 
+   if (var->pixclock != 0) {
+   DRM_DEBUG("fbdev emulation doesn't support changing the pixel 
clock, value of pixclock is ignored\n");
+   var->pixclock = 0;
+   }
+
if ((drm_format_info_block_width(fb->format, 0) > 1) ||
(drm_format_info_block_height(fb->format, 0) > 1))
return -EINVAL;
-- 
2.20.1

[PATCH v2 0/2] Fix SDL 1.2 on emulated fbdev devices (broken in kernels >=4.19)

2019-01-07 Thread Ivan Mironov

Hi,

Originally this issue was brought up on linux.org.ru forum by user
saahriktu, he is on Cc. He discovered that commit db05c48197759
("drm: fb-helper: Reject all pixel format changing requests") breaks
support of SDL1 programs, like various old games and emulators of old
game consoles. First patch contains fix for that commit.

I tried to reproduce the same issue in a VM under qemu, and found yet
another part of kernel code which prevents SDL1 apps from running
normally. Second patch in this series fixes this problem.

Also, it seems that at least in some cases both problems could be
circumvented by adding appropriate modes into /etc/fb.modes. But without
examining the kernel code it is not clear which values are correct. I am
not sure that such circumvention covers all possible cases, and it is
definitely far from any user-friendliness.

First patch in this series fixes a clear regression. Second patch is
optional, please read commit message carefully before applying it.

Changes in v2:
- Added "Cc: stable" to the second patch.
- Proposed by Daniel Vetter: always use current depth (fb->format->depth)
  in a case of zero pixel format values and do not perform any guessing.

Changes in v1:
- Added "Cc: stable" to the patch which fixes known regression.
- Added more information and detailed reproduction steps in commit
  messages.

Changes in v0:
- RFC patch series introduced.

Ivan Mironov (2):
  drm/fb-helper: Partially bring back workaround for bugs of SDL 1.2
  drm/fb-helper: Ignore the value of fb_var_screeninfo.pixclock

 drivers/gpu/drm/drm_fb_helper.c | 149 
 1 file changed, 95 insertions(+), 54 deletions(-)

-- 
2.20.1

Re: [PATCH v1 2/2] drm/fb-helper: Ignore the value of fb_var_screeninfo.pixclock

2019-01-05 Thread Ivan Mironov

On Fri, 2018-12-28 at 13:06 +0100, Daniel Vetter wrote:
> On Fri, Dec 28, 2018 at 04:13:08AM +0500, Ivan Mironov wrote:
> > Strict requirement of pixclock to be zero breaks support of SDL 1.2
> > which contains hardcoded table of supported video modes with non-zero
> > pixclock values[1].
> > 
> > To better understand which pixclock values are considered valid and how
> > driver should handle these values, I briefly examined few existing fbdev
> > drivers and documentation in Documentation/fb/. And it looks like there
> > are no strict rules on that and actual behaviour varies:
> > 
> > * some drivers treat (pixclock == 0) as "use defaults" (uvesafb.c);
> > * some treat (pixclock == 0) as invalid value which leads to
> >   -EINVAL (clps711x-fb.c);
> > * some pass converted pixclock value to hardware (uvesafb.c);
> > * some are trying to find nearest value from predefined table
> >   (vga16fb.c, video_gx.c).
> > 
> > Given this, I believe that it should be safe to just ignore this value if
> > changing is not supported. It seems that any portable fbdev application
> > which was not written only for one specific device working under one
> > specific kernel version should not rely on any particular behaviour of
> > pixclock anyway.
> > 
> > However, while enabling SDL1 applications to work out of the box when
> > there is no /etc/fb.modes with valid settings, this change affects the
> > video mode choosing logic in SDL. Depending on current screen
> > resolution, contents of /etc/fb.modes and resolution requested by
> > application, this may lead to user-visible difference (not always):
> > image will be displayed in a right way, but it will be aligned to the
> > left instead of center. There is no "right behaviour" here as well, as
> > emulated fbdev, opposing to old fbdev drivers, simply ignores any
> > requsts of video mode changes with resolutions smaller than current.
> > 
> > Feel free to NAK this patch if you think that it causes breakage of
> > user-space =).
> 
> It's a regression, we don't nack regression fixes :-)
> 
> > The easiest way to reproduce this problem is to install sdl-sopwith[2],
> > remove /etc/fb.modes file if it exists, and then try to run sopwith
> > from console without X. At least in Fedora 29, sopwith may be simply
> > installed from standard repositories.
> > 
> > [1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c, vesa_timings
> > [2] http://sdl-sopwith.sourceforge.net/
> > 
> > Signed-off-by: Ivan Mironov 
> 
> I thought this is also a regression fix, so also needs Fixes: and cc:
> stable?
> -Daniel

This particular patch fixes a bug that existed for 10 years unnoticed.
Are "Fixes:" and "Cc: stable" really required?

"pixclock != 0" check was introduced in this file by 785b93ef8c309
("drm/kms: move driver specific fb common code to helper functions
(v2)"). But actually similar code existed in the device-specific
drivers even earlier.

> 
> > ---
> >  drivers/gpu/drm/drm_fb_helper.c | 7 ++-
> >  1 file changed, 6 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/drm_fb_helper.c 
> > b/drivers/gpu/drm/drm_fb_helper.c
> > index aff576c3c4fb..b95a0c23c7c8 100644
> > --- a/drivers/gpu/drm/drm_fb_helper.c
> > +++ b/drivers/gpu/drm/drm_fb_helper.c
> > @@ -1690,9 +1690,14 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo 
> > *var,
> > struct drm_fb_helper *fb_helper = info->par;
> > struct drm_framebuffer *fb = fb_helper->fb;
> >  
> > -   if (var->pixclock != 0 || in_dbg_master())
> > +   if (in_dbg_master())
> > return -EINVAL;
> >  
> > +   if (var->pixclock != 0) {
> > +   DRM_DEBUG("fbdev emulation doesn't support changing the pixel 
> > clock, value of pixclock is ignored\n");
> > +   var->pixclock = 0;
> > +   }
> > +
> > if ((drm_format_info_block_width(fb->format, 0) > 1) ||
> > (drm_format_info_block_height(fb->format, 0) > 1))
> > return -EINVAL;
> > -- 
> > 2.20.1
> >

Re: [PATCH v1 1/2] drm/fb-helper: Bring back workaround for bugs of SDL 1.2

2019-01-05 Thread Ivan Mironov

On Fri, 2018-12-28 at 13:15 +0100, Daniel Vetter wrote:
> On Fri, Dec 28, 2018 at 04:13:07AM +0500, Ivan Mironov wrote:
> > SDL 1.2 sets all fields related to the pixel format to zero in some
> > cases[1]. Prior to commit db05c48197759 ("drm: fb-helper: Reject all
> > pixel format changing requests"), there was an unintentional workaround
> > for this that existed for more than a decade. First in device-specific DRM
> > drivers, then here in drm_fb_helper.c.
> > 
> > Previous code containing this workaround just ignores pixel format fields
> > from userspace code. Not a good thing either, as this way, driver may
> > silently use pixel format different from what client actually requested,
> > and this in turn will lead to displaying garbage on the screen. I think
> > that returning EINVAL to userspace in this particular case is the right
> > option, so I decided to left code from problematic commit untouched
> > instead of just reverting it entirely.
> > 
> > Here is the steps required to reproduce this problem exactly:
> > 1) Compile fceux[2] with SDL 1.2.15 and without GTK or OpenGL
> >support. SDL should be compiled with fbdev support (which is
> >on by default).
> > 2) Create /etc/fb.modes with following contents (values seems
> >not used, and just required to trigger problematic code in
> >SDL):
> > 
> > mode "test"
> > geometry 1 1 1 1 1
> > timings 1 1 1 1 1 1 1
> > endmode
> > 
> > 3) Create ~/.fceux/fceux.cfg with following contents:
> > 
> > SDL.Hotkeys.Quit = 27
> > SDL.DoubleBuffering = 1
> > 
> > 4) Ensure that screen resolution is at least 1280x960 (e.g.
> >append "video=Virtual-1:1280x960-32" to the kernel cmdline
> >for qemu/QXL).
> > 
> > 5) Try to run fceux on VT with some ROM file[3]:
> > 
> > # ./fceux color_test.nes
> > 
> > [1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c,
> > FB_SetVideoMode()
> > [2] http://www.fceux.com
> > [3] Example ROM: 
> > https://github.com/bokuweb/rustynes/blob/master/roms/color_test.nes
> > 
> > Reported-by: saahriktu 
> > Suggested-by: saahriktu 
> > Cc: sta...@vger.kernel.org
> > Fixes: db05c48197759 ("drm: fb-helper: Reject all pixel format changing 
> > requests")
> > Signed-off-by: Ivan Mironov 
> > ---
> >  drivers/gpu/drm/drm_fb_helper.c | 146 
> >  1 file changed, 93 insertions(+), 53 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/drm_fb_helper.c 
> > b/drivers/gpu/drm/drm_fb_helper.c
> > index d3af098b0922..aff576c3c4fb 100644
> > --- a/drivers/gpu/drm/drm_fb_helper.c
> > +++ b/drivers/gpu/drm/drm_fb_helper.c
> > @@ -1621,6 +1621,64 @@ static bool drm_fb_pixel_format_equal(const struct 
> > fb_var_screeninfo *var_1,
> >var_1->transp.msb_right == var_2->transp.msb_right;
> >  }
> >  
> > +static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var,
> > +u8 depth)
> > +{
> > +   switch (depth) {
> > +   case 8:
> > +   var->red.offset = 0;
> > +   var->green.offset = 0;
> > +   var->blue.offset = 0;
> > +   var->red.length = 8; /* 8bit DAC */
> > +   var->green.length = 8;
> > +   var->blue.length = 8;
> > +   var->transp.offset = 0;
> > +   var->transp.length = 0;
> > +   break;
> > +   case 15:
> > +   var->red.offset = 10;
> > +   var->green.offset = 5;
> > +   var->blue.offset = 0;
> > +   var->red.length = 5;
> > +   var->green.length = 5;
> > +   var->blue.length = 5;
> > +   var->transp.offset = 15;
> > +   var->transp.length = 1;
> > +   break;
> > +   case 16:
> > +   var->red.offset = 11;
> > +   var->green.offset = 5;
> > +   var->blue.offset = 0;
> > +   var->red.length = 5;
> > +   var->green.length = 6;
> > +   var->blue.length = 5;
> > +   var->transp.offset = 0;
> > +   break;
> > +   case 24:
> > +   var->red.offset = 16;
> > +   var->green.offset = 8;
> > +   var->blue.offset = 0;
> > +   var->

Re: [RFC PATCH 1/2] drm/fb-helper: Bring back workaround for bugs of SDL 1.2

2018-12-27 Thread Ivan Mironov

On Thu, 2018-12-27 at 13:00 +0100, Daniel Vetter wrote: 
> > +   /*
> > +* Workaround for SDL 1.2, which is known to be setting all pixel format
> > +* fields values to zero in some cases. We treat this situation as a
> > +* kind of "use some reasonable autodetected values".
> > +*/
> > +   if (!var->red.offset && !var->green.offset&&
> > +   !var->blue.offset&& !var->transp.offset   &&
> > +   !var->red.length && !var->green.length&&
> > +   !var->blue.length&& !var->transp.length   &&
> > +   !var->red.msb_right  && !var->green.msb_right &&
> > +   !var->blue.msb_right && !var->transp.msb_right) {
> > +   u8 depth;
> > +
> > +   /*
> > +* There is no way to guess the right value for depth when
> > +* bpp is 16 or 32. So we just restore the behaviour previously
> > +* introduced here by commit 785b93ef8c309. In fact, this was
> > +* implemented even earlier in various device drivers.
> > +*/
> > +   switch (var->bits_per_pixel) {
> > +   case 16:
> > +   depth = 15;
> > +   break;
> > +   case 32:
> > +   depth = 24;
> > +   break;
> > +   default:
> > +   depth = var->bits_per_pixel;
> > +   break;
> > +   }
> 
> The guesswork here looks fishy. We should still have the drm-side format,
> and should use that.

This existed for a very long time until problematic commit was applied.
And there is a clear evidence that it was actually used by
applications.

> Otherwise the patches look good I think, but they
> need a Fixes: tag and cc: stable so backporters know what to do with
> these.
> 

I added "cc: stable" into the regression fix. Also added more info into
the commit messages. See the PATCH v1 in the mailing list.

Thanks.

[PATCH v1 1/2] drm/fb-helper: Bring back workaround for bugs of SDL 1.2

2018-12-27 Thread Ivan Mironov

SDL 1.2 sets all fields related to the pixel format to zero in some
cases[1]. Prior to commit db05c48197759 ("drm: fb-helper: Reject all
pixel format changing requests"), there was an unintentional workaround
for this that existed for more than a decade. First in device-specific DRM
drivers, then here in drm_fb_helper.c.

Previous code containing this workaround just ignores pixel format fields
from userspace code. Not a good thing either, as this way, driver may
silently use pixel format different from what client actually requested,
and this in turn will lead to displaying garbage on the screen. I think
that returning EINVAL to userspace in this particular case is the right
option, so I decided to left code from problematic commit untouched
instead of just reverting it entirely.

Here is the steps required to reproduce this problem exactly:
1) Compile fceux[2] with SDL 1.2.15 and without GTK or OpenGL
   support. SDL should be compiled with fbdev support (which is
   on by default).
2) Create /etc/fb.modes with following contents (values seems
   not used, and just required to trigger problematic code in
   SDL):

mode "test"
geometry 1 1 1 1 1
timings 1 1 1 1 1 1 1
endmode

3) Create ~/.fceux/fceux.cfg with following contents:

SDL.Hotkeys.Quit = 27
SDL.DoubleBuffering = 1

4) Ensure that screen resolution is at least 1280x960 (e.g.
   append "video=Virtual-1:1280x960-32" to the kernel cmdline
   for qemu/QXL).

5) Try to run fceux on VT with some ROM file[3]:

# ./fceux color_test.nes

[1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c,
FB_SetVideoMode()
[2] http://www.fceux.com
[3] Example ROM: 
https://github.com/bokuweb/rustynes/blob/master/roms/color_test.nes

Reported-by: saahriktu 
Suggested-by: saahriktu 
Cc: sta...@vger.kernel.org
Fixes: db05c48197759 ("drm: fb-helper: Reject all pixel format changing 
requests")
Signed-off-by: Ivan Mironov 
---
 drivers/gpu/drm/drm_fb_helper.c | 146 
 1 file changed, 93 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index d3af098b0922..aff576c3c4fb 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1621,6 +1621,64 @@ static bool drm_fb_pixel_format_equal(const struct 
fb_var_screeninfo *var_1,
   var_1->transp.msb_right == var_2->transp.msb_right;
 }
 
+static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var,
+u8 depth)
+{
+   switch (depth) {
+   case 8:
+   var->red.offset = 0;
+   var->green.offset = 0;
+   var->blue.offset = 0;
+   var->red.length = 8; /* 8bit DAC */
+   var->green.length = 8;
+   var->blue.length = 8;
+   var->transp.offset = 0;
+   var->transp.length = 0;
+   break;
+   case 15:
+   var->red.offset = 10;
+   var->green.offset = 5;
+   var->blue.offset = 0;
+   var->red.length = 5;
+   var->green.length = 5;
+   var->blue.length = 5;
+   var->transp.offset = 15;
+   var->transp.length = 1;
+   break;
+   case 16:
+   var->red.offset = 11;
+   var->green.offset = 5;
+   var->blue.offset = 0;
+   var->red.length = 5;
+   var->green.length = 6;
+   var->blue.length = 5;
+   var->transp.offset = 0;
+   break;
+   case 24:
+   var->red.offset = 16;
+   var->green.offset = 8;
+   var->blue.offset = 0;
+   var->red.length = 8;
+   var->green.length = 8;
+   var->blue.length = 8;
+   var->transp.offset = 0;
+   var->transp.length = 0;
+   break;
+   case 32:
+   var->red.offset = 16;
+   var->green.offset = 8;
+   var->blue.offset = 0;
+   var->red.length = 8;
+   var->green.length = 8;
+   var->blue.length = 8;
+   var->transp.offset = 24;
+   var->transp.length = 8;
+   break;
+   default:
+   break;
+   }
+}
+
 /**
  * drm_fb_helper_check_var - implementation for _ops.fb_check_var
  * @var: screeninfo to check
@@ -1654,6 +1712,40 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo 
*var,
return -EINVAL;
}
 
+   /*
+* Workaround for SDL 1.2, which is known to b

[PATCH v1 2/2] drm/fb-helper: Ignore the value of fb_var_screeninfo.pixclock

2018-12-27 Thread Ivan Mironov

Strict requirement of pixclock to be zero breaks support of SDL 1.2
which contains hardcoded table of supported video modes with non-zero
pixclock values[1].

To better understand which pixclock values are considered valid and how
driver should handle these values, I briefly examined few existing fbdev
drivers and documentation in Documentation/fb/. And it looks like there
are no strict rules on that and actual behaviour varies:

* some drivers treat (pixclock == 0) as "use defaults" (uvesafb.c);
* some treat (pixclock == 0) as invalid value which leads to
  -EINVAL (clps711x-fb.c);
* some pass converted pixclock value to hardware (uvesafb.c);
* some are trying to find nearest value from predefined table
  (vga16fb.c, video_gx.c).

Given this, I believe that it should be safe to just ignore this value if
changing is not supported. It seems that any portable fbdev application
which was not written only for one specific device working under one
specific kernel version should not rely on any particular behaviour of
pixclock anyway.

However, while enabling SDL1 applications to work out of the box when
there is no /etc/fb.modes with valid settings, this change affects the
video mode choosing logic in SDL. Depending on current screen
resolution, contents of /etc/fb.modes and resolution requested by
application, this may lead to user-visible difference (not always):
image will be displayed in a right way, but it will be aligned to the
left instead of center. There is no "right behaviour" here as well, as
emulated fbdev, opposing to old fbdev drivers, simply ignores any
requsts of video mode changes with resolutions smaller than current.

Feel free to NAK this patch if you think that it causes breakage of
user-space =).

The easiest way to reproduce this problem is to install sdl-sopwith[2],
remove /etc/fb.modes file if it exists, and then try to run sopwith
from console without X. At least in Fedora 29, sopwith may be simply
installed from standard repositories.

[1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c, vesa_timings
[2] http://sdl-sopwith.sourceforge.net/

Signed-off-by: Ivan Mironov 
---
 drivers/gpu/drm/drm_fb_helper.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index aff576c3c4fb..b95a0c23c7c8 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1690,9 +1690,14 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo 
*var,
struct drm_fb_helper *fb_helper = info->par;
struct drm_framebuffer *fb = fb_helper->fb;
 
-   if (var->pixclock != 0 || in_dbg_master())
+   if (in_dbg_master())
return -EINVAL;
 
+   if (var->pixclock != 0) {
+   DRM_DEBUG("fbdev emulation doesn't support changing the pixel 
clock, value of pixclock is ignored\n");
+   var->pixclock = 0;
+   }
+
if ((drm_format_info_block_width(fb->format, 0) > 1) ||
(drm_format_info_block_height(fb->format, 0) > 1))
return -EINVAL;
-- 
2.20.1

[PATCH v1 0/2] Fix SDL 1.2 on emulated fbdev devices (broken in kernels >=4.19)

2018-12-27 Thread Ivan Mironov

Hi,

Originally this issue was brought up on linux.org.ru forum by user
saahriktu, he is on Cc. He discovered that commit db05c48197759
("drm: fb-helper: Reject all pixel format changing requests") breaks
support of SDL1 programs, like various old games and emulators of old
game consoles. First patch contains fix for that commit.

I tried to reproduce the same issue in a VM under qemu, and found yet
another part of kernel code which prevents SDL1 apps from running
normally. Second patch in this series fixes this problem.

Also, it seems that at least in some cases both problems could be
circumvented by adding appropriate modes into /etc/fb.modes. But without
examining the kernel code it is not clear which values are correct. I am
not sure that such circumvention covers all possible cases, and it is
definitely far from any user-frienliness.

First patch in this series fixes a clear regression. Second patch is
optional, please read commit message carefully before applying it.

Changes in v1:
- Added "Cc: stable" to the patch which fixes known regression.
- Added more information and detailed reproduction steps in commit
  messages.

Changes in v0:
- RFC patch series introduced.

Ivan Mironov (2):
  drm/fb-helper: Bring back workaround for bugs of SDL 1.2
  drm/fb-helper: Ignore the value of fb_var_screeninfo.pixclock

 drivers/gpu/drm/drm_fb_helper.c | 153 +---
 1 file changed, 99 insertions(+), 54 deletions(-)

-- 
2.20.1

[RFC PATCH 1/2] drm/fb-helper: Bring back workaround for bugs of SDL 1.2

2018-12-26 Thread Ivan Mironov

SDL 1.2 sets all fields related to the pixel format to zero in some
cases[1]. Prior to commit db05c48197759 ("drm: fb-helper: Reject all
pixel format changing requests"), there was an unintentional workaround
for this that existed for more than a decade. First in device-specific DRM
drivers, then here in drm_fb_helper.c.

Previous code containing this workaround just ignores pixel format fields
from userspace code. Not a good thing either, as this way, driver may
silently use pixel format different from what client actually requested,
and this in turn will lead to displaying garbage on the screen. I think
that returning EINVAL to userspace in this particular case is the right
option, so I decided to left code from problematic commit untouched
instead of just reverting it entirely.

[1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c,
FB_SetVideoMode()

Reported-by: saahriktu 
Suggested-by: saahriktu 
Fixes: db05c48197759 ("drm: fb-helper: Reject all pixel format changing 
requests")
Signed-off-by: Ivan Mironov 
---
 drivers/gpu/drm/drm_fb_helper.c | 146 
 1 file changed, 93 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index d3af098b0922..aff576c3c4fb 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1621,6 +1621,64 @@ static bool drm_fb_pixel_format_equal(const struct 
fb_var_screeninfo *var_1,
   var_1->transp.msb_right == var_2->transp.msb_right;
 }
 
+static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var,
+u8 depth)
+{
+   switch (depth) {
+   case 8:
+   var->red.offset = 0;
+   var->green.offset = 0;
+   var->blue.offset = 0;
+   var->red.length = 8; /* 8bit DAC */
+   var->green.length = 8;
+   var->blue.length = 8;
+   var->transp.offset = 0;
+   var->transp.length = 0;
+   break;
+   case 15:
+   var->red.offset = 10;
+   var->green.offset = 5;
+   var->blue.offset = 0;
+   var->red.length = 5;
+   var->green.length = 5;
+   var->blue.length = 5;
+   var->transp.offset = 15;
+   var->transp.length = 1;
+   break;
+   case 16:
+   var->red.offset = 11;
+   var->green.offset = 5;
+   var->blue.offset = 0;
+   var->red.length = 5;
+   var->green.length = 6;
+   var->blue.length = 5;
+   var->transp.offset = 0;
+   break;
+   case 24:
+   var->red.offset = 16;
+   var->green.offset = 8;
+   var->blue.offset = 0;
+   var->red.length = 8;
+   var->green.length = 8;
+   var->blue.length = 8;
+   var->transp.offset = 0;
+   var->transp.length = 0;
+   break;
+   case 32:
+   var->red.offset = 16;
+   var->green.offset = 8;
+   var->blue.offset = 0;
+   var->red.length = 8;
+   var->green.length = 8;
+   var->blue.length = 8;
+   var->transp.offset = 24;
+   var->transp.length = 8;
+   break;
+   default:
+   break;
+   }
+}
+
 /**
  * drm_fb_helper_check_var - implementation for _ops.fb_check_var
  * @var: screeninfo to check
@@ -1654,6 +1712,40 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo 
*var,
return -EINVAL;
}
 
+   /*
+* Workaround for SDL 1.2, which is known to be setting all pixel format
+* fields values to zero in some cases. We treat this situation as a
+* kind of "use some reasonable autodetected values".
+*/
+   if (!var->red.offset && !var->green.offset&&
+   !var->blue.offset&& !var->transp.offset   &&
+   !var->red.length && !var->green.length&&
+   !var->blue.length&& !var->transp.length   &&
+   !var->red.msb_right  && !var->green.msb_right &&
+   !var->blue.msb_right && !var->transp.msb_right) {
+   u8 depth;
+
+   /*
+* There is no way to guess the right value for depth when
+* bpp is 16 or 32. So we just restore the behaviour previously
+* introduced here by commit 785b93ef8c309. In fact, this was
+* implemented even earlier in various device drivers.
+*/
+   switch (var-&g

[RFC PATCH 2/2] drm/fb-helper: Ignore the value of fb_var_screeninfo.pixclock

2018-12-26 Thread Ivan Mironov

Strict requirement of pixclock to be zero breaks support of SDL 1.2
which contains hardcoded table of supported video modes with non-zero
pixclock values[1].

To better understand which pixclock values are considered valid and how
driver should handle these values, I briefly examined few existing fbdev
drivers and documentation in Documentation/fb/. And it looks like there
are no strict rules on that and actual behaviour varies:

* some drivers treat (pixclock == 0) as "use defaults" (uvesafb.c);
* some treat (pixclock == 0) as invalid value which leads to
  -EINVAL (clps711x-fb.c);
* some pass converted pixclock value to hardware (uvesafb.c);
* some are trying to find nearest value from predefined table
  (vga16fb.c, video_gx.c).

Given this, I believe that it should be safe to just ignore this value if
changing is not supported. It seems that any portable fbdev application
which was not written only for one specific device working under one
specific kernel version should not rely on any particular behaviour of
pixclock anyway.

[1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c, vesa_timings

Signed-off-by: Ivan Mironov 
---
 drivers/gpu/drm/drm_fb_helper.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index aff576c3c4fb..b95a0c23c7c8 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1690,9 +1690,14 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo 
*var,
struct drm_fb_helper *fb_helper = info->par;
struct drm_framebuffer *fb = fb_helper->fb;
 
-   if (var->pixclock != 0 || in_dbg_master())
+   if (in_dbg_master())
return -EINVAL;
 
+   if (var->pixclock != 0) {
+   DRM_DEBUG("fbdev emulation doesn't support changing the pixel 
clock, value of pixclock is ignored\n");
+   var->pixclock = 0;
+   }
+
if ((drm_format_info_block_width(fb->format, 0) > 1) ||
(drm_format_info_block_height(fb->format, 0) > 1))
return -EINVAL;
-- 
2.20.1

[RFC PATCH 0/2] Fix SDL 1.2 on emulated fbdev devices (broken in kernels >=4.19)

2018-12-26 Thread Ivan Mironov

Hi,

Originally this issue was brought up on linux.org.ru forum by user
saahriktu, he is on Cc. He discovered that commit db05c48197759
("drm: fb-helper: Reject all pixel format changing requests") breaks
support of SDL1 programs, like various old games and emulators of old
game consoles. First patch contains fix for that commit.

I tried to reproduce the same issue in a VM under qemu, and found yet
another part of kernel code which prevents SDL1 apps from running
normally. Second patch in this series fixes this problem.

However, I still have few unresolved questions regarding this patches:
* Why simple revert of commit db05c48197759 fixes SDL1 support for
  saahriktu? I do not understand how this could work when SDL1 has
  hardcoded non-zero pixclock values, for which current code always
  returns EINVAL.
* How this worked before? How this is working for non-DRM fbdev drivers?
  fbdev support code in SDL1 is also very old by the way.

Also, it seems that at least in some cases both problems could be
circumvented by adding appropriate modes into /etc/fb.modes. But without
examining the kernel code it is not clear which values are correct. I am
not sure that such circumvention covers all possible cases, and it is
definitely far from any user-frienliness.

Ah, almost forgot this: the easiest way to reproduce both problems is to
install sdl-sopwith[1] and try to run it from console without X. At
least in Fedora 29, sopwith may be simply installed from standard
repositories.

[1] http://sdl-sopwith.sourceforge.net/

Ivan Mironov (2):
  drm/fb-helper: Bring back workaround for bugs of SDL 1.2
  drm/fb-helper: Ignore the value of fb_var_screeninfo.pixclock

 drivers/gpu/drm/drm_fb_helper.c | 153 +---
 1 file changed, 99 insertions(+), 54 deletions(-)

-- 
2.20.1

Re: [EXT] [PATCH v1] bnx2x: Fix NULL pointer dereference in bnx2x_del_all_vlans() on some hw

2018-12-24 Thread Ivan Mironov

On Mon, 2018-12-24 at 14:26 +, Sudarsana Reddy Kalluru wrote:
> Thanks a lot for root-causing the issue and the patch.
> Another (simpler) way to address this would be to invoke 
> bnx2x_del_all_vlans() only for the newer chips, e.g., 
> +   if (!CHIP_IS_E1(bp)) {
>   /* Remove all currently configured VLANs */
>rc = bnx2x_del_all_vlans(bp);
>if (rc < 0)
>BNX2X_ERR("Failed to delete all VLANs\n");
> +   }
> Could you please check on this?

Done. See the PATCH v2 in the mailing list. I tested it on my system.

[PATCH v2] bnx2x: Fix NULL pointer dereference in bnx2x_del_all_vlans() on some hw

2018-12-24 Thread Ivan Mironov

This happened when I tried to boot normal Fedora 29 system with latest
available kernel (from fedora rawhide, plus some unrelated custom
patches):

BUG: unable to handle kernel NULL pointer dereference at 

PGD 0 P4D 0
Oops: 0010 [#1] SMP PTI
CPU: 6 PID: 1422 Comm: libvirtd Tainted: G  I   
4.20.0-0.rc7.git3.hpsa2.1.fc29.x86_64 #1
Hardware name: HP ProLiant BL460c G6, BIOS I24 05/21/2018
RIP: 0010:  (null)
Code: Bad RIP value.
RSP: 0018:a47ccdc9fbe0 EFLAGS: 00010246
RAX:  RBX: 03e8 RCX: a47ccdc9fbf8
RDX: a47ccdc9fc00 RSI: 97d9ee7b01f8 RDI: 97d9f0150b80
RBP: 97d9f0150b80 R08:  R09: 
R10:  R11:  R12: 0003
R13: 97d9ef1e53e8 R14: 0009 R15: 97d9f0ac6730
FS:  7f4d224ef700() GS:97d9fa20() 
knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: ffd6 CR3: 0011ece52006 CR4: 000206e0
Call Trace:
 ? bnx2x_chip_cleanup+0x195/0x610 [bnx2x]
 ? bnx2x_nic_unload+0x1e2/0x8f0 [bnx2x]
 ? bnx2x_reload_if_running+0x24/0x40 [bnx2x]
 ? bnx2x_set_features+0x79/0xa0 [bnx2x]
 ? __netdev_update_features+0x244/0x9e0
 ? netlink_broadcast_filtered+0x136/0x4b0
 ? netdev_update_features+0x22/0x60
 ? dev_disable_lro+0x1c/0xe0
 ? devinet_sysctl_forward+0x1c6/0x211
 ? proc_sys_call_handler+0xab/0x100
 ? __vfs_write+0x36/0x1a0
 ? rcu_read_lock_sched_held+0x79/0x80
 ? rcu_sync_lockdep_assert+0x2e/0x60
 ? __sb_start_write+0x14c/0x1b0
 ? vfs_write+0x159/0x1c0
 ? vfs_write+0xba/0x1c0
 ? ksys_write+0x52/0xc0
 ? do_syscall_64+0x60/0x1f0
 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe

After some investigation I figured out that recently added cleanup code
tries to call VLAN filtering de-initialization function which exist only
for newer hardware. Corresponding function pointer is not
set (== 0) for older hardware, namely these chips:

#define CHIP_NUM_57710  0x164e
#define CHIP_NUM_57711  0x164f
#define CHIP_NUM_57711E 0x1650

And I have one of those in my test system:

Broadcom Inc. and subsidiaries NetXtreme II BCM57711E 10-Gigabit PCIe 
[14e4:1650]

Function bnx2x_init_vlan_mac_fp_objs() from
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h decides whether to
initialize relevant pointers in bnx2x_sp_objs.vlan_obj or not.

This regression was introduced after v4.20-rc7, and still exists in v4.20
release.

Fixes: 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload 
sequence.")
Signed-off-by: Ivan Mironov 
---
v2:
- As suggested by Sudarsana Reddy Kalluru, do not call bnx2x_del_all_vlans() at
  all if (!CHIP_IS_E1x(bp)).
v1:
- Check for chip num instead of (vlan_obj->delete_all != 0).
v0:
- Patch introduced.
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index b164f705709d..3b5b47e98c73 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9360,10 +9360,16 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int 
unload_mode, bool keep_link)
BNX2X_ERR("Failed to schedule DEL commands for UC MACs list: 
%d\n",
  rc);
 
-   /* Remove all currently configured VLANs */
-   rc = bnx2x_del_all_vlans(bp);
-   if (rc < 0)
-   BNX2X_ERR("Failed to delete all VLANs\n");
+   /* The whole *vlan_obj structure may be not initialized if VLAN
+* filtering offload is not supported by hardware. Currently this is
+* true for all hardware covered by CHIP_IS_E1x().
+*/
+   if (!CHIP_IS_E1x(bp)) {
+   /* Remove all currently configured VLANs */
+   rc = bnx2x_del_all_vlans(bp);
+   if (rc < 0)
+   BNX2X_ERR("Failed to delete all VLANs\n");
+   }
 
/* Disable LLH */
if (!CHIP_IS_E1(bp))
-- 
2.20.1

[PATCH v1] bnx2x: Fix NULL pointer dereference in bnx2x_del_all_vlans() on some hw

2018-12-24 Thread Ivan Mironov

This happened when I tried to boot normal Fedora 29 system with latest
available kernel (from fedora rawhide, plus some unrelated custom
patches):

BUG: unable to handle kernel NULL pointer dereference at 

PGD 0 P4D 0
Oops: 0010 [#1] SMP PTI
CPU: 6 PID: 1422 Comm: libvirtd Tainted: G  I   
4.20.0-0.rc7.git3.hpsa2.1.fc29.x86_64 #1
Hardware name: HP ProLiant BL460c G6, BIOS I24 05/21/2018
RIP: 0010:  (null)
Code: Bad RIP value.
RSP: 0018:a47ccdc9fbe0 EFLAGS: 00010246
RAX:  RBX: 03e8 RCX: a47ccdc9fbf8
RDX: a47ccdc9fc00 RSI: 97d9ee7b01f8 RDI: 97d9f0150b80
RBP: 97d9f0150b80 R08:  R09: 
R10:  R11:  R12: 0003
R13: 97d9ef1e53e8 R14: 0009 R15: 97d9f0ac6730
FS:  7f4d224ef700() GS:97d9fa20() 
knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: ffd6 CR3: 0011ece52006 CR4: 000206e0
Call Trace:
 ? bnx2x_chip_cleanup+0x195/0x610 [bnx2x]
 ? bnx2x_nic_unload+0x1e2/0x8f0 [bnx2x]
 ? bnx2x_reload_if_running+0x24/0x40 [bnx2x]
 ? bnx2x_set_features+0x79/0xa0 [bnx2x]
 ? __netdev_update_features+0x244/0x9e0
 ? netlink_broadcast_filtered+0x136/0x4b0
 ? netdev_update_features+0x22/0x60
 ? dev_disable_lro+0x1c/0xe0
 ? devinet_sysctl_forward+0x1c6/0x211
 ? proc_sys_call_handler+0xab/0x100
 ? __vfs_write+0x36/0x1a0
 ? rcu_read_lock_sched_held+0x79/0x80
 ? rcu_sync_lockdep_assert+0x2e/0x60
 ? __sb_start_write+0x14c/0x1b0
 ? vfs_write+0x159/0x1c0
 ? vfs_write+0xba/0x1c0
 ? ksys_write+0x52/0xc0
 ? do_syscall_64+0x60/0x1f0
 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe

After some investigation I figured out that recently added cleanup code
tries to call VLAN filtering de-initialization function which exist only
for newer hardware. Corresponding function pointer is not
set (== 0) for older hardware, namely these chips:

#define CHIP_NUM_57710  0x164e
#define CHIP_NUM_57711  0x164f
#define CHIP_NUM_57711E 0x1650

And I have one of those in my test system:

Broadcom Inc. and subsidiaries NetXtreme II BCM57711E 10-Gigabit PCIe 
[14e4:1650]

Function bnx2x_init_vlan_mac_fp_objs() from
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h decides whether to
initialize relevant pointers in bnx2x_sp_objs.vlan_obj or not.

This regression was introduced after v4.20-rc7, and still exists in v4.20
release.

Fixes: 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload 
sequence.")
Signed-off-by: Ivan Mironov 
---
v1:
- Check for chip num instead of (vlan_obj->delete_all != 0).
v0:
- Patch introduced.
---
 .../net/ethernet/broadcom/bnx2x/bnx2x_main.c  | 22 +--
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index b164f705709d..4d72e9948a58 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -8504,15 +8504,23 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan,
 static int bnx2x_del_all_vlans(struct bnx2x *bp)
 {
struct bnx2x_vlan_mac_obj *vlan_obj = >sp_objs[0].vlan_obj;
-   unsigned long ramrod_flags = 0, vlan_flags = 0;
struct bnx2x_vlan_entry *vlan;
-   int rc;
 
-   __set_bit(RAMROD_COMP_WAIT, _flags);
-   __set_bit(BNX2X_VLAN, _flags);
-   rc = vlan_obj->delete_all(bp, vlan_obj, _flags, _flags);
-   if (rc)
-   return rc;
+   /* The whole *vlan_obj structure may be not initialized if VLAN
+* filtering offload is not supported by hardware. Currently this is
+* true for all hardware covered by CHIP_IS_E1x().
+*/
+   if (!CHIP_IS_E1x(bp)) {
+   unsigned long ramrod_flags = 0, vlan_flags = 0;
+   int rc;
+
+   __set_bit(RAMROD_COMP_WAIT, _flags);
+   __set_bit(BNX2X_VLAN, _flags);
+   rc = vlan_obj->delete_all(bp, vlan_obj, _flags,
+ _flags);
+   if (rc)
+   return rc;
+   }
 
/* Mark that hw forgot all entries */
list_for_each_entry(vlan, >vlan_reg, link)
-- 
2.20.1

Re: [PATCH] bnx2x: Fix NULL pointer dereference in bnx2x_del_all_vlans() on some hw

2018-12-24 Thread Ivan Mironov

4.20 release is affected too.

On Sun, 2018-12-23 at 20:29 +0500, Ivan Mironov wrote:
> This happened when I tried to boot normal Fedora 29 system with latest
> available kernel (from fedora rawhide, plus some unrelated custom
> patches):
> 
>   BUG: unable to handle kernel NULL pointer dereference at 
> 
>   PGD 0 P4D 0
>   Oops: 0010 [#1] SMP PTI
>   CPU: 6 PID: 1422 Comm: libvirtd Tainted: G  I   
> 4.20.0-0.rc7.git3.hpsa2.1.fc29.x86_64 #1
>   Hardware name: HP ProLiant BL460c G6, BIOS I24 05/21/2018
>   RIP: 0010:  (null)
>   Code: Bad RIP value.
>   RSP: 0018:a47ccdc9fbe0 EFLAGS: 00010246
>   RAX:  RBX: 03e8 RCX: a47ccdc9fbf8
>   RDX: a47ccdc9fc00 RSI: 97d9ee7b01f8 RDI: 97d9f0150b80
>   RBP: 97d9f0150b80 R08:  R09: 
>   R10:  R11:  R12: 0003
>   R13: 97d9ef1e53e8 R14: 0009 R15: 97d9f0ac6730
>   FS:  7f4d224ef700() GS:97d9fa20() 
> knlGS:
>   CS:  0010 DS:  ES:  CR0: 80050033
>   CR2: ffd6 CR3: 0011ece52006 CR4: 000206e0
>   Call Trace:
>? bnx2x_chip_cleanup+0x195/0x610 [bnx2x]
>? bnx2x_nic_unload+0x1e2/0x8f0 [bnx2x]
>? bnx2x_reload_if_running+0x24/0x40 [bnx2x]
>? bnx2x_set_features+0x79/0xa0 [bnx2x]
>? __netdev_update_features+0x244/0x9e0
>? netlink_broadcast_filtered+0x136/0x4b0
>? netdev_update_features+0x22/0x60
>? dev_disable_lro+0x1c/0xe0
>? devinet_sysctl_forward+0x1c6/0x211
>? proc_sys_call_handler+0xab/0x100
>? __vfs_write+0x36/0x1a0
>? rcu_read_lock_sched_held+0x79/0x80
>? rcu_sync_lockdep_assert+0x2e/0x60
>? __sb_start_write+0x14c/0x1b0
>? vfs_write+0x159/0x1c0
>? vfs_write+0xba/0x1c0
>? ksys_write+0x52/0xc0
>? do_syscall_64+0x60/0x1f0
>? entry_SYSCALL_64_after_hwframe+0x49/0xbe
> 
> After some investigation I figured out that recently added cleanup code
> tries to call VLAN filtering de-initialization function which exist only
> for newer hardware. Corresponding function pointer is not
> initialized (== 0) for older hardware, namely these chips:
> 
>   #define CHIP_NUM_57710  0x164e
>   #define CHIP_NUM_57711  0x164f
>   #define CHIP_NUM_57711E 0x1650
> 
> And I have one of those in my test system:
> 
>   02:00.0 Ethernet controller [0200]: Broadcom Inc. and subsidiaries 
> NetXtreme II BCM57711E 10-Gigabit PCIe [14e4:1650]
>   02:00.1 Ethernet controller [0200]: Broadcom Inc. and subsidiaries 
> NetXtreme II BCM57711E 10-Gigabit PCIe [14e4:1650]
> 
> Function bnx2x_init_vlan_mac_fp_objs() from
> drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h decides whether to
> initialize relevant pointers in bnx2x_sp_objs.vlan_obj or not.
> 
> This regression was introduced after v4.20-rc7.
> 
> Fixes: 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload 
> sequence.")
> Signed-off-by: Ivan Mironov 
> ---
>  .../net/ethernet/broadcom/bnx2x/bnx2x_main.c  | 22 +--
>  1 file changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> index b164f705709d..0e37c2484ac2 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> @@ -8504,15 +8504,23 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan,
>  static int bnx2x_del_all_vlans(struct bnx2x *bp)
>  {
>   struct bnx2x_vlan_mac_obj *vlan_obj = >sp_objs[0].vlan_obj;
> - unsigned long ramrod_flags = 0, vlan_flags = 0;
>   struct bnx2x_vlan_entry *vlan;
> - int rc;
>  
> - __set_bit(RAMROD_COMP_WAIT, _flags);
> - __set_bit(BNX2X_VLAN, _flags);
> - rc = vlan_obj->delete_all(bp, vlan_obj, _flags, _flags);
> - if (rc)
> - return rc;
> + /* The whole *vlan_obj structure may be not initialized if VLAN
> +  * filtering offload is not supported by hardware. Currently this is
> +  * true for all hardware covered by CHIP_IS_E1x().
> +  */
> + if (vlan_obj->delete_all) {
> + unsigned long ramrod_flags = 0, vlan_flags = 0;
> + int rc;
> +
> + __set_bit(RAMROD_COMP_WAIT, _flags);
> + __set_bit(BNX2X_VLAN, _flags);
> + rc = vlan_obj->delete_all(bp, vlan_obj, _flags,
> +   _flags);
> + if (rc)
> + return rc;
> + }
>  
>   /* Mark that hw forgot all entries */
>   list_for_each_entry(vlan, >vlan_reg, link)

[PATCH] bnx2x: Fix NULL pointer dereference in bnx2x_del_all_vlans() on some hw

2018-12-23 Thread Ivan Mironov

This happened when I tried to boot normal Fedora 29 system with latest
available kernel (from fedora rawhide, plus some unrelated custom
patches):

BUG: unable to handle kernel NULL pointer dereference at 

PGD 0 P4D 0
Oops: 0010 [#1] SMP PTI
CPU: 6 PID: 1422 Comm: libvirtd Tainted: G  I   
4.20.0-0.rc7.git3.hpsa2.1.fc29.x86_64 #1
Hardware name: HP ProLiant BL460c G6, BIOS I24 05/21/2018
RIP: 0010:  (null)
Code: Bad RIP value.
RSP: 0018:a47ccdc9fbe0 EFLAGS: 00010246
RAX:  RBX: 03e8 RCX: a47ccdc9fbf8
RDX: a47ccdc9fc00 RSI: 97d9ee7b01f8 RDI: 97d9f0150b80
RBP: 97d9f0150b80 R08:  R09: 
R10:  R11:  R12: 0003
R13: 97d9ef1e53e8 R14: 0009 R15: 97d9f0ac6730
FS:  7f4d224ef700() GS:97d9fa20() 
knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: ffd6 CR3: 0011ece52006 CR4: 000206e0
Call Trace:
 ? bnx2x_chip_cleanup+0x195/0x610 [bnx2x]
 ? bnx2x_nic_unload+0x1e2/0x8f0 [bnx2x]
 ? bnx2x_reload_if_running+0x24/0x40 [bnx2x]
 ? bnx2x_set_features+0x79/0xa0 [bnx2x]
 ? __netdev_update_features+0x244/0x9e0
 ? netlink_broadcast_filtered+0x136/0x4b0
 ? netdev_update_features+0x22/0x60
 ? dev_disable_lro+0x1c/0xe0
 ? devinet_sysctl_forward+0x1c6/0x211
 ? proc_sys_call_handler+0xab/0x100
 ? __vfs_write+0x36/0x1a0
 ? rcu_read_lock_sched_held+0x79/0x80
 ? rcu_sync_lockdep_assert+0x2e/0x60
 ? __sb_start_write+0x14c/0x1b0
 ? vfs_write+0x159/0x1c0
 ? vfs_write+0xba/0x1c0
 ? ksys_write+0x52/0xc0
 ? do_syscall_64+0x60/0x1f0
 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe

After some investigation I figured out that recently added cleanup code
tries to call VLAN filtering de-initialization function which exist only
for newer hardware. Corresponding function pointer is not
initialized (== 0) for older hardware, namely these chips:

#define CHIP_NUM_57710  0x164e
#define CHIP_NUM_57711  0x164f
#define CHIP_NUM_57711E 0x1650

And I have one of those in my test system:

02:00.0 Ethernet controller [0200]: Broadcom Inc. and subsidiaries 
NetXtreme II BCM57711E 10-Gigabit PCIe [14e4:1650]
02:00.1 Ethernet controller [0200]: Broadcom Inc. and subsidiaries 
NetXtreme II BCM57711E 10-Gigabit PCIe [14e4:1650]

Function bnx2x_init_vlan_mac_fp_objs() from
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h decides whether to
initialize relevant pointers in bnx2x_sp_objs.vlan_obj or not.

This regression was introduced after v4.20-rc7.

Fixes: 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload 
sequence.")
Signed-off-by: Ivan Mironov 
---
 .../net/ethernet/broadcom/bnx2x/bnx2x_main.c  | 22 +--
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index b164f705709d..0e37c2484ac2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -8504,15 +8504,23 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan,
 static int bnx2x_del_all_vlans(struct bnx2x *bp)
 {
struct bnx2x_vlan_mac_obj *vlan_obj = >sp_objs[0].vlan_obj;
-   unsigned long ramrod_flags = 0, vlan_flags = 0;
struct bnx2x_vlan_entry *vlan;
-   int rc;
 
-   __set_bit(RAMROD_COMP_WAIT, _flags);
-   __set_bit(BNX2X_VLAN, _flags);
-   rc = vlan_obj->delete_all(bp, vlan_obj, _flags, _flags);
-   if (rc)
-   return rc;
+   /* The whole *vlan_obj structure may be not initialized if VLAN
+* filtering offload is not supported by hardware. Currently this is
+* true for all hardware covered by CHIP_IS_E1x().
+*/
+   if (vlan_obj->delete_all) {
+   unsigned long ramrod_flags = 0, vlan_flags = 0;
+   int rc;
+
+   __set_bit(RAMROD_COMP_WAIT, _flags);
+   __set_bit(BNX2X_VLAN, _flags);
+   rc = vlan_obj->delete_all(bp, vlan_obj, _flags,
+ _flags);
+   if (rc)
+   return rc;
+   }
 
/* Mark that hw forgot all entries */
list_for_each_entry(vlan, >vlan_reg, link)
-- 
2.20.1

[PATCH] scsi: sd: Fix cache_type_store()

2018-12-22 Thread Ivan Mironov

Changing of caching mode via /sys/devices/.../scsi_disk/.../cache_type
may fail if device responses to MODE SENSE command with DPOFUA flag set,
and then checks this flag to be not set on MODE SELECT command.

When trying to change cache_type, write always fails:
# echo "none" >cache_type
bash: echo: write error: Invalid argument

And following appears in dmesg:
[13007.865745] sd 1:0:1:0: [sda] Sense Key : Illegal Request [current]
[13007.865753] sd 1:0:1:0: [sda] Add. Sense: Invalid field in parameter 
list

Signed-off-by: Ivan Mironov 
---
 drivers/scsi/sd.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index bd0a5c694a97..698fe651fb1a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -206,6 +206,21 @@ cache_type_store(struct device *dev, struct 
device_attribute *attr,
sp = buffer_data[0] & 0x80 ? 1 : 0;
buffer_data[0] &= ~0x80;
 
+   /* From SBC-4 r15, 6.5.1 "Mode pages overview", description of
+* DEVICE-SPECIFIC PARAMETER field in the mode parameter header:
+* ...
+* The write protect (WP) bit for mode data sent with a MODE SELECT
+* command shall be ignored by the device server.
+* ...
+* The DPOFUA bit is reserved for mode data sent with a MODE SELECT
+* command.
+* ...
+* All other bits are also reserved, and all reserved bits shall be set
+* to zero according to the same document. So, we can simply set this
+* field to zero for compatibility.
+*/
+   data.device_specific = 0;
+
if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT,
 SD_MAX_RETRIES, , )) {
if (scsi_sense_valid())
-- 
2.20.1

Re: [PATCH 0/6] Add support of the HBA mode on HP Smart Array P410i controllers

2018-12-14 Thread Ivan Mironov

On Fri, 2018-12-14 at 19:38 +, don.br...@microchip.com wrote:
> NAKing this series.
> - The P410 controllers do not fully support HBA mode and we do not support 
> adding it to the hpsa driver.
> 

Could you please elaborate on what exactly you mean by "do not fully
support HBA mode"?

HBA mode looks supported on P410i (at least to some degree) because:
*) There is an official method to enable/disable it on these
controllers. Only for itanium-based servers unfortunately.
*) This method is mentioned in various documents from HP regarding
Integrity servers.
*) Controller reacts on the change of corresponging bit in nvram_flags
by immediately removing any configured RAID arrays and by refusing to
configure new arrays. Which is totally expected for HBA mode.
*) HBA mode works for me on P410i with patched driver.

[PATCH 2/6] scsi: hpsa: Support HBA mode on HP Smart Array P410i controllers

2018-12-14 Thread Ivan Mironov

This patch is based on code from the 316b221, most of which was removed by
the b9092b7.

Originally, HBA mode on these controllers was supported only on
Itanium-based HP Integrity servers running HP-UX. Tool for switching
between RAID and HBA modes existed only in form of EFI binary for
ia64 architecture: saupdate.efi[1]. However, I guessed how to overwrite the
corresponding flags field in controller's NVRAM, and was able to
reimplement RAID/HBA mode switching tool for Linux[2].

This change was successfully tested using blktests[3] and xfstests[4] on
my hardware, with embedded P410i controller (PCI ID: 103c:3245, board
ID: 0x3245103c) with firmware version 6.64.

This may work with some other controllers, but it is not tested
(because I do not have the hardware) and it may be very dangerous. That is
why this functionality is disabled by default and may be enabled only
manually using the new module parameter.

[1] 
https://support.hpe.com/hpsc/swd/public/detail?swItemId=MTX_0b76aec489764aea9802a6d27b
[2] https://github.com/im-0/hpsahba
[3] https://github.com/osandov/blktests
[4] https://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git

Signed-off-by: Ivan Mironov 
---
 drivers/scsi/hpsa.c | 98 +++--
 drivers/scsi/hpsa.h |  3 ++
 2 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index fc06b790f16b..ee3d7c722a63 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -88,6 +88,11 @@ module_param(hpsa_simple_mode, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(hpsa_simple_mode,
"Use 'simple mode' rather than 'performant mode'");
 
+static bool hpsa_use_nvram_hba_flag;
+module_param(hpsa_use_nvram_hba_flag, bool, 0444);
+MODULE_PARM_DESC(hpsa_use_nvram_hba_flag,
+   "Use flag from NVRAM to enable HBA mode");
+
 /* define the PCI info for the cards we can control */
 static const struct pci_device_id hpsa_pci_device_id[] = {
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3241},
@@ -3039,6 +3044,37 @@ static int hpsa_scsi_do_inquiry(struct ctlr_info *h, 
unsigned char *scsi3addr,
return rc;
 }
 
+static int hpsa_bmic_ctrl_mode_sense(struct ctlr_info *h,
+   struct bmic_controller_parameters *buf)
+{
+   int rc = IO_OK;
+   struct CommandList *c;
+   struct ErrorInfo *ei;
+
+   c = cmd_alloc(h);
+
+   if (fill_cmd(c, BMIC_SENSE_CONTROLLER_PARAMETERS, h, buf, sizeof(*buf),
+   0, RAID_CTLR_LUNID, TYPE_CMD)) {
+   rc = -1;
+   goto out;
+   }
+
+   rc = hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE,
+   NO_TIMEOUT);
+   if (rc)
+   goto out;
+
+   ei = c->err_info;
+   if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
+   hpsa_scsi_interpret_error(h, c);
+   rc = -1;
+   }
+
+out:
+   cmd_free(h, c);
+   return rc;
+}
+
 static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
u8 reset_type, int reply_queue)
 {
@@ -4296,6 +4332,50 @@ static bool hpsa_skip_device(struct ctlr_info *h, u8 
*lunaddrbytes,
return false;
 }
 
+static int hpsa_nvram_hba_flag_enabled(struct ctlr_info *h, bool *flag_enabled)
+{
+   int rc;
+   struct bmic_controller_parameters *ctlr_params;
+
+   ctlr_params = kzalloc(sizeof(*ctlr_params), GFP_KERNEL);
+   if (!ctlr_params) {
+   rc = -ENOMEM;
+   goto out;
+   }
+
+   rc = hpsa_bmic_ctrl_mode_sense(h, ctlr_params);
+   if (rc)
+   goto out;
+
+   *flag_enabled = ctlr_params->nvram_flags & HPSA_NVRAM_FLAG_HBA;
+
+out:
+   kfree(ctlr_params);
+   return rc;
+}
+
+static int hpsa_update_nvram_hba_mode(struct ctlr_info *h)
+{
+   int rc;
+   bool flag_enabled;
+
+   if (!hpsa_use_nvram_hba_flag)
+   return 0;
+
+   rc = hpsa_nvram_hba_flag_enabled(h, _enabled);
+   if (rc == -ENOMEM)
+   dev_warn(>pdev->dev, "Out of memory.\n");
+   if (rc)
+   return rc;
+
+   dev_info(>pdev->dev, "NVRAM HBA flag: %s\n",
+   flag_enabled ? "enabled" : "disabled");
+
+   h->nvram_hba_mode_enabled = flag_enabled;
+
+   return 0;
+}
+
 static void hpsa_update_scsi_devices(struct ctlr_info *h)
 {
/* the idea here is we could get notified
@@ -4352,6 +4432,11 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h)
__func__);
}
 
+   if (hpsa_update_nvram_hba_mode(h)) {
+   h->drv_req_rescan = 1;
+   goto out;
+   }
+
/* We might see up to the maximum number of logical and physical disks
 * plus external target devices, and a device for the local RAID
 * controller.
@@ -4437,11 +4522,16 @@ static void hpsa_updat

[PATCH 0/6] Add support of the HBA mode on HP Smart Array P410i controllers

2018-12-14 Thread Ivan Mironov

This series of patches adds support of the HBA mode on HP Smart Array
P410i RAID controllers.

This is not guaranteed to be correct as I do not have any access to
documentation on these controllers. However, this works fine for me
on hardware that I have. Also, these changes successfully passes blktests[1]
and xfstests[2].

To make sure that this new functionality does not break anything, it is
disabled by default and may be enabled only manually using new module
parameter.

[1] https://github.com/osandov/blktests
[2] https://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git

Ivan Mironov (6):
  scsi: hpsa: Add function to check if device is a disk or a zoned
device
  scsi: hpsa: Support HBA mode on HP Smart Array P410i controllers
  scsi: hpsa: Add/mask existing devices on rescan if visibility changes
  scsi: hpsa: Ignore HBA flag from NVRAM if logical devices exist
  scsi: hpsa: Name more fields in "struct bmic_identify_controller"
  scsi: hpsa: Do not use HBA flag from NVRAM if HBA is not supported

 drivers/scsi/hpsa.c | 145 
 drivers/scsi/hpsa.h |   3 +
 drivers/scsi/hpsa_cmd.h | 113 ++-
 3 files changed, 244 insertions(+), 17 deletions(-)

-- 
2.19.2

[PATCH 1/6] scsi: hpsa: Add function to check if device is a disk or a zoned device

2018-12-14 Thread Ivan Mironov

This check is used multiple times within the driver. New function makes
conditional statements a bit shorter and more readable.

Signed-off-by: Ivan Mironov 
---
 drivers/scsi/hpsa.c | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index c9cccf35e9d7..fc06b790f16b 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -768,6 +768,11 @@ static ssize_t sas_address_show(struct device *dev,
return snprintf(buf, PAGE_SIZE, "0x%016llx\n", sas_address);
 }
 
+static inline bool is_disk_or_zbc(const struct hpsa_scsi_dev_t *hdev)
+{
+   return hdev->devtype == TYPE_DISK || hdev->devtype == TYPE_ZBC;
+}
+
 static ssize_t host_show_hp_ssd_smart_path_enabled(struct device *dev,
 struct device_attribute *attr, char *buf)
 {
@@ -788,7 +793,7 @@ static ssize_t host_show_hp_ssd_smart_path_enabled(struct 
device *dev,
offload_enabled = hdev->offload_enabled;
spin_unlock_irqrestore(>lock, flags);
 
-   if (hdev->devtype == TYPE_DISK || hdev->devtype == TYPE_ZBC)
+   if (is_disk_or_zbc(hdev))
return snprintf(buf, 20, "%d\n", offload_enabled);
else
return snprintf(buf, 40, "%s\n",
@@ -855,8 +860,7 @@ static ssize_t path_info_show(struct device *dev,
PAGE_SIZE - output_len,
"PORT: %.2s ",
phys_connector);
-   if ((hdev->devtype == TYPE_DISK || hdev->devtype == TYPE_ZBC) &&
-   hdev->expose_device) {
+   if (is_disk_or_zbc(hdev) && hdev->expose_device) {
if (box == 0 || box == 0xFF) {
output_len += scnprintf(buf + output_len,
PAGE_SIZE - output_len,
@@ -1715,8 +1719,7 @@ static void hpsa_figure_phys_disk_ptrs(struct ctlr_info 
*h,
for (j = 0; j < ndevices; j++) {
if (dev[j] == NULL)
continue;
-   if (dev[j]->devtype != TYPE_DISK &&
-   dev[j]->devtype != TYPE_ZBC)
+   if (!is_disk_or_zbc(dev[j]))
continue;
if (is_logical_device(dev[j]))
continue;
@@ -1770,8 +1773,7 @@ static void hpsa_update_log_drive_phys_drive_ptrs(struct 
ctlr_info *h,
for (i = 0; i < ndevices; i++) {
if (dev[i] == NULL)
continue;
-   if (dev[i]->devtype != TYPE_DISK &&
-   dev[i]->devtype != TYPE_ZBC)
+   if (!is_disk_or_zbc(dev[i]))
continue;
if (!is_logical_device(dev[i]))
continue;
@@ -3965,9 +3967,8 @@ static int hpsa_update_device_info(struct ctlr_info *h,
scsi_device_type(this_device->devtype),
this_device->model);
 
-   if ((this_device->devtype == TYPE_DISK ||
-   this_device->devtype == TYPE_ZBC) &&
-   is_logical_dev_addr_mode(scsi3addr)) {
+   if (is_disk_or_zbc(this_device) &&
+   is_logical_dev_addr_mode(scsi3addr)) {
unsigned char volume_offline;
 
hpsa_get_raid_level(h, scsi3addr, _device->raid_level);
-- 
2.19.2

[PATCH 6/6] scsi: hpsa: Do not use HBA flag from NVRAM if HBA is not supported

2018-12-14 Thread Ivan Mironov

Check bmic_identify_controller.yet_more_controller_flags for HBA support
bit before trying to enable HBA mode.

HP's ssacli tool calls this bit "Hba Mode Supported" in full diagnostics
report.

Signed-off-by: Ivan Mironov 
---
 drivers/scsi/hpsa.c | 16 ++--
 drivers/scsi/hpsa_cmd.h |  4 
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 0b5b3a651b70..38026b82ac6b 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -4342,6 +4342,12 @@ static bool hpsa_skip_device(struct ctlr_info *h, u8 
*lunaddrbytes,
return false;
 }
 
+static bool is_hba_supported(const struct bmic_identify_controller *id_ctlr)
+{
+   return le32_to_cpu(id_ctlr->yet_more_controller_flags) &
+   YET_MORE_CTLR_FLAG_HBA_MODE_SUPP;
+}
+
 static int hpsa_nvram_hba_flag_enabled(struct ctlr_info *h, bool *flag_enabled)
 {
int rc;
@@ -4364,7 +4370,8 @@ static int hpsa_nvram_hba_flag_enabled(struct ctlr_info 
*h, bool *flag_enabled)
return rc;
 }
 
-static int hpsa_update_nvram_hba_mode(struct ctlr_info *h, u32 nlogicals)
+static int hpsa_update_nvram_hba_mode(struct ctlr_info *h, u32 nlogicals,
+   const struct bmic_identify_controller *id_ctlr)
 {
int rc;
bool flag_enabled;
@@ -4373,6 +4380,11 @@ static int hpsa_update_nvram_hba_mode(struct ctlr_info 
*h, u32 nlogicals)
if (!hpsa_use_nvram_hba_flag)
return 0;
 
+   if (!is_hba_supported(id_ctlr)) {
+   dev_info(>pdev->dev, "NVRAM HBA flag: not supported\n");
+   return 0;
+   }
+
rc = hpsa_nvram_hba_flag_enabled(h, _enabled);
if (rc == -ENOMEM)
dev_warn(>pdev->dev, "Out of memory.\n");
@@ -4446,7 +4458,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h)
__func__);
}
 
-   if (hpsa_update_nvram_hba_mode(h, nlogicals)) {
+   if (hpsa_update_nvram_hba_mode(h, nlogicals, id_ctlr)) {
h->drv_req_rescan = 1;
goto out;
}
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 64100a33f844..b27f94b257bc 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -806,6 +806,10 @@ struct bmic_identify_controller {
 };
 
 
+/* ssacli calls this bit "Hba Mode Supported". */
+#define YET_MORE_CTLR_FLAG_HBA_MODE_SUPP (1 << 25)
+
+
 struct bmic_identify_physical_device {
u8scsi_bus;  /* SCSI Bus number on controller */
u8scsi_id;   /* SCSI ID on this bus */
-- 
2.19.2

[PATCH 3/6] scsi: hpsa: Add/mask existing devices on rescan if visibility changes

2018-12-14 Thread Ivan Mironov

Controller may be switched between RAID and HBA modes even without a
reboot.

When changing to HBA mode, it does some internal reset magic and
automatically removes any configured RAID arrays. Without this patch,
driver successfully detects disappearance of logical arrays, but does
not expose any physical disks.

Signed-off-by: Ivan Mironov 
---
 drivers/scsi/hpsa.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index ee3d7c722a63..60f1f7949d8d 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -1541,6 +1541,14 @@ static inline int device_updated(struct hpsa_scsi_dev_t 
*dev1,
return 0;
 }
 
+static inline bool device_expose_changed(struct hpsa_scsi_dev_t *dev1,
+   struct hpsa_scsi_dev_t *dev2)
+{
+   if (dev1->expose_device != dev2->expose_device)
+   return true;
+   return false;
+}
+
 /* Find needle in haystack.  If exact match found, return DEVICE_SAME,
  * and return needle location in *index.  If scsi3addr matches, but not
  * vendor, model, serial num, etc. return DEVICE_CHANGED, and return needle
@@ -1569,6 +1577,8 @@ static int hpsa_scsi_find_entry(struct hpsa_scsi_dev_t 
*needle,
if (device_is_the_same(needle, haystack[i])) {
if (device_updated(needle, haystack[i]))
return DEVICE_UPDATED;
+   if (device_expose_changed(needle, haystack[i]))
+   return DEVICE_CHANGED;
return DEVICE_SAME;
} else {
/* Keep offline devices offline */
-- 
2.19.2

[PATCH 4/6] scsi: hpsa: Ignore HBA flag from NVRAM if logical devices exist

2018-12-14 Thread Ivan Mironov

Simultaneous use of physical devices and logical RAID devices may be
dangerous.

Signed-off-by: Ivan Mironov 
---
 drivers/scsi/hpsa.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 60f1f7949d8d..0b5b3a651b70 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -4364,10 +4364,11 @@ static int hpsa_nvram_hba_flag_enabled(struct ctlr_info 
*h, bool *flag_enabled)
return rc;
 }
 
-static int hpsa_update_nvram_hba_mode(struct ctlr_info *h)
+static int hpsa_update_nvram_hba_mode(struct ctlr_info *h, u32 nlogicals)
 {
int rc;
bool flag_enabled;
+   bool ignore;
 
if (!hpsa_use_nvram_hba_flag)
return 0;
@@ -4378,10 +4379,13 @@ static int hpsa_update_nvram_hba_mode(struct ctlr_info 
*h)
if (rc)
return rc;
 
-   dev_info(>pdev->dev, "NVRAM HBA flag: %s\n",
-   flag_enabled ? "enabled" : "disabled");
+   ignore = flag_enabled && nlogicals;
 
-   h->nvram_hba_mode_enabled = flag_enabled;
+   dev_info(>pdev->dev, "NVRAM HBA flag: %s%s\n",
+   flag_enabled ? "enabled" : "disabled",
+   ignore ? " (ignored because of existing logical devices)" : "");
+
+   h->nvram_hba_mode_enabled = flag_enabled && !ignore;
 
return 0;
 }
@@ -4442,7 +4446,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h)
__func__);
}
 
-   if (hpsa_update_nvram_hba_mode(h)) {
+   if (hpsa_update_nvram_hba_mode(h, nlogicals)) {
h->drv_req_rescan = 1;
goto out;
}
-- 
2.19.2

[PATCH 5/6] scsi: hpsa: Name more fields in "struct bmic_identify_controller"

2018-12-14 Thread Ivan Mironov

Based on information from "struct identify_controller" from
cciss_vol_status.c from the cciss_vol_status tool[1].

[1] https://sourceforge.net/projects/cciss/files/cciss_vol_status/

Signed-off-by: Ivan Mironov 
---
 drivers/scsi/hpsa_cmd.h | 109 ++--
 1 file changed, 106 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 21a726e2eec6..64100a33f844 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -695,11 +695,114 @@ struct hpsa_pci_info {
 
 struct bmic_identify_controller {
u8  configured_logical_drive_count; /* offset 0 */
-   u8  pad1[153];
+   __le32  signature;
+   charrunning_firm_rev[4];
+   charrom_firm_rev[4];
+   u8  hardware_rev;
+   u8  reserved_1[4];
+   __le32  drive_present_bit_map;
+   __le32  external_drive_bit_map;
+   __le32  board_id;
+   u8  reserved_2;
+   __le32  non_disk_map;
+   u8  reserved_3[5];
+   u8  marketing_revision;
+   u8  controller_flags;
+   u8  host_flags;
+   u8  expand_disable_code;
+   u8  scsi_chip_count;
+   u8  reserved_4[4];
+   __le32  ctlr_clock;
+   u8  drives_per_scsi_bus;
+   __le16  big_drive_present_map[8];
+   __le16  big_ext_drive_map[8];
+   __le16  big_non_disk_map[8];
+
+   /* used for FW debugging */
+   __le16  task_flags;
+   /* Bitmap used for ICL between controllers */
+   u8  icl_bus_map;
+   /* See REDUNDANT MODE VALUES */
+   u8  redund_ctlr_modes_support;
+   /* See REDUNDANT MODE VALUES */
+   u8  curr_redund_ctlr_mode;
+   /* See REDUNDANT STATUS FLAG */
+   u8  redund_ctlr_status;
+   /* See REDUNDANT FAILURE VALUES */
+   u8  redund_op_failure_code;
+   u8  unsupported_nile_bus;
+   u8  host_i2c_autorev;
+   u8  cpld_revision;
+   u8  fibre_chip_count;
+   u8  daughterboard_type;
+   u8  reserved_5[2];
+
+   u8  access_module_status;
+   u8  features_supported[12];
+   /* Recovery ROM inactive f/w revision */
+   charrec_rom_inactive_rev[4];
+   /* Recovery ROM flags */
+   u8  rec_rom_flags;
+   u8  pci_to_pci_bridge_status;
+   /* Reserved for future use */
+   u8  reserved_6[4];
+   /* Percent of memory allocated to write cache */
+   u8  percent_write_cache;
+   /* Total cache board size */
+   __le16  daughter_board_cache_size;
+   /* Number of cache batteries */
+   u8  cache_battery_count;
+   /* Total size (MB) of atttached memory */
+   __le16  total_memory_size;
+   /* Additional controller flags byte */
+   u8  more_controller_flags;
+   /* 2nd byte of 3 byte autorev field */
+   u8  x_board_host_i2c_autorev;
+   /* BBWC PIC revision */
+   u8  battery_pic_rev;
+   /* DDFF update engine version */
+   u8  ddff_version[4];
+   /* Maximum logical units supported */
+   __le16  max_logical_units;
+   /* Big num configured logical units */
__le16  extended_logical_unit_count;/* offset 154 */
-   u8  pad2[136];
+   /* Maximum physical devices supported */
+   __le16  max_physical_devices;
+   /* Max physical drive per logical unit */
+   __le16  max_phy_drv_per_logical_unit;
+   /* Number of attached enclosures */
+   u8  enclosure_count;
+   /* Number of expanders detected */
+   u8  expander_count;
+   /* Offset to extended drive present map*/
+   __le16  offset_to_edp_bitmap;
+   /* Offset to extended external drive present map */
+   __le16  offset_to_eedp_bitmap;
+   /* Offset to extended non-disk map */
+   __le16  offset_to_end_bitmap;
+   /* Internal port status bytes */
+   u8  internal_port_status[8];
+   /* External port status bytes */
+   u8  external_port_status[8];
+   /* Yet More Controller flags  */
+   __le32  yet_more_controller_flags;
+   /* Last lockup code */
+   u8  last_lockup;
+   /* PCI slot according to option ROM*/
+   u8  pci_slot;
+   /* Build number */
+   __le16  build_num;
+   /* Maximum safe full stripe size */
+   __le32  max_safe_full_stripe_size;
+   /* Total structure length */
+   __le32  total_length;
+   /* Vendor ID */
+   charvendor_id[8];
+   /* Product ID */
+   charproduct_id[16];
+   u8  reserved_7[68];
u8  controller_mode;/* offset 292 */
-   u8  pad3[32];
+   u8  reserved_8[32];
 };
 
 
-- 
2.19.2

50 matches

Mail list logo