On 2019-08-26 19:17, Sven Eckelmann wrote:
On Tuesday, 20 August 2019 17:47:36 CEST Kalle Valo wrote:
+static ssize_t ath11k_read_simulate_fw_crash(struct file *file,
+                                            char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+       const char buf[] =
+ "To simulate firmware crash write one of the keywords to this file:\n" + "`assert` - this will send WMI_FORCE_FW_HANG_CMDID to firmware to cause assert.\n" + "`hw-restart` - this will simply queue hw restart without fw/hw actually crashing.\n";
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
+}

There is nothing in the write handler which handles "hw-restart". It just
causes an -EINVAL.


Yes. I will add "hw-restart".

+
+/* Simulate firmware crash:
+ * 'soft': Call wmi command causing firmware hang. This firmware hang is
+ * recoverable by warm firmware reset.
+ * 'hard': Force firmware crash by setting any vdev parameter for not allowed + * vdev id. This is hard firmware crash because it is recoverable only by cold
+ * firmware reset.
+ */
+static ssize_t ath11k_write_simulate_fw_crash(struct file *file,
+ const char __user *user_buf, + size_t count, loff_t *ppos)
+{
+       struct ath11k_base *ab = file->private_data;
+       struct ath11k_pdev *pdev;
+       struct ath11k *ar = ab->pdevs[0].ar;
+       char buf[32] = {0};
+       ssize_t rc;
+       int i, ret, radioup;
+
+       for (i = 0; i < ab->num_radios; i++) {
+               pdev = &ab->pdevs[i];
+               ar = pdev->ar;
+               if (ar && ar->state == ATH11K_STATE_ON) {
+                       radioup = 1;
+                       break;
+               }
+       }
+       /* filter partial writes and invalid commands */
+       if (*ppos != 0 || count >= sizeof(buf) || count == 0)
+               return -EINVAL;
+
+ rc = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count);
+       if (rc < 0)
+               return rc;
+
+       /* drop the possible '\n' from the end */
+       if (buf[*ppos - 1] == '\n')
+               buf[*ppos - 1] = '\0';
+
+       if (radioup == 0) {
+               ret = -ENETDOWN;
+               goto exit;
+       }
+
+       if (!strcmp(buf, "assert")) {
+               ath11k_info(ab, "simulating firmware assert crash\n");
+               ret = ath11k_wmi_force_fw_hang_cmd(ar,
+ ATH11K_WMI_FW_HANG_ASSERT_TYPE, + ATH11K_WMI_FW_HANG_DELAY);
+       } else {
+               ret = -EINVAL;
+               goto exit;
+       }
+
+       if (ret) {
+ ath11k_warn(ab, "failed to simulate firmware crash: %d\n", ret);
+               goto exit;
+       }
+
+       ret = count;
+
+exit:
+       return ret;
+}

And right now, the write of an "assert" to this file just causes an
fatal error for the system:

    [ 4312.409255] qcom-q6v5-wcss-pil cd00000.qcom_q6v5_wcss: fatal
error received:
    [ 4312.409255] QC Image Version:
QC_IMAGE_VERSION_STRING=WLAN.HK.2.1.0.1-00410-QCAHKSWPL_SILICONZ-2
[ 4312.409255] Image Variant : IMAGE_VARIANT_STRING=8074.wlanfw.eval_v2Q
    [ 4312.409255]
    [ 4312.409255] wlan_wmi.c:234 Assertion 0 failedparam0 :zero,
param1 :zero, param2 :zero.
    [ 4312.409255] Thread ID      : 0x00000069  Thread name    : WLAN
RT0  Process ID     : 0
    [ 4312.409255] Register:
    [ 4312.409255] SP : 0x4c168d58
    [ 4312.409255] FP : 0x4c168d60
    [ 4312.409255] PC : 0x4b1c8850
    [ 4312.409255] SSR : 0x00000008
    [ 4312.409255] BADVA : 0x00020000
    [ 4312.409255] LR : 0x4b1c7c68
    [ 4312.409255]
    [ 4312.409255] Stack Dump
    [ 4312.409255] from : 0x4c168d58
    [ 4312.409255] to   : 0x4c168f00
    [ 4312.409255]
    [ 4312.455997] remoteproc remoteproc0: crash detected in
cd00000.qcom_q6v5_wcss: type fatal error
    [ 4312.478259] remoteproc remoteproc0: handling crash #1 in
cd00000.qcom_q6v5_wcss
    [ 4312.486826] Kernel panic - not syncing: remoteproc remoteproc0:
Resetting the SoC - cd00000.qcom_q6v5_wcss crashed
    [ 4312.494028] CPU: 2 PID: 5590 Comm: kworker/2:0 Tainted: G
 W       4.4.60 #0
    [ 4312.504436] Hardware name: Generic DT based system
    [ 4312.511991] Workqueue: events rproc_crash_handler_work
    [ 4312.521880] [<8021e86c>] (unwind_backtrace) from [<8021b404>]
(show_stack+0x10/0x14)
    [ 4312.521979] [<8021b404>] (show_stack) from [<803dd818>]
(dump_stack+0x7c/0x9c)
    [ 4312.529789] [<803dd818>] (dump_stack) from [<80225d80>]
(panic+0x84/0x1f8)
    [ 4312.536818] [<80225d80>] (panic) from [<80555278>]
(rproc_crash_handler_work+0x90/0x98)
    [ 4312.543678] [<80555278>] (rproc_crash_handler_work) from
[<802380e8>] (process_one_work+0x1c0/0x2f8)
    [ 4312.551578] [<802380e8>] (process_one_work) from [<80238d24>]
(worker_thread+0x2b0/0x3ec)
    [ 4312.560952] [<80238d24>] (worker_thread) from [<8023cf84>]
(kthread+0xd8/0xec)
    [ 4312.569023] [<8023cf84>] (kthread) from [<80209be8>]
(ret_from_fork+0x14/0x2c)
    [ 4312.576141] CPU0: stopping
    [ 4312.583335] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W
    4.4.60 #0
    [ 4312.586032] Hardware name: Generic DT based system
    [ 4312.593237] [<8021e86c>] (unwind_backtrace) from [<8021b404>]
(show_stack+0x10/0x14)
    [ 4312.597930] [<8021b404>] (show_stack) from [<803dd818>]
(dump_stack+0x7c/0x9c)
    [ 4312.605827] [<803dd818>] (dump_stack) from [<8021dc3c>]
(handle_IPI+0xe8/0x180)
    [ 4312.612858] [<8021dc3c>] (handle_IPI) from [<802093a4>]
(gic_handle_irq+0x78/0x94)
    [ 4312.620063] [<802093a4>] (gic_handle_irq) from [<8020a480>]
(__irq_svc+0x40/0x74)
    [ 4312.627701] Exception stack(0x80c67f60 to 0x80c67fa8)
    [ 4312.635249] 7f60: 00000001 00000000 00000000 8020b320 00000000
80c66000 00000000 80c612cc
    [ 4312.640291] 7f80: 80c67fb8 808f3a30 80cae010 00000000 00000000
80c67fb0 80218edc 80218ee0
    [ 4312.648448] 7fa0: 60000013 ffffffff
    [ 4312.656601] [<8020a480>] (__irq_svc) from [<80218ee0>]
(arch_cpu_idle+0x2c/0x50)
    [ 4312.659909] [<80218ee0>] (arch_cpu_idle) from [<80254b38>]
(cpu_startup_entry+0x134/0x214)
    [ 4312.667553] [<80254b38>] (cpu_startup_entry) from [<808cac48>]
(start_kernel+0x380/0x404)
    [ 4312.675620] CPU1: stopping
    [ 4312.683855] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G        W
    4.4.60 #0
    [ 4312.686466] Hardware name: Generic DT based system
    [ 4312.693671] [<8021e86c>] (unwind_backtrace) from [<8021b404>]
(show_stack+0x10/0x14)
    [ 4312.698363] [<8021b404>] (show_stack) from [<803dd818>]
(dump_stack+0x7c/0x9c)
    [ 4312.706263] [<803dd818>] (dump_stack) from [<8021dc3c>]
(handle_IPI+0xe8/0x180)
    [ 4312.713293] [<8021dc3c>] (handle_IPI) from [<802093a4>]
(gic_handle_irq+0x78/0x94)
    [ 4312.720497] [<802093a4>] (gic_handle_irq) from [<8020a480>]
(__irq_svc+0x40/0x74)
    [ 4312.728135] Exception stack(0xbe083f98 to 0xbe083fe0)
    [ 4312.735683] 3f80:
        00000001 00000000
    [ 4312.740725] 3fa0: 00000000 8020b320 00000000 be082000 00000000
80c612cc be083ff0 410fd034
    [ 4312.748884] 3fc0: 00000000 00000000 00000000 be083fe8 80218edc
80218ee0 60000013 ffffffff
    [ 4312.757045] [<8020a480>] (__irq_svc) from [<80218ee0>]
(arch_cpu_idle+0x2c/0x50)
    [ 4312.765203] [<80218ee0>] (arch_cpu_idle) from [<80254b38>]
(cpu_startup_entry+0x134/0x214)
    [ 4312.772669] [<80254b38>] (cpu_startup_entry) from [<4120944c>]
(0x4120944c)
    [ 4312.780737] CPU3: stopping
    [ 4312.787589] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G        W
    4.4.60 #0
    [ 4312.790372] Hardware name: Generic DT based system
    [ 4312.797577] [<8021e86c>] (unwind_backtrace) from [<8021b404>]
(show_stack+0x10/0x14)
    [ 4312.802270] [<8021b404>] (show_stack) from [<803dd818>]
(dump_stack+0x7c/0x9c)
    [ 4312.810167] [<803dd818>] (dump_stack) from [<8021dc3c>]
(handle_IPI+0xe8/0x180)
    [ 4312.817199] [<8021dc3c>] (handle_IPI) from [<802093a4>]
(gic_handle_irq+0x78/0x94)
    [ 4312.824403] [<802093a4>] (gic_handle_irq) from [<8020a480>]
(__irq_svc+0x40/0x74)
    [ 4312.832041] Exception stack(0xbe087f98 to 0xbe087fe0)
    [ 4312.839588] 7f80:
        00000001 00000000
    [ 4312.844630] 7fa0: 00000000 8020b320 00000000 be086000 00000000
80c612cc be087ff0 410fd034
    [ 4312.852791] 7fc0: 00000000 00000000 00000000 be087fe8 80218edc
80218ee0 60000013 ffffffff
    [ 4312.860951] [<8020a480>] (__irq_svc) from [<80218ee0>]
(arch_cpu_idle+0x2c/0x50)
    [ 4312.869109] [<80218ee0>] (arch_cpu_idle) from [<80254b38>]
(cpu_startup_entry+0x134/0x214)
    [ 4312.876576] [<80254b38>] (cpu_startup_entry) from [<4120944c>]
(0x4120944c)
    [ 4312.884650] The reading for sensor 4 is 0x002041f7
    [ 4312.891499] The reading for sensor 5 is 0x002051f4
    [ 4312.896415] Couldn't get reading for sensor 6
    [ 4312.901189] Couldn't get reading for sensor 7
    [ 4312.905561] The reading for sensor 8 is 0x002081e0
    [ 4312.909902] The reading for sensor 9 is 0x002091f7
    [ 4312.914645] Couldn't get reading for sensor 10
    [ 4312.919364] The reading for sensor 11 is 0x0020b1fa
    [ 4312.923791] The reading for sensor 12 is 0x0020c1fa
    [ 4312.928621] Couldn't get reading for sensor 13
    [ 4312.933425] The reading for sensor 14 is 0x0020e1f4
    [ 4312.937941] The reading for sensor 15 is 0x0020f1e7
    [ 4313.942700] Rebooting in 3 seconds..

Maybe can be fixed by a different kernel (for the remoteproc). But I don't
have this kernel at the moment.


The write of an "assert", sends 'WMI_FORCE_FW_HANG_CMDID' WMI command to target firmware.
This WMI command forces the target to assert.

Anil


Kind regards,
        Sven
_______________________________________________
ath11k mailing list
ath...@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath11k

--
Thanks
Anil.

Reply via email to