On Tuesday, 20 August 2019 17:47:36 CEST Kalle Valo wrote:
> +static ssize_t ath11k_read_simulate_fw_crash(struct file *file,
> + char __user *user_buf,
> + size_t count, loff_t *ppos)
> +{
> + const char buf[] =
> + "To simulate firmware crash write one of the keywords to this
> file:\n"
> + "`assert` - this will send WMI_FORCE_FW_HANG_CMDID to
> firmware to cause assert.\n"
> + "`hw-restart` - this will simply queue hw restart without
> fw/hw actually crashing.\n";
> +
> + return simple_read_from_buffer(user_buf, count, ppos, buf,
> strlen(buf));
> +}
Nothing in the write handler actually handles "hw-restart", even though the
help text above advertises it. Writing it just results in -EINVAL.
> +
> +/* Simulate firmware crash:
> + * 'soft': Call wmi command causing firmware hang. This firmware hang is
> + * recoverable by warm firmware reset.
> + * 'hard': Force firmware crash by setting any vdev parameter for not allowed
> + * vdev id. This is hard firmware crash because it is recoverable only by
> cold
> + * firmware reset.
> + */
> +static ssize_t ath11k_write_simulate_fw_crash(struct file *file,
> + const char __user *user_buf,
> + size_t count, loff_t *ppos)
> +{
> + struct ath11k_base *ab = file->private_data;
> + struct ath11k_pdev *pdev;
> + struct ath11k *ar = ab->pdevs[0].ar;
> + char buf[32] = {0};
> + ssize_t rc;
> + int i, ret, radioup;
> +
> + for (i = 0; i < ab->num_radios; i++) {
> + pdev = &ab->pdevs[i];
> + ar = pdev->ar;
> + if (ar && ar->state == ATH11K_STATE_ON) {
> + radioup = 1;
> + break;
> + }
> + }
> + /* filter partial writes and invalid commands */
> + if (*ppos != 0 || count >= sizeof(buf) || count == 0)
> + return -EINVAL;
> +
> + rc = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf,
> count);
> + if (rc < 0)
> + return rc;
> +
> + /* drop the possible '\n' from the end */
> + if (buf[*ppos - 1] == '\n')
> + buf[*ppos - 1] = '\0';
> +
> + if (radioup == 0) {
> + ret = -ENETDOWN;
> + goto exit;
> + }
> +
> + if (!strcmp(buf, "assert")) {
> + ath11k_info(ab, "simulating firmware assert crash\n");
> + ret = ath11k_wmi_force_fw_hang_cmd(ar,
> +
> ATH11K_WMI_FW_HANG_ASSERT_TYPE,
> + ATH11K_WMI_FW_HANG_DELAY);
> + } else {
> + ret = -EINVAL;
> + goto exit;
> + }
> +
> + if (ret) {
> + ath11k_warn(ab, "failed to simulate firmware crash: %d\n",
> ret);
> + goto exit;
> + }
> +
> + ret = count;
> +
> +exit:
> + return ret;
> +}
And right now, writing "assert" to this file just causes a fatal error
for the system:
[ 4312.409255] qcom-q6v5-wcss-pil cd00000.qcom_q6v5_wcss: fatal error
received:
[ 4312.409255] QC Image Version:
QC_IMAGE_VERSION_STRING=WLAN.HK.2.1.0.1-00410-QCAHKSWPL_SILICONZ-2
[ 4312.409255] Image Variant : IMAGE_VARIANT_STRING=8074.wlanfw.eval_v2Q
[ 4312.409255]
[ 4312.409255] wlan_wmi.c:234 Assertion 0 failedparam0 :zero, param1 :zero,
param2 :zero.
[ 4312.409255] Thread ID : 0x00000069 Thread name : WLAN RT0
Process ID : 0
[ 4312.409255] Register:
[ 4312.409255] SP : 0x4c168d58
[ 4312.409255] FP : 0x4c168d60
[ 4312.409255] PC : 0x4b1c8850
[ 4312.409255] SSR : 0x00000008
[ 4312.409255] BADVA : 0x00020000
[ 4312.409255] LR : 0x4b1c7c68
[ 4312.409255]
[ 4312.409255] Stack Dump
[ 4312.409255] from : 0x4c168d58
[ 4312.409255] to : 0x4c168f00
[ 4312.409255]
[ 4312.455997] remoteproc remoteproc0: crash detected in
cd00000.qcom_q6v5_wcss: type fatal error
[ 4312.478259] remoteproc remoteproc0: handling crash #1 in
cd00000.qcom_q6v5_wcss
[ 4312.486826] Kernel panic - not syncing: remoteproc remoteproc0:
Resetting the SoC - cd00000.qcom_q6v5_wcss crashed
[ 4312.494028] CPU: 2 PID: 5590 Comm: kworker/2:0 Tainted: G W
4.4.60 #0
[ 4312.504436] Hardware name: Generic DT based system
[ 4312.511991] Workqueue: events rproc_crash_handler_work
[ 4312.521880] [<8021e86c>] (unwind_backtrace) from [<8021b404>]
(show_stack+0x10/0x14)
[ 4312.521979] [<8021b404>] (show_stack) from [<803dd818>]
(dump_stack+0x7c/0x9c)
[ 4312.529789] [<803dd818>] (dump_stack) from [<80225d80>]
(panic+0x84/0x1f8)
[ 4312.536818] [<80225d80>] (panic) from [<80555278>]
(rproc_crash_handler_work+0x90/0x98)
[ 4312.543678] [<80555278>] (rproc_crash_handler_work) from [<802380e8>]
(process_one_work+0x1c0/0x2f8)
[ 4312.551578] [<802380e8>] (process_one_work) from [<80238d24>]
(worker_thread+0x2b0/0x3ec)
[ 4312.560952] [<80238d24>] (worker_thread) from [<8023cf84>]
(kthread+0xd8/0xec)
[ 4312.569023] [<8023cf84>] (kthread) from [<80209be8>]
(ret_from_fork+0x14/0x2c)
[ 4312.576141] CPU0: stopping
[ 4312.583335] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W
4.4.60 #0
[ 4312.586032] Hardware name: Generic DT based system
[ 4312.593237] [<8021e86c>] (unwind_backtrace) from [<8021b404>]
(show_stack+0x10/0x14)
[ 4312.597930] [<8021b404>] (show_stack) from [<803dd818>]
(dump_stack+0x7c/0x9c)
[ 4312.605827] [<803dd818>] (dump_stack) from [<8021dc3c>]
(handle_IPI+0xe8/0x180)
[ 4312.612858] [<8021dc3c>] (handle_IPI) from [<802093a4>]
(gic_handle_irq+0x78/0x94)
[ 4312.620063] [<802093a4>] (gic_handle_irq) from [<8020a480>]
(__irq_svc+0x40/0x74)
[ 4312.627701] Exception stack(0x80c67f60 to 0x80c67fa8)
[ 4312.635249] 7f60: 00000001 00000000 00000000 8020b320 00000000 80c66000
00000000 80c612cc
[ 4312.640291] 7f80: 80c67fb8 808f3a30 80cae010 00000000 00000000 80c67fb0
80218edc 80218ee0
[ 4312.648448] 7fa0: 60000013 ffffffff
[ 4312.656601] [<8020a480>] (__irq_svc) from [<80218ee0>]
(arch_cpu_idle+0x2c/0x50)
[ 4312.659909] [<80218ee0>] (arch_cpu_idle) from [<80254b38>]
(cpu_startup_entry+0x134/0x214)
[ 4312.667553] [<80254b38>] (cpu_startup_entry) from [<808cac48>]
(start_kernel+0x380/0x404)
[ 4312.675620] CPU1: stopping
[ 4312.683855] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W
4.4.60 #0
[ 4312.686466] Hardware name: Generic DT based system
[ 4312.693671] [<8021e86c>] (unwind_backtrace) from [<8021b404>]
(show_stack+0x10/0x14)
[ 4312.698363] [<8021b404>] (show_stack) from [<803dd818>]
(dump_stack+0x7c/0x9c)
[ 4312.706263] [<803dd818>] (dump_stack) from [<8021dc3c>]
(handle_IPI+0xe8/0x180)
[ 4312.713293] [<8021dc3c>] (handle_IPI) from [<802093a4>]
(gic_handle_irq+0x78/0x94)
[ 4312.720497] [<802093a4>] (gic_handle_irq) from [<8020a480>]
(__irq_svc+0x40/0x74)
[ 4312.728135] Exception stack(0xbe083f98 to 0xbe083fe0)
[ 4312.735683] 3f80:
00000001 00000000
[ 4312.740725] 3fa0: 00000000 8020b320 00000000 be082000 00000000 80c612cc
be083ff0 410fd034
[ 4312.748884] 3fc0: 00000000 00000000 00000000 be083fe8 80218edc 80218ee0
60000013 ffffffff
[ 4312.757045] [<8020a480>] (__irq_svc) from [<80218ee0>]
(arch_cpu_idle+0x2c/0x50)
[ 4312.765203] [<80218ee0>] (arch_cpu_idle) from [<80254b38>]
(cpu_startup_entry+0x134/0x214)
[ 4312.772669] [<80254b38>] (cpu_startup_entry) from [<4120944c>]
(0x4120944c)
[ 4312.780737] CPU3: stopping
[ 4312.787589] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G W
4.4.60 #0
[ 4312.790372] Hardware name: Generic DT based system
[ 4312.797577] [<8021e86c>] (unwind_backtrace) from [<8021b404>]
(show_stack+0x10/0x14)
[ 4312.802270] [<8021b404>] (show_stack) from [<803dd818>]
(dump_stack+0x7c/0x9c)
[ 4312.810167] [<803dd818>] (dump_stack) from [<8021dc3c>]
(handle_IPI+0xe8/0x180)
[ 4312.817199] [<8021dc3c>] (handle_IPI) from [<802093a4>]
(gic_handle_irq+0x78/0x94)
[ 4312.824403] [<802093a4>] (gic_handle_irq) from [<8020a480>]
(__irq_svc+0x40/0x74)
[ 4312.832041] Exception stack(0xbe087f98 to 0xbe087fe0)
[ 4312.839588] 7f80:
00000001 00000000
[ 4312.844630] 7fa0: 00000000 8020b320 00000000 be086000 00000000 80c612cc
be087ff0 410fd034
[ 4312.852791] 7fc0: 00000000 00000000 00000000 be087fe8 80218edc 80218ee0
60000013 ffffffff
[ 4312.860951] [<8020a480>] (__irq_svc) from [<80218ee0>]
(arch_cpu_idle+0x2c/0x50)
[ 4312.869109] [<80218ee0>] (arch_cpu_idle) from [<80254b38>]
(cpu_startup_entry+0x134/0x214)
[ 4312.876576] [<80254b38>] (cpu_startup_entry) from [<4120944c>]
(0x4120944c)
[ 4312.884650] The reading for sensor 4 is 0x002041f7
[ 4312.891499] The reading for sensor 5 is 0x002051f4
[ 4312.896415] Couldn't get reading for sensor 6
[ 4312.901189] Couldn't get reading for sensor 7
[ 4312.905561] The reading for sensor 8 is 0x002081e0
[ 4312.909902] The reading for sensor 9 is 0x002091f7
[ 4312.914645] Couldn't get reading for sensor 10
[ 4312.919364] The reading for sensor 11 is 0x0020b1fa
[ 4312.923791] The reading for sensor 12 is 0x0020c1fa
[ 4312.928621] Couldn't get reading for sensor 13
[ 4312.933425] The reading for sensor 14 is 0x0020e1f4
[ 4312.937941] The reading for sensor 15 is 0x0020f1e7
[ 4313.942700] Rebooting in 3 seconds..
Maybe this can be fixed by a different kernel (for the remoteproc), but I
don't have such a kernel at the moment.
Kind regards,
Sven
signature.asc
Description: This is a digitally signed message part.
