Current abort operation has race.
xhci_handle_command_timeout()
xhci_abort_cmd_ring()
xhci_write_64(CMD_RING_ABORT)
xhci_handshake(5s)
do {
check CMD_RING_RUNNING
udelay(1)
...
COMP_CMD_ABORT event
COMP_CMD_STOP event
xhci_handle_stopped_cmd_ring()
restart cmd_ring
CMD_RING_RUNNING become 1 again
} while ()
return -ETIMEDOUT
xhci_write_64(CMD_RING_ABORT)
/* can abort random command */
To do abort operation correctly, we have to wait both of COMP_CMD_STOP
event and negation of CMD_RING_RUNNING.
But like above, while timeout handler is waiting negation of
CMD_RING_RUNNING, event handler can restart cmd_ring. So timeout
handler never be notice negation of CMD_RING_RUNNING, and retry of
CMD_RING_ABORT can abort random command (BTW, I guess retry of
CMD_RING_ABORT was workaround of this race).
To fix this race, this moves xhci_handle_stopped_cmd_ring() to
xhci_abort_cmd_ring(). And timeout handler waits COMP_CMD_STOP event.
At this point, timeout handler is owner of cmd_ring, and safely
restart cmd_ring by using xhci_handle_stopped_cmd_ring().
[FWIW, as bonus, this way would be easily extend to add CMD_RING_PAUSE
operation]
Signed-off-by: OGAWA Hirofumi
---
drivers/usb/host/xhci-mem.c |1
drivers/usb/host/xhci-ring.c | 163 ++
drivers/usb/host/xhci.h |1
3 files changed, 88 insertions(+), 77 deletions(-)
diff -puN drivers/usb/host/xhci-mem.c~xhci-fix-abort-race2
drivers/usb/host/xhci-mem.c
--- xhci/drivers/usb/host/xhci-mem.c~xhci-fix-abort-race2 2016-11-16
13:36:07.219329211 +0900
+++ xhci-hirofumi/drivers/usb/host/xhci-mem.c 2016-11-16 13:36:07.221329211
+0900
@@ -2344,6 +2344,7 @@ int xhci_mem_init(struct xhci_hcd *xhci,
/* init command timeout work */
INIT_DELAYED_WORK(>cmd_timer, xhci_handle_command_timeout);
+ init_completion(>stop_completion);
page_size = readl(>op_regs->page_size);
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
diff -puN drivers/usb/host/xhci-ring.c~xhci-fix-abort-race2
drivers/usb/host/xhci-ring.c
--- xhci/drivers/usb/host/xhci-ring.c~xhci-fix-abort-race2 2016-11-16
13:36:07.219329211 +0900
+++ xhci-hirofumi/drivers/usb/host/xhci-ring.c 2016-11-16 13:36:07.221329211
+0900
@@ -284,6 +284,61 @@ static bool xhci_mod_cmd_timer(struct xh
return mod_delayed_work(system_wq, >cmd_timer, delay);
}
+static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci)
+{
+ if (list_empty(>cmd_list))
+ return NULL;
+ return list_first_entry(>cmd_list, struct xhci_command, cmd_list);
+}
+
+/*
+ * Turn all commands on command ring with status set to "aborted" to no-op
trbs.
+ * If there are other commands waiting then restart the ring and kick the
timer.
+ * This must be called with command ring stopped and xhci->lock held.
+ */
+static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
+struct xhci_command *cur_cmd)
+{
+ struct xhci_command *i_cmd;
+ u32 cycle_state;
+
+ /* Turn all aborted commands in list to no-ops, then restart */
+ list_for_each_entry(i_cmd, >cmd_list, cmd_list) {
+
+ if (i_cmd->status != COMP_CMD_ABORT)
+ continue;
+
+ i_cmd->status = COMP_CMD_STOP;
+
+ xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
+i_cmd->command_trb);
+ /* get cycle state from the original cmd trb */
+ cycle_state = le32_to_cpu(
+ i_cmd->command_trb->generic.field[3]) & TRB_CYCLE;
+ /* modify the command trb to no-op command */
+ i_cmd->command_trb->generic.field[0] = 0;
+ i_cmd->command_trb->generic.field[1] = 0;
+ i_cmd->command_trb->generic.field[2] = 0;
+ i_cmd->command_trb->generic.field[3] = cpu_to_le32(
+ TRB_TYPE(TRB_CMD_NOOP) | cycle_state);
+
+ /*
+* caller waiting for completion is called when command
+* completion event is received for these no-op commands
+*/
+ }
+
+ xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;
+
+ /* ring command ring doorbell to restart the command ring */
+ if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
+ !(xhci->xhc_state & XHCI_STATE_DYING)) {
+ xhci->current_cmd = cur_cmd;
+ xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
+ xhci_ring_cmd_db(xhci);
+ }
+}
+
static