Author: imp
Date: Fri Jul 19 18:39:24 2019
New Revision: 350147
URL: https://svnweb.freebsd.org/changeset/base/350147

Log:
  Keep track of the number of commands that exhaust their retry limit.
  
  While we print failure messages on the console, sometimes logs are lost or
  overwhelmed. Keeping a count of how many times retriable commands have
  ultimately failed helps gauge the magnitude of the problem.

Modified:
  head/sys/dev/nvme/nvme_private.h
  head/sys/dev/nvme/nvme_qpair.c
  head/sys/dev/nvme/nvme_sysctl.c

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h    Fri Jul 19 18:39:18 2019        (r350146)
+++ head/sys/dev/nvme/nvme_private.h    Fri Jul 19 18:39:24 2019        (r350147)
@@ -191,6 +191,7 @@ struct nvme_qpair {
        int64_t                 num_cmds;
        int64_t                 num_intr_handler_calls;
        int64_t                 num_retries;
+       int64_t                 num_failures;
 
        struct nvme_command     *cmd;
        struct nvme_completion  *cpl;

Modified: head/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- head/sys/dev/nvme/nvme_qpair.c      Fri Jul 19 18:39:18 2019        (r350146)
+++ head/sys/dev/nvme/nvme_qpair.c      Fri Jul 19 18:39:24 2019        (r350147)
@@ -387,14 +387,16 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, 
     struct nvme_completion *cpl, error_print_t print_on_error)
 {
        struct nvme_request     *req;
-       boolean_t               retry, error;
+       boolean_t               retry, error, retriable;
 
        req = tr->req;
        error = nvme_completion_is_error(cpl);
-       retry = error && nvme_completion_is_retry(cpl) &&
-          req->retries < nvme_retry_count;
+       retriable = nvme_completion_is_retry(cpl);
+       retry = error && retriable && req->retries < nvme_retry_count;
        if (retry)
                qpair->num_retries++;
+       if (error && req->retries >= nvme_retry_count && retriable)
+               qpair->num_failures++;
 
        if (error && (print_on_error == ERROR_PRINT_ALL ||
                (!retry && print_on_error == ERROR_PRINT_NO_RETRY))) {
@@ -687,6 +689,7 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_
        qpair->num_cmds = 0;
        qpair->num_intr_handler_calls = 0;
        qpair->num_retries = 0;
+       qpair->num_failures = 0;
        qpair->cmd = (struct nvme_command *)queuemem;
        qpair->cpl = (struct nvme_completion *)(queuemem + cmdsz);
        prpmem = (uint8_t *)(queuemem + cmdsz + cplsz);

Modified: head/sys/dev/nvme/nvme_sysctl.c
==============================================================================
--- head/sys/dev/nvme/nvme_sysctl.c     Fri Jul 19 18:39:18 2019        (r350146)
+++ head/sys/dev/nvme/nvme_sysctl.c     Fri Jul 19 18:39:24 2019        (r350147)
@@ -167,6 +167,7 @@ nvme_qpair_reset_stats(struct nvme_qpair *qpair)
        qpair->num_cmds = 0;
        qpair->num_intr_handler_calls = 0;
        qpair->num_retries = 0;
+       qpair->num_failures = 0;
 }
 
 static int
@@ -215,6 +216,21 @@ nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS)
 }
 
 static int
+nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS)
+{
+       struct nvme_controller  *ctrlr = arg1;
+       int64_t                 num_failures = 0;
+       int                     i;
+
+       num_failures = ctrlr->adminq.num_failures;
+
+       for (i = 0; i < ctrlr->num_io_queues; i++)
+               num_failures += ctrlr->ioq[i].num_failures;
+
+       return (sysctl_handle_64(oidp, &num_failures, 0, req));
+}
+
+static int
 nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)
 {
        struct nvme_controller  *ctrlr = arg1;
@@ -267,6 +283,9 @@ nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
            "coalescing)");
        SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_retries",
            CTLFLAG_RD, &qpair->num_retries, "Number of commands retried");
+       SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_failures",
+           CTLFLAG_RD, &qpair->num_failures,
+           "Number of commands ending in failure after all retries");
 
        SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO,
            "dump_debug", CTLTYPE_UINT | CTLFLAG_RW, qpair, 0,
@@ -322,6 +341,11 @@ nvme_sysctl_initialize_ctrlr(struct nvme_controller *c
            "num_retries", CTLTYPE_S64 | CTLFLAG_RD,
            ctrlr, 0, nvme_sysctl_num_retries, "IU",
            "Number of commands retried");
+
+       SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
+           "num_failures", CTLTYPE_S64 | CTLFLAG_RD,
+           ctrlr, 0, nvme_sysctl_num_failures, "IU",
+           "Number of commands ending in failure after all retries");
 
        SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
            "reset_stats", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to