The following pull request was submitted through GitHub. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/8000
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === This fixes the new failures reported in #6439.
From 6da737988a5fc96724446109a32c10e82bc4154b Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 8 Oct 2020 16:16:36 +0200 Subject: [PATCH 1/5] db: Retry transient errors for longer We used to retry for a little more than a second, which is not enough in some cases. We now retry for about 25 seconds and with a higher attempt frequency. Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/db/query/retry.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lxd/db/query/retry.go b/lxd/db/query/retry.go index a67f3dc5ac..8c62026843 100644 --- a/lxd/db/query/retry.go +++ b/lxd/db/query/retry.go @@ -5,12 +5,16 @@ import ( "strings" "time" + "github.com/Rican7/retry/jitter" + "github.com/canonical/go-dqlite/driver" "github.com/mattn/go-sqlite3" "github.com/pkg/errors" "github.com/lxc/lxd/shared/logger" ) +const maxRetries = 250 + // Retry wraps a function that interacts with the database, and retries it in // case a transient error is hit. // @@ -18,7 +22,7 @@ import ( func Retry(f func() error) error { // TODO: the retry loop should be configurable. var err error - for i := 0; i < 5; i++ { + for i := 0; i < maxRetries; i++ { err = f() if err != nil { // No point in re-trying or logging a no-row error. @@ -29,8 +33,12 @@ func Retry(f func() error) error { // Process actual errors. 
logger.Debugf("Database error: %#v", err) if IsRetriableError(err) { + if i == maxRetries { + logger.Warnf("Give up retring database error: %v", err) + break + } logger.Debugf("Retry failed db interaction (%v)", err) - time.Sleep(250 * time.Millisecond) + time.Sleep(jitter.Deviation(nil, 0.8)(100 * time.Millisecond)) continue } } From 0978355a1ba22a6552ab8cb13bce99808dfcb014 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 8 Oct 2020 16:18:10 +0200 Subject: [PATCH 2/5] db: Always retry driver.ErrBusy, regardless of the error message Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/db/query/retry.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lxd/db/query/retry.go b/lxd/db/query/retry.go index 8c62026843..b8a3fb1116 100644 --- a/lxd/db/query/retry.go +++ b/lxd/db/query/retry.go @@ -56,6 +56,10 @@ func IsRetriableError(err error) bool { return false } + if err, ok := err.(driver.Error); ok && err.Code == driver.ErrBusy { + return true + } + if err == sqlite3.ErrLocked || err == sqlite3.ErrBusy { return true } From 4ba063876dcec1ea8659ab74bd66dbec58fb4a33 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 8 Oct 2020 16:23:23 +0200 Subject: [PATCH 3/5] db: Retry failed rollbacks if they are due to transient errors This avoids leaving the connection in a bad state, where a transaction is already in progress and another one can't be started. Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/db/query/transaction.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/db/query/transaction.go b/lxd/db/query/transaction.go index 0cde98614d..77bac41403 100644 --- a/lxd/db/query/transaction.go +++ b/lxd/db/query/transaction.go @@ -30,7 +30,7 @@ func Transaction(db *sql.DB, f func(*sql.Tx) error) error { // succeeds the given error is returned, otherwise a new error that wraps it // gets generated and returned. 
func rollback(tx *sql.Tx, reason error) error { - err := tx.Rollback() + err := Retry(tx.Rollback) if err != nil { logger.Warnf("Failed to rollback transaction after error (%v): %v", reason, err) } From f68785b88259f34af84c4360f06c33d69cbc4281 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 8 Oct 2020 16:49:56 +0200 Subject: [PATCH 4/5] db: Explicitly rollback leftover transactions when a new one can't be started Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/db/query/transaction.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lxd/db/query/transaction.go b/lxd/db/query/transaction.go index 77bac41403..901177b0e8 100644 --- a/lxd/db/query/transaction.go +++ b/lxd/db/query/transaction.go @@ -2,6 +2,7 @@ package query import ( "database/sql" + "strings" "github.com/lxc/lxd/shared/logger" "github.com/pkg/errors" @@ -11,6 +12,11 @@ import ( func Transaction(db *sql.DB, f func(*sql.Tx) error) error { tx, err := db.Begin() if err != nil { + // If there is a leftover transaction let's try to rollback, + // we'll then retry again. 
+ if strings.Contains(err.Error(), "cannot start a transaction within a transaction") { + db.Exec("ROLLBACK") + } return errors.Wrap(err, "failed to begin transaction") } From 54639c41eab9532e618be4b6d59a835c3ee6aee0 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 8 Oct 2020 16:50:35 +0200 Subject: [PATCH 5/5] db: Retry to begin a new transaction after an explicit rollback attempt Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/db/query/retry.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lxd/db/query/retry.go b/lxd/db/query/retry.go index b8a3fb1116..0d8a28c032 100644 --- a/lxd/db/query/retry.go +++ b/lxd/db/query/retry.go @@ -68,6 +68,10 @@ func IsRetriableError(err error) bool { return true } + if strings.Contains(err.Error(), "cannot start a transaction within a transaction") { + return true + } + if strings.Contains(err.Error(), "bad connection") { return true }
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel