When the hash algorithm is SHA-256 and the verity version is not 0, use
the SHA-256 library instead of crypto_shash.

This is a prerequisite for making dm-verity interleave the computation
of SHA-256 hashes for increased performance.  That optimization is
available in the SHA-256 library but not in crypto_shash.

Even without interleaved hashing, switching to the library also slightly
improves performance by itself because it avoids the overhead of
crypto_shash, including indirect calls and other API overhead.
(Benchmark on x86_64, AMD Zen 5: hashing 4K blocks gets 2.1% faster.)

SHA-256 is by far the most common hash algorithm used with dm-verity.
It makes sense to optimize for the common case and fall back to the
generic crypto layer for uncommon cases, as suggested by Linus:
https://lore.kernel.org/r/CAHk-=wgp-fosszsyrbyzqcafevrt5jqs1jl-97wc4semntu...@mail.gmail.com

Signed-off-by: Eric Biggers <[email protected]>
---
 drivers/md/Kconfig            |  1 +
 drivers/md/dm-verity-target.c | 61 +++++++++++++++++++++++++++--------
 drivers/md/dm-verity.h        | 20 +++++++++---
 3 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 104aa53550905..cac4926fc3401 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -544,10 +544,11 @@ config DM_FLAKEY
 config DM_VERITY
        tristate "Verity target support"
        depends on BLK_DEV_DM
        select CRYPTO
        select CRYPTO_HASH
+       select CRYPTO_LIB_SHA256
        select DM_BUFIO
        help
          This device-mapper target creates a read-only device that
          transparently validates the data on one underlying device against
          a pre-generated tree of cryptographic checksums stored on a second
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 20ddf560d22e3..bba9810805631 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -115,23 +115,37 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
 }
 
 int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
                const u8 *data, size_t len, u8 *digest)
 {
-       struct shash_desc *desc = &io->hash_desc;
+       struct shash_desc *desc;
        int r;
 
+       if (likely(v->use_sha256_lib)) {
+               struct sha256_ctx *ctx = &io->hash_ctx.sha256;
+
+               /*
+                * Fast path using SHA-256 library.  This is enabled only for
+                * verity version 1, where the salt is at the beginning.
+                */
+               *ctx = *v->initial_hashstate.sha256;
+               sha256_update(ctx, data, len);
+               sha256_final(ctx, digest);
+               return 0;
+       }
+
+       desc = &io->hash_ctx.shash;
        desc->tfm = v->shash_tfm;
-       if (unlikely(v->initial_hashstate == NULL)) {
+       if (unlikely(v->initial_hashstate.shash == NULL)) {
                /* Version 0: salt at end */
                r = crypto_shash_init(desc) ?:
                    crypto_shash_update(desc, data, len) ?:
                    crypto_shash_update(desc, v->salt, v->salt_size) ?:
                    crypto_shash_final(desc, digest);
        } else {
                /* Version 1: salt at beginning */
-               r = crypto_shash_import(desc, v->initial_hashstate) ?:
+               r = crypto_shash_import(desc, v->initial_hashstate.shash) ?:
                    crypto_shash_finup(desc, data, len, digest);
        }
        if (unlikely(r))
                DMERR("Error hashing block: %d", r);
        return r;
@@ -1002,11 +1016,11 @@ static void verity_dtr(struct dm_target *ti)
        if (v->bufio)
                dm_bufio_client_destroy(v->bufio);
 
        kvfree(v->validated_blocks);
        kfree(v->salt);
-       kfree(v->initial_hashstate);
+       kfree(v->initial_hashstate.shash);
        kfree(v->root_digest);
        kfree(v->zero_digest);
        verity_free_sig(v);
 
        crypto_free_shash(v->shash_tfm);
@@ -1067,12 +1081,11 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
        v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
 
        if (!v->zero_digest)
                return r;
 
-       io = kmalloc(sizeof(*io) + crypto_shash_descsize(v->shash_tfm),
-                    GFP_KERNEL);
+       io = kmalloc(v->ti->per_io_data_size, GFP_KERNEL);
 
        if (!io)
                return r; /* verity_dtr will free zero_digest */
 
        zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL);
@@ -1254,10 +1267,24 @@ static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name)
        v->digest_size = crypto_shash_digestsize(shash);
        if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
                ti->error = "Digest size too big";
                return -EINVAL;
        }
+       if (likely(v->version && strcmp(alg_name, "sha256") == 0)) {
+               /*
+                * Fast path: use the library API for reduced overhead and
+                * interleaved hashing support.
+                */
+               v->use_sha256_lib = true;
+               ti->per_io_data_size =
+                       offsetofend(struct dm_verity_io, hash_ctx.sha256);
+       } else {
+               /* Fallback case: use the generic crypto API. */
+               ti->per_io_data_size =
+                       offsetofend(struct dm_verity_io, hash_ctx.shash) +
+                       crypto_shash_descsize(shash);
+       }
        return 0;
 }
 
 static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg)
 {
@@ -1274,28 +1301,39 @@ static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg)
                    hex2bin(v->salt, arg, v->salt_size)) {
                        ti->error = "Invalid salt";
                        return -EINVAL;
                }
        }
-       if (v->version) { /* Version 1: salt at beginning */
+       if (likely(v->use_sha256_lib)) {
+               /* Implies version 1: salt at beginning */
+               v->initial_hashstate.sha256 =
+                       kmalloc(sizeof(struct sha256_ctx), GFP_KERNEL);
+               if (!v->initial_hashstate.sha256) {
+                       ti->error = "Cannot allocate initial hash state";
+                       return -ENOMEM;
+               }
+               sha256_init(v->initial_hashstate.sha256);
+               sha256_update(v->initial_hashstate.sha256,
+                             v->salt, v->salt_size);
+       } else if (v->version) { /* Version 1: salt at beginning */
                SHASH_DESC_ON_STACK(desc, v->shash_tfm);
                int r;
 
                /*
                 * Compute the pre-salted hash state that can be passed to
                 * crypto_shash_import() for each block later.
                 */
-               v->initial_hashstate = kmalloc(
+               v->initial_hashstate.shash = kmalloc(
                        crypto_shash_statesize(v->shash_tfm), GFP_KERNEL);
-               if (!v->initial_hashstate) {
+               if (!v->initial_hashstate.shash) {
                        ti->error = "Cannot allocate initial hash state";
                        return -ENOMEM;
                }
                desc->tfm = v->shash_tfm;
                r = crypto_shash_init(desc) ?:
                    crypto_shash_update(desc, v->salt, v->salt_size) ?:
-                   crypto_shash_export(desc, v->initial_hashstate);
+                   crypto_shash_export(desc, v->initial_hashstate.shash);
                if (r) {
                        ti->error = "Cannot set up initial hash state";
                        return r;
                }
        }
@@ -1553,13 +1591,10 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                ti->error = "Cannot allocate workqueue";
                r = -ENOMEM;
                goto bad;
        }
 
-       ti->per_io_data_size = sizeof(struct dm_verity_io) +
-                              crypto_shash_descsize(v->shash_tfm);
-
        r = verity_fec_ctr(v);
        if (r)
                goto bad;
 
        ti->per_io_data_size = roundup(ti->per_io_data_size,
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 6d141abd965c7..cdcee68a4bc0a 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -14,10 +14,11 @@
 #include <linux/dm-io.h>
 #include <linux/dm-bufio.h>
 #include <linux/device-mapper.h>
 #include <linux/interrupt.h>
 #include <crypto/hash.h>
+#include <crypto/sha2.h>
 
 #define DM_VERITY_MAX_LEVELS           63
 
 enum verity_mode {
        DM_VERITY_MODE_EIO,
@@ -40,11 +41,14 @@ struct dm_verity {
        struct dm_bufio_client *bufio;
        char *alg_name;
        struct crypto_shash *shash_tfm;
        u8 *root_digest;        /* digest of the root block */
        u8 *salt;               /* salt: its size is salt_size */
-       u8 *initial_hashstate;  /* salted initial state, if version >= 1 */
+       union {
+               struct sha256_ctx *sha256;      /* for use_sha256_lib=1 */
+               u8 *shash;                      /* for use_sha256_lib=0 */
+       } initial_hashstate; /* salted initial state, if version >= 1 */
        u8 *zero_digest;        /* digest for a zero block */
 #ifdef CONFIG_SECURITY
        u8 *root_digest_sig;    /* signature of the root digest */
        unsigned int sig_size;  /* root digest signature size */
 #endif /* CONFIG_SECURITY */
@@ -57,10 +61,11 @@ struct dm_verity {
        unsigned char hash_per_block_bits;      /* log2(hashes in hash block) */
        unsigned char levels;   /* the number of tree levels */
        unsigned char version;
        bool hash_failed:1;     /* set if hash of any block failed */
        bool use_bh_wq:1;       /* try to verify in BH wq before normal work-queue */
+       bool use_sha256_lib:1;  /* use SHA-256 library instead of generic crypto API */
        unsigned int digest_size;       /* digest size for the current hash algorithm */
        enum verity_mode mode;  /* mode for handling verification errors */
        enum verity_mode error_mode;/* mode for handling I/O errors */
        unsigned int corrupted_errs;/* Number of errors for corrupted blocks */
 
@@ -96,15 +101,20 @@ struct dm_verity_io {
 
        u8 real_digest[HASH_MAX_DIGESTSIZE];
        u8 want_digest[HASH_MAX_DIGESTSIZE];
 
        /*
-        * Temporary space for hashing.  This is variable-length and must be at
-        * the end of the struct.  struct shash_desc is just the fixed part;
-        * it's followed by a context of size crypto_shash_descsize(shash_tfm).
+        * Temporary space for hashing.  Either sha256 or shash is used,
+        * depending on the value of use_sha256_lib.  If shash is used,
+        * then this field is variable-length, with total size
+        * sizeof(struct shash_desc) + crypto_shash_descsize(shash_tfm).
+        * For this reason, this field must be at the end of the struct.
         */
-       struct shash_desc hash_desc;
+       union {
+               struct sha256_ctx sha256;
+               struct shash_desc shash;
+       } hash_ctx;
 };
 
 static inline u8 *verity_io_real_digest(struct dm_verity *v,
                                        struct dm_verity_io *io)
 {
-- 
2.51.0


Reply via email to