On 1/13/26 1:28 PM, Thomas Weißschuh wrote:
> The current signature-based module integrity checking has some drawbacks
> in combination with reproducible builds. Either the module signing key
> is generated at build time, which makes the build unreproducible, or a
> static signing key is used, which precludes rebuilds by third parties
> and makes the whole build and packaging process much more complicated.
> 
> The goal is to reach bit-for-bit reproducibility. Excluding certain
> parts of the build output from the reproducibility analysis would be
> error-prone and force each downstream consumer to introduce new tooling.
> 
> Introduce a new mechanism to ensure only well-known modules are loaded
> by embedding a merkle tree root of all modules built as part of the full
> kernel build into vmlinux.
> 
> Non-builtin modules can be validated as before through signatures.
> 
> Normally the .ko module files depend on a fully built vmlinux to be
> available for modpost validation and BTF generation. With
> CONFIG_MODULE_HASHES, vmlinux now depends on the modules
> to build a merkle tree. This introduces a dependency cycle which is
> impossible to satisfy. Work around this by building the modules during
> link-vmlinux.sh, after vmlinux is complete enough for modpost and BTF
> but before the final module hashes are embedded into vmlinux.
> 
> The PKCS7 format which is used for regular module signatures can not
> represent Merkle proofs, so a new kind of module signature is
> introduced. As this signature type is only ever used for builtin
> modules, no compatibility issues can arise.

Nit: The description uses the term "builtin modules" in a misleading
way. Typically, "builtin modules" refers to modules that are linked
directly into vmlinux. However, this text uses the term to refer to
loadable modules that are built together with the main kernel image,
which is something different.

> diff --git a/scripts/modules-merkle-tree.c b/scripts/modules-merkle-tree.c
> new file mode 100644
> index 000000000000..a6ec0e21213b
> --- /dev/null
> +++ b/scripts/modules-merkle-tree.c
> @@ -0,0 +1,467 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Compute hashes for modules files and build a merkle tree.
> + *
> + * Copyright (C) 2025 Sebastian Andrzej Siewior <[email protected]>
> + * Copyright (C) 2025 Thomas Weißschuh <[email protected]>
> + *
> + */
> +#define _GNU_SOURCE 1
> +#include <arpa/inet.h>
> +#include <err.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <stdarg.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +
> +#include <sys/stat.h>
> +#include <sys/mman.h>
> +
> +#include <openssl/evp.h>
> +#include <openssl/err.h>
> +
> +#include "ssl-common.h"
> +
> +static int hash_size;
> +static EVP_MD_CTX *ctx;
> +
> +struct module_signature {
> +     uint8_t         algo;           /* Public-key crypto algorithm [0] */
> +     uint8_t         hash;           /* Digest algorithm [0] */
> +     uint8_t         id_type;        /* Key identifier type [PKEY_ID_PKCS7] */
> +     uint8_t         signer_len;     /* Length of signer's name [0] */
> +     uint8_t         key_id_len;     /* Length of key identifier [0] */
> +     uint8_t         __pad[3];
> +     uint32_t        sig_len;        /* Length of signature data */
> +};
> +
> +#define PKEY_ID_MERKLE 3
> +
> +static const char magic_number[] = "~Module signature appended~\n";

It might make sense to put these common structures into a file under
scripts/include/ so they can be shared by both scripts/sign-file.c and
scripts/modules-merkle-tree.c.

> +
> +struct file_entry {
> +     char *name;
> +     unsigned int pos;
> +     unsigned char hash[EVP_MAX_MD_SIZE];
> +};
> +
> +static struct file_entry *fh_list;
> +static size_t num_files;
> +
> +struct leaf_hash {
> +     unsigned char hash[EVP_MAX_MD_SIZE];
> +};
> +
> +struct mtree {
> +     struct leaf_hash **l;
> +     unsigned int *entries;
> +     unsigned int levels;
> +};
> +
> +static inline void *xcalloc(size_t n, size_t size)
> +{
> +     void *p;
> +
> +     p = calloc(n, size);
> +     if (!p)
> +             errx(1, "Memory allocation failed");
> +
> +     return p;
> +}
> +
> +static void *xmalloc(size_t size)
> +{
> +     void *p;
> +
> +     p = malloc(size);
> +     if (!p)
> +             errx(1, "Memory allocation failed");
> +
> +     return p;
> +}
> +
> +static inline void *xreallocarray(void *oldp, size_t n, size_t size)
> +{
> +     void *p;
> +
> +     p = reallocarray(oldp, n, size);
> +     if (!p)
> +             errx(1, "Memory allocation failed");
> +
> +     return p;
> +}
> +
> +static inline char *xasprintf(const char *fmt, ...)
> +{
> +     va_list ap;
> +     char *strp;
> +     int ret;
> +
> +     va_start(ap, fmt);
> +     ret = vasprintf(&strp, fmt, ap);
> +     va_end(ap);
> +     if (ret == -1)
> +             err(1, "Memory allocation failed");
> +
> +     return strp;
> +}

I believe it is preferable to use xmalloc() and related functions from
scripts/include/xalloc.h, instead of defining your own variants. If
something is missing in xalloc.h, it can be extended.

> +
> +static unsigned int get_pow2(unsigned int val)
> +{
> +     return 31 - __builtin_clz(val);
> +}
> +
> +static unsigned int roundup_pow2(unsigned int val)
> +{
> +     return 1 << (get_pow2(val - 1) + 1);
> +}
> +
> +static unsigned int log2_roundup(unsigned int val)
> +{
> +     return get_pow2(roundup_pow2(val));
> +}

In the edge case when the kernel is built with only one module, the code
calls log2_roundup(1) -> roundup_pow2(1) -> get_pow2(0) ->
__builtin_clz(0). The return value of __builtin_clz() is undefined if
the input is zero.

> +
> +static void hash_data(void *p, unsigned int pos, size_t size, void *ret_hash)
> +{
> +     unsigned char magic = 0x01;
> +     unsigned int pos_be;
> +
> +     pos_be = htonl(pos);
> +
> +     ERR(EVP_DigestInit_ex(ctx, NULL, NULL) != 1, "EVP_DigestInit_ex()");
> +     ERR(EVP_DigestUpdate(ctx, &magic, sizeof(magic)) != 1, "EVP_DigestUpdate(magic)");
> +     ERR(EVP_DigestUpdate(ctx, &pos_be, sizeof(pos_be)) != 1, "EVP_DigestUpdate(pos)");
> +     ERR(EVP_DigestUpdate(ctx, p, size) != 1, "EVP_DigestUpdate(data)");
> +     ERR(EVP_DigestFinal_ex(ctx, ret_hash, NULL) != 1, "EVP_DigestFinal_ex()");
> +}
> +
> +static void hash_entry(void *left, void *right, void *ret_hash)
> +{
> +     int hash_size = EVP_MD_CTX_get_size_ex(ctx);

Nit: The local variable hash_size can be removed, as the static variable
with the same name should hold the same value.

> +     unsigned char magic = 0x02;
> +
> +     ERR(EVP_DigestInit_ex(ctx, NULL, NULL) != 1, "EVP_DigestInit_ex()");
> +     ERR(EVP_DigestUpdate(ctx, &magic, sizeof(magic)) != 1, "EVP_DigestUpdate(magic)");
> +     ERR(EVP_DigestUpdate(ctx, left, hash_size) != 1, "EVP_DigestUpdate(left)");
> +     ERR(EVP_DigestUpdate(ctx, right, hash_size) != 1, "EVP_DigestUpdate(right)");
> +     ERR(EVP_DigestFinal_ex(ctx, ret_hash, NULL) != 1, "EVP_DigestFinal_ex()");
> +}
> +
> +static void hash_file(struct file_entry *fe)
> +{
> +     struct stat sb;
> +     int fd, ret;
> +     void *mem;
> +
> +     fd = open(fe->name, O_RDONLY);
> +     if (fd < 0)
> +             err(1, "Failed to open %s", fe->name);
> +
> +     ret = fstat(fd, &sb);
> +     if (ret)
> +             err(1, "Failed to stat %s", fe->name);
> +
> +     mem = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
> +     close(fd);
> +
> +     if (mem == MAP_FAILED)
> +             err(1, "Failed to mmap %s", fe->name);

Nit: The err() call should be moved immediately after mmap(). In theory,
the intervening close() could change the errno value, resulting in
err() printing a misleading error message.

> +
> +     hash_data(mem, fe->pos, sb.st_size, fe->hash);
> +
> +     munmap(mem, sb.st_size);
> +}
> +
> +static struct mtree *build_merkle(struct file_entry *fh, size_t num)
> +{
> +     struct mtree *mt;
> +     unsigned int le;
> +
> +     if (!num)
> +             return NULL;
> +
> +     mt = xmalloc(sizeof(*mt));
> +     mt->levels = log2_roundup(num);
> +
> +     mt->l = xcalloc(sizeof(*mt->l), mt->levels);
> +
> +     mt->entries = xcalloc(sizeof(*mt->entries), mt->levels);
> +     le = num / 2;
> +     if (num & 1)
> +             le++;
> +     mt->entries[0] = le;
> +     mt->l[0] = xcalloc(sizeof(**mt->l), le);
> +
> +     /* First level of pairs */
> +     for (unsigned int i = 0; i < num; i += 2) {
> +             if (i == num - 1) {
> +                     /* Odd number of files, no pair. Hash with itself */
> +                     hash_entry(fh[i].hash, fh[i].hash, mt->l[0][i / 2].hash);
> +             } else {
> +                     hash_entry(fh[i].hash, fh[i + 1].hash, mt->l[0][i / 2].hash);
> +             }
> +     }
> +     for (unsigned int i = 1; i < mt->levels; i++) {
> +             int odd = 0;
> +
> +             if (le & 1) {
> +                     le++;
> +                     odd++;
> +             }
> +
> +             mt->entries[i] = le / 2;
> +             mt->l[i] = xcalloc(sizeof(**mt->l), le);

l[i] is overallocated. It needs only 'le / 2' entries.

> +
> +             for (unsigned int n = 0; n < le; n += 2) {
> +                     if (n == le - 2 && odd) {
> +                             /* Odd number of pairs, no pair. Hash with itself */
> +                             hash_entry(mt->l[i - 1][n].hash, mt->l[i - 1][n].hash,
> +                                        mt->l[i][n / 2].hash);
> +                     } else {
> +                             hash_entry(mt->l[i - 1][n].hash, mt->l[i - 1][n + 1].hash,
> +                                        mt->l[i][n / 2].hash);
> +                     }
> +             }
> +             le =  mt->entries[i];

Nit: It might be helpful to write both the first-level and other-level
loops in the same style to make them easier to understand, perhaps by
clearly separating the number of entries at each level. I suggest
something like the following:

/*
 * Build the merkle tree over the per-file hashes in fh[0..num_files - 1].
 *
 * Level 0 holds one node per pair of file hashes; each following level holds
 * one node per pair of nodes from the level below. Whenever a level has an
 * odd number of inputs, the last input is hashed with itself. The number of
 * nodes at level i is recorded in mt->entries[i].
 *
 * Returns NULL when there are no files, otherwise a newly allocated tree
 * with log2_roundup(num_files) levels.
 *
 * NOTE(review): for num_files == 1, log2_roundup() ends up evaluating
 * __builtin_clz(0), whose result is undefined; that edge case has to be
 * handled before this function can be relied on for a single module.
 */
static struct mtree *build_merkle(struct file_entry *fh, size_t num_files)
{
        struct mtree *mt;
        unsigned int num_cur_le, num_prev_le;

        if (!num_files)
                return NULL;

        mt = xmalloc(sizeof(*mt));
        mt->levels = log2_roundup(num_files);

        /* xcalloc() takes (count, element size), like calloc(). */
        mt->l = xcalloc(mt->levels, sizeof(*mt->l));

        mt->entries = xcalloc(mt->levels, sizeof(*mt->entries));
        /* One first-level node per pair of files, rounding up. */
        num_cur_le = (num_files + 1) / 2;
        mt->entries[0] = num_cur_le;
        mt->l[0] = xcalloc(num_cur_le, sizeof(**mt->l));

        /* First level of pairs */
        for (unsigned int i = 0; i < num_files; i += 2) {
                /* Hash the pair, or the last file with itself if it's odd. */
                void *right = i + 1 < num_files ? fh[i + 1].hash : fh[i].hash;
                hash_entry(fh[i].hash, right, mt->l[0][i / 2].hash);
        }

        for (unsigned int i = 1; i < mt->levels; i++) {
                num_prev_le = num_cur_le;

                /* Each level has half as many nodes as the one below, rounding up. */
                num_cur_le = (num_prev_le + 1) / 2;
                mt->entries[i] = num_cur_le;
                mt->l[i] = xcalloc(num_cur_le, sizeof(**mt->l));

                for (unsigned int n = 0; n < num_prev_le; n += 2) {
                        /* Hash the pair, or the last with itself if it's odd. */
                        void *right = n + 1 < num_prev_le ?
                                              mt->l[i - 1][n + 1].hash :
                                              mt->l[i - 1][n].hash;
                        hash_entry(mt->l[i - 1][n].hash, right,
                                   mt->l[i][n / 2].hash);
                }
        }
        return mt;
}

> +     }
> +     return mt;
> +}
> +
> +static void free_mtree(struct mtree *mt)
> +{
> +     if (!mt)
> +             return;
> +
> +     for (unsigned int i = 0; i < mt->levels; i++)
> +             free(mt->l[i]);
> +
> +     free(mt->l);
> +     free(mt->entries);
> +     free(mt);
> +}
> +
> +static void write_be_int(int fd, unsigned int v)
> +{
> +     unsigned int be_val = htonl(v);
> +
> +     if (write(fd, &be_val, sizeof(be_val)) != sizeof(be_val))
> +             err(1, "Failed writing to file");
> +}
> +
> +static void write_hash(int fd, const void *h)
> +{
> +     ssize_t wr;
> +
> +     wr = write(fd, h, hash_size);
> +     if (wr != hash_size)
> +             err(1, "Failed writing to file");
> +}

Nit: This could be

if (write(fd, h, hash_size) != hash_size)

to keep the style of write_be_int() and write_hash() consistent.

> +
> +static void build_proof(struct mtree *mt, unsigned int n, int fd)
> +{
> +     unsigned char cur[EVP_MAX_MD_SIZE];
> +     unsigned char tmp[EVP_MAX_MD_SIZE];
> +     struct file_entry *fe, *fe_sib;
> +
> +     fe = &fh_list[n];
> +
> +     if ((n & 1) == 0) {
> +             /* No pair, hash with itself */
> +             if (n + 1 == num_files)
> +                     fe_sib = fe;
> +             else
> +                     fe_sib = &fh_list[n + 1];
> +     } else {
> +             fe_sib = &fh_list[n - 1];
> +     }
> +     /* First comes the node position into the file */
> +     write_be_int(fd, n);
> +
> +     if ((n & 1) == 0)
> +             hash_entry(fe->hash, fe_sib->hash, cur);
> +     else
> +             hash_entry(fe_sib->hash, fe->hash, cur);
> +
> +     /* Next is the sibling hash, followed by hashes in the tree */
> +     write_hash(fd, fe_sib->hash);
> +
> +     for (unsigned int i = 0; i < mt->levels - 1; i++) {
> +             n >>= 1;
> +             if ((n & 1) == 0) {
> +                     void *h;
> +
> +                     /* No pair, hash with itself */
> +                     if (n + 1 == mt->entries[i])
> +                             h = cur;
> +                     else
> +                             h = mt->l[i][n + 1].hash;
> +
> +                     hash_entry(cur, h, tmp);
> +                     write_hash(fd, h);
> +             } else {
> +                     hash_entry(mt->l[i][n - 1].hash, cur, tmp);
> +                     write_hash(fd, mt->l[i][n - 1].hash);
> +             }
> +             memcpy(cur, tmp, hash_size);
> +     }
> +
> +      /* After all that, the end hash should match the root hash */
> +     if (memcmp(cur, mt->l[mt->levels - 1][0].hash, hash_size))
> +             errx(1, "hash mismatch");
> +}
> +
> +static void append_module_signature_magic(int fd, unsigned int sig_len)
> +{
> +     struct module_signature sig_info = {
> +             .id_type        = PKEY_ID_MERKLE,
> +             .sig_len        = htonl(sig_len),
> +     };
> +
> +     if (write(fd, &sig_info, sizeof(sig_info)) < 0)
> +             err(1, "write(sig_info) failed");
> +
> +     if (write(fd, &magic_number, sizeof(magic_number) - 1) < 0)
> +             err(1, "write(magic_number) failed");

Nit: Checking that the written size exactly matches the size of the
input data would be safer and consistent with other uses of write() in
write_be_int() and write_hash(). Additionally, it would be good to make
the error messages consistent in all cases.

> +}
> +
> +static void write_merkle_root(struct mtree *mt, const char *fp)
> +{
> +     char buf[1024];
> +     unsigned int levels;
> +     unsigned char *h;
> +     FILE *f;
> +
> +     if (mt) {
> +             levels = mt->levels;
> +             h = mt->l[mt->levels - 1][0].hash;
> +     } else {
> +             levels = 0;
> +             h = xcalloc(1, hash_size);
> +     }
> +
> +     f = fopen(fp, "w");
> +     if (!f)
> +             err(1, "Failed to create %s", buf);

The last parameter to err() should be fp. The buf variable is then
unused and can be removed.

> +
> +     fprintf(f, "#include <linux/module_hashes.h>\n\n");
> +     fprintf(f, "const struct module_hashes_root module_hashes_root __module_hashes_section = {\n");
> +
> +     fprintf(f, "\t.levels = %u,\n", levels);
> +     fprintf(f, "\t.hash = {");
> +     for (unsigned int i = 0; i < hash_size; i++) {
> +             char *space = "";
> +
> +             if (!(i % 8))
> +                     fprintf(f, "\n\t\t");
> +
> +             if ((i + 1) % 8)
> +                     space = " ";
> +
> +             fprintf(f, "0x%02x,%s", h[i], space);
> +     }
> +     fprintf(f, "\n\t},");
> +
> +     fprintf(f, "\n};\n");
> +     fclose(f);

Is it ok not to check the return values when writing to this output
file? Other code checks that its output was successful.

> +
> +     if (!mt)
> +             free(h);
> +}
> +
> +static char *xstrdup_replace_suffix(const char *str, const char *new_suffix)
> +{
> +     const char *current_suffix;
> +     size_t base_len;
> +
> +     current_suffix = strchr(str, '.');

It is safer to use strrchr() in case the module path happens to contain
a dot.

> +     if (!current_suffix)
> +             errx(1, "No existing suffix in '%s'", str);
> +
> +     base_len = current_suffix - str;
> +
> +     return xasprintf("%.*s%s", (int)base_len, str, new_suffix);
> +}
> +
> +static void read_modules_order(const char *fname, const char *suffix)
> +{
> +     char line[PATH_MAX];

<limits.h> should be included at the top to provide the definition of
PATH_MAX.

> +     FILE *in;
> +
> +     in = fopen(fname, "r");
> +     if (!in)
> +             err(1, "fopen(%s)", fname);

Nit: The error message could be "Failed to open %s" to maintain
consistency with a similar error in write_merkle_root().

> +
> +     while (fgets(line, PATH_MAX, in)) {
> +             struct file_entry *entry;
> +
> +             fh_list = xreallocarray(fh_list, num_files + 1, sizeof(*fh_list));

It might be useful to not reallocate this array for each file, although
I don't immediately see that it contributes any significant time to the
runtime.

> +             entry = &fh_list[num_files];
> +
> +             entry->pos = num_files;
> +             entry->name = xstrdup_replace_suffix(line, suffix);
> +             hash_file(entry);
> +
> +             num_files++;
> +     }
> +
> +     fclose(in);
> +}
> +
> +static __attribute__((noreturn))
> +void format(void)
> +{
> +     fprintf(stderr,
> +             "Usage: scripts/modules-merkle-tree <root definition>\n");

The usage string should mention the second parameter, which is the
module suffix.

> +     exit(2);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +     const EVP_MD *hash_evp;
> +     struct mtree *mt;
> +
> +     if (argc != 3)
> +             format();
> +
> +     hash_evp = EVP_get_digestbyname("sha256");
> +     ERR(!hash_evp, "EVP_get_digestbyname");
> +
> +     ctx = EVP_MD_CTX_new();
> +     ERR(!ctx, "EVP_MD_CTX_new()");
> +
> +     hash_size = EVP_MD_get_size(hash_evp);
> +     ERR(hash_size <= 0, "EVP_get_digestbyname");
> +
> +     if (EVP_DigestInit_ex(ctx, hash_evp, NULL) != 1)
> +             ERR(1, "EVP_DigestInit_ex()");
> +
> +     read_modules_order("modules.order", argv[2]);
> +
> +     mt = build_merkle(fh_list, num_files);
> +     write_merkle_root(mt, argv[1]);
> +     for (unsigned int i = 0; i < num_files; i++) {
> +             char *signame;
> +             int fd;
> +
> +             signame = xstrdup_replace_suffix(fh_list[i].name, ".merkle");
> +
> +             fd = open(signame, O_WRONLY | O_CREAT | O_TRUNC, 0644);
> +             if (fd < 0)
> +                     err(1, "Can't create %s", signame);
> +
> +             build_proof(mt, i, fd);
> +             append_module_signature_magic(fd, lseek(fd, 0, SEEK_CUR));
> +             close(fd);

The return code of close() should be checked, otherwise it is
meaningless to check the write() calls in
append_module_signature_magic().

> +     }
> +
> +     free_mtree(mt);
> +     for (unsigned int i = 0; i < num_files; i++)
> +             free(fh_list[i].name);
> +     free(fh_list);
> +
> +     EVP_MD_CTX_free(ctx);
> +     return 0;
> +}

-- 
Thanks,
Petr

Reply via email to