With the introduction of support for Post-Quantum Cryptography algorithms in the TPM, commands and responses will grow in size. Some TPMs have a physical hardware memory window (MMIO) that is smaller than the commands that need to be sent. Therefore, implement the core logic for sending and receiving data in chunks.
Instead of sending the whole command at once, the driver now sends it in small chunks. After each chunk, it signals the TPM using a nextChunk signal, and waits for the TPM to consume the data. Once the final piece is delivered, the driver signals the TPM to begin execution by toggling the start invoke bit. We use the same logic in reverse to read large responses from the TPM. This allows the driver to handle large payloads even when the hardware interface has limited memory. This kernel-side support corresponds to the backend implementation in QEMU [1]. QEMU reassembles the chunks before passing them to the TPM emulator. [1] https://lore.kernel.org/qemu-devel/[email protected]/ Signed-off-by: Arun Menon <[email protected]> --- drivers/char/tpm/tpm_crb.c | 155 +++++++++++++++++++++++++++---------- 1 file changed, 114 insertions(+), 41 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 922bcf7a69ad5..a97fc5e9927e3 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -104,11 +104,13 @@ struct crb_priv { u8 __iomem *cmd; u8 __iomem *rsp; u32 cmd_size; + u32 rsp_size; u32 smc_func_id; u32 __iomem *pluton_start_addr; u32 __iomem *pluton_reply_addr; u8 ffa_flags; u8 ffa_attributes; + bool chunking_supported; }; struct tpm2_crb_smc { @@ -368,38 +370,6 @@ static u8 crb_status(struct tpm_chip *chip) return sts; } -static int crb_recv(struct tpm_chip *chip, u8 *buf, size_t count) -{ - struct crb_priv *priv = dev_get_drvdata(&chip->dev); - unsigned int expected; - - /* A sanity check that the upper layer wants to get at least the header - * as that is the minimum size for any TPM response. - */ - if (count < TPM_HEADER_SIZE) - return -EIO; - - /* If this bit is set, according to the spec, the TPM is in - * unrecoverable condition. - */ - if (ioread32(&priv->regs_t->ctrl_sts) & CRB_CTRL_STS_ERROR) - return -EIO; - - /* Read the first 8 bytes in order to get the length of the response. 
- * We read exactly a quad word in order to make sure that the remaining - * reads will be aligned. - */ - memcpy_fromio(buf, priv->rsp, 8); - - expected = be32_to_cpup((__be32 *)&buf[2]); - if (expected > count || expected < TPM_HEADER_SIZE) - return -EIO; - - memcpy_fromio(&buf[8], &priv->rsp[8], expected - 8); - - return expected; -} - static int crb_do_acpi_start(struct tpm_chip *chip) { union acpi_object *obj; @@ -474,6 +444,8 @@ static int crb_trigger_tpm(struct tpm_chip *chip, u32 start_cmd) static int crb_send(struct tpm_chip *chip, u8 *buf, size_t bufsiz, size_t len) { struct crb_priv *priv = dev_get_drvdata(&chip->dev); + size_t offset = 0; + size_t chunk_size; int rc = 0; /* Zero the cancel register so that the next command will not get @@ -481,7 +453,7 @@ static int crb_send(struct tpm_chip *chip, u8 *buf, size_t bufsiz, size_t len) */ iowrite32(0, &priv->regs_t->ctrl_cancel); - if (len > priv->cmd_size) { + if (len > priv->cmd_size && !priv->chunking_supported) { dev_err(&chip->dev, "invalid command count value %zd %d\n", len, priv->cmd_size); return -E2BIG; @@ -491,18 +463,108 @@ static int crb_send(struct tpm_chip *chip, u8 *buf, size_t bufsiz, size_t len) if (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) __crb_cmd_ready(&chip->dev, priv, chip->locality); - memcpy_toio(priv->cmd, buf, len); + while (offset < len) { + chunk_size = min_t(size_t, len - offset, priv->cmd_size); - /* Make sure that cmd is populated before issuing start. */ - wmb(); - - rc = crb_trigger_tpm(chip, CRB_START_INVOKE); - if (rc) - return rc; + if (chunk_size == 0) + break; + memcpy_toio(priv->cmd, buf + offset, chunk_size); + offset += chunk_size; + + /* Make sure that cmd is populated before issuing start. 
*/ + wmb(); + if (offset < len) { + rc = crb_trigger_tpm(chip, CRB_START_NEXT_CHUNK); + if (rc) + return rc; + if (!crb_wait_for_reg_32(&priv->regs_t->ctrl_start, + CRB_START_NEXT_CHUNK, 0, TPM2_TIMEOUT_C)) { + dev_err(&chip->dev, + "Timeout waiting for backend to consume chunk\n"); + return -ETIME; + } + } else { + rc = crb_trigger_tpm(chip, CRB_START_INVOKE); + if (rc) + return rc; + } + } return crb_try_pluton_doorbell(priv, false); } +static int crb_recv(struct tpm_chip *chip, u8 *buf, size_t count) +{ + struct crb_priv *priv = dev_get_drvdata(&chip->dev); + unsigned int expected; + size_t offset = 0; + size_t chunk_size; + size_t first_read; + int rc; + + /* A sanity check that the upper layer wants to get at least the header + * as that is the minimum size for any TPM response. + */ + if (count < TPM_HEADER_SIZE) + return -EIO; + + /* If this bit is set, according to the spec, the TPM is in + * unrecoverable condition. + */ + if (ioread32(&priv->regs_t->ctrl_sts) & CRB_CTRL_STS_ERROR) + return -EIO; + + /* Read the first 8 bytes in order to get the length of the response. + * We read exactly a quad word in order to make sure that the remaining + * reads will be aligned. + */ + memcpy_fromio(buf, priv->rsp, 8); + + expected = be32_to_cpup((__be32 *)&buf[2]); + if (expected > count || expected < TPM_HEADER_SIZE) + return -EIO; + + /* + * Set chunk_size by comparing the size of the buffer that the upper layer has + * allocated (count) to the hardware tpm limit (priv->rsp_size). + * This is to prevent buffer overflow while writing to buf. + */ + chunk_size = min_t(size_t, count, priv->rsp_size); + if (chunk_size < 8) + return -EIO; + + /* + * Compare the actual size of the response we found in the header to the chunk_size. 
+ */ + first_read = min_t(size_t, expected, chunk_size); + + memcpy_fromio(&buf[8], &priv->rsp[8], first_read - 8); + offset = first_read; + + while (offset < expected) { + if (!priv->chunking_supported) { + dev_err(&chip->dev, "Response larger than MMIO and chunking not supported\n"); + return -EIO; + } + + rc = crb_trigger_tpm(chip, CRB_START_NEXT_CHUNK); + if (rc) + return rc; + + if (!crb_wait_for_reg_32(&priv->regs_t->ctrl_start, + CRB_START_NEXT_CHUNK, 0, TPM2_TIMEOUT_C)) { + dev_err(&chip->dev, "Timeout waiting for backend response\n"); + return -ETIME; + } + + chunk_size = min_t(size_t, expected - offset, priv->rsp_size); + memcpy_fromio(buf + offset, priv->rsp, chunk_size); + offset += chunk_size; + } + + return expected; +} + static void crb_cancel(struct tpm_chip *chip) { struct crb_priv *priv = dev_get_drvdata(&chip->dev); @@ -727,6 +789,15 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, goto out; } + if (priv->regs_h) { + u32 intf_id = ioread32((u32 __iomem *)&priv->regs_h->intf_id); + + if (intf_id & CRB_INTF_CAP_CRB_CHUNK) { + priv->chunking_supported = true; + dev_info(dev, "CRB Chunking is supported by backend\n"); + } + } + memcpy_fromio(&__rsp_pa, &priv->regs_t->ctrl_rsp_pa, 8); rsp_pa = le64_to_cpu(__rsp_pa); rsp_size = ioread32(&priv->regs_t->ctrl_rsp_size); @@ -764,8 +835,10 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, priv->rsp = priv->cmd; out: - if (!ret) + if (!ret) { priv->cmd_size = cmd_size; + priv->rsp_size = rsp_size; + } __crb_go_idle(dev, priv, 0); -- 2.53.0

