[PATCH 2/2] tpm2-sessions: NOT FOR COMMITTING add sessions testing
This runs through a preset sequence using sessions to demonstrate that the session handling code functions. It does both HMAC, encryption and decryption by testing an encrypted sealing operation with authority and proving that the same sealed data comes back again via an HMAC and response encryption. Signed-off-by: James Bottomley--- drivers/char/tpm/Makefile | 1 + drivers/char/tpm/tpm-chip.c | 1 + drivers/char/tpm/tpm2-sessions-test.c | 178 ++ 3 files changed, 180 insertions(+) create mode 100644 drivers/char/tpm/tpm2-sessions-test.c diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index 95ef2b10cc8d..b9eb70f1aee6 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_TCG_TPM) += tpm.o tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o tpm-chip.o tpm2-cmd.o \ tpm-dev-common.o tpmrm-dev.o tpm1_eventlog.o tpm2_eventlog.o \ tpm2-space.o tpm2-sessions.o +obj-m += tpm2-sessions-test.o tpm-$(CONFIG_ACPI) += tpm_ppi.o tpm_eventlog_acpi.o tpm-$(CONFIG_EFI) += tpm_eventlog_efi.o tpm-$(CONFIG_OF) += tpm_eventlog_of.o diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 0a62c19937b6..ca174ee1e670 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -118,6 +118,7 @@ struct tpm_chip *tpm_chip_find_get(struct tpm_chip *chip) return res; } +EXPORT_SYMBOL(tpm_chip_find_get); /** * tpm_dev_release() - free chip memory and the device number diff --git a/drivers/char/tpm/tpm2-sessions-test.c b/drivers/char/tpm/tpm2-sessions-test.c new file mode 100644 index ..a0e8a8b62cf1 --- /dev/null +++ b/drivers/char/tpm/tpm2-sessions-test.c @@ -0,0 +1,178 @@ +/* run a set of tests of the sessions code */ +#include "tpm.h" +#include "tpm2b.h" +#include "tpm2-sessions.h" + +#include + +int tpm2_sessions_test(void) +{ + struct tpm2_auth *auth; + struct tpm_buf buf, b1; + struct tpm2b t2b; + struct tpm_chip *chip; + int rc; + char payload[29]; + char *password = "Passw0Rd"; + const u8 *p; + 
u32 h; + u8 name[34]; + u16 len; + int ret = -EINVAL; + + chip = tpm_chip_find_get(NULL); + if (!chip) + return -ENODEV; + + if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) + return -ENODEV; + + get_random_bytes(payload, sizeof(payload)); + + /* precursor: get a session */ + rc = tpm2_start_auth_session(chip, ); + dev_info(>dev, "TPM: start auth session returned %d\n", rc); + if (rc) + goto out; + + /* first test: get random bytes from TPM */ + tpm_buf_init(, TPM2_ST_SESSIONS, TPM2_CC_GET_RANDOM); + tpm_buf_append_hmac_session(, auth, TPM2_SA_ENCRYPT + | TPM2_SA_CONTINUE_SESSION, NULL, 0); + tpm_buf_append_u16(, 29); + tpm_buf_fill_hmac_session(, auth); + rc = tpm_transmit_cmd(chip, >kernel_space, buf.data, PAGE_SIZE, + 0, 0, "get random"); + rc = tpm_buf_check_hmac_response(, auth); + dev_info(>dev, "TPM: check hmac response returned %d\n", rc); + tpm_buf_destroy(); + + /* +* second test, seal random data protecting sensitive by +* encryption and also doing response encryption (not +* necessary) The encrypted payload has two components: an +* authorization password which must be presented on useal and +* the actual data (the random payload) +*/ + tpm_buf_init(, TPM2_ST_SESSIONS, TPM2_CC_CREATE); + tpm_buf_append_name(, auth, chip->tpmkey, chip->tpmkeyname); + tpm_buf_append_hmac_session(, auth, TPM2_SA_DECRYPT + | TPM2_SA_ENCRYPT + | TPM2_SA_CONTINUE_SESSION, NULL, 0); + /* sensitive */ + tpm2b_init(); + /* the authorization */ + tpm2b_append_u16(, strlen(password)); + tpm2b_append(, password, strlen(password)); + /* the payload */ + tpm2b_append_u16(, sizeof(payload)); + tpm2b_append(, payload, sizeof(payload)); + tpm_buf_append_2b(, ); + /* the public */ + /* type */ + tpm2b_append_u16(, TPM2_ALG_KEYEDHASH); + /* name hash */ + tpm2b_append_u16(, TPM2_ALG_SHA256); + /* object properties */ + tpm2b_append_u32(, TPM2_OA_USER_WITH_AUTH | TPM2_OA_NO_DA); + /* auth policy (empty) */ + tpm2b_append_u16(, 0); + /* keyed hash parameters (we're null for a non-HMAC data blob) */ 
+ tpm2b_append_u16(, TPM2_ALG_NULL); + /* unique */ + tpm2b_append_u16(, 0); + tpm_buf_append_2b(, ); + /* outside info (also empty) */ + tpm_buf_append_u16(, 0); + /* creation PCR (empty) */ + tpm_buf_append_u32(, 0); + tpm_buf_fill_hmac_session(,
[PATCH 1/2] tpm2-sessions: Add full HMAC and encrypt/decrypt session handling
This code adds true session based HMAC authentication plus parameter decryption and response encryption using AES. The basic design of this code is to segregate all the nasty crypto, hash and hmac code into tpm2-sessions.c and export a usable API. The API first of all starts off by gaining a session with tpm2_start_auth_session() Which initiates a session with the TPM and allocates an opaque tpm2_auth structure to handle the session parameters. Then the use is simply: * tpm_buf_append_name() in place of the tpm_buf_append_u32 for the handles * tpm_buf_append_hmac_session() where tpm2_append_auth() would go * tpm_buf_fill_hmac_session() called after the entire command buffer is finished but before tpm_transmit_cmd() is called which computes the correct HMAC and places it in the command at the correct location. Finally, after tpm_transmit_cmd() is called, tpm_buf_check_hmac_response() is called to check that the returned HMAC matched and collect the new state for the next use of the session, if any. The features of the session is controlled by the session attributes set in tpm_buf_append_hmac_session(). If TPM2_SA_CONTINUE_SESSION is not specified, the session will be flushed and the tpm2_auth structure freed in tpm_buf_check_hmac_response(); otherwise the session may be used again. Parameter encryption is specified by or'ing the flag TPM2_SA_DECRYPT and response encryption by or'ing the flag TPM2_SA_ENCRYPT. the various encryptions will be taken care of by tpm_buf_fill_hmac_session() and tpm_buf_check_hmac_response() respectively. To get all of this to work securely, the Kernel now needs a primary key to encrypt the session salt to, so we derive an EC key from the NULL seed and store it in the tpm_chip structure. We also make sure that this seed remains for the kernel by using a kernel space to take it out of the TPM when userspace wants to use it. 
Signed-off-by: James Bottomley--- drivers/char/tpm/Kconfig | 3 + drivers/char/tpm/Makefile| 2 +- drivers/char/tpm/tpm.h | 22 + drivers/char/tpm/tpm2-cmd.c | 22 +- drivers/char/tpm/tpm2-sessions.c | 907 +++ drivers/char/tpm/tpm2-sessions.h | 55 +++ drivers/char/tpm/tpm2b.h | 82 7 files changed, 1080 insertions(+), 13 deletions(-) create mode 100644 drivers/char/tpm/tpm2-sessions.c create mode 100644 drivers/char/tpm/tpm2-sessions.h create mode 100644 drivers/char/tpm/tpm2b.h diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index 0aee88df98d1..8c714d8550c4 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig @@ -8,6 +8,9 @@ menuconfig TCG_TPM select SECURITYFS select CRYPTO select CRYPTO_HASH_INFO + select CRYPTO_ECDH + select CRYPTO_AES + select CRYPTO_CFB ---help--- If you have a TPM security chip in your system, which implements the Trusted Computing Group's specification, diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index d37c4a1748f5..95ef2b10cc8d 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_TCG_TPM) += tpm.o tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o tpm-chip.o tpm2-cmd.o \ tpm-dev-common.o tpmrm-dev.o tpm1_eventlog.o tpm2_eventlog.o \ - tpm2-space.o + tpm2-space.o tpm2-sessions.o tpm-$(CONFIG_ACPI) += tpm_ppi.o tpm_eventlog_acpi.o tpm-$(CONFIG_EFI) += tpm_eventlog_efi.o tpm-$(CONFIG_OF) += tpm_eventlog_of.o diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 3e083a30a108..95a0d5288d6a 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -42,6 +42,9 @@ #include #endif +/* fixed define for the curve we use which is NIST_P256 */ +#define EC_PT_SZ 32 + enum tpm_const { TPM_MINOR = 224,/* officially assigned */ TPM_BUFSIZE = 4096, @@ -114,16 +117,25 @@ enum tpm2_return_codes { enum tpm2_algorithms { TPM2_ALG_ERROR = 0x, TPM2_ALG_SHA1 = 0x0004, + TPM2_ALG_AES= 0x0006, TPM2_ALG_KEYEDHASH = 0x0008, TPM2_ALG_SHA256 = 0x000B, 
TPM2_ALG_SHA384 = 0x000C, TPM2_ALG_SHA512 = 0x000D, TPM2_ALG_NULL = 0x0010, TPM2_ALG_SM3_256= 0x0012, + TPM2_ALG_ECC= 0x0023, + TPM2_ALG_CFB= 0x0043, +}; + +enum tpm2_curves { + TPM2_ECC_NONE = 0x, + TPM2_ECC_NIST_P256 = 0x0003, }; enum tpm2_command_codes { TPM2_CC_FIRST = 0x011F, + TPM2_CC_CREATE_PRIMARY = 0x0131, TPM2_CC_SELF_TEST = 0x0143, TPM2_CC_STARTUP = 0x0144, TPM2_CC_SHUTDOWN= 0x0145, @@ -133,6 +145,7 @@ enum tpm2_command_codes { TPM2_CC_CONTEXT_LOAD= 0x0161, TPM2_CC_CONTEXT_SAVE= 0x0162,
[RFC 0/2] add integrity and security to TPM2 transactions
By now, everybody knows we have a problem with the TPM2_RS_PW easy button on TPM2 in that transactions on the TPM bus can be intercepted and altered. The way to fix this is to use real sessions for HMAC capabilities to ensure integrity and to use parameter and response encryption to ensure confidentiality of the data flowing over the TPM bus. This RFC is about adding a simple API which can ensure the above properties as a layered addition to the existing TPM handling code. Eventually we can add this to the random number generator, the PCR extensions and the trusted key handling, but this all depends on the conversion to tpm_buf which is not yet upstream, so I've constructed a second patch which demonstrates the new API in a test module for those who wish to play with it. This series is also dependent on additions to the crypto subsystem to fix problems in the elliptic curve key handling and add the Cipher FeedBack encryption scheme: https://marc.info/?l=linux-crypto-vger=151994371015475 --- James Bottomley (2): tpm2-sessions: Add full HMAC and encrypt/decrypt session handling tpm2-sessions: NOT FOR COMMITTING add sessions testing drivers/char/tpm/Kconfig | 3 + drivers/char/tpm/Makefile | 3 +- drivers/char/tpm/tpm-chip.c | 1 + drivers/char/tpm/tpm.h| 22 + drivers/char/tpm/tpm2-cmd.c | 22 +- drivers/char/tpm/tpm2-sessions-test.c | 178 +++ drivers/char/tpm/tpm2-sessions.c | 907 ++ drivers/char/tpm/tpm2-sessions.h | 55 +++ drivers/char/tpm/tpm2b.h | 82 +++ 9 files changed, 1260 insertions(+), 13 deletions(-) create mode 100644 drivers/char/tpm/tpm2-sessions-test.c create mode 100644 drivers/char/tpm/tpm2-sessions.c create mode 100644 drivers/char/tpm/tpm2-sessions.h create mode 100644 drivers/char/tpm/tpm2b.h -- 2.12.3
Re: [PATCH 2/3] crypto: ccp - return an actual key size from RSA max_size callback
On 03/02/2018 05:58 PM, Maciej S. Szmigiero wrote: On 03.03.2018 00:49, Hook, Gary wrote: On 3/2/2018 5:15 PM, Maciej S. Szmigiero wrote: Thanks. However, what about the first patch from this series? Without it, while it no longer should cause a buffer overflow, in-kernel X.509 certificate verification will still fail with CCP driver loaded (since CCP RSA implementation has a higher priority than the software RSA implementation). Maciej I commented on that one here: https://marc.info/?l=linux-crypto-vger=151986452422791=2 Effectively a NACK. We are reviewing a proposed patch right now. Your earlier comment referred to the third patch from this series. My message above was about the first one. Apologies; my mistake.
Re: [PATCH 2/3] crypto: ccp - return an actual key size from RSA max_size callback
On 03.03.2018 00:49, Hook, Gary wrote: > On 3/2/2018 5:15 PM, Maciej S. Szmigiero wrote: >> On 02.03.2018 17:44, Herbert Xu wrote: >>> On Sat, Feb 24, 2018 at 05:03:21PM +0100, Maciej S. Szmigiero wrote: rsa-pkcs1pad uses a value returned from a RSA implementation max_size callback as a size of an input buffer passed to the RSA implementation for encrypt and sign operations. CCP RSA implementation uses a hardware input buffer which size depends only on the current RSA key length, so it should return this key length in the max_size callback, too. This also matches what the kernel software RSA implementation does. Previously, the value returned from this callback was always the maximum RSA key size the CCP hardware supports. This resulted in this huge buffer being passed by rsa-pkcs1pad to CCP even for smaller key sizes and then in a buffer overflow when ccp_run_rsa_cmd() tried to copy this large input buffer into a RSA key length-sized hardware input buffer. Signed-off-by: Maciej S. SzmigieroFixes: ceeec0afd684 ("crypto: ccp - Add support for RSA on the CCP") Cc: sta...@vger.kernel.org >>> >>> Patch applied. Thanks. >> >> Thanks. >> >> However, what about the first patch from this series? >> Without it, while it no longer should cause a buffer overflow, in-kernel >> X.509 certificate verification will still fail with CCP driver loaded >> (since CCP RSA implementation has a higher priority than the software >> RSA implementation). >> >> Maciej >> > > > I commented on that one here: > https://marc.info/?l=linux-crypto-vger=151986452422791=2 > > Effectively a NACK. We are a reviewing a proposed patch right now. Your earlier comment referred to the third patch from this series. My message above was about the first one. Maciej
Re: [PATCH 2/3] crypto: ccp - return an actual key size from RSA max_size callback
On 3/2/2018 5:15 PM, Maciej S. Szmigiero wrote: On 02.03.2018 17:44, Herbert Xu wrote: On Sat, Feb 24, 2018 at 05:03:21PM +0100, Maciej S. Szmigiero wrote: rsa-pkcs1pad uses a value returned from a RSA implementation max_size callback as a size of an input buffer passed to the RSA implementation for encrypt and sign operations. CCP RSA implementation uses a hardware input buffer which size depends only on the current RSA key length, so it should return this key length in the max_size callback, too. This also matches what the kernel software RSA implementation does. Previously, the value returned from this callback was always the maximum RSA key size the CCP hardware supports. This resulted in this huge buffer being passed by rsa-pkcs1pad to CCP even for smaller key sizes and then in a buffer overflow when ccp_run_rsa_cmd() tried to copy this large input buffer into a RSA key length-sized hardware input buffer. Signed-off-by: Maciej S. SzmigieroFixes: ceeec0afd684 ("crypto: ccp - Add support for RSA on the CCP") Cc: sta...@vger.kernel.org Patch applied. Thanks. Thanks. However, what about the first patch from this series? Without it, while it no longer should cause a buffer overflow, in-kernel X.509 certificate verification will still fail with CCP driver loaded (since CCP RSA implementation has a higher priority than the software RSA implementation). Maciej I commented on that one here: https://marc.info/?l=linux-crypto-vger=151986452422791=2 Effectively a NACK. We are a reviewing a proposed patch right now.
Re: [PATCH 2/3] crypto: ccp - return an actual key size from RSA max_size callback
On 02.03.2018 17:44, Herbert Xu wrote: > On Sat, Feb 24, 2018 at 05:03:21PM +0100, Maciej S. Szmigiero wrote: >> rsa-pkcs1pad uses a value returned from a RSA implementation max_size >> callback as a size of an input buffer passed to the RSA implementation for >> encrypt and sign operations. >> >> CCP RSA implementation uses a hardware input buffer which size depends only >> on the current RSA key length, so it should return this key length in >> the max_size callback, too. >> This also matches what the kernel software RSA implementation does. >> >> Previously, the value returned from this callback was always the maximum >> RSA key size the CCP hardware supports. >> This resulted in this huge buffer being passed by rsa-pkcs1pad to CCP even >> for smaller key sizes and then in a buffer overflow when ccp_run_rsa_cmd() >> tried to copy this large input buffer into a RSA key length-sized hardware >> input buffer. >> >> Signed-off-by: Maciej S. Szmigiero>> Fixes: ceeec0afd684 ("crypto: ccp - Add support for RSA on the CCP") >> Cc: sta...@vger.kernel.org > > Patch applied. Thanks. Thanks. However, what about the first patch from this series? Without it, while it no longer should cause a buffer overflow, in-kernel X.509 certificate verification will still fail with CCP driver loaded (since CCP RSA implementation has a higher priority than the software RSA implementation). Maciej
Re: [RFC PATCH cryptodev] crypto: des3_ede_skciphers[] can be static
On Sat, Mar 03, 2018 at 04:29:46AM +0800, kbuild test robot wrote: > > Fixes: 09c0f03bf8ce ("crypto: x86/des3_ede - convert to skcipher interface") > Signed-off-by: Fengguang Wu> --- > des3_ede_glue.c |2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c > index f9c7bdc..5c610d4e 100644 > --- a/arch/x86/crypto/des3_ede_glue.c > +++ b/arch/x86/crypto/des3_ede_glue.c > @@ -404,7 +404,7 @@ static struct crypto_alg des3_ede_cipher = { > } > }; > > -struct skcipher_alg des3_ede_skciphers[] = { > +static struct skcipher_alg des3_ede_skciphers[] = { > { > .base.cra_name = "ecb(des3_ede)", > .base.cra_driver_name = "ecb-des3_ede-asm", Acked-by: Eric Biggers Thanks!
Why are we testing an intermediate result in ahash?
Commit 466d7b9f6 (cryptodev-2.6) added code to testmgr to populate, for async hash operations, the result buffer with a known value and to test the buffer against that value at intermediate steps. If the result buffer changes the operation is failed. My question is: why? What problem does this solve? Has this requirement existed all along, or is it new? I'm now seeing complaints for AES/CMAC and SHA in my driver. I have no problem updating the driver, of course, but I'd like to better understand the precipitating issue for the commit. Mar 2 12:30:56 sosxen2 kernel: [ 60.919198] alg: No test for cfb(aes) (cfb-aes-ccp) Mar 2 12:30:56 sosxen2 kernel: [ 60.924787] 367: alg: hash: update failed on test 3 for cmac-aes-ccp: used req->result Mar 2 12:30:56 sosxen2 kernel: [ 60.946571] 367: alg: hash: update failed on test 4 for sha1-ccp: used req->result Mar 2 12:30:56 sosxen2 kernel: [ 60.956461] 367: alg: hash: update failed on test 1 for hmac-sha1-ccp: used req->result Mar 2 12:30:56 sosxen2 kernel: [ 60.966117] 367: alg: hash: update failed on test 4 for sha224-ccp: used req->result ... Thanks, Gary
[cryptodev:master 109/128] arch/x86/crypto/des3_ede_glue.c:407:21: sparse: symbol 'des3_ede_skciphers' was not declared. Should it be static?
tree: https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master head: 23ea8b63a1e2e15199da4461eb303f642fa04f60 commit: 09c0f03bf8ce9304e0d17131c46690b2c95209f4 [109/128] crypto: x86/des3_ede - convert to skcipher interface reproduce: # apt-get install sparse git checkout 09c0f03bf8ce9304e0d17131c46690b2c95209f4 make ARCH=x86_64 allmodconfig make C=1 CF=-D__CHECK_ENDIAN__ sparse warnings: (new ones prefixed by >>) >> arch/x86/crypto/des3_ede_glue.c:407:21: sparse: symbol 'des3_ede_skciphers' >> was not declared. Should it be static? Please review and possibly fold the followup patch. --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
[RFC PATCH cryptodev] crypto: des3_ede_skciphers[] can be static
Fixes: 09c0f03bf8ce ("crypto: x86/des3_ede - convert to skcipher interface") Signed-off-by: Fengguang Wu--- des3_ede_glue.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index f9c7bdc..5c610d4e 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -404,7 +404,7 @@ static struct crypto_alg des3_ede_cipher = { } }; -struct skcipher_alg des3_ede_skciphers[] = { +static struct skcipher_alg des3_ede_skciphers[] = { { .base.cra_name = "ecb(des3_ede)", .base.cra_driver_name = "ecb-des3_ede-asm",
[PATCH] fscrypt: add Speck128/256 support
Hello, here's a patch which adds Speck128/256 support to fscrypt, for ext4/f2fs/ubifs encryption. Note that this can be applied independently of the patches that added Speck to the crypto API since there is no hard dependency, but the crypto API support is needed for Speck-encrypted files to work. The crypto API patches can be found in linux-next via cryptodev/master. Commit da7a0ab5b4ba ("crypto: speck - add support for the Speck block cipher") with CONFIG_CRYPTO_SPECK enabled is sufficient for basic testing, but for testing on ARM the NEON-accelerated version should be applied and enabled as well. -8<- fscrypt currently only supports AES encryption. However, many low-end mobile devices have older CPUs that don't have AES instructions, e.g. the ARMv8 Cryptography Extensions. Currently, user data on such devices is not being encrypted because AES is not fast enough, even when the NEON bit-sliced implementation of AES is used. Therefore, this patch provides an alternative to AES by updating fscrypt to support the Speck block cipher. Separate patches added Speck to the crypto API; more details about the choice of Speck can be found in those patches. Note in particular that we can't use ChaCha20 as previously proposed, since it would, in general, be insecure to use a stream cipher in this context. The generic Speck implementation is not especially fast, but Speck can be greatly accelerated using general-purpose vector instructions, e.g. ARM NEON. As an example, on a device with an ARMv7 processor, the NEON-accelerated Speck128/256-XTS was 108 MB/s for both encryption and decryption, while AES-256-XTS (with the NEON bit-sliced implementation) was only 41 MB/s encryption and 37 MB/s decryption. There are multiple variants of Speck. This patch only adds support for Speck128/256, which is the variant with a 128-bit block size and 256-bit key size -- the same as AES-256. 
This is believed to be the most secure variant of Speck, and it's only about 6% slower than Speck128/128. Speck64/128 is also a contender, since it's at least 20% faster than Speck128/256 (having 20% fewer rounds), and can be even faster on CPUs that can't efficiently do the 64-bit operations needed for Speck128. But, ARM NEON supports the needed 64-bit operations even on 32-bit CPUs, and Speck128/256 appears to be fast enough for our targeted use cases. So, it makes sense to support Speck128/256 in fscrypt anyway, even if there's a possibility that Speck64/128 support would be added later. The chosen modes of operation are XTS for contents and CTS-CBC for filenames. These are the same modes recommended for AES currently. This patch intentionally does *not* make CONFIG_FS_ENCRYPTION select CONFIG_CRYPTO_SPECK. Thus, people will have to enable Speck support themselves if needed. This is firstly because we shouldn't bloat the FS_ENCRYPTION dependencies with every new cipher, especially ones that aren't recommended for most users. It's also because CRYPTO_SPECK just refers to the generic implementation, which won't be fast enough for many users; in practice, they'll need to enable a vectorized implementation such as CRYPTO_SPECK_NEON to get acceptable performance. 
Signed-off-by: Eric Biggers--- Documentation/filesystems/fscrypt.rst | 10 ++ fs/crypto/fscrypt_private.h | 4 fs/crypto/keyinfo.c | 2 ++ include/uapi/linux/fs.h | 2 ++ 4 files changed, 18 insertions(+) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index cfbc18f0d9c9..c99465965512 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -191,11 +191,21 @@ Currently, the following pairs of encryption modes are supported: - AES-256-XTS for contents and AES-256-CTS-CBC for filenames - AES-128-CBC for contents and AES-128-CTS-CBC for filenames +- Speck128/256-XTS for contents and Speck128/256-CTS-CBC for filenames It is strongly recommended to use AES-256-XTS for contents encryption. AES-128-CBC was added only for low-powered embedded devices with crypto accelerators such as CAAM or CESA that do not support XTS. +Similarly, Speck128/256 support was only added for older or low-end +CPUs which cannot do AES fast enough -- especially ARM CPUs which have +NEON instructions but not the Cryptography Extensions. It is not +recommended to use Speck on CPUs that have AES instructions. +Nevertheless, to use Speck, CONFIG_CRYPTO_SPECK must be enabled. It +is strongly recommended to enable any available architecture-specific +implementations of Speck as well, e.g. CONFIG_CRYPTO_SPECK_NEON for +ARM, since they can be much faster than the generic implementation. + New encryption modes can be added relatively easily, without changes to individual filesystems. However, authenticated encryption (AE) modes are not currently supported because of the difficulty of dealing diff --git
Re: [PATCH 1/2] crypto: talitos - don't persistently map req_ctx->hw_context and req_ctx->buf
On 2/26/2018 6:40 PM, Christophe Leroy wrote: > Commit 49f9783b0cea ("crypto: talitos - do hw_context DMA mapping > outside the requests") introduced a persistent dma mapping of > req_ctx->hw_context > Commit 37b5e8897eb5 ("crypto: talitos - chain in buffered data for ahash > on SEC1") introduced a persistent dma mapping of req_ctx->buf > > As there is no destructor for req_ctx (the request context), the > associated dma handlers where set in ctx (the tfm context). This is > wrong as several hash operations can run with the same ctx. > > This patch removes this persistent mapping. > > Reported-by: Horia Geanta> Fixes: 49f9783b0cea ("crypto: talitos - do hw_context DMA mapping outside the > requests") > Fixes: 37b5e8897eb5 ("crypto: talitos - chain in buffered data for ahash on > SEC1") > Signed-off-by: Christophe Leroy Tested-by: Horia Geantă Please add this to 4.15.y -stable tree. Thanks, Horia
Re: [PATCH 17/18] crypto: talitos - chain in buffered data for ahash on SEC1
Le 02/03/2018 à 18:27, Horia Geantă a écrit : On 10/6/2017 4:05 PM, Christophe Leroy wrote: [...] @@ -1778,6 +1814,36 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (is_sec1 && from_talitos_ptr_len(>ptr[3], true) == 0) talitos_handle_buggy_hash(ctx, edesc, >ptr[3]); + if (is_sec1 && req_ctx->nbuf && length) { + struct talitos_desc *desc2 = desc + 1; + dma_addr_t next_desc; [...] + next_desc = dma_map_single(dev, >hdr1, TALITOS_DESC_SIZE, + DMA_BIDIRECTIONAL); + desc->next_desc = cpu_to_be32(next_desc); Where is desc->next_desc initialized for the !is_sec1 case? Memory allocation is done using kmalloc(), and since desc->next_desc is checked in some cases also for SEC 2.x+, it should be initialized to 0. See https://elixir.bootlin.com/linux/v4.16-rc3/source/drivers/crypto/talitos.c#L1411 edesc = kmalloc(alloc_len, GFP_DMA | flags); if (!edesc) { dev_err(dev, "could not allocate edescriptor\n"); err = ERR_PTR(-ENOMEM); goto error_sg; } memset(>desc, 0, sizeof(edesc->desc)); Christophe
Re: [PATCH 1/2] crypto: ccp: Fix sparse, use plain integer as NULL pointer
Hi Herbert, On 03/02/2018 10:41 AM, Herbert Xu wrote: On Thu, Feb 15, 2018 at 01:34:44PM -0600, Brijesh Singh wrote: Fix sparse warning: Using plain integer as NULL pointer. Replaces assignment of 0 to pointer with NULL assignment. Fixes: 200664d5237f (Add Secure Encrypted Virtualization ...) Cc: Borislav PetkovCc: Herbert Xu Cc: Gary Hook Cc: Tom Lendacky Signed-off-by: Brijesh Singh All applied. Thanks. As part of other SEV fixes Paolo pulled these two patches through the kvm tree. The patches were already included in the KVM pull request for 4.16-rc3. After Paolo pulled the patches into the kvm request, I looked at patchwork and saw that this series was marked as "Not applicable", and hence I was under the impression that you would not be applying these patches in the crypto tree, so I didn't speak up. I hope it's not a big issue and git will be able to detect it. -Brijesh
Re: [PATCH 2/3] crypto: ccp - return an actual key size from RSA max_size callback
On Sat, Feb 24, 2018 at 05:03:21PM +0100, Maciej S. Szmigiero wrote: > rsa-pkcs1pad uses a value returned from a RSA implementation max_size > callback as a size of an input buffer passed to the RSA implementation for > encrypt and sign operations. > > CCP RSA implementation uses a hardware input buffer which size depends only > on the current RSA key length, so it should return this key length in > the max_size callback, too. > This also matches what the kernel software RSA implementation does. > > Previously, the value returned from this callback was always the maximum > RSA key size the CCP hardware supports. > This resulted in this huge buffer being passed by rsa-pkcs1pad to CCP even > for smaller key sizes and then in a buffer overflow when ccp_run_rsa_cmd() > tried to copy this large input buffer into a RSA key length-sized hardware > input buffer. > > Signed-off-by: Maciej S. Szmigiero> Fixes: ceeec0afd684 ("crypto: ccp - Add support for RSA on the CCP") > Cc: sta...@vger.kernel.org Patch applied. Thanks. -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH] crypto/ccp: don't disable interrupts while setting up debugfs
On Fri, Feb 23, 2018 at 11:33:07PM +0100, Sebastian Andrzej Siewior wrote: > I don't why we need take a single write lock and disable interrupts > while setting up debugfs. This is what what happens when we try anyway: > > |ccp :03:00.2: enabling device ( -> 0002) > |BUG: sleeping function called from invalid context at > kernel/locking/rwsem.c:69 > |in_atomic(): 1, irqs_disabled(): 1, pid: 3, name: kworker/0:0 > |irq event stamp: 17150 > |hardirqs last enabled at (17149): [<97a18c49>] > restore_regs_and_return_to_kernel+0x0/0x23 > |hardirqs last disabled at (17150): [<0773b3a9>] > _raw_write_lock_irqsave+0x1b/0x50 > |softirqs last enabled at (17148): [<64d56155>] > __do_softirq+0x3b8/0x4c1 > |softirqs last disabled at (17125): [<92633c18>] irq_exit+0xb1/0xc0 > |CPU: 0 PID: 3 Comm: kworker/0:0 Not tainted 4.16.0-rc2+ #30 > |Workqueue: events work_for_cpu_fn > |Call Trace: > | dump_stack+0x7d/0xb6 > | ___might_sleep+0x1eb/0x250 > | down_write+0x17/0x60 > | start_creating+0x4c/0xe0 > | debugfs_create_dir+0x9/0x100 > | ccp5_debugfs_setup+0x191/0x1b0 > | ccp5_init+0x8a7/0x8c0 > | ccp_dev_init+0xb8/0xe0 > | sp_init+0x6c/0x90 > | sp_pci_probe+0x26e/0x590 > | local_pci_probe+0x3f/0x90 > | work_for_cpu_fn+0x11/0x20 > | process_one_work+0x1ff/0x650 > | worker_thread+0x1d4/0x3a0 > | kthread+0xfe/0x130 > | ret_from_fork+0x27/0x50 > > If any locking is required, a simple mutex will do it. > > Cc: Gary R Hook> Signed-off-by: Sebastian Andrzej Siewior Patch applied. Thanks. -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH v4 0/4] Enable CAAM on i.MX7s fix TrustZone issues
On Thu, Feb 22, 2018 at 02:22:46PM +, Rui Miguel Silva wrote: > For v4 I am taking over this patch series as all the left over patches were > implemented by me. > > V4: > - removed patch: [PATCH v3 2/5] crypto: caam: Fix endless loop when RNG is > already initialized from the series since Horia presented a better fix for the > endless loop in case of fail to acquire DECO: > 225ece3e7dad4 crypto: caam - fix endless loop when DECO acquire fails > - add Fabio Estevam reviewed by tag in PATCH 3/3. > - removed CAAM ERA from dts since bootloader will add it - Horia. > > V3: > - Added Cc: clk driver maintainers - Fabio Estevam > - Added Cc: i.MX arch maintainers - Fabio Estevam > - Removed bouncing email address for Herbert Xu > > V2-resend: > - Patch 0005 lost in the ether - resending > > V2: > - Endian detection is ok with TrustZone enabled Horia. > Endian detection logic tested with TrustZone enabled. The register that > this relies on though isn't affected by the lock-down in the first page. > Assuming set of affected registers is actually just the 'deco' registers > though there is no formal statement of that, that I am aware of. > > - Moving of TrustZone work-around into u-boot > This set actually doesn't need to deal with TrustZone at all now but, for > the sake of consistency keeping thread title > > https://patchwork.ozlabs.org/patch/866460/ > https://patchwork.ozlabs.org/patch/866462/ > https://patchwork.ozlabs.org/patch/865890/ > > - Reworded endless loop fix to read a bit better > > - Fixes to DTS additions - Rui > > - Fixes to number of clocks declared - Rui > > V1: > This patch-set enables CAAM on the i.MX7s and fixes a number of issues > identified with the CAAM driver and hardware when TrustZone mode is > enabled. 
> > The first block of patches are simple bug-fixes, followed by a second block > of patches which are simple enabling patches for the i.MX7Solo - note we > aren't enabling for the i.MX7Dual since we don't have hardware to test that > out but it should be a 1:1 mapping for others to enable when appropriate. > > Cheers, > Rui > > Rui Miguel Silva (4): > crypto: caam - Fix null dereference at error path > crypto: caam - do not use mem and emi_slow clock for imx7x > clk: imx7d: add CAAM clock > ARM: dts: imx7s: add CAAM device node > > arch/arm/boot/dts/imx7s.dtsi| 30 +++ > drivers/clk/imx/clk-imx7d.c | 1 + > drivers/crypto/caam/ctrl.c | 42 > +++-- > include/dt-bindings/clock/imx7d-clock.h | 3 ++- > 4 files changed, 57 insertions(+), 19 deletions(-) Patches 1 and 2 applied. Thanks. -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH] crypto: atmel-aes - fix the keys zeroing on errors
On Fri, Feb 23, 2018 at 10:01:40AM +0100, Antoine Tenart wrote: > The Atmel AES driver uses memzero_explicit on the keys on error, but the > variable zeroed isn't the right one because of a typo. Fix this by using > the right variable. > > Fixes: 89a82ef87e01 ("crypto: atmel-authenc - add support to > authenc(hmac(shaX), Y(aes)) modes") > Signed-off-by: Antoine Tenart

Patch applied. Thanks. -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH v2] crypto: ccp: add check to get PSP master only when PSP is detected
Brijesh Singhwrote: > Paulian reported the below kernel crash on Ryzen 5 system: > > BUG: unable to handle kernel NULL pointer dereference at 0073 > RIP: 0010:.LC0+0x41f/0xa00 > RSP: 0018:a9968003bdd0 EFLAGS: 00010002 > RAX: b113b130 RBX: RCX: 05a7 > RDX: 00ff RSI: 8b46dee651a0 RDI: b1bd617c > RBP: 0246 R08: 000251a0 R09: > R10: d81f11a38200 R11: 8b52e8e0a161 R12: b19db220 > R13: 0007 R14: b17e4888 R15: 5dccd7affc30a31e > FS: () GS:8b46dee4() knlGS: > CR2: 0073 CR3: 80128120a000 CR4: 003406e0 > Call Trace: > ? sp_get_psp_master_device+0x56/0x80 > ? map_properties+0x540/0x540 > ? psp_pci_init+0x20/0xe0 > ? map_properties+0x540/0x540 > ? sp_mod_init+0x16/0x1a > ? do_one_initcall+0x4b/0x190 > ? kernel_init_freeable+0x19b/0x23c > ? rest_init+0xb0/0xb0 > ? kernel_init+0xa/0x100 > ? ret_from_fork+0x22/0x40 > > Since Ryzen does not support PSP/SEV firmware hence i->psp_data will > NULL in all sp instances. In those cases, 'i' will point to the > list head after list_for_each_entry(). Dereferencing the head will > cause kernel crash. > > Add check to call get master device only when PSP/SEV is detected. > > Reported-by: Paulian Bogdan Marinca > Cc: Borislav Petkov > Cc: Tom Lendacky > CC: Gary R Hook > Cc: linux-ker...@vger.kernel.org > Signed-off-by: Brijesh Singh > --- > > Changes since v1: > > v1 contained a local change I used for triggering the crash on EPYC system. > we do not need those changes in final patch. Patch applied. Thanks. -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH 00/30] crypto: x86 glue code cleanup/conversion
On Mon, Feb 19, 2018 at 11:47:58PM -0800, Eric Biggers wrote: > Hi, > > I got tired of seeing some block cipher implementations using the old > API (blkcipher or ablkcipher) and some using the new API (skcipher)... > so this series makes a dent in the problem by converting all the > remaining x86 glue code over to the skcipher API. > > Besides the conversion to use 'skcipher_alg', 'skcipher_request', and > 'skcipher_walk' this includes replacing all remaining users of > ablk_helper with crypto_simd, then removing ablk_helper. > > It also includes removing all users of lrw_crypt() and xts_crypt() in > favor of the lrw and xts templates wrapping an ECB-mode algorithm. So, > lrw_crypt() and xts_crypt() are removed too. > > Note that AES was already converted a while ago, so this series mostly > just deals with the other ciphers (Serpent, Twofish, Camellia, etc.). > > I tested all the affected algorithms with the self-tests. I also did > some testing with random sglist divisions, during which I found a bug in > ecb-cast5-avx which this series fixes as well. > > The overall diff is about 3100 lines of code removed, mainly due to the > ablk_helper => crypto_simd conversions and removing the unnecessary LRW > and XTS implementations. 
> > Eric Biggers (30): > crypto: simd - allow registering multiple algorithms at once > crypto: x86/glue_helper - add skcipher_walk functions > crypto: x86/serpent-sse2 - remove LRW algorithm > crypto: x86/serpent-sse2 - remove XTS algorithm > crypto: x86/serpent-sse2 - convert to skcipher interface > crypto: x86/serpent-avx2 - remove LRW algorithm > crypto: x86/serpent-avx - remove LRW algorithm > crypto: x86/serpent-avx,avx2 - convert to skcipher interface > crypto: x86/twofish-3way - remove LRW algorithm > crypto: x86/twofish-3way - remove XTS algorithm > crypto: x86/twofish-3way - convert to skcipher interface > crypto: x86/twofish-avx - remove LRW algorithm > crypto: x86/twofish-avx - convert to skcipher interface > crypto: x86/cast5-avx - fix ECB encryption when long sg follows short > one > crypto: x86/cast5-avx - convert to skcipher interface > crypto: x86/cast6-avx - remove LRW algorithm > crypto: x86/cast6-avx - convert to skcipher interface > crypto: x86/blowfish: convert to skcipher interface > crypto: x86/des3_ede - convert to skcipher interface > crypto: x86/camellia-aesni-avx - remove LRW algorithm > crypto: x86/camellia-aesni-avx2 - remove LRW algorithm > crypto: x86/camellia - remove LRW algorithm > crypto: x86/camellia - remove XTS algorithm > crypto: x86/camellia - convert to skcipher interface > crypto: x86/camellia-aesni-avx,avx2 - convert to skcipher interface > crypto: xts - remove xts_crypt() > crypto: lrw - remove lrw_crypt() > crypto: x86/glue_helper - remove blkcipher_walk functions > crypto: x86/glue_helper - rename glue_skwalk_fpu_begin() > crypto: ablk_helper - remove ablk_helper > > arch/x86/crypto/blowfish_glue.c| 230 +++-- > arch/x86/crypto/camellia_aesni_avx2_glue.c | 491 ++- > arch/x86/crypto/camellia_aesni_avx_glue.c | 495 +++ > arch/x86/crypto/camellia_glue.c| 356 +--- > arch/x86/crypto/cast5_avx_glue.c | 352 +++ > arch/x86/crypto/cast6_avx_glue.c | 489 ++- > arch/x86/crypto/des3_ede_glue.c| 238 ++--- > 
arch/x86/crypto/glue_helper.c | 391 +++--- > arch/x86/crypto/serpent_avx2_glue.c| 478 ++ > arch/x86/crypto/serpent_avx_glue.c | 518 +++- > arch/x86/crypto/serpent_sse2_glue.c| 519 > - > arch/x86/crypto/twofish_avx_glue.c | 493 +++ > arch/x86/crypto/twofish_glue_3way.c| 339 --- > arch/x86/include/asm/crypto/camellia.h | 16 +- > arch/x86/include/asm/crypto/glue_helper.h | 75 + > arch/x86/include/asm/crypto/serpent-avx.h | 17 +- > arch/x86/include/asm/crypto/twofish.h | 19 -- > crypto/Kconfig | 82 ++--- > crypto/Makefile| 1 - > crypto/ablk_helper.c | 150 - > crypto/lrw.c | 152 +++-- > crypto/simd.c | 50 +++ > crypto/xts.c | 72 > include/crypto/ablk_helper.h | 32 -- > include/crypto/internal/simd.h | 7 + > include/crypto/lrw.h | 44 --- > include/crypto/xts.h | 17 - > 27 files changed, 1489 insertions(+), 4634 deletions(-) > delete mode 100644 crypto/ablk_helper.c > delete mode 100644 include/crypto/ablk_helper.h > delete mode 100644 include/crypto/lrw.h All applied. Thanks. -- Email: Herbert Xu
Re: [PATCH] crypto: marvell/cesa - Clean up redundant #include
On Mon, Feb 19, 2018 at 01:55:36PM +, Robin Murphy wrote: > The inclusion of dma-direct.h was only needed temporarily to prevent > breakage from the DMA API rework, since the actual CESA fix making it > redundant was merged in parallel. Now that both have landed, it can go. > > Signed-off-by: Robin Murphy

Patch applied. Thanks. -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH 0/4] cleanups and new HW rev support
On Mon, Feb 19, 2018 at 02:51:20PM +, Gilad Ben-Yossef wrote: > This patch set introduces backward compatible support for the older > CryptoCell hardware revision 710 and 630 along some minor code > cleanups. > > Gilad Ben-Yossef (4): > crypto: ccree: remove unused definitions > dt-bindings: Add DT bindings for ccree 710 and 630p > crypto: ccree: add support for older HW revs > crypto: ccree: replace memset+kfree with kzfree > > .../devicetree/bindings/crypto/arm-cryptocell.txt | 3 +- > drivers/crypto/Kconfig | 6 +- > drivers/crypto/ccree/cc_aead.c | 34 +++-- > drivers/crypto/ccree/cc_cipher.c | 25 +++- > drivers/crypto/ccree/cc_crypto_ctx.h | 36 - > drivers/crypto/ccree/cc_driver.c | 68 -- > drivers/crypto/ccree/cc_driver.h | 23 +++- > drivers/crypto/ccree/cc_fips.c | 13 +- > drivers/crypto/ccree/cc_hash.c | 149 > +++-- > drivers/crypto/ccree/cc_hash.h | 12 +- > drivers/crypto/ccree/cc_host_regs.h| 3 + > drivers/crypto/ccree/cc_hw_queue_defs.h| 2 +- > drivers/crypto/ccree/cc_kernel_regs.h | 1 + > drivers/crypto/ccree/cc_request_mgr.c | 9 +- > drivers/crypto/ccree/cc_sram_mgr.c | 14 ++ > 15 files changed, 240 insertions(+), 158 deletions(-) All applied. Thanks. -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH 1/2] crypto: ccp: Fix sparse, use plain integer as NULL pointer
On Thu, Feb 15, 2018 at 01:34:44PM -0600, Brijesh Singh wrote: > Fix sparse warning: Using plain integer as NULL pointer. Replaces > assignment of 0 to pointer with NULL assignment. > > Fixes: 200664d5237f (Add Secure Encrypted Virtualization ...) > Cc: Borislav Petkov> Cc: Herbert Xu > Cc: Gary Hook > Cc: Tom Lendacky > Signed-off-by: Brijesh Singh All applied. Thanks. -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH Resend 0/5] hwrng: stm32 - Improvement for stm32-rng
On Thu, Feb 15, 2018 at 02:03:07PM +0100, Lionel Debieve wrote: > This set of patches add extended functionalities for stm32 rng > driver. > Patch #1 includes a reset during probe to avoid any error status > which can occur during bootup process and keep safe rng integrity. > > Patch #3 adds a new property to manage the clock error detection > feature which can be disabled on specific target. > > Patch #5 rework the timeout calculation for read value that was > previously defined based on loop operation and is now based on > timer. > > Lionel Debieve (5): > hwrng: stm32 - add reset during probe > dt-bindings: rng: add reset node for stm32 > hwrng: stm32 - allow disable clock error detection > dt-bindings: rng: add clock detection error for stm32 > hwrng: stm32 - rework read timeout calculation > > .../devicetree/bindings/rng/st,stm32-rng.txt | 4 ++ > drivers/char/hw_random/stm32-rng.c | 44 > ++ > 2 files changed, 32 insertions(+), 16 deletions(-) All applied. Thanks. -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH Resend 0/5] hwrng: stm32 - Improvement for stm32-rng
On Thu, Feb 22, 2018 at 04:25:46PM +0100, Alexandre Torgue wrote: > Hi > > On 02/22/2018 03:03 PM, Herbert Xu wrote: > >On Thu, Feb 15, 2018 at 02:03:07PM +0100, Lionel Debieve wrote: > >>This set of patches add extended functionalities for stm32 rng > >>driver. > >>Patch #1 includes a reset during probe to avoid any error status > >>which can occur during bootup process and keep safe rng integrity. > >> > >>Patch #3 adds a new property to manage the clock error detection > >>feature which can be disabled on specific target. > >> > >>Patch #5 rework the timeout calculation for read value that was > >>previously defined based on loop operation and is now based on > >>timer. > > > >I should take only patches 1/3/5, right? > > You could take all (dt-bindings + drivers part). Thanks! -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH v3 0/4] crypto: AF_ALG AIO improvements
On Tue, 27 Feb 2018 15:08:58 +0100 Stephan Müllerwrote: > Am Freitag, 23. Februar 2018, 13:00:26 CET schrieb Herbert Xu: > > Hi Herbert, Hi Stephan / Herbert, > > > On Fri, Feb 23, 2018 at 09:33:33AM +0100, Stephan Müller wrote: > > > A simple copy operation, however, will imply that in one AIO recvmsg > > > request, only *one* IOCB can be set and processed. > > > > Sure, but the recvmsg will return as soon as the crypto API encrypt > > or decrypt function returns. It's still fully async. It's just > > that the setup part needs to be done with sendmsg/recvmsg. > > Wouldn't a copy of the ctx->iv into a per-request buffer change the behavoir > of the AF_ALG interface significantly? > > Today, if multiple IOCBs are submitted, most cipher implementations would > serialize the requests (e.g. all implementations that behave synchronous in > nature like all software implementations). > > Thus, when copying the ctx->iv into a separate per-request buffer, suddenly > all block-chained cipher operations are not block chained any more. Agreed - specific handling would be needed to ensure the IV is written to each copy to maintain the chain. Not nice at all. > > > > Even if we wanted to do what you stated, just inlining the IV isn't > > enough. You'd also need to inline the assoclen, and probably the > > optype in case you want to mix encrypt/decrypt too. > > Maybe that is what we have to do. The one element I could do with more clarity on here is use cases as it feels like the discussion is a little unfocused (helps with performance runs, but is it really useful?) When do we want to have separate IVs per request but a shared key? I think this is relevant for ctr modes in particular where userspace can provide the relevant ctrs but the key is shared. Storage encryption modes such as XTS can also benefit. My own knowledge is too abstract to give good answers to these. 
> > > > However, I must say that I don't see the point of going all the way > > to support such a bulk submission interface (e.g., lio_listio). > > IMHO, the point is that AF_ALG is the only interface to allow userspace to > utilize hardware crypto implementations. For example, on a small chip with > hardware crypto support, your user space code can offload crypto to that > hardware to free CPU time. > > How else would somebody access its crypto accelerators? This is also useful at the high end where we may well be throwing this bulk submission at a set of crypto units (hidden behind a queue) to parallelize when possible. Just because we have lots of cpu power doesn't mean it makes sense to use it for crypto :) We 'could' just do it all in userspace via vfio, but there are the usual disadvantages in that approach in terms of generality etc. > > > > Remember, the algif interface due to its inherent overhead is meant > > for bulk data. That is, the processing time for each request is > > dominated by the actual processing, not the submission process. > > I see that. And for smaller chips with crypto support, this would be the case > IMHO. Especially if we streamline the AF_ALG overhead such that we reduce the > number of syscalls and user/kernel space roundtrips. For larger devices the ability to run large numbers of requests and 'know' that they don't need to be chained is useful (because they have separate IVs). This allows you to let the hardware handle them in parallel (either because the hardware handles dependency tracking, or because we have done it in the driver.) Applies just as well for large blocks with lower overhead. You could do this by opening lots of separate sockets and simply providing them all with the same key. However, this assumes the hardware / driver can handle very large numbers of contexts (ours can though we only implement a subset of this functionality in the current driver to keep things simple). 
If we 'fake' such support in the driver then there is inherent nastiness around having to let the hardware queues drain before you can change the IV) and that the overhead of operating such a pool of sockets in your program isn't significant. Managing such a pool of sockets would also be a significant complexity overhead in complexity of the user space code. > > > > If you're instead processing lots of tiny requests, do NOT use > > algif because it's not designed for that. > > The only issue in this case is that it makes the operation slower. > > > > Therefore spending too much time to optimise the submission overhead > > seems pointless to me. > > > > Cheers, > > > Ciao > Stephan > > Thanks, Jonathan
[PATCH 4.4 12/34] lib/mpi: Fix umul_ppmm() for MIPS64r6
4.4-stable review patch. If anyone has any objections, please let me know. -- From: James Hogan[ Upstream commit bbc25bee37d2b32cf3a1fab9195b6da3a185614a ] Current MIPS64r6 toolchains aren't able to generate efficient DMULU/DMUHU based code for the C implementation of umul_ppmm(), which performs an unsigned 64 x 64 bit multiply and returns the upper and lower 64-bit halves of the 128-bit result. Instead it widens the 64-bit inputs to 128-bits and emits a __multi3 intrinsic call to perform a 128 x 128 multiply. This is both inefficient, and it results in a link error since we don't include __multi3 in MIPS linux. For example commit 90a53e4432b1 ("cfg80211: implement regdb signature checking") merged in v4.15-rc1 recently broke the 64r6_defconfig and 64r6el_defconfig builds by indirectly selecting MPILIB. The same build errors can be reproduced on older kernels by enabling e.g. CRYPTO_RSA: lib/mpi/generic_mpih-mul1.o: In function `mpihelp_mul_1': lib/mpi/generic_mpih-mul1.c:50: undefined reference to `__multi3' lib/mpi/generic_mpih-mul2.o: In function `mpihelp_addmul_1': lib/mpi/generic_mpih-mul2.c:49: undefined reference to `__multi3' lib/mpi/generic_mpih-mul3.o: In function `mpihelp_submul_1': lib/mpi/generic_mpih-mul3.c:49: undefined reference to `__multi3' lib/mpi/mpih-div.o In function `mpihelp_divrem': lib/mpi/mpih-div.c:205: undefined reference to `__multi3' lib/mpi/mpih-div.c:142: undefined reference to `__multi3' Therefore add an efficient MIPS64r6 implementation of umul_ppmm() using inline assembly and the DMULU/DMUHU instructions, to prevent __multi3 calls being emitted. Fixes: 7fd08ca58ae6 ("MIPS: Add build support for the MIPS R6 ISA") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Herbert Xu Cc: "David S. 
Miller" Cc: linux-m...@linux-mips.org Cc: linux-crypto@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- lib/mpi/longlong.h | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -671,7 +671,23 @@ do { \ ** MIPS/64 ** ***/ #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 -#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 +/* + * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C + * code below, so we special case MIPS64r6 until the compiler can do better. + */ +#define umul_ppmm(w1, w0, u, v) \ +do { \ + __asm__ ("dmulu %0,%1,%2" \ +: "=d" ((UDItype)(w0)) \ +: "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ + __asm__ ("dmuhu %0,%1,%2" \ +: "=d" ((UDItype)(w1)) \ +: "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ +} while (0) +#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) #define umul_ppmm(w1, w0, u, v) \ do { \ typedef unsigned int __ll_UTItype __attribute__((mode(TI)));\
[PATCH 4.9 24/56] lib/mpi: Fix umul_ppmm() for MIPS64r6
4.9-stable review patch. If anyone has any objections, please let me know. -- From: James Hogan[ Upstream commit bbc25bee37d2b32cf3a1fab9195b6da3a185614a ] Current MIPS64r6 toolchains aren't able to generate efficient DMULU/DMUHU based code for the C implementation of umul_ppmm(), which performs an unsigned 64 x 64 bit multiply and returns the upper and lower 64-bit halves of the 128-bit result. Instead it widens the 64-bit inputs to 128-bits and emits a __multi3 intrinsic call to perform a 128 x 128 multiply. This is both inefficient, and it results in a link error since we don't include __multi3 in MIPS linux. For example commit 90a53e4432b1 ("cfg80211: implement regdb signature checking") merged in v4.15-rc1 recently broke the 64r6_defconfig and 64r6el_defconfig builds by indirectly selecting MPILIB. The same build errors can be reproduced on older kernels by enabling e.g. CRYPTO_RSA: lib/mpi/generic_mpih-mul1.o: In function `mpihelp_mul_1': lib/mpi/generic_mpih-mul1.c:50: undefined reference to `__multi3' lib/mpi/generic_mpih-mul2.o: In function `mpihelp_addmul_1': lib/mpi/generic_mpih-mul2.c:49: undefined reference to `__multi3' lib/mpi/generic_mpih-mul3.o: In function `mpihelp_submul_1': lib/mpi/generic_mpih-mul3.c:49: undefined reference to `__multi3' lib/mpi/mpih-div.o In function `mpihelp_divrem': lib/mpi/mpih-div.c:205: undefined reference to `__multi3' lib/mpi/mpih-div.c:142: undefined reference to `__multi3' Therefore add an efficient MIPS64r6 implementation of umul_ppmm() using inline assembly and the DMULU/DMUHU instructions, to prevent __multi3 calls being emitted. Fixes: 7fd08ca58ae6 ("MIPS: Add build support for the MIPS R6 ISA") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Herbert Xu Cc: "David S. 
Miller" Cc: linux-m...@linux-mips.org Cc: linux-crypto@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- lib/mpi/longlong.h | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -671,7 +671,23 @@ do { \ ** MIPS/64 ** ***/ #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 -#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 +/* + * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C + * code below, so we special case MIPS64r6 until the compiler can do better. + */ +#define umul_ppmm(w1, w0, u, v) \ +do { \ + __asm__ ("dmulu %0,%1,%2" \ +: "=d" ((UDItype)(w0)) \ +: "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ + __asm__ ("dmuhu %0,%1,%2" \ +: "=d" ((UDItype)(w1)) \ +: "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ +} while (0) +#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) #define umul_ppmm(w1, w0, u, v) \ do { \ typedef unsigned int __ll_UTItype __attribute__((mode(TI)));\
[PATCH 4.14 050/115] lib/mpi: Fix umul_ppmm() for MIPS64r6
4.14-stable review patch. If anyone has any objections, please let me know. -- From: James Hogan[ Upstream commit bbc25bee37d2b32cf3a1fab9195b6da3a185614a ] Current MIPS64r6 toolchains aren't able to generate efficient DMULU/DMUHU based code for the C implementation of umul_ppmm(), which performs an unsigned 64 x 64 bit multiply and returns the upper and lower 64-bit halves of the 128-bit result. Instead it widens the 64-bit inputs to 128-bits and emits a __multi3 intrinsic call to perform a 128 x 128 multiply. This is both inefficient, and it results in a link error since we don't include __multi3 in MIPS linux. For example commit 90a53e4432b1 ("cfg80211: implement regdb signature checking") merged in v4.15-rc1 recently broke the 64r6_defconfig and 64r6el_defconfig builds by indirectly selecting MPILIB. The same build errors can be reproduced on older kernels by enabling e.g. CRYPTO_RSA: lib/mpi/generic_mpih-mul1.o: In function `mpihelp_mul_1': lib/mpi/generic_mpih-mul1.c:50: undefined reference to `__multi3' lib/mpi/generic_mpih-mul2.o: In function `mpihelp_addmul_1': lib/mpi/generic_mpih-mul2.c:49: undefined reference to `__multi3' lib/mpi/generic_mpih-mul3.o: In function `mpihelp_submul_1': lib/mpi/generic_mpih-mul3.c:49: undefined reference to `__multi3' lib/mpi/mpih-div.o In function `mpihelp_divrem': lib/mpi/mpih-div.c:205: undefined reference to `__multi3' lib/mpi/mpih-div.c:142: undefined reference to `__multi3' Therefore add an efficient MIPS64r6 implementation of umul_ppmm() using inline assembly and the DMULU/DMUHU instructions, to prevent __multi3 calls being emitted. Fixes: 7fd08ca58ae6 ("MIPS: Add build support for the MIPS R6 ISA") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Herbert Xu Cc: "David S. 
Miller" Cc: linux-m...@linux-mips.org Cc: linux-crypto@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- lib/mpi/longlong.h | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -671,7 +671,23 @@ do { \ ** MIPS/64 ** ***/ #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 -#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 +/* + * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C + * code below, so we special case MIPS64r6 until the compiler can do better. + */ +#define umul_ppmm(w1, w0, u, v) \ +do { \ + __asm__ ("dmulu %0,%1,%2" \ +: "=d" ((UDItype)(w0)) \ +: "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ + __asm__ ("dmuhu %0,%1,%2" \ +: "=d" ((UDItype)(w1)) \ +: "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ +} while (0) +#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) #define umul_ppmm(w1, w0, u, v) \ do { \ typedef unsigned int __ll_UTItype __attribute__((mode(TI)));\