There seems to be a bug in the AVX2 codepath in poly1305-x86.pl. I have not attempted to debug this, but I have attached a test file which produces different output in normal and AVX2 codepaths. Our existing poly1305 implementation agrees with the former.
$ OPENSSL_ia32cap=0 ./poly1305_test
PASS
$ ./poly1305_test
Poly1305 test failed.
got: 2e65f0054e36505687d937ff5e8ed112
expected: 69d28f73dd09d39a92aa179da354b7ea

You may wish to generalize that Poly1305_Update pattern into your own tests. This is what I did to catch this: https://boringssl-review.googlesource.com/#/c/7223/

From looking at valgrind, this pattern seems to give good coverage. I used valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes and then kcachegrind to inspect the output. (kcachegrind is a bit heavy for this. I'm hoping I can find or write a better annotator here. Something which looks like, say, LCOV would be ideal.)

By the way, this assembly code is quite complicated. I wasn't able to find problems in the others (I tested armv4, armv8, x86, and x86_64), but I'm far from confident I've covered all the cases. With the caveat that I'm no assembly programmer, much of the complexity seems to come from the SIMD code needing a multiple of 2 or 4 blocks and the implementation converting internal state back and forth from base 2^26 and 2^64 and handling excess blocks slightly differently in different cases. (I counted nine distinct codepaths to test in the x86_64 AVX codepath alone.)

The C code already buffers up to 16-byte blocks. Did you consider buffering up to 32 or 64 bytes in C when the SIMD code called for it? I think it could be simpler. You'd only need to handle excess blocks at the end. This would also simplify the SIMD upgrade on long inputs, so long as the buffer exceeds the cutoff. (You'll never process input before the upgrade.) I haven't tried this, so perhaps the performance costs are prohibitive, but if the costs are modest, the simplifications may be worth it.

David

--
Ticket here: http://rt.openssl.org/Ticket/Display.html?id=4346
Please log in as guest with password guest if prompted
/* Place in openssl checkout (for access to poly1305.h) and build with: * * gcc -m32 ./poly1305_test.c ./libcrypto.a -Iinclude/ -ldl -pthread -o poly1305_test */ #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <string.h> #include <openssl/crypto.h> #include "crypto/include/internal/poly1305.h" /* Copy over the poly1305_context definition, so as not to fuss with sizes. */ typedef void (*poly1305_blocks_f)(void *ctx, const unsigned char *inp, size_t len, unsigned int padbit); typedef void (*poly1305_emit_f)(void *ctx, unsigned char mac[16], const unsigned int nonce[4]); struct poly1305_context { double opaque[24]; unsigned int nonce[4]; unsigned char data[POLY1305_BLOCK_SIZE]; size_t num; struct { poly1305_blocks_f blocks; poly1305_emit_f emit; } func; }; /* Adapted from poly1305.c's SELFTEST codepath. */ static uint8_t hex_digit(char h) { if (h >= '0' && h <= '9') { return h - '0'; } else if (h >= 'a' && h <= 'f') { return h - 'a' + 10; } else if (h >= 'A' && h <= 'F') { return h - 'A' + 10; } else { abort(); } } static void hex_decode(uint8_t *out, const char *hex) { size_t j = 0; while (*hex != 0) { uint8_t v = hex_digit(*hex++); v <<= 4; v |= hex_digit(*hex++); out[j++] = v; } } static void hexdump(uint8_t *a, size_t len) { size_t i; for (i = 0; i < len; i++) { printf("%02x", a[i]); } } static const char kKey[] = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"; static const char kInput[] = "248ac31085b6c2adaaa38259a0d7192c5c35d1bb4ef39ad94c38d1c82479e2dd2159a07702" "4b0589bc8a20101b506f0a1ad0bbab76e83a83f1b94be6beae74e874cab692c5963a75436b" "776121ec9f62399a3e66b2d22707dae81933b6277f3c8516bcbe26dbbd86f373103d7cf4ca" "d1888c952118fbfbd0d7b4bedc4ae4936aff91157e7aa47c54442ea78d6ac251d324a0fbe4" "9d89cc3521b66d16e9c66a3709894e4eb0a4eedc4ae19468e66b81f271351b1d921ea55104" "7abcc6b87a901fde7db79fa1818c11336dbc07244a40eb14cf77bde35e78ae9ad7d3f57ed7" "e7f23926c9172f82d77684ea5ed7d74ebc6f142b997036bcb7cce8df1bbc0d5b35a46509c9" 
"54fc9469d214d6238f166cbf872156b4c41d7aac5942cffb175023078252a3f36e315c5d4c" "e0e39928a018252862becacef96a19f03bdcf46d75584299d1f8b03c0169e9e407d937145b" "5e5024139e7022a1978f114f24cdfa23780a119735c41da8fb759bbb3f025c6ec30e6c6e9b" "ce8615be68e392fce59fd26a8e6a6cc5c606e3848116e4d01d29565a1facfb524b6d29643b" "826eee1e42869fc76df229dd79b39a2b1df28bb335c3a5f15a855d0121e4a6da34b5e4d5b7" "b5d5746a03ecff70811e1516fcec1bf7462e8876a2d21710aa168c78f45a6a15015950e221" "da85d3ec822ad6d0a6931b25a06b7bb5f3c10bb36cd4d647f9561982fde9818de5d4bf8db7" "f86c53b4ff14928ac15f79023b61861e73e44216540bb302153770da2533de9795252ab5fb" "77ad924c9338c8144c23d4c90dab9a18feac1a1574d4545e1435eb405e6c4c439fc724fce9" "92ae85badf345bad16d85fbd338f04433703614754d0e7e54c4ccde2670587d52ecfb5a70a" "14a501bacc727722649931d8515b13d020a78e511fe136d45fbf97f9c7f689fcc677cfb368" "3723878350ffe9d08130cc6e567b6179e01b7eb2b3bbcf0873e1308eec018edeb8cce94633" "8e15d5bf68c71916a83a99358039ef071e009546a2df936879dffbba397a93925d229a469f" "d17d71b7f524e03a30da6ee927542f8b369bed4734fe25dbd63d24ffd2a222f5f84f75d858" "ab989be925af570ad6d45bd28ce61b5139e1dd2f0b7795fe072e6e83acbb5e7b777a70c641" "e4cab2af40eed69abc334cd2703c3273204fac580c6a3d6680427e5f7d051e8380a53f93a1" "80f4556ecea4530b9a2d5948dad63d415b6874f6b90e767d6d265be86351b53ba690780bb5" "7c21b57418c5b97559e840c68257f839e7583a4bf7c7645c5987d40cc1ba79a218c35edfac" "dabe581d950e4bb7a481ebe64d61d00e75b1f25f1ce5f5462334a5b9038a697aa0937a3f80" "17e05d2c9c05dcb05c0b02508dea619b137f5444b6f088eb3cb2c66788f88afdfbba8faa1c" "490485624c88ae11e57347a676902e7553f056188493209bdbb30acc63c9e41e16a9d6c009" "416b520a76ba38f57628170c43626b5cb46179dc5bf65de865085f84bf741c223fbe474d2d" "19d8f43914fbd6586351089e73babf344f988b7963fe44528457d7aad3c564f6bcbd0d772a" "4c9fd328e6022d1c7c9f86726f8d5a23797d309c0f653ab1ac687833eb2700f156296062a8" "b377078f45f6b68c3d07cae1913ba8d5a6f9bf7525a3439eb932d4cefc4bf8e1b07b48ca13" 
"ece366cbc3e0388915915d1757475103a9e9454e7e6355de2d6acbf4710f9a63e4f6d3cd70" "c2d6fca88dd8a14448fdb63ce9350fdaafbe0b8bd1c5d307dae76dfed799aef2d8f23d5608" "d37d1330dd38b94860905dbeebf78d7b7318b7d42aed40d3f9899e9f420cbd92a6eeae3026" "f7725694e0e4bee016ba346fed2c21172bdb4a461cebe0cfe38e76645226ac127a259c1932" "64d735ce8c8a57e17dd3f0579e2e86dc295ad1f45ba2d85db35044da61f7d401274b31eefb" "eb34e8d2ae596e9b4541aae117bdac5ed0b324c20539c27c07a411d5288b0b5f6fa16e9a7d" "f85dc319fa6b71cd08a859c06a3f7b0289e1750adbf182f9750fea96fea5ab7aa347334060" "7cd7ed2c626f5382491c26d5d5bea61401dee7319c94d418f297e61ceac8f258ee8c23831b" "da081591f5a918e96855774ddedffc51e5b180f1971806d42fc333020b734aeb45adb0bc47" "325d0cea5f6713a786558022afc39d573892aa3635efbfd8bcb11c57f306c72146afe8b453" "88125cb7bf9ecf965a7ba4f768c77be366470dcdcf214b7f6a5a9460ed4fe44ae559d85e2f" "dc2094de83fff12ea8804db1215c4ca865871bdd7f8ef32ab799bf923ffb02c1ded7d129be" "adad46c5eda31ab1a6f43da05ea08bff7ffa88d8966353d01830558c39b930b01d175e4371" "24d8edd0d2698fd8932f2b2c9b14746e52879c57a395538150f390264f00e60d470711202f" "4194499ff79037ca9885dc8d695f7d917a3086ca88e8f8d0243efee09302cf39e039eb7cc8" "dd19d28120d5fe533b5727cd39133181c729ca6f90a015ed30be7668d5cb5ecc33a53ee69b" "f7d1a5ecbdb153803743c6adaaabd36bf84e5be38d3f04a5d5dbfd67bdcd3b176e65bd1391" "ade775cc32ce43a847fb6c672a3fe97a5d4081c4986959ec5fb898f42a9397ba2b3ec2c101" "8f8d76d057f2366bd0e4465514ad6560c599664fb85621fe771e00f43d39b591b2a6a32110" "0f4d1ef23a376d5ae3eeedbfe23da73dff0ee4d16b34ebddd8f5f053db9824105fc7300dbe" "e7ea6af56b112319e3e215a0fc79ae946f6b5227453ec7fcaf17cf7651f71499a50d812214" "04d5f129ac50ea7528ff0e0069ec4ab8acb7919d81749ab37a870c5ef2cc5a15cf96709d3c" "65b4addc77e7416847160bcabb94ea36377e0ef71be80b5cc53effd5444888044a353574c7" "2c924bba2a8b4e8354188ebfed"; static const char kMAC[] = "69d28f73dd09d39a92aa179da354b7ea"; int main() { OPENSSL_init_crypto(0, NULL); uint8_t key[32], out[16], expected[16]; POLY1305 poly1305; hex_decode(key, kKey); 
hex_decode(expected, kMAC); size_t in_len = strlen(kInput); in_len /= 2; uint8_t *in = malloc(in_len); hex_decode(in, kInput); size_t done = 0; Poly1305_Init(&poly1305, key); Poly1305_Update(&poly1305, in + done, 16); done += 16; size_t excess = 32; for (;;) { /* Feed 128 + |excess| bytes to test SIMD mode. */ if (done + 128 + excess > in_len) { break; } Poly1305_Update(&poly1305, in + done, 128 + excess); done += 128 + excess; /* Feed |excess| bytes to ensure SIMD mode can handle short inputs. */ if (done + excess > in_len) { break; } Poly1305_Update(&poly1305, in + done, excess); done += excess; } Poly1305_Update(&poly1305, in + done, in_len - done); Poly1305_Final(&poly1305, out); if (memcmp(out, expected, sizeof(expected)) != 0) { printf("Poly1305 test failed.\n"); printf("got: "); hexdump(out, sizeof(out)); printf("\nexpected: "); hexdump(expected, sizeof(expected)); printf("\n"); return 1; } printf("PASS\n"); return 0; }
-- openssl-dev mailing list To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-dev