> There seems to be a bug in the AVX2 codepath in poly1305-x86.pl. I have not
> attempted to debug this, but I have attached a test file which produces
> different output in normal and AVX2 codepaths. Our existing poly1305
> implementation agrees with the former.
> 
> $ OPENSSL_ia32cap=0 ./poly1305_test
> PASS
> $ ./poly1305_test
> Poly1305 test failed.
> got:      2e65f0054e36505687d937ff5e8ed112
> expected: 69d28f73dd09d39a92aa179da354b7ea

While I keep looking further, please double-check the attached patch.



-- 
Ticket here: http://rt.openssl.org/Ticket/Display.html?id=4346
Please log in as guest with password guest if prompted

diff --git a/crypto/poly1305/asm/poly1305-x86.pl b/crypto/poly1305/asm/poly1305-x86.pl
index 7c1aee5..6f743ba 100755
--- a/crypto/poly1305/asm/poly1305-x86.pl
+++ b/crypto/poly1305/asm/poly1305-x86.pl
@@ -1435,7 +1435,7 @@ sub X { my $reg=shift; $reg=~s/^ymm/xmm/; $reg; }
        &test   ("eax","eax");                          # is_base2_26?
        &jz     (&label("enter_blocks"));
 
-&set_label("enter_avx2",16);
+&set_label("enter_avx2");
        &vzeroupper     ();
 
        &call   (&label("pic_point"));
@@ -1540,6 +1540,8 @@ sub X { my $reg=shift; $reg=~s/^ymm/xmm/; $reg; }
        &and            ("ecx",-64);
        &and            ("edx",63);
 
+       &vlazy_reduction();
+
        &vmovdqu        (&X($T0),&QWP(16*0,"esi"));
        &cmp            ("edx",32);
        &jb             (&label("one"));
@@ -1778,8 +1780,8 @@ sub vlazy_reduction {
        &vmovd          (&DWP(-16*3+4*3,"edi"),"xmm3");
        &vmovd          (&DWP(-16*3+4*4,"edi"),"xmm4");
        &vzeroupper     ();
+       &mov            ("esp","ebp");
 &set_label("nodata");
-       &mov    ("esp","ebp");
 &function_end("_poly1305_blocks_avx2");
 }
 &set_label("const_sse2",64);
-- 
openssl-dev mailing list
To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-dev

Reply via email to