This patch relates to RT 3667 and the paper here (to appear at COSADE 2015):
https://eprint.iacr.org/2015/036 Instead of a direct table lookup for precomputed points in the generic ECC multi-scalar multiplication routine, it computes the point by traversing the entire table. Motivation is better side-channel security. Benchmarking below (evidently code path is not executed for nistp256 anymore). Costs are 6-10% for ECDSA sign, 2-6% for ECDSA verify (defense unnecessary, but AFAICT there is no way to authoritatively distinguish at this depth whether to protect scalars or not), and 1-5% for ECDH. BBB openssl speed ecdsa before: sign verify sign/s verify/s 160 bit ecdsa (secp160r1) 0.0000s 0.0002s 20818.1 5670.3 192 bit ecdsa (nistp192) 0.0001s 0.0002s 17844.1 4570.7 224 bit ecdsa (nistp224) 0.0001s 0.0003s 13452.7 3413.4 256 bit ecdsa (nistp256) 0.0000s 0.0001s 26485.2 10577.4 384 bit ecdsa (nistp384) 0.0002s 0.0007s 5961.0 1438.0 521 bit ecdsa (nistp521) 0.0004s 0.0015s 2837.1 684.5 openssl speed ecdsa after: sign verify sign/s verify/s 160 bit ecdsa (secp160r1) 0.0001s 0.0002s 19056.1 5369.1 192 bit ecdsa (nistp192) 0.0001s 0.0002s 16270.0 4490.4 224 bit ecdsa (nistp224) 0.0001s 0.0003s 12150.9 3354.7 256 bit ecdsa (nistp256) 0.0000s 0.0001s 26489.9 10554.9 384 bit ecdsa (nistp384) 0.0002s 0.0007s 5470.9 1355.5 521 bit ecdsa (nistp521) 0.0004s 0.0015s 2673.3 667.4 openssl speed ecdh before: op op/s 160 bit ecdh (secp160r1) 0.0001s 7025.0 192 bit ecdh (nistp192) 0.0002s 5786.0 224 bit ecdh (nistp224) 0.0002s 4158.1 256 bit ecdh (nistp256) 0.0001s 14899.7 384 bit ecdh (nistp384) 0.0006s 1732.1 521 bit ecdh (nistp521) 0.0012s 820.4 openssl speed ecdh after: op op/s 160 bit ecdh (secp160r1) 0.0001s 6750.8 192 bit ecdh (nistp192) 0.0002s 5477.6 224 bit ecdh (nistp224) 0.0002s 4060.2 256 bit ecdh (nistp256) 0.0001s 14886.2 384 bit ecdh (nistp384) 0.0006s 1709.7 521 bit ecdh (nistp521) 0.0012s 802.1 (I do not have root on the benchmarking machine to peg the clock. Sorry.) $ cat /proc/cpuinfo processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 60 model name : Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz stepping : 3 microcode : 26 cpu MHz : 800.000 cache size : 6144 KB physical id : 0 siblings : 4 core id : 0 cpu cores : 4 apicid : 0 initial apicid : 0 fpu : yes fpu_exception : yes cpuid level : 13 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dts tpr_shadow vnmi flexpriority ept vpid fsgsbase bmi1 hle avx2 smep bmi2 erms invpcid rtm bogomips : 6385.38 clflush size : 64 cache_alignment : 64 address sizes : 39 bits physical, 48 bits virtual power management: SNIP
diff -rupN openssl-1.0.2_orig/crypto/ec/ec_mult.c openssl-1.0.2/crypto/ec/ec_mult.c --- openssl-1.0.2_orig/crypto/ec/ec_mult.c 2015-01-22 16:58:32.000000000 +0200 +++ openssl-1.0.2/crypto/ec/ec_mult.c 2015-03-10 13:06:50.661644241 +0200 @@ -306,6 +306,66 @@ static signed char *compute_wNAF(const B return r; } +/** + * Traverses t[i] for 0 <= i < 2**(w-1) and places t[digit >> 1] in r + * with bitwise operations as a side-channel defense. + * Assumes r has enough BIGNUM space so tops of BIGNUMs in t do not overflow. + * See https://eprint.iacr.org/2015/036 + */ +void EC_POINT_select(EC_POINT *r, EC_POINT **t, int digit, int w, int top) { + + w = (1 << w); + + int d, j; + BN_ULONG mask; + + for(j=0; j<top; j++) { + r->X.d[j] = 0; + r->Y.d[j] = 0; + r->Z.d[j] = 0; + } + r->X.top = 0; + r->X.dmax = 0; + r->X.neg = 0; + r->X.flags = 0; + r->Y.top = 0; + r->Y.dmax = 0; + r->Y.neg = 0; + r->Y.flags = 0; + r->Z.top = 0; + r->Z.dmax = 0; + r->Z.neg = 0; + r->Z.flags = 0; + r->Z_is_one = 0; + + for(d=1; d<w; d+=2) { + mask = (BN_ULONG)(((digit ^ d) - 1) >> 15); + for(j=0; j<t[d >> 1]->X.top; j++) { + r->X.d[j] |= mask & t[d >> 1]->X.d[j]; + } + for(j=0; j<t[d >> 1]->Y.top; j++) { + r->Y.d[j] |= mask & t[d >> 1]->Y.d[j]; + } + for(j=0; j<t[d >> 1]->Z.top; j++) { + r->Z.d[j] |= mask & t[d >> 1]->Z.d[j]; + } + r->X.top |= mask & (t[d >> 1]->X.top ); + r->X.dmax |= mask & (t[d >> 1]->X.dmax ); + r->X.neg |= mask & (t[d >> 1]->X.neg ); + r->X.flags |= mask & (t[d >> 1]->X.flags ); + r->Y.top |= mask & (t[d >> 1]->Y.top ); + r->Y.dmax |= mask & (t[d >> 1]->Y.dmax ); + r->Y.neg |= mask & (t[d >> 1]->Y.neg ); + r->Y.flags |= mask & (t[d >> 1]->Y.flags ); + r->Z.top |= mask & (t[d >> 1]->Z.top ); + r->Z.dmax |= mask & (t[d >> 1]->Z.dmax ); + r->Z.neg |= mask & (t[d >> 1]->Z.neg ); + r->Z.flags |= mask & (t[d >> 1]->Z.flags ); + r->Z_is_one |= mask & (t[d >> 1]->Z_is_one); + } + +} + /* * TODO: table should be optimised for the wNAF-based implementation, * sometimes smaller windows will give better performance (thus the @@ -532,6 +592,7 @@ int ec_wNAF_mul(const EC_GROUP *group, E tmp_points = pre_comp->points; for (i = num; i < totalnum; i++) { + wsize[i] = wsize[num]; if (i < totalnum - 1) { wNAF_len[i] = blocksize; if (tmp_len < blocksize) { @@ -635,6 +696,11 @@ int ec_wNAF_mul(const EC_GROUP *group, E goto err; #endif + /* use tmp for selecting point addition arg */ + bn_wexpand(&tmp->X, group->field.top); + bn_wexpand(&tmp->Y, group->field.top); + bn_wexpand(&tmp->Z, group->field.top); + r_is_at_infinity = 1; for (k = max_len - 1; k >= 0; k--) { @@ -665,12 +731,16 @@ int ec_wNAF_mul(const EC_GROUP *group, E /* digit > 0 */ if (r_is_at_infinity) { - if (!EC_POINT_copy(r, val_sub[i][digit >> 1])) - goto err; + bn_wexpand(&r->X, group->field.top); + bn_wexpand(&r->Y, group->field.top); + bn_wexpand(&r->Z, group->field.top); + EC_POINT_select(r, val_sub[i], digit, wsize[i], group->field.top); r_is_at_infinity = 0; } else { + /* prepare point addition argument */ + EC_POINT_select(tmp, val_sub[i], digit, wsize[i], group->field.top); if (!EC_POINT_add - (group, r, r, val_sub[i][digit >> 1], ctx)) + (group, r, r, tmp, ctx)) goto err; } }
_______________________________________________ openssl-dev mailing list To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-dev