Per lemire
(https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/),
a faster algorithm for integer x+.y. Also helps *., which is implemented in
terms of +..
before:
p=. ?1e7$1e7
q=. ?1e7$1e7
timex 'p+.q'
0.525084
after:
timex 'p+.q'
0.269749
I have no idea if this is of any use to anyone.
diff --git a/jsrc/ve.c b/jsrc/ve.c
index 4d64fe6c..e9e52906 100644
--- a/jsrc/ve.c
+++ b/jsrc/ve.c
@@ -350,17 +350,22 @@ AHDR2(remII,I,I,I){I u,v;
R EVOK;
}
-
-static I igcd1(I a,I b){R a?igcd1(b%a,a):b;} // Emulate Euclid
-
-I jtigcd(J jt,I a,I b){
- if(a>IMIN&&b>IMIN){a=ABS(a); b=ABS(b);}
- else{
- if(a==b||!a||!b){jt->jerr=EWOV; R 0;}
- if(a==IMIN){b=ABS(b); a=-(a+b);}else{a=ABS(a); b=-(a+b);}
- }
- R a?igcd1(b%a,a):b;
-}
+// 'binary gcd' algorithm, per Lemire
https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/
+// can be vectorised, annoying without hardware ctz (avx512 has clz which
works)
+I jtigcd(J jt,I a,I b){ I d;UI4 s,xz,yz;UI x,y;
+ if(unlikely((a|b)==IMIN)){jt->jerr=EWOV; R 0;}
+ if(!a||!b)R a|b;
+ x=ABS(a);y=ABS(b);
+ xz=CTTZI(x); yz=CTTZI(y); s=CTTZI(x|y);
+ x>>=xz;
+ while(1){
+ y>>=yz;
+ d=y-x;
+ yz=CTTZI(d);
+ if(!d)break;
+ if(d<0)x=y;
+ y=ABS(d);}
+ R x<<s;}
D jtdgcd(J jt,D a,D b){D a1,b1,t;B stop = 0;
a=ABS(a); b=ABS(b); if(a>b){t=a; a=b; b=t;}
----------------------------------------------------------------------
For information about J forums see http://www.jsoftware.com/forums.htm