Per lemire 
(https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/),
a faster algorithm for integer x+.y.  Also helps *., which is implemented in 
terms of +..

before:

   p=. ?1e7$1e7
   q=. ?1e7$1e7
   timex 'p+.q'
0.525084

after:

   timex 'p+.q'
0.269749

I have no idea if this is of any use to anyone.
diff --git a/jsrc/ve.c b/jsrc/ve.c
index 4d64fe6c..e9e52906 100644
--- a/jsrc/ve.c
+++ b/jsrc/ve.c
@@ -350,17 +350,22 @@ AHDR2(remII,I,I,I){I u,v;
  R EVOK;
 }
 
-
-static I igcd1(I a,I b){R a?igcd1(b%a,a):b;}  // Emulate Euclid
-
-I jtigcd(J jt,I a,I b){
- if(a>IMIN&&b>IMIN){a=ABS(a); b=ABS(b);}
- else{
-  if(a==b||!a||!b){jt->jerr=EWOV; R 0;}
-  if(a==IMIN){b=ABS(b); a=-(a+b);}else{a=ABS(a); b=-(a+b);}
- }
- R a?igcd1(b%a,a):b;
-}
+// 'binary gcd' algorithm, per Lemire 
https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/
+// can be vectorised, annoying without hardware ctz (avx512 has clz which 
works)
+I jtigcd(J jt,I a,I b){ I d;UI4 s,xz,yz;UI x,y;
+ if(unlikely((a|b)==IMIN)){jt->jerr=EWOV; R 0;}
+ if(!a||!b)R a|b;
+ x=ABS(a);y=ABS(b);
+ xz=CTTZI(x); yz=CTTZI(y); s=CTTZI(x|y);
+ x>>=xz;
+ while(1){
+  y>>=yz;
+  d=y-x;
+  yz=CTTZI(d);
+  if(!d)break;
+  if(d<0)x=y;
+  y=ABS(d);}
+ R x<<s;}
 
 D jtdgcd(J jt,D a,D b){D a1,b1,t;B stop = 0;
  a=ABS(a); b=ABS(b); if(a>b){t=a; a=b; b=t;}
----------------------------------------------------------------------
For information about J forums see http://www.jsoftware.com/forums.htm

Reply via email to