On Wed, Jan 28, 2009 at 04:42:49PM -0800, drblanco wrote: > Here's my attempt, which takes about 75s for r=10^8. > > circ2 r = (1+4*r) + 4 * (circ2' (rs+1) r 1 0) > where > rs = r^2 > circ2' rad x y sum > | x<y = sum > | rad<=rs = circ2' (rad+1+2*y) x (y+1) (sum+1+2*(x-y)) > | otherwise = circ2' (rad+1-2*x) (x-1) y sum > > For comparison, the C code below runs in <1 second. > > typedef unsigned long long bigint; > > bigint gausscount(bigint r) { > bigint sum=0; > bigint x, y; > bigint rs=r*r; > bigint rad=rs+1; > x=r; y=1; > > while (y<x) { > while (rad>rs) { > rad-=2*x; > rad++; > x--; > } > sum+=1+2*(x-y); > rad+=2*y+1; > y++; > > } > sum*=4; > sum++; > > return sum; > }
for the curious, here is the C output of jhc when fed circ2 annotated with the obvious bang-patterns (the strictness analyzer still needs some help): other than giving every intermediate value a name, and only performing one operation a line (which doesn't bother gcc at all), it is pretty darn close to the hand optimized C code... variable guide: x -> v86 y -> v88 rs -> v4146 rad -> v84 and sum is accumulated in v90 > static uint64_t > circ2(uint64_t v12) > { > uint64_t v212; > uint64_t v4146 = (v12 * v12); > uint64_t v4758 = (4 * v12); > uint64_t v4786 = (1 + v4758); > uint64_t v4160 = (1 + v4146); > uint64_t v84 = v4160; > uint64_t v86 = v12; > uint64_t v88 = 1; > uint64_t v90 = 0; > fW_a__fMain__3_ucirc2_t_u2:; > { uint16_t v100000 = (((int64_t)v86) < ((int64_t)v88)); > if (0 == v100000) { > uint16_t v100002 = (((int64_t)v84) <= ((int64_t)v4146)); > if (0 == v100002) { > uint64_t v4338 = (1 + v84); > uint64_t v4352 = (2 * v86); > uint64_t v4366 = (v4338 - v4352); > uint64_t v4380 = (v86 - 1); > v84 = v4366; > v86 = v4380; > v88 = v88; > v90 = v90; > goto fW_a__fMain__3_ucirc2_t_u2; > } else { > /* 1 */ > uint64_t v4226 = (1 + v90); > uint64_t v4772 = (v86 - v88); > uint64_t v4282 = (2 * v4772); > uint64_t v4296 = (v4226 + v4282); > uint64_t v4310 = (1 + v88); > uint64_t v4240 = (1 + v84); > uint64_t v4254 = (2 * v88); > uint64_t v4324 = (v4240 + v4254); > v84 = v4324; > v86 = v86; > v88 = v4310; > v90 = v4296; > goto fW_a__fMain__3_ucirc2_t_u2; > } > } else { > /* 1 */ > v212 = v90; > } > } > uint64_t v4174 = (4 * v212); > return v4786 + v4174; > } John -- John Meacham - ⑆repetae.net⑆john⑈ _______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe