[issue8692] Use divide-and-conquer for faster factorials

Alexander Belopolsky Thu, 13 May 2010 16:45:17 -0700

Alexander Belopolsky <belopol...@users.sourceforge.net> added the comment:

On Wed, May 12, 2010 at 3:47 PM, Mark Dickinson <rep...@bugs.python.org> wrote:
>...
> Realistically though, I don't see an iterative version of 
> factorial_part_product as
> an option for the C patch, without a significant increase in complexity. 
> Â Daniel's
> current patch is remarkably clean and simple, and I'd like to keep it that 
> way.
>

I am attaching an iterative version in C patch.  I don't think it
represents a dramatic increase in complexity ~ 40 lines over Daniel's
30.

> I did think about various evil schemes for an iterative version, ...

I would not say my patch is evil, maybe a bit naughty. :-) It can be
made less evil by resizing the list instead of filling its tail with
NULLs or more evil by using a tuple instead of list.

The performance  appears to be identical to Daniel's with no small
integer multiplication optimization.  The later gives about 2%
improvement.

----------
Added file: http://bugs.python.org/file17321/factorial-no-recursion.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue8692>
_______________________________________

Index: Modules/mathmodule.c
===================================================================
--- Modules/mathmodule.c        (revision 81150)
+++ Modules/mathmodule.c        (working copy)
@@ -1129,56 +1129,173 @@
 Return an accurate floating point sum of values in the iterable.\n\
 Assumes IEEE-754 floating point arithmetic.");
 
+/* Divide-and-conquer factorial algorithm
+ *
+ * Based on the formula and psuedo-code provided at:
+ * http://www.luschny.de/math/factorial/binarysplitfact.html
+ *
+ * Faster algorithms exist, but they're more complicated and depend on
+ * a fast prime factoriazation algorithm.
+ */
+
+/* i-th odd number */
+#define ODD(i) (i << 1 | 1) 
+/* Compute product(ODD(i) for i in range(first, last+1)) */
 static PyObject *
+factorial_partial_product(unsigned long first, unsigned long last)
+{
+    PyObject *result;
+    PyObject *a, *x, *y;
+    Py_ssize_t n, i;
+
+    n = last - first + 1;
+    if (n <= 0)
+       return PyLong_FromLong(1L);
+    if (n == 1)
+       return PyLong_FromUnsignedLong(ODD(first));
+
+    /* a = [ODD(i) for i in range(first, last + 1)] */
+    a = PyList_New(last - first + 1);
+    if (a == NULL)
+       return NULL;
+    for  (i = 0; i < n; ++i) {
+       result = PyLong_FromUnsignedLong(ODD(first + i));
+       if (result == NULL)
+           goto done;
+       PyList_SET_ITEM(a, i, result);
+    }
+    while (n > 1) {
+       for (i = (n >> 1) - 1; i >= 0; --i) {
+           x = PyList_GET_ITEM(a, i);
+           y = PyList_GET_ITEM(a, n - i - 1);
+           result = PyNumber_Multiply(x, y);
+           if (result == NULL)
+               goto done;
+           PyList_SET_ITEM(a, i, result);
+           PyList_SET_ITEM(a, n - i - 1, NULL);
+           Py_DECREF(x);
+           Py_DECREF(y);
+       }
+       n = (n >> 1) + (n & 1);
+    }
+    /* Release reference held by the list */
+    PyList_SET_ITEM(a, 0, NULL);
+ done:
+    Py_DECREF(a);
+    return result;
+}
+
+static unsigned long
+bit_length(unsigned long n)
+{
+    unsigned long len = 0;
+    while (n != 0) {
+       ++len;
+       n >>= 1;
+    }
+    return len;
+}
+
+static unsigned long
+bit_count(unsigned long n)
+{
+    unsigned long count = 0;
+    while (n != 0) {
+       ++count;
+       n &= n - 1; /* clear least significant bit */
+    }
+    return count;
+}
+
+static PyObject *
+factorial_loop(unsigned long n)
+{
+    long i, v;
+    PyObject *p, *r;
+    PyObject *part, *tmp;
+
+    p = PyLong_FromLong(1L);
+    if (p == NULL)
+        return NULL;
+    Py_INCREF(p);
+    r = p;
+
+    for (i = bit_length(n) - 2; i >= 0; --i) {
+        v = n >> i;
+        if (v > 2) {
+           part = factorial_partial_product(((v >> 1) + 1) >> 1, (v - 1) >> 1);
+           if (part == NULL)
+               goto error;
+
+           tmp = PyNumber_Multiply(p, part);
+           Py_DECREF(part);
+           if (tmp == NULL)
+               goto error;
+           Py_DECREF(p);
+           p = tmp;
+
+           tmp = PyNumber_Multiply(r, p);
+           if (tmp == NULL)
+               goto error;
+           Py_DECREF(r);
+           r = tmp;
+       }
+    }
+    Py_DECREF(p);
+    return r;
+ error:
+    Py_DECREF(p);
+    Py_DECREF(r);
+    return NULL;
+}
+
+static PyObject *
 math_factorial(PyObject *self, PyObject *arg)
 {
-    long i, x;
-    PyObject *result, *iobj, *newresult;
+    PyObject *result;
 
+    PyObject *r; /* result without trailing zeros */
+    PyObject *ntz; /* number of trailing zeros */
+    long n;
+
     if (PyFloat_Check(arg)) {
-        PyObject *lx;
         double dx = PyFloat_AS_DOUBLE((PyFloatObject *)arg);
         if (!(Py_IS_FINITE(dx) && dx == floor(dx))) {
             PyErr_SetString(PyExc_ValueError,
                 "factorial() only accepts integral values");
             return NULL;
         }
-        lx = PyLong_FromDouble(dx);
-        if (lx == NULL)
+        n = (long) dx;
+    } else {
+        n = PyLong_AsLong(arg);
+        if (n == -1 && PyErr_Occurred())
             return NULL;
-        x = PyLong_AsLong(lx);
-        Py_DECREF(lx);
     }
-    else
-        x = PyLong_AsLong(arg);
 
-    if (x == -1 && PyErr_Occurred())
-        return NULL;
-    if (x < 0) {
+    if (n < 0) {
         PyErr_SetString(PyExc_ValueError,
-            "factorial() not defined for negative values");
+                        "factorial() not defined for negative values");
         return NULL;
     }
 
-    result = (PyObject *)PyLong_FromLong(1);
-    if (result == NULL)
+    if (n <= 12) {
+        static const long lookup[] = {1, 1, 2, 6, 24, 120, 720, 5040, 40320,
+                                      362880, 3628800, 39916800, 479001600};
+        return PyLong_FromLong(lookup[n]);
+    }
+
+    r = factorial_loop(n);
+    if (r == NULL)
         return NULL;
-    for (i=1 ; i<=x ; i++) {
-        iobj = (PyObject *)PyLong_FromLong(i);
-        if (iobj == NULL)
-            goto error;
-        newresult = PyNumber_Multiply(result, iobj);
-        Py_DECREF(iobj);
-        if (newresult == NULL)
-            goto error;
-        Py_DECREF(result);
-        result = newresult;
+
+    ntz = PyLong_FromLong(n - bit_count(n));
+    if (ntz != NULL) {
+       result = PyNumber_Lshift(r, ntz);
+       Py_DECREF(ntz);
     }
+
+    Py_DECREF(r);
     return result;
-
-error:
-    Py_DECREF(result);
-    return NULL;
 }
 
 PyDoc_STRVAR(math_factorial_doc,

_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

[issue8692] Use divide-and-conquer for faster factorials

Reply via email to