Alexander Belopolsky <belopol...@users.sourceforge.net> added the comment:
On Wed, May 12, 2010 at 3:47 PM, Mark Dickinson <rep...@bugs.python.org> wrote:
>...
> Realistically though, I don't see an iterative version of
> factorial_part_product as
> an option for the C patch, without a significant increase in complexity.
> Â Daniel's
> current patch is remarkably clean and simple, and I'd like to keep it that
> way.
>
I am attaching an iterative version in C patch. I don't think it
represents a dramatic increase in complexity ~ 40 lines over Daniel's
30.
> I did think about various evil schemes for an iterative version, ...
I would not say my patch is evil, maybe a bit naughty. :-) It can be
made less evil by resizing the list instead of filling its tail with
NULLs or more evil by using a tuple instead of list.
The performance appears to be identical to Daniel's with no small
integer multiplication optimization. The later gives about 2%
improvement.
----------
Added file: http://bugs.python.org/file17321/factorial-no-recursion.patch
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue8692>
_______________________________________
Index: Modules/mathmodule.c
===================================================================
--- Modules/mathmodule.c (revision 81150)
+++ Modules/mathmodule.c (working copy)
@@ -1129,56 +1129,173 @@
Return an accurate floating point sum of values in the iterable.\n\
Assumes IEEE-754 floating point arithmetic.");
+/* Divide-and-conquer factorial algorithm
+ *
+ * Based on the formula and psuedo-code provided at:
+ * http://www.luschny.de/math/factorial/binarysplitfact.html
+ *
+ * Faster algorithms exist, but they're more complicated and depend on
+ * a fast prime factoriazation algorithm.
+ */
+
+/* i-th odd number */
+#define ODD(i) (i << 1 | 1)
+/* Compute product(ODD(i) for i in range(first, last+1)) */
static PyObject *
+factorial_partial_product(unsigned long first, unsigned long last)
+{
+ PyObject *result;
+ PyObject *a, *x, *y;
+ Py_ssize_t n, i;
+
+ n = last - first + 1;
+ if (n <= 0)
+ return PyLong_FromLong(1L);
+ if (n == 1)
+ return PyLong_FromUnsignedLong(ODD(first));
+
+ /* a = [ODD(i) for i in range(first, last + 1)] */
+ a = PyList_New(last - first + 1);
+ if (a == NULL)
+ return NULL;
+ for (i = 0; i < n; ++i) {
+ result = PyLong_FromUnsignedLong(ODD(first + i));
+ if (result == NULL)
+ goto done;
+ PyList_SET_ITEM(a, i, result);
+ }
+ while (n > 1) {
+ for (i = (n >> 1) - 1; i >= 0; --i) {
+ x = PyList_GET_ITEM(a, i);
+ y = PyList_GET_ITEM(a, n - i - 1);
+ result = PyNumber_Multiply(x, y);
+ if (result == NULL)
+ goto done;
+ PyList_SET_ITEM(a, i, result);
+ PyList_SET_ITEM(a, n - i - 1, NULL);
+ Py_DECREF(x);
+ Py_DECREF(y);
+ }
+ n = (n >> 1) + (n & 1);
+ }
+ /* Release reference held by the list */
+ PyList_SET_ITEM(a, 0, NULL);
+ done:
+ Py_DECREF(a);
+ return result;
+}
+
+static unsigned long
+bit_length(unsigned long n)
+{
+ unsigned long len = 0;
+ while (n != 0) {
+ ++len;
+ n >>= 1;
+ }
+ return len;
+}
+
+static unsigned long
+bit_count(unsigned long n)
+{
+ unsigned long count = 0;
+ while (n != 0) {
+ ++count;
+ n &= n - 1; /* clear least significant bit */
+ }
+ return count;
+}
+
+static PyObject *
+factorial_loop(unsigned long n)
+{
+ long i, v;
+ PyObject *p, *r;
+ PyObject *part, *tmp;
+
+ p = PyLong_FromLong(1L);
+ if (p == NULL)
+ return NULL;
+ Py_INCREF(p);
+ r = p;
+
+ for (i = bit_length(n) - 2; i >= 0; --i) {
+ v = n >> i;
+ if (v > 2) {
+ part = factorial_partial_product(((v >> 1) + 1) >> 1, (v - 1) >> 1);
+ if (part == NULL)
+ goto error;
+
+ tmp = PyNumber_Multiply(p, part);
+ Py_DECREF(part);
+ if (tmp == NULL)
+ goto error;
+ Py_DECREF(p);
+ p = tmp;
+
+ tmp = PyNumber_Multiply(r, p);
+ if (tmp == NULL)
+ goto error;
+ Py_DECREF(r);
+ r = tmp;
+ }
+ }
+ Py_DECREF(p);
+ return r;
+ error:
+ Py_DECREF(p);
+ Py_DECREF(r);
+ return NULL;
+}
+
+static PyObject *
math_factorial(PyObject *self, PyObject *arg)
{
- long i, x;
- PyObject *result, *iobj, *newresult;
+ PyObject *result;
+ PyObject *r; /* result without trailing zeros */
+ PyObject *ntz; /* number of trailing zeros */
+ long n;
+
if (PyFloat_Check(arg)) {
- PyObject *lx;
double dx = PyFloat_AS_DOUBLE((PyFloatObject *)arg);
if (!(Py_IS_FINITE(dx) && dx == floor(dx))) {
PyErr_SetString(PyExc_ValueError,
"factorial() only accepts integral values");
return NULL;
}
- lx = PyLong_FromDouble(dx);
- if (lx == NULL)
+ n = (long) dx;
+ } else {
+ n = PyLong_AsLong(arg);
+ if (n == -1 && PyErr_Occurred())
return NULL;
- x = PyLong_AsLong(lx);
- Py_DECREF(lx);
}
- else
- x = PyLong_AsLong(arg);
- if (x == -1 && PyErr_Occurred())
- return NULL;
- if (x < 0) {
+ if (n < 0) {
PyErr_SetString(PyExc_ValueError,
- "factorial() not defined for negative values");
+ "factorial() not defined for negative values");
return NULL;
}
- result = (PyObject *)PyLong_FromLong(1);
- if (result == NULL)
+ if (n <= 12) {
+ static const long lookup[] = {1, 1, 2, 6, 24, 120, 720, 5040, 40320,
+ 362880, 3628800, 39916800, 479001600};
+ return PyLong_FromLong(lookup[n]);
+ }
+
+ r = factorial_loop(n);
+ if (r == NULL)
return NULL;
- for (i=1 ; i<=x ; i++) {
- iobj = (PyObject *)PyLong_FromLong(i);
- if (iobj == NULL)
- goto error;
- newresult = PyNumber_Multiply(result, iobj);
- Py_DECREF(iobj);
- if (newresult == NULL)
- goto error;
- Py_DECREF(result);
- result = newresult;
+
+ ntz = PyLong_FromLong(n - bit_count(n));
+ if (ntz != NULL) {
+ result = PyNumber_Lshift(r, ntz);
+ Py_DECREF(ntz);
}
+
+ Py_DECREF(r);
return result;
-
-error:
- Py_DECREF(result);
- return NULL;
}
PyDoc_STRVAR(math_factorial_doc,
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com