Control: tags -1 patch
(Combined patch for both bugs as the changes are so close together, but
you *can* do the obvious split if you only want to fix one.)
This has been tested only by running sparse/test_basic.py and #855102's
example from the source tree, *not* a full build. This confirms that it
does fix #855102, but I can't test for #831541 (due to several qemu
bugs). The syntax for running a single test is:
nosetests3 -v '/path/to/theano/theano/sparse/tests/test_basic.py':SamplingDotTester.test_op
I intend to send this upstream tomorrow.
Description: Fix invalid casts and negative stride handling
Cast values, not pointers, from int64 to int32.
Remember that first-in-index order (NumPy) and
first-in-memory order (BLAS) are not always the same thing.
Bump c_code_cache_version to make sure existing installs use the fixes.
Author: Rebecca N. Palmer <rebecca_pal...@zoho.com>
Bug-Debian: https://bugs.debian.org/855102 https://bugs.debian.org/831541
Forwarded: not yet
diff --git a/theano/sparse/opt.py b/theano/sparse/opt.py
index 6100405..d1c2b54 100644
--- a/theano/sparse/opt.py
+++ b/theano/sparse/opt.py
@@ -829,7 +829,11 @@ class UsmmCscDense(gof.Op):
npy_intp Sind = PyArray_STRIDES(%(x_ind)s)[0] / PyArray_DESCR(%(x_ind)s)->elsize;
npy_intp Sptr = PyArray_STRIDES(%(x_ptr)s)[0] / PyArray_DESCR(%(x_ptr)s)->elsize;
npy_intp Sy = PyArray_STRIDES(%(y)s)[1] / PyArray_DESCR(%(y)s)->elsize;
-
+
+ // blas expects ints; convert here (rather than just making N etc ints) to avoid potential overflow in the negative-stride correction
+ int N32 = N;
+ int Sy32 = Sy;
+ int Szn32 = Szn;
if (!(%(inplace)s))
{
@@ -859,7 +863,7 @@ class UsmmCscDense(gof.Op):
if (Szn < 0)
z_row += (N - 1) * Szn;
- %(axpy)s((int*)&N, (%(conv_type)s*)&Amk, (%(conv_type)s*)y_row, (int*)&Sy, (%(conv_type)s*)z_row, (int*)&Szn);
+ %(axpy)s(&N32, (%(conv_type)s*)&Amk, (%(conv_type)s*)y_row, &Sy32, (%(conv_type)s*)z_row, &Szn32);
}
}
}
@@ -868,7 +872,7 @@ class UsmmCscDense(gof.Op):
return rval
def c_code_cache_version(self):
- return (1, blas.blas_header_version())
+ return (1, blas.blas_header_version(), 0xdeb1a)
usmm_csc_dense = UsmmCscDense(inplace=False)
usmm_csc_dense_inplace = UsmmCscDense(inplace=True)
@@ -1748,7 +1752,7 @@ class SamplingDotCSR(gof.Op):
])
def c_code_cache_version(self):
- return (2, blas.blas_header_version())
+ return (2, blas.blas_header_version(), 0xdeb1a)
def c_support_code(self):
return blas.blas_header_text()
@@ -1891,6 +1895,11 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
memcpy(Dzi, Dpi, PyArray_DIMS(%(p_ind)s)[0]*sizeof(dtype_%(p_ind)s));
memcpy(Dzp, Dpp, PyArray_DIMS(%(p_ptr)s)[0]*sizeof(dtype_%(p_ptr)s));
+ // blas expects ints; convert here (rather than just making K etc ints) to avoid potential overflow in the negative-stride correction
+ int K32 = K;
+ int Sdx32 = Sdx;
+ int Sdy32 = Sdy;
+
for (npy_int32 m = 0; m < M; ++m) {
for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K
@@ -1898,8 +1907,15 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
const dtype_%(x)s* x_row = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * m);
const dtype_%(y)s* y_col = (dtype_%(y)s*)(PyArray_BYTES(%(y)s) + PyArray_STRIDES(%(y)s)[0] * n);
+ // dot expects pointer to the beginning of memory arrays,
+ // so when the stride is negative, we need to get the
+ // last element
+ if (Sdx < 0)
+ x_row += (K - 1) * Sdx;
+ if (Sdy < 0)
+ y_col += (K - 1) * Sdy;
- Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] * %(cdot)s((int*)&K, (const %(conv_type)s*)x_row, (int*)&Sdx, (const %(conv_type)s*)y_col, (int*)&Sdy);
+ Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] * %(cdot)s(&K32, (const %(conv_type)s*)x_row, &Sdx32, (const %(conv_type)s*)y_col, &Sdy32);
}
}
}
diff --git a/theano/sparse/tests/test_basic.py b/theano/sparse/tests/test_basic.py
index 8c183b9..03d79f1 100644
--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -3085,6 +3085,20 @@ class SamplingDotTester(utt.InferShapeTester):
assert tested.format == 'csr'
assert tested.dtype == expected.dtype
+ def test_negative_stride(self):
+ f = theano.function(
+ self.x,
+ sampling_dot(*self.x))
+
+ a2 = [self.a[0][::-1,:], self.a[1][:,::-1], self.a[2]]
+ tested = f(*a2)
+ x, y, p = a2
+ expected = p.multiply(numpy.dot(x, y.T))
+
+ utt.assert_allclose(as_ndarray(expected), tested.toarray())
+ assert tested.format == 'csr'
+ assert tested.dtype == expected.dtype
+
def test_infer_shape(self):
self._compile_and_check(self.x,
[sampling_dot(*self.x)],
--
debian-science-maintainers mailing list
debian-science-maintainers@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-science-maintainers