Control: tags -1 patch
(This is a single combined patch for both bugs, because the changes are so close together; it can be split in the obvious way if you only want to fix one of them.)

This has been tested only by running sparse/tests/test_basic.py and the example from #855102 from the source tree, *not* by doing a full build. That confirms that it fixes #855102, but I have not been able to test the fix for #831541 (because of several qemu bugs). To run a single test, use: nosetests3 -v '/path/to/theano/theano/sparse/tests/test_basic.py':SamplingDotTester.test_op

I intend to send this upstream tomorrow.
Description: Fix invalid casts and negative stride handling

Cast values, not pointers, from int64 to int32.

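Remember that the first element in index order (numpy) and the
first element in memory order (BLAS) are not always the same thing:
when a stride is negative, the pointer passed to BLAS has to be moved
to the last element in index order.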

Bump c_code_cache_version to make sure existing installs use the fixes.

Author: Rebecca N. Palmer <rebecca_pal...@zoho.com>
Bug-Debian: https://bugs.debian.org/855102 https://bugs.debian.org/831541
Forwarded: not yet

diff --git a/theano/sparse/opt.py b/theano/sparse/opt.py
index 6100405..d1c2b54 100644
--- a/theano/sparse/opt.py
+++ b/theano/sparse/opt.py
@@ -829,7 +829,11 @@ class UsmmCscDense(gof.Op):
             npy_intp Sind = PyArray_STRIDES(%(x_ind)s)[0] / PyArray_DESCR(%(x_ind)s)->elsize;
             npy_intp Sptr = PyArray_STRIDES(%(x_ptr)s)[0] / PyArray_DESCR(%(x_ptr)s)->elsize;
             npy_intp Sy = PyArray_STRIDES(%(y)s)[1] / PyArray_DESCR(%(y)s)->elsize;
-
+            
+            // blas expects ints; convert here (rather than just making N etc ints) to avoid potential overflow in the negative-stride correction
+            int N32 = N;
+            int Sy32 = Sy;
+            int Szn32 = Szn;
 
             if (!(%(inplace)s))
             {
@@ -859,7 +863,7 @@ class UsmmCscDense(gof.Op):
                     if (Szn < 0)
                         z_row += (N - 1) * Szn;
 
-                    %(axpy)s((int*)&N, (%(conv_type)s*)&Amk, (%(conv_type)s*)y_row, (int*)&Sy, (%(conv_type)s*)z_row, (int*)&Szn);
+                    %(axpy)s(&N32, (%(conv_type)s*)&Amk, (%(conv_type)s*)y_row, &Sy32, (%(conv_type)s*)z_row, &Szn32);
                 }
             }
         }
@@ -868,7 +872,7 @@ class UsmmCscDense(gof.Op):
         return rval
 
     def c_code_cache_version(self):
-        return (1, blas.blas_header_version())
+        return (1, blas.blas_header_version(), 0xdeb1a)
 usmm_csc_dense = UsmmCscDense(inplace=False)
 usmm_csc_dense_inplace = UsmmCscDense(inplace=True)
 
@@ -1748,7 +1752,7 @@ class SamplingDotCSR(gof.Op):
         ])
 
     def c_code_cache_version(self):
-        return (2, blas.blas_header_version())
+        return (2, blas.blas_header_version(), 0xdeb1a)
 
     def c_support_code(self):
         return blas.blas_header_text()
@@ -1891,6 +1895,11 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
             memcpy(Dzi, Dpi, PyArray_DIMS(%(p_ind)s)[0]*sizeof(dtype_%(p_ind)s));
             memcpy(Dzp, Dpp, PyArray_DIMS(%(p_ptr)s)[0]*sizeof(dtype_%(p_ptr)s));
 
+            // blas expects ints; convert here (rather than just making K etc ints) to avoid potential overflow in the negative-stride correction
+            int K32 = K;
+            int Sdx32 = Sdx;
+            int Sdy32 = Sdy;
+
             for (npy_int32 m = 0; m < M; ++m) {
                 for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
                     const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K
@@ -1898,8 +1907,15 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
                     const dtype_%(x)s* x_row = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * m);
 
                     const dtype_%(y)s* y_col = (dtype_%(y)s*)(PyArray_BYTES(%(y)s) + PyArray_STRIDES(%(y)s)[0] * n);
+                    // dot expects pointer to the beginning of memory arrays,
+                    // so when the stride is negative, we need to get the
+                    // last element
+                    if (Sdx < 0)
+                        x_row += (K - 1) * Sdx;
+                    if (Sdy < 0)
+                        y_col += (K - 1) * Sdy;
 
-                    Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] * %(cdot)s((int*)&K, (const %(conv_type)s*)x_row, (int*)&Sdx, (const %(conv_type)s*)y_col, (int*)&Sdy);
+                    Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] * %(cdot)s(&K32, (const %(conv_type)s*)x_row, &Sdx32, (const %(conv_type)s*)y_col, &Sdy32);
                 }
             }
         }
diff --git a/theano/sparse/tests/test_basic.py b/theano/sparse/tests/test_basic.py
index 8c183b9..03d79f1 100644
--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -3085,6 +3085,20 @@ class SamplingDotTester(utt.InferShapeTester):
         assert tested.format == 'csr'
         assert tested.dtype == expected.dtype
 
+    def test_negative_stride(self):
+        f = theano.function(
+            self.x,
+            sampling_dot(*self.x))
+
+        a2 = [self.a[0][::-1,:], self.a[1][:,::-1], self.a[2]]
+        tested = f(*a2)
+        x, y, p = a2
+        expected = p.multiply(numpy.dot(x, y.T))
+
+        utt.assert_allclose(as_ndarray(expected), tested.toarray())
+        assert tested.format == 'csr'
+        assert tested.dtype == expected.dtype
+
     def test_infer_shape(self):
         self._compile_and_check(self.x,
                                 [sampling_dot(*self.x)],

-- 
debian-science-maintainers mailing list
debian-science-maintainers@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-science-maintainers

Reply via email to