Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package scs for openSUSE:Factory checked in 
at 2021-04-12 17:10:44
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/scs (Old)
 and      /work/SRC/openSUSE:Factory/.scs.new.2401 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "scs"

Mon Apr 12 17:10:44 2021 rev:2 rq:884647 version:2.1.3

Changes:
--------
--- /work/SRC/openSUSE:Factory/scs/scs.changes  2020-09-25 16:32:27.587939662 +0200
+++ /work/SRC/openSUSE:Factory/.scs.new.2401/scs.changes        2021-04-12 17:10:54.482601073 +0200
@@ -1,0 +2,7 @@
+Mon Apr 12 12:12:03 UTC 2021 - andy great <andythe_gr...@pm.me>
+
+- Update to version 2.1.3.
+  * Improved GPU indirect solver.
+- Stop using %exclude to remove static lib. 
+
+-------------------------------------------------------------------

Old:
----
  scs-2.1.2.tar.gz

New:
----
  scs-2.1.3.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ scs.spec ++++++
--- /var/tmp/diff_new_pack.HL2UdR/_old  2021-04-12 17:10:54.966601608 +0200
+++ /var/tmp/diff_new_pack.HL2UdR/_new  2021-04-12 17:10:54.970601612 +0200
@@ -1,7 +1,7 @@
 #
 # spec file for package scs
 #
-# Copyright (c) 2020 SUSE LLC
+# Copyright (c) 2021 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -17,7 +17,7 @@
 
 
 Name:           scs
-Version:        2.1.2
+Version:        2.1.3
 Release:        0
 Summary:        Numerical package for solving large-scale convex cone problems
 License:        MIT
@@ -46,12 +46,12 @@
 
 %install
 %make_install INSTALL_LIB_DIR=%{buildroot}%{_libdir} INSTALL_INC_DIR=%{buildroot}%{_includedir}/%{name}
+rm -r %{buildroot}%{_libdir}/*.a
 
 %files devel
 %doc README.md
 %license LICENSE.txt
 %{_includedir}/%{name}
 %{_libdir}/*.so
-%exclude %{_libdir}/*.a
 
 %changelog

++++++ scs-2.1.2.tar.gz -> scs-2.1.3.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scs-2.1.2/.bumpversion.cfg new/scs-2.1.3/.bumpversion.cfg
--- old/scs-2.1.2/.bumpversion.cfg      2020-06-05 11:04:23.000000000 +0200
+++ new/scs-2.1.3/.bumpversion.cfg      2021-04-12 10:43:08.000000000 +0200
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 2.1.2
+current_version = 2.1.3
 
 [bumpversion:file:README.md]
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scs-2.1.2/README.md new/scs-2.1.3/README.md
--- old/scs-2.1.2/README.md     2020-06-05 11:04:23.000000000 +0200
+++ new/scs-2.1.3/README.md     2021-04-12 10:43:08.000000000 +0200
@@ -2,7 +2,7 @@
 ====
 
 [![Build Status](https://travis-ci.org/cvxgrp/scs.svg?branch=master)](https://travis-ci.org/cvxgrp/scs)
-[![Build status](https://ci.appveyor.com/api/projects/status/4542u6kom5293qpm/branch/master?svg=true)](https://ci.appveyor.com/project/bodono/scs/branch/master)
+<!--[![Build status](https://ci.appveyor.com/api/projects/status/4542u6kom5293qpm/branch/master?svg=true)](https://ci.appveyor.com/project/bodono/scs/branch/master)-->
 
 SCS (`splitting conic solver`) is a numerical optimization package for solving
 large-scale convex cone problems, based on our paper [Conic Optimization via
@@ -21,7 +21,7 @@
 [Convex.jl](https://github.com/JuliaOpt/Convex.jl), and
 [Yalmip](https://github.com/johanlofberg/YALMIP).
 
-The current version is `2.1.2`. If you wish to cite SCS, please use the
+The current version is `2.1.3`. If you wish to cite SCS, please use the
 following:
 ```
 @article{ocpb:16,
@@ -37,7 +37,7 @@
 }
 @misc{scs,
     author       = {B. O'Donoghue and E. Chu and N. Parikh and S. Boyd},
-    title        = {{SCS}: Splitting Conic Solver, version 2.1.2},
+    title        = {{SCS}: Splitting Conic Solver, version 2.1.3},
     howpublished = {\url{https://github.com/cvxgrp/scs}},
     month        = nov,
     year         = 2019
@@ -137,7 +137,7 @@
 
 The direct solver uses external numerical linear algebra packages:
 * [QDLDL](https://github.com/oxfordcontrol/qdldl)
-* [AMD](http://www.cise.ufl.edu/research/sparse/).
+* [AMD](https://github.com/DrTimothyAldenDavis/SuiteSparse).
 
 ### Using SCS in C
 Typing `make` at the command line will compile the code and create SCS libraries
@@ -153,7 +153,7 @@
 `libscsindir.ext` (where `.ext` extension is platform dependent) in the same
 folder. It will also produce two demo binaries in the `out` folder named
 `demo_socp_direct`, and `demo_socp_indirect`. If you have a GPU and have CUDA
-installed, you can also execture `make gpu` to compile SCS to run on the GPU
+installed, you can also execute `make gpu` to compile SCS to run on the GPU
 which will create additional libraries and demo binaries in the `out` folder
 corresponding to the gpu version. Note that the GPU version requires 32 bit
 ints, which can be enforced by compiling with `DLONG=0`.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scs-2.1.2/include/glbopts.h new/scs-2.1.3/include/glbopts.h
--- old/scs-2.1.2/include/glbopts.h     2020-06-05 11:04:23.000000000 +0200
+++ new/scs-2.1.3/include/glbopts.h     2021-04-12 10:43:08.000000000 +0200
@@ -13,7 +13,7 @@
 
 /* SCS VERSION NUMBER ----------------------------------------------    */
 #define SCS_VERSION \
-  ("2.1.2") /* string literals automatically null-terminated */
+  ("2.1.3") /* string literals automatically null-terminated */
 
 /* SCS returns one of the following integers:                           */
 #define SCS_INFEASIBLE_INACCURATE (-7)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scs-2.1.2/linsys/gpu/gpu.c new/scs-2.1.3/linsys/gpu/gpu.c
--- old/scs-2.1.2/linsys/gpu/gpu.c      2020-06-05 11:04:23.000000000 +0200
+++ new/scs-2.1.3/linsys/gpu/gpu.c      2021-04-12 10:43:08.000000000 +0200
@@ -1,33 +1,72 @@
 #include "gpu.h"
 
-void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const scs_float *x,
-                               scs_float *y, cusparseHandle_t cusparse_handle) {
+void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
+                               cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle,
+                               size_t *buffer_size, void **buffer) {
   /* y += A'*x
      x and y MUST be on GPU already
   */
   const scs_float onef = 1.0;
-  CUSPARSE(csrmv)
-  (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, Ag->n, Ag->m, Ag->Annz,
-   &onef, Ag->descr, Ag->x, Ag->p, Ag->i, x, &onef, y);
+  size_t new_buffer_size = 0;
+
+  CUSPARSE_GEN(SpMV_bufferSize)
+  (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+    &onef, Ag->descr, x, &onef, y,
+    SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
+    &new_buffer_size);
+
+  if (new_buffer_size > *buffer_size) {
+    if (*buffer != SCS_NULL) {
+      cudaFree(*buffer);
+    }
+    cudaMalloc(buffer, *buffer_size);
+    *buffer_size = new_buffer_size;
+  }
+
+  CUSPARSE_GEN(SpMV)
+  (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+    &onef, Ag->descr, x, &onef, y,
+    SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
+    buffer);
 }
 
-void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const scs_float *x,
-                          scs_float *y, cusparseHandle_t cusparse_handle) {
+void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
+                          cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle,
+                          size_t *buffer_size, void **buffer) {
   /* y += A*x
      x and y MUST be on GPU already
    */
   const scs_float onef = 1.0;
+  size_t new_buffer_size = 0;
+
   /* The A matrix idx pointers must be ORDERED */
-  CUSPARSE(csrmv)
-  (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, Ag->n, Ag->m, Ag->Annz, &onef,
-   Ag->descr, Ag->x, Ag->p, Ag->i, x, &onef, y);
+
+  CUSPARSE_GEN(SpMV_bufferSize)
+  (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
+    &onef, Ag->descr, x, &onef, y,
+    SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
+    &new_buffer_size);
+
+  if (new_buffer_size > *buffer_size) {
+    if (*buffer != SCS_NULL) {
+      cudaFree(*buffer);
+    }
+    cudaMalloc(buffer, *buffer_size);
+    *buffer_size = new_buffer_size;
+  }
+
+  CUSPARSE_GEN(SpMV)
+  (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
+    &onef, Ag->descr, x, &onef, y,
+    SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
+    buffer);
 }
 
 void SCS(free_gpu_matrix)(ScsGpuMatrix *A) {
   cudaFree(A->x);
   cudaFree(A->i);
   cudaFree(A->p);
-  cusparseDestroyMatDescr(A->descr);
+  cusparseDestroySpMat(A->descr);
 }
 
 void SCS(normalize_a)(ScsMatrix *A, const ScsSettings *stgs, const ScsCone *k,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scs-2.1.2/linsys/gpu/gpu.h new/scs-2.1.3/linsys/gpu/gpu.h
--- old/scs-2.1.2/linsys/gpu/gpu.h      2020-06-05 11:04:23.000000000 +0200
+++ new/scs-2.1.3/linsys/gpu/gpu.h      2021-04-12 10:43:08.000000000 +0200
@@ -34,6 +34,7 @@
 #define CUBLAS(x) cublasS##x
 #define CUSPARSE(x) cusparseS##x
 #endif
+#define CUSPARSE_GEN(x) cusparse##x
 #else
 #ifndef SFLOAT
 #define CUBLAS(x) \
@@ -50,8 +51,26 @@
   CUDA_CHECK_ERR;   \
   cusparseS##x
 #endif
+#define CUSPARSE_GEN(x) \
+  CUDA_CHECK_ERR;       \
+  cusparse##x
 #endif
 
+#ifndef SFLOAT
+#define SCS_CUDA_FLOAT CUDA_R_64F
+#else
+#define SCS_CUDA_FLOAT CUDA_R_32F
+#endif
+
+#ifndef DLONG
+#define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_32I
+#else
+#define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_64I
+#endif
+
+#define SCS_CSRMV_ALG CUSPARSE_CSRMV_ALG1
+#define SCS_CSR2CSC_ALG CUSPARSE_CSR2CSC_ALG1
+
 /*
  CUDA matrix routines only for CSR, not CSC matrices:
     CSC             CSR             GPU     Mult
@@ -68,14 +87,16 @@
   scs_int m, n; /* m rows, n cols */
   scs_int Annz; /* num non-zeros in A matrix */
   /* CUDA */
-  cusparseMatDescr_t descr;
+  cusparseSpMatDescr_t descr;
 } ScsGpuMatrix;
 
-void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *A, const scs_float *x,
-                               scs_float *y, cusparseHandle_t cusparse_handle);
-
-void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *A, const scs_float *x,
-                          scs_float *y, cusparseHandle_t cusparse_handle);
+void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x,
+                               cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle,
+                               size_t *buffer_size, void **buffer);
+
+void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x,
+                          cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle,
+                          size_t *buffer_size, void **buffer);
 
 void SCS(free_gpu_matrix)(ScsGpuMatrix *A);
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scs-2.1.2/linsys/gpu/indirect/private.c new/scs-2.1.3/linsys/gpu/indirect/private.c
--- old/scs-2.1.2/linsys/gpu/indirect/private.c 2020-06-05 11:04:23.000000000 +0200
+++ new/scs-2.1.3/linsys/gpu/indirect/private.c 2021-04-12 10:43:08.000000000 +0200
@@ -10,7 +10,14 @@
   scs_float *v_n = p->r;
   cudaMemcpy(v_m, x, A->m * sizeof(scs_float), cudaMemcpyHostToDevice);
   cudaMemcpy(v_n, y, A->n * sizeof(scs_float), cudaMemcpyHostToDevice);
-  SCS(_accum_by_atrans_gpu)(p->Ag, v_m, v_n, p->cusparse_handle);
+
+  cusparseDnVecSetValues(p->dn_vec_m, (void *) v_m);
+  cusparseDnVecSetValues(p->dn_vec_n, (void *) v_n);
+  SCS(_accum_by_atrans_gpu)(
+    p->Ag, p->dn_vec_m, p->dn_vec_n, p->cusparse_handle,
+    &p->buffer_size, &p->buffer
+  );
+
   cudaMemcpy(y, v_n, A->n * sizeof(scs_float), cudaMemcpyDeviceToHost);
 }
 
@@ -21,11 +28,21 @@
   scs_float *v_n = p->r;
   cudaMemcpy(v_n, x, A->n * sizeof(scs_float), cudaMemcpyHostToDevice);
   cudaMemcpy(v_m, y, A->m * sizeof(scs_float), cudaMemcpyHostToDevice);
+
+  cusparseDnVecSetValues(p->dn_vec_m, (void *) v_m);
+  cusparseDnVecSetValues(p->dn_vec_n, (void *) v_n);
 #if GPU_TRANSPOSE_MAT > 0
-  SCS(_accum_by_atrans_gpu)(p->Agt, v_n, v_m, p->cusparse_handle);
+  SCS(_accum_by_atrans_gpu)(
+    p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+    &p->buffer_size, &p->buffer
+  );
 #else
-  SCS(_accum_by_a_gpu)(p->Ag, v_n, v_m, p->cusparse_handle);
+  SCS(_accum_by_a_gpu)(
+    p->Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+    &p->buffer_size, &p->buffer
+  );
 #endif
+
   cudaMemcpy(y, v_m, A->m * sizeof(scs_float), cudaMemcpyDeviceToHost);
 }
 
@@ -64,6 +81,11 @@
       SCS(free_gpu_matrix)(p->Agt);
       scs_free(p->Agt);
     }
+    if (p->buffer != SCS_NULL) {
+      cudaFree(p->buffer);
+    }
+    cusparseDestroyDnVec(p->dn_vec_m);
+    cusparseDestroyDnVec(p->dn_vec_n);
     cusparseDestroy(p->cusparse_handle);
     cublasDestroy(p->cublas_handle);
     /* Don't reset because it interferes with other GPU programs. */
@@ -78,9 +100,30 @@
   /* x and y MUST already be loaded to GPU */
   scs_float *tmp_m = p->tmp_m; /* temp memory */
   cudaMemset(tmp_m, 0, A->m * sizeof(scs_float));
-  SCS(_accum_by_a_gpu)(A, x, tmp_m, p->cusparse_handle);
+
+  cusparseDnVecSetValues(p->dn_vec_m, (void *) tmp_m);
+  cusparseDnVecSetValues(p->dn_vec_n, (void *) x);
+#if GPU_TRANSPOSE_MAT > 0
+  SCS(_accum_by_atrans_gpu)(
+    p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+    &p->buffer_size, &p->buffer
+  );
+#else
+  SCS(_accum_by_a_gpu)(
+    A, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+    &p->buffer_size, &p->buffer
+  );
+#endif
+
   cudaMemset(y, 0, A->n * sizeof(scs_float));
-  SCS(_accum_by_atrans_gpu)(A, tmp_m, y, p->cusparse_handle);
+
+  cusparseDnVecSetValues(p->dn_vec_m, (void *) tmp_m);
+  cusparseDnVecSetValues(p->dn_vec_n, (void *) y);
+  SCS(_accum_by_atrans_gpu)(
+    A, p->dn_vec_m, p->dn_vec_n, p->cusparse_handle,
+    &p->buffer_size, &p->buffer
+  );
+
   CUBLAS(axpy)(p->cublas_handle, A->n, &(s->rho_x), x, 1, y, 1);
 }
 
@@ -112,6 +155,14 @@
   cudaError_t err;
   ScsLinSysWork *p = (ScsLinSysWork *)scs_calloc(1, sizeof(ScsLinSysWork));
   ScsGpuMatrix *Ag = (ScsGpuMatrix *)scs_malloc(sizeof(ScsGpuMatrix));
+  
+  /* Used for initializing dense vectors */
+  scs_float *tmp_null_n = SCS_NULL;
+  scs_float *tmp_null_m = SCS_NULL;
+
+#if GPU_TRANSPOSE_MAT > 0
+  size_t new_buffer_size = 0;
+#endif
 
   p->cublas_handle = 0;
   p->cusparse_handle = 0;
@@ -119,6 +170,9 @@
   p->total_solve_time = 0;
   p->tot_cg_its = 0;
 
+  p->buffer_size = 0;
+  p->buffer = SCS_NULL;
+
   /* Get handle to the CUBLAS context */
   cublasCreate(&p->cublas_handle);
 
@@ -130,9 +184,7 @@
   Ag->Annz = A->p[A->n];
   Ag->descr = 0;
   /* Matrix description */
-  cusparseCreateMatDescr(&Ag->descr);
-  cusparseSetMatType(Ag->descr, CUSPARSE_MATRIX_TYPE_GENERAL);
-  cusparseSetMatIndexBase(Ag->descr, CUSPARSE_INDEX_BASE_ZERO);
+
   p->Ag = Ag;
   p->Agt = SCS_NULL;
 
@@ -155,6 +207,18 @@
   cudaMemcpy(Ag->x, A->x, (A->p[A->n]) * sizeof(scs_float),
              cudaMemcpyHostToDevice);
 
+  cusparseCreateCsr
+  (&Ag->descr, Ag->n, Ag->m, Ag->Annz, Ag->p, Ag->i, Ag->x,
+    SCS_CUSPARSE_INDEX, SCS_CUSPARSE_INDEX,
+    CUSPARSE_INDEX_BASE_ZERO, SCS_CUDA_FLOAT);
+
+  cudaMalloc((void **)&tmp_null_n, A->n * sizeof(scs_float));
+  cudaMalloc((void **)&tmp_null_m, A->m * sizeof(scs_float));
+  cusparseCreateDnVec(&p->dn_vec_n, Ag->n, tmp_null_n, SCS_CUDA_FLOAT);
+  cusparseCreateDnVec(&p->dn_vec_m, Ag->m, tmp_null_m, SCS_CUDA_FLOAT);
+  cudaFree(tmp_null_n);
+  cudaFree(tmp_null_m);
+
   get_preconditioner(A, stgs, p);
 
 #if GPU_TRANSPOSE_MAT > 0
@@ -164,18 +228,41 @@
   p->Agt->Annz = A->p[A->n];
   p->Agt->descr = 0;
   /* Matrix description */
-  cusparseCreateMatDescr(&p->Agt->descr);
-  cusparseSetMatType(p->Agt->descr, CUSPARSE_MATRIX_TYPE_GENERAL);
-  cusparseSetMatIndexBase(p->Agt->descr, CUSPARSE_INDEX_BASE_ZERO);
 
   cudaMalloc((void **)&p->Agt->i, (A->p[A->n]) * sizeof(scs_int));
   cudaMalloc((void **)&p->Agt->p, (A->m + 1) * sizeof(scs_int));
   cudaMalloc((void **)&p->Agt->x, (A->p[A->n]) * sizeof(scs_float));
   /* transpose Ag into Agt for faster multiplies */
   /* TODO: memory intensive, could perform transpose in CPU and copy to GPU */
-  CUSPARSE(csr2csc)
-  (p->cusparse_handle, A->n, A->m, A->p[A->n], Ag->x, Ag->p, Ag->i, p->Agt->x,
-   p->Agt->i, p->Agt->p, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO);
+  cusparseCsr2cscEx2_bufferSize
+  (p->cusparse_handle, A->n, A->m, A->p[A->n],
+    Ag->x, Ag->p, Ag->i,
+    p->Agt->x, p->Agt->p, p->Agt->i,
+    SCS_CUDA_FLOAT, CUSPARSE_ACTION_NUMERIC,
+    CUSPARSE_INDEX_BASE_ZERO, SCS_CSR2CSC_ALG,
+    &new_buffer_size);
+
+  if (new_buffer_size > p->buffer_size) {
+    if (p->buffer != SCS_NULL) {
+      cudaFree(p->buffer);
+    }
+    cudaMalloc(&p->buffer, new_buffer_size);
+    p->buffer_size = new_buffer_size;
+  }
+
+  cusparseCsr2cscEx2
+  (p->cusparse_handle, A->n, A->m, A->p[A->n],
+    Ag->x, Ag->p, Ag->i,
+    p->Agt->x, p->Agt->p, p->Agt->i,
+    SCS_CUDA_FLOAT, CUSPARSE_ACTION_NUMERIC,
+    CUSPARSE_INDEX_BASE_ZERO, SCS_CSR2CSC_ALG,
+    p->buffer);
+
+  cusparseCreateCsr
+  (&p->Agt->descr, p->Agt->n, p->Agt->m, p->Agt->Annz,
+    p->Agt->p, p->Agt->i, p->Agt->x,
+    SCS_CUSPARSE_INDEX, SCS_CUSPARSE_INDEX,
+    CUSPARSE_INDEX_BASE_ZERO, SCS_CUDA_FLOAT);
 #endif
 
   err = cudaGetLastError();
@@ -285,11 +372,32 @@
   SCS(tic)(&linsys_timer);
   /* all on GPU */
   cudaMemcpy(bg, b, (Ag->n + Ag->m) * sizeof(scs_float), cudaMemcpyHostToDevice);
-  SCS(_accum_by_atrans_gpu)(Ag, &(bg[Ag->n]), bg, p->cusparse_handle);
+
+  cusparseDnVecSetValues(p->dn_vec_m, (void *) &(bg[Ag->n]));
+  cusparseDnVecSetValues(p->dn_vec_n, (void *) bg);
+  SCS(_accum_by_atrans_gpu)(
+    Ag, p->dn_vec_m, p->dn_vec_n, p->cusparse_handle,
+    &p->buffer_size, &p->buffer
+  );
+
   /* solves (I+A'A)x = b, s warm start, solution stored in b */
   cg_its = pcg(p->Ag, stgs, p, s, bg, Ag->n, MAX(cg_tol, CG_BEST_TOL));
   CUBLAS(scal)(p->cublas_handle, Ag->m, &neg_onef, &(bg[Ag->n]), 1);
-  SCS(_accum_by_a_gpu)(Ag, bg, &(bg[Ag->n]), p->cusparse_handle);
+
+  cusparseDnVecSetValues(p->dn_vec_m, (void *) &(bg[Ag->n]));
+  cusparseDnVecSetValues(p->dn_vec_n, (void *) bg);
+#if GPU_TRANSPOSE_MAT > 0
+  SCS(_accum_by_atrans_gpu)(
+    p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+    &p->buffer_size, &p->buffer
+  );
+#else
+  SCS(_accum_by_a_gpu)(
+    Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+    &p->buffer_size, &p->buffer
+  );
+#endif
+
   cudaMemcpy(b, bg, (Ag->n + Ag->m) * sizeof(scs_float), cudaMemcpyDeviceToHost);
 
   if (iter >= 0) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scs-2.1.2/linsys/gpu/indirect/private.h new/scs-2.1.3/linsys/gpu/indirect/private.h
--- old/scs-2.1.2/linsys/gpu/indirect/private.h 2020-06-05 11:04:23.000000000 +0200
+++ new/scs-2.1.3/linsys/gpu/indirect/private.h 2021-04-12 10:43:08.000000000 +0200
@@ -28,6 +28,11 @@
   /* CUDA */
   cublasHandle_t cublas_handle;
   cusparseHandle_t cusparse_handle;
+  /* CUSPARSE */
+  size_t buffer_size;
+  void *buffer;
+  cusparseDnVecDescr_t dn_vec_m; /* Dense vector of length m */
+  cusparseDnVecDescr_t dn_vec_n; /* Dense vector of length n */
 };
 
 #ifdef __cplusplus
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scs-2.1.2/src/rw.c new/scs-2.1.3/src/rw.c
--- old/scs-2.1.2/src/rw.c      2020-06-05 11:04:23.000000000 +0200
+++ new/scs-2.1.3/src/rw.c      2021-04-12 10:43:08.000000000 +0200
@@ -37,7 +37,7 @@
   fread(&(k->ep), sizeof(scs_int), 1, fin);
   fread(&(k->ed), sizeof(scs_int), 1, fin);
   fread(&(k->psize), sizeof(scs_int), 1, fin);
-  k->p = scs_calloc(k->psize, sizeof(scs_int));
+  k->p = scs_calloc(k->psize, sizeof(scs_float));
   fread(k->p, sizeof(scs_float), k->psize, fin);
   return k;
 }

Reply via email to