http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 217acd6..d02a875 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -28,6 +28,7 @@ import static 
jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
@@ -333,11 +334,11 @@ public class LibMatrixCUDA {
                Pointer outputPointer = getDensePointer(gCtx, outputBlock, 
instName);
 
                long t1=0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t1 = 
System.nanoTime();
                getCudaKernels(gCtx).launchKernel("relu_backward",
                                
ExecutionConfig.getConfigForSimpleMatrixOperations(toInt(rows), toInt(cols)),
                                imagePointer, doutPointer, outputPointer, 
toInt(rows), toInt(cols));
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_RELU_BACKWARD_KERNEL, System.nanoTime() - t1);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_RELU_BACKWARD_KERNEL, System.nanoTime() - t1);
 
        }
        
@@ -367,7 +368,7 @@ public class LibMatrixCUDA {
                Pointer tmp = gCtx.allocate(instName, cols*sizeOfDataType);
                reduceCol(gCtx, instName, "reduce_col_sum", imagePointer, tmp, 
N, cols);
                reduceRow(gCtx, instName, "reduce_row_sum", tmp, outputPointer, 
toInt(C), toInt(HW));
-               gCtx.cudaFreeHelper(instName, tmp, DMLScript.EAGER_CUDA_FREE);
+               gCtx.cudaFreeHelper(instName, tmp, gCtx.EAGER_CUDA_FREE);
        }
 
        /**
@@ -402,11 +403,11 @@ public class LibMatrixCUDA {
                Pointer biasPointer = bias.getGPUObject(gCtx).getDensePointer();
                Pointer outputPointer = 
outputBlock.getGPUObject(gCtx).getDensePointer();
                long t1 = 0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t1 = 
System.nanoTime();
                getCudaKernels(gCtx).launchKernel("bias_multiply",
                                
ExecutionConfig.getConfigForSimpleMatrixOperations(toInt(rows), toInt(cols)),
                                imagePointer, biasPointer, outputPointer, 
toInt(rows), toInt(cols), toInt(PQ));
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
 
        }
 
@@ -454,11 +455,11 @@ public class LibMatrixCUDA {
                }
                int PQ = cols / k;
                long t1 = 0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t1 = 
System.nanoTime();
                getCudaKernels(gCtx).launchKernel("bias_add",
                                
ExecutionConfig.getConfigForSimpleMatrixOperations(rows, cols),
                                image, bias, output, rows, cols, PQ);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
        }
        
 
@@ -525,13 +526,13 @@ public class LibMatrixCUDA {
 
                long t0=0, t1=0;
 
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                cudaSupportFunctions.cublassyrk(getCublasHandle(gCtx), 
cublasFillMode.CUBLAS_FILL_MODE_LOWER,transa, m, k, one(), A, lda, zero(), C, 
ldc);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SYRK_LIB, 
System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SYRK_LIB, 
System.nanoTime() - t0);
 
-               if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t1 = 
System.nanoTime();
                copyUpperToLowerTriangle(gCtx, instName, output);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_UPPER_TO_LOWER_TRIANGLE_KERNEL, System.nanoTime() - 
t1);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_UPPER_TO_LOWER_TRIANGLE_KERNEL, System.nanoTime() - 
t1);
        }
 
        /**
@@ -740,7 +741,7 @@ public class LibMatrixCUDA {
                        default:
                                throw new DMLRuntimeException("Internal Error - 
Unsupported reduction direction for summation squared");
                        }
-                       gCtx.cudaFreeHelper(instName, tmp, 
DMLScript.EAGER_CUDA_FREE);
+                       gCtx.cudaFreeHelper(instName, tmp, 
gCtx.EAGER_CUDA_FREE);
                        break;
                }
                case OP_MEAN:{
@@ -853,7 +854,7 @@ public class LibMatrixCUDA {
                                ScalarOperator divideOp = new 
RightScalarOperator(Divide.getDivideFnObject(), clen - 1);
                                matrixScalarOp(gCtx, instName, tmpRow, clen - 
1, rlen, 1, out, divideOp);
 
-                               gCtx.cudaFreeHelper(instName, tmpRow, 
DMLScript.EAGER_CUDA_FREE);
+                               gCtx.cudaFreeHelper(instName, tmpRow, 
gCtx.EAGER_CUDA_FREE);
 
                                break;
                        }
@@ -871,15 +872,15 @@ public class LibMatrixCUDA {
                                ScalarOperator divideOp = new 
RightScalarOperator(Divide.getDivideFnObject(), rlen - 1);
                                matrixScalarOp(gCtx, instName, tmpCol, rlen - 
1, 1, clen, out, divideOp);
 
-                               gCtx.cudaFreeHelper(instName, tmpCol, 
DMLScript.EAGER_CUDA_FREE);
+                               gCtx.cudaFreeHelper(instName, tmpCol, 
gCtx.EAGER_CUDA_FREE);
 
                                break;
                        }
                        default:
                                throw new DMLRuntimeException("Internal Error - 
Unsupported reduction direction for variance");
                        }
-                       gCtx.cudaFreeHelper(instName, tmp, 
DMLScript.EAGER_CUDA_FREE);
-                       gCtx.cudaFreeHelper(instName, tmp2, 
DMLScript.EAGER_CUDA_FREE);
+                       gCtx.cudaFreeHelper(instName, tmp, 
gCtx.EAGER_CUDA_FREE);
+                       gCtx.cudaFreeHelper(instName, tmp2, 
gCtx.EAGER_CUDA_FREE);
                        break;
                }
                case OP_MAXINDEX : {
@@ -938,24 +939,24 @@ public class LibMatrixCUDA {
 
                long t1=0,t2=0;
 
-               if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t1 = 
System.nanoTime();
                getCudaKernels(gCtx).launchKernel(kernelFunction, new 
ExecutionConfig(blocks, threads, sharedMem), in, tempOut, n);
                //cudaDeviceSynchronize;
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t1);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t1);
 
                int s = blocks;
                while (s > 1) {
                        tmp = getKernelParamsForReduceAll(gCtx, s);
                        blocks = tmp[0]; threads = tmp[1]; sharedMem = tmp[2];
-                       if (DMLScript.FINEGRAINED_STATISTICS) t2 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t2 
= System.nanoTime();
                        getCudaKernels(gCtx).launchKernel(kernelFunction, new 
ExecutionConfig(blocks, threads, sharedMem),
                                        tempOut, tempOut, s);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t2);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t2);
                        s = (s + (threads*2-1)) / (threads*2);
                }
                double[] result = {-1f};
                cudaSupportFunctions.deviceToHost(gCtx, tempOut, result, 
instName, false);
-               gCtx.cudaFreeHelper(instName, tempOut, 
DMLScript.EAGER_CUDA_FREE);
+               gCtx.cudaFreeHelper(instName, tempOut, gCtx.EAGER_CUDA_FREE);
                return result[0];
        }
 
@@ -978,11 +979,11 @@ public class LibMatrixCUDA {
                int blocks = tmp[0], threads = tmp[1], sharedMem = tmp[2];
 
                long t0=0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                getCudaKernels(gCtx).launchKernel(kernelFunction, new 
ExecutionConfig(blocks, threads, sharedMem),
                                in, out, rows, cols);
                //cudaDeviceSynchronize;
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_REDUCE_ROW_KERNEL, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_REDUCE_ROW_KERNEL, System.nanoTime() - t0);
 
        }
 
@@ -1005,11 +1006,11 @@ public class LibMatrixCUDA {
                int blocks = tmp[0], threads = tmp[1], sharedMem = tmp[2];
 
                long t0=0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                getCudaKernels(gCtx).launchKernel(kernelFunction, new 
ExecutionConfig(blocks, threads, sharedMem),
                                in, out, rows, cols);
                //cudaDeviceSynchronize;
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_REDUCE_COL_KERNEL, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_REDUCE_COL_KERNEL, System.nanoTime() - t0);
        }
 
        /**
@@ -1329,11 +1330,11 @@ public class LibMatrixCUDA {
                int isLeftScalar = (op instanceof LeftScalarOperator) ? 1 : 0;
                int size = rlenA * clenA;
                long t0=0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                getCudaKernels(gCtx).launchKernel("matrix_scalar_op",
                                
ExecutionConfig.getConfigForSimpleVectorOperations(size),
                                a, scalar, c, size, getBinaryOp(op.fn), 
isLeftScalar);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_MATRIX_SCALAR_OP_KERNEL, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_MATRIX_SCALAR_OP_KERNEL, System.nanoTime() - t0);
        }
 
        /**
@@ -1431,11 +1432,11 @@ public class LibMatrixCUDA {
                        LOG.trace("GPU : matrix_matrix_cellwise_op" + ", 
GPUContext=" + gCtx);
                }
                long t0=0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                getCudaKernels(gCtx).launchKernel("matrix_matrix_cellwise_op",
                                
ExecutionConfig.getConfigForSimpleMatrixOperations(maxRlen, maxClen),
                                a, b, c, maxRlen, maxClen, vecStatusA, 
vecStatusB, getBinaryOp(op.fn));
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_MATRIX_MATRIX_CELLWISE_OP_KERNEL, System.nanoTime() - 
t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_MATRIX_MATRIX_CELLWISE_OP_KERNEL, System.nanoTime() - 
t0);
        }
 
        /**
@@ -1525,11 +1526,11 @@ public class LibMatrixCUDA {
                        int rlen = toInt(out.getNumRows());
                        int clen = toInt(out.getNumColumns());
                        long t0 = 0;
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                t0 = System.nanoTime();
                        int size = rlen * clen;
                        getCudaKernels(gCtx).launchKernel("fill", 
ExecutionConfig.getConfigForSimpleVectorOperations(size), A, constant, size);
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_FILL_KERNEL, System.nanoTime() - t0);
                }
        }
@@ -1544,10 +1545,10 @@ public class LibMatrixCUDA {
         */
        public static void deviceCopy(String instName, Pointer src, Pointer 
dest, int rlen, int clen) {
                long t0=0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                int size = rlen * clen * sizeOfDataType;
                cudaMemcpy(dest, src, size, cudaMemcpyDeviceToDevice);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t0);
        }
 
        /**
@@ -1629,19 +1630,19 @@ public class LibMatrixCUDA {
                        // Invoke cuSparse when either are in sparse format
                        // Perform sparse-sparse dgeam
                        if (!isInSparseFormat(gCtx, in1)) {
-                               if (DMLScript.FINEGRAINED_STATISTICS)
+                               if 
(ConfigurationManager.isFinegrainedStatistics())
                                        t0 = System.nanoTime();
                                in1.getGPUObject(gCtx).denseToSparse();
-                               if (DMLScript.FINEGRAINED_STATISTICS)
+                               if 
(ConfigurationManager.isFinegrainedStatistics())
                                        
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DENSE_TO_SPARSE,
                                                        System.nanoTime() - t0);
                        }
                        CSRPointer A = 
in1.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
                        if (!isInSparseFormat(gCtx, in2)) {
-                               if (DMLScript.FINEGRAINED_STATISTICS)
+                               if 
(ConfigurationManager.isFinegrainedStatistics())
                                        t0 = System.nanoTime();
                                in2.getGPUObject(gCtx).denseToSparse();
-                               if (DMLScript.FINEGRAINED_STATISTICS)
+                               if 
(ConfigurationManager.isFinegrainedStatistics())
                                        
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DENSE_TO_SPARSE,
                                                        System.nanoTime() - t0);
                        }
@@ -1664,21 +1665,21 @@ public class LibMatrixCUDA {
                                                        "Transpose in 
cusparseDcsrgeam not supported for sparse matrices on GPU");
                                }
 
-                               if (DMLScript.FINEGRAINED_STATISTICS)
+                               if 
(ConfigurationManager.isFinegrainedStatistics())
                                        t1 = System.nanoTime();
                                CSRPointer C = 
CSRPointer.allocateForDgeam(gCtx, getCusparseHandle(gCtx), A, B, m, n);
-                               if (DMLScript.FINEGRAINED_STATISTICS)
+                               if 
(ConfigurationManager.isFinegrainedStatistics())
                                        
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_SPARSE_ALLOCATE_LIB,
                                                        System.nanoTime() - t1);
 
                                
out.getGPUObject(gCtx).setSparseMatrixCudaPointer(C);
                                //long sizeOfC = CSRPointer.estimateSize(C.nnz, 
out.getNumRows());
-                               if (DMLScript.FINEGRAINED_STATISTICS)
+                               if 
(ConfigurationManager.isFinegrainedStatistics())
                                        t0 = System.nanoTime();
                                
cudaSupportFunctions.cusparsecsrgeam(getCusparseHandle(gCtx), m, n, alphaPtr, 
A.descr, toInt(A.nnz), A.val, A.rowPtr, A.colInd, betaPtr,
                                                B.descr, toInt(B.nnz), B.val, 
B.rowPtr, B.colInd, C.descr, C.val, C.rowPtr, C.colInd);
                                //cudaDeviceSynchronize;
-                               if (DMLScript.FINEGRAINED_STATISTICS)
+                               if 
(ConfigurationManager.isFinegrainedStatistics())
                                        
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_SPARSE_DGEAM_LIB,
                                                        System.nanoTime() - t0);
                        }
@@ -1705,9 +1706,9 @@ public class LibMatrixCUDA {
                        getDenseMatrixOutputForGPUInstruction(ec, instName, 
outputName, outRLen, outCLen);      // Allocated the dense output matrix
                        Pointer C = getDensePointer(gCtx, out, instName);
 
-                       if (DMLScript.FINEGRAINED_STATISTICS) t0 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t0 
= System.nanoTime();
                        cudaSupportFunctions.cublasgeam(getCublasHandle(gCtx), 
transa, transb, m, n, alphaPtr, A, lda, betaPtr, B, ldb, C, ldc);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
                }
        }
        
@@ -1736,9 +1737,9 @@ public class LibMatrixCUDA {
                int m = toInt(numRowsA);
                int n = lda;
                int ldc = m;
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                cudaSupportFunctions.cublasgeam(getCublasHandle(gCtx), 
CUBLAS_OP_T, CUBLAS_OP_T, m, n, one(), A, lda, zero(), A, ldb, C, ldc);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
        }
 
 
@@ -1846,7 +1847,7 @@ public class LibMatrixCUDA {
         */
        protected static void sliceDenseDense(GPUContext gCtx, String instName, 
Pointer inPointer, Pointer outPointer, 
                        int rl, int ru, int cl, int cu, int inClen) {
-               long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
+               long t0 = ConfigurationManager.isFinegrainedStatistics() ? 
System.nanoTime() : 0;
                long retClen = cu - cl + 1;
                if (inClen == retClen) {
                        cudaMemcpy(outPointer, inPointer.withByteOffset(rl * 
inClen * sizeOfDataType), (ru - rl + 1) * inClen
@@ -1856,7 +1857,7 @@ public class LibMatrixCUDA {
                        getCudaKernels(gCtx).launchKernel("slice_dense_dense", 
ExecutionConfig.getConfigForSimpleVectorOperations(toInt(retRlen*retClen)),
                                        inPointer, outPointer, rl, ru, cl, cu, 
inClen,  retRlen, retClen);
                }
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_RIX_DENSE_OP, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_RIX_DENSE_OP, System.nanoTime() - t0);
        }
        
        /**
@@ -1879,7 +1880,7 @@ public class LibMatrixCUDA {
                if(size == 0) return;
                
                int retRlen = ru - rl + 1;
-               long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
+               long t0 = ConfigurationManager.isFinegrainedStatistics() ? 
System.nanoTime() : 0;
                int retClen = cu - cl + 1;
                
                String kernel = null; String timer = null;
@@ -1901,7 +1902,7 @@ public class LibMatrixCUDA {
                // We can generalize this later to output sparse matrix.
                getCudaKernels(gCtx).launchKernel(kernel, 
ExecutionConfig.getConfigForSimpleVectorOperations(size),
                                inPointer.val, inPointer.rowPtr, 
inPointer.colInd, outPointer, rl, ru, cl, cu, retClen);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, timer, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, timer, System.nanoTime() - t0);
        }
        
        /**
@@ -1946,11 +1947,11 @@ public class LibMatrixCUDA {
                int maxRows = toInt(Math.max(rowsA, rowsB));
                int maxCols = toInt(Math.max(colsA, colsB));
 
-               if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t1 = 
System.nanoTime();
                getCudaKernels(gCtx)
                .launchKernel("cbind", 
ExecutionConfig.getConfigForSimpleMatrixOperations(maxRows, maxCols), A, B, C,
                                rowsA, colsA, rowsB, colsB);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CBIND_KERNEL, System.nanoTime() - t1);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CBIND_KERNEL, System.nanoTime() - t1);
 
        }
 
@@ -1981,11 +1982,11 @@ public class LibMatrixCUDA {
                int maxRows = Math.max(rowsA, rowsB);
                int maxCols = Math.max(colsA, colsB);
 
-               if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t1 = 
System.nanoTime();
                getCudaKernels(gCtx)
                .launchKernel("rbind", 
ExecutionConfig.getConfigForSimpleMatrixOperations(maxRows, maxCols), A, B, C,
                                rowsA, colsA, rowsB, colsB);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_RBIND_KERNEL, System.nanoTime() - t1);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_RBIND_KERNEL, System.nanoTime() - t1);
 
        }
 
@@ -2315,10 +2316,10 @@ public class LibMatrixCUDA {
                        Pointer output = getDensePointer(gCtx, out, instName);
                        Pointer input = getDensePointer(gCtx, in1, instName);
                        int size = toInt(in1.getNumColumns() * 
in1.getNumRows());
-                       if (DMLScript.FINEGRAINED_STATISTICS) t1 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t1 
= System.nanoTime();
                        getCudaKernels(gCtx).launchKernel(kernel, 
ExecutionConfig.getConfigForSimpleVectorOperations(size),
                                        input, output, size);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, kernelTimer, System.nanoTime() - 
t1);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, kernelTimer, System.nanoTime() - 
t1);
                }
        }
 
@@ -2356,13 +2357,13 @@ public class LibMatrixCUDA {
                        // becomes
                        // C <- A
                        // C <- alpha*B + C
-                       if (DMLScript.FINEGRAINED_STATISTICS) t1 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t1 
= System.nanoTime();
                        cudaMemcpy(C, A, n*((long)sizeOfDataType), 
cudaMemcpyDeviceToDevice);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t1);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t1);
 
-                       if (DMLScript.FINEGRAINED_STATISTICS) t2 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t2 
= System.nanoTime();
                        cudaSupportFunctions.cublasaxpy(getCublasHandle(gCtx), 
toInt(n), alphaPtr, B, 1, C, 1);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DAXPY_LIB, System.nanoTime() - t2);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DAXPY_LIB, System.nanoTime() - t2);
                }
                else {
                        if(LOG.isTraceEnabled()) {
@@ -2372,12 +2373,12 @@ public class LibMatrixCUDA {
                        // Matrix-Vector daxpy
                        // Note: Vector-Matrix operation is not supported
                        // daxpy_matrix_vector(double* A,  double* B, double 
alpha, double* ret, int rlenA, int clenA, int rlenB, int clenB)
-                       if (DMLScript.FINEGRAINED_STATISTICS) t1 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t1 
= System.nanoTime();
                        int rlenA = toInt(in1.getNumRows()); int clenA =  
toInt(in1.getNumColumns());
                        int rlenB = toInt(in2.getNumRows()); int clenB =  
toInt(in2.getNumColumns());
                        
getCudaKernels(gCtx).launchKernel("daxpy_matrix_vector", 
ExecutionConfig.getConfigForSimpleMatrixOperations(rlenA, clenA),
                                        A, B, constant, C, rlenA, clenA, rlenB, 
clenB);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DAXPY_MV_KERNEL, System.nanoTime() - t1);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DAXPY_MV_KERNEL, System.nanoTime() - t1);
                }
        }
 
@@ -2424,20 +2425,20 @@ public class LibMatrixCUDA {
                // convert dense matrices to row major
                // Operation in cuSolver and cuBlas are for column major dense 
matrices
                // and are destructive to the original input
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                GPUObject ATobj = (GPUObject) Aobj.clone();
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                ATobj.denseRowMajorToColumnMajor();
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
                Pointer A = ATobj.getDensePointer();
 
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                GPUObject bTobj = (GPUObject) bobj.clone();
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                bTobj.denseRowMajorToColumnMajor();
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
 
 
                Pointer b = bTobj.getDensePointer();
@@ -2446,18 +2447,18 @@ public class LibMatrixCUDA {
                // http://docs.nvidia.com/cuda/cusolver/#ormqr-example1
 
                // step 3: query working space of geqrf and ormqr
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                int[] lwork = {0};
                
cudaSupportFunctions.cusolverDngeqrf_bufferSize(gCtx.getCusolverDnHandle(), m, 
n, A, m, lwork);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_QR_BUFFER, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_QR_BUFFER, System.nanoTime() - t0);
 
                // step 4: compute QR factorization
                Pointer work = gCtx.allocate(instName, lwork[0] * 
sizeOfDataType);
                Pointer tau = gCtx.allocate(instName, m * sizeOfDataType);
                Pointer devInfo = gCtx.allocate(instName, Sizeof.INT);
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                
cudaSupportFunctions.cusolverDngeqrf(gCtx.getCusolverDnHandle(), m, n, A, m, 
tau, work, lwork[0], devInfo);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_QR, 
System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_QR, 
System.nanoTime() - t0);
 
                int[] qrError = {-1};
                cudaMemcpy(Pointer.to(qrError), devInfo, Sizeof.INT, 
cudaMemcpyDeviceToHost);
@@ -2466,34 +2467,34 @@ public class LibMatrixCUDA {
                }
 
                // step 5: compute Q^T*B
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                
cudaSupportFunctions.cusolverDnormqr(gCtx.getCusolverDnHandle(), 
cublasSideMode.CUBLAS_SIDE_LEFT, cublasOperation.CUBLAS_OP_T, m, 1, n, A, m, 
tau, b, m, work, lwork[0], devInfo);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ORMQR, 
System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ORMQR, 
System.nanoTime() - t0);
                cudaMemcpy(Pointer.to(qrError), devInfo, Sizeof.INT, 
cudaMemcpyDeviceToHost);
                if (qrError[0] != 0) {
                        throw new DMLRuntimeException("GPU : Error in call to 
ormqr (to compuete Q^T*B after QR factorization) as part of solve, argument " + 
qrError[0] + " was wrong");
                }
 
                // step 6: compute x = R \ Q^T*B
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                cudaSupportFunctions.cublastrsm(gCtx.getCublasHandle(),
                        cublasSideMode.CUBLAS_SIDE_LEFT, 
cublasFillMode.CUBLAS_FILL_MODE_UPPER, cublasOperation.CUBLAS_OP_N, 
cublasDiagType.CUBLAS_DIAG_NON_UNIT,
                        n, 1, dataTypePointerTo(1.0), A, m, b, m);
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_TRSM, 
System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_TRSM, 
System.nanoTime() - t0);
 
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                bTobj.denseColumnMajorToRowMajor();
-               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_COLUMN_TO_ROW_MAJOR, System.nanoTime() - t0);
+               if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_COLUMN_TO_ROW_MAJOR, System.nanoTime() - t0);
 
                // TODO  : Find a way to assign bTobj directly to the output 
and set the correct flags so as to not crash
                // There is an avoidable copy happening here
                MatrixObject out = getDenseMatrixOutputForGPUInstruction(ec, 
instName, outputName, in1.getNumColumns(), 1);
                cudaMemcpy(out.getGPUObject(gCtx).getDensePointer(), 
bTobj.getDensePointer(), n * 1 * sizeOfDataType, cudaMemcpyDeviceToDevice);
 
-               gCtx.cudaFreeHelper(instName, work, DMLScript.EAGER_CUDA_FREE);
-               gCtx.cudaFreeHelper(instName, tau, DMLScript.EAGER_CUDA_FREE);
-               ATobj.clearData(instName, DMLScript.EAGER_CUDA_FREE);
-               bTobj.clearData(instName, DMLScript.EAGER_CUDA_FREE);
+               gCtx.cudaFreeHelper(instName, work, gCtx.EAGER_CUDA_FREE);
+               gCtx.cudaFreeHelper(instName, tau, gCtx.EAGER_CUDA_FREE);
+               ATobj.clearData(instName, gCtx.EAGER_CUDA_FREE);
+               bTobj.clearData(instName, gCtx.EAGER_CUDA_FREE);
 
                //debugPrintMatrix(b, n, 1);
     }
@@ -2514,10 +2515,10 @@ public class LibMatrixCUDA {
         */
        public static MatrixObject 
getDenseMatrixOutputForGPUInstruction(ExecutionContext ec, String instName, 
String name, long numRows, long numCols) {
                long t0=0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                Pair<MatrixObject, Boolean> mb = 
ec.getDenseMatrixOutputForGPUInstruction(name, numRows, numCols);
                if (mb.getValue())
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_ALLOCATE_DENSE_OUTPUT, System.nanoTime() - t0);
                return mb.getKey();
        }
@@ -2535,10 +2536,10 @@ public class LibMatrixCUDA {
         */
        private static MatrixObject 
getSparseMatrixOutputForGPUInstruction(ExecutionContext ec, long numRows, long 
numCols, long nnz, String instName, String name) {
                long t0=0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t0 = 
System.nanoTime();
                Pair<MatrixObject, Boolean> mb = 
ec.getSparseMatrixOutputForGPUInstruction(name, numRows, numCols, nnz);
                if (mb.getValue())
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_ALLOCATE_SPARSE_OUTPUT, System.nanoTime() - t0);
                return mb.getKey();
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
index c6abbfe..d3b5984 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
@@ -49,7 +49,7 @@ import jcuda.jcudnn.cudnnTensorDescriptor;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
@@ -136,7 +136,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
        private static Pointer denseIm2col(GPUContext gCtx, String instName, 
MatrixObject image, boolean isSparseImage, long N, long C, long H, long W,
                        int R, int S, int pad_h, int pad_w, int stride_h, int 
stride_w, int P, int Q) {
                Pointer im2colPointer = null;
-               long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
+               long t1 = ConfigurationManager.isFinegrainedStatistics() ? 
System.nanoTime() : 0;
                if(isSparseImage) {
                        CSRPointer inPointer = getSparsePointer(gCtx, image, 
instName);
                        if(inPointer.nnz < 0) {
@@ -147,7 +147,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                                
getCudaKernels(gCtx).launchKernel("sparse_dense_im2col", 
ExecutionConfig.getConfigForSimpleVectorOperations(toInt(inPointer.nnz)), 
                                                inPointer.val, 
inPointer.rowPtr, inPointer.colInd, im2colPointer, inPointer.nnz, N, 
                                                C*H*W, H*W, W, R, S, P, Q, P*Q, 
R*S, N*P*Q, stride_h, stride_w, pad_h, pad_w);
-                               if (DMLScript.FINEGRAINED_STATISTICS)
+                               if 
(ConfigurationManager.isFinegrainedStatistics())
                                        
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_SPARSE_IM2COL_KERNEL, System.nanoTime() - t1);
                        }
                        else
@@ -159,7 +159,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                        getCudaKernels(gCtx).launchKernel("dense_dense_im2col", 
ExecutionConfig.getConfigForSimpleVectorOperations(toInt(N*C*H*W)), 
                                        imagePointer, im2colPointer, N*C*H*W, 
                                        C*H*W, H*W, W, R, S, P, Q, P*Q, R*S, 
N*P*Q, stride_h, stride_w, pad_h, pad_w);
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DENSE_IM2COL_KERNEL, System.nanoTime() - t1);
                }
                return im2colPointer;
@@ -220,16 +220,16 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                                CSRPointer filterPointer = 
filter.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
                                Pointer matmultOutputPointer = 
gCtx.allocate(instName, NKPQ*sizeOfDataType);
                                LibMatrixCuMatMult.sparseDenseMatMult(gCtx, 
instName, matmultOutputPointer, filterPointer, im2colPointer, K, CRS, CRS, NPQ, 
K, NPQ, false, false);
-                               gCtx.cudaFreeHelper(instName, im2colPointer, 
DMLScript.EAGER_CUDA_FREE);
+                               gCtx.cudaFreeHelper(instName, im2colPointer, 
gCtx.EAGER_CUDA_FREE);
                                
                                // Perform reorg_knpq a reorg operation of 
matmultOutputPointer matrix with dimensions [K, NPQ]
                                // and return a matrix dstPointer with 
dimensions [N, KPQ]
-                               long t1 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t1 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                getCudaKernels(gCtx).launchKernel("reorg_knpq", 
ExecutionConfig.getConfigForSimpleVectorOperations(toInt(NKPQ)), 
                                                matmultOutputPointer, 
dstPointer, NKPQ, NPQ, KPQ, P*Q);
-                               if (DMLScript.FINEGRAINED_STATISTICS)
+                               if 
(ConfigurationManager.isFinegrainedStatistics())
                                        
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DENSE_REORG_KNPQ_KERNEL, System.nanoTime() - t1);
-                               gCtx.cudaFreeHelper(instName, 
matmultOutputPointer, DMLScript.EAGER_CUDA_FREE);
+                               gCtx.cudaFreeHelper(instName, 
matmultOutputPointer, gCtx.EAGER_CUDA_FREE);
                        }
                        else {
                                // Filter and output are accounted as dense in 
the memory estimation for conv2d
@@ -357,13 +357,13 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                }
                try {
                        long t1 = 0;
-                       if (DMLScript.FINEGRAINED_STATISTICS) t1 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t1 
= System.nanoTime();
                        int status = 
cudnnConvolutionForward(getCudnnHandle(gCtx), one(),
                                        algo.nchwTensorDesc, image,
                                        algo.filterDesc, filter,
                                        algo.convDesc, algo.algo, 
algo.workSpace, algo.sizeInBytes, zero(),
                                        algo.nkpqTensorDesc, output);
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CONVOLUTION_FORWARD_LIB, System.nanoTime() - t1);
                        if (status != cudnnStatus.CUDNN_STATUS_SUCCESS) {
                                throw new DMLRuntimeException("Could not 
executed cudnnConvolutionForward: " + cudnnStatus.stringFor(status));
@@ -438,9 +438,9 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                                                // Perform one-input 
conv2dBackwardFilter
                                                Pointer tempdwPointer = 
gCtx.allocate(instName, KCRS*sizeOfDataType);
                                                for(int n = 0; n < N; n++) {
-                                                       long t0 = 
DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+                                                       long t0 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                                        
cudaMemset(tempdwPointer, 0, KCRS*sizeOfDataType);
-                                                       
if(DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, 
System.nanoTime() - t0);
+                                                       
if(ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, 
System.nanoTime() - t0);
                                                        // Perform one-input 
conv2dBackwardFilter
                                                        
cudnnConv2dBackwardFilter(gCtx, instName, imgFetcher.getNthRow(n), 
doutFetcher.getNthRow(n), tempdwPointer, algo);
                                                        
getCudaKernels(gCtx).launchKernel("inplace_add",
@@ -475,10 +475,10 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                        LOG.trace("GPU : conv2dBackwardFilter" + ", 
GPUContext=" + gCtx);
                }
                try {
-                       long t1 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                       long t1 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                        int status = 
cudnnConvolutionBackwardFilter(getCudnnHandle(gCtx), one(), 
algo.nchwTensorDesc, imagePointer,
                                        algo.nkpqTensorDesc, doutPointer, 
algo.convDesc, algo.algo, algo.workSpace, algo.sizeInBytes, zero(), 
algo.filterDesc, dwPointer);
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CONVOLUTION_BACKWARD_FILTER_LIB, System.nanoTime() - 
t1);
                        if (status != 
jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
                                throw new DMLRuntimeException("Could not 
executed cudnnConvolutionBackwardFilter: " + 
jcuda.jcudnn.cudnnStatus.stringFor(status));
@@ -578,10 +578,10 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                        LOG.trace("GPU : conv2dBackwardData" + ", GPUContext=" 
+ gCtx);
                }
                try {
-                       long t1 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                       long t1 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                        int status = 
cudnnConvolutionBackwardData(getCudnnHandle(gCtx), one(), algo.filterDesc, w,
                                        algo.nkpqTensorDesc, dy, algo.convDesc, 
algo.algo, algo.workSpace, algo.sizeInBytes, zero(), algo.nchwTensorDesc, dx);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CONVOLUTION_BACKWARD_DATA_LIB, System.nanoTime() - 
t1);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CONVOLUTION_BACKWARD_DATA_LIB, System.nanoTime() - 
t1);
 
                        if(status != 
jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
                                throw new DMLRuntimeException("Could not 
executed cudnnConvolutionBackwardData: " + 
jcuda.jcudnn.cudnnStatus.stringFor(status));
@@ -653,11 +653,11 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                                
LibMatrixCuDNNPoolingDescriptors.cudnnPoolingDescriptors(gCtx, instName, N, C, 
H, W, K, R, S, 
                                                pad_h, pad_w, stride_h, 
stride_w, P, Q, poolingType)) {
                        long t1=0,t2=0;
-                       if (DMLScript.FINEGRAINED_STATISTICS) t1 = 
System.nanoTime();
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
-                       if (DMLScript.FINEGRAINED_STATISTICS) t2 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t1 
= System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
+                       if (ConfigurationManager.isFinegrainedStatistics()) t2 
= System.nanoTime();
                        int status = cudnnPoolingForward(getCudnnHandle(gCtx), 
desc.poolingDesc, one(), desc.xDesc, x, zero(), desc.yDesc, y);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
                        if(status != 
jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
                                throw new DMLRuntimeException("Could not 
executed cudnnPoolingForward: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
                        }
@@ -752,20 +752,20 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                        long t1=0, t2=0, t3=0;
                        int status;
                        if(!isMaxPoolOutputProvided) {
-                               if (DMLScript.FINEGRAINED_STATISTICS) t1 = 
System.nanoTime();
+                               if 
(ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
                                long numBytes = N*C*P*Q*sizeOfDataType;
                                y = gCtx.allocate(instName, numBytes);
-                               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
-                               if (DMLScript.FINEGRAINED_STATISTICS) t2 = 
System.nanoTime();
+                               if 
(ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
+                               if 
(ConfigurationManager.isFinegrainedStatistics()) t2 = System.nanoTime();
                                status = 
cudnnPoolingForward(getCudnnHandle(gCtx), desc.poolingDesc, one(), desc.xDesc, 
x, zero(), desc.yDesc, y);
-                               if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
+                               if 
(ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
                                if(status != 
jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
                                        throw new DMLRuntimeException("Could 
not executed cudnnPoolingForward before cudnnPoolingBackward: " + 
jcuda.jcudnn.cudnnStatus.stringFor(status));
                                }
                        }
-                       if (DMLScript.FINEGRAINED_STATISTICS) t3 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t3 
= System.nanoTime();
                        status = cudnnPoolingBackward(getCudnnHandle(gCtx), 
desc.poolingDesc, one(), desc.yDesc, y, desc.dyDesc, dy, desc.xDesc, x, zero(), 
desc.dxDesc, dx);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_MAXPOOLING_BACKWARD_LIB, System.nanoTime() - t3);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_MAXPOOLING_BACKWARD_LIB, System.nanoTime() - t3);
 
                        if(status != 
jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
                                throw new DMLRuntimeException("Could not 
executed cudnnPoolingBackward: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
@@ -775,10 +775,10 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                }
                finally {
                        long t4=0;
-                       if (DMLScript.FINEGRAINED_STATISTICS) t4 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t4 
= System.nanoTime();
                        if(!isMaxPoolOutputProvided)
-                               gCtx.cudaFreeHelper(instName, y, 
DMLScript.EAGER_CUDA_FREE);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t4);
+                               gCtx.cudaFreeHelper(instName, y, 
gCtx.EAGER_CUDA_FREE);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t4);
                }
        }
 
@@ -795,18 +795,18 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                        cudnnCreateActivationDescriptor(activationDescriptor);
                        double dummy = -1;
                        cudnnSetActivationDescriptor(activationDescriptor, 
CUDNN_ACTIVATION_RELU, CUDNN_PROPAGATE_NAN, dummy);
-                       if (DMLScript.FINEGRAINED_STATISTICS) t0 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t0 
= System.nanoTime();
                        cudnnActivationForward(getCudnnHandle(gCtx), 
activationDescriptor,
                                        one(), srcTensorDesc, srcData,
                                        zero(), dstTensorDesc, dstData);
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_ACTIVATION_FORWARD_LIB, System.nanoTime() - t0);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_ACTIVATION_FORWARD_LIB, System.nanoTime() - t0);
                } catch (CudaException e) {
                        throw new DMLRuntimeException("Error in conv2d in 
GPUContext " + gCtx.toString() + " from Thread " + 
Thread.currentThread().toString(), e);
                }
                finally {
                        long t1=0;
-                       if (DMLScript.FINEGRAINED_STATISTICS) t1 = 
System.nanoTime();
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t1);
+                       if (ConfigurationManager.isFinegrainedStatistics()) t1 
= System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t1);
                }
        }
 
@@ -831,11 +831,11 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                        }
                        // Invokes relu(double* A,  double* ret, int rlen, int 
clen)
                        Pointer srcData = getDensePointerForCuDNN(gCtx, in, 
instName); // TODO: FIXME: Add sparse kernel support for relu
-                       if (DMLScript.FINEGRAINED_STATISTICS) t0 = 
System.nanoTime();
+                       if (ConfigurationManager.isFinegrainedStatistics()) t0 
= System.nanoTime();
                        getCudaKernels(gCtx).launchKernel("relu",
                                        
ExecutionConfig.getConfigForSimpleMatrixOperations(toInt(N), toInt(CHW)),
                                        srcData, dstData, toInt(N), toInt(CHW));
-                       if (DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_RELU_KERNEL, System.nanoTime() - t0);
+                       if (ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_RELU_KERNEL, System.nanoTime() - t0);
                }
                else {
                        cudnnTensorDescriptor tensorDescriptor = new 
cudnnTensorDescriptor();
@@ -910,13 +910,13 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                }
                
                if(return_sequences) {
-                       gCtx.cudaFreeHelper(instName, hyPointer, 
DMLScript.EAGER_CUDA_FREE);
+                       gCtx.cudaFreeHelper(instName, hyPointer, 
gCtx.EAGER_CUDA_FREE);
                        Pointer sysmlYPointer = getDenseOutputPointer(ec, gCtx, 
instName, outputName, N, T*M);
                        
LibMatrixCUDA.getCudaKernels(gCtx).launchKernel("prepare_lstm_output",
                                        
ExecutionConfig.getConfigForSimpleVectorOperations(N*T*M),
                                        sysmlYPointer, cudnnYPointer, N, T, M, 
N*T*M);
                }
-               gCtx.cudaFreeHelper(instName, cudnnYPointer, 
DMLScript.EAGER_CUDA_FREE);
+               gCtx.cudaFreeHelper(instName, cudnnYPointer, 
gCtx.EAGER_CUDA_FREE);
        }
        
        public static void lstmBackward(ExecutionContext ec, GPUContext gCtx, 
String instName,
@@ -966,7 +966,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                                        // ----------------------
                                        algo.workSpace, algo.sizeInBytes, 
                                        algo.reserveSpace, 
algo.reserveSpaceSizeInBytes);
-                       gCtx.cudaFreeHelper(instName, dy, 
DMLScript.EAGER_CUDA_FREE);
+                       gCtx.cudaFreeHelper(instName, dy, gCtx.EAGER_CUDA_FREE);
                        ec.releaseMatrixInputForGPUInstruction(dcyName);
                        ec.releaseMatrixOutputForGPUInstruction(dhxName);
                        ec.releaseMatrixOutputForGPUInstruction(dcxName);
@@ -976,7 +976,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                                        
ExecutionConfig.getConfigForSimpleVectorOperations(N*T*D),
                                        smlDx, cudnnDx, N, D, T*D, N*T*D);
                        ec.releaseMatrixOutputForGPUInstruction(dxName);
-                       gCtx.cudaFreeHelper(instName, cudnnDx, 
DMLScript.EAGER_CUDA_FREE);
+                       gCtx.cudaFreeHelper(instName, cudnnDx, 
gCtx.EAGER_CUDA_FREE);
                        
                        // 
-------------------------------------------------------------------------------------------
                        Pointer cudnnDwPointer = gCtx.allocate(instName, 
(D+M+2)*(4*M)*LibMatrixCUDA.sizeOfDataType);
@@ -991,12 +991,12 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                                        
ExecutionConfig.getConfigForSimpleVectorOperations((D+M+2)*(4*M)),
                                        getDenseOutputPointer(ec, gCtx, 
instName, dwName, D+M, 4*M), 
                                        getDenseOutputPointer(ec, gCtx, 
instName, dbName, 1, 4*M), cudnnDwPointer, D, M);
-                       gCtx.cudaFreeHelper(instName, cudnnDwPointer, 
DMLScript.EAGER_CUDA_FREE);
+                       gCtx.cudaFreeHelper(instName, cudnnDwPointer, 
gCtx.EAGER_CUDA_FREE);
                        ec.releaseMatrixOutputForGPUInstruction(dwName);
                        ec.releaseMatrixOutputForGPUInstruction(dbName);
                        // 
-------------------------------------------------------------------------------------------
                        
-                       gCtx.cudaFreeHelper(instName, yPointer, 
DMLScript.EAGER_CUDA_FREE);
+                       gCtx.cudaFreeHelper(instName, yPointer, 
gCtx.EAGER_CUDA_FREE);
                }
        }
        

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
index f70b453..dbad80c 100644
--- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
+++ 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
@@ -19,7 +19,7 @@
 
 package org.apache.sysml.runtime.matrix.data;
 
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
@@ -86,7 +86,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements 
java.lang.AutoCloseab
        @Override
        public void close() {
                long t3 = 0;
-               if (DMLScript.FINEGRAINED_STATISTICS) t3 = System.nanoTime();
+               if (ConfigurationManager.isFinegrainedStatistics()) t3 = 
System.nanoTime();
                if(nchwTensorDesc != null)
                        cudnnDestroyTensorDescriptor(nchwTensorDesc);
                if(nkpqTensorDesc != null)
@@ -97,12 +97,12 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements 
java.lang.AutoCloseab
                        cudnnDestroyConvolutionDescriptor(convDesc);
                if(sizeInBytes != 0) {
                        try {
-                               gCtx.cudaFreeHelper(instName, workSpace, 
DMLScript.EAGER_CUDA_FREE);
+                               gCtx.cudaFreeHelper(instName, workSpace, 
gCtx.EAGER_CUDA_FREE);
                        } catch (DMLRuntimeException e) {
                                throw new RuntimeException(e);
                        }
                }
-               if(DMLScript.FINEGRAINED_STATISTICS)
+               if(ConfigurationManager.isFinegrainedStatistics())
                        GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t3);
        }
        
@@ -130,7 +130,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements 
java.lang.AutoCloseab
        public static LibMatrixCuDNNConvolutionAlgorithm 
cudnnGetConvolutionForwardAlgorithm(
                        GPUContext gCtx, String instName, int N, int C, int H, 
int W, int K, int R, int S, 
                        int pad_h, int pad_w, int stride_h, int stride_w, int 
P, int Q, long workspaceLimit) {
-               long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
+               long t1 = ConfigurationManager.isFinegrainedStatistics() ? 
System.nanoTime() : 0;
                LibMatrixCuDNNConvolutionAlgorithm ret = new 
LibMatrixCuDNNConvolutionAlgorithm(gCtx, instName, N, C, H, W, K, R, S, 
                                pad_h, pad_w, stride_h, stride_w, P, Q);
                int[] algos = {-1};
@@ -144,7 +144,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements 
java.lang.AutoCloseab
                        ret.workSpace = gCtx.allocate(instName, 
sizeInBytesArray[0]);
                ret.sizeInBytes = sizeInBytesArray[0];
                ret.algo = algos[0];
-               if (DMLScript.FINEGRAINED_STATISTICS)
+               if (ConfigurationManager.isFinegrainedStatistics())
                        GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
                return ret;
        }
@@ -173,7 +173,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements 
java.lang.AutoCloseab
        public static LibMatrixCuDNNConvolutionAlgorithm 
cudnnGetConvolutionBackwardFilterAlgorithm(
                        GPUContext gCtx, String instName, int N, int C, int H, 
int W, int K, int R, int S, 
                        int pad_h, int pad_w, int stride_h, int stride_w, int 
P, int Q, long workspaceLimit) {
-               long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
+               long t1 = ConfigurationManager.isFinegrainedStatistics() ? 
System.nanoTime() : 0;
                LibMatrixCuDNNConvolutionAlgorithm ret = new 
LibMatrixCuDNNConvolutionAlgorithm(gCtx, instName, N, C, H, W, K, R, S, 
                                pad_h, pad_w, stride_h, stride_w, P, Q);
                
@@ -190,7 +190,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements 
java.lang.AutoCloseab
                ret.sizeInBytes = sizeInBytesArray[0];
                ret.algo = algos[0];
                
-               if (DMLScript.FINEGRAINED_STATISTICS)
+               if (ConfigurationManager.isFinegrainedStatistics())
                        GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
                return ret;
        }
@@ -229,7 +229,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements 
java.lang.AutoCloseab
                        ret.algo = 
jcuda.jcudnn.cudnnConvolutionBwdDataAlgo.CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
                }
                else {
-                       long t1 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                       long t1 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                        int[] algos = {-1};
                        long sizeInBytesArray[] = {Math.min(workspaceLimit, 
MAX_WORKSPACE_LIMIT_BYTES)};
                        
jcuda.jcudnn.JCudnn.cudnnGetConvolutionBackwardDataAlgorithm(
@@ -242,7 +242,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements 
java.lang.AutoCloseab
                                ret.workSpace = gCtx.allocate(instName, 
sizeInBytesArray[0]);
                        ret.sizeInBytes = sizeInBytesArray[0];
                        ret.algo = algos[0];
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
                }
                return ret;

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
index 0130aa6..81a703d 100644
--- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
+++ 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
@@ -21,7 +21,7 @@ package org.apache.sysml.runtime.matrix.data;
 import static jcuda.runtime.JCuda.cudaMemset;
 import jcuda.Pointer;
 
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
@@ -59,10 +59,10 @@ public class LibMatrixCuDNNInputRowFetcher extends 
LibMatrixCUDA implements java
        public Pointer getNthRow(int n) {
                if(isInputInSparseFormat) {
                        jcuda.runtime.JCuda.cudaDeviceSynchronize();
-                       long t0 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                       long t0 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                        cudaMemset(outPointer, 0, numColumns*sizeOfDataType);
                        jcuda.runtime.JCuda.cudaDeviceSynchronize();
-                       if(DMLScript.FINEGRAINED_STATISTICS) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, 
System.nanoTime() - t0);
+                       if(ConfigurationManager.isFinegrainedStatistics()) 
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, 
System.nanoTime() - t0);
                        LibMatrixCUDA.sliceSparseDense(gCtx, instName, 
(CSRPointer)inPointer, outPointer, n, n, 0, LibMatrixCUDA.toInt(numColumns-1), 
numColumns);
                }
                else {

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
index 8ebc4e0..7b2c601 100644
--- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
+++ 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
@@ -32,7 +32,6 @@ import static 
jcuda.jcudnn.cudnnRNNInputMode.CUDNN_LINEAR_INPUT;
 import static jcuda.jcudnn.cudnnDirectionMode.CUDNN_UNIDIRECTIONAL;
 import static jcuda.jcudnn.cudnnRNNAlgo.CUDNN_RNN_ALGO_STANDARD;
 
-import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
@@ -301,7 +300,7 @@ public class LibMatrixCuDNNRnnAlgorithm implements 
java.lang.AutoCloseable {
                }
                if(sizeInBytes != 0) {
                        try {
-                               gCtx.cudaFreeHelper(instName, workSpace, 
DMLScript.EAGER_CUDA_FREE);
+                               gCtx.cudaFreeHelper(instName, workSpace, 
gCtx.EAGER_CUDA_FREE);
                        } catch (DMLRuntimeException e) {
                                throw new RuntimeException(e);
                        }
@@ -309,7 +308,7 @@ public class LibMatrixCuDNNRnnAlgorithm implements 
java.lang.AutoCloseable {
                workSpace = null;
                if(reserveSpaceSizeInBytes != 0) {
                        try {
-                               gCtx.cudaFreeHelper(instName, reserveSpace, 
DMLScript.EAGER_CUDA_FREE);
+                               gCtx.cudaFreeHelper(instName, reserveSpace, 
gCtx.EAGER_CUDA_FREE);
                        } catch (DMLRuntimeException e) {
                                throw new RuntimeException(e);
                        }
@@ -317,7 +316,7 @@ public class LibMatrixCuDNNRnnAlgorithm implements 
java.lang.AutoCloseable {
                reserveSpace = null;
                if(dropOutSizeInBytes != 0) {
                        try {
-                               gCtx.cudaFreeHelper(instName, 
dropOutStateSpace, DMLScript.EAGER_CUDA_FREE);
+                               gCtx.cudaFreeHelper(instName, 
dropOutStateSpace, gCtx.EAGER_CUDA_FREE);
                        } catch (DMLRuntimeException e) {
                                throw new RuntimeException(e);
                        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
index 18739a8..9833456 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
@@ -26,7 +26,7 @@ import jcuda.Pointer;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
@@ -161,19 +161,19 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
                        // and output
                        CSRPointer A = 
left.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
                        CSRPointer B = 
right.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
-                       long t0 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                       long t0 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                        CSRPointer C = 
CSRPointer.allocateForMatrixMultiply(gCtx, getCusparseHandle(gCtx), A, transa, 
B, transb,
                                        params.m, params.n, params.k);
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_SPARSE_ALLOCATE_LIB,
                                                System.nanoTime() - t0);
 
                        // Step 3: Invoke the kernel
-                       long t1 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                       long t1 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                        
cudaSupportFunctions.cusparsecsrgemm(getCusparseHandle(gCtx), transa, transb, 
params.m, params.n, params.k, A.descr,
                                        (int) A.nnz, A.val, A.rowPtr, A.colInd, 
B.descr, (int) B.nnz, B.val, B.rowPtr, B.colInd, C.descr,
                                        C.val, C.rowPtr, C.colInd);
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_SPARSE_MATRIX_SPARSE_MATRIX_LIB,
                                                System.nanoTime() - t1);
                        output.getGPUObject(gCtx).setSparseMatrixCudaPointer(C);
@@ -279,14 +279,14 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
                denseSparseMatMult(getCusparseHandle(gCtx), instName, output, 
B, A, params);
                if (outRLen != 1 && outCLen != 1) {
                        // Transpose: C = t(output)
-                       long t0 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                       long t0 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                        cudaSupportFunctions.cublasgeam(gCtx.getCublasHandle(), 
cublasOperation.CUBLAS_OP_T, cublasOperation.CUBLAS_OP_T,
                                        toInt(outCLen), toInt(outRLen), one(), 
output, toInt(outRLen), zero(), new Pointer(),
                                        toInt(outRLen), C, toInt(outCLen));
-                       if (!DMLScript.EAGER_CUDA_FREE)
+                       if (!gCtx.EAGER_CUDA_FREE)
                                JCuda.cudaDeviceSynchronize();
-                       gCtx.cudaFreeHelper(instName, output, 
DMLScript.EAGER_CUDA_FREE);
-                       if (DMLScript.FINEGRAINED_STATISTICS)
+                       gCtx.cudaFreeHelper(instName, output, 
gCtx.EAGER_CUDA_FREE);
+                       if (ConfigurationManager.isFinegrainedStatistics())
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_TRANSPOSE_LIB, System.nanoTime()
                                                - t0);
                }
@@ -312,7 +312,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
         */
        private static void denseSparseMatMult(cusparseHandle handle, String 
instName, Pointer C, Pointer A, CSRPointer B,
                        CuMatMultParameters param) {
-               long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
+               long t0 = ConfigurationManager.isFinegrainedStatistics() ? 
System.nanoTime() : 0;
                String kernel = 
GPUInstruction.MISC_TIMER_SPARSE_MATRIX_DENSE_MATRIX_LIB;
                // Ignoring sparse vector dense matrix multiplication and dot 
product
                boolean isVector = (param.leftNumRows == 1 && 
!param.isLeftTransposed)
@@ -336,7 +336,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
                        cudaSupportFunctions.cusparsecsrmm2(handle, transa, 
transb, m, param.n, k, toInt(B.nnz), one(), B.descr, B.val,
                                        B.rowPtr, B.colInd, A, param.ldb, 
zero(), C, param.ldc);
                }
-               if (DMLScript.FINEGRAINED_STATISTICS)
+               if (ConfigurationManager.isFinegrainedStatistics())
                        GPUStatistics.maintainCPMiscTimes(instName, kernel, 
System.nanoTime() - t0);
        }
 
@@ -361,7 +361,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
         */
        private static void denseDenseMatMult(cublasHandle handle, String 
instName, Pointer C, Pointer A, Pointer B,
                        CuMatMultParameters param) {
-               long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
+               long t0 = ConfigurationManager.isFinegrainedStatistics() ? 
System.nanoTime() : 0;
                String kernel = null;
                param.rowToColumnMajor();
                param.validate();
@@ -403,7 +403,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
                                        zero(), C, param.ldc);
                        kernel = 
GPUInstruction.MISC_TIMER_DENSE_MATRIX_DENSE_MATRIX_LIB;
                }
-               if (DMLScript.FINEGRAINED_STATISTICS)
+               if (ConfigurationManager.isFinegrainedStatistics())
                        GPUStatistics.maintainCPMiscTimes(instName, kernel, 
System.nanoTime() - t0);
        }
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 0c6f41a..4569dbe 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -28,7 +28,7 @@ import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.functionobjects.KahanPlus;
@@ -91,7 +91,7 @@ public class LibMatrixDNN {
        static AtomicLong loopedConvBwdDataCol2ImTime = new AtomicLong(0);
        
        public static void appendStatistics(StringBuilder sb) {
-               if(DMLScript.FINEGRAINED_STATISTICS) {
+               if(ConfigurationManager.isFinegrainedStatistics()) {
                        sb.append("LibMatrixDNN dense count 
(conv/bwdF/bwdD/im2col/maxBwd):\t" 
                                        + conv2dDenseCount.get() + "/"
                                        + conv2dBwdFilterDenseCount.get() + "/"
@@ -238,7 +238,7 @@ public class LibMatrixDNN {
                        throw new DMLRuntimeException("Incorrect dout 
dimensions in pooling_backward:" + input.getNumRows() + " " + 
input.getNumColumns() + " " + params.N + " " + params.K*params.P*params.Q);
                }
                
-               if(DMLScript.FINEGRAINED_STATISTICS) {
+               if(ConfigurationManager.isFinegrainedStatistics()) {
                        boolean isSparse = (poolType == PoolingType.MAX) ? 
(input.isInSparseFormat() || dout.isInSparseFormat()) : dout.isInSparseFormat();
                        if(isSparse)
                                maxPoolBwdSparseCount.addAndGet(1);
@@ -780,7 +780,7 @@ public class LibMatrixDNN {
                if(params.stride_h <= 0 || params.stride_w <= 0) 
                        throw new DMLRuntimeException("Only positive strides 
supported:" + params.stride_h + ", " + params.stride_w);
                
-               if(DMLScript.FINEGRAINED_STATISTICS) {
+               if(ConfigurationManager.isFinegrainedStatistics()) {
                        if(filter.isInSparseFormat() || 
dout.isInSparseFormat()) {
                                conv2dBwdDataSparseCount.addAndGet(1);
                        }
@@ -805,7 +805,7 @@ public class LibMatrixDNN {
                if(params.stride_h <= 0 || params.stride_w <= 0) 
                        throw new DMLRuntimeException("Only positive strides 
supported:" + params.stride_h + ", " + params.stride_w);
                
-               if(DMLScript.FINEGRAINED_STATISTICS) {
+               if(ConfigurationManager.isFinegrainedStatistics()) {
                        if(input.isInSparseFormat() || dout.isInSparseFormat()) 
{
                                conv2dBwdFilterSparseCount.addAndGet(1);
                        }
@@ -831,7 +831,7 @@ public class LibMatrixDNN {
                if(params.stride_h <= 0 || params.stride_w <= 0) 
                        throw new DMLRuntimeException("Only positive strides 
supported:" + params.stride_h + ", " + params.stride_w);
                
-               if(DMLScript.FINEGRAINED_STATISTICS) {
+               if(ConfigurationManager.isFinegrainedStatistics()) {
                        if(input.isInSparseFormat() || 
filter.isInSparseFormat()) {
                                conv2dSparseCount.addAndGet(1);
                        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
index b938a0a..982949f 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
@@ -22,7 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.concurrent.Callable;
 
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import 
org.apache.sysml.runtime.matrix.data.LibMatrixDNNRotate180.Rotate180Worker;
@@ -172,16 +172,16 @@ public class LibMatrixDNNConv2d
                        MatrixBlock outMM = new MatrixBlock(K, PQ, 
_params.output.sparse);
                        long time1 = 0; long time2 = 0;
                        for(int n = _rl; n < _ru; n++)  {
-                               long t1 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t1 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                LibMatrixDNNIm2Col.im2col(_params.input1, 
outIm2col, n, _params, false);
-                               long t2 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t2 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                
                                // filter %*% _im2ColOutBlock => matMultOutBlock
                                outMM.reset(outMM.rlen, outMM.clen, 
_params.output.sparse);
                                
LibMatrixDNNHelper.singleThreadedMatMult(_params.input2, outIm2col, outMM, 
false, true, _params);
-                               long t3 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t3 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                
-                               if(DMLScript.FINEGRAINED_STATISTICS) {
+                               
if(ConfigurationManager.isFinegrainedStatistics()) {
                                        time1 += t2 - t1;
                                        time2 += t3 - t2;
                                }
@@ -195,7 +195,7 @@ public class LibMatrixDNNConv2d
                                                
_params.bias.getDenseBlockValues(), K, PQ);
                        }
                        
-                       if(DMLScript.FINEGRAINED_STATISTICS) {
+                       if(ConfigurationManager.isFinegrainedStatistics()) {
                                
LibMatrixDNN.loopedConvIm2ColTime.addAndGet(time1);
                                
LibMatrixDNN.loopedConvMatMultTime.addAndGet(time2);
                        }
@@ -416,20 +416,20 @@ public class LibMatrixDNNConv2d
                                // rotate180(dout[n,]) => dout_reshaped
                                rotate180Worker.execute(n, 0);
                                // dout_reshaped %*% filter => temp
-                               long t1 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t1 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                outMM.reset(PQ, CRS, false);
                                
LibMatrixDNNHelper.singleThreadedMatMult(outRotate, filter, outMM, 
!outRotate.sparse, false, _params);
-                               long t2 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t2 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                // col2im(temp) => output[n,] 
                                LibMatrixDNNIm2Col.col2imOverSingleImage(n, 
outMM, _params);
-                               long t3 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t3 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                
-                               if(DMLScript.FINEGRAINED_STATISTICS) {
+                               
if(ConfigurationManager.isFinegrainedStatistics()) {
                                        time1 += t2 - t1;
                                        time2 += t3 - t2;
                                }
                        }
-                       if(DMLScript.FINEGRAINED_STATISTICS) {
+                       if(ConfigurationManager.isFinegrainedStatistics()) {
                                
LibMatrixDNN.loopedConvBwdDataMatMultTime.addAndGet(time1);
                                
LibMatrixDNN.loopedConvBwdDataCol2ImTime.addAndGet(time2);
                        }
@@ -512,24 +512,24 @@ public class LibMatrixDNNConv2d
                                rotate180Worker.execute(n, 0);
                                
                                // im2col(input) => _im2ColOutBlock
-                               long t1 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t1 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                LibMatrixDNNIm2Col.im2col(_params.input1, 
im2ColOutBlock, n, _params, false);
-                               long t2 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t2 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                
                                outMM.reset(CRS, K, false);
                                
LibMatrixDNNHelper.singleThreadedMatMult(im2ColOutBlock, outRotate, outMM, 
!im2ColOutBlock.sparse, !outRotate.sparse, _params);
-                               long t3 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t3 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                
                                if( !outMM.isEmptyBlock() ) //accumulate row 
results
                                        
LibMatrixMult.vectAdd(outMM.getDenseBlockValues(), partRet, 0, 0, K*CRS);
                                
-                               if(DMLScript.FINEGRAINED_STATISTICS) {
+                               
if(ConfigurationManager.isFinegrainedStatistics()) {
                                        time1 += t2 - t1;
                                        time2 += t3 - t2;
                                }
                        }
                        inplaceTransAdd(partRet, _params);
-                       if(DMLScript.FINEGRAINED_STATISTICS) {
+                       if(ConfigurationManager.isFinegrainedStatistics()) {
                                
LibMatrixDNN.loopedConvBwdFilterIm2ColTime.addAndGet(time1);
                                
LibMatrixDNN.loopedConvBwdFilterMatMultTime.addAndGet(time2);
                        }
@@ -562,27 +562,27 @@ public class LibMatrixDNNConv2d
                                rotate180Worker.execute(n, 0);
                                
                                // im2col(input) => _im2ColOutBlock
-                               long t1 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t1 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                LibMatrixDNNIm2Col.im2col(_params.input1, 
im2ColOutBlock, n, _params, true);
-                               long t2 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t2 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                
                                outMM.reset(K, CRS, false);
                                //Timing time = new Timing(true);
                                
LibMatrixDNNHelper.singleThreadedMatMult(outRotate, im2ColOutBlock, 
                                        outMM, !outRotate.sparse, 
!im2ColOutBlock.sparse, _params);
-                               long t3 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
+                               long t3 = 
ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
                                
                                if( !outMM.isEmptyBlock() ) //accumulate row 
results
                                        
LibMatrixMult.vectAdd(outMM.getDenseBlockValues(), partRet, 0, 0, K*CRS);
                                
-                               if(DMLScript.FINEGRAINED_STATISTICS) {
+                               
if(ConfigurationManager.isFinegrainedStatistics()) {
                                        time1 += t2 - t1;
                                        time2 += t3 - t2;
                                }
                        }
                        //no need to transpose because t(t(out)) cancel out
                        inplaceAdd(partRet, _params);
-                       if(DMLScript.FINEGRAINED_STATISTICS) {
+                       if(ConfigurationManager.isFinegrainedStatistics()) {
                                
LibMatrixDNN.loopedConvBwdFilterIm2ColTime.addAndGet(time1);
                                
LibMatrixDNN.loopedConvBwdFilterMatMultTime.addAndGet(time2);
                        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
index aa5ba86..2cb64c2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
@@ -26,7 +26,6 @@ import java.util.stream.IntStream;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.conf.DMLConfig;
 import org.apache.sysml.hops.OptimizerUtils;
@@ -87,7 +86,7 @@ public class LibMatrixNative
                {
                        ret.sparse = false;
                        ret.allocateDenseBlock();
-                       long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+                       long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
                        boolean rccode = false;
                        if( isSinglePrecision() ) {
                                FloatBuffer fin1 = 
toFloatBuffer(m1.getDenseBlockValues(), inBuff, true);
@@ -102,7 +101,7 @@ public class LibMatrixNative
                                        ret.getDenseBlockValues(), 
m1.getNumRows(), m1.getNumColumns(), m2.getNumColumns(), k);
                        }
                        if (rccode) {
-                               if(DMLScript.STATISTICS) {
+                               if(ConfigurationManager.isStatistics()) {
                                        Statistics.nativeLibMatrixMultTime += 
System.nanoTime() - start;
                                        
Statistics.numNativeLibMatrixMultCalls.increment();
                                }
@@ -158,7 +157,7 @@ public class LibMatrixNative
                params.numThreads = params.numThreads <= 0 ? 
NativeHelper.getMaxNumThreads() : params.numThreads;
                if(NativeHelper.isNativeLibraryLoaded() && 
!input.isInSparseFormat() && !filter.isInSparseFormat()) {
                        setNumThreads(params);
-                       long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+                       long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
                        int nnz = 0;
                        if(params.bias == null) {
                                nnz = 
NativeHelper.conv2dDense(input.getDenseBlockValues(), 
filter.getDenseBlockValues(),
@@ -195,7 +194,7 @@ public class LibMatrixNative
                        }
                        //post processing and error handling
                        if(nnz != -1) {
-                               if(DMLScript.STATISTICS) {
+                               if(ConfigurationManager.isStatistics()) {
                                        Statistics.nativeConv2dTime += 
System.nanoTime() - start;
                                        
Statistics.numNativeConv2dCalls.increment();
                                }
@@ -234,13 +233,13 @@ public class LibMatrixNative
                params.numThreads = params.numThreads <= 0 ? 
NativeHelper.getMaxNumThreads() : params.numThreads;
                if(NativeHelper.isNativeLibraryLoaded() && 
!dout.isInSparseFormat() && !input.isInSparseFormat()) {
                        setNumThreads(params);
-                       long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+                       long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
                        int nnz = 
NativeHelper.conv2dBackwardFilterDense(input.getDenseBlockValues(), 
dout.getDenseBlockValues(),
                                        outputBlock.getDenseBlockValues(), 
params.N, params.C, params.H, params.W, 
                                        params.K, params.R, params.S, 
params.stride_h, params.stride_w, params.pad_h, params.pad_w, 
                                        params.P, params.Q, params.numThreads);
                        if(nnz != -1) {
-                               if(DMLScript.STATISTICS) {
+                               if(ConfigurationManager.isStatistics()) {
                                        Statistics.nativeConv2dBwdFilterTime += 
System.nanoTime() - start;
                                        
Statistics.numNativeConv2dBwdFilterCalls.increment();
                                }
@@ -270,13 +269,13 @@ public class LibMatrixNative
                params.numThreads = params.numThreads <= 0 ? 
NativeHelper.getMaxNumThreads() : params.numThreads;
                if(NativeHelper.isNativeLibraryLoaded() && 
!dout.isInSparseFormat() && !filter.isInSparseFormat()) {
                        setNumThreads(params);
-                       long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+                       long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
                        int nnz = 
NativeHelper.conv2dBackwardDataDense(filter.getDenseBlockValues(), 
dout.getDenseBlockValues(),
                                        outputBlock.getDenseBlockValues(), 
params.N, params.C, params.H, params.W, 
                                        params.K, params.R, params.S, 
params.stride_h, params.stride_w, params.pad_h, params.pad_w, 
                                        params.P, params.Q, params.numThreads);
                        if(nnz != -1) {
-                               if(DMLScript.STATISTICS) {
+                               if(ConfigurationManager.isStatistics()) {
                                        Statistics.nativeConv2dBwdDataTime += 
System.nanoTime() - start;
                                        
Statistics.numNativeConv2dBwdDataCalls.increment();
                                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
index 942b56b..044e943 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
@@ -29,7 +29,7 @@ import java.util.stream.IntStream;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
 import org.apache.sysml.utils.GPUStatistics;
@@ -168,7 +168,7 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
        
        @Override
        public void deviceToHost(GPUContext gCtx, Pointer src, double[] dest, 
String instName, boolean isEviction) {
-               long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
+               long t0 = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
                // We invoke transfer matrix from device to host in two cases:
                // 1. During eviction of unlocked matrices
                // 2. During acquireHostRead
@@ -182,7 +182,7 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
                        Pointer deviceDoubleData = gCtx.allocate(instName, 
((long)dest.length)*Sizeof.DOUBLE);
                        LibMatrixCUDA.float2double(gCtx, src, deviceDoubleData, 
dest.length);
                        cudaMemcpy(Pointer.to(dest), deviceDoubleData, 
((long)dest.length)*Sizeof.DOUBLE, cudaMemcpyDeviceToHost);
-                       gCtx.cudaFreeHelper(instName, deviceDoubleData, DMLScript.EAGER_CUDA_FREE);
+                       gCtx.cudaFreeHelper(instName, deviceDoubleData, gCtx.EAGER_CUDA_FREE);
                }
                else {
                        LOG.debug("Potential OOM: Allocated additional space on 
host in deviceToHost");
@@ -190,11 +190,11 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
                        cudaMemcpy(Pointer.to(floatData), src, 
((long)dest.length)*Sizeof.FLOAT, cudaMemcpyDeviceToHost);
                        LibMatrixNative.fromFloatBuffer(floatData, dest);
                }
-               if(DMLScript.STATISTICS) {
+               if(ConfigurationManager.isStatistics()) {
                        long totalTime = System.nanoTime() - t0;
                        GPUStatistics.cudaFloat2DoubleTime.add(totalTime);
                        GPUStatistics.cudaFloat2DoubleCount.add(1);
-                       if(DMLScript.FINEGRAINED_STATISTICS && instName != null) 
+                       if(ConfigurationManager.isFinegrainedStatistics() && instName != null) 
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_DEVICE_TO_HOST, totalTime);
                }
        }
@@ -203,12 +203,12 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
        public void hostToDevice(GPUContext gCtx, double[] src, Pointer dest, 
String instName) {
                LOG.debug("Potential OOM: Allocated additional space in 
hostToDevice");
                // TODO: Perform conversion on GPU using double2float and 
float2double kernels
-               long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
+               long t0 = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
                if(PERFORM_CONVERSION_ON_DEVICE) {
                        Pointer deviceDoubleData = gCtx.allocate(instName, 
((long)src.length)*Sizeof.DOUBLE);
                        cudaMemcpy(deviceDoubleData, Pointer.to(src), 
((long)src.length)*Sizeof.DOUBLE, cudaMemcpyHostToDevice);
                        LibMatrixCUDA.double2float(gCtx, deviceDoubleData, 
dest, src.length);
-                       gCtx.cudaFreeHelper(instName, deviceDoubleData, DMLScript.EAGER_CUDA_FREE);
+                       gCtx.cudaFreeHelper(instName, deviceDoubleData, gCtx.EAGER_CUDA_FREE);
                }
                else {
                        FloatBuffer floatData = 
ByteBuffer.allocateDirect(Sizeof.FLOAT*src.length).order(ByteOrder.nativeOrder()).asFloatBuffer();
@@ -216,11 +216,11 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
                        cudaMemcpy(dest, Pointer.to(floatData), 
((long)src.length)*Sizeof.FLOAT, cudaMemcpyHostToDevice);
                }
                
-               if(DMLScript.STATISTICS) {
+               if(ConfigurationManager.isStatistics()) {
                        long totalTime = System.nanoTime() - t0;
                        GPUStatistics.cudaDouble2FloatTime.add(totalTime);
                        GPUStatistics.cudaDouble2FloatCount.add(1);
-                       if(DMLScript.FINEGRAINED_STATISTICS && instName != null) 
+                       if(ConfigurationManager.isFinegrainedStatistics() && instName != null) 
                                GPUStatistics.maintainCPMiscTimes(instName, 
GPUInstruction.MISC_TIMER_HOST_TO_DEVICE, totalTime);
                }
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java b/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
index 21e6bd3..8289c30 100644
--- a/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
@@ -688,7 +688,7 @@ public class ProgramConverter
                builder.append(NEWLINE);
 
                //handle additional configurations
-               builder.append(CONF_STATS + "=" + DMLScript.STATISTICS);
+               builder.append(CONF_STATS + "=" + ConfigurationManager.isStatistics());
                builder.append(COMPONENTS_DELIM);
                builder.append(NEWLINE);
 
@@ -753,7 +753,7 @@ public class ProgramConverter
                sb.append( NEWLINE );
                
                //handle additional configurations
-               sb.append( CONF_STATS + "=" + DMLScript.STATISTICS );
+               sb.append( CONF_STATS + "=" + ConfigurationManager.isStatistics() );
                sb.append( COMPONENTS_DELIM );
                sb.append( NEWLINE );
                
@@ -1727,7 +1727,7 @@ public class ProgramConverter
        
        private static void parseAndSetAdditionalConfigurations(String conf) {
                String[] statsFlag = conf.split("=");
-               DMLScript.STATISTICS = Boolean.parseBoolean(statsFlag[1]);
+               ConfigurationManager.setStatistics(Boolean.parseBoolean(statsFlag[1]));
        }
 
        //////////

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/utils/GPUStatistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/GPUStatistics.java b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
index fcbc4c4..e748057 100644
--- a/src/main/java/org/apache/sysml/utils/GPUStatistics.java
+++ b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
@@ -28,7 +28,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.LongAdder;
 
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 
 /**
  * Measures performance numbers when GPU mode is enabled
@@ -155,7 +155,7 @@ public class GPUStatistics {
         */
        public synchronized static void maintainCPMiscTimes( String 
instructionName, String miscTimer, long timeNanos, long incrementCount)
        {
-               if (!(DMLScript.FINEGRAINED_STATISTICS))
+               if (!(ConfigurationManager.isFinegrainedStatistics()))
                        return;
 
                HashMap<String, Long> miscTimesMap = 
_cpInstMiscTime.get(instructionName);

Reply via email to