Repository: systemml
Updated Branches:
  refs/heads/master 9b270d61a -> 91f6fb572


[SYSTEMML-445] Display time taken for eviction as well as two-staged 
fragmentation in the statistics.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/91f6fb57
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/91f6fb57
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/91f6fb57

Branch: refs/heads/master
Commit: 91f6fb57293d9adf15714dc55c0bba962aefd107
Parents: 9b270d6
Author: Niketan Pansare <npan...@us.ibm.com>
Authored: Mon Jan 29 16:59:40 2018 -0800
Committer: Niketan Pansare <npan...@us.ibm.com>
Committed: Mon Jan 29 16:59:40 2018 -0800

----------------------------------------------------------------------
 .../instructions/gpu/context/GPUContext.java      | 18 +++++++++++++++++-
 .../org/apache/sysml/utils/GPUStatistics.java     | 13 +++++++++++--
 2 files changed, 28 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/91f6fb57/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
index 311e2a7..2741697 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
@@ -336,10 +336,15 @@ public class GPUContext {
                                        
                                        // Step 1:
                                        LOG.debug("Eagerly deallocating 
rmvar-ed matrices to avoid memory allocation error due to potential 
fragmentation.");
+                                       long forcedEvictStartTime = 
DMLScript.STATISTICS ? System.nanoTime() : 0;
                                        clearFreeCUDASpaceMap(instructionName, 
-1);
+                                       if(DMLScript.STATISTICS) {
+                                               
GPUStatistics.cudaForcedClearLazyFreedEvictTime.add(System.nanoTime()-forcedEvictStartTime);
+                                       }
                                        try {
                                                cudaMalloc(A, size);
                                        } catch(jcuda.CudaException e1) {
+                                               forcedEvictStartTime = 
DMLScript.STATISTICS ? System.nanoTime() : 0;
                                                // Step 2:
                                                
GPUStatistics.cudaForcedClearUnpinnedMatCount.add(1);
                                                LOG.warn("Eagerly deallocating 
unpinned matrices to avoid memory allocation error due to potential 
fragmentation. "
@@ -352,6 +357,9 @@ public class GPUContext {
                                                                
toBeRemoved.clearData(true);
                                                        }
                                                }
+                                               if(DMLScript.STATISTICS) {
+                                                       
GPUStatistics.cudaForcedClearUnpinnedEvictTime.add(System.nanoTime()-forcedEvictStartTime);
+                                               }
                                                cudaMalloc(A, size);
                                        }
                                }
@@ -555,6 +563,7 @@ public class GPUContext {
         * @throws DMLRuntimeException If no reusable memory blocks to free up 
or if not enough matrix blocks with zero locks on them.
         */
        protected void evict(String instructionName, final long neededSize) 
throws DMLRuntimeException {
+               long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
                if (LOG.isTraceEnabled()) {
                        LOG.trace("GPU : evict called from " + instructionName 
+ " for size " + neededSize + " on " + this);
                }
@@ -565,8 +574,12 @@ public class GPUContext {
 
                clearFreeCUDASpaceMap(instructionName,  neededSize);
 
-               if (neededSize <= getAvailableMemory())
+               if (neededSize <= getAvailableMemory()) {
+                       if(DMLScript.STATISTICS) {
+                               
GPUStatistics.cudaEvictTime.add(System.nanoTime() - t0);
+                       }
                        return;
+               }
 
                if (allocatedGPUObjects.size() == 0) {
                        throw new DMLRuntimeException(
@@ -628,6 +641,9 @@ public class GPUContext {
                        }
                        toBeRemoved.clearData(true);
                }
+               if(DMLScript.STATISTICS) {
+                       GPUStatistics.cudaEvictTime.add(System.nanoTime() - t0);
+               }
        }
 
        /**

http://git-wip-us.apache.org/repos/asf/systemml/blob/91f6fb57/src/main/java/org/apache/sysml/utils/GPUStatistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/GPUStatistics.java 
b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
index f671d36..b7eb00c 100644
--- a/src/main/java/org/apache/sysml/utils/GPUStatistics.java
+++ b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
@@ -51,6 +51,9 @@ public class GPUStatistics {
        public static LongAdder cudaMemSet0Time = new LongAdder();           // 
time spent in setting memory to 0 on the GPU (part of reusing and for new 
allocates)
        public static LongAdder cudaToDevTime = new LongAdder();             // 
time spent in copying data from host (CPU) to device (GPU) memory
        public static LongAdder cudaFromDevTime = new LongAdder();           // 
time spent in copying data from device to host
+       public static LongAdder cudaEvictTime = new LongAdder();                
 // time spent in eviction
+       public static LongAdder cudaForcedClearLazyFreedEvictTime = new 
LongAdder(); // time spent in forced lazy eviction
+       public static LongAdder cudaForcedClearUnpinnedEvictTime = new 
LongAdder(); // time spent in forced unpinned eviction
        public static LongAdder cudaAllocCount = new LongAdder();
        public static LongAdder cudaDeAllocCount = new LongAdder();
        public static LongAdder cudaMemSet0Count = new LongAdder();
@@ -86,6 +89,9 @@ public class GPUStatistics {
                cudaMemSet0Count.reset();
                cudaToDevTime.reset();
                cudaFromDevTime.reset();
+               cudaEvictTime.reset();
+               cudaForcedClearLazyFreedEvictTime.reset();
+               cudaForcedClearUnpinnedEvictTime.reset();
                cudaAllocCount.reset();
                cudaDeAllocCount.reset();
                cudaToDevCount.reset();
@@ -191,12 +197,15 @@ public class GPUStatistics {
                sb.append("CUDA/CuLibraries init time:\t" + 
String.format("%.3f", cudaInitTime*1e-9) + "/"
                                + String.format("%.3f", 
cudaLibrariesInitTime*1e-9) + " sec.\n");
                sb.append("Number of executed GPU inst:\t" + 
getNoOfExecutedGPUInst() + ".\n");
-               sb.append("GPU mem tx time  
(alloc/dealloc/set0/toDev/fromDev):\t"
+               sb.append("GPU mem tx time  
(alloc/dealloc/set0/toDev/fromDev/evict/forcedEvict(lazy/unpinned)):\t"
                                + String.format("%.3f", 
cudaAllocTime.longValue()*1e-9) + "/"
                                + String.format("%.3f", 
cudaDeAllocTime.longValue()*1e-9) + "/"
                                + String.format("%.3f", 
cudaMemSet0Time.longValue()*1e-9) + "/"
                                + String.format("%.3f", 
cudaToDevTime.longValue()*1e-9) + "/"
-                               + String.format("%.3f", 
cudaFromDevTime.longValue()*1e-9)  + " sec.\n");
+                               + String.format("%.3f", 
cudaFromDevTime.longValue()*1e-9) + "/"
+                               + String.format("%.3f", 
cudaEvictTime.longValue()*1e-9) + "/("
+                               + String.format("%.3f", 
cudaForcedClearLazyFreedEvictTime.longValue()*1e-9) + "/" 
+                               + String.format("%.3f", 
cudaForcedClearUnpinnedEvictTime.longValue()*1e-9) + ") sec.\n");
                sb.append("GPU mem tx count 
(alloc/dealloc/set0/toDev/fromDev/evict/forcedEvict(lazy/unpinned)):\t"
                                + cudaAllocCount.longValue() + "/"
                                + cudaDeAllocCount.longValue() + "/"

Reply via email to