Repository: systemml Updated Branches: refs/heads/master 9b270d61a -> 91f6fb572
[SYSTEMML-445] Display time taken for eviction as well as two-staged fragmentation in the statistics. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/91f6fb57 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/91f6fb57 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/91f6fb57 Branch: refs/heads/master Commit: 91f6fb57293d9adf15714dc55c0bba962aefd107 Parents: 9b270d6 Author: Niketan Pansare <npan...@us.ibm.com> Authored: Mon Jan 29 16:59:40 2018 -0800 Committer: Niketan Pansare <npan...@us.ibm.com> Committed: Mon Jan 29 16:59:40 2018 -0800 ---------------------------------------------------------------------- .../instructions/gpu/context/GPUContext.java | 18 +++++++++++++++++- .../org/apache/sysml/utils/GPUStatistics.java | 13 +++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/91f6fb57/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java index 311e2a7..2741697 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java @@ -336,10 +336,15 @@ public class GPUContext { // Step 1: LOG.debug("Eagerly deallocating rmvar-ed matrices to avoid memory allocation error due to potential fragmentation."); + long forcedEvictStartTime = DMLScript.STATISTICS ? System.nanoTime() : 0; clearFreeCUDASpaceMap(instructionName, -1); + if(DMLScript.STATISTICS) { + GPUStatistics.cudaForcedClearLazyFreedEvictTime.add(System.nanoTime()-forcedEvictStartTime); + } try { cudaMalloc(A, size); } catch(jcuda.CudaException e1) { + forcedEvictStartTime = DMLScript.STATISTICS ? System.nanoTime() : 0; // Step 2: GPUStatistics.cudaForcedClearUnpinnedMatCount.add(1); LOG.warn("Eagerly deallocating unpinned matrices to avoid memory allocation error due to potential fragmentation. " @@ -352,6 +357,9 @@ public class GPUContext { toBeRemoved.clearData(true); } } + if(DMLScript.STATISTICS) { + GPUStatistics.cudaForcedClearUnpinnedEvictTime.add(System.nanoTime()-forcedEvictStartTime); + } cudaMalloc(A, size); } } @@ -555,6 +563,7 @@ public class GPUContext { * @throws DMLRuntimeException If no reusable memory blocks to free up or if not enough matrix blocks with zero locks on them. */ protected void evict(String instructionName, final long neededSize) throws DMLRuntimeException { + long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0; if (LOG.isTraceEnabled()) { LOG.trace("GPU : evict called from " + instructionName + " for size " + neededSize + " on " + this); } @@ -565,8 +574,12 @@ public class GPUContext { clearFreeCUDASpaceMap(instructionName, neededSize); - if (neededSize <= getAvailableMemory()) + if (neededSize <= getAvailableMemory()) { + if(DMLScript.STATISTICS) { + GPUStatistics.cudaEvictTime.add(System.nanoTime() - t0); + } return; + } if (allocatedGPUObjects.size() == 0) { throw new DMLRuntimeException( @@ -628,6 +641,9 @@ public class GPUContext { } toBeRemoved.clearData(true); } + if(DMLScript.STATISTICS) { + GPUStatistics.cudaEvictTime.add(System.nanoTime() - t0); + } } /** http://git-wip-us.apache.org/repos/asf/systemml/blob/91f6fb57/src/main/java/org/apache/sysml/utils/GPUStatistics.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/utils/GPUStatistics.java b/src/main/java/org/apache/sysml/utils/GPUStatistics.java index f671d36..b7eb00c 100644 --- a/src/main/java/org/apache/sysml/utils/GPUStatistics.java +++ b/src/main/java/org/apache/sysml/utils/GPUStatistics.java @@ -51,6 +51,9 @@ public class GPUStatistics { public static LongAdder cudaMemSet0Time = new LongAdder(); // time spent in setting memory to 0 on the GPU (part of reusing and for new allocates) public static LongAdder cudaToDevTime = new LongAdder(); // time spent in copying data from host (CPU) to device (GPU) memory public static LongAdder cudaFromDevTime = new LongAdder(); // time spent in copying data from device to host + public static LongAdder cudaEvictTime = new LongAdder(); // time spent in eviction + public static LongAdder cudaForcedClearLazyFreedEvictTime = new LongAdder(); // time spent in forced lazy eviction + public static LongAdder cudaForcedClearUnpinnedEvictTime = new LongAdder(); // time spent in forced unpinned eviction public static LongAdder cudaAllocCount = new LongAdder(); public static LongAdder cudaDeAllocCount = new LongAdder(); public static LongAdder cudaMemSet0Count = new LongAdder(); @@ -86,6 +89,9 @@ public class GPUStatistics { cudaMemSet0Count.reset(); cudaToDevTime.reset(); cudaFromDevTime.reset(); + cudaEvictTime.reset(); + cudaForcedClearLazyFreedEvictTime.reset(); + cudaForcedClearUnpinnedEvictTime.reset(); cudaAllocCount.reset(); cudaDeAllocCount.reset(); cudaToDevCount.reset(); @@ -191,12 +197,15 @@ public class GPUStatistics { sb.append("CUDA/CuLibraries init time:\t" + String.format("%.3f", cudaInitTime*1e-9) + "/" + String.format("%.3f", cudaLibrariesInitTime*1e-9) + " sec.\n"); sb.append("Number of executed GPU inst:\t" + getNoOfExecutedGPUInst() + ".\n"); - sb.append("GPU mem tx time (alloc/dealloc/set0/toDev/fromDev):\t" + sb.append("GPU mem tx time (alloc/dealloc/set0/toDev/fromDev/evict/forcedEvict(lazy/unpinned)):\t" + String.format("%.3f", cudaAllocTime.longValue()*1e-9) + "/" + String.format("%.3f", cudaDeAllocTime.longValue()*1e-9) + "/" + String.format("%.3f", cudaMemSet0Time.longValue()*1e-9) + "/" + String.format("%.3f", cudaToDevTime.longValue()*1e-9) + "/" - + String.format("%.3f", cudaFromDevTime.longValue()*1e-9) + " sec.\n"); + + String.format("%.3f", cudaFromDevTime.longValue()*1e-9) + "/" + + String.format("%.3f", cudaEvictTime.longValue()*1e-9) + "/(" + + String.format("%.3f", cudaForcedClearLazyFreedEvictTime.longValue()*1e-9) + "/" + + String.format("%.3f", cudaForcedClearUnpinnedEvictTime.longValue()*1e-9) + ") sec.\n"); sb.append("GPU mem tx count (alloc/dealloc/set0/toDev/fromDev/evict/forcedEvict(lazy/unpinned)):\t" + cudaAllocCount.longValue() + "/" + cudaDeAllocCount.longValue() + "/"