This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 6b23ea4227 [MINOR] Fix compression statistic logging for frames
6b23ea4227 is described below

commit 6b23ea4227127dd8bb9f071453de59ddf518b226
Author: Sebastian Baunsgaard <baunsga...@apache.org>
AuthorDate: Fri Apr 5 17:07:54 2024 +0200

    [MINOR] Fix compression statistic logging for frames
    
    Logging of frame statistics for compression is misleading when
    samples are used to estimate the number of elements.
    Therefore, this commit changes the logging message to reflect the
    approximate nature of the distinct counts.
---
 .../frame/data/compress/ArrayCompressionStatistics.java        | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/compress/ArrayCompressionStatistics.java
 
b/src/main/java/org/apache/sysds/runtime/frame/data/compress/ArrayCompressionStatistics.java
index 8323060f81..c9d5dc71e8 100644
--- 
a/src/main/java/org/apache/sysds/runtime/frame/data/compress/ArrayCompressionStatistics.java
+++ 
b/src/main/java/org/apache/sysds/runtime/frame/data/compress/ArrayCompressionStatistics.java
@@ -20,6 +20,8 @@
 package org.apache.sysds.runtime.frame.data.compress;
 
 import org.apache.sysds.common.Types.ValueType;
+import org.apache.sysds.conf.ConfigurationManager;
+import org.apache.sysds.conf.DMLConfig;
 import org.apache.sysds.runtime.frame.data.columns.ArrayFactory.FrameArrayType;
 
 public class ArrayCompressionStatistics {
@@ -48,8 +50,12 @@ public class ArrayCompressionStatistics {
        @Override
        public String toString() {
                StringBuilder sb = new StringBuilder();
-               sb.append(String.format("Compressed Stats: size:%8d->%8d, 
Use:%10s, Unique:%6d, ValueType:%7s", originalSize,
-                       compressedSizeEstimate, bestType == null ? "None" : 
bestType.toString(), nUnique, valueType));
+               
if(ConfigurationManager.getDMLConfig().getDoubleValue(DMLConfig.COMPRESSED_SAMPLING_RATIO)
 < 1)
+                       sb.append(String.format("Compressed Stats: 
size:%8d->%8d, Use:%10s, EstUnique:%6d, ValueType:%7s",
+                               originalSize, compressedSizeEstimate, bestType 
== null ? "None" : bestType.toString(), nUnique, valueType));
+               else
+                       sb.append(String.format("Compressed Stats: 
size:%8d->%8d, Use:%10s, Unique:%6d, ValueType:%7s", originalSize,
+                               compressedSizeEstimate, bestType == null ? 
"None" : bestType.toString(), nUnique, valueType));
                return sb.toString();
        }
 }

Reply via email to