This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
from d1bc4eb84f [SYSTEMDS-3153] Missing value imputation via KNN-based methods
add 18d793683b [MINOR] Add linting compile flag
add bd82dabd9c [MINOR] Fix ThreadPoolTest Lint
add 884ad3ac88 [SYSTEMDS-3614] Log that Hadoop Binaries are missing
add a54f513e31 [SYSTEMDS-3444][SYSTEMDS-2699] Compressed I/O. This commit is a major overhaul of the writing and reading of compressed matrices.
No new revisions were added by this update.
Summary of changes:
.gitignore | 2 +
conf/log4j.properties | 1 +
pom.xml | 1 +
.../runtime/compress/CompressedMatrixBlock.java | 13 +-
.../runtime/compress/cocode/CoCodeHybrid.java | 2 +-
.../sysds/runtime/compress/colgroup/AColGroup.java | 17 +-
.../compress/colgroup/AColGroupCompressed.java | 4 +-
.../runtime/compress/colgroup/AColGroupOffset.java | 4 +-
.../runtime/compress/colgroup/AColGroupValue.java | 20 +-
.../compress/colgroup/ADictBasedColGroup.java | 29 +-
.../compress/colgroup/AMorphingMMColGroup.java | 10 +-
.../sysds/runtime/compress/colgroup/APreAgg.java | 20 +-
.../sysds/runtime/compress/colgroup/ASDC.java | 4 +-
.../sysds/runtime/compress/colgroup/ASDCZero.java | 4 +-
.../runtime/compress/colgroup/ColGroupConst.java | 22 +-
.../runtime/compress/colgroup/ColGroupDDC.java | 16 +-
.../runtime/compress/colgroup/ColGroupDDCFOR.java | 35 +-
.../runtime/compress/colgroup/ColGroupFactory.java | 122 +--
.../runtime/compress/colgroup/ColGroupOLE.java | 18 +-
.../runtime/compress/colgroup/ColGroupRLE.java | 16 +-
.../runtime/compress/colgroup/ColGroupSDC.java | 34 +-
.../runtime/compress/colgroup/ColGroupSDCFOR.java | 34 +-
.../compress/colgroup/ColGroupSDCSingle.java | 26 +-
.../compress/colgroup/ColGroupSDCSingleZeros.java | 28 +-
.../compress/colgroup/ColGroupSDCZeros.java | 26 +-
.../compress/colgroup/ColGroupUncompressed.java | 4 +-
.../runtime/compress/colgroup/ColGroupUtils.java | 2 +-
.../compress/colgroup/IContainADictionary.java | 4 +-
.../compress/colgroup/dictionary/ADictionary.java | 895 +--------------------
.../colgroup/dictionary/DictLibMatrixMult.java | 14 +-
.../compress/colgroup/dictionary/Dictionary.java | 53 +-
.../colgroup/dictionary/DictionaryFactory.java | 84 +-
.../{ADictionary.java => IDictionary.java} | 281 +++----
.../colgroup/dictionary/IdentityDictionary.java | 72 +-
.../dictionary/IdentityDictionarySlice.java | 12 +-
.../colgroup/dictionary/MatrixBlockDictionary.java | 66 +-
.../colgroup/dictionary/PlaceHolderDict.java | 502 ++++++++++++
.../compress/colgroup/dictionary/QDictionary.java | 68 +-
.../compress/colgroup/mapping/AMapToData.java | 67 +-
.../compress/colgroup/mapping/MapToBit.java | 230 +++---
.../compress/colgroup/mapping/MapToByte.java | 5 +
.../compress/colgroup/mapping/MapToChar.java | 5 +
.../compress/colgroup/mapping/MapToCharPByte.java | 7 +-
.../compress/colgroup/mapping/MapToFactory.java | 3 +-
.../compress/colgroup/mapping/MapToInt.java | 5 +
.../compress/colgroup/mapping/MapToZero.java | 9 +-
.../compress/colgroup/scheme/ACLAScheme.java | 36 +-
.../colgroup/scheme/CompressionScheme.java | 53 +-
.../compress/colgroup/scheme/ConstScheme.java | 15 +-
.../compress/colgroup/scheme/DDCScheme.java | 9 +-
.../compress/colgroup/scheme/DDCSchemeMC.java | 72 +-
.../compress/colgroup/scheme/DDCSchemeSC.java | 92 ++-
.../compress/colgroup/scheme/ICLAScheme.java | 37 +-
.../compress/colgroup/scheme/SDCScheme.java | 4 +-
.../compress/colgroup/scheme/SDCSchemeMC.java | 32 +-
.../compress/colgroup/scheme/SDCSchemeSC.java | 4 +-
.../sysds/runtime/compress/estim/ComEstSample.java | 70 +-
.../runtime/compress/estim/EstimationFactors.java | 13 +-
.../compress/estim/encoding/DenseEncoding.java | 16 +-
.../compress/estim/encoding/EncodingFactory.java | 115 ++-
.../compress/estim/encoding/SparseEncoding.java | 30 +-
.../estim/sample/SampleEstimatorFactory.java | 35 +-
.../runtime/compress/io/CompressedWriteBlock.java | 3 +-
.../sysds/runtime/compress/io/DictWritable.java | 94 +++
.../runtime/compress/io/ReaderCompressed.java | 53 +-
.../runtime/compress/io/ReaderSparkCompressed.java | 118 +++
.../runtime/compress/io/WriterCompressed.java | 242 ++++--
.../runtime/compress/lib/CLALibCombineGroups.java | 8 +-
.../CLALibMerge.java} | 45 +-
.../runtime/compress/lib/CLALibSeparator.java | 117 +++
.../sysds/runtime/compress/lib/CLALibSlice.java | 16 +-
.../sysds/runtime/compress/lib/CLALibStack.java | 159 ++--
.../compress/readers/ReaderColumnSelection.java | 10 -
.../ReaderColumnSelectionDenseMultiBlock.java | 24 +-
...erColumnSelectionDenseMultiBlockTransposed.java | 23 +-
.../ReaderColumnSelectionDenseSingleBlock.java | 25 +-
...rColumnSelectionDenseSingleBlockTransposed.java | 22 +-
.../readers/ReaderColumnSelectionSparse.java | 12 +-
.../ReaderColumnSelectionSparseTransposed.java | 15 -
.../sysds/runtime/compress/utils/ACount.java | 199 ++++-
.../runtime/compress/utils/ACountHashMap.java | 198 +++++
.../sysds/runtime/compress/utils/DblArray.java | 67 +-
.../compress/utils/DblArrayCountHashMap.java | 254 +-----
.../compress/utils/DblArrayIntListHashMap.java | 3 +
.../runtime/compress/utils/DoubleCountHashMap.java | 293 +------
.../runtime/controlprogram/caching/CacheBlock.java | 15 +-
.../context/SparkExecutionContext.java | 25 +-
.../controlprogram/parfor/ResultMergeMatrix.java | 9 +-
.../runtime/controlprogram/parfor/stat/Timing.java | 51 +-
.../org/apache/sysds/runtime/data/TensorBlock.java | 2 +-
.../sysds/runtime/frame/data/FrameBlock.java | 9 +-
.../runtime/frame/data/columns/BitSetArray.java | 51 +-
.../instructions/spark/AppendGSPInstruction.java | 4 +-
.../spark/data/PartitionedBroadcast.java | 10 +-
.../spark/utils/RDDAggregateUtils.java | 14 +-
.../apache/sysds/runtime/io/IOUtilFunctions.java | 6 +-
.../sysds/runtime/matrix/data/MatrixBlock.java | 94 +--
.../org/apache/sysds/runtime/util/HDFSTool.java | 3 +-
.../java/org/apache/sysds/performance/README.md | 6 +
.../org/apache/sysds/performance/TimingUtils.java | 52 +-
.../sysds/performance/compression/APerfTest.java | 18 +-
.../sysds/performance/compression/Serialize.java | 130 ++-
.../org/apache/sysds/test/AutomatedTestBase.java | 2 +
.../component/compress/CompressedCustomTests.java | 44 +-
.../sysds/test/component/compress/TestBase.java | 7 +-
.../ColGroupMorphingPerformanceCompare.java | 4 +-
.../compress/colgroup/ColGroupNegativeTests.java | 10 +-
.../compress/colgroup/JolEstimateTest.java | 9 +-
.../component/compress/combine/CombineTest.java | 4 +-
.../component/compress/dictionary/CombineTest.java | 128 +--
.../compress/dictionary/DictionaryTests.java | 30 +-
.../estim/encoding/EncodeSampleCustom.java | 29 +-
.../compress/estim/encoding/EncodeSampleTest.java | 11 +-
.../estim/sample/SampleDistinctNegativeTest.java | 2 +-
.../compress/io/IOCompressionTestUtils.java | 19 +-
.../sysds/test/component/compress/io/IOEmpty.java | 13 +-
.../test/component/compress/io/IONegativeTest.java | 1 +
.../sysds/test/component/compress/io/IOSpark.java | 246 ++++--
.../sysds/test/component/compress/io/IOTest.java | 82 +-
.../io/SeparateDictionariesAndIndexes.java | 70 ++
.../component/compress/lib/CLALibSliceTest.java | 133 +++
.../compress/mapping/CustomMappingTest.java | 52 ++
.../compress/mapping/MappingPreAggregateTests.java | 2 +
.../component/compress/mapping/MappingTests.java | 4 +-
.../compress/mapping/MappingTestsResize.java | 2 +
.../compress/mapping/PreAggregateDDC_DDCTest.java | 2 +
.../compress/mapping/PreAggregateDDC_SDCZTest.java | 6 +-
.../compress/mapping/PreAggregateSDCZ_DDCTest.java | 3 +
.../mapping/PreAggregateSDCZ_SDCZTest.java | 2 +
.../component/compress/readers/ReadersTest.java | 237 +-----
.../readers/ReadersTestCompareReaders.java | 98 ++-
.../component/compress/util/ArrCountMapTest.java | 218 +++++
.../test/component/compress/util/BitmapTest.java | 29 -
.../test/component/compress/util/CountMapTest.java | 231 ++++++
.../test/component/compress/util/CountTest.java | 88 ++
.../sysds/test/component/misc/ThreadPool.java | 13 +-
.../test/component/sparse/SparseBlockMerge.java | 2 +-
.../sysds/test/functions/io/ReadWriteListTest.java | 21 +-
.../io/compressed/CompressedTestBase.java | 1 +
.../io/compressed/ReadCompressedTest.java | 6 +
.../io/compressed/WriteCompressedTest.java | 12 +-
src/test/resources/log4j.properties | 1 +
142 files changed, 4521 insertions(+), 3381 deletions(-)
copy src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/{ADictionary.java => IDictionary.java} (76%)
create mode 100644 src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/PlaceHolderDict.java
create mode 100644 src/main/java/org/apache/sysds/runtime/compress/io/DictWritable.java
create mode 100644 src/main/java/org/apache/sysds/runtime/compress/io/ReaderSparkCompressed.java
copy src/main/java/org/apache/sysds/runtime/compress/{cost/MemoryCostEstimator.java => lib/CLALibMerge.java} (53%)
create mode 100644 src/main/java/org/apache/sysds/runtime/compress/lib/CLALibSeparator.java
create mode 100644 src/main/java/org/apache/sysds/runtime/compress/utils/ACountHashMap.java
create mode 100644 src/test/java/org/apache/sysds/test/component/compress/io/SeparateDictionariesAndIndexes.java
create mode 100644 src/test/java/org/apache/sysds/test/component/compress/lib/CLALibSliceTest.java
create mode 100644 src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java
create mode 100644 src/test/java/org/apache/sysds/test/component/compress/util/ArrCountMapTest.java
delete mode 100644 src/test/java/org/apache/sysds/test/component/compress/util/BitmapTest.java
create mode 100644 src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java
create mode 100644 src/test/java/org/apache/sysds/test/component/compress/util/CountTest.java