This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 04b0d098c5 [SYSTEMDS-3650] New I/O Support Cloud-optimized GeoTIFF
04b0d098c5 is described below
commit 04b0d098c5404e0d80067ea86f53f9df58c00fb7
Author: fietenoer <[email protected]>
AuthorDate: Thu Nov 28 15:04:08 2024 +0100
[SYSTEMDS-3650] New I/O Support Cloud-optimized GeoTIFF
DIA WiSe 24/25 project
Closes #2195.
Co-authored-by: lorenzkautzsch
<[email protected]>
Co-authored-by: Aditya Pandey <[email protected]>
---
docs/site/dml-language-reference.md | 13 +-
src/main/java/org/apache/sysds/common/Types.java | 1 +
.../java/org/apache/sysds/lops/compile/Dag.java | 1 -
.../org/apache/sysds/parser/DMLTranslator.java | 1 -
.../org/apache/sysds/parser/DataExpression.java | 4 +-
.../colgroup/dictionary/ACachingMBDictionary.java | 1 +
.../colgroup/dictionary/AIdentityDictionary.java | 1 +
.../functionobjects/ParameterizedBuiltin.java | 1 -
.../gpu/BuiltinBinaryGPUInstruction.java | 2 -
.../gpu/MatrixBuiltinGPUInstruction.java | 1 -
.../sysds/runtime/io/FileFormatPropertiesCOG.java | 50 +++
.../sysds/runtime/io/MatrixReaderFactory.java | 17 +-
.../org/apache/sysds/runtime/io/ReaderCOG.java | 215 ++++++++++
.../apache/sysds/runtime/io/ReaderCOGParallel.java | 406 +++++++++++++++++++
.../apache/sysds/runtime/io/cog/COGByteReader.java | 130 ++++++
.../sysds/runtime/io/cog/COGCompressionUtils.java | 57 +++
.../org/apache/sysds/runtime/io/cog/COGHeader.java | 447 +++++++++++++++++++++
.../apache/sysds/runtime/io/cog/COGProperties.java | 206 ++++++++++
.../org/apache/sysds/runtime/io/cog/IFDTag.java | 69 ++++
.../sysds/runtime/io/cog/IFDTagDictionary.java | 104 +++++
.../cog/SampleFormatDataTypes.java} | 43 +-
.../apache/sysds/runtime/io/cog/TIFFDataTypes.java | 86 ++++
.../test/component/matrix/EigenDecompTest.java | 1 -
.../sysds/test/functions/io/cog/COGTestBase.java} | 37 +-
.../sysds/test/functions/io/cog/ReadCOGTest.java | 93 +++++
...dCOGTest1Tile1BandFloat32PC1ComNoneSquare.java} | 37 +-
...eadCOGTest1Tile1BandInt32PC1ComNoneSquare.java} | 39 +-
...COGTestNTiles1BandFloat32PC1ComNoneSquare.java} | 37 +-
...adCOGTestNTilesNBandsBytePC1ComNoneSquare.java} | 39 +-
...adCOGTestNTilesNBandsUInt16PC1ComNoneRect.java} | 39 +-
...OGTestNTilesNBandsUInt16PC2ComDeflateRect.java} | 37 +-
...adCOGTestNTilesNBandsUInt16PC2ComNoneRect.java} | 39 +-
...stNTilesNBandsUInt16PC2ComNoneRectBIGTIFF.java} | 39 +-
src/test/resources/datasets/cog/testCOG_1.tif | Bin 0 -> 4510 bytes
src/test/resources/datasets/cog/testCOG_1.tif.mtd | 5 +
src/test/resources/datasets/cog/testCOG_2.tif | Bin 0 -> 12714 bytes
src/test/resources/datasets/cog/testCOG_2.tif.mtd | 5 +
src/test/resources/datasets/cog/testCOG_3.tif | Bin 0 -> 16774 bytes
src/test/resources/datasets/cog/testCOG_3.tif.mtd | 5 +
src/test/resources/datasets/cog/testCOG_4.tif | Bin 0 -> 16774 bytes
src/test/resources/datasets/cog/testCOG_4.tif.mtd | 5 +
src/test/resources/datasets/cog/testCOG_5.tif | Bin 0 -> 12698 bytes
src/test/resources/datasets/cog/testCOG_5.tif.mtd | 5 +
src/test/resources/datasets/cog/testCOG_6.tif | Bin 0 -> 12804 bytes
src/test/resources/datasets/cog/testCOG_6.tif.mtd | 5 +
src/test/resources/datasets/cog/testCOG_7.tif | Bin 0 -> 12726 bytes
src/test/resources/datasets/cog/testCOG_7.tif.mtd | 5 +
src/test/resources/datasets/cog/testCOG_8.tif | Bin 0 -> 12892 bytes
src/test/resources/datasets/cog/testCOG_8.tif.mtd | 5 +
.../scripts/functions/io/cog/ReadCOGTest_1.dml | 26 ++
50 files changed, 2163 insertions(+), 196 deletions(-)
diff --git a/docs/site/dml-language-reference.md
b/docs/site/dml-language-reference.md
index ad34b7761f..abafb74a5a 100644
--- a/docs/site/dml-language-reference.md
+++ b/docs/site/dml-language-reference.md
@@ -868,13 +868,15 @@ that users provide MTD files for their own data as well.
#### File formats and MTD files
-SystemDS supports 4 file formats:
+SystemDS supports 6 file formats:
* CSV (delimited)
* Matrix Market (coordinate)
* Text (i,j,v)
+ * LIBSVM
* Binary
* HDF5
+ * COG
The CSV format is a standard text-based format where columns are separated by
delimiter characters, typically commas, and
rows are represented on separate lines.
@@ -899,6 +901,15 @@ Hierarchical Data Format (HDF) is a file format designed
to store and organize l
some features of the HDF5 like two dimension data (Matrix), matrix with FP64
(double) data type,
single dataset, single group, and contiguous dataset.
+Cloud Optimized GeoTIFF (COG) is an image format designed to store large
amounts of geospatial data while allowing for
+efficient access. This is done by splitting the image into tiles which can
then be accessed independently. Currently, SystemDS
+only supports reading COG files and can only process the most important
metadata that is required for reading the image. Normal
+TIFF files that aren't tiled cannot be read by SystemDS. Support for BigTIFF
is very limited and not recommended.
+
+The currently supported compression methods are as follows:
+- None
+- Deflate
+
Let's look at a matrix and examples of its data represented in the supported
formats with corresponding metadata. In the table below, we have
a matrix consisting of 4 rows and 3 columns.
diff --git a/src/main/java/org/apache/sysds/common/Types.java
b/src/main/java/org/apache/sysds/common/Types.java
index 6e61f44615..c4674ba64f 100644
--- a/src/main/java/org/apache/sysds/common/Types.java
+++ b/src/main/java/org/apache/sysds/common/Types.java
@@ -863,6 +863,7 @@ public interface Types {
FEDERATED, // A federated matrix
PROTO, // protocol buffer representation
HDF5, // Hierarchical Data Format (HDF)
+ COG, // Cloud-optimized GeoTIFF
UNKNOWN;
public boolean isIJV() {
diff --git a/src/main/java/org/apache/sysds/lops/compile/Dag.java
b/src/main/java/org/apache/sysds/lops/compile/Dag.java
index 5412beff8c..b26c539e9a 100644
--- a/src/main/java/org/apache/sysds/lops/compile/Dag.java
+++ b/src/main/java/org/apache/sysds/lops/compile/Dag.java
@@ -30,7 +30,6 @@ import java.util.stream.Collectors;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.api.DMLScript;
-import org.apache.sysds.common.Opcodes;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.common.Types.ExecType;
import org.apache.sysds.common.Types.FileFormat;
diff --git a/src/main/java/org/apache/sysds/parser/DMLTranslator.java
b/src/main/java/org/apache/sysds/parser/DMLTranslator.java
index 60483d12e3..2b4131d0ad 100644
--- a/src/main/java/org/apache/sysds/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysds/parser/DMLTranslator.java
@@ -32,7 +32,6 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.api.DMLScript;
import org.apache.sysds.common.Builtins;
-import org.apache.sysds.common.Opcodes;
import org.apache.sysds.common.Types.AggOp;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.common.Types.Direction;
diff --git a/src/main/java/org/apache/sysds/parser/DataExpression.java
b/src/main/java/org/apache/sysds/parser/DataExpression.java
index 142cc806aa..1b9afb41b6 100644
--- a/src/main/java/org/apache/sysds/parser/DataExpression.java
+++ b/src/main/java/org/apache/sysds/parser/DataExpression.java
@@ -1164,6 +1164,8 @@ public class DataExpression extends DataIdentifier
boolean isHDF5 = (formatTypeString != null &&
formatTypeString.equalsIgnoreCase(FileFormat.HDF5.toString()));
+ boolean isCOG = (formatTypeString != null &&
formatTypeString.equalsIgnoreCase(FileFormat.COG.toString()));
+
dataTypeString = (getVarParam(DATATYPEPARAM) == null) ?
null : getVarParam(DATATYPEPARAM).toString();
if ( dataTypeString == null ||
dataTypeString.equalsIgnoreCase(Statement.MATRIX_DATA_TYPE)
@@ -1188,7 +1190,7 @@ public class DataExpression extends DataIdentifier
// initialize size of target data identifier to
UNKNOWN
getOutput().setDimensions(-1, -1);
- if (!isCSV && !isLIBSVM && !isHDF5 &&
ConfigurationManager.getCompilerConfig()
+ if (!isCSV && !isLIBSVM && !isHDF5 && !isCOG &&
ConfigurationManager.getCompilerConfig()
.getBool(ConfigType.REJECT_READ_WRITE_UNKNOWNS) //skip check for csv/libsvm
format / jmlc api
&& (getVarParam(READROWPARAM) == null
|| getVarParam(READCOLPARAM) == null) ) {
raiseValidateError("Missing or
incomplete dimension information in read statement: "
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
index 8117bd345c..f2b6b6163d 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
@@ -23,6 +23,7 @@ import java.lang.ref.SoftReference;
public abstract class ACachingMBDictionary extends ADictionary {
+ private static final long serialVersionUID = 7035552219254994595L;
/** A Cache to contain a materialized version of the identity matrix. */
protected volatile SoftReference<MatrixBlockDictionary> cache = null;
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/AIdentityDictionary.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/AIdentityDictionary.java
index 2bc10b1b06..17b382f06a 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/AIdentityDictionary.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/AIdentityDictionary.java
@@ -22,6 +22,7 @@ package org.apache.sysds.runtime.compress.colgroup.dictionary;
import org.apache.sysds.runtime.compress.DMLCompressionException;
public abstract class AIdentityDictionary extends ACachingMBDictionary {
+ private static final long serialVersionUID = 5013713435287705877L;
/** The number of rows or columns, rows can be +1 if withEmpty is set.
*/
protected final int nRowCol;
/** Specify if the Identity matrix should contain an empty row in the
end. */
diff --git
a/src/main/java/org/apache/sysds/runtime/functionobjects/ParameterizedBuiltin.java
b/src/main/java/org/apache/sysds/runtime/functionobjects/ParameterizedBuiltin.java
index adef124b0d..d800efd4e3 100644
---
a/src/main/java/org/apache/sysds/runtime/functionobjects/ParameterizedBuiltin.java
+++
b/src/main/java/org/apache/sysds/runtime/functionobjects/ParameterizedBuiltin.java
@@ -27,7 +27,6 @@ import
org.apache.commons.math3.distribution.ExponentialDistribution;
import org.apache.commons.math3.distribution.FDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.TDistribution;
-import org.apache.sysds.common.Opcodes;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.util.UtilFunctions;
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
index 7b97a87155..5ab09f5ed1 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
@@ -19,7 +19,6 @@
package org.apache.sysds.runtime.instructions.gpu;
-import org.apache.sysds.common.Opcodes;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.runtime.DMLRuntimeException;
@@ -77,5 +76,4 @@ public abstract class BuiltinBinaryGPUInstruction extends
GPUInstruction {
throw new DMLRuntimeException(
"GPU : Unsupported GPU builtin operations on a
matrix and a scalar:" + opcode);
}
-
}
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
index 4bb36bd110..0557ccc279 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
@@ -21,7 +21,6 @@ package org.apache.sysds.runtime.instructions.gpu;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.common.Opcodes;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
diff --git
a/src/main/java/org/apache/sysds/runtime/io/FileFormatPropertiesCOG.java
b/src/main/java/org/apache/sysds/runtime/io/FileFormatPropertiesCOG.java
new file mode 100644
index 0000000000..10cb5a0f22
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/io/FileFormatPropertiesCOG.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import java.io.Serializable;
+
+public class FileFormatPropertiesCOG extends FileFormatProperties implements
Serializable {
+ protected static final Log LOG =
LogFactory.getLog(FileFormatPropertiesCOG.class.getName());
+ private static final long serialVersionUID = 1038419221722594985L;
+
+ private String datasetName;
+
+ public FileFormatPropertiesCOG() {
+ this.datasetName = "systemdscog";
+ }
+
+ public FileFormatPropertiesCOG(String datasetName) {
+ this.datasetName = datasetName;
+ }
+
+ public String getDatasetName() {
+ return datasetName;
+ }
+
+ @Override public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(" datasetName " + datasetName);
+ return sb.toString();
+ }
+}
diff --git a/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
b/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
index 845271c061..dc1c7da230 100644
--- a/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
@@ -64,8 +64,15 @@ public class MatrixReaderFactory {
break;
case HDF5:
- reader = (par & mcsr) ? new ReaderHDF5Parallel(
- new FileFormatPropertiesHDF5()) : new
ReaderHDF5(new FileFormatPropertiesHDF5());
+ reader = (par & mcsr) ?
+ new ReaderHDF5Parallel(new
FileFormatPropertiesHDF5()) :
+ new ReaderHDF5(new
FileFormatPropertiesHDF5());
+ break;
+
+ case COG:
+ reader = (par & mcsr) ?
+ new ReaderCOGParallel(new
FileFormatPropertiesCOG()) :
+ new ReaderCOG(new
FileFormatPropertiesCOG());
break;
case COMPRESSED:
@@ -124,6 +131,12 @@ public class MatrixReaderFactory {
fileFormatPropertiesHDF5);
break;
+ case COG:
+ FileFormatPropertiesCOG fileFormatPropertiesCOG
= props.formatProperties != null ? (FileFormatPropertiesCOG)
props.formatProperties : new FileFormatPropertiesCOG();
+ reader = (par & mcsr) ?
+ new
ReaderCOGParallel(fileFormatPropertiesCOG) : new
ReaderCOG(fileFormatPropertiesCOG);
+ break;
+
case COMPRESSED:
reader = new ReaderCompressed();
break;
diff --git a/src/main/java/org/apache/sysds/runtime/io/ReaderCOG.java
b/src/main/java/org/apache/sysds/runtime/io/ReaderCOG.java
new file mode 100644
index 0000000000..1223894366
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/io/ReaderCOG.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.sysds.conf.ConfigurationManager;
+import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.io.cog.*;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+public class ReaderCOG extends MatrixReader{
+ protected final FileFormatPropertiesCOG _props;
+
+ public ReaderCOG(FileFormatPropertiesCOG props) {
+ _props = props;
+ }
+ @Override
+ public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long
clen, int blen, long estnnz) throws IOException, DMLRuntimeException {
+ JobConf job = new
JobConf(ConfigurationManager.getCachedJobConf());
+ Path path = new Path(fname);
+ FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
+
+ BufferedInputStream bis = new
BufferedInputStream(fs.open(path));
+ return readCOG(bis, estnnz);
+ }
+
+ @Override
+ public MatrixBlock readMatrixFromInputStream(InputStream is, long rlen,
long clen, int blen, long estnnz) throws IOException, DMLRuntimeException {
+ BufferedInputStream bis = new BufferedInputStream(is);
+ return readCOG(bis, estnnz);
+ }
+
+ /**
+ * Reads a COG file from a BufferedInputStream.
+ * Not handling number of columns or rows, as this can be inferred from
the data, but
+ * may be used in the future for validation or possibly removed as a
requirement for COG.
+ * Specific to COG files, normal TIFFs will break because they aren't
tiled, only
+ * tiled data is supported.
+ * @param bis
+ * @return
+ */
+ private MatrixBlock readCOG(BufferedInputStream bis, long estnnz)
throws IOException {
+ COGByteReader byteReader = new COGByteReader(bis);
+ COGHeader cogHeader = COGHeader.readCOGHeader(byteReader);
+
+ // Check compatibility of the file with our reader
+ // Certain options are not supported, and we need to filter out
some non-standard options
+ String isCompatible =
COGHeader.isCompatible(cogHeader.getIFD());
+ if (!isCompatible.equals("")) {
+ throw new DMLRuntimeException("Incompatible COG file: "
+ isCompatible);
+ }
+
+ // TODO: Currently only reads the first image which is the full
resolution image
+ // In the future, this could be extended to read the overviews
as well
+ // But keep in mind that we are only returning a single
MatrixBlock, so there needs to be some special handling
+ // TODO: Is the metadata (e.g. the coordinates) necessary in
SystemDS? Currently not possible as we only return a MatrixBlock
+ // However, this could possibly be changed in the future to
somehow also store relevant metadata if desired.
+ // Currently this implementation reads the most important data
from the header.
+ COGProperties cogP = new COGProperties(cogHeader.getIFD());
+
+ // number of tiles for Width and Length
+ int tileCols = cogP.getCols() / cogP.getTileWidth();
+ int tileRows = cogP.getRows() / cogP.getTileLength();
+
+ // total number of tiles if every tile contains all bands
+ int calculatedAmountTiles = tileCols * tileRows;
+ // actual given number of tiles, longer for
PlanarConfiguration=2
+ int actualAmountTiles = cogP.getTileOffsets().length;
+
+ int currentTileCol = 0;
+ int currentTileRow = 0;
+ int currentBand = 0;
+
+ // Check if the tiles are fully sequential (always starting at
a higher byte offset)
+ // If that is the case, we can skip the mark/reset calls and
avoid buffering large amounts of data
+ boolean tilesFullySequential = cogP.tilesFullySequential();
+
+ MatrixBlock outputMatrix =
createOutputMatrixBlock(cogP.getRows(), cogP.getCols() * cogP.getBands(),
cogP.getRows(), estnnz, true, false);
+
+ for (int currenTileIdx = 0; currenTileIdx < actualAmountTiles;
currenTileIdx++) {
+ long bytesToRead =
(cogP.getTileOffsets()[currenTileIdx] - byteReader.getTotalBytesRead()) +
cogP.getBytesPerTile()[currenTileIdx];
+ // Mark the current position in the stream
+ // This is used to reset the stream to this position
after reading the data
+ // Valid until bytesToRead + 1 bytes are read
+ // Only necessary if we might need to jump back in the
stream (when tiles are not fully sequential)
+ if (!tilesFullySequential) {
+ byteReader.mark(bytesToRead);
+ }
+ // Read until offset is reached
+
byteReader.skipBytes(cogP.getTileOffsets()[currenTileIdx] -
byteReader.getTotalBytesRead());
+ byte[] currentTileData =
byteReader.readBytes(cogP.getBytesPerTile()[currenTileIdx]);
+
+ if (!tilesFullySequential) {
+ byteReader.reset();
+ }
+
+ if (cogP.getCompression() == 8) {
+ currentTileData =
COGCompressionUtils.decompressDeflate(currentTileData);
+ }
+
+ int pixelsRead = 0;
+ int bytesRead = 0;
+ int currentRow = 0;
+ if (cogP.getPlanarConfiguration() == 1) {
+ // Interleaved
+ // RGBRGBRGB
+ while (currentRow < cogP.getTileLength() &&
pixelsRead < cogP.getTileWidth()) {
+ for (int bandIdx = 0; bandIdx <
cogP.getBands(); bandIdx++) {
+ double value = 0;
+ int sampleLength =
cogP.getBitsPerSample()[bandIdx] / 8;
+
+ switch
(cogP.getSampleFormat()[bandIdx]) {
+ case UNSIGNED_INTEGER:
+ case UNDEFINED:
+ // According to
the standard, this should be handled as not being there -> 1 (unsigned integer)
+ value =
cogHeader.parseByteArray(currentTileData, sampleLength, bytesRead, false,
false, false).doubleValue();
+ break;
+ case SIGNED_INTEGER:
+ value =
cogHeader.parseByteArray(currentTileData, sampleLength, bytesRead, false, true,
false).doubleValue();
+ break;
+ case FLOATING_POINT:
+ value =
cogHeader.parseByteArray(currentTileData, sampleLength, bytesRead, true, false,
false).doubleValue();
+ break;
+ }
+
+ bytesRead += sampleLength;
+
outputMatrix.set((currentTileRow * cogP.getTileLength()) + currentRow,
+ (currentTileCol
* cogP.getTileWidth() * cogP.getBands()) + (pixelsRead * cogP.getBands()) +
bandIdx,
+ value);
+ }
+
+ pixelsRead++;
+ if (pixelsRead >= cogP.getTileWidth()) {
+ pixelsRead = 0;
+ currentRow++;
+ }
+ }
+ } else if (cogP.getPlanarConfiguration() == 2 &&
calculatedAmountTiles * cogP.getBands() == cogP.getTileOffsets().length) {
+ // If every band is stored in different tiles,
so first one R, second one G and so on
+ // RRRGGGBBB
+ // TODO: Currently this doesn't seem
standardized properly, there are still open GitHub issues about that
+ // e.g.:
https://github.com/cogeotiff/cog-spec/issues/17
+ // if something changes in the standard, this
may need to be adjusted, interleaved is discouraged in COG though
+ if (currenTileIdx - (currentBand *
calculatedAmountTiles) >= calculatedAmountTiles) {
+ currentTileCol = 0;
+ currentTileRow = 0;
+ currentBand++;
+ }
+
+ int sampleLength =
cogP.getBitsPerSample()[currentBand] / 8;
+
+ while (currentRow < cogP.getTileLength() &&
pixelsRead < cogP.getTileWidth()) {
+ double value = 0;
+
+ switch
(cogP.getSampleFormat()[currentBand]) {
+ case UNSIGNED_INTEGER:
+ case UNDEFINED:
+ // According to the
standard, this should be handled as not being there -> 1 (unsigned integer)
+ value =
cogHeader.parseByteArray(currentTileData, sampleLength, bytesRead, false,
false, false).doubleValue();
+ break;
+ case SIGNED_INTEGER:
+ value =
cogHeader.parseByteArray(currentTileData, sampleLength, bytesRead, false, true,
false).doubleValue();
+ break;
+ case FLOATING_POINT:
+ value =
cogHeader.parseByteArray(currentTileData, sampleLength, bytesRead, true, false,
false).doubleValue();
+ break;
+ }
+ bytesRead += sampleLength;
+ outputMatrix.set((currentTileRow *
cogP.getTileLength()) + currentRow,
+ (currentTileCol *
cogP.getTileWidth() * cogP.getBands()) + (pixelsRead * cogP.getBands()) +
currentBand,
+ value);
+ pixelsRead++;
+ if (pixelsRead >= cogP.getTileWidth()) {
+ pixelsRead = 0;
+ currentRow++;
+ }
+ }
+ } else {
+ throw new DMLRuntimeException("Unsupported
Planar Configuration: " + cogP.getPlanarConfiguration());
+ }
+
+ currentTileCol++;
+ if (currentTileCol >= tileCols) {
+ currentTileCol = 0;
+ currentTileRow++;
+ }
+ }
+
+ outputMatrix.examSparsity();
+ return outputMatrix;
+ }
+}
diff --git a/src/main/java/org/apache/sysds/runtime/io/ReaderCOGParallel.java
b/src/main/java/org/apache/sysds/runtime/io/ReaderCOGParallel.java
new file mode 100644
index 0000000000..f8665a1873
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/io/ReaderCOGParallel.java
@@ -0,0 +1,406 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.sysds.conf.ConfigurationManager;
+import org.apache.sysds.hops.OptimizerUtils;
+import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.io.cog.*;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.runtime.data.SparseBlock;
+import org.apache.sysds.runtime.data.SparseBlockMCSR;
+import org.apache.sysds.runtime.util.CommonThreadPool;
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+
+public class ReaderCOGParallel extends MatrixReader{
+ protected final FileFormatPropertiesCOG _props;
+ final private int _numThreads;
+
+ public ReaderCOGParallel(FileFormatPropertiesCOG props) {
+ _props = props;
+ _numThreads = OptimizerUtils.getParallelBinaryReadParallelism();
+ }
+ @Override
+ public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long
clen, int blen, long estnnz) throws IOException, DMLRuntimeException {
+ JobConf job = new
JobConf(ConfigurationManager.getCachedJobConf());
+ Path path = new Path(fname);
+ FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
+
+ BufferedInputStream bis = new
BufferedInputStream(fs.open(path));
+ return readCOG(bis, estnnz);
+ }
+
+ @Override
+ public MatrixBlock readMatrixFromInputStream(InputStream is, long rlen,
long clen, int blen, long estnnz) throws IOException, DMLRuntimeException {
+ BufferedInputStream bis = new BufferedInputStream(is);
+ return readCOG(bis, estnnz);
+ }
+
+ /**
+ * Reads a COG file from a BufferedInputStream.
+ * Not handling number of columns or rows, as this can be inferred from
the data, but
+ * may be used in the future for validation or possibly removed as a
requirement for COG.
+ * Specific to COG files, normal TIFFs will break because they aren't
tiled, only
+ * tiled data is supported.
+ * @param bis
+ * @return
+ */
+ private MatrixBlock readCOG(BufferedInputStream bis, long estnnz)
throws IOException {
+ COGByteReader byteReader = new COGByteReader(bis);
+ COGHeader cogHeader = COGHeader.readCOGHeader(byteReader);
+
+ // Check compatibility of the file with our reader
+ // Certain options are not supported, and we need to filter out
some non-standard options
+ String isCompatible =
COGHeader.isCompatible(cogHeader.getIFD());
+ if (!isCompatible.equals("")) {
+ throw new DMLRuntimeException("Incompatible COG file: "
+ isCompatible);
+ }
+
+ // TODO: Currently only reads the first image which is the full
resolution image
+ // In the future, this could be extended to read the overviews
as well
+ // But keep in mind that we are only returning a single
MatrixBlock, so there needs to be some special handling
+ COGProperties cogP = new COGProperties(cogHeader.getIFD());
+
+ // number of tiles for Width and Length
+ int tileCols = cogP.getCols() / cogP.getTileWidth();
+ int tileRows = cogP.getRows() / cogP.getTileLength();
+
+ // total number of tiles if every tile contains all bands
+ int calculatedAmountTiles = tileCols * tileRows;
+ // actual given number of tiles, longer for
PlanarConfiguration=2
+ int actualAmountTiles = cogP.getTileOffsets().length;
+
+ int currentTileCol = 0;
+ int currentTileRow = 0;
+ int currentBand = 0;
+
+ ExecutorService pool = CommonThreadPool.get(_numThreads);
+
+ MatrixBlock outputMatrix =
createOutputMatrixBlock(cogP.getRows(), cogP.getCols() * cogP.getBands(),
cogP.getRows(), estnnz, false, true);
+
+ // Check if the tiles are fully sequential (always starting at
a higher byte offset)
+ // If that is the case, we can skip the mark/reset calls and
avoid buffering large amounts of data
+ boolean tilesFullySequential = cogP.tilesFullySequential();
+
+ try {
+ ArrayList<Callable<MatrixBlock>> tasks = new
ArrayList<>();
+ // Principle: We're reading all tiles in sequence as
I/O likely won't benefit from parallel reads
+ // in most cases.
+ // Then: Process the read tile data in parallel
+ for (int currenTileIdx = 0; currenTileIdx <
actualAmountTiles; currenTileIdx++) {
+ // First read the bytes for the new tile
+ long bytesToRead =
(cogP.getTileOffsets()[currenTileIdx] - byteReader.getTotalBytesRead()) +
cogP.getBytesPerTile()[currenTileIdx];
+ // Only necessary if we might need to jump back
in the stream (when tiles are not fully sequential)
+ if (!tilesFullySequential) {
+ byteReader.mark(bytesToRead);
+ }
+
byteReader.skipBytes(cogP.getTileOffsets()[currenTileIdx] -
byteReader.getTotalBytesRead());
+ byte[] currentTileData =
byteReader.readBytes(cogP.getBytesPerTile()[currenTileIdx]);
+
+ if (!tilesFullySequential) {
+ byteReader.reset();
+ }
+
+ if (cogP.getCompression() == 8) {
+ currentTileData =
COGCompressionUtils.decompressDeflate(currentTileData);
+ }
+
+ TileProcessor tileProcessor;
+ if (cogP.getPlanarConfiguration() == 1) {
+ // Every band is in the same tile, e.g.
RGBRGBRGB
+ tileProcessor = new
TileProcessor(cogP.getCols() * cogP.getBands(), currentTileData,
currentTileRow, currentTileCol,
+ cogP.getTileWidth(),
cogP.getTileLength(), cogP.getBands(), cogP.getBitsPerSample(),
cogP.getSampleFormat(), cogHeader, outputMatrix,
+
cogP.getPlanarConfiguration());
+
+ currentTileCol++;
+ if (currentTileCol >= tileCols) {
+ currentTileCol = 0;
+ currentTileRow++;
+ }
+ } else if (cogP.getPlanarConfiguration() == 2) {
+ // Every band is in a different tile,
e.g. RRRGGGBBB
+ // Note here that first all tiles from
a single band are present
+ // after that all tiles from the next
band are present and so on (so they don't interleave)
+ if (currenTileIdx - (currentBand *
calculatedAmountTiles) >= calculatedAmountTiles) {
+ currentTileCol = 0;
+ currentTileRow = 0;
+ currentBand++;
+ }
+
+ tileProcessor = new
TileProcessor(cogP.getCols() * cogP.getBands(), currentTileData,
currentTileRow, currentTileCol,
+ cogP.getTileWidth(),
cogP.getTileLength(), cogP.getBands(), cogP.getBitsPerSample(),
cogP.getSampleFormat(), cogHeader, outputMatrix,
+
cogP.getPlanarConfiguration(), currentBand);
+
+ currentTileCol++;
+
+ if (currentTileCol >= tileCols) {
+ currentTileCol = 0;
+ currentTileRow++;
+ }
+ } else {
+ throw new
DMLRuntimeException("Unsupported Planar Configuration: " +
cogP.getPlanarConfiguration());
+ }
+ tasks.add(tileProcessor);
+ }
+
+ try {
+ for (Future<MatrixBlock> result :
pool.invokeAll(tasks)) {
+ result.get();
+ }
+
+ if (outputMatrix.isInSparseFormat()) {
+ sortSparseRowsParallel(outputMatrix,
cogP.getRows(), _numThreads, pool);
+ }
+ } catch (Exception e) {
+ throw new IOException("Error during parallel
task execution.", e);
+ }
+
+ } catch (IOException e) {
+ throw new IOException("Thread pool issue or file
reading error.", e);
+ } finally {
+ pool.shutdown();
+ }
+
+ // TODO: If the tile is compressed, decompress the
currentTileData here
+
+ outputMatrix.examSparsity();
+ return outputMatrix;
+ }
+
+ public class TileProcessor implements Callable<MatrixBlock> {
+
+ private final int clen;
+ private final byte[] tileData;
+ private final int tileRow;
+ private final int tileCol;
+ private final int tileWidth;
+ private final int tileLength;
+ private final int bands;
+ private final int[] bitsPerSample;
+ private final SampleFormatDataTypes[] sampleFormat;
+ private final COGHeader cogHeader;
+ private final MatrixBlock _dest;
+ private final int planarConfiguration;
+ private final boolean sparse;
+ private final int band;
+
+ public TileProcessor(int clen, byte[] tileData, int tileRow,
int tileCol, int tileWidth, int tileLength,
+ int bands, int[]
bitsPerSample, SampleFormatDataTypes[] sampleFormat, COGHeader cogHeader,
+ MatrixBlock dest, int
planarConfiguration) {
+ this(clen, tileData, tileRow, tileCol, tileWidth,
tileLength, bands, bitsPerSample, sampleFormat,
+ cogHeader, dest, planarConfiguration,
0);
+ }
+
+ public TileProcessor(int clen, byte[] tileData, int tileRow,
int tileCol, int tileWidth, int tileLength,
+ int bands, int[]
bitsPerSample, SampleFormatDataTypes[] sampleFormat, COGHeader cogHeader,
+ MatrixBlock dest, int
planarConfiguration, int band) {
+ this.clen = clen;
+ this.tileData = tileData;
+ this.tileRow = tileRow;
+ this.tileCol = tileCol;
+ this.tileWidth = tileWidth;
+ this.tileLength = tileLength;
+ this.bands = bands;
+ this.bitsPerSample = bitsPerSample;
+ this.sampleFormat = sampleFormat;
+ this.cogHeader = cogHeader;
+ this._dest = dest;
+ this.planarConfiguration = planarConfiguration;
+ this.sparse = _dest.isInSparseFormat();
+ this.band = band;
+ }
+
+
+ @Override
+ public MatrixBlock call() throws Exception {
+ if (planarConfiguration==1) {
+ processTileByPixel();
+ }
+ else if (planarConfiguration==2){
+ processTileByBand();
+ }
+ else{
+ throw new DMLRuntimeException("Unsupported
Planar Configuration: " + planarConfiguration);
+ }
+ return _dest;
+ }
+
+ private void processTileByPixel() {
+ int pixelsRead = 0;
+ int bytesRead = 0;
+ int currentRow = 0;
+
+ MatrixBlock tileMatrix = new MatrixBlock(tileLength,
tileWidth*bands, sparse);
+
+ if(sparse) {
+ tileMatrix.allocateAndResetSparseBlock(true,
SparseBlock.Type.CSR);
+ tileMatrix.getSparseBlock().allocate(0,
tileLength*tileWidth*bands);
+ }
+
+ while (currentRow < tileLength && pixelsRead <
tileWidth) {
+ for (int bandIdx = 0; bandIdx < bands;
bandIdx++) {
+ double value = 0;
+ int sampleLength =
bitsPerSample[bandIdx] / 8;
+
+ switch (sampleFormat[bandIdx]) {
+ case UNSIGNED_INTEGER:
+ case UNDEFINED:
+ value =
cogHeader.parseByteArray(tileData, sampleLength, bytesRead, false, false,
false).doubleValue();
+ break;
+ case SIGNED_INTEGER:
+ value =
cogHeader.parseByteArray(tileData, sampleLength, bytesRead, false, true,
false).doubleValue();
+ break;
+ case FLOATING_POINT:
+ value =
cogHeader.parseByteArray(tileData, sampleLength, bytesRead, true, false,
false).doubleValue();
+ break;
+ }
+
+ bytesRead += sampleLength;
+ tileMatrix.set(currentRow, (pixelsRead
* bands) + bandIdx, value);
+ }
+
+ pixelsRead++;
+ if (pixelsRead >= tileWidth) {
+ pixelsRead = 0;
+ currentRow++;
+ }
+ }
+
+ try {
+ int rowOffset = tileRow * tileLength;
+ int colOffset = tileCol * tileWidth * bands;
+ if (sparse) {
+ // if outputMatrix is sparse apply
synchronisation if tiles are more narrow then outputMatrix
+ insertIntoSparse(_dest, tileMatrix,
rowOffset, colOffset);
+ }
+ else {
+ // if matrix is dense inserting just
the tileMatrix as is
+ _dest.copy(rowOffset, rowOffset +
tileLength - 1,
+ colOffset, colOffset +
(tileWidth * bands) -1,
+ tileMatrix, false);
+ }
+ } catch (RuntimeException e) {
+ throw new DMLRuntimeException("Error while
processing tile", e);
+ }
+ }
+
+ private void processTileByBand() {
+ int pixelsRead = 0;
+ int bytesRead = 0;
+ int currentRow = 0;
+
+ MatrixBlock tileMatrix = new MatrixBlock(tileLength,
tileWidth*bands, sparse);
+
+ if(sparse) {
+ tileMatrix.allocateAndResetSparseBlock(true,
SparseBlock.Type.CSR);
+ tileMatrix.getSparseBlock().allocate(0,
tileLength*tileWidth*bands);
+ }
+
+ while (currentRow < tileLength && pixelsRead <
tileWidth) {
+ double value = 0;
+ int sampleLength = bitsPerSample[band] / 8;
+
+ switch (sampleFormat[band]) {
+ case UNSIGNED_INTEGER:
+ case UNDEFINED:
+ value =
cogHeader.parseByteArray(tileData, sampleLength, bytesRead, false, false,
false).doubleValue();
+ break;
+ case SIGNED_INTEGER:
+ value =
cogHeader.parseByteArray(tileData, sampleLength, bytesRead, false, true,
false).doubleValue();
+ break;
+ case FLOATING_POINT:
+ value =
cogHeader.parseByteArray(tileData, sampleLength, bytesRead, true, false,
false).doubleValue();
+ break;
+ }
+
+ bytesRead += sampleLength;
+ tileMatrix.set(currentRow, (pixelsRead * bands)
+ band, value);
+
+ pixelsRead++;
+ if (pixelsRead >= tileWidth) {
+ pixelsRead = 0;
+ currentRow++;
+ }
+ }
+
+ try {
+ int rowOffset = tileRow * tileLength;
+ int colOffset = tileCol * tileWidth * bands;
+ if (sparse) {
+ // if outputMatrix is sparse apply
synchronisation if tiles are more narrow then outputMatrix
+ insertIntoSparse(_dest, tileMatrix,
rowOffset, colOffset);
+ }
+ else {
+ // insert only values the thread is
responsible for
+ // denseBlocks have zero values by
default, so actual current band 0 values dont need to be written
+ for (int i = 0; i < tileLength; i++) {
+ for (int j = 0; j < tileWidth *
bands; j++) {
+ if (tileMatrix.get(i,
j) != 0) {
+
_dest.set(rowOffset + i, colOffset + j, tileMatrix.get(i, j));
+ }
+ }
+ }
+ }
+ } catch (RuntimeException e) {
+ throw new DMLRuntimeException("Error while
processing tile", e);
+ }
+ }
+
+ private void insertIntoSparse(MatrixBlock _dest, MatrixBlock
tileMatrix, int rowOffset, int colOffset ) {
+ SparseBlock sblock = _dest.getSparseBlock();
+ if (tileWidth < clen) {
+ // if there is more then one tile in horizontal
direction, synchronization is needed
+ // such that threads do not write the same rows
concurrently
+ // appendToSparse and appendRowToSparse require
sorting
+ if (sblock instanceof SparseBlockMCSR &&
sblock.get(rowOffset) != null) {
+ for (int i = 0; i < tileLength; i++)
+ synchronized
(sblock.get(rowOffset + i)) {
+
_dest.appendRowToSparse(sblock, tileMatrix, i,
+
rowOffset,
+
colOffset, true);
+ }
+ }
+ else{
+ synchronized (_dest) {
+ _dest.appendToSparse(
+ tileMatrix,
+ rowOffset,
+ colOffset);
+ }
+ }
+ }
+ else {
+ // otherwise no further synchronization is
needed
+ _dest.appendToSparse(tileMatrix, rowOffset,
colOffset);
+ }
+ }
+ }
+}
diff --git a/src/main/java/org/apache/sysds/runtime/io/cog/COGByteReader.java
b/src/main/java/org/apache/sysds/runtime/io/cog/COGByteReader.java
new file mode 100644
index 0000000000..d765bd72cf
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/io/cog/COGByteReader.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io.cog;
+
+import org.apache.sysds.runtime.DMLRuntimeException;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+
+/**
+ * This class is used by the COGReader to read bytes from a
BufferedInputStream.
+ * It is wrapper that keeps track of the bytes read and can therefore be used
to
+ * easily go to specific offsets.
+ */
+public class COGByteReader {
+ private long totalBytesRead;
+ private BufferedInputStream bis;
+ private long readlimit = 0;
+
+ public COGByteReader(BufferedInputStream bis) {
+ this.bis = bis;
+ totalBytesRead = 0;
+ }
+
+ public COGByteReader(BufferedInputStream bis, int totalBytesRead) {
+ this.bis = bis;
+ this.totalBytesRead = totalBytesRead;
+ }
+
+ public long getTotalBytesRead() {
+ return totalBytesRead;
+ }
+
+ public void setTotalBytesRead(int totalBytesRead) {
+ this.totalBytesRead = totalBytesRead;
+ }
+
+ /**
+ * Reads a given number of bytes from the BufferedInputStream.
+ * Increments the totalBytesRead counter by the number of bytes read.
+ * @param length ???
+ * @return ???
+ */
+ public byte[] readBytes(int length) {
+ byte[] header = new byte[length];
+ try {
+ bis.read(header);
+ totalBytesRead += length;
+ } catch (IOException e) {
+ throw new DMLRuntimeException(e);
+ }
+ return header;
+ }
+
+ /**
+ * Reads a given number of bytes from the BufferedInputStream.
+ * Increments the totalBytesRead counter by the number of bytes read.
+ * @param length ???
+ * @return ???
+ */
+ public byte[] readBytes(long length) {
+ // TODO: When properly implementing BigTIFF, this could be a
problem when not being able to skip bytes
+ // In BigTIFF the offset can be larger than maxInt which isn't
a problem for skipping bytes
+ // but could be a problem when the tiles are not sequential in
the file and we need to jump back
+ // to a previous position (where we can't use skip).
+ if (length > Integer.MAX_VALUE) {
+ throw new DMLRuntimeException("Cannot read more than
Integer.MAX_VALUE bytes at once");
+ }
+ return readBytes((int) length);
+ }
+
+ /**
+ * Offers the same functionality as BufferedInputStream.mark.
+ * Allows for returning to a previous point if the readlimit is not
exceeded.
+ * @param readlimit ???
+ */
+ public void mark(long readlimit) {
+ this.readlimit = readlimit;
+ bis.mark((int) readlimit + 1);
+ }
+
+ /**
+ * Offers the same functionality as BufferedInputStream.reset.
+ * Resets the stream to the last marked position.
+ * @throws DMLRuntimeException ???
+ */
+ public void reset() throws DMLRuntimeException {
+ try {
+ bis.reset();
+ totalBytesRead -= this.readlimit;
+ } catch (IOException e) {
+ throw new DMLRuntimeException(e);
+ }
+ }
+
+ /**
+ * Skips a given number of bytes without reading them.
+ * Useful for jumping to specific offsets
+ * @param n Number of bytes to skip
+ * @throws DMLRuntimeException ???
+ */
+ public void skipBytes(long n) throws DMLRuntimeException {
+ try {
+ long skipped = bis.skip(n);
+ if (skipped != n) {
+ throw new DMLRuntimeException("Could not skip "
+ n + " bytes, only skipped " + skipped + " bytes");
+ }
+ totalBytesRead += n;
+ } catch (IOException e) {
+ throw new DMLRuntimeException(e);
+ }
+ }
+}
diff --git
a/src/main/java/org/apache/sysds/runtime/io/cog/COGCompressionUtils.java
b/src/main/java/org/apache/sysds/runtime/io/cog/COGCompressionUtils.java
new file mode 100644
index 0000000000..0df14017a7
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/io/cog/COGCompressionUtils.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io.cog;
+
+import org.apache.sysds.runtime.DMLRuntimeException;
+
+import java.io.ByteArrayOutputStream;
+import java.util.zip.DataFormatException;
+import java.util.zip.Inflater;
+
+public class COGCompressionUtils {
+ /**
+ * Decompresses a byte array that was compressed using the Deflate
algorithm
+ * @param compressedData ???
+ * @return ???
+ * @throws DMLRuntimeException ???
+ */
+ public static byte[] decompressDeflate(byte[] compressedData) throws
DMLRuntimeException {
+ // Use the native Java implementation of deflate to decompress
the data
+ Inflater inflater = new Inflater();
+ inflater.setInput(compressedData);
+
+ ByteArrayOutputStream outputStream = new
ByteArrayOutputStream(compressedData.length);
+ byte[] buffer = new byte[1024];
+
+ while (!inflater.finished()) {
+ int decompressedSize = 0;
+ try {
+ decompressedSize = inflater.inflate(buffer);
+ } catch (DataFormatException e) {
+ throw new DMLRuntimeException("Failed to
decompress tile data", e);
+ }
+ outputStream.write(buffer, 0, decompressedSize);
+ }
+
+ inflater.end();
+
+ return outputStream.toByteArray();
+ }
+}
diff --git a/src/main/java/org/apache/sysds/runtime/io/cog/COGHeader.java
b/src/main/java/org/apache/sysds/runtime/io/cog/COGHeader.java
new file mode 100644
index 0000000000..2c832af9db
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/io/cog/COGHeader.java
@@ -0,0 +1,447 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io.cog;
+
+import org.apache.sysds.runtime.DMLRuntimeException;
+
+import java.util.ArrayList;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * Represents a header for a COG file. This includes IFDs, endianess etc.
+ */
+public class COGHeader {
+ private boolean isLittleEndian;
+ private String GDALMetadata;
+ private IFDTag[] IFD;
+ private boolean isBigTIFF;
+ // Do we even need this or will we throw it away?
+ // If we keep this and write it again, we also need to write the
additional images
+ // So this could very likely not make the cut
+ private ArrayList<IFDTag[]> additionalIFDs;
+
+ public COGHeader(boolean isLittleEndian) {
+ this.isLittleEndian = isLittleEndian;
+ GDALMetadata = "";
+ additionalIFDs = new ArrayList<IFDTag[]>();
+ }
+
+ public void setIFD(IFDTag[] IFD) {
+ this.IFD = IFD;
+ }
+
+ public IFDTag[] getIFD() {
+ return IFD;
+ }
+
+ public void addAdditionalIFD(IFDTag[] IFD) {
+ additionalIFDs.add(IFD);
+ }
+
+ public ArrayList<IFDTag[]> getAdditionalIFDs() {
+ return additionalIFDs;
+ }
+
+ public IFDTag[] getSingleAdditionalIFD(int index) {
+ return additionalIFDs.get(index);
+ }
+
+ public void setSingleAdditionalIFD(int index, IFDTag[] IFD) {
+ additionalIFDs.set(index, IFD);
+ }
+
+ public void removeSingleAdditionalIFD(int index) {
+ additionalIFDs.remove(index);
+ }
+
+ public void setLittleEndian(boolean isLittleEndian) {
+ this.isLittleEndian = isLittleEndian;
+ }
+
+ public boolean isLittleEndian() {
+ return isLittleEndian;
+ }
+
+ public void setGDALMetadata(String GDALMetadata) {
+ this.GDALMetadata = GDALMetadata;
+ }
+
+ public String getGDALMetadata() {
+ return GDALMetadata;
+ }
+
+ public void setBigTIFF(boolean isBigTIFF) {
+ this.isBigTIFF = isBigTIFF;
+ }
+
+ public boolean isBigTIFF() {
+ return isBigTIFF;
+ }
+
+ /**
+ * Parses a byte array into a generic number. Can be byte, short, int,
float or double
+ * depending on the options given. E.g.: Use .doubleValue() on the
result to get a double value easily
+ *
+ * Supported lengths:
+ * isDecimal:
+ * - 4 bytes: float
+ * - 8 bytes: double
+ * otherwise:
+ * - 1 byte: byte
+ * - 2 bytes: short
+ * - 4 bytes: int
+ * Anything else will throw an exception
+ * @param bytes ???
+ * @param length number of bytes that should be read
+ * @param offset from the start of the byte array
+ * @param isDecimal Whether we are dealing with a floating point number
+ * @param isSigned Whether the number is signed
+ * @param isRational Whether the number is a rational number as
specified in the TIFF standard
+ * (first 32 bit integer numerator of a
fraction, second 32 bit integer denominator)
+ * @return ???
+ */
+ public Number parseByteArray(byte[] bytes, int length, int offset,
boolean isDecimal, boolean isSigned, boolean isRational) {
+ ByteBuffer buffer = ByteBuffer.wrap(bytes);
+ buffer.order(isLittleEndian ? ByteOrder.LITTLE_ENDIAN :
ByteOrder.BIG_ENDIAN);
+ buffer.position(offset);
+
+ if (isRational && !isSigned) {
+ long numerator =
Integer.toUnsignedLong(buffer.getInt());
+ long denominator =
Integer.toUnsignedLong(buffer.getInt());
+ return (double)numerator / denominator;
+ }
+ if (isRational && isSigned) {
+ long numerator = buffer.getInt();
+ long denominator = buffer.getInt();
+ return (double)numerator / denominator;
+ }
+ if (isDecimal) {
+ switch (length) {
+ case 4:
+ return buffer.getFloat();
+ case 8:
+ return buffer.getDouble();
+ default:
+ throw new
IllegalArgumentException("Unsupported length: " + length);
+ }
+ }
+ switch (length) {
+ case 1:
+ return isSigned ? (byte)buffer.get() :
Byte.toUnsignedInt(buffer.get());
+ case 2:
+ return isSigned ? (short)buffer.getShort() :
Short.toUnsignedInt(buffer.getShort());
+ case 4:
+ return isSigned ? (int)buffer.getInt() :
Integer.toUnsignedLong(buffer.getInt());
+ case 8:
+ return isSigned ? (long)buffer.getLong() :
buffer.getLong();
+ default:
+ throw new IllegalArgumentException("Unsupported
length: " + length);
+ }
+ }
+
+ /**
+ * Prepares the COG header by reading the first 4 bytes and determining
the byte order.
+ * Needs to be called before anything else is done with the COG header.
+ * @param byteReader ???
+ * @return ???
+ */
+ private static COGHeader prepareHeader(COGByteReader byteReader) {
+ // Read first 4 bytes to determine byte order and make sure it
is a valid TIFF
+ byte[] header = byteReader.readBytes(4);
+
+ // Read the byte order
+ boolean littleEndian = false;
+ if ((header[0] & 0xFF) == 0x4D && (header[1] & 0xFF) == 0x4D) {
+ littleEndian = false;
+ } else if ((header[0] & 0xFF) == 0x49 && (header[1] & 0xFF) ==
0x49) {
+ littleEndian = true;
+ } else {
+ throw new DMLRuntimeException("Invalid Byte-Order");
+ }
+
+ // Create COGHeader object, initialize with the correct byte
order
+ COGHeader cogHeader = new COGHeader(littleEndian);
+
+ // Check magic number (42), otherwise this is not a valid TIFF
+ int magic = cogHeader.parseByteArray(header, 2, 2, false,
false, false).intValue();
+ if (magic == 42) {
+ cogHeader.setBigTIFF(false);
+ } else if (magic == 43) {
+ cogHeader.setBigTIFF(true);
+ } else {
+ throw new DMLRuntimeException("Invalid Magic Number");
+ }
+
+ return cogHeader;
+ }
+
+ /**
+ * Reads the COG header from the BufferedInputStream.
+ * Handles little endian setting, checking magic number. After this you
manually
+ * have to check the compatibility though if you desire to do so.
+ * @param byteReader ???
+ * @return filled COGHeader object
+ */
+ public static COGHeader readCOGHeader(COGByteReader byteReader) {
+ COGHeader cogHeader = prepareHeader(byteReader);
+ // Read offset of the first IFD
+ // Usually this is 8 (right after the header) we are at right
now
+ // With COG, GDAL usually writes some metadata before the IFD
+ short ifdOffsetSize = 4;
+ // BigTIFF allows for differently sized offsets
+ if (cogHeader.isBigTIFF()) {
+ byte[] offsetSize = byteReader.readBytes(2);
+ ifdOffsetSize = cogHeader.parseByteArray(offsetSize, 2,
0, false, false, false).shortValue();
+ byteReader.skipBytes(2); // Skip the next 2 bytes
+ }
+
+ byte[] ifdOffsetRaw = byteReader.readBytes(ifdOffsetSize);
+ long ifdOffset = cogHeader.parseByteArray(ifdOffsetRaw,
ifdOffsetSize, 0, false, false, false).intValue();
+
+ // If the IFD offset is larger than 8, read that and store it
in the COGHeader
+ // This is the GDAL metadata
+ if (ifdOffset > 8) {
+ // Read the metadata from the current position to the
IFD offset
+ // -8 because the offset is calculated from the
beginning of the file
+ byte[] metadata = byteReader.readBytes(ifdOffset -
(cogHeader.isBigTIFF() ? 16 : 8));
+ cogHeader.setGDALMetadata(new String(metadata));
+ }
+
+ // If we read the first IFD, we handle it somewhat differently
+ // See the if-statement below
+ boolean firstIFD = true;
+ // Is used at the end of the while loop to determine if there
is another IFD
+ byte[] nextIFDOffsetRaw;
+ int nextIFDOffset = 0;
+
+ // Used in the beginning of the while loop to read the number
of tags in the IFD
+ byte[] numberOfTagsRaw;
+ int numberOfTags;
+ // Array to store the IFD tags, initialized after the number of
tags were read
+ IFDTag[] ifdTags;
+ int tagCountLength = cogHeader.isBigTIFF() ? 8 : 4;
+ int tagDataLength = cogHeader.isBigTIFF() ? 8 : 4;
+
+ // Read the IFDs, always read the first one
+ // The nextIFDOffset ist 0 if there is no next IFD
+ while (nextIFDOffset != 0 || firstIFD) {
+ // There can be data in-between IFDs, we need to skip
that
+ // Read until the next IFD, discard any data until then
+ byteReader.skipBytes(nextIFDOffset - (firstIFD ? 0 :
byteReader.getTotalBytesRead()));
+
+ // Read the number of tags in the IFD and initialize
the array
+ numberOfTagsRaw =
byteReader.readBytes(cogHeader.isBigTIFF() ? 8 : 2);
+ numberOfTags =
cogHeader.parseByteArray(numberOfTagsRaw, cogHeader.isBigTIFF() ? 8 : 2, 0,
false, false, false).intValue();
+ ifdTags = new IFDTag[numberOfTags];
+
+ // Read the tags
+ for (int i = 0; i < numberOfTags; i++) {
+ // Read the tag (Normal 12 bytes, 20 bytes for
BigTIFF)
+ // 2 bytes tag ID
+ // 2 bytes data type
+ // 4 bytes data count (8 bytes BigTIFF)
+ // 4 bytes data value (can also be offset) (8
bytes BigTIFF)
+ byte[] tag =
byteReader.readBytes(cogHeader.isBigTIFF() ? 20 : 12);
+ int tagId = cogHeader.parseByteArray(tag, 2, 0,
false, false, false).intValue();
+
+ int tagType = cogHeader.parseByteArray(tag, 2,
2, false, false, false).intValue();
+ TIFFDataTypes dataType =
TIFFDataTypes.valueOf(tagType);
+
+ int tagCount = cogHeader.parseByteArray(tag,
tagCountLength, 4, false, false, false).intValue();
+
+ Number[] tagData;
+ long tagValue = cogHeader.parseByteArray(tag,
tagDataLength, cogHeader.isBigTIFF() ? 12 : 8, false, false, false).longValue();
+
+ if (dataType.getSize() * tagCount <=
tagDataLength) {
+ tagData = parseTagData(tagCount, tag,
dataType, cogHeader, cogHeader.isBigTIFF() ? 8 : 4, cogHeader.isBigTIFF() ? 12
: 8);
+ } else {
+ // If the data in total is larger than
4 bytes it is an offset to the actual data
+ // Read the data from the offset
+ // tagValue = offset, just assigning
this for better readability
+ long offset = tagValue;
+ // data length = tagCount * data type
size
+ int totalSize = tagCount *
dataType.getSize();
+
+ // Calculate the number of bytes to
read in order to reset our reader
+ // after going to that offset
+ long bytesToRead = (offset -
byteReader.getTotalBytesRead()) + totalSize;
+
+ // Mark the current position in the
stream
+ // This is used to reset the stream to
this position after reading the data
+ // Valid until bytesToRead + 1 bytes
are read
+ byteReader.mark((int) bytesToRead);
+ // Read until offset is reached
+ byteReader.readBytes(offset -
byteReader.getTotalBytesRead());
+ // Read actual data
+ byte[] data =
byteReader.readBytes(totalSize);
+
+ tagData = parseTagData(tagCount, data,
dataType, cogHeader, 0);
+
+ // Reset the stream to the beginning of
the next tag
+ byteReader.reset();
+ }
+ // Read the tag ID and get the corresponding
tag from the dictionary (enum)
+ IFDTagDictionary tagDictionary =
IFDTagDictionary.valueOf(tagId);
+
+ // Create the constructed IFDTag object and
store it in the array
+ IFDTag ifdTag = new IFDTag(tagDictionary !=
null ? tagDictionary : IFDTagDictionary.Unknown, (short) tagType, tagCount,
tagData);
+ ifdTags[i] = ifdTag;
+ }
+ if (firstIFD) {
+ // If this is the first IFD, set it as the main
IFD in the COGHeader
+ cogHeader.setIFD(ifdTags.clone());
+ firstIFD = false;
+ } else {
+ // If this is not the first IFD, add it as an
additional IFD
+ cogHeader.addAdditionalIFD(ifdTags.clone());
+ }
+ // Read the offset to the next IFD. If it is 0, there
is no next IFD
+ nextIFDOffsetRaw = byteReader.readBytes(4);
+ nextIFDOffset =
cogHeader.parseByteArray(nextIFDOffsetRaw, 4, 0, false, false,
false).intValue();
+ }
+ return cogHeader;
+ }
+
+ /**
+ * Parses the data of an IFD entry from a byte array. Can throw an
error if something is not expected,
+ * e.g. when a broken TIFF causes the data size to differ from what is
expected.
+ * @param tagCount Number of tags that should be present
+ * @param rawData Raw data where the tags can be found
+ * @param dataType Data Type, used for size calculation
+ * @param cogHeader COGHeader is used for properly parsing the byte
array with the correct data type etc.
+ * @param maxSize Should be set to 0 if no other value is useful!
Throws an error when the data is too large for the header field
+ * @return ???
+ */
+ private static Number[] parseTagData(int tagCount, byte[] rawData,
TIFFDataTypes dataType, COGHeader cogHeader, int maxSize) {
+ return parseTagData(tagCount, rawData, dataType, cogHeader,
maxSize, 0);
+ }
+
+ /**
+ * Parses the data of an IFD entry from a byte array. Can throw an
error if something is not expected,
+ * e.g. when a broken TIFF causes the data size to differ from what is
expected.
+ * @param tagCount Number of tags that should be present
+ * @param rawData Raw data where the tags can be found
+ * @param dataType Data Type, used for size calculation
+ * @param cogHeader COGHeader is used for properly parsing the byte
array with the correct data type etc.
+ * @param maxSize Should be set to 0 if no other value is useful!
Throws an error when the data is too large for the header field
+ * @param offset (Optional) offset where to start reading, e.g. when
giving in whole tag
+ * @return ???
+ */
+ private static Number[] parseTagData(int tagCount, byte[] rawData,
TIFFDataTypes dataType, COGHeader cogHeader, int maxSize, int offset) {
+ if (maxSize > 0 && dataType.getSize() * tagCount > maxSize) {
+ throw new DMLRuntimeException("Error while parsing.
Data type " + dataType.toString() + " cannot fit into " + maxSize + " bytes");
+ }
+ Number[] tagData = new Number[tagCount];
+ for (int j = 0; j < tagCount; j++) {
+ switch(dataType) {
+ case BYTE:
+ case ASCII:
+ case SHORT:
+ case LONG:
+ case LONG8:
+ case UNDEFINED:
+ tagData[j] =
cogHeader.parseByteArray(rawData, dataType.getSize(), offset + j *
dataType.getSize(), false, false, false);
+ break;
+ case SBYTE:
+ case SSHORT:
+ case SLONG:
+ case SLONG8:
+ case IFD8:
+ tagData[j] =
cogHeader.parseByteArray(rawData, dataType.getSize(), offset + j *
dataType.getSize(), false, true, false);
+ break;
+ case RATIONAL:
+ tagData[j] =
cogHeader.parseByteArray(rawData, dataType.getSize(), offset + j *
dataType.getSize(), false, false, true);
+ break;
+ case SRATIONAL:
+ tagData[j] =
cogHeader.parseByteArray(rawData, dataType.getSize(), offset + j *
dataType.getSize(), false, true, true);
+ break;
+ case FLOAT:
+ case DOUBLE:
+ tagData[j] =
cogHeader.parseByteArray(rawData, dataType.getSize(), offset + j *
dataType.getSize(), true, false, false);
+ break;
+ }
+ }
+
+ return tagData;
+ }
+
+ /**
+ * Checks a given header for compatibility with the reader
+ * @param IFD ???
+ * @return empty string if compatible, error message otherwise
+ */
+ @SuppressWarnings("incomplete-switch")
+ public static String isCompatible(IFDTag[] IFD) {
+ boolean hasTileOffsets = false;
+ int imageWidth = -1;
+ int imageHeight = -1;
+ int tileWidth = -1;
+ int tileHeight = -1;
+ for (IFDTag tag : IFD) {
+ // Only 8 bit, 16 bit, 32 bit images are supported
+ // This is common practice in TIFF readers
+ // 12 bit values e.g. should instead be scaled to 16 bit
+ switch (tag.getTagId()) {
+ case BitsPerSample:
+ Number[] data = tag.getData();
+ for (int i = 0; i < data.length; i++) {
+ if (data[i].intValue() != 8 &&
data[i].intValue() != 16 && data[i].intValue() != 32) {
+ return "Unsupported bit
depth: " + data[i];
+ }
+ }
+ break;
+ case TileOffsets:
+ if (tag.getData().length > 0) {
+ hasTileOffsets = true;
+ }
+ break;
+ case Compression:
+ // After implementing additional
decompression methods, this can be extended
+ // TODO: LZW would be a great addition
as it is widely used
+ // Furthermore, JPEG support would also
be a good addition
+ // 1: none, 8: deflate
+ if (tag.getData()[0].intValue() != 1 &&
tag.getData()[0].intValue() != 8) {
+ return "Unsupported
compression: " + tag.getData()[0];
+ }
+ break;
+ case ImageWidth:
+ imageWidth =
tag.getData()[0].intValue();
+ break;
+ case ImageLength:
+ imageHeight =
tag.getData()[0].intValue();
+ break;
+ case TileWidth:
+ tileWidth = tag.getData()[0].intValue();
+ break;
+ case TileLength:
+ tileHeight =
tag.getData()[0].intValue();
+ break;
+ }
+ }
+ if (!hasTileOffsets) {
+ return "No tile offsets found";
+ }
+ if (imageWidth % tileWidth != 0 || imageHeight % tileHeight !=
0) {
+ return "Image can't be split into tiles equally";
+ }
+ return "";
+ }
+}
diff --git a/src/main/java/org/apache/sysds/runtime/io/cog/COGProperties.java
b/src/main/java/org/apache/sysds/runtime/io/cog/COGProperties.java
new file mode 100644
index 0000000000..59759711b7
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/io/cog/COGProperties.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io.cog;
+
+import java.util.Arrays;
+
+/**
+ * Properties of a COG file that are useful for reading the file.
+ */
+public class COGProperties {
+
+ private int rows;
+ private int cols;
+ private int bands;
+ private int[] bitsPerSample;
+ private SampleFormatDataTypes[] sampleFormat;
+ private int planarConfiguration;
+ private int tileWidth;
+ private int tileLength;
+ private int[] tileOffsets;
+ private int[] bytesPerTile;
+ private int compression;
+
+ public COGProperties() {
+
+ }
+
+ public COGProperties(IFDTag[] ifdTags) {
+ this.initFromIFDTags(ifdTags);
+ }
+
+ // Getters and Setters
+ public int getRows() {
+ return rows;
+ }
+
+ public void setRows(int rows) {
+ this.rows = rows;
+ }
+
+ public int getCols() {
+ return cols;
+ }
+
+ public void setCols(int cols) {
+ this.cols = cols;
+ }
+
+ public int getBands() {
+ return bands;
+ }
+
+ public void setBands(int bands) {
+ this.bands = bands;
+ }
+
+ public int[] getBitsPerSample() {
+ return bitsPerSample;
+ }
+
+ public void setBitsPerSample(int[] bitsPerSample) {
+ this.bitsPerSample = bitsPerSample;
+ }
+
+ public SampleFormatDataTypes[] getSampleFormat() {
+ return sampleFormat;
+ }
+
+ public void setSampleFormat(SampleFormatDataTypes[] sampleFormat) {
+ this.sampleFormat = sampleFormat;
+ }
+
+ public int getPlanarConfiguration() {
+ return planarConfiguration;
+ }
+
+ public void setPlanarConfiguration(int planarConfiguration) {
+ this.planarConfiguration = planarConfiguration;
+ }
+
+ public int getTileWidth() {
+ return tileWidth;
+ }
+
+ public void setTileWidth(int tileWidth) {
+ this.tileWidth = tileWidth;
+ }
+
+ public int getTileLength() {
+ return tileLength;
+ }
+
+ public void setTileLength(int tileLength) {
+ this.tileLength = tileLength;
+ }
+
+ public int[] getTileOffsets() {
+ return tileOffsets;
+ }
+
+ public void setTileOffsets(int[] tileOffsets) {
+ this.tileOffsets = tileOffsets;
+ }
+
+ public int[] getBytesPerTile() {
+ return bytesPerTile;
+ }
+
+ public void setBytesPerTile(int[] bytesPerTile) {
+ this.bytesPerTile = bytesPerTile;
+ }
+
+ public int getCompression() {
+ return compression;
+ }
+
+ public void setCompression(int compression) {
+ this.compression = compression;
+ }
+
+ public void initFromIFDTags(IFDTag[] ifdTags) {
+ for (IFDTag ifd : ifdTags) {
+ IFDTagDictionary tag = ifd.getTagId();
+ switch (tag) {
+ case ImageWidth:
+ this.cols = ifd.getData()[0].intValue();
+ break;
+ case ImageLength:
+ this.rows = ifd.getData()[0].intValue();
+ break;
+ case SamplesPerPixel:
+ this.bands =
ifd.getData()[0].intValue();
+ break;
+ case BitsPerSample:
+ this.bitsPerSample =
Arrays.stream(ifd.getData()).mapToInt(Number::intValue).toArray();
+ break;
+ case TileWidth:
+ this.tileWidth =
ifd.getData()[0].intValue();
+ break;
+ case TileLength:
+ this.tileLength =
ifd.getData()[0].intValue();
+ break;
+ case TileOffsets:
+ this.tileOffsets =
Arrays.stream(ifd.getData()).mapToInt(Number::intValue).toArray();
+ break;
+ case TileByteCounts:
+ if (ifd.getData() != null) {
+ this.bytesPerTile =
Arrays.stream(ifd.getData()).mapToInt(Number::intValue).toArray();
+ } else {
+ this.bytesPerTile = new
int[this.tileOffsets.length];
+ for (int tile = 0; tile <
this.tileOffsets.length; tile++) {
+ int bits = 0;
+ for (int band = 0; band
< this.bands; band++) {
+ bits +=
this.bitsPerSample[band];
+ }
+ this.bytesPerTile[tile]
= this.tileWidth * this.tileLength * (bits / 8);
+ }
+ }
+ break;
+ case SampleFormat:
+ int dataCount = ifd.getDataCount();
+ this.sampleFormat = new
SampleFormatDataTypes[dataCount];
+ for (int i = 0; i < dataCount; i++) {
+ this.sampleFormat[i] =
SampleFormatDataTypes.valueOf(ifd.getData()[i].intValue());
+ }
+ break;
+ case PlanarConfiguration:
+ this.planarConfiguration =
ifd.getData()[0].intValue();
+ break;
+ case Compression:
+ this.compression =
ifd.getData()[0].intValue();
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ public boolean tilesFullySequential() {
+ boolean tilesFullySequential = true;
+ for (int i = 1; i < getTileOffsets().length; i++) {
+ if (getTileOffsets()[i] < getTileOffsets()[i - 1]) {
+ tilesFullySequential = false;
+ break;
+ }
+ }
+ return tilesFullySequential;
+ }
+}
diff --git a/src/main/java/org/apache/sysds/runtime/io/cog/IFDTag.java
b/src/main/java/org/apache/sysds/runtime/io/cog/IFDTag.java
new file mode 100644
index 0000000000..bff858ffb5
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/io/cog/IFDTag.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io.cog;
+
+/**
+ * Represents a single tag in the IFD of a TIFF file
+ */
+public class IFDTag {
+ private IFDTagDictionary tagId;
+ private short dataType;
+ private int dataCount;
+ private Number[] data;
+
+ public IFDTag(IFDTagDictionary tagId, short dataType, int dataCount,
Number[] data) {
+ this.tagId = tagId;
+ this.dataType = dataType;
+ this.dataCount = dataCount;
+ this.data = data;
+ }
+
+ public IFDTagDictionary getTagId() {
+ return tagId;
+ }
+
+ public void setTagId(IFDTagDictionary tagId) {
+ this.tagId = tagId;
+ }
+
+ public short getDataType() {
+ return dataType;
+ }
+
+ public void setDataType(short dataType) {
+ this.dataType = dataType;
+ }
+
+ public int getDataCount() {
+ return dataCount;
+ }
+
+ public void setDataCount(int dataCount) {
+ this.dataCount = dataCount;
+ }
+
+ public Number[] getData() {
+ return data;
+ }
+
+ public void setData(Number[] data) {
+ this.data = data;
+ }
+}
diff --git
a/src/main/java/org/apache/sysds/runtime/io/cog/IFDTagDictionary.java
b/src/main/java/org/apache/sysds/runtime/io/cog/IFDTagDictionary.java
new file mode 100644
index 0000000000..0a239842cc
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/io/cog/IFDTagDictionary.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io.cog;
+
+/**
+ * Enum for mapping IFD tag values to their corresponding tag names
+ */
+public enum IFDTagDictionary {
+ Unknown(-1),
+ // Right now we will only support baseline TIFF
+ // not the extended version
+ NewSubfileType(254),
+ ImageWidth(256),
+ ImageLength(257),
+ BitsPerSample(258),
+ Compression(259),
+ PhotometricInterpretation(262),
+ Threshholding(263),
+ CellWidth(264),
+ CellLength(265),
+ FillOrder(266),
+ ImageDescription(270),
+ Make(271),
+ Model(272),
+ StripOffsets(273),
+ Orientation(274),
+ SamplesPerPixel(277),
+ RowsPerStrip(278),
+ StripByteCounts(279),
+ MinSampleValue(280),
+ MaxSampleValue(281),
+ XResolution(282),
+ YResolution(283),
+ PlanarConfiguration(284),
+ FreeOffsets(288),
+ FreeByteCounts(289),
+ GrayResponseUnit(290),
+ GrayResponseCurve(291),
+ ResolutionUnit(296),
+ Software(305),
+ DateTime(306),
+ Artist(315),
+ HostComputer(316),
+ ColorMap(320),
+ ExtraSamples(338),
+ /**
+ * 1 = unsigned integer data
+ * 2 = two's complement signed integer data
+ * 3 = IEEE floating point data [IEEE]
+ * 4 = undefined data format
+ * Has as many values as SamplesPerPixel
+ */
+ SampleFormat(339),
+
+ // Extended tags we need (COG specifically)
+ TileWidth(322),
+ TileLength(323),
+ TileOffsets(324),
+ TileByteCounts(325),
+ GDALNoData(42113),
+ GeoKeyDirectoryTag(34735),
+ GeoDoubleParamsTag(34736),
+ GeoAsciiParamsTag(34737),
+ ModelPixelScaleTag(33550),
+ ModelTiepointTag(33922),
+ ModelTransformationTag(34264);
+
+
+ private final int value;
+
+ IFDTagDictionary(int value) {
+ this.value = value;
+ }
+
+ public int getValue() {
+ return value;
+ }
+
+ public static IFDTagDictionary valueOf(int value) {
+ for (IFDTagDictionary tag : IFDTagDictionary.values()) {
+ if (tag.getValue() == value) {
+ return tag;
+ }
+ }
+ return null;
+ }
+}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/main/java/org/apache/sysds/runtime/io/cog/SampleFormatDataTypes.java
similarity index 55%
copy from
src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
copy to src/main/java/org/apache/sysds/runtime/io/cog/SampleFormatDataTypes.java
index 8117bd345c..d4e359eb0d 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++ b/src/main/java/org/apache/sysds/runtime/io/cog/SampleFormatDataTypes.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- * O
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,26 +17,33 @@
* under the License.
*/
-package org.apache.sysds.runtime.compress.colgroup.dictionary;
+package org.apache.sysds.runtime.io.cog;
-import java.lang.ref.SoftReference;
+/**
+ * Enum for mapping sample formats of TIFF image data to names
+ */
+public enum SampleFormatDataTypes {
+ UNSIGNED_INTEGER(1),
+ SIGNED_INTEGER(2),
+ FLOATING_POINT(3),
+ UNDEFINED(4);
-public abstract class ACachingMBDictionary extends ADictionary {
+ private final int value;
- /** A Cache to contain a materialized version of the identity matrix. */
- protected volatile SoftReference<MatrixBlockDictionary> cache = null;
+ SampleFormatDataTypes(int value) {
+ this.value = value;
+ }
- @Override
- public final MatrixBlockDictionary getMBDict(int nCol) {
- if(cache != null) {
- MatrixBlockDictionary r = cache.get();
- if(r != null)
- return r;
- }
- MatrixBlockDictionary ret = createMBDict(nCol);
- cache = new SoftReference<>(ret);
- return ret;
+ public int getValue() {
+ return value;
}
- public abstract MatrixBlockDictionary createMBDict(int nCol);
+ public static SampleFormatDataTypes valueOf(int value) {
+ for (SampleFormatDataTypes dataType :
SampleFormatDataTypes.values()) {
+ if (dataType.getValue() == value) {
+ return dataType;
+ }
+ }
+ return null;
+ }
}
diff --git a/src/main/java/org/apache/sysds/runtime/io/cog/TIFFDataTypes.java
b/src/main/java/org/apache/sysds/runtime/io/cog/TIFFDataTypes.java
new file mode 100644
index 0000000000..1b0fef83e4
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/io/cog/TIFFDataTypes.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io.cog;
+
+/**
+ * Enum for mapping data types of IFD tags in TIFF to readable names
+ */
+public enum TIFFDataTypes {
+ BYTE(1),
+ ASCII(2),
+ SHORT(3),
+ LONG(4),
+ RATIONAL(5),
+ SBYTE(6),
+ UNDEFINED(7),
+ SSHORT(8),
+ SLONG(9),
+ SRATIONAL(10),
+ FLOAT(11),
+ DOUBLE(12),
+ LONG8(16),
+ SLONG8(17),
+ IFD8(18);
+
+ private final int value;
+
+ TIFFDataTypes(int value) {
+ this.value = value;
+ }
+
+ public int getValue() {
+ return value;
+ }
+
+ public int getSize() {
+ switch(this) {
+ case BYTE:
+ case ASCII:
+ case SBYTE:
+ case UNDEFINED:
+ return 1;
+ case SHORT:
+ case SSHORT:
+ return 2;
+ case LONG:
+ case SLONG:
+ case FLOAT:
+ return 4;
+ case RATIONAL:
+ case SRATIONAL:
+ case DOUBLE:
+ case LONG8:
+ case SLONG8:
+ case IFD8:
+ return 8;
+ default:
+ return 0;
+ }
+ }
+
+ public static TIFFDataTypes valueOf(int value) {
+ for (TIFFDataTypes dataType : TIFFDataTypes.values()) {
+ if (dataType.getValue() == value) {
+ return dataType;
+ }
+ }
+ return null;
+ }
+}
diff --git
a/src/test/java/org/apache/sysds/test/component/matrix/EigenDecompTest.java
b/src/test/java/org/apache/sysds/test/component/matrix/EigenDecompTest.java
index 94d64ca6aa..6292a14138 100644
--- a/src/test/java/org/apache/sysds/test/component/matrix/EigenDecompTest.java
+++ b/src/test/java/org/apache/sysds/test/component/matrix/EigenDecompTest.java
@@ -24,7 +24,6 @@ import static org.junit.Assert.fail;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.common.Opcodes;
import org.apache.sysds.runtime.matrix.data.LibCommonsMath;
import org.apache.sysds.runtime.matrix.data.LibMatrixMult;
import org.apache.sysds.runtime.matrix.data.LibMatrixReorg;
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/test/java/org/apache/sysds/test/functions/io/cog/COGTestBase.java
similarity index 52%
copy from
src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
copy to src/test/java/org/apache/sysds/test/functions/io/cog/COGTestBase.java
index 8117bd345c..767779df92 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/cog/COGTestBase.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- * O
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,26 +17,27 @@
* under the License.
*/
-package org.apache.sysds.runtime.compress.colgroup.dictionary;
+package org.apache.sysds.test.functions.io.cog;
-import java.lang.ref.SoftReference;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
-public abstract class ACachingMBDictionary extends ADictionary {
+public abstract class COGTestBase extends AutomatedTestBase {
+ protected final static String TEST_DIR = "functions/io/cog/";
+ protected static final Log LOG =
LogFactory.getLog(COGTestBase.class.getName());
+ protected final static double eps = 1e-6;
- /** A Cache to contain a materialized version of the identity matrix. */
- protected volatile SoftReference<MatrixBlockDictionary> cache = null;
+ protected abstract String getTestClassDir();
+
+ protected abstract String getTestName();
+
+ protected abstract int getScriptId();
@Override
- public final MatrixBlockDictionary getMBDict(int nCol) {
- if(cache != null) {
- MatrixBlockDictionary r = cache.get();
- if(r != null)
- return r;
- }
- MatrixBlockDictionary ret = createMBDict(nCol);
- cache = new SoftReference<>(ret);
- return ret;
+ public void setUp() {
+ addTestConfiguration(getTestName(),
+ new TestConfiguration(getTestClassDir(),
getTestName(), new String[] {"Rout"}));
}
-
- public abstract MatrixBlockDictionary createMBDict(int nCol);
}
diff --git
a/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTest.java
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTest.java
new file mode 100644
index 0000000000..f99f6a8ec5
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTest.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.io.cog;
+
+import org.apache.sysds.api.DMLScript;
+import org.apache.sysds.common.Types;
+import org.apache.sysds.conf.CompilerConfig;
+import org.apache.sysds.test.TestConfiguration;
+import org.apache.sysds.test.TestUtils;
+import org.junit.Test;
+
+
+
+public abstract class ReadCOGTest extends COGTestBase {
+ protected abstract int getId();
+
+ protected String getInputCOGFileName() {
+ return "testCOG_" + getId();
+ }
+
+ protected abstract double getResult();
+
+ @Test
+ public void testCOG_Seq_CP() {
+ runReadCOGTest(getId(), getResult(),
Types.ExecMode.SINGLE_NODE, false);
+ }
+ @Test
+ public void testCOG_Parallel_CP1() {
+ runReadCOGTest(getId(), getResult(),
Types.ExecMode.SINGLE_NODE, true);
+ }
+
+ @Test
+ public void testCOG_Parallel_CP() {
+ runReadCOGTest(getId(), getResult(), Types.ExecMode.HYBRID,
true);
+ }
+
+ // TODO: Spark
+
+
+
+ protected void runReadCOGTest(int testNumber, double result,
Types.ExecMode platform, boolean parallel) {
+ Types.ExecMode oldPlatform = rtplatform;
+ rtplatform = platform;
+
+ boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+ if(rtplatform == Types.ExecMode.SPARK)
+ DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+ boolean oldpar = CompilerConfig.FLAG_PARREADWRITE_TEXT; // set
to false for debugging maybeee
+
+ try {
+ CompilerConfig.FLAG_PARREADWRITE_TEXT = parallel;
+ TestConfiguration config =
getTestConfiguration(getTestName());
+ loadTestConfiguration(config);
+
+ String HOME = SCRIPT_DIR + TEST_DIR;
+ String inputMatrixName = DATASET_DIR + "cog/" +
getInputCOGFileName() + ".tif";
+
+ String dmlOutput = output("dml.scalar");
+
+ fullDMLScriptName = HOME + getTestName() + "_" +
getScriptId() + ".dml";
+ programArgs = new String[] {"-args", inputMatrixName,
dmlOutput};
+
+ runTest(true, false, null, -1);
+
+ double dmlScalarOutput =
TestUtils.readDMLScalar(dmlOutput);
+ TestUtils.compareScalars(dmlScalarOutput, result, eps *
getResult());
+ }
+ finally {
+ rtplatform = oldPlatform;
+ CompilerConfig.FLAG_PARREADWRITE_TEXT = oldpar;
+ DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+ }
+ }
+}
+
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTest1Tile1BandFloat32PC1ComNoneSquare.java
similarity index 55%
copy from
src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
copy to
src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTest1Tile1BandFloat32PC1ComNoneSquare.java
index 8117bd345c..eb93eb82bc 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTest1Tile1BandFloat32PC1ComNoneSquare.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- * O
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,26 +17,27 @@
* under the License.
*/
-package org.apache.sysds.runtime.compress.colgroup.dictionary;
+package org.apache.sysds.test.functions.io.cog;
-import java.lang.ref.SoftReference;
+public class ReadCOGTest1Tile1BandFloat32PC1ComNoneSquare extends ReadCOGTest {
+ private final static String TEST_NAME = "ReadCOGTest";
+ public final static String TEST_CLASS_DIR = TEST_DIR +
ReadCOGTest1Tile1BandFloat32PC1ComNoneSquare.class.getSimpleName() + "/";
-public abstract class ACachingMBDictionary extends ADictionary {
+ protected String getTestName() {
+ return TEST_NAME;
+ }
- /** A Cache to contain a materialized version of the identity matrix. */
- protected volatile SoftReference<MatrixBlockDictionary> cache = null;
+ protected String getTestClassDir() {
+ return TEST_CLASS_DIR;
+ }
- @Override
- public final MatrixBlockDictionary getMBDict(int nCol) {
- if(cache != null) {
- MatrixBlockDictionary r = cache.get();
- if(r != null)
- return r;
- }
- MatrixBlockDictionary ret = createMBDict(nCol);
- cache = new SoftReference<>(ret);
- return ret;
+ protected int getScriptId() {
+ return 1;
}
- public abstract MatrixBlockDictionary createMBDict(int nCol);
+ protected double getResult(){ return 510226.0; }
+
+ protected int getId() {
+ return 3;
+ }
}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTest1Tile1BandInt32PC1ComNoneSquare.java
similarity index 55%
copy from
src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
copy to
src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTest1Tile1BandInt32PC1ComNoneSquare.java
index 8117bd345c..6417fd96c1 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTest1Tile1BandInt32PC1ComNoneSquare.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- * O
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,26 +17,27 @@
* under the License.
*/
-package org.apache.sysds.runtime.compress.colgroup.dictionary;
+package org.apache.sysds.test.functions.io.cog;
-import java.lang.ref.SoftReference;
+public class ReadCOGTest1Tile1BandInt32PC1ComNoneSquare extends ReadCOGTest {
+ private final static String TEST_NAME = "ReadCOGTest";
+ public final static String TEST_CLASS_DIR = TEST_DIR +
ReadCOGTest1Tile1BandInt32PC1ComNoneSquare.class.getSimpleName() + "/";
-public abstract class ACachingMBDictionary extends ADictionary {
+ protected String getTestName() {
+ return TEST_NAME;
+ }
+
+ protected String getTestClassDir() {
+ return TEST_CLASS_DIR;
+ }
- /** A Cache to contain a materialized version of the identity matrix. */
- protected volatile SoftReference<MatrixBlockDictionary> cache = null;
+ protected double getResult(){ return -202351912174.0; }
- @Override
- public final MatrixBlockDictionary getMBDict(int nCol) {
- if(cache != null) {
- MatrixBlockDictionary r = cache.get();
- if(r != null)
- return r;
- }
- MatrixBlockDictionary ret = createMBDict(nCol);
- cache = new SoftReference<>(ret);
- return ret;
+ protected int getScriptId() {
+ return 1;
}
- public abstract MatrixBlockDictionary createMBDict(int nCol);
-}
+ protected int getId() {
+ return 4;
+ }
+}
\ No newline at end of file
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTiles1BandFloat32PC1ComNoneSquare.java
similarity index 55%
copy from
src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
copy to
src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTiles1BandFloat32PC1ComNoneSquare.java
index 8117bd345c..2584930bd5 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTiles1BandFloat32PC1ComNoneSquare.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- * O
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,26 +17,27 @@
* under the License.
*/
-package org.apache.sysds.runtime.compress.colgroup.dictionary;
+package org.apache.sysds.test.functions.io.cog;
-import java.lang.ref.SoftReference;
+public class ReadCOGTestNTiles1BandFloat32PC1ComNoneSquare extends ReadCOGTest
{
+ private final static String TEST_NAME = "ReadCOGTest";
+ public final static String TEST_CLASS_DIR = TEST_DIR +
ReadCOGTestNTiles1BandFloat32PC1ComNoneSquare.class.getSimpleName() + "/";
-public abstract class ACachingMBDictionary extends ADictionary {
+ protected String getTestName() {
+ return TEST_NAME;
+ }
+
+ protected String getTestClassDir() {
+ return TEST_CLASS_DIR;
+ }
- /** A Cache to contain a materialized version of the identity matrix. */
- protected volatile SoftReference<MatrixBlockDictionary> cache = null;
+ protected double getResult(){ return 126423.0; }
- @Override
- public final MatrixBlockDictionary getMBDict(int nCol) {
- if(cache != null) {
- MatrixBlockDictionary r = cache.get();
- if(r != null)
- return r;
- }
- MatrixBlockDictionary ret = createMBDict(nCol);
- cache = new SoftReference<>(ret);
- return ret;
+ protected int getScriptId() {
+ return 1;
}
- public abstract MatrixBlockDictionary createMBDict(int nCol);
+ protected int getId() {
+ return 1;
+ }
}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsBytePC1ComNoneSquare.java
similarity index 55%
copy from
src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
copy to
src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsBytePC1ComNoneSquare.java
index 8117bd345c..384fddbbe8 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsBytePC1ComNoneSquare.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- * O
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,26 +17,27 @@
* under the License.
*/
-package org.apache.sysds.runtime.compress.colgroup.dictionary;
+package org.apache.sysds.test.functions.io.cog;
-import java.lang.ref.SoftReference;
+public class ReadCOGTestNTilesNBandsBytePC1ComNoneSquare extends ReadCOGTest {
+ private final static String TEST_NAME = "ReadCOGTest";
+ public final static String TEST_CLASS_DIR = TEST_DIR +
ReadCOGTestNTilesNBandsBytePC1ComNoneSquare.class.getSimpleName() + "/";
-public abstract class ACachingMBDictionary extends ADictionary {
+ protected String getTestName() {
+ return TEST_NAME;
+ }
+
+ protected String getTestClassDir() {
+ return TEST_CLASS_DIR;
+ }
- /** A Cache to contain a materialized version of the identity matrix. */
- protected volatile SoftReference<MatrixBlockDictionary> cache = null;
+ protected double getResult(){ return 1552412.0; }
- @Override
- public final MatrixBlockDictionary getMBDict(int nCol) {
- if(cache != null) {
- MatrixBlockDictionary r = cache.get();
- if(r != null)
- return r;
- }
- MatrixBlockDictionary ret = createMBDict(nCol);
- cache = new SoftReference<>(ret);
- return ret;
+ protected int getScriptId() {
+ return 1;
}
- public abstract MatrixBlockDictionary createMBDict(int nCol);
-}
+ protected int getId() {
+ return 2;
+ }
+}
\ No newline at end of file
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC1ComNoneRect.java
similarity index 55%
copy from
src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
copy to
src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC1ComNoneRect.java
index 8117bd345c..e7ec740c35 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC1ComNoneRect.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- * O
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,26 +17,27 @@
* under the License.
*/
-package org.apache.sysds.runtime.compress.colgroup.dictionary;
+package org.apache.sysds.test.functions.io.cog;
-import java.lang.ref.SoftReference;
+public class ReadCOGTestNTilesNBandsUInt16PC1ComNoneRect extends ReadCOGTest {
+ private final static String TEST_NAME = "ReadCOGTest";
+ public final static String TEST_CLASS_DIR = TEST_DIR +
ReadCOGTestNTilesNBandsUInt16PC1ComNoneRect.class.getSimpleName() + "/";
-public abstract class ACachingMBDictionary extends ADictionary {
+ protected String getTestName() {
+ return TEST_NAME;
+ }
+
+ protected String getTestClassDir() {
+ return TEST_CLASS_DIR;
+ }
- /** A Cache to contain a materialized version of the identity matrix. */
- protected volatile SoftReference<MatrixBlockDictionary> cache = null;
+ protected double getResult(){ return 199571808.0; }
- @Override
- public final MatrixBlockDictionary getMBDict(int nCol) {
- if(cache != null) {
- MatrixBlockDictionary r = cache.get();
- if(r != null)
- return r;
- }
- MatrixBlockDictionary ret = createMBDict(nCol);
- cache = new SoftReference<>(ret);
- return ret;
+ protected int getScriptId() {
+ return 1;
}
- public abstract MatrixBlockDictionary createMBDict(int nCol);
-}
+ protected int getId() {
+ return 5;
+ }
+}
\ No newline at end of file
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC2ComDeflateRect.java
similarity index 55%
copy from
src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
copy to
src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC2ComDeflateRect.java
index 8117bd345c..6da693e1ad 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC2ComDeflateRect.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- * O
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,26 +17,27 @@
* under the License.
*/
-package org.apache.sysds.runtime.compress.colgroup.dictionary;
+package org.apache.sysds.test.functions.io.cog;
-import java.lang.ref.SoftReference;
+public class ReadCOGTestNTilesNBandsUInt16PC2ComDeflateRect extends
ReadCOGTest {
+ private final static String TEST_NAME = "ReadCOGTest";
+ public final static String TEST_CLASS_DIR = TEST_DIR +
ReadCOGTestNTilesNBandsUInt16PC2ComDeflateRect.class.getSimpleName() + "/";
-public abstract class ACachingMBDictionary extends ADictionary {
+ protected String getTestName() {
+ return TEST_NAME;
+ }
- /** A Cache to contain a materialized version of the identity matrix. */
- protected volatile SoftReference<MatrixBlockDictionary> cache = null;
+ protected String getTestClassDir() {
+ return TEST_CLASS_DIR;
+ }
- @Override
- public final MatrixBlockDictionary getMBDict(int nCol) {
- if(cache != null) {
- MatrixBlockDictionary r = cache.get();
- if(r != null)
- return r;
- }
- MatrixBlockDictionary ret = createMBDict(nCol);
- cache = new SoftReference<>(ret);
- return ret;
+ protected int getScriptId() {
+ return 1;
}
- public abstract MatrixBlockDictionary createMBDict(int nCol);
+ protected double getResult(){ return 199571808.0; }
+
+ protected int getId() {
+ return 6;
+ }
}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC2ComNoneRect.java
similarity index 55%
copy from
src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
copy to
src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC2ComNoneRect.java
index 8117bd345c..930ef0dbb3 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC2ComNoneRect.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- * O
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,26 +17,27 @@
* under the License.
*/
-package org.apache.sysds.runtime.compress.colgroup.dictionary;
+package org.apache.sysds.test.functions.io.cog;
-import java.lang.ref.SoftReference;
+public class ReadCOGTestNTilesNBandsUInt16PC2ComNoneRect extends ReadCOGTest {
+ private final static String TEST_NAME = "ReadCOGTest";
+ public final static String TEST_CLASS_DIR = TEST_DIR +
ReadCOGTestNTilesNBandsUInt16PC2ComNoneRect.class.getSimpleName() + "/";
-public abstract class ACachingMBDictionary extends ADictionary {
+ protected String getTestName() {
+ return TEST_NAME;
+ }
+
+ protected String getTestClassDir() {
+ return TEST_CLASS_DIR;
+ }
- /** A Cache to contain a materialized version of the identity matrix. */
- protected volatile SoftReference<MatrixBlockDictionary> cache = null;
+ protected double getResult(){ return 199571808.0; }
- @Override
- public final MatrixBlockDictionary getMBDict(int nCol) {
- if(cache != null) {
- MatrixBlockDictionary r = cache.get();
- if(r != null)
- return r;
- }
- MatrixBlockDictionary ret = createMBDict(nCol);
- cache = new SoftReference<>(ret);
- return ret;
+ protected int getScriptId() {
+ return 1;
}
- public abstract MatrixBlockDictionary createMBDict(int nCol);
-}
+ protected int getId() {
+ return 7;
+ }
+}
\ No newline at end of file
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC2ComNoneRectBIGTIFF.java
similarity index 55%
copy from
src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
copy to
src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC2ComNoneRectBIGTIFF.java
index 8117bd345c..fe13d96de8 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ACachingMBDictionary.java
+++
b/src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC2ComNoneRectBIGTIFF.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- * O
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,26 +17,27 @@
* under the License.
*/
-package org.apache.sysds.runtime.compress.colgroup.dictionary;
+package org.apache.sysds.test.functions.io.cog;
-import java.lang.ref.SoftReference;
+public class ReadCOGTestNTilesNBandsUInt16PC2ComNoneRectBIGTIFF extends
ReadCOGTest {
+ private final static String TEST_NAME = "ReadCOGTest";
+ public final static String TEST_CLASS_DIR = TEST_DIR +
ReadCOGTestNTilesNBandsUInt16PC2ComNoneRectBIGTIFF.class.getSimpleName() + "/";
-public abstract class ACachingMBDictionary extends ADictionary {
+ protected String getTestName() {
+ return TEST_NAME;
+ }
+
+ protected String getTestClassDir() {
+ return TEST_CLASS_DIR;
+ }
- /** A Cache to contain a materialized version of the identity matrix. */
- protected volatile SoftReference<MatrixBlockDictionary> cache = null;
+ protected double getResult(){ return 199571808.0; }
- @Override
- public final MatrixBlockDictionary getMBDict(int nCol) {
- if(cache != null) {
- MatrixBlockDictionary r = cache.get();
- if(r != null)
- return r;
- }
- MatrixBlockDictionary ret = createMBDict(nCol);
- cache = new SoftReference<>(ret);
- return ret;
+ protected int getScriptId() {
+ return 1;
}
- public abstract MatrixBlockDictionary createMBDict(int nCol);
-}
+ protected int getId() {
+ return 8;
+ }
+}
\ No newline at end of file
diff --git a/src/test/resources/datasets/cog/testCOG_1.tif
b/src/test/resources/datasets/cog/testCOG_1.tif
new file mode 100644
index 0000000000..4837648f52
Binary files /dev/null and b/src/test/resources/datasets/cog/testCOG_1.tif
differ
diff --git a/src/test/resources/datasets/cog/testCOG_1.tif.mtd
b/src/test/resources/datasets/cog/testCOG_1.tif.mtd
new file mode 100644
index 0000000000..01e8957017
--- /dev/null
+++ b/src/test/resources/datasets/cog/testCOG_1.tif.mtd
@@ -0,0 +1,5 @@
+{
+ "data_type": "matrix"
+ ,"format": "cog"
+ ,"description": { "author": "SystemDS" }
+}
\ No newline at end of file
diff --git a/src/test/resources/datasets/cog/testCOG_2.tif
b/src/test/resources/datasets/cog/testCOG_2.tif
new file mode 100644
index 0000000000..b8ac695119
Binary files /dev/null and b/src/test/resources/datasets/cog/testCOG_2.tif
differ
diff --git a/src/test/resources/datasets/cog/testCOG_2.tif.mtd
b/src/test/resources/datasets/cog/testCOG_2.tif.mtd
new file mode 100644
index 0000000000..01e8957017
--- /dev/null
+++ b/src/test/resources/datasets/cog/testCOG_2.tif.mtd
@@ -0,0 +1,5 @@
+{
+ "data_type": "matrix"
+ ,"format": "cog"
+ ,"description": { "author": "SystemDS" }
+}
\ No newline at end of file
diff --git a/src/test/resources/datasets/cog/testCOG_3.tif
b/src/test/resources/datasets/cog/testCOG_3.tif
new file mode 100644
index 0000000000..e7072543d0
Binary files /dev/null and b/src/test/resources/datasets/cog/testCOG_3.tif
differ
diff --git a/src/test/resources/datasets/cog/testCOG_3.tif.mtd
b/src/test/resources/datasets/cog/testCOG_3.tif.mtd
new file mode 100644
index 0000000000..01e8957017
--- /dev/null
+++ b/src/test/resources/datasets/cog/testCOG_3.tif.mtd
@@ -0,0 +1,5 @@
+{
+ "data_type": "matrix"
+ ,"format": "cog"
+ ,"description": { "author": "SystemDS" }
+}
\ No newline at end of file
diff --git a/src/test/resources/datasets/cog/testCOG_4.tif
b/src/test/resources/datasets/cog/testCOG_4.tif
new file mode 100644
index 0000000000..c442314b86
Binary files /dev/null and b/src/test/resources/datasets/cog/testCOG_4.tif
differ
diff --git a/src/test/resources/datasets/cog/testCOG_4.tif.mtd
b/src/test/resources/datasets/cog/testCOG_4.tif.mtd
new file mode 100644
index 0000000000..01e8957017
--- /dev/null
+++ b/src/test/resources/datasets/cog/testCOG_4.tif.mtd
@@ -0,0 +1,5 @@
+{
+ "data_type": "matrix"
+ ,"format": "cog"
+ ,"description": { "author": "SystemDS" }
+}
\ No newline at end of file
diff --git a/src/test/resources/datasets/cog/testCOG_5.tif
b/src/test/resources/datasets/cog/testCOG_5.tif
new file mode 100644
index 0000000000..01d8518577
Binary files /dev/null and b/src/test/resources/datasets/cog/testCOG_5.tif
differ
diff --git a/src/test/resources/datasets/cog/testCOG_5.tif.mtd
b/src/test/resources/datasets/cog/testCOG_5.tif.mtd
new file mode 100644
index 0000000000..01e8957017
--- /dev/null
+++ b/src/test/resources/datasets/cog/testCOG_5.tif.mtd
@@ -0,0 +1,5 @@
+{
+ "data_type": "matrix"
+ ,"format": "cog"
+ ,"description": { "author": "SystemDS" }
+}
\ No newline at end of file
diff --git a/src/test/resources/datasets/cog/testCOG_6.tif
b/src/test/resources/datasets/cog/testCOG_6.tif
new file mode 100644
index 0000000000..adaa2d6a75
Binary files /dev/null and b/src/test/resources/datasets/cog/testCOG_6.tif
differ
diff --git a/src/test/resources/datasets/cog/testCOG_6.tif.mtd
b/src/test/resources/datasets/cog/testCOG_6.tif.mtd
new file mode 100644
index 0000000000..01e8957017
--- /dev/null
+++ b/src/test/resources/datasets/cog/testCOG_6.tif.mtd
@@ -0,0 +1,5 @@
+{
+ "data_type": "matrix"
+ ,"format": "cog"
+ ,"description": { "author": "SystemDS" }
+}
\ No newline at end of file
diff --git a/src/test/resources/datasets/cog/testCOG_7.tif
b/src/test/resources/datasets/cog/testCOG_7.tif
new file mode 100644
index 0000000000..27dad7950f
Binary files /dev/null and b/src/test/resources/datasets/cog/testCOG_7.tif
differ
diff --git a/src/test/resources/datasets/cog/testCOG_7.tif.mtd
b/src/test/resources/datasets/cog/testCOG_7.tif.mtd
new file mode 100644
index 0000000000..01e8957017
--- /dev/null
+++ b/src/test/resources/datasets/cog/testCOG_7.tif.mtd
@@ -0,0 +1,5 @@
+{
+ "data_type": "matrix"
+ ,"format": "cog"
+ ,"description": { "author": "SystemDS" }
+}
\ No newline at end of file
diff --git a/src/test/resources/datasets/cog/testCOG_8.tif
b/src/test/resources/datasets/cog/testCOG_8.tif
new file mode 100644
index 0000000000..9ad99e385a
Binary files /dev/null and b/src/test/resources/datasets/cog/testCOG_8.tif
differ
diff --git a/src/test/resources/datasets/cog/testCOG_8.tif.mtd
b/src/test/resources/datasets/cog/testCOG_8.tif.mtd
new file mode 100644
index 0000000000..01e8957017
--- /dev/null
+++ b/src/test/resources/datasets/cog/testCOG_8.tif.mtd
@@ -0,0 +1,5 @@
+{
+ "data_type": "matrix"
+ ,"format": "cog"
+ ,"description": { "author": "SystemDS" }
+}
\ No newline at end of file
diff --git a/src/test/scripts/functions/io/cog/ReadCOGTest_1.dml
b/src/test/scripts/functions/io/cog/ReadCOGTest_1.dml
new file mode 100644
index 0000000000..0be5fe140a
--- /dev/null
+++ b/src/test/scripts/functions/io/cog/ReadCOGTest_1.dml
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# DML script that tests read COG
+
+A = read($1, format="cog");
+x = sum(A);
+write(x, $2);
\ No newline at end of file