Baunsgaard commented on a change in pull request #971:
URL: https://github.com/apache/systemml/pull/971#discussion_r441597515



##########
File path: src/main/java/org/apache/sysds/runtime/io/FrameWriterFactory.java
##########
@@ -55,6 +55,10 @@ public static FrameWriter createFrameWriter( FileFormat fmt, 
FileFormatPropertie
                                else
                                        writer = new FrameWriterBinaryBlock();
                                break;
+
+                       case PROTO:
+                               writer = new FrameWriterProto();

Review comment:
       Maybe add a parallel TODO again.

##########
File path: src/main/java/org/apache/sysds/runtime/io/FrameWriterProto.java
##########
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.io;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.sysds.conf.ConfigurationManager;
+import org.apache.sysds.protobuf.SysdsProtos;
+import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.matrix.data.FrameBlock;
+import org.apache.sysds.runtime.util.HDFSTool;
+
+public class FrameWriterProto extends FrameWriter {
+       @Override
+       public void writeFrameToHDFS(FrameBlock src, String fname, long rlen, 
long clen)
+               throws IOException, DMLRuntimeException {
+               // prepare file access
+               JobConf job = new 
JobConf(ConfigurationManager.getCachedJobConf());
+               Path path = new Path(fname);
+
+               // if the file already exists on HDFS, remove it.
+               HDFSTool.deleteFileIfExistOnHDFS(fname);
+
+               // validity check frame dimensions
+               if(src.getNumRows() != rlen || src.getNumColumns() != clen) {
+                       throw new IOException("Frame dimensions mismatch with 
metadata: " + src.getNumRows() + "x"
+                               + src.getNumColumns() + " vs " + rlen + "x" + 
clen + ".");
+               }
+
+               writeProtoFrameToHDFS(path, job, src, rlen, clen);
+       }
+
+       protected void writeProtoFrameToHDFS(Path path, JobConf jobConf, 
FrameBlock src, long rlen, long clen)
+               throws IOException {
+               FileSystem fileSystem = IOUtilFunctions.getFileSystem(path, 
jobConf);
+               writeProtoFrameToFile(path, fileSystem, src, 0, (int) rlen);
+               IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fileSystem, 
path);
+       }
+
+       protected void writeProtoFrameToFile(Path path, FileSystem fileSystem, 
FrameBlock src, int lowerRowBound,
+               int upperRowBound) throws IOException {
+               // what about > 2GB Protobuf Messages?

Review comment:
       Does this mean that Protobuf messages larger than 2 GB are impossible?

##########
File path: src/main/java/org/apache/sysds/runtime/io/FrameReaderProto.java
##########
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sysds.runtime.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.sysds.common.Types;
+import org.apache.sysds.conf.ConfigurationManager;
+import org.apache.sysds.protobuf.SysdsProtos;
+import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.matrix.data.FrameBlock;
+import org.apache.sysds.runtime.util.UtilFunctions;
+
+public class FrameReaderProto extends FrameReader {
+       @Override
+       public FrameBlock readFrameFromHDFS(String fname, Types.ValueType[] 
schema, String[] names, long rlen, long clen)
+               throws IOException, DMLRuntimeException {

Review comment:
       Runtime exceptions do not need to be declared as thrown.

##########
File path: src/main/resources/protobuf/Frame.proto
##########
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+syntax = "proto3";
+package sysds;
+option java_package = "org.apache.sysds.protobuf";
+option java_outer_classname = "SysdsProtos";
+option java_multiple_files = false;
+
+message Frame {
+    repeated Row rows = 1;
+}
+
+message Row {
+    repeated string column_names = 1;
+    repeated string column_data = 2;
+    repeated Schema column_schema = 3;
+}
+
+message Schema {
+    enum ValueType {
+        FP32 = 0;
+        FP64 = 1;
+        INT32 = 2;
+        INT64 = 3;
+        BOOLEAN = 4;
+        STRING = 5;
+        UNKNOWN = 6;
+    }
+    repeated ValueType valueType = 1;
+}

Review comment:
       Just to make Git happy, add a single newline at the end of the file.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to