Repository: incubator-systemml Updated Branches: refs/heads/master 48ca2010a -> 199562313
[SYSTEMML-562]Frame Right Indexing Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/19956231 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/19956231 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/19956231 Branch: refs/heads/master Commit: 1995623139f0dc8f452639ba6eadc75136fe07cc Parents: 48ca201 Author: Arvind Surve <ac...@yahoo.com> Authored: Tue Jun 28 09:23:12 2016 -0700 Committer: Arvind Surve <ac...@yahoo.com> Committed: Tue Jun 28 09:23:12 2016 -0700 ---------------------------------------------------------------------- .../spark/FrameIndexingSPInstruction.java | 120 ++++++++++++- .../spark/functions/IsFrameBlockInRange.java | 47 ++++++ .../matrix/data/OperationsOnMatrixValues.java | 2 +- .../sysml/runtime/util/UtilFunctions.java | 5 +- .../functions/frame/FrameIndexingDistTest.java | 169 ++++++++++++++----- .../functions/frame/FrameIndexingDistTest.R | 42 ----- .../functions/frame/FrameIndexingDistTest.dml | 32 ---- .../scripts/functions/frame/FrameLeftIndexing.R | 42 +++++ .../functions/frame/FrameLeftIndexing.dml | 32 ++++ .../functions/frame/FrameRightIndexing.R | 36 ++++ .../functions/frame/FrameRightIndexing.dml | 29 ++++ 11 files changed, 432 insertions(+), 124 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/main/java/org/apache/sysml/runtime/instructions/spark/FrameIndexingSPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/FrameIndexingSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/FrameIndexingSPInstruction.java index c1d8766..ac45ec0 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/FrameIndexingSPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/FrameIndexingSPInstruction.java @@ -35,6 +35,7 @@ import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext; import org.apache.sysml.runtime.instructions.cp.CPOperand; import org.apache.sysml.runtime.instructions.spark.data.LazyIterableIterator; import org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast; +import org.apache.sysml.runtime.instructions.spark.functions.IsFrameBlockInRange; import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils; import org.apache.sysml.runtime.instructions.spark.utils.SparkUtils; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; @@ -88,8 +89,37 @@ public class FrameIndexingSPInstruction extends IndexingSPInstruction long cu = ec.getScalarInput(colUpper.getName(), colUpper.getValueType(), colUpper.isLiteral()).getLongValue(); IndexRange ixrange = new IndexRange(rl, ru, cl, cu); + //right indexing + if( opcode.equalsIgnoreCase("rangeReIndex") ) + { + //update and check output dimensions + MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName()); + MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName()); + mcOut.set(ru-rl+1, cu-cl+1, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock()); + checkValidOutputDimensions(mcOut); + + //execute right indexing operation (partitioning-preserving if possible) + JavaPairRDD<Long,FrameBlock> in1 = sec.getFrameBinaryBlockRDDHandleForVariable( input1.getName() ); + JavaPairRDD<Long,FrameBlock> out = null; + if( isPartitioningPreservingRightIndexing(mcIn, ixrange) ) { + out = in1.mapPartitionsToPair( + new SliceBlockPartitionFunction(ixrange, mcOut), true); + } + else{ + out = in1.filter(new IsFrameBlockInRange(rl, ru, mcOut)) + .flatMapToPair(new SliceBlock(ixrange, mcOut)); + + //aggregation if required + if( _aggType != SparkAggType.NONE ) + out = RDDAggregateUtils.mergeByFrameKey(out); + } + + //put output RDD handle into symbol table + sec.setRDDHandleForVariable(output.getName(), out); + sec.addLineageRDD(output.getName(), input1.getName()); + } //left indexing - if ( opcode.equalsIgnoreCase("leftIndex") || opcode.equalsIgnoreCase("mapLeftIndex")) + else if ( opcode.equalsIgnoreCase("leftIndex") || opcode.equalsIgnoreCase("mapLeftIndex")) { JavaPairRDD<Long,FrameBlock> in1 = sec.getFrameBinaryBlockRDDHandleForVariable( input1.getName() ); PartitionedBroadcast<FrameBlock> broadcastIn2 = null; @@ -146,6 +176,19 @@ public class FrameIndexingSPInstruction extends IndexingSPInstruction /** * + * @param mcIn + * @param ixrange + * @return + */ + private boolean isPartitioningPreservingRightIndexing(MatrixCharacteristics mcIn, IndexRange ixrange) + { + return ( mcIn.dimsKnown() && + (ixrange.rowStart==1 && ixrange.rowEnd==mcIn.getRows() )); //Entire Column/s + } + + + /** + * * @param mcOut * @throws DMLRuntimeException */ @@ -327,4 +370,79 @@ public class FrameIndexingSPInstruction extends IndexingSPInstruction } } } + + /** + * + */ + private static class SliceBlock implements PairFlatMapFunction<Tuple2<Long, FrameBlock>, Long, FrameBlock> + { + private static final long serialVersionUID = -5270171193018691692L; + + private IndexRange _ixrange; + private int _brlen; + private int _bclen; + + public SliceBlock(IndexRange ixrange, MatrixCharacteristics mcOut) { + _ixrange = ixrange; + _brlen = OptimizerUtils.getDefaultFrameSize(); + _bclen = (int) mcOut.getCols(); + } + + @Override + public Iterable<Tuple2<Long, FrameBlock>> call(Tuple2<Long, FrameBlock> kv) + throws Exception + { + Pair<Long, FrameBlock> in = SparkUtils.toIndexedFrameBlock(kv); + + ArrayList<Pair<Long, FrameBlock>> outlist = new ArrayList<Pair<Long, FrameBlock>>(); + OperationsOnMatrixValues.performSlice(in, _ixrange, _brlen, _bclen, outlist); + + return SparkUtils.fromIndexedFrameBlock(outlist); + } + } + + /** + * + */ + private static class SliceBlockPartitionFunction implements PairFlatMapFunction<Iterator<Tuple2<Long, FrameBlock>>, Long, FrameBlock> + { + private static final long serialVersionUID = -1655390518299307588L; + + private IndexRange _ixrange; + private int _brlen; + private int _bclen; + + public SliceBlockPartitionFunction(IndexRange ixrange, MatrixCharacteristics mcOut) { + _ixrange = ixrange; + _brlen = (int) Math.min(OptimizerUtils.getDefaultFrameSize(), mcOut.getRows()); + _bclen = (int) mcOut.getCols(); + } + + @Override + public Iterable<Tuple2<Long, FrameBlock>> call(Iterator<Tuple2<Long, FrameBlock>> arg0) + throws Exception + { + return new SliceBlockPartitionIterator(arg0); + } + + private class SliceBlockPartitionIterator extends LazyIterableIterator<Tuple2<Long, FrameBlock>> + { + public SliceBlockPartitionIterator(Iterator<Tuple2<Long, FrameBlock>> in) { + super(in); + } + + @Override + protected Tuple2<Long, FrameBlock> computeNext(Tuple2<Long, FrameBlock> arg) + throws Exception + { + Pair<Long, FrameBlock> in = SparkUtils.toIndexedFrameBlock(arg); + + ArrayList<Pair<Long, FrameBlock>> outlist = new ArrayList<Pair<Long, FrameBlock>>(); + OperationsOnMatrixValues.performSlice(in, _ixrange, _brlen, _bclen, outlist); + + assert(outlist.size() == 1); //1-1 row/column block indexing + return SparkUtils.fromIndexedFrameBlock(outlist.get(0)); + } + } + } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/IsFrameBlockInRange.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/IsFrameBlockInRange.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/IsFrameBlockInRange.java new file mode 100644 index 0000000..939042e --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/IsFrameBlockInRange.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.instructions.spark.functions; + +import org.apache.spark.api.java.function.Function; + +import scala.Tuple2; + +import org.apache.sysml.runtime.matrix.MatrixCharacteristics; +import org.apache.sysml.runtime.matrix.data.FrameBlock; +import org.apache.sysml.runtime.util.UtilFunctions; + +public class IsFrameBlockInRange implements Function<Tuple2<Long,FrameBlock>, Boolean> +{ + private static final long serialVersionUID = 4433918122474769296L; + + private long _rl, _ru; + + public IsFrameBlockInRange(long rl, long ru, MatrixCharacteristics mcOut) { + _rl = rl; + _ru = ru; + } + + @Override + public Boolean call(Tuple2<Long, FrameBlock> kv) + throws Exception + { + return UtilFunctions.isInFrameBlockRange(kv._1(), kv._2().getNumRows(), _rl, _ru); + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java b/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java index be12228..8ea1ddd 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java @@ -542,7 +542,7 @@ public class OperationsOnMatrixValues for(long r=resultBlockIndexTop; r<=resultBlockIndexBottom; r++) { List<ValueType> schema = UtilFunctions.getSubSchema(block.getSchema(), tmpRange.colStart, tmpRange.colEnd); - long iResultIndex = (r-1)*brlen+tmpRange.rowStart; + long iResultIndex = Math.max(((r-1)*brlen - ixrange.rowStart + 1), 0); Pair<Long,FrameBlock> out=new Pair<Long,FrameBlock>(new Long(iResultIndex+1), new FrameBlock(schema)); outlist.add(out); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java index a904651..5e6e5b7 100644 --- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java +++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java @@ -152,7 +152,7 @@ public class UtilFunctions * @param ru * @return */ - public static boolean isInFrameBlockRange( Long ix, int brlen, int bclen, long rl, long ru ) + public static boolean isInFrameBlockRange( Long ix, int brlen, long rl, long ru ) { if(rl > ix+brlen-1 || ru < ix) return false; @@ -185,8 +185,7 @@ public class UtilFunctions */ public static boolean isInFrameBlockRange( Long ix, int brlen, int bclen, IndexRange ixrange ) { - return isInFrameBlockRange(ix, brlen, bclen, - ixrange.rowStart, ixrange.rowEnd); + return isInFrameBlockRange(ix, brlen, ixrange.rowStart, ixrange.rowEnd); } // Reused by both MR and Spark for performing zero out http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameIndexingDistTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameIndexingDistTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameIndexingDistTest.java index 1ba1a32..b43dc6b 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameIndexingDistTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameIndexingDistTest.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; import org.junit.Assert; @@ -48,8 +49,15 @@ public class FrameIndexingDistTest extends AutomatedTestBase private final static String TEST_DIR = "functions/frame/"; private final static String TEST_CLASS_DIR = TEST_DIR + FrameIndexingDistTest.class.getSimpleName() + "/"; - private final static String TEST_NAME = "FrameIndexingDistTest"; + private final static String TEST_NAME = "FrameLeftIndexing"; + private final static String RTEST_NAME = "FrameRightIndexing"; + private enum IXType { + RIX, + LIX, + } + + private final static double epsilon=0.0000000001; // Test data with 2 blocks of rows and columns @@ -58,6 +66,12 @@ public class FrameIndexingDistTest extends AutomatedTestBase private final static int min=0; private final static int max=100; + private final static double sparsity1 = 1.0; + private final static double sparsity2 = 0.5; + private final static double sparsity3 = 0.1; + private final static double sparsity4 = 0.01; + + private final static List<ValueType> schemaMixedLargeListStr = Collections.nCopies(cols/4, ValueType.STRING); private final static List<ValueType> schemaMixedLargeListDble = Collections.nCopies(cols/4, ValueType.DOUBLE); private final static List<ValueType> schemaMixedLargeListInt = Collections.nCopies(cols/4, ValueType.INT); @@ -74,27 +88,51 @@ public class FrameIndexingDistTest extends AutomatedTestBase @Override public void setUp() { - addTestConfiguration("FrameIndexingDistTest", new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, + addTestConfiguration("FrameLeftIndexing", new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] {"AB", "AC", "AD"})); + addTestConfiguration("FrameRightIndexing", new TestConfiguration(TEST_CLASS_DIR, RTEST_NAME, + new String[] {"B", "C", "D"})); } // Left Indexing Spark test cases @Test public void testMapLeftIndexingSP() throws DMLRuntimeException, IOException { - runTestLeftIndexing(ExecType.SPARK, LeftIndexingMethod.SP_MLEFTINDEX, schemaMixedLarge); + runTestLeftIndexing(ExecType.SPARK, LeftIndexingMethod.SP_MLEFTINDEX, schemaMixedLarge, IXType.LIX, true); } @Test public void testGeneralLeftIndexingSP() throws DMLRuntimeException, IOException { - runTestLeftIndexing(ExecType.SPARK, LeftIndexingMethod.SP_GLEFTINDEX, schemaMixedLarge); + runTestLeftIndexing(ExecType.SPARK, LeftIndexingMethod.SP_GLEFTINDEX, schemaMixedLarge, IXType.LIX, true); } - private void runTestLeftIndexing(ExecType et, LeftIndexingOp.LeftIndexingMethod indexingMethod, ValueType[] schema) throws DMLRuntimeException, IOException { + + // Right Indexing Spark test cases + @Test + public void testRightIndexingSPSparse() throws DMLRuntimeException, IOException { + runTestLeftIndexing(ExecType.SPARK, null, schemaMixedLarge, IXType.RIX, true); + } + + @Test + public void testRightIndexingSPDense() throws DMLRuntimeException, IOException { + runTestLeftIndexing(ExecType.SPARK, null, schemaMixedLarge, IXType.RIX, false); + } + + + + private void runTestLeftIndexing(ExecType et, LeftIndexingOp.LeftIndexingMethod indexingMethod, ValueType[] schema, IXType itype, boolean bSparse) throws DMLRuntimeException, IOException { boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; RUNTIME_PLATFORM oldRTP = rtplatform; - TestConfiguration config = getTestConfiguration("FrameIndexingDistTest"); + TestConfiguration config = null; + + HashMap<String, ValueType[]> outputSchema = new HashMap<String, ValueType[]>(); + + if (itype == IXType.LIX) + config = getTestConfiguration("FrameLeftIndexing"); + else + config = getTestConfiguration("FrameRightIndexing"); + try { if(indexingMethod != null) { @@ -123,47 +161,87 @@ public class FrameIndexingDistTest extends AutomatedTestBase config.addVariable("colend", colend); loadTestConfiguration(config); - /* This is for running the junit test the new way, i.e., construct the arguments directly */ - String LI_HOME = SCRIPT_DIR + TEST_DIR; - fullDMLScriptName = LI_HOME + TEST_NAME + ".dml"; - programArgs = new String[]{"-args", input("A"), - Long.toString(rows), Long.toString(cols), - Long.toString(rowstart), Long.toString(rowend), - Long.toString(colstart), Long.toString(colend), - output("AB"), output("AC"), output("AD"), - input("B"), input("C"), input("D"), - Long.toString(rowend-rowstart+1), - Long.toString(colend-colstart+1), - Long.toString(cols-colstart+1)}; - - fullRScriptName = LI_HOME + TEST_NAME + ".R"; - rCmd = "Rscript" + " " + fullRScriptName + " " + - inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir(); - - //initialize the frame data. - List<ValueType> lschema = Arrays.asList(schema); + if (itype == IXType.LIX) { + /* This is for running the junit test the new way, i.e., construct the arguments directly */ + String LI_HOME = SCRIPT_DIR + TEST_DIR; + fullDMLScriptName = LI_HOME + TEST_NAME + ".dml"; + programArgs = new String[]{"-args", input("A"), + Long.toString(rows), Long.toString(cols), + Long.toString(rowstart), Long.toString(rowend), + Long.toString(colstart), Long.toString(colend), + output("AB"), output("AC"), output("AD"), + input("B"), input("C"), input("D"), + Long.toString(rowend-rowstart+1), + Long.toString(colend-colstart+1), + Long.toString(cols-colstart+1)}; + + fullRScriptName = LI_HOME + TEST_NAME + ".R"; + rCmd = "Rscript" + " " + fullRScriptName + " " + + inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir(); + + //initialize the frame data. + List<ValueType> lschema = Arrays.asList(schema); + + double sparsity=sparsity1;//rand.nextDouble(); + double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 1111 /*\\System.currentTimeMillis()*/); + writeInputFrameWithMTD("A", A, true, lschema, OutputInfo.BinaryBlockOutputInfo); + + sparsity=sparsity3;//rand.nextDouble(); + double[][] B = getRandomMatrix((int)(rowend-rowstart+1), (int)(colend-colstart+1), min, max, sparsity, 2345 /*System.currentTimeMillis()*/); + List<ValueType> lschemaB = lschema.subList((int)colstart-1, (int)colend); + writeInputFrameWithMTD("B", B, true, lschemaB, OutputInfo.BinaryBlockOutputInfo); - double sparsity=1.0;//rand.nextDouble(); - double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 1111 /*\\System.currentTimeMillis()*/); - writeInputFrameWithMTD("A", A, true, lschema, OutputInfo.BinaryBlockOutputInfo); - - sparsity=0.1;//rand.nextDouble(); - double[][] B = getRandomMatrix((int)(rowend-rowstart+1), (int)(colend-colstart+1), min, max, sparsity, 2345 /*System.currentTimeMillis()*/); - List<ValueType> lschemaB = lschema.subList((int)colstart-1, (int)colend); - writeInputFrameWithMTD("B", B, true, lschemaB, OutputInfo.BinaryBlockOutputInfo); + sparsity=sparsity2;//rand.nextDouble(); + double[][] C = getRandomMatrix((int)(rowend), (int)(cols-colstart+1), min, max, sparsity, 3267 /*System.currentTimeMillis()*/); + List<ValueType> lschemaC = lschema.subList((int)colstart-1, (int)cols); + writeInputFrameWithMTD("C", C, true, lschemaC, OutputInfo.BinaryBlockOutputInfo); + + sparsity=sparsity4;//rand.nextDoublBe(); + double[][] D = getRandomMatrix(rows, (int)(colend-colstart+1), min, max, sparsity, 4856 /*System.currentTimeMillis()*/); + writeInputFrameWithMTD("D", D, true, lschemaB, OutputInfo.BinaryBlockOutputInfo); + + boolean exceptionExpected = false; + int expectedNumberOfJobs = -1; + runTest(true, exceptionExpected, null, expectedNumberOfJobs); + + for(String file: config.getOutputFiles()) + outputSchema.put(file, schema); + } + else { + /* This is for running the junit test the new way, i.e., construct the arguments directly */ + String RI_HOME = SCRIPT_DIR + TEST_DIR; + fullDMLScriptName = RI_HOME + RTEST_NAME + ".dml"; + programArgs = new String[]{"-stats", "-explain","-args", input("A"), + Long.toString(rows), Long.toString(cols), + Long.toString(rowstart), Long.toString(rowend), + Long.toString(colstart), Long.toString(colend), + output("B"), output("C"), output("D")}; + + fullRScriptName = RI_HOME + RTEST_NAME + ".R"; + rCmd = "Rscript" + " " + fullRScriptName + " " + + inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir(); + + //initialize the frame data. + List<ValueType> lschema = Arrays.asList(schema); + + double sparsity = bSparse ? sparsity4 : sparsity2; + double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 1111 /*\\System.currentTimeMillis()*/); + writeInputFrameWithMTD("A", A, true, lschema, OutputInfo.BinaryBlockOutputInfo); + + ValueType[] schemaB = new ValueType[(int) (colend-colstart+1)]; + System.arraycopy(schema, (int)(colstart-1), schemaB, 0, (int)(colend-colstart+1)); + outputSchema.put(config.getOutputFiles()[0], schemaB); - sparsity=0.5;//rand.nextDouble(); - double[][] C = getRandomMatrix((int)(rowend), (int)(cols-colstart+1), min, max, sparsity, 3267 /*System.currentTimeMillis()*/); - List<ValueType> lschemaC = lschema.subList((int)colstart-1, (int)cols); - writeInputFrameWithMTD("C", C, true, lschemaC, OutputInfo.BinaryBlockOutputInfo); + ValueType[] schemaC = new ValueType[(int) (cols-colstart+1)]; + System.arraycopy(schema, (int)(colstart-1), schemaC, 0, (int)(cols-colstart+1)); + outputSchema.put(config.getOutputFiles()[1], schemaC); - sparsity=0.01;//rand.nextDoublBe(); - double[][] D = getRandomMatrix(rows, (int)(colend-colstart+1), min, max, sparsity, 4856 /*System.currentTimeMillis()*/); - writeInputFrameWithMTD("D", D, true, lschemaB, OutputInfo.BinaryBlockOutputInfo); - - boolean exceptionExpected = false; - int expectedNumberOfJobs = -1; - runTest(true, exceptionExpected, null, expectedNumberOfJobs); + outputSchema.put(config.getOutputFiles()[2], schemaB); + + boolean exceptionExpected = false; + int expectedNumberOfJobs = -1; + runTest(true, exceptionExpected, null, expectedNumberOfJobs); + } } catch(Exception ex) { ex.printStackTrace(); @@ -183,7 +261,8 @@ public class FrameIndexingDistTest extends AutomatedTestBase FrameBlock frameBlock = readDMLFrameFromHDFS(file, InputInfo.BinaryBlockInputInfo); MatrixCharacteristics md = new MatrixCharacteristics(frameBlock.getNumRows(), frameBlock.getNumColumns(), -1, -1); FrameBlock frameRBlock = readRFrameFromHDFS(file+".csv", InputInfo.CSVInputInfo, md); - verifyFrameData(frameBlock, frameRBlock, schema); + ValueType[] schemaOut = outputSchema.get(file); + verifyFrameData(frameBlock, frameRBlock, schemaOut); System.out.println("File processed is " + file); } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/test/scripts/functions/frame/FrameIndexingDistTest.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/frame/FrameIndexingDistTest.R b/src/test/scripts/functions/frame/FrameIndexingDistTest.R deleted file mode 100644 index b3b55a9..0000000 --- a/src/test/scripts/functions/frame/FrameIndexingDistTest.R +++ /dev/null @@ -1,42 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - - -args <- commandArgs(TRUE) -options(digits=22) -library("Matrix") - -A=read.csv(paste(args[1], "A.csv", sep=""), header = FALSE, stringsAsFactors=FALSE) -B=read.csv(paste(args[1], "B.csv", sep=""), header = FALSE, stringsAsFactors=FALSE) -C=read.csv(paste(args[1], "C.csv", sep=""), header = FALSE, stringsAsFactors=FALSE) -D=read.csv(paste(args[1], "D.csv", sep=""), header = FALSE, stringsAsFactors=FALSE) - -A[args[2]:args[3],args[4]:args[5]]=0 -A[args[2]:args[3],args[4]:args[5]]=B -write.csv(A, paste(args[6], "AB.csv", sep=""), row.names = FALSE, quote = FALSE) - -A[1:args[3],args[4]:ncol(A)]=0 -A[1:args[3],args[4]:ncol(A)]=C -write.csv(A, paste(args[6], "AC.csv", sep=""), row.names = FALSE, quote = FALSE) - -A[,args[4]:args[5]]=0 -A[,args[4]:args[5]]=D -write.csv(A, paste(args[6], "AD.csv", sep=""), row.names = FALSE, quote = FALSE) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/test/scripts/functions/frame/FrameIndexingDistTest.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/frame/FrameIndexingDistTest.dml b/src/test/scripts/functions/frame/FrameIndexingDistTest.dml deleted file mode 100644 index 5e998cb..0000000 --- a/src/test/scripts/functions/frame/FrameIndexingDistTest.dml +++ /dev/null @@ -1,32 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - - -A=read($1, data_type="frame", rows=$2, cols=$3, format="binary") -B=read($11, data_type="frame", rows=$14, cols=$15, format="binary") -C=read($12, data_type="frame", rows=$5, cols=$16, format="binary") -D=read($13, data_type="frame", rows=$2, cols=$15, format="binary") -A[$4:$5,$6:$7]=B -write(A, $8, format="binary") -A[1:$5,$6:ncol(A)]=C -write(A, $9, format="binary") -A[,$6:$7]=D -write(A, $10, format="binary") http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/test/scripts/functions/frame/FrameLeftIndexing.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/frame/FrameLeftIndexing.R b/src/test/scripts/functions/frame/FrameLeftIndexing.R new file mode 100644 index 0000000..b3b55a9 --- /dev/null +++ b/src/test/scripts/functions/frame/FrameLeftIndexing.R @@ -0,0 +1,42 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + + +args <- commandArgs(TRUE) +options(digits=22) +library("Matrix") + +A=read.csv(paste(args[1], "A.csv", sep=""), header = FALSE, stringsAsFactors=FALSE) +B=read.csv(paste(args[1], "B.csv", sep=""), header = FALSE, stringsAsFactors=FALSE) +C=read.csv(paste(args[1], "C.csv", sep=""), header = FALSE, stringsAsFactors=FALSE) +D=read.csv(paste(args[1], "D.csv", sep=""), header = FALSE, stringsAsFactors=FALSE) + +A[args[2]:args[3],args[4]:args[5]]=0 +A[args[2]:args[3],args[4]:args[5]]=B +write.csv(A, paste(args[6], "AB.csv", sep=""), row.names = FALSE, quote = FALSE) + +A[1:args[3],args[4]:ncol(A)]=0 +A[1:args[3],args[4]:ncol(A)]=C +write.csv(A, paste(args[6], "AC.csv", sep=""), row.names = FALSE, quote = FALSE) + +A[,args[4]:args[5]]=0 +A[,args[4]:args[5]]=D +write.csv(A, paste(args[6], "AD.csv", sep=""), row.names = FALSE, quote = FALSE) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/test/scripts/functions/frame/FrameLeftIndexing.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/frame/FrameLeftIndexing.dml b/src/test/scripts/functions/frame/FrameLeftIndexing.dml new file mode 100644 index 0000000..5e998cb --- /dev/null +++ b/src/test/scripts/functions/frame/FrameLeftIndexing.dml @@ -0,0 +1,32 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + + +A=read($1, data_type="frame", rows=$2, cols=$3, format="binary") +B=read($11, data_type="frame", rows=$14, cols=$15, format="binary") +C=read($12, data_type="frame", rows=$5, cols=$16, format="binary") +D=read($13, data_type="frame", rows=$2, cols=$15, format="binary") +A[$4:$5,$6:$7]=B +write(A, $8, format="binary") +A[1:$5,$6:ncol(A)]=C +write(A, $9, format="binary") +A[,$6:$7]=D +write(A, $10, format="binary") http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/test/scripts/functions/frame/FrameRightIndexing.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/frame/FrameRightIndexing.R b/src/test/scripts/functions/frame/FrameRightIndexing.R new file mode 100644 index 0000000..e72d8f4 --- /dev/null +++ b/src/test/scripts/functions/frame/FrameRightIndexing.R @@ -0,0 +1,36 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + + +args <- commandArgs(TRUE) +options(digits=22) +library("Matrix") + + +A=read.csv(paste(args[1], "A.csv", sep=""), header = FALSE, stringsAsFactors=FALSE) + +B=A[args[2]:args[3],args[4]:args[5]] +C=A[1:args[3],args[4]:ncol(A)] +D=A[,args[4]:args[5]] + +write.csv(B, paste(args[6], "B.csv", sep=""), row.names = FALSE, quote = FALSE) +write.csv(C, paste(args[6], "C.csv", sep=""), row.names = FALSE, quote = FALSE) +write.csv(D, paste(args[6], "D.csv", sep=""), row.names = FALSE, quote = FALSE) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19956231/src/test/scripts/functions/frame/FrameRightIndexing.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/frame/FrameRightIndexing.dml b/src/test/scripts/functions/frame/FrameRightIndexing.dml new file mode 100644 index 0000000..5d68caf --- /dev/null +++ b/src/test/scripts/functions/frame/FrameRightIndexing.dml @@ -0,0 +1,29 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + + +A=read($1, data_type="frame", rows=$2, cols=$3, format="binary") +B=A[$4:$5,$6:$7] +C=A[1:$5,$6:ncol(A)] +D=A[,$6:$7] +write(B, $8, format="binary") +write(C, $9, format="binary") +write(D, $10, format="binary")