Repository: incubator-systemml
Updated Branches:
  refs/heads/master 08a3f1c7e -> bc2231982


[SYSTEMML-583] Simplify jmlc transform meta read util (w/o spec), tests

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/72e21663
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/72e21663
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/72e21663

Branch: refs/heads/master
Commit: 72e216637840eb17090725064c3208ade4ab6160
Parents: 08a3f1c
Author: Matthias Boehm <mbo...@us.ibm.com>
Authored: Wed Apr 20 18:57:43 2016 -0700
Committer: Matthias Boehm <mbo...@us.ibm.com>
Committed: Wed Apr 20 18:58:14 2016 -0700

----------------------------------------------------------------------
 .../org/apache/sysml/api/jmlc/Connection.java   | 145 +++++++++++++------
 .../functions/jmlc/FrameReadMetaTest.java       |  18 ++-
 .../functions/jmlc/tfmtd_example/coltypes.csv   |   2 +-
 3 files changed, 119 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/72e21663/src/main/java/org/apache/sysml/api/jmlc/Connection.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/jmlc/Connection.java b/src/main/java/org/apache/sysml/api/jmlc/Connection.java
index 58aa0fc..984268a 100644
--- a/src/main/java/org/apache/sysml/api/jmlc/Connection.java
+++ b/src/main/java/org/apache/sysml/api/jmlc/Connection.java
@@ -406,6 +406,24 @@ public class Connection
                return ret;
        }
        
+       
+       ////////////////////////////////////////////
+       // Read transform meta data
+       ////////////////////////////////////////////
+       
+       /**
+        * Reads transform meta data from an HDFS file path and converts it 
into an in-memory
+        * FrameBlock object. The column names in the meta data file 
'column.names' is processed
+        * with default separator ','.
+        * 
+        * @param metapath  hdfs file path to meta data directory
+        * @return FrameBlock object representing transform metadata
+        * @throws IOException
+        */
+       public FrameBlock readTransformMetaDataFromFile(String metapath) throws 
IOException {
+               return readTransformMetaDataFromFile(null, metapath, 
TfUtils.TXMTD_SEP);
+       }
+       
        /**
         * Reads transform meta data from an HDFS file path and converts it 
into an in-memory
         * FrameBlock object. The column names in the meta data file 
'column.names' is processed
@@ -433,6 +451,8 @@ public class Connection
        public FrameBlock readTransformMetaDataFromFile(String spec, String 
metapath, String colDelim) 
                throws IOException 
        {
+               //NOTE: this implementation assumes column alignment of 
colnames and coltypes
+               
                //read column types (for sanity check column names)
                String coltypesStr = 
MapReduceTool.readStringFromHDFSFile(metapath+File.separator+TfUtils.TXMTD_COLTYPES);
                List<String> coltypes = 
Arrays.asList(IOUtilFunctions.split(coltypesStr.trim(), TfUtils.TXMTD_SEP));
@@ -460,9 +480,25 @@ public class Connection
                                LOG.warn("Recode map for column '"+colName+"' 
does not exist.");
                        }
                }
+
+               //get list of recode ids
+               List<Integer> recodeIDs = parseRecodeColIDs(spec, coltypes);
                
                //create frame block from in-memory strings
-               return convertToTransformMetaDataFrame(spec, rows, colnames, 
meta);
+               return convertToTransformMetaDataFrame(rows, recodeIDs, 
colnames, meta);
+       }
+       
+       /**
+        * Reads transform meta data from the class path and converts it into 
an in-memory
+        * FrameBlock object. The column names in the meta data file 
'column.names' is processed
+        * with default separator ','.
+        * 
+        * @param metapath  resource path to meta data directory
+        * @return FrameBlock object representing transform metadata
+        * @throws IOException
+        */
+       public FrameBlock readTransformMetaDataFromPath(String metapath) throws 
IOException {
+               return readTransformMetaDataFromPath(null, metapath, 
TfUtils.TXMTD_SEP);
        }
        
        /**
@@ -492,6 +528,8 @@ public class Connection
        public FrameBlock readTransformMetaDataFromPath(String spec, String 
metapath, String colDelim) 
                throws IOException 
        {
+               //NOTE: this implementation assumes column alignment of 
colnames and coltypes
+               
                //read column types (for sanity check column names)
                String coltypesStr = 
IOUtilFunctions.toString(Connection.class.getResourceAsStream(metapath+"/"+TfUtils.TXMTD_COLTYPES));
                List<String> coltypes = 
Arrays.asList(IOUtilFunctions.split(coltypesStr.trim(), TfUtils.TXMTD_SEP));
@@ -521,21 +559,24 @@ public class Connection
                        }
                }
                
+               //get list of recode ids
+               List<Integer> recodeIDs = parseRecodeColIDs(spec, coltypes);
+               
                //create frame block from in-memory strings
-               return convertToTransformMetaDataFrame(spec, rows, colnames, 
meta);
+               return convertToTransformMetaDataFrame(rows, recodeIDs, 
colnames, meta);
        }
        
        /**
         * Converts transform meta data into an in-memory FrameBlock object.
         * 
-        * @param spec      transform specification as json string
-        * @param rows      maximum number of distinct items (number of rows in 
frame block)
-        * @param colnames  column names, ordered by position
-        * @param meta      map of (column name, recode map)-pairs, with recode 
maps in their original csv representation
-        * @return FrameBlock object representing transform metadata
+        * @param rows
+        * @param recodeIDs
+        * @param colnames
+        * @param meta
+        * @return
         * @throws IOException
         */
-       public FrameBlock convertToTransformMetaDataFrame(String spec, int 
rows, List<String> colnames, HashMap<String,String> meta) 
+       private FrameBlock convertToTransformMetaDataFrame(int rows, 
List<Integer> recodeIDs, List<String> colnames, HashMap<String,String> meta) 
                throws IOException 
        {
                //create frame block w/ pure string schema
@@ -543,46 +584,68 @@ public class Connection
                FrameBlock ret = new FrameBlock(schema, colnames);
                ret.ensureAllocatedColumns(rows);
                
-               try
-               {
-                       ArrayList<Integer> specRecodeIDs = new 
ArrayList<Integer>();
-                       
-                       //parse json transform specification
-                       JSONObject jSpec = new JSONObject(spec);
-                       if ( jSpec.containsKey(TfUtils.TXMETHOD_RECODE))  {
-                               JSONArray attrs = null; //TODO simplify once 
json spec consolidated
-                               if( jSpec.get(TfUtils.TXMETHOD_RECODE) 
instanceof JSONObject ) {
-                                       JSONObject obj = (JSONObject) 
jSpec.get(TfUtils.TXMETHOD_RECODE);
-                                       attrs = (JSONArray) 
obj.get(TfUtils.JSON_ATTRS);
-                               }
-                               else
-                                       attrs = 
(JSONArray)jSpec.get(TfUtils.TXMETHOD_RECODE);                          
-                               for(int j=0; j<attrs.length(); j++) 
-                                       
specRecodeIDs.add(UtilFunctions.toInt(attrs.get(j)));
-                       }       
+               //encode recode maps into frame
+               for( Integer colID : recodeIDs ) {
+                       String name = colnames.get(colID-1);
+                       String map = meta.get(name);
+                       if( map == null )
+                               throw new IOException("Recode map for column 
'"+name+"' (id="+colID+") not existing.");
                        
-                       //encode recode maps into frame
-                       for( Integer colID : specRecodeIDs ) {
-                               String name = colnames.get(colID-1);
-                               String map = meta.get(name);
-                               if( map == null )
-                                       throw new IOException("Recode map for 
column '"+name+"' (id="+colID+") not existing.");
-                               
-                               InputStream is = new 
ByteArrayInputStream(map.getBytes("UTF-8"));
-                               BufferedReader br = new BufferedReader(new 
InputStreamReader(is));
-                               Pair<String,String> pair = new 
Pair<String,String>();
-                               String line; int rpos = 0;
-                               while( (line = br.readLine()) != null ) {
-                                       DecoderRecode.parseRecodeMapEntry(line, 
pair);
-                                       String tmp = pair.getKey() + 
Lop.DATATYPE_PREFIX + pair.getValue();
-                                       ret.set(rpos++, colID-1, tmp);
+                       InputStream is = new 
ByteArrayInputStream(map.getBytes("UTF-8"));
+                       BufferedReader br = new BufferedReader(new 
InputStreamReader(is));
+                       Pair<String,String> pair = new Pair<String,String>();
+                       String line; int rpos = 0;
+                       while( (line = br.readLine()) != null ) {
+                               DecoderRecode.parseRecodeMapEntry(line, pair);
+                               String tmp = pair.getKey() + 
Lop.DATATYPE_PREFIX + pair.getValue();
+                               ret.set(rpos++, colID-1, tmp);
+                       }
+               }
+               
+               return ret;
+       }
+       
+       /**
+        * Parses the given json specification and extracts a list of column ids
+        * that are subject to recoding.
+        * 
+        * @param spec
+        * @param coltypes
+        * @return
+        * @throws IOException
+        */
+       private ArrayList<Integer> parseRecodeColIDs(String spec, List<String> 
coltypes) 
+               throws IOException 
+       {       
+               ArrayList<Integer> specRecodeIDs = new ArrayList<Integer>();
+               
+               try {
+                       if( spec != null ) {
+                               //parse json transform specification for recode 
col ids
+                               JSONObject jSpec = new JSONObject(spec);
+                               if ( 
jSpec.containsKey(TfUtils.TXMETHOD_RECODE))  {
+                                       JSONArray attrs = null; //TODO simplify 
once json spec consolidated
+                                       if( jSpec.get(TfUtils.TXMETHOD_RECODE) 
instanceof JSONObject ) {
+                                               JSONObject obj = (JSONObject) 
jSpec.get(TfUtils.TXMETHOD_RECODE);
+                                               attrs = (JSONArray) 
obj.get(TfUtils.JSON_ATTRS);
+                                       }
+                                       else
+                                               attrs = 
(JSONArray)jSpec.get(TfUtils.TXMETHOD_RECODE);                          
+                                       for(int j=0; j<attrs.length(); j++) 
+                                               
specRecodeIDs.add(UtilFunctions.toInt(attrs.get(j)));
                                }
                        }
+                       else {
+                               //obtain recode col ids from coltypes 
+                               for( int j=0; j<coltypes.size(); j++ )
+                                       if( coltypes.get(j).equals("2") )
+                                               specRecodeIDs.add(j+1);
+                       }
                }
                catch(Exception ex) {
                        throw new IOException(ex);
                }
                
-               return ret;
+               return specRecodeIDs;
        }
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/72e21663/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
index bb5cbd9..05beeed 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
@@ -53,13 +53,23 @@ public class FrameReadMetaTest extends AutomatedTestBase
        }
        
        @Test
+       public void testJMLCTransformDenseSpec() throws IOException {
+               runJMLCReadMetaTest(TEST_NAME1, false, true);
+       }
+       
+       @Test
+       public void testJMLCTransformDenseReuseSpec() throws IOException {
+               runJMLCReadMetaTest(TEST_NAME1, true, true);
+       }
+       
+       @Test
        public void testJMLCTransformDense() throws IOException {
-               runJMLCReadMetaTest(TEST_NAME1, false);
+               runJMLCReadMetaTest(TEST_NAME1, false, false);
        }
        
        @Test
        public void testJMLCTransformDenseReuse() throws IOException {
-               runJMLCReadMetaTest(TEST_NAME1, true);
+               runJMLCReadMetaTest(TEST_NAME1, true, false);
        }
 
        /**
@@ -69,7 +79,7 @@ public class FrameReadMetaTest extends AutomatedTestBase
         * @param instType
         * @throws IOException 
         */
-       private void runJMLCReadMetaTest( String testname, boolean modelReuse ) 
+       private void runJMLCReadMetaTest( String testname, boolean modelReuse, 
boolean useSpec ) 
                throws IOException
        {       
                String TEST_NAME = testname;
@@ -82,7 +92,7 @@ public class FrameReadMetaTest extends AutomatedTestBase
                
                //read meta data frame
                String spec = MapReduceTool.readStringFromHDFSFile(SCRIPT_DIR + 
TEST_DIR+"tfmtd_example/spec.json");
-               FrameBlock M = conn.readTransformMetaDataFromFile(spec, 
SCRIPT_DIR + TEST_DIR+"tfmtd_example/");
+               FrameBlock M = conn.readTransformMetaDataFromFile(useSpec ? 
spec : null, SCRIPT_DIR + TEST_DIR+"tfmtd_example/");
                
                //generate data based on recode maps
                HashMap<String,Long>[] RC = getRecodeMaps(M);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/72e21663/src/test/scripts/functions/jmlc/tfmtd_example/coltypes.csv
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/jmlc/tfmtd_example/coltypes.csv b/src/test/scripts/functions/jmlc/tfmtd_example/coltypes.csv
index 0b80246..0475aed 100644
--- a/src/test/scripts/functions/jmlc/tfmtd_example/coltypes.csv
+++ b/src/test/scripts/functions/jmlc/tfmtd_example/coltypes.csv
@@ -1 +1 @@
-2,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1
\ No newline at end of file
+2,2,1,2,2,2,2,1,1
\ No newline at end of file

Reply via email to