quintinali commented on a change in pull request #35: SDAP-161 MUDROD embedded 
unit test
URL: 
https://github.com/apache/incubator-sdap-mudrod/pull/35#discussion_r239931381
 
 

 ##########
 File path: 
core/src/test/java/org/apache/sdap/mudrod/discoveryengine/WeblogDiscoveryEngineTest.java
 ##########
 @@ -0,0 +1,139 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you 
+ * may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sdap.mudrod.discoveryengine;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Properties;
+import org.apache.sdap.mudrod.driver.ESDriver;
+import org.apache.sdap.mudrod.driver.SparkDriver;
+import org.apache.sdap.mudrod.main.AbstractElasticsearchIntegrationTest;
+import org.apache.sdap.mudrod.main.MudrodConstants;
+import org.apache.sdap.mudrod.main.MudrodEngine;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class WeblogDiscoveryEngineTest extends 
AbstractElasticsearchIntegrationTest {
+
+  private static WeblogDiscoveryEngine weblogEngine = null;
+
+  @BeforeClass
+  public static void setUp() {
+    MudrodEngine mudrodEngine = new MudrodEngine();
+    Properties props = mudrodEngine.loadConfig();
+    ESDriver es = new ESDriver(props);
+    SparkDriver spark = new SparkDriver(props);
+    String dataDir = getTestDataPath();
+    System.out.println(dataDir);
+    props.setProperty(MudrodConstants.DATA_DIR, dataDir);
+    MudrodEngine.loadPathConfig(mudrodEngine, dataDir);
+    weblogEngine = new WeblogDiscoveryEngine(props, es, spark);
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    // TODO
+  }
+
+  private static String getTestDataPath() {
+    File resourcesDirectory = new File("src/test/resources/");
+    String resourcedir = "/Testing_Data_1_3dayLog+Meta+Onto/";
+    String dataDir = resourcesDirectory.getAbsolutePath() + resourcedir;
+    return dataDir;
+  }
+
+  @Test
+  public void testPreprocess() throws IOException {
+
+    weblogEngine.preprocess();
+    testPreprocess_userHistory();
+    testPreprocess_clickStream();
+  }
+
+  private void testPreprocess_userHistory() throws IOException {
+    // compare user history data
+    String userHistorycsvFile = getTestDataPath() + "/userHistoryMatrix.csv";
+    BufferedReader br = new BufferedReader(new FileReader(userHistorycsvFile));
+    String line = null;
+    HashMap<String, List<String>> map = new HashMap<>();
+    int i = 0;
+    List<String> header = new LinkedList<>();
+    while ((line = br.readLine()) != null) {
+      if (i == 0) {
+        String str[] = line.split(",");
+        for (String s : str) {
+          header.add(s);
+        }
+      } else {
+        String str[] = line.split(",");
+        for (int j = 1; j < str.length; j++) {
+          if (!str[j].equals("0")) {
+            if (!map.containsKey(str[0])) {
+              map.put(str[0], new ArrayList<>());
+            }
+            map.get(str[0]).add(header.get(j));
+          }
+        }
+      }
+      i += 1;
+    }
+
+    Assert.assertEquals("failed in history data result!", "195.219.98.7", 
String.join(",", map.get("sea surface topography")));
+  }
+
+  private void testPreprocess_clickStream() throws IOException {
+    // TODO compare clickStream data
+    // String clickStreamcsvFile =
+    // 
"C:/Users/admin/Documents/GitHub/incubator-sdap-mudrod/core/clickStreamMatrix.csv";
+    String clickStreamcsvFile = getTestDataPath() + "/clickStreamMatrix.csv";
+    System.out.println(clickStreamcsvFile);
+    BufferedReader br = new BufferedReader(new FileReader(clickStreamcsvFile));
+    String line = null;
+    HashMap<String, List<String>> map = new HashMap<>();
+
+    int i = 0;
+    List<String> header = new LinkedList<>();
+    while ((line = br.readLine()) != null) {
+      if (i == 0) {
+        String str[] = line.split(",");
+        for (String s : str) {
+          header.add(s);
+        }
+      } else {
+        String str[] = line.split(",");
+        for (int j = 1; j < str.length; j++) {
+          if (!str[j].equals("0.0")) { //
+            if (!map.containsKey(str[0])) {
+              map.put(str[0], new ArrayList<>());
+            }
+            map.get(str[0]).add(header.get(j));
+          }
+        }
+      }
+      i += 1;
+    }
+    System.out.println(map);
+
+    Assert.assertEquals("failed in click stream result!", 
"\"ostm_l2_ost_ogdr_gps\"", String.join(",", map.get("sea surface 
topography")));
 
 Review comment:
   @lewismc This is the reason why I submitted the pull request. I hope you can 
check the test cases and let me know whether they are the ones your team 
needs. The preprocess function in WeblogDiscoveryEngine.java ingests 
an HTTP file and an FTP file and processes them into user history data and 
clickstream data. To test the function, I prepared a small HTTP log and a small 
FTP log, processed them with the function, extracted information from the 
generated CSV files into a map, and then compared the results with the expected results. 
   
   Assert.assertEquals("failed in click stream result!", 
"\"ostm_l2_ost_ogdr_gps\"", String.join(",", map.get("sea surface 
topography")));
   
   This assertion means that the log ingester should find, from the test log, 
that a user clicked ostm_l2_ost_ogdr_gps after searching for "sea surface topography".
   
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to