This is an automated email from the ASF dual-hosted git repository.

zaleslaw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new 4b8b7ff  IGNITE-12168: [ML] Flaky ML example tests (#6866)
4b8b7ff is described below

commit 4b8b7ff254db202f2f9af1130f0183057bb3f6aa
Author: Alexey Zinoviev <zaleslaw....@gmail.com>
AuthorDate: Fri Sep 13 22:13:51 2019 +0300

    IGNITE-12168: [ML] Flaky ML example tests (#6866)
---
 .../apache/ignite/ml/util/MLSandboxDatasets.java   | 26 ++++++-------
 .../org/apache/ignite/ml/util/SandboxMLCache.java  | 45 ++++++++++++----------
 2 files changed, 37 insertions(+), 34 deletions(-)

diff --git 
a/modules/ml/src/main/java/org/apache/ignite/ml/util/MLSandboxDatasets.java 
b/modules/ml/src/main/java/org/apache/ignite/ml/util/MLSandboxDatasets.java
index 4d423e9..12bff53 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/util/MLSandboxDatasets.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/util/MLSandboxDatasets.java
@@ -22,50 +22,50 @@ package org.apache.ignite.ml.util;
  */
 public enum MLSandboxDatasets {
     /** Movielens dataset with ratings. */
-    MOVIELENS("ratings.csv", true, ","),
+    MOVIELENS("modules/ml/src/main/resources/datasets/ratings.csv", true, ","),
 
     /** The full Iris dataset from Machine Learning Repository. */
-    IRIS("iris.txt", false, "\t"),
+    IRIS("modules/ml/src/main/resources/datasets/iris.txt", false, "\t"),
 
     /** The Titanic dataset from Kaggle competition. */
-    TITANIC("titanic.csv", true, ";"),
+    TITANIC("modules/ml/src/main/resources/datasets/titanic.csv", true, ";"),
 
     /** The 1st and 2nd classes from the Iris dataset. */
-    TWO_CLASSED_IRIS("two_classed_iris.csv", false, "\t"),
+    
TWO_CLASSED_IRIS("modules/ml/src/main/resources/datasets/two_classed_iris.csv", 
false, "\t"),
 
     /** The dataset is about different computers' properties based on 
https://archive.ics.uci.edu/ml/datasets/Computer+Hardware. */
-    CLEARED_MACHINES("cleared_machines.csv", false, ";"),
+    
CLEARED_MACHINES("modules/ml/src/main/resources/datasets/cleared_machines.csv", 
false, ";"),
 
     /**
      * The health data is related to death rate based on; doctor availability, 
hospital availability,
      * annual per capita income, and population density people per square mile.
      */
-    MORTALITY_DATA("mortalitydata.csv", false, ";"),
+    MORTALITY_DATA("modules/ml/src/main/resources/datasets/mortalitydata.csv", 
false, ";"),
 
     /**
      * The preprocessed Glass dataset from the Machine Learning Repository 
https://archive.ics.uci.edu/ml/datasets/Glass+Identification
      * There are 3 classes with labels: 1 {building_windows_float_processed}, 
3 {vehicle_windows_float_processed}, 7 {headlamps}.
      * Feature names: 'Na-Sodium', 'Mg-Magnesium', 'Al-Aluminum', 'Ba-Barium', 
'Fe-Iron'.
      */
-    GLASS_IDENTIFICATION("glass_identification.csv", false, ";"),
+    
GLASS_IDENTIFICATION("modules/ml/src/main/resources/datasets/glass_identification.csv",
 false, ";"),
 
     /** The Wine recognition data. Could be found <a 
href="https://archive.ics.uci.edu/ml/machine-learning-databases/wine/">here</a>.
 */
-    WINE_RECOGNITION("wine.txt", false, ","),
+    WINE_RECOGNITION("modules/ml/src/main/resources/datasets/wine.txt", false, 
","),
 
     /** The Boston house-prices dataset. Could be found <a 
href="https://archive.ics.uci.edu/ml/machine-learning-databases/housing/">here</a>.
 */
-    BOSTON_HOUSE_PRICES("boston_housing_dataset.txt", false, ","),
+    
BOSTON_HOUSE_PRICES("modules/ml/src/main/resources/datasets/boston_housing_dataset.txt",
 false, ","),
 
     /** Example from book Barber D. Bayesian reasoning and machine learning. 
Chapter 10. */
-    ENGLISH_VS_SCOTTISH("english_vs_scottish_binary_dataset.csv", true, ","),
+    
ENGLISH_VS_SCOTTISH("modules/ml/src/main/resources/datasets/english_vs_scottish_binary_dataset.csv",
 true, ","),
 
     /** Wholesale customers dataset. Could be found <a 
href="https://archive.ics.uci.edu/ml/datasets/Wholesale+customers">here</a>. */
-    WHOLESALE_CUSTOMERS("wholesale_customers.csv", true, ","),
+    
WHOLESALE_CUSTOMERS("modules/ml/src/main/resources/datasets/wholesale_customers.csv",
 true, ","),
 
     /** Fraud detection problem [part of whole dataset]. Could be found <a 
href="https://www.kaggle.com/mlg-ulb/creditcardfraud/">here</a>. */
-    FRAUD_DETECTION("fraud_detection.csv", false, ","),
+    
FRAUD_DETECTION("modules/ml/src/main/resources/datasets/fraud_detection.csv", 
false, ","),
 
     /** A dataset with discrete and continious features. */
-    MIXED_DATASET("mixed_dataset.csv", true, ",");
+    MIXED_DATASET("modules/ml/src/main/resources/datasets/mixed_dataset.csv", 
true, ",");
 
     /** Filename. */
     private final String filename;
diff --git 
a/modules/ml/src/main/java/org/apache/ignite/ml/util/SandboxMLCache.java 
b/modules/ml/src/main/java/org/apache/ignite/ml/util/SandboxMLCache.java
index 23febff..ef2e260 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/util/SandboxMLCache.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/util/SandboxMLCache.java
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.ml.util;
 
+import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.nio.file.Paths;
@@ -31,13 +32,10 @@ import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.configuration.CacheConfiguration;
-import org.apache.ignite.internal.util.typedef.internal.A;
+import org.apache.ignite.internal.util.IgniteUtils;
 import org.apache.ignite.ml.math.exceptions.knn.FileParsingException;
 import org.apache.ignite.ml.math.primitives.vector.Vector;
 import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
-import org.springframework.core.io.Resource;
-import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
-import org.springframework.core.io.support.ResourcePatternResolver;
 
 /**
  * Common utility code used in some ML examples to set up test cache.
@@ -46,10 +44,6 @@ public class SandboxMLCache {
     /** */
     private final Ignite ignite;
 
-    /** Resource resolver. */
-    private static final ResourcePatternResolver RESOURCE_RESOLVER =
-        new 
PathMatchingResourcePatternResolver(SandboxMLCache.class.getClassLoader());
-
     /** */
     public SandboxMLCache(Ignite ignite) {
         this.ignite = ignite;
@@ -74,6 +68,7 @@ public class SandboxMLCache {
         return cache;
     }
 
+
     /**
      * Loads dataset as a list of rows.
      *
@@ -84,10 +79,15 @@ public class SandboxMLCache {
     public List<String> loadDataset(MLSandboxDatasets dataset) throws 
IOException {
         List<String> res = new ArrayList<>();
 
-        Resource[] resources = RESOURCE_RESOLVER.getResources("classpath*:*/" 
+ dataset.getFileName());
-        A.ensure(resources.length == 1, "Cannot find resource");
+        String fileName = dataset.getFileName();
+
+        File file = IgniteUtils.resolveIgnitePath(fileName);
+
+        if (file == null)
+            throw new FileNotFoundException(fileName);
+
+        Scanner scanner = new Scanner(file);
 
-        Scanner scanner = new Scanner(resources[0].getInputStream());
         if (dataset.hasHeader() && scanner.hasNextLine())
             scanner.nextLine();
 
@@ -99,6 +99,7 @@ public class SandboxMLCache {
         return res;
     }
 
+
     /**
      * Fills cache with data and returns it.
      *
@@ -106,19 +107,23 @@ public class SandboxMLCache {
      * @return Filled Ignite Cache.
      * @throws FileNotFoundException If file not found.
      */
-    public IgniteCache<Integer, Vector> fillCacheWith(MLSandboxDatasets 
dataset) throws IOException {
+    public IgniteCache<Integer, Vector> fillCacheWith(MLSandboxDatasets 
dataset) throws FileNotFoundException {
+
         IgniteCache<Integer, Vector> cache = getCache();
 
         String fileName = dataset.getFileName();
-        Resource[] resources = RESOURCE_RESOLVER.getResources("classpath*:*/" 
+ fileName);
-        A.ensure(resources.length == 1, "Cannot find resource");
 
-        Scanner scanner = new Scanner(resources[0].getInputStream());
+        File file = IgniteUtils.resolveIgnitePath(fileName);
+
+        if (file == null)
+            throw new FileNotFoundException(fileName);
+
+        Scanner scanner = new Scanner(file);
 
         int cnt = 0;
         while (scanner.hasNextLine()) {
             String row = scanner.nextLine();
-            if (dataset.hasHeader() && cnt == 0) {
+            if(dataset.hasHeader() && cnt == 0) {
                 cnt++;
                 continue;
             }
@@ -129,11 +134,9 @@ public class SandboxMLCache {
             NumberFormat format = NumberFormat.getInstance(Locale.FRANCE);
 
             for (int i = 0; i < cells.length; i++)
-                try {
-                    if (cells[i].equals(""))
-                        data[i] = Double.NaN;
-                    else
-                        data[i] = Double.valueOf(cells[i]);
+                try{
+                    if(cells[i].equals("")) data[i] = Double.NaN;
+                    else data[i] = Double.valueOf(cells[i]);
                 } catch (java.lang.NumberFormatException e) {
                     try {
                         data[i] = format.parse(cells[i]).doubleValue();

Reply via email to