Author: adeneche
Date: Mon Oct 24 19:39:40 2011
New Revision: 1188332

URL: http://svn.apache.org/viewvc?rev=1188332&view=rev
Log:
MAHOUT-840 Instance.id removed

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java Mon Oct 24 19:39:40 2011
@@ -305,7 +305,7 @@ public class Data implements Cloneable {
     try {
       int index = 0;
       while (iterator.hasNext()) {
-        labels[index++] = (int) converter.convert(0, iterator.next()).get(labelId);
+        labels[index++] = (int) converter.convert(iterator.next()).get(labelId);
       }
     } finally {
       Closeables.closeQuietly(iterator);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java Mon Oct 24 19:39:40 2011
@@ -42,7 +42,7 @@ public class DataConverter {
     this.dataset = dataset;
   }
   
-  public Instance convert(int id, CharSequence string) {
+  public Instance convert(CharSequence string) {
     // all attributes (categorical, numerical, label), ignored
     int nball = dataset.nbAttributes() + dataset.getIgnored().length;
     
@@ -83,10 +83,10 @@ public class DataConverter {
     }
     
     if (label == -1) {
-      log.error("Label not found, instance id : {}, string : {}", id, string);
+      log.error("Label not found, instance string : {}", string);
       throw new IllegalStateException("Label not found!");
     }
     
-    return new Instance(id, vector);
+    return new Instance(vector);
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java Mon Oct 24 19:39:40 2011
@@ -54,15 +54,13 @@ public final class DataLoader {
   /**
    * Converts a comma-separated String to a Vector.
    * 
-   * @param id
-   *          unique id for the current instance
    * @param attrs
    *          attributes description
    * @param values
    *          used to convert CATEGORICAL attribute values to Integer
    * @return null if there are missing values '?'
    */
-  private static Instance parseString(int id, Attribute[] attrs, List<String>[] values, CharSequence string) {
+  private static Instance parseString(Attribute[] attrs, List<String>[] values, CharSequence string) {
     String[] tokens = COMMA_SPACE.split(string);
     Preconditions.checkArgument(tokens.length == attrs.length, "Wrong number of attributes in the string");
 
@@ -112,7 +110,7 @@ public final class DataLoader {
       throw new IllegalStateException("Label not found!");
     }
     
-    return new Instance(id, vector);
+    return new Instance(vector);
   }
   
   /**
@@ -141,7 +139,7 @@ public final class DataLoader {
         continue;
       }
       
-      Instance instance = converter.convert(instances.size(), line);
+      Instance instance = converter.convert(line);
       if (instance == null) {
         // missing values found
         log.warn("{}: missing values", instances.size());
@@ -170,7 +168,7 @@ public final class DataLoader {
         continue;
       }
       
-      Instance instance = converter.convert(instances.size(), line);
+      Instance instance = converter.convert(line);
       if (instance == null) {
         // missing values found
         log.warn("{}: missing values", instances.size());
@@ -205,21 +203,21 @@ public final class DataLoader {
     // used to convert CATEGORICAL attribute to Integer
     List<String>[] values = new List[attrs.length];
     
-    int id = 0;
+    int size = 0;
     while (scanner.hasNextLine()) {
       String line = scanner.nextLine();
       if (line.isEmpty()) {
         continue;
       }
       
-      if (parseString(id, attrs, values, line) != null) {
-        id++;
+      if (parseString(attrs, values, line) != null) {
+        size++;
       }
     }
     
     scanner.close();
     
-    return new Dataset(attrs, values, id, regression);
+    return new Dataset(attrs, values, size, regression);
   }
   
   /**
@@ -234,18 +232,18 @@ public final class DataLoader {
     // used to convert CATEGORICAL and LABEL attributes to Integer
     List<String>[] values = new List[attrs.length];
     
-    int id = 0;
+    int size = 0;
     for (String aData : data) {
       if (aData.isEmpty()) {
         continue;
       }
       
-      if (parseString(id, attrs, values, aData) != null) {
-        id++;
+      if (parseString(attrs, values, aData) != null) {
+        size++;
       }
     }
     
-    return new Dataset(attrs, values, id, regression);
+    return new Dataset(attrs, values, size, regression);
   }
 
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java Mon Oct 24 19:39:40 2011
@@ -24,13 +24,10 @@ import org.apache.mahout.math.Vector;
  */
 public class Instance {
   
-  private final int id;
-  
   /** attributes, except LABEL and IGNORED */
   private final Vector attrs;
   
-  public Instance(int id, Vector attrs) {
-    this.id = id;
+  public Instance(Vector attrs) {
     this.attrs = attrs;
   }
   
@@ -67,17 +64,12 @@ public class Instance {
     
     Instance instance = (Instance) obj;
     
-    return id == instance.id && attrs.equals(instance.attrs);
+    return /*id == instance.id &&*/ attrs.equals(instance.attrs);
     
   }
   
   @Override
   public int hashCode() {
-    return id + attrs.hashCode();
-  }
-
-  /** instance unique id */
-  public int getId() {
-    return id;
+    return /*id +*/ attrs.hashCode();
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java Mon Oct 24 19:39:40 2011
@@ -241,7 +241,7 @@ public class Classifier {
 
       String line = value.toString();
       if (!line.isEmpty()) {
-        Instance instance = converter.convert(0, line);
+        Instance instance = converter.convert(line);
         int prediction = forest.classify(rng, instance);
         key.set(dataset.getLabel(instance));
         lvalue.set(Integer.toString(prediction));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java Mon Oct 24 19:39:40 2011
@@ -64,9 +64,6 @@ public class Step1Mapper extends MapredM
   /** will contain all instances if this mapper's split */
   private final List<Instance> instances = Lists.newArrayList();
   
-  /** current instance's id */
-  private int id;
-  
   public int getFirstTreeId() {
     return firstTreeId;
   }
@@ -142,7 +139,7 @@ public class Step1Mapper extends MapredM
   
   @Override
   protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-    instances.add(converter.convert(id++, value.toString()));
+    instances.add(converter.convert(value.toString()));
   }
   
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java Mon Oct 24 19:39:40 2011
@@ -187,7 +187,7 @@ public class FrequenciesJob {
         firstId = new LongWritable(key.get());
       }
       
-      Instance instance = converter.convert((int) key.get(), value.toString());
+      Instance instance = converter.convert(value.toString());
       
       context.write(firstId, new IntWritable(dataset.getLabel(instance)));
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java Mon Oct 24 19:39:40 2011
@@ -174,7 +174,7 @@ public final class UDistrib {
       }
       
       // write the tuple in files[tuple.label]
-      Instance instance = converter.convert(id++, line);
+      Instance instance = converter.convert(line);
       int label = dataset.getLabel(instance);
       files[currents[label]].writeBytes(line);
       files[currents[label]].writeChar('\n');

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java Mon Oct 24 19:39:40 2011
@@ -42,7 +42,7 @@ public final class DataConverterTest ext
     DataConverter converter = new DataConverter(dataset);
     
     for (int index = 0; index < data.size(); index++) {
-      assertEquals(data.get(index), converter.convert(index, sData[index]));
+      assertEquals(data.get(index), converter.convert(sData[index]));
     }
   }
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java Mon Oct 24 19:39:40 2011
@@ -146,9 +146,6 @@ public final class DataLoaderTest extend
       double[] vector = data[index];
       Instance instance = loaded.get(lind);
 
-      // make sure the id is correct
-      assertEquals(lind, instance.getId());
-
       int aId = 0;
       for (int attr = 0; attr < nbAttributes; attr++) {
         if (attrs[attr].isIgnored()) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java Mon Oct 24 19:39:40 2011
@@ -252,7 +252,7 @@ public class TestForest extends Configur
           continue; // skip empty lines
         }
 
-        Instance instance = converter.convert(0, line);
+        Instance instance = converter.convert(line);
         int prediction = forest.classify(rng, instance);
 
         if (outputPath != null) {


Reply via email to