Author: srowen
Date: Sun Nov 6 09:44:54 2011
New Revision: 1198330
URL: http://svn.apache.org/viewvc?rev=1198330&view=rev
Log:
MAHOUT-155 DateTestAndLabelFix
Modified:
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
Modified:
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java?rev=1198330&r1=1198329&r2=1198330&view=diff
==============================================================================
---
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
(original)
+++
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
Sun Nov 6 09:44:54 2011
@@ -29,8 +29,9 @@ import org.apache.mahout.math.Vector;
final class ARFFIterator extends AbstractIterator<Vector> {
- private static final Pattern COMMA_PATTERN = Pattern.compile(",");
- private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
+ // This pattern will make sure a , inside a string is not a point for split.
+ // Ex: "Arizona" , "0:08 PM, PDT" , 110 will be split considering "0:08 PM, PDT" as one string
+ private static final Pattern COMMA_PATTERN =
Pattern.compile(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
private final BufferedReader reader;
private final ARFFModel model;
@@ -63,9 +64,11 @@ final class ARFFIterator extends Abstrac
String[] splits = COMMA_PATTERN.split(line);
result = new RandomAccessSparseVector(model.getLabelSize());
for (String split : splits) {
- String[] data = SPACE_PATTERN.split(split); // first is index, second is
- int idx = Integer.parseInt(data[0]);
- result.setQuick(idx, model.getValue(data[1], idx));
+ split = split.trim();
+ int idIndex = split.indexOf(' ');
+ int idx = Integer.parseInt(split.substring(0, idIndex).trim());
+ String data = split.substring(idIndex).trim();
+ result.setQuick(idx, model.getValue(data, idx));
}
} else {
result = new DenseVector(model.getLabelSize());
Modified:
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java?rev=1198330&r1=1198329&r2=1198330&view=diff
==============================================================================
---
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
(original)
+++
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
Sun Nov 6 09:44:54 2011
@@ -18,9 +18,14 @@
package org.apache.mahout.utils.vectors.arff;
public enum ARFFType {
- NUMERIC("numeric"), NOMINAL("{"), DATE("date"), STRING("string");
+
+ NUMERIC("numeric"),
+ NOMINAL("{"),
+ DATE("date"),
+ STRING("string");
private final String indicator;
+
ARFFType(String indicator) {
this.indicator = indicator;
}
@@ -30,8 +35,7 @@ public enum ARFFType {
}
public String getLabel(String line) {
- int idx = line.indexOf(indicator);
- return line.substring(ARFFModel.ATTRIBUTE.length(),
- idx).trim();
+ int idx = line.lastIndexOf(indicator);
+ return line.substring(ARFFModel.ATTRIBUTE.length(), idx).trim();
}
}
Modified:
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java?rev=1198330&r1=1198329&r2=1198330&view=diff
==============================================================================
---
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
(original)
+++
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
Sun Nov 6 09:44:54 2011
@@ -110,7 +110,7 @@ public class ARFFVectorIterable implemen
type = ARFFType.DATE;
//TODO: DateFormatter map
DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss",
Locale.ENGLISH);
- int idx = lower.indexOf(ARFFType.DATE.getIndicator());
+ int idx = lower.lastIndexOf(ARFFType.DATE.getIndicator());
String[] split = SPACE_PATTERN.split(line);
if (split.length >= 4) { //we have a date format
String formStr = line.substring(idx +
ARFFType.DATE.getIndicator().length()).trim();
Modified:
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java?rev=1198330&r1=1198329&r2=1198330&view=diff
==============================================================================
---
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
(original)
+++
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
Sun Nov 6 09:44:54 2011
@@ -122,23 +122,10 @@ public final class Driver {
});
for (File file : files) {
- writeFile(outDir, file, maxDocs, model);
+ writeFile(outDir, file, maxDocs, model, dictOut, delimiter);
}
} else {
- writeFile(outDir, input, maxDocs, model);
- }
- log.info("Dictionary Output file: {}", dictOut);
- Map<String,Integer> labels = model.getLabelBindings();
- Writer writer = Files.newWriter(dictOut, Charsets.UTF_8);
- try {
- for (Map.Entry<String,Integer> entry : labels.entrySet()) {
- writer.write(entry.getKey());
- writer.write(delimiter);
- writer.write(String.valueOf(entry.getValue()));
- writer.write('\n');
- }
- } finally {
- Closeables.closeQuietly(writer);
+ writeFile(outDir, input, maxDocs, model, dictOut, delimiter);
}
}
@@ -148,7 +135,29 @@ public final class Driver {
}
}
- private static void writeFile(String outDir, File file, long maxDocs,
ARFFModel arffModel) throws IOException {
+ private static void writeLabelBindings(File dictOut, ARFFModel arffModel,
String delimiter) throws IOException {
+ Map<String,Integer> labels = arffModel.getLabelBindings();
+ Writer writer = Files.newWriterSupplier(dictOut, Charsets.UTF_8,
true).getOutput();
+ try {
+ writer.write("Label bindings for Relation " + arffModel.getRelation() +
"\n");
+ for (Map.Entry<String,Integer> entry : labels.entrySet()) {
+ writer.write(entry.getKey());
+ writer.write(delimiter);
+ writer.write(String.valueOf(entry.getValue()));
+ writer.write('\n');
+ }
+ writer.write('\n');
+ } finally {
+ Closeables.closeQuietly(writer);
+ }
+ }
+
+ private static void writeFile(String outDir,
+ File file,
+ long maxDocs,
+ ARFFModel arffModel,
+ File dictOut,
+ String delimiter) throws IOException {
log.info("Converting File: {}", file);
ARFFModel model = new MapBackedARFFModel(arffModel.getWords(),
arffModel.getWordCount() + 1, arffModel
.getNominalMap());
@@ -158,6 +167,7 @@ public final class Driver {
VectorWriter vectorWriter = getSeqFileWriter(outFile);
try {
long numDocs = vectorWriter.write(iteratable, maxDocs);
+ writeLabelBindings(dictOut, model, delimiter);
log.info("Wrote: {} vectors", numDocs);
} finally {
Closeables.closeQuietly(vectorWriter);
Modified:
mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java?rev=1198330&r1=1198329&r2=1198330&view=diff
==============================================================================
---
mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
(original)
+++
mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
Sun Nov 6 09:44:54 2011
@@ -35,20 +35,16 @@ public final class ARFFVectorIterableTes
@Test
public void testValues() throws Exception {
StringBuilder builder = new StringBuilder();
- builder.append("%comments").append('\n').append("@RELATION
Mahout").append('\n')
- .append("@ATTRIBUTE foo numeric").append('\n')
- .append("@ATTRIBUTE bar numeric").append('\n')
- .append("@ATTRIBUTE timestamp DATE \"yyyy-MM-dd HH:mm:ss\"").append('\n')
- .append("@ATTRIBUTE junk string").append('\n')
- .append("@ATTRIBUTE theNominal {c,b,a}").append('\n')
- .append("@DATA").append('\n')
- .append("1,2, \"2009-01-01 5:55:55\", foo, c").append('\n')
- .append("2,3").append('\n')
- .append("{0 5,1 23}").append('\n');
+ builder.append("%comments").append('\n').append("@RELATION
Mahout").append('\n').append(
+ "@ATTRIBUTE foo numeric").append('\n').append("@ATTRIBUTE bar
numeric").append('\n').append(
+ "@ATTRIBUTE timestamp DATE \"yyyy-MM-dd
HH:mm:ss\"").append('\n').append("@ATTRIBUTE junk string")
+ .append('\n').append("@ATTRIBUTE theNominal
{c,b,a}").append('\n').append("@DATA").append('\n')
+ .append("1,2, \"2009-01-01 5:55:55\", foo,
c").append('\n').append("2,3").append('\n').append(
+ "{0 5,1 23}").append('\n');
ARFFModel model = new MapBackedARFFModel();
ARFFVectorIterable iterable = new ARFFVectorIterable(builder.toString(),
model);
assertEquals("Mahout", iterable.getModel().getRelation());
- Map<String, Integer> bindings = iterable.getModel().getLabelBindings();
+ Map<String,Integer> bindings = iterable.getModel().getLabelBindings();
assertNotNull(bindings);
assertEquals(5, bindings.size());
Iterator<Vector> iter = iterable.iterator();
@@ -109,21 +105,28 @@ public final class ARFFVectorIterableTes
assertTrue("Vector is not dense", vector instanceof
RandomAccessSparseVector);
count++;
}
+
+ iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model);
+ Iterator<Vector> iter = iterable.iterator();
+ Vector firstVector = iter.next();
+
+ assertEquals(1.0, firstVector.get(2), 0);
+
assertEquals(10, count);
- Map<String, Map<String, Integer>> nominalMap =
iterable.getModel().getNominalMap();
+ Map<String,Map<String,Integer>> nominalMap =
iterable.getModel().getNominalMap();
assertNotNull(nominalMap);
assertEquals(1, nominalMap.size());
- Map<String, Integer> noms = nominalMap.get("bar");
+ Map<String,Integer> noms = nominalMap.get("bar");
assertNotNull("nominals for bar are null", noms);
assertEquals(2, noms.size());
- Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
+ Map<Integer,ARFFType> integerARFFTypeMap = model.getTypeMap();
assertNotNull("Type map null", integerARFFTypeMap);
assertEquals(5, integerARFFTypeMap.size());
- Map<String, Long> words = model.getWords();
+ Map<String,Long> words = model.getWords();
assertNotNull("words null", words);
assertEquals(10, words.size());
- //System.out.println("Words: " + words);
- Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
+ // System.out.println("Words: " + words);
+ Map<Integer,DateFormat> integerDateFormatMap = model.getDateMap();
assertNotNull("date format null", integerDateFormatMap);
assertEquals(1, integerDateFormatMap.size());
}
@@ -131,14 +134,39 @@ public final class ARFFVectorIterableTes
@Test
public void testDate() throws Exception {
MapBackedARFFModel model = new MapBackedARFFModel();
- ARFFVectorIterable iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF,
model);
+ ARFFVectorIterable iterable = new ARFFVectorIterable(DATE_ARFF, model);
Iterator<Vector> iter = iterable.iterator();
Vector firstVector = iter.next();
- assertEquals(1.0, firstVector.get(2),0);
- DateFormat format = new SimpleDateFormat("yyyy-MM-dd", Locale.ENGLISH);
- Date date = format.parse("1973-10-23");
+
+ DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss",
Locale.ENGLISH);
+ Date date = format.parse("2001-07-04T12:08:56");
long result = date.getTime();
- assertEquals(result, firstVector.get(4),0);
+ assertEquals(result, firstVector.get(1), 0);
+
+ format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z",
Locale.ENGLISH);
+ date = format.parse("2001.07.04 AD at 12:08:56 PDT");
+ result = date.getTime();
+ assertEquals(result, firstVector.get(2), 0);
+
+ format = new SimpleDateFormat("EEE, MMM d, ''yy", Locale.ENGLISH);
+ date = format.parse("Wed, Jul 4, '01,4 0:08 PM, PDT");
+ result = date.getTime();
+ assertEquals(result, firstVector.get(3), 0);
+
+ format = new SimpleDateFormat("K:mm a, z", Locale.ENGLISH);
+ date = format.parse("0:08 PM, PDT");
+ result = date.getTime();
+ assertEquals(result, firstVector.get(4), 0);
+
+ format = new SimpleDateFormat("yyyyy.MMMMM.dd GGG hh:mm aaa",
Locale.ENGLISH);
+ date = format.parse("02001.July.04 AD 12:08 PM");
+ result = date.getTime();
+ assertEquals(result, firstVector.get(5), 0);
+
+ format = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z",
Locale.ENGLISH);
+ date = format.parse("Wed, 4 Jul 2001 12:08:56 -0700");
+ result = date.getTime();
+ assertEquals(result, firstVector.get(6), 0);
}
@@ -152,24 +180,23 @@ public final class ARFFVectorIterableTes
count++;
}
assertEquals(10, count);
- Map<String, Map<String, Integer>> nominalMap =
iterable.getModel().getNominalMap();
+ Map<String,Map<String,Integer>> nominalMap =
iterable.getModel().getNominalMap();
assertNotNull(nominalMap);
assertEquals(1, nominalMap.size());
- Map<String, Integer> noms = nominalMap.get("bar");
+ Map<String,Integer> noms = nominalMap.get("bar");
assertNotNull("nominals for bar are null", noms);
assertEquals(2, noms.size());
- Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
+ Map<Integer,ARFFType> integerARFFTypeMap = model.getTypeMap();
assertNotNull("Type map null", integerARFFTypeMap);
assertEquals(5, integerARFFTypeMap.size());
- Map<String, Long> words = model.getWords();
+ Map<String,Long> words = model.getWords();
assertNotNull("words null", words);
assertEquals(10, words.size());
- //System.out.println("Words: " + words);
- Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
+ // System.out.println("Words: " + words);
+ Map<Integer,DateFormat> integerDateFormatMap = model.getDateMap();
assertNotNull("date format null", integerDateFormatMap);
assertEquals(1, integerDateFormatMap.size());
- model = new MapBackedARFFModel(model.getWords(), model.getWordCount(),
- model.getNominalMap());
+ model = new MapBackedARFFModel(model.getWords(), model.getWordCount(),
model.getNominalMap());
iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF2, model);
count = 0;
for (Vector vector : iterable) {
@@ -184,108 +211,90 @@ public final class ARFFVectorIterableTes
assertEquals(2, noms.size());
}
+ private static final String SAMPLE_DENSE_ARFF = " % Comments\n" + " %
\n" + " % Comments go here"
+ + " % \n" + " @RELATION
Mahout\n" + '\n'
+ + " @ATTRIBUTE foo
NUMERIC\n"
+ + " @ATTRIBUTE bar
NUMERIC\n"
+ + " @ATTRIBUTE hockey
NUMERIC\n"
+ + " @ATTRIBUTE football
NUMERIC\n" + " \n" + '\n'
+ + '\n' + " @DATA\n" + "
23.1,3.23,1.2,0.2\n"
+ + " 2.9,3.0,1.2,0.2\n" + "
2.7,3.2,1.3,0.2\n"
+ + " 2.6,3.1,1.23,0.2\n" +
" 23.0,3.6,1.2,0.2\n"
+ + " 23.2,3.9,1.7,0.2\n" +
" 2.6,3.2,1.2,0.3\n"
+ + " 23.0,3.2,1.23,0.2\n" +
" 2.2,2.9,1.2,0.2\n"
+ + " 2.9,3.1,1.23,0.1\n";
- private static final String SAMPLE_DENSE_ARFF = " % Comments\n" +
- " % \n" +
- " % Comments go here" +
- " % \n" +
- " @RELATION Mahout\n" +
- '\n' +
- " @ATTRIBUTE foo NUMERIC\n" +
- " @ATTRIBUTE bar NUMERIC\n" +
- " @ATTRIBUTE hockey NUMERIC\n" +
- " @ATTRIBUTE football NUMERIC\n" +
- " \n" +
- '\n' +
- '\n' +
- " @DATA\n" +
- " 23.1,3.23,1.2,0.2\n" +
- " 2.9,3.0,1.2,0.2\n" +
- " 2.7,3.2,1.3,0.2\n" +
- " 2.6,3.1,1.23,0.2\n" +
- " 23.0,3.6,1.2,0.2\n" +
- " 23.2,3.9,1.7,0.2\n" +
- " 2.6,3.2,1.2,0.3\n" +
- " 23.0,3.2,1.23,0.2\n" +
- " 2.2,2.9,1.2,0.2\n" +
- " 2.9,3.1,1.23,0.1\n";
-
+ private static final String SAMPLE_SPARSE_ARFF = " % Comments\n" + " %
\n" + " % Comments go here"
+ + " % \n" + " @RELATION
Mahout\n" + '\n'
+ + " @ATTRIBUTE foo
NUMERIC\n"
+ + " @ATTRIBUTE bar
NUMERIC\n"
+ + " @ATTRIBUTE hockey
NUMERIC\n"
+ + " @ATTRIBUTE football
NUMERIC\n"
+ + " @ATTRIBUTE tennis
NUMERIC\n" + " \n" + '\n'
+ + '\n' + " @DATA\n" + "
{1 23.1,2 3.23,3 1.2,4 0.2}\n"
+ + " {0 2.9}\n" + " {0
2.7,2 3.2,3 1.3,4 0.2}\n"
+ + " {1 2.6,2 3.1,3 1.23,4
0.2}\n"
+ + " {1 23.0,2 3.6,3 1.2,4
0.2}\n"
+ + " {0 23.2,1 3.9,3 1.7,4
0.2}\n"
+ + " {0 2.6,1 3.2,2 1.2,4
0.3}\n"
+ + " {1 23.0,2 3.2,3
1.23}\n"
+ + " {1 2.2,2 2.94,3
0.2}\n" + " {1 2.9,2 3.1}\n";
- private static final String SAMPLE_SPARSE_ARFF = " % Comments\n" +
- " % \n" +
- " % Comments go here" +
- " % \n" +
- " @RELATION Mahout\n" +
- '\n' +
- " @ATTRIBUTE foo NUMERIC\n" +
- " @ATTRIBUTE bar NUMERIC\n" +
- " @ATTRIBUTE hockey NUMERIC\n" +
- " @ATTRIBUTE football NUMERIC\n" +
- " @ATTRIBUTE tennis NUMERIC\n" +
- " \n" +
- '\n' +
- '\n' +
- " @DATA\n" +
- " {1 23.1,2 3.23,3 1.2,4 0.2}\n" +
- " {0 2.9}\n" +
- " {0 2.7,2 3.2,3 1.3,4 0.2}\n" +
- " {1 2.6,2 3.1,3 1.23,4 0.2}\n" +
- " {1 23.0,2 3.6,3 1.2,4 0.2}\n" +
- " {0 23.2,1 3.9,3 1.7,4 0.2}\n" +
- " {0 2.6,1 3.2,2 1.2,4 0.3}\n" +
- " {1 23.0,2 3.2,3 1.23}\n" +
- " {1 2.2,2 2.94 0.2}\n" +
- " {1 2.9,2 3.1}\n";
+ private static final String NON_NUMERIC_ARFF = " % Comments\n" + " % \n"
+ " % Comments go here"
+ + " % \n" + " @RELATION
Mahout\n" + '\n'
+ + " @ATTRIBUTE junk
NUMERIC\n"
+ + " @ATTRIBUTE foo
NUMERIC\n"
+ + " @ATTRIBUTE bar
{c,d}\n"
+ + " @ATTRIBUTE hockey
string\n"
+ + " @ATTRIBUTE football
date \"yyyy-MM-dd\"\n" + " \n"
+ + '\n' + '\n' + " @DATA\n"
+ + " {2 c,3 gretzky,4
1973-10-23}\n"
+ + " {1 2.9,2 d,3 orr,4
1973-11-23}\n"
+ + " {2 c,3 bossy,4
1981-10-23}\n"
+ + " {1 2.6,2 c,3 lefleur,4
1989-10-23}\n"
+ + " {3 esposito,4
1973-04-23}\n"
+ + " {1 23.2,2 d,3 chelios,4
1999-2-23}\n"
+ + " {3 richard,4
1973-10-12}\n"
+ + " {3 howe,4 1983-06-23}\n"
+ + " {0 2.2,2 d,3 messier,4
2008-11-23}\n"
+ + " {2 c,3 roy,4
1973-10-13}\n";
- private static final String NON_NUMERIC_ARFF = " % Comments\n" +
- " % \n" +
- " % Comments go here" +
- " % \n" +
- " @RELATION Mahout\n" +
- '\n' +
- " @ATTRIBUTE junk NUMERIC\n" +
- " @ATTRIBUTE foo NUMERIC\n" +
- " @ATTRIBUTE bar {c,d}\n" +
- " @ATTRIBUTE hockey string\n" +
- " @ATTRIBUTE football date \"yyyy-MM-dd\"\n" +
- " \n" +
- '\n' +
- '\n' +
- " @DATA\n" +
- " {2 c,3 gretzky,4 1973-10-23}\n" +
- " {1 2.9,2 d,3 orr,4 1973-11-23}\n" +
- " {2 c,3 bossy,4 1981-10-23}\n" +
- " {1 2.6,2 c,3 lefleur,4 1989-10-23}\n" +
- " {3 esposito,4 1973-04-23}\n" +
- " {1 23.2,2 d,3 chelios,4 1999-2-23}\n" +
- " {3 richard,4 1973-10-12}\n" +
- " {3 howe,4 1983-06-23}\n" +
- " {0 2.2,2 d,3 messier,4 2008-11-23}\n" +
- " {2 c,3 roy,4 1973-10-13}\n";
+ private static final String NON_NUMERIC_ARFF2 = " % Comments\n" + " %
\n" + " % Comments go here"
+ + " % \n" + " @RELATION
Mahout\n" + '\n'
+ + " @ATTRIBUTE junk
NUMERIC\n"
+ + " @ATTRIBUTE foo
NUMERIC\n"
+ + " @ATTRIBUTE test
{f,z}\n"
+ + " @ATTRIBUTE hockey
string\n"
+ + " @ATTRIBUTE football
date \"yyyy-MM-dd\"\n" + " \n"
+ + '\n' + '\n' + " @DATA\n"
+ + " {2 f,3 gretzky,4
1973-10-23}\n"
+ + " {1 2.9,2 z,3 orr,4
1973-11-23}\n"
+ + " {2 f,3 bossy,4
1981-10-23}\n"
+ + " {1 2.6,2 f,3 lefleur,4
1989-10-23}\n"
+ + " {3 esposito,4
1973-04-23}\n"
+ + " {1 23.2,2 z,3
chelios,4 1999-2-23}\n"
+ + " {3 richard,4
1973-10-12}\n"
+ + " {3 howe,4
1983-06-23}\n"
+ + " {0 2.2,2 f,3 messier,4
2008-11-23}\n"
+ + " {2 f,3 roy,4
1973-10-13}\n";
- private static final String NON_NUMERIC_ARFF2 = " % Comments\n" +
- " % \n" +
- " % Comments go here" +
- " % \n" +
- " @RELATION Mahout\n" +
- '\n' +
- " @ATTRIBUTE junk NUMERIC\n" +
- " @ATTRIBUTE foo NUMERIC\n" +
- " @ATTRIBUTE test {f,z}\n" +
- " @ATTRIBUTE hockey string\n" +
- " @ATTRIBUTE football date \"yyyy-MM-dd\"\n" +
- " \n" +
- '\n' +
- '\n' +
- " @DATA\n" +
- " {2 f,3 gretzky,4 1973-10-23}\n" +
- " {1 2.9,2 z,3 orr,4 1973-11-23}\n" +
- " {2 f,3 bossy,4 1981-10-23}\n" +
- " {1 2.6,2 f,3 lefleur,4 1989-10-23}\n" +
- " {3 esposito,4 1973-04-23}\n" +
- " {1 23.2,2 z,3 chelios,4 1999-2-23}\n" +
- " {3 richard,4 1973-10-12}\n" +
- " {3 howe,4 1983-06-23}\n" +
- " {0 2.2,2 f,3 messier,4 2008-11-23}\n" +
- " {2 f,3 roy,4 1973-10-13}\n";
+ private static final String DATE_ARFF = " % Comments\n"
+ + " % \n"
+ + " % Comments go here"
+ + " % \n"
+ + " @RELATION MahoutDateTest\n"
+ + '\n'
+ + " @ATTRIBUTE junk NUMERIC\n"
+ + " @ATTRIBUTE date1 \n"
+ + " @ATTRIBUTE date2 date
\"yyyy.MM.dd G 'at' HH:mm:ss z\" \n"
+ + " @ATTRIBUTE date3 date \"EEE,
MMM d, ''yy\" \n"
+ + " @ATTRIBUTE date4 date \"K:mm
a, z\" \n"
+ + " @ATTRIBUTE date5 date
\"yyyyy.MMMMM.dd GGG hh:mm aaa\" \n"
+ + " @ATTRIBUTE date6 date \"EEE,
d MMM yyyy HH:mm:ss Z\" \n"
+ + " \n"
+ + '\n'
+ + '\n'
+ + " @DATA\n"
+ + " {0 1,1
\"2001-07-04T12:08:56\",2 \"2001.07.04 AD at 12:08:56 PDT\",3 \"Wed, Jul 4,
'01,4 0:08 PM, PDT\",4 \"0:08 PM, PDT\", 5 \"02001.July.04 AD 12:08 PM\" ,6
\"Wed, 4 Jul 2001 12:08:56 -0700\" }\n"
+ + " {0 2,1
\"2001-08-04T12:09:56\",2 \"2011.07.04 AD at 12:08:56 PDT\",3 \"Mon, Jul 4,
'11,4 0:08 PM, PDT\",4 \"0:08 PM, PDT\", 5 \"02001.July.14 AD 12:08 PM\" ,6
\"Mon, 4 Jul 2011 12:08:56 -0700\" }\n";
}