On Thu, Feb 6, 2014 at 12:39 AM, <[email protected]> wrote: > Updated Branches: > refs/heads/1.4.5-SNAPSHOT [created] e15859054 >
In case anyone else is confused about these 50 emails like I was, these were pushed to accumulo-wikisearch.git and not accumulo.git. I emailed Bill and he helped me sort myself out. It would be nice if these emails included the branch and repo (e.g. accumulo-wikisearch.git refs/heads/1.4.5-SNAPSHOT [created] e15859054) > > > ACCUMULO-354 added boolean instead of null to detect presence of next value > > git-svn-id: > https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1238696 13f79535-47bb-0310-9956-ffa450edef68 > > > Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo > Commit: > http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/b9cf2945 > Tree: > http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/b9cf2945 > Diff: > http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/b9cf2945 > > Branch: refs/heads/1.4.5-SNAPSHOT > Commit: b9cf2945ee33ace0726298601462d02b0e226190 > Parents: 72fbb54 > Author: Billie Rinaldi <[email protected]> > Authored: Tue Jan 31 16:53:40 2012 +0000 > Committer: Billie Rinaldi <[email protected]> > Committed: Tue Jan 31 16:53:40 2012 +0000 > > ---------------------------------------------------------------------- > .../wikisearch/ingest/WikipediaIngester.java | 3 ++ > .../wikisearch/iterator/TextIndexTest.java | 43 ++++++++++++++++++++ > 2 files changed, 46 insertions(+) > ---------------------------------------------------------------------- > > > > http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b9cf2945/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java > ---------------------------------------------------------------------- > diff --git > a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java > b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java > index 31c8472..50415a7 100644 > --- > 
a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java > +++ > b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java > @@ -88,6 +88,7 @@ public class WikipediaIngester extends Configured > implements Tool { > columns.add(new Column("fi\0" + family)); > } > TextIndexCombiner.setColumns(setting, columns); > + TextIndexCombiner.setLossyness(setting, true); > > tops.attachIterator(tableName, setting, > EnumSet.allOf(IteratorScope.class)); > } > @@ -102,6 +103,7 @@ public class WikipediaIngester extends Configured > implements Tool { > // Add the UID combiner > IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", > GlobalIndexUidCombiner.class); > GlobalIndexUidCombiner.setCombineAllColumns(setting, true); > + GlobalIndexUidCombiner.setLossyness(setting, true); > tops.attachIterator(indexTableName, setting, > EnumSet.allOf(IteratorScope.class)); > } > > @@ -110,6 +112,7 @@ public class WikipediaIngester extends Configured > implements Tool { > // Add the UID combiner > IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", > GlobalIndexUidCombiner.class); > GlobalIndexUidCombiner.setCombineAllColumns(setting, true); > + GlobalIndexUidCombiner.setLossyness(setting, true); > tops.attachIterator(reverseIndexTableName, setting, > EnumSet.allOf(IteratorScope.class)); > } > > > > http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b9cf2945/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java > ---------------------------------------------------------------------- > diff --git > a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java > b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java > index 22ef9aa..7297b5a 100644 > --- > a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java > +++ > 
b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java > @@ -139,4 +139,47 @@ public class TextIndexTest { > Assert.assertTrue(offsets.get(4) == 15); > Assert.assertTrue(offsets.get(5) == 19); > } > + > + @Test > + public void testEmptyValue() throws InvalidProtocolBufferException { > + Builder builder = createBuilder(); > + builder.addWordOffset(13); > + builder.addWordOffset(15); > + builder.addWordOffset(19); > + builder.setNormalizedTermFrequency(0.12f); > + > + values.add(new Value("".getBytes())); > + values.add(new Value(builder.build().toByteArray())); > + values.add(new Value("".getBytes())); > + > + builder = createBuilder(); > + builder.addWordOffset(1); > + builder.addWordOffset(5); > + builder.setNormalizedTermFrequency(0.1f); > + > + values.add(new Value(builder.build().toByteArray())); > + values.add(new Value("".getBytes())); > + > + builder = createBuilder(); > + builder.addWordOffset(3); > + builder.setNormalizedTermFrequency(0.05f); > + > + values.add(new Value(builder.build().toByteArray())); > + values.add(new Value("".getBytes())); > + > + Value result = combiner.reduce(new Key(), values.iterator()); > + > + TermWeight.Info info = TermWeight.Info.parseFrom(result.get()); > + > + Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f); > + > + List<Integer> offsets = info.getWordOffsetList(); > + Assert.assertTrue(offsets.size() == 6); > + Assert.assertTrue(offsets.get(0) == 1); > + Assert.assertTrue(offsets.get(1) == 3); > + Assert.assertTrue(offsets.get(2) == 5); > + Assert.assertTrue(offsets.get(3) == 13); > + Assert.assertTrue(offsets.get(4) == 15); > + Assert.assertTrue(offsets.get(5) == 19); > + } > } > >
