mikemccand commented on a change in pull request #146: URL: https://github.com/apache/lucene/pull/146#discussion_r659244839
########## File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java ########## @@ -314,5 +323,221 @@ public void testTwoLongParallelPaths() throws Exception { 11); } + // The end node the long path is supposed to flatten over doesn't exist + // assert disabled = pos length of abc = 4 + // assert enabled = AssertionError: outputEndNode=3 vs inputTo=2 + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-9963") + public void testAltPathFirstStepHole() throws Exception { + TokenStream in = + new CannedTokenStream( + 0, + 3, + new Token[] {token("abc", 1, 3, 0, 3), token("b", 1, 1, 1, 2), token("c", 1, 1, 2, 3)}); + + TokenStream out = new FlattenGraphFilter(in); + + assertTokenStreamContents( + out, + new String[] {"abc", "b", "c"}, + new int[] {0, 1, 2}, + new int[] {3, 2, 3}, + new int[] {1, 1, 1}, + new int[] {3, 1, 1}, + 3); + } + + // Last node in an alt path releases the long path. but it doesn't exist in this graph + // pos length of abc = 1 + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-9963") + public void testAltPathLastStepHole() throws Exception { + TokenStream in = + new CannedTokenStream( + 0, + 4, + new Token[] { + token("abc", 1, 3, 0, 3), + token("a", 0, 1, 0, 1), + token("b", 1, 1, 1, 2), + token("d", 2, 1, 3, 4) + }); + + TokenStream out = new FlattenGraphFilter(in); + + assertTokenStreamContents( + out, + new String[] {"abc", "a", "b", "d"}, + new int[] {0, 0, 1, 3}, + new int[] {1, 1, 2, 4}, + new int[] {1, 0, 1, 2}, + new int[] {3, 1, 1, 1}, + 4); + } + + // Posinc >2 gets squashed to 2 + public void testLongHole() throws Exception { + TokenStream in = + new CannedTokenStream( + 0, + 28, + new Token[] { + token("hello", 1, 1, 0, 5), token("hole", 5, 1, 20, 24), token("fun", 1, 1, 25, 28), + }); + + TokenStream out = new FlattenGraphFilter(in); + + assertTokenStreamContents( + out, + new String[] {"hello", "hole", "fun"}, + new int[] {0, 20, 25}, + new int[] {5, 24, 
28}, + new int[] {1, 2, 1}, + new int[] {1, 1, 1}, + 28); + } + + // multiple nodes missing in the alt path. Last edge shows up after long edge and short edge, + // which looks good but the output graph isn't flat. + // assert disabled = nothing + // assert enabled = AssertionError + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-9963") + public void testAltPathLastStepLongHole() throws Exception { + TokenStream in = + new CannedTokenStream( + 0, + 4, + new Token[] {token("abc", 1, 3, 0, 3), token("a", 0, 1, 0, 1), token("d", 3, 1, 3, 4)}); + + TokenStream out = new FlattenGraphFilter(in); + + assertTokenStreamContents( + out, + new String[] {"abc", "a", "d"}, + new int[] {0, 0, 3}, + new int[] {1, 1, 4}, + new int[] {1, 0, 1}, + new int[] {1, 1, 1}, + 4); + } + + // LUCENE-8723 + // Token stream ends without last node showing up + // assert disabled = dropped token + // assert enabled = AssertionError: 2 + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-9963") + public void testAltPathLastStepHoleWithoutEndToken() throws Exception { + TokenStream in = + new CannedTokenStream( + 0, + 2, + new Token[] {token("abc", 1, 3, 0, 3), token("a", 0, 1, 0, 1), token("b", 1, 1, 1, 2)}); + + TokenStream out = new FlattenGraphFilter(in); + + assertTokenStreamContents( + out, + new String[] {"abc", "a", "b"}, + new int[] {0, 0, 1}, + new int[] {1, 1, 2}, + new int[] {1, 0, 1}, + new int[] {1, 1, 1}, + 2); + } + + private CharsRef buildMultiTokenCarRef( Review comment: Hmm, the method name looks misspelled -- should the suffix be `CharsRef` instead of `CarRef` (i.e. `buildMultiTokenCharsRef`)? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org