Author: twilliams
Date: Wed Aug 12 00:58:37 2009
New Revision: 803338
URL: http://svn.apache.org/viewvc?rev=803338&view=rev
Log:
Add initial Lucene translation structure.
Added:
labs/esqueranto/trunk/lib/lucene-core-2.4.1.jar (with props)
labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/
labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.g
labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.java
labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.tokens
labs/esqueranto/trunk/test/org/apache/labs/esqueranto/lucene/
labs/esqueranto/trunk/test/org/apache/labs/esqueranto/lucene/test/
labs/esqueranto/trunk/test/org/apache/labs/esqueranto/lucene/test/BasicLuceneParserTest.java
Modified:
labs/esqueranto/trunk/lib/ (props changed)
Propchange: labs/esqueranto/trunk/lib/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Aug 12 00:58:37 2009
@@ -0,0 +1 @@
+nblibraries-private.properties
Added: labs/esqueranto/trunk/lib/lucene-core-2.4.1.jar
URL:
http://svn.apache.org/viewvc/labs/esqueranto/trunk/lib/lucene-core-2.4.1.jar?rev=803338&view=auto
==============================================================================
Binary file - no diff available.
Propchange: labs/esqueranto/trunk/lib/lucene-core-2.4.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.g
URL:
http://svn.apache.org/viewvc/labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.g?rev=803338&view=auto
==============================================================================
--- labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.g (added)
+++ labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.g Wed Aug 12 00:58:37 2009
@@ -0,0 +1,59 @@
+tree grammar LuceneWalker;
+
+//To compile: java org.antlr.Tool -lib .. LuceneWalker.g
+
+options {
+ tokenVocab=esqueranto;
+ ASTLabelType=CommonTree;
+ backtrack=true;
+}
+
+@header {
+ package org.apache.labs.esqueranto.lucene;
+
+ import org.apache.lucene.search.BooleanClause;
+ import org.apache.lucene.search.BooleanQuery;
+ import org.apache.lucene.search.Query;
+ import org.apache.lucene.search.TermQuery;
+ import org.apache.lucene.index.Term;
+}
+
+@members {
+ private String defaultField = "content";
+}
+
+expr returns [Query value, String orig]
+ : ^(AND a=expr b=expr) {
+ BooleanQuery q = new BooleanQuery();
+ q.add(a.value, BooleanClause.Occur.MUST);
+ q.add(b.value, BooleanClause.Occur.MUST);
+ $value = q;
+ }
+ | ^(OR a=expr b=expr) {
+ BooleanQuery q = new BooleanQuery();
+ q.add(a.value, BooleanClause.Occur.SHOULD);
+ q.add(b.value, BooleanClause.Occur.SHOULD);
+ $value = q;
+ }
+ | ^(NOT a=expr b=expr) {
+ BooleanQuery q = new BooleanQuery();
+ q.add(a.value, BooleanClause.Occur.SHOULD);
+ q.add(b.value, BooleanClause.Occur.MUST_NOT);
+ $value = q;
+ }
+ | CHAR_STRING1 {
+ $orig = $CHAR_STRING1.text;
+ $value = new TermQuery(new Term(defaultField, $CHAR_STRING1.text));
+ }
+ ;
+
+prog: stat+;
+
+stat:
+ expr {System.out.println($expr.value.toString());}
+ ;
+
+token returns [String value]
+ :
+ CHAR_STRING1 {$value = $CHAR_STRING1.text;}
+ ;
\ No newline at end of file
Added:
labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.java
URL:
http://svn.apache.org/viewvc/labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.java?rev=803338&view=auto
==============================================================================
--- labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.java (added)
+++ labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.java Wed Aug 12 00:58:37 2009
@@ -0,0 +1,358 @@
+// $ANTLR 3.1.3 Mar 18, 2009 10:09:25 LuceneWalker.g 2009-08-11 20:55:46
+
+ package org.apache.labs.esqueranto.lucene;
+
+ import org.apache.lucene.search.BooleanClause;
+ import org.apache.lucene.search.BooleanQuery;
+ import org.apache.lucene.search.Query;
+ import org.apache.lucene.search.TermQuery;
+ import org.apache.lucene.index.Term;
+
+
+import org.antlr.runtime.*;
+import org.antlr.runtime.tree.*;import java.util.Stack;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.HashMap;
+public class LuceneWalker extends TreeParser {
+ public static final String[] tokenNames = new String[] {
+ "<invalid>", "<EOR>", "<DOWN>", "<UP>", "EXACT_EQ", "AND", "OR", "NOT", "LPAREN", "OR_TOKEN", "RPAREN", "AND_TOKEN", "NOT_TOKEN", "CHAR_STRING1", "NEWLINE", "WS"
+ };
+ public static final int EXACT_EQ=4;
+ public static final int OR_TOKEN=9;
+ public static final int AND=5;
+ public static final int CHAR_STRING1=13;
+ public static final int WS=15;
+ public static final int EOF=-1;
+ public static final int RPAREN=10;
+ public static final int AND_TOKEN=11;
+ public static final int LPAREN=8;
+ public static final int NOT_TOKEN=12;
+ public static final int NOT=7;
+ public static final int OR=6;
+ public static final int NEWLINE=14;
+
+ // delegates
+ // delegators
+
+
+ public LuceneWalker(TreeNodeStream input) {
+ this(input, new RecognizerSharedState());
+ }
+ public LuceneWalker(TreeNodeStream input, RecognizerSharedState state) {
+ super(input, state);
+
+ }
+
+
+ public String[] getTokenNames() { return LuceneWalker.tokenNames; }
+ public String getGrammarFileName() { return "LuceneWalker.g"; }
+
+
+ private String defaultField = "content";
+
+
+ public static class expr_return extends TreeRuleReturnScope {
+ public Query value;
+ public String orig;
+ };
+
+ // $ANTLR start "expr"
+ // LuceneWalker.g:25:1: expr returns [Query value, String orig] : ( ^( AND a= expr b= expr ) | ^( OR a= expr b= expr ) | ^( NOT a= expr b= expr ) | CHAR_STRING1 );
+ public final LuceneWalker.expr_return expr() throws RecognitionException {
+ LuceneWalker.expr_return retval = new LuceneWalker.expr_return();
+ retval.start = input.LT(1);
+
+ CommonTree CHAR_STRING11=null;
+ LuceneWalker.expr_return a = null;
+
+ LuceneWalker.expr_return b = null;
+
+
+ try {
+ // LuceneWalker.g:26:5: ( ^( AND a= expr b= expr ) | ^( OR a= expr b= expr ) | ^( NOT a= expr b= expr ) | CHAR_STRING1 )
+ int alt1=4;
+ switch ( input.LA(1) ) {
+ case AND:
+ {
+ alt1=1;
+ }
+ break;
+ case OR:
+ {
+ alt1=2;
+ }
+ break;
+ case NOT:
+ {
+ alt1=3;
+ }
+ break;
+ case CHAR_STRING1:
+ {
+ alt1=4;
+ }
+ break;
+ default:
+ if (state.backtracking>0) {state.failed=true; return retval;}
+ NoViableAltException nvae =
+ new NoViableAltException("", 1, 0, input);
+
+ throw nvae;
+ }
+
+ switch (alt1) {
+ case 1 :
+ // LuceneWalker.g:26:7: ^( AND a= expr b= expr )
+ {
+ match(input,AND,FOLLOW_AND_in_expr67); if (state.failed) return retval;
+
+ match(input, Token.DOWN, null); if (state.failed) return retval;
+ pushFollow(FOLLOW_expr_in_expr71);
+ a=expr();
+
+ state._fsp--;
+ if (state.failed) return retval;
+ pushFollow(FOLLOW_expr_in_expr75);
+ b=expr();
+
+ state._fsp--;
+ if (state.failed) return retval;
+
+ match(input, Token.UP, null); if (state.failed) return retval;
+ if ( state.backtracking==0 ) {
+
+ BooleanQuery q = new BooleanQuery();
+ q.add(a.value, BooleanClause.Occur.MUST);
+ q.add(b.value, BooleanClause.Occur.MUST);
+ retval.value = q;
+
+ }
+
+ }
+ break;
+ case 2 :
+ // LuceneWalker.g:32:7: ^( OR a= expr b= expr )
+ {
+ match(input,OR,FOLLOW_OR_in_expr87); if (state.failed) return retval;
+
+ match(input, Token.DOWN, null); if (state.failed) return retval;
+ pushFollow(FOLLOW_expr_in_expr91);
+ a=expr();
+
+ state._fsp--;
+ if (state.failed) return retval;
+ pushFollow(FOLLOW_expr_in_expr95);
+ b=expr();
+
+ state._fsp--;
+ if (state.failed) return retval;
+
+ match(input, Token.UP, null); if (state.failed) return retval;
+ if ( state.backtracking==0 ) {
+
+ BooleanQuery q = new BooleanQuery();
+ q.add(a.value, BooleanClause.Occur.SHOULD);
+ q.add(b.value, BooleanClause.Occur.SHOULD);
+ retval.value = q;
+
+ }
+
+ }
+ break;
+ case 3 :
+ // LuceneWalker.g:38:7: ^( NOT a= expr b= expr )
+ {
+ match(input,NOT,FOLLOW_NOT_in_expr107); if (state.failed) return retval;
+
+ match(input, Token.DOWN, null); if (state.failed) return retval;
+ pushFollow(FOLLOW_expr_in_expr111);
+ a=expr();
+
+ state._fsp--;
+ if (state.failed) return retval;
+ pushFollow(FOLLOW_expr_in_expr115);
+ b=expr();
+
+ state._fsp--;
+ if (state.failed) return retval;
+
+ match(input, Token.UP, null); if (state.failed) return retval;
+ if ( state.backtracking==0 ) {
+
+ BooleanQuery q = new BooleanQuery();
+ q.add(a.value, BooleanClause.Occur.SHOULD);
+ q.add(b.value, BooleanClause.Occur.MUST_NOT);
+ retval.value = q;
+
+ }
+
+ }
+ break;
+ case 4 :
+ // LuceneWalker.g:44:7: CHAR_STRING1
+ {
+ CHAR_STRING11=(CommonTree)match(input,CHAR_STRING1,FOLLOW_CHAR_STRING1_in_expr126); if (state.failed) return retval;
+ if ( state.backtracking==0 ) {
+
+ retval.orig = (CHAR_STRING11!=null?CHAR_STRING11.getText():null);
+ retval.value = new TermQuery(new Term(defaultField, (CHAR_STRING11!=null?CHAR_STRING11.getText():null)));
+
+ }
+
+ }
+ break;
+
+ }
+ }
+ catch (RecognitionException re) {
+ reportError(re);
+ recover(input,re);
+ }
+ finally {
+ }
+ return retval;
+ }
+ // $ANTLR end "expr"
+
+
+ // $ANTLR start "prog"
+ // LuceneWalker.g:50:1: prog : ( stat )+ ;
+ public final void prog() throws RecognitionException {
+ try {
+ // LuceneWalker.g:50:5: ( ( stat )+ )
+ // LuceneWalker.g:50:7: ( stat )+
+ {
+ // LuceneWalker.g:50:7: ( stat )+
+ int cnt2=0;
+ loop2:
+ do {
+ int alt2=2;
+ int LA2_0 = input.LA(1);
+
+ if ( ((LA2_0>=AND && LA2_0<=NOT)||LA2_0==CHAR_STRING1) ) {
+ alt2=1;
+ }
+
+
+ switch (alt2) {
+ case 1 :
+ // LuceneWalker.g:0:0: stat
+ {
+ pushFollow(FOLLOW_stat_in_prog140);
+ stat();
+
+ state._fsp--;
+ if (state.failed) return ;
+
+ }
+ break;
+
+ default :
+ if ( cnt2 >= 1 ) break loop2;
+ if (state.backtracking>0) {state.failed=true; return ;}
+ EarlyExitException eee =
+ new EarlyExitException(2, input);
+ throw eee;
+ }
+ cnt2++;
+ } while (true);
+
+
+ }
+
+ }
+ catch (RecognitionException re) {
+ reportError(re);
+ recover(input,re);
+ }
+ finally {
+ }
+ return ;
+ }
+ // $ANTLR end "prog"
+
+
+ // $ANTLR start "stat"
+ // LuceneWalker.g:52:1: stat : expr ;
+ public final void stat() throws RecognitionException {
+ LuceneWalker.expr_return expr2 = null;
+
+
+ try {
+ // LuceneWalker.g:52:5: ( expr )
+ // LuceneWalker.g:53:5: expr
+ {
+ pushFollow(FOLLOW_expr_in_stat155);
+ expr2=expr();
+
+ state._fsp--;
+ if (state.failed) return ;
+ if ( state.backtracking==0 ) {
+ System.out.println((expr2!=null?expr2.value:null).toString());
+ }
+
+ }
+
+ }
+ catch (RecognitionException re) {
+ reportError(re);
+ recover(input,re);
+ }
+ finally {
+ }
+ return ;
+ }
+ // $ANTLR end "stat"
+
+
+ // $ANTLR start "token"
+ // LuceneWalker.g:56:1: token returns [String value] : CHAR_STRING1 ;
+ public final String token() throws RecognitionException {
+ String value = null;
+
+ CommonTree CHAR_STRING13=null;
+
+ try {
+ // LuceneWalker.g:57:5: ( CHAR_STRING1 )
+ // LuceneWalker.g:58:5: CHAR_STRING1
+ {
+ CHAR_STRING13=(CommonTree)match(input,CHAR_STRING1,FOLLOW_CHAR_STRING1_in_token182); if (state.failed) return value;
+ if ( state.backtracking==0 ) {
+ value = (CHAR_STRING13!=null?CHAR_STRING13.getText():null);
+ }
+
+ }
+
+ }
+ catch (RecognitionException re) {
+ reportError(re);
+ recover(input,re);
+ }
+ finally {
+ }
+ return value;
+ }
+ // $ANTLR end "token"
+
+ // Delegated rules
+
+
+
+
+ public static final BitSet FOLLOW_AND_in_expr67 = new BitSet(new long[]{0x0000000000000004L});
+ public static final BitSet FOLLOW_expr_in_expr71 = new BitSet(new long[]{0x00000000000020E0L});
+ public static final BitSet FOLLOW_expr_in_expr75 = new BitSet(new long[]{0x0000000000000008L});
+ public static final BitSet FOLLOW_OR_in_expr87 = new BitSet(new long[]{0x0000000000000004L});
+ public static final BitSet FOLLOW_expr_in_expr91 = new BitSet(new long[]{0x00000000000020E0L});
+ public static final BitSet FOLLOW_expr_in_expr95 = new BitSet(new long[]{0x0000000000000008L});
+ public static final BitSet FOLLOW_NOT_in_expr107 = new BitSet(new long[]{0x0000000000000004L});
+ public static final BitSet FOLLOW_expr_in_expr111 = new BitSet(new long[]{0x00000000000020E0L});
+ public static final BitSet FOLLOW_expr_in_expr115 = new BitSet(new long[]{0x0000000000000008L});
+ public static final BitSet FOLLOW_CHAR_STRING1_in_expr126 = new BitSet(new long[]{0x0000000000000002L});
+ public static final BitSet FOLLOW_stat_in_prog140 = new BitSet(new long[]{0x00000000000020E2L});
+ public static final BitSet FOLLOW_expr_in_stat155 = new BitSet(new long[]{0x0000000000000002L});
+ public static final BitSet FOLLOW_CHAR_STRING1_in_token182 = new BitSet(new long[]{0x0000000000000002L});
+
+}
\ No newline at end of file
Added:
labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.tokens
URL:
http://svn.apache.org/viewvc/labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.tokens?rev=803338&view=auto
==============================================================================
--- labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.tokens (added)
+++ labs/esqueranto/trunk/src/org/apache/labs/esqueranto/lucene/LuceneWalker.tokens Wed Aug 12 00:58:37 2009
@@ -0,0 +1,12 @@
+EXACT_EQ=4
+OR_TOKEN=9
+AND=5
+CHAR_STRING1=13
+WS=15
+RPAREN=10
+AND_TOKEN=11
+LPAREN=8
+NOT_TOKEN=12
+NOT=7
+OR=6
+NEWLINE=14
Added:
labs/esqueranto/trunk/test/org/apache/labs/esqueranto/lucene/test/BasicLuceneParserTest.java
URL:
http://svn.apache.org/viewvc/labs/esqueranto/trunk/test/org/apache/labs/esqueranto/lucene/test/BasicLuceneParserTest.java?rev=803338&view=auto
==============================================================================
--- labs/esqueranto/trunk/test/org/apache/labs/esqueranto/lucene/test/BasicLuceneParserTest.java (added)
+++ labs/esqueranto/trunk/test/org/apache/labs/esqueranto/lucene/test/BasicLuceneParserTest.java Wed Aug 12 00:58:37 2009
@@ -0,0 +1,93 @@
+
+package org.apache.labs.esqueranto.lucene.test;
+
+import java.io.ByteArrayInputStream;
+import org.antlr.runtime.ANTLRInputStream;
+import org.antlr.runtime.CommonTokenStream;
+import org.antlr.runtime.tree.CommonTree;
+import org.antlr.runtime.tree.CommonTreeNodeStream;
+import org.apache.labs.esqueranto.esquerantoLexer;
+import org.apache.labs.esqueranto.esquerantoParser;
+import org.apache.labs.esqueranto.lucene.LuceneWalker;
+import org.apache.lucene.search.Query;
+import static org.junit.Assert.*;
+import org.junit.Test;
+
+public class BasicLuceneParserTest {
+
+ //Test AND operator, including case variants.
+ @Test
+ public void simpleAndOperator() throws Exception {
+ String[] q = {
+ "(cat AND dog)"
+ };
+ String[] t = {
+ "+content:cat +content:dog"
+ };
+ test(q,t);
+ }
+
+ //Test NOT operator, including case variants.
+ @Test
+ public void simpleNotOperator() throws Exception {
+ String[] q = {
+ "(cat not dog)"
+ };
+ String[] t = {
+ "content:cat -content:dog"
+ };
+ test(q,t);
+ }
+
+
+ //Test OR operator, including case variants.
+ @Test
+ public void simpleOrOperator() throws Exception {
+ String[] q = {
+ "(cat or dog)"
+ };
+ String[] t = {
+ "content:cat content:dog"
+ };
+ test(q,t);
+ }
+
+ private void test(String[] queries, String[] tree) throws Exception {
+ if(queries == null || tree == null) {
+ throw new Exception("Invalid argument: both params are " +
+ "required.");
+ }
+ if(queries.length != tree.length) {
+ throw new Exception("Test queries must have equivalent tree " +
+ "representations.");
+ }
+
+ esquerantoLexer lex = new esquerantoLexer();
+ CommonTokenStream toks = new CommonTokenStream();
+ esquerantoParser p = new esquerantoParser(null);
+
+ for(int i = 0; i < queries.length; i++) {
+ String q = queries[i];
+ ByteArrayInputStream bis = new
+ ByteArrayInputStream(q.getBytes());
+ ANTLRInputStream in = new ANTLRInputStream(bis);
+
+ lex.setCharStream(in);
+ toks.setTokenSource(lex);
+ p.setTokenStream(toks);
+
+ esquerantoParser.prog_return r = p.prog();
+ CommonTree t = (CommonTree) r.getTree();
+ //assertEquals(tree[i], t.toStringTree());
+ CommonTreeNodeStream nodes = new CommonTreeNodeStream(t);
+ LuceneWalker walker = new LuceneWalker(nodes);
+ LuceneWalker.expr_return lret = walker.expr();
+ Query query = lret.value;
+
+ assertEquals(tree[i], query.toString());
+
+ }
+
+ }
+
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]