: Further to our discussions some time ago I've had some time to put
: together an XML-based query parser with support for many "advanced"
: query types not supported in the current Query parser.
:
: More details and code here: http://www.inperspective.com/lucene/LXQuery2.htm
So I *finally* got a chance to look at this in depth. In general, I think
it looks great, but I have a few questions/comments...
1) The factory classes should have "removeBuilder" methods so people
subclassing parsers can flat out remove support for a particular
tag, not just replace it.
2) This DOM version definitely seems easier to follow/use than the SAX
version (although that could just be because I'm more familiar
with DOM than SAX) .. but it still seems like an intermediate
representation that wasn't org.w3c.dom.Element would be handy -- if
for no other reason than so the methods in DOMUtils could be put
directly in the "Element" class.
3) I'm still confused about how state information could/would be
passed up or down the tree by builders. You mentioned something
before about using ThreadLocal (which I'm not very familiar with)
but I don't see any examples of that here.
4) The various getQuery (and getFilter and getSpanQuery) methods
should use e.getTagName when throwing ParserExceptions about not
finding what they're looking for. That way, if I write a
parser that has...
queryFactory.addBuilder("tf",new TermQueryObjectBuilder());
... the ParserExceptions thrown by TermQueryObjectBuilder.getQuery
when I forget to include a value="foo" can reference the tag name
I'm using (tf), and not the hardcoded constant "TermQuery"
5) It seems like a lot of redundancy could be removed between the
various builders by refactoring some more utility functions --
particularly in the error cases (see attached patch)
-Hoss
Only in modified: build.xml
Only in modified: lib
diff -cr orig/src/org/apache/lucene/xmlparser/DOMUtils.java
modified/src/org/apache/lucene/xmlparser/DOMUtils.java
*** orig/src/org/apache/lucene/xmlparser/DOMUtils.java Wed Feb 22 21:57:54 2006
--- modified/src/org/apache/lucene/xmlparser/DOMUtils.java Sun Feb 26
17:16:41 2006
***************
*** 11,16 ****
--- 11,74 ----
public class DOMUtils
{
+
+   public static Element getChildByTagOrFail(Element e, String name)
+     throws ParserException
+   {
+     final Element child = getChildByTagName(e,name); // null is treated as "not found"
+     if (child != null) {
+       return child;
+     }
+     throw new ParserException(e.getTagName() + " missing \"" + // names the actual tag
+                               name + "\" child element");
+   }
+
+   public static Element getFirstChildOrFail(Element e)
+     throws ParserException
+   {
+     final Element first = getFirstChildElement(e); // null is treated as "no child"
+     if (first != null) {
+       return first;
+     }
+     throw new ParserException(e.getTagName() + // report the parent's own tag name
+                               " does not contain a child element");
+   }
+
+   public static String getAttributeOrFail(Element e, String name)
+     throws ParserException
+   {
+     String v = e.getAttribute(name); // DOM returns "" (not null) when absent
+     if (null == v || 0 == v.length()) { // so an empty value counts as missing
+       throw new ParserException(e.getTagName() + " missing \"" +
+                                 name + "\" attribute");
+     }
+     return v; // guaranteed non-empty here
+   }
+   public static String getAttributeWithInheritanceOrFail(Element e,
+                                                          String name)
+     throws ParserException
+   {
+     final String value = getAttributeWithInheritance(e,name); // null is treated as "missing"
+     if (value != null) {
+       return value;
+     }
+     throw new ParserException(e.getTagName() + " missing \"" + // names the actual tag
+                               name + "\" attribute");
+   }
+   public static String getNonBlankTextOrFail(Element e)
+     throws ParserException
+   {
+     final String raw = getText(e);
+     final String text = (raw == null) ? "" : raw.trim(); // null counts as blank
+     if (text.length() > 0) {
+       return text; // the trimmed text, never empty
+     }
+     throw new ParserException(e.getTagName() + " has no text");
+   }
+
+
+
+
/* Convenience method where there is only one child Element of a given
name */
public static Element getChildByTagName(Element e, String name)
{
diff -cr orig/src/org/apache/lucene/xmlparser/FilteredQueryBuilder.java
modified/src/org/apache/lucene/xmlparser/FilteredQueryBuilder.java
*** orig/src/org/apache/lucene/xmlparser/FilteredQueryBuilder.java Wed Feb
22 22:47:26 2006
--- modified/src/org/apache/lucene/xmlparser/FilteredQueryBuilder.java Sun Feb
26 16:51:43 2006
***************
*** 27,70 ****
/* (non-Javadoc)
* @see
org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element)
*/
! public Query getQuery(Element e) throws ParserException {
! Element filterElement=DOMUtils.getChildByTagName(e,"Filter");
! if(filterElement==null)
! {
! throw new ParserException("FilteredQuery missing
\"Filter\" child element");
! }
! filterElement=DOMUtils.getFirstChildElement(filterElement);
! Filter f=null;
! if(filterElement!=null)
! {
! f=filterFactory.process(filterElement);
! }
! else
! {
! throw new ParserException("FilteredQuery \"Filter\"
element missing child query element ");
! }
!
!
! Element queryElement=DOMUtils.getChildByTagName(e,"Query");
! if(queryElement==null)
! {
! throw new ParserException("FilteredQuery missing
\"Query\" child element");
! }
! queryElement=DOMUtils.getFirstChildElement(queryElement);
! Query q=null;
! if(queryElement!=null)
! {
! q=queryFactory.getQuery(queryElement);
! }
! else
! {
! throw new ParserException("FilteredQuery \"Query\"
element missing child query element ");
! }
!
! FilteredQuery fq = new FilteredQuery(q,f);
! fq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
! return fq;
}
--- 27,45 ----
/* (non-Javadoc)
* @see
org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element)
*/
! public Query getQuery(Element e) throws ParserException {
!
! Element filterElement=DOMUtils.getChildByTagOrFail(e,"Filter");
! filterElement=DOMUtils.getFirstChildOrFail(filterElement);
! Filter f=filterFactory.process(filterElement);
! Element queryElement=DOMUtils.getChildByTagOrFail(e,"Query");
! queryElement=DOMUtils.getFirstChildOrFail(queryElement);
! Query q=queryFactory.getQuery(queryElement);
!
! FilteredQuery fq = new FilteredQuery(q,f);
! fq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
! return fq;
}
diff -cr orig/src/org/apache/lucene/xmlparser/builders/BooleanQueryBuilder.java
modified/src/org/apache/lucene/xmlparser/builders/BooleanQueryBuilder.java
*** orig/src/org/apache/lucene/xmlparser/builders/BooleanQueryBuilder.java
Wed Feb 22 22:50:08 2006
--- modified/src/org/apache/lucene/xmlparser/builders/BooleanQueryBuilder.java
Sun Feb 26 16:59:12 2006
***************
*** 38,54 ****
BooleanClause.Occur occurs=getOccursValue(clauseElem);
//find the first element child which should contain a
Query
! Element
clauseQuery=DOMUtils.getFirstChildElement(clauseElem);
! if(clauseQuery!=null)
! {
! Query q=factory.getQuery(clauseQuery);
! bq.add(new BooleanClause(q,occurs));
!
! }
! else
! {
! throw new ParserException("BooleanClause
missing child query element ");
! }
}
return bq;
--- 38,46 ----
BooleanClause.Occur occurs=getOccursValue(clauseElem);
//find the first element child which should contain a
Query
! Element
clauseQuery=DOMUtils.getFirstChildOrFail(clauseElem);
! Query q=factory.getQuery(clauseQuery);
! bq.add(new BooleanClause(q,occurs));
}
return bq;
diff -cr
orig/src/org/apache/lucene/xmlparser/builders/BoostingQueryBuilder.java
modified/src/org/apache/lucene/xmlparser/builders/BoostingQueryBuilder.java
*** orig/src/org/apache/lucene/xmlparser/builders/BoostingQueryBuilder.java
Wed Feb 22 22:50:08 2006
--- modified/src/org/apache/lucene/xmlparser/builders/BoostingQueryBuilder.java
Sun Feb 26 16:58:58 2006
***************
*** 22,51 ****
public Query getQuery(Element e) throws ParserException
{
! Element mainQueryElem=DOMUtils.getChildByTagName(e,"Query");
! if(mainQueryElem==null)
! {
! throw new ParserException("BoostingQuery missing a
\"Query\" child element");
! }
! mainQueryElem=DOMUtils.getFirstChildElement(mainQueryElem);
! if(mainQueryElem==null)
! {
! throw new ParserException("BoostingQuery \"Query\"
element missing a child element");
! }
Query mainQuery=factory.getQuery(mainQueryElem);
!
! Element
boostQueryElem=DOMUtils.getChildByTagName(e,"BoostQuery");
float
boost=DOMUtils.getAttribute(boostQueryElem,"boost",defaultBoost);
! if(boostQueryElem==null)
! {
! throw new ParserException("BoostingQuery missing a
\"BoostQuery\" child element");
! }
! boostQueryElem=DOMUtils.getFirstChildElement(boostQueryElem);
! if(boostQueryElem==null)
! {
! throw new ParserException("BoostingQuery \"BoostQuery\"
element missing a child element");
! }
Query boostQuery=factory.getQuery(boostQueryElem);
BoostingQuery bq = new
BoostingQuery(mainQuery,boostQuery,boost);
--- 22,34 ----
public Query getQuery(Element e) throws ParserException
{
! Element mainQueryElem=DOMUtils.getChildByTagOrFail(e,"Query");
! mainQueryElem=DOMUtils.getFirstChildOrFail(mainQueryElem);
Query mainQuery=factory.getQuery(mainQueryElem);
! Element
boostQueryElem=DOMUtils.getChildByTagOrFail(e,"BoostQuery");
float
boost=DOMUtils.getAttribute(boostQueryElem,"boost",defaultBoost);
! boostQueryElem=DOMUtils.getFirstChildOrFail(boostQueryElem);
Query boostQuery=factory.getQuery(boostQueryElem);
BoostingQuery bq = new
BoostingQuery(mainQuery,boostQuery,boost);
diff -cr
orig/src/org/apache/lucene/xmlparser/builders/ConstantScoreQueryBuilder.java
modified/src/org/apache/lucene/xmlparser/builders/ConstantScoreQueryBuilder.java
***
orig/src/org/apache/lucene/xmlparser/builders/ConstantScoreQueryBuilder.java
Wed Feb 22 22:59:38 2006
---
modified/src/org/apache/lucene/xmlparser/builders/ConstantScoreQueryBuilder.java
Sun Feb 26 17:06:16 2006
***************
*** 19,29 ****
public Query getQuery(Element e) throws ParserException
{
! Element filterElem=DOMUtils.getFirstChildElement(e);
! if(filterElem==null)
! {
! throw new ParserException("ConstantScoreQuery missing
child element with filter");
! }
Query q=new
ConstantScoreQuery(filterFactory.process(filterElem));
q.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
return q;
--- 19,25 ----
public Query getQuery(Element e) throws ParserException
{
! Element filterElem=DOMUtils.getFirstChildOrFail(e);
Query q=new
ConstantScoreQuery(filterFactory.process(filterElem));
q.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
return q;
diff -cr orig/src/org/apache/lucene/xmlparser/builders/SpanNearBuilder.java
modified/src/org/apache/lucene/xmlparser/builders/SpanNearBuilder.java
*** orig/src/org/apache/lucene/xmlparser/builders/SpanNearBuilder.java Wed Feb
22 22:50:08 2006
--- modified/src/org/apache/lucene/xmlparser/builders/SpanNearBuilder.java
Sun Feb 26 17:17:08 2006
***************
*** 19,29 ****
public SpanQuery getSpanQuery(Element e) throws ParserException
{
! String slopString=e.getAttribute("slop");
! if((slopString==null)||(slopString.length()==0))
! {
! throw new ParserException("SpanTermQuery missing slop
property ");
! }
int slop=Integer.parseInt(slopString);
boolean inOrder=DOMUtils.getAttribute(e,"inOrder",false);
ArrayList spans=new ArrayList();
--- 19,25 ----
public SpanQuery getSpanQuery(Element e) throws ParserException
{
! String slopString=DOMUtils.getAttributeOrFail(e,"slop");
int slop=Integer.parseInt(slopString);
boolean inOrder=DOMUtils.getAttribute(e,"inOrder",false);
ArrayList spans=new ArrayList();
diff -cr orig/src/org/apache/lucene/xmlparser/builders/SpanNotBuilder.java
modified/src/org/apache/lucene/xmlparser/builders/SpanNotBuilder.java
*** orig/src/org/apache/lucene/xmlparser/builders/SpanNotBuilder.java Wed Feb
22 22:50:08 2006
--- modified/src/org/apache/lucene/xmlparser/builders/SpanNotBuilder.java
Sun Feb 26 17:10:30 2006
***************
*** 24,44 ****
Element includeElem=DOMUtils.getChildByTagName(e,"Include");
if(includeElem!=null)
{
! includeElem=DOMUtils.getFirstChildElement(includeElem);
}
- if(includeElem==null)
- {
- throw new ParserException("SpanNotQuery missing Include
child Element");
- }
Element excludeElem=DOMUtils.getChildByTagName(e,"Exclude");
if(excludeElem!=null)
{
! excludeElem=DOMUtils.getFirstChildElement(excludeElem);
}
- if(excludeElem==null)
- {
- throw new ParserException("SpanNotQuery missing Exclude
child Element");
- }
SpanQuery include=factory.getSpanQuery(includeElem);
SpanQuery exclude=factory.getSpanQuery(excludeElem);
--- 24,36 ----
Element includeElem=DOMUtils.getChildByTagName(e,"Include");
if(includeElem!=null)
{
! includeElem=DOMUtils.getFirstChildOrFail(includeElem);
}
Element excludeElem=DOMUtils.getChildByTagName(e,"Exclude");
if(excludeElem!=null)
{
! excludeElem=DOMUtils.getFirstChildOrFail(excludeElem);
}
SpanQuery include=factory.getSpanQuery(includeElem);
SpanQuery exclude=factory.getSpanQuery(excludeElem);
diff -cr orig/src/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java
modified/src/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java
*** orig/src/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java
Wed Feb 22 22:50:08 2006
--- modified/src/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java
Sun Feb 26 17:11:42 2006
***************
*** 30,42 ****
}
public SpanQuery getSpanQuery(Element e) throws ParserException
{
! String
fieldName=DOMUtils.getAttributeWithInheritance(e,"fieldName");
! if(fieldName==null)
! {
! throw new ParserException("Error: SpanOrTermsBuilder
missing \"fieldName\" property");
! }
!
! String value=DOMUtils.getText(e);
try
{
--- 30,37 ----
}
public SpanQuery getSpanQuery(Element e) throws ParserException
{
! String
fieldName=DOMUtils.getAttributeWithInheritanceOrFail(e,"fieldName");
! String value=DOMUtils.getNonBlankTextOrFail(e);
try
{
diff -cr orig/src/org/apache/lucene/xmlparser/builders/SpanTermBuilder.java
modified/src/org/apache/lucene/xmlparser/builders/SpanTermBuilder.java
*** orig/src/org/apache/lucene/xmlparser/builders/SpanTermBuilder.java Wed Feb
22 22:50:08 2006
--- modified/src/org/apache/lucene/xmlparser/builders/SpanTermBuilder.java
Sun Feb 26 17:09:39 2006
***************
*** 12,27 ****
public SpanQuery getSpanQuery(Element e) throws ParserException
{
! String
fieldName=DOMUtils.getAttributeWithInheritance(e,"fieldName");
! String value=DOMUtils.getText(e);
! if((fieldName==null)||(fieldName.length()==0))
! {
! throw new ParserException("SpanTermQuery missing
fieldName property ");
! }
! if((value==null)||(value.length()==0))
! {
! throw new ParserException("TermQuery missing value
property ");
! }
SpanTermQuery stq = new SpanTermQuery(new
Term(fieldName,value));
stq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
--- 12,19 ----
public SpanQuery getSpanQuery(Element e) throws ParserException
{
! String
fieldName=DOMUtils.getAttributeWithInheritanceOrFail(e,"fieldName");
! String value=DOMUtils.getNonBlankTextOrFail(e);
SpanTermQuery stq = new SpanTermQuery(new
Term(fieldName,value));
stq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
diff -cr
orig/src/org/apache/lucene/xmlparser/builders/TermQueryObjectBuilder.java
modified/src/org/apache/lucene/xmlparser/builders/TermQueryObjectBuilder.java
*** orig/src/org/apache/lucene/xmlparser/builders/TermQueryObjectBuilder.java
Wed Feb 22 22:50:08 2006
---
modified/src/org/apache/lucene/xmlparser/builders/TermQueryObjectBuilder.java
Sun Feb 26 17:04:36 2006
***************
*** 18,35 ****
public class TermQueryObjectBuilder implements QueryBuilder {
public Query getQuery(Element e) throws ParserException {
! String
field=DOMUtils.getAttributeWithInheritance(e,"fieldName");
! String value=e.getAttribute("value");
! if((field==null)||(field.length()==0))
! {
! throw new ParserException("TermQuery missing fieldName
property ");
! }
! if((value==null)||(value.length()==0))
! {
! throw new ParserException("TermQuery missing value
property ");
! }
! TermQuery tq = new TermQuery(new Term(field,value));
!
tq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
return tq;
}
--- 18,26 ----
public class TermQueryObjectBuilder implements QueryBuilder {
public Query getQuery(Element e) throws ParserException {
! String
field=DOMUtils.getAttributeWithInheritanceOrFail(e,"fieldName");
! String value=DOMUtils.getAttributeOrFail(e,"value");
! TermQuery tq = new TermQuery(new Term(field,value));
tq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
return tq;
}
diff -cr orig/src/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java
modified/src/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java
*** orig/src/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java
Wed Feb 22 22:50:08 2006
--- modified/src/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java
Sun Feb 26 17:13:49 2006
***************
*** 43,55 ****
for(int i=0;i<nl.getLength();i++)
{
Element fieldElem=(Element) nl.item(i);
! String
fieldName=DOMUtils.getAttributeWithInheritance(fieldElem,"fieldName");
!
! if(fieldName==null)
! {
! throw new ParserException("TermsFilter missing
\"fieldName\" element");
! }
! String text=DOMUtils.getText(fieldElem).trim();
TokenStream ts = analyzer.tokenStream(fieldName, new
StringReader(text));
try
{
--- 43,50 ----
for(int i=0;i<nl.getLength();i++)
{
Element fieldElem=(Element) nl.item(i);
! String
fieldName=DOMUtils.getAttributeWithInheritanceOrFail(fieldElem,"fieldName");
! String text=DOMUtils.getNonBlankTextOrFail(fieldElem);
TokenStream ts = analyzer.tokenStream(fieldName, new
StringReader(text));
try
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]