Repository: cxf Updated Branches: refs/heads/master 0253be4a7 -> 16cff265e
CXF-5549: Introduce Tika Search Visit. Added date/timestamp handling implementation. Project: http://git-wip-us.apache.org/repos/asf/cxf/repo Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/16cff265 Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/16cff265 Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/16cff265 Branch: refs/heads/master Commit: 16cff265ea0002e225b1bdbbaa6b5a621e0a182b Parents: 0253be4 Author: reta <[email protected]> Authored: Fri Jun 27 14:38:13 2014 -0400 Committer: reta <[email protected]> Committed: Fri Jun 27 14:38:13 2014 -0400 ---------------------------------------------------------------------- .../cxf/jaxrs/ext/search/SearchUtils.java | 30 ++++++++++++++++++++ .../ext/search/lucene/LuceneQueryVisitor.java | 25 +++++++--------- .../ext/search/tika/LuceneDocumentMetadata.java | 5 +++- .../search/tika/TikaLuceneContentExtractor.java | 19 +++++++++++-- .../tika/TikaLuceneContentExtractorTest.java | 14 +++++++-- 5 files changed, 72 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cxf/blob/16cff265/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/SearchUtils.java ---------------------------------------------------------------------- diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/SearchUtils.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/SearchUtils.java index 773395e..c85d9cb 100644 --- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/SearchUtils.java +++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/SearchUtils.java @@ -18,10 +18,15 @@ */ package org.apache.cxf.jaxrs.ext.search; +import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.Date; import java.util.List; import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.cxf.common.logging.LogUtils; import org.apache.cxf.jaxrs.ext.search.sql.SQLPrinterVisitor; public final class SearchUtils { @@ -32,6 +37,8 @@ public final class SearchUtils { public static final String BEAN_PROPERTY_MAP = "search.bean.property.map"; public static final String SEARCH_VISITOR_PROPERTY = "search.visitor"; + private static final Logger LOG = LogUtils.getL7dLogger(SearchUtils.class); + private SearchUtils() { } @@ -141,4 +148,27 @@ public final class SearchUtils { } return op; } + + public static Date timestampFromString(final String value) { + Date date = timestampFromString(value, "yyyy-MM-dd'T'HH:mm:ssZ"); + + if (date == null) { + date = timestampFromString(value, "yyyy-MM-dd'T'HH:mm:ss"); + } + + return date; + } + + private static Date timestampFromString(final String value, final String format) { + try { + final SimpleDateFormat formatter = new SimpleDateFormat(format); + return formatter.parse(value); + } catch (final ParseException ex) { + LOG.log(Level.WARNING, "Unable to parse date using format specification: " + format, ex); + return null; + } + } + + + } http://git-wip-us.apache.org/repos/asf/cxf/blob/16cff265/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/lucene/LuceneQueryVisitor.java ---------------------------------------------------------------------- diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/lucene/LuceneQueryVisitor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/lucene/LuceneQueryVisitor.java index f423799..2d2f16b 100644 --- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/lucene/LuceneQueryVisitor.java +++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/lucene/LuceneQueryVisitor.java @@ -28,6 +28,7 @@ import java.util.Stack; import org.apache.cxf.jaxrs.ext.search.ConditionType; import org.apache.cxf.jaxrs.ext.search.PrimitiveStatement; import org.apache.cxf.jaxrs.ext.search.SearchCondition; +import org.apache.cxf.jaxrs.ext.search.SearchUtils; import org.apache.cxf.jaxrs.ext.search.visitor.AbstractSearchConditionVisitor; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.DateTools.Resolution; @@ -38,6 +39,7 @@ import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.WildcardQuery; public class LuceneQueryVisitor<T> extends AbstractSearchConditionVisitor<T, Query> { @@ -200,17 +202,16 @@ public class LuceneQueryVisitor<T> extends AbstractSearchConditionVisitor<T, Que } else if (Date.class.isAssignableFrom(cls)) { // This code has not been tested - most likely needs to be fixed // Resolution should be configurable ? - String luceneDateValue = DateTools.dateToString((Date)value, Resolution.MILLISECOND); - String expression = null; + final Date date = SearchUtils.timestampFromString(value.toString()); + final String luceneDateValue = (date != null) + ? DateTools.dateToString(date, Resolution.MILLISECOND) : value.toString(); + if (type == ConditionType.LESS_THAN) { - // what is the base date here ? - expression = "[" + "" - + " TO " + luceneDateValue + "]"; + return TermRangeQuery.newStringRange(name, "", luceneDateValue, true, false); } else { - expression = "[" + luceneDateValue + " TO " - + DateTools.dateToString(new Date(), Resolution.MILLISECOND) + "]"; + return TermRangeQuery.newStringRange(name, luceneDateValue, + DateTools.dateToString(new Date(), Resolution.MILLISECOND), true, false); } - return parseExpression(name, expression); } else { return null; } @@ -228,11 +229,5 @@ public class LuceneQueryVisitor<T> extends AbstractSearchConditionVisitor<T, Que } return booleanQuery; - } - - protected Query parseExpression(String fieldName, String expression) { - //QueryParser parser = new QueryParser(Version.LUCENE_40, name, analyzer); - // return parse.parse(expression); - return null; - } + } } http://git-wip-us.apache.org/repos/asf/cxf/blob/16cff265/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/LuceneDocumentMetadata.java ---------------------------------------------------------------------- diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/LuceneDocumentMetadata.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/LuceneDocumentMetadata.java index d44ab25..3223823 100644 --- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/LuceneDocumentMetadata.java +++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/LuceneDocumentMetadata.java @@ -47,9 +47,12 @@ public class LuceneDocumentMetadata { public String getContentFieldName() { return contentFieldName; } + public Class<?> getFieldType(String name) { return fieldTypes.get(name); } - + public Map<String, Class<?>> getFieldTypes() { + return fieldTypes; + } } http://git-wip-us.apache.org/repos/asf/cxf/blob/16cff265/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java ---------------------------------------------------------------------- diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java index 567463b..09a5ba1 100644 --- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java +++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java @@ -22,7 +22,12 @@ import java.io.InputStream; import java.util.Date; import java.util.List; +import org.apache.cxf.jaxrs.ext.search.SearchUtils; import org.apache.cxf.jaxrs.ext.search.tika.TikaContentExtractor.TikaContent; + + +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; @@ -210,10 +215,20 @@ public class TikaLuceneContentExtractor { return new IntField(name, Integer.valueOf(value), Store.YES); } } else if (Date.class.isAssignableFrom(type)) { - return new StringField(name, value, Store.YES); + final Date date = SearchUtils.timestampFromString(value); + Field field = null; + + if (date != null) { + field = new StringField(name, DateTools.dateToString(date, Resolution.MILLISECOND), + Store.YES); + } else { + field = new StringField(name, value, Store.YES); + } + + return field; } } return new StringField(name, value, Store.YES); - } + } } http://git-wip-us.apache.org/repos/asf/cxf/blob/16cff265/rt/rs/extensions/search/src/test/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractorTest.java ---------------------------------------------------------------------- diff --git a/rt/rs/extensions/search/src/test/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractorTest.java b/rt/rs/extensions/search/src/test/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractorTest.java index ef36439..0fbede1 100644 --- a/rt/rs/extensions/search/src/test/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractorTest.java +++ b/rt/rs/extensions/search/src/test/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractorTest.java @@ -19,7 +19,9 @@ package org.apache.cxf.jaxrs.ext.search.tika; import java.io.IOException; -import java.sql.Date; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; import org.apache.cxf.jaxrs.ext.search.SearchBean; import org.apache.cxf.jaxrs.ext.search.SearchConditionParser; @@ -38,7 +40,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.apache.tika.parser.pdf.PDFParser; - import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -90,15 +91,22 @@ public class TikaLuceneContentExtractorTest extends Assert { writer.addDocument(document); writer.commit(); - assertEquals(1, getHits("modified==2007-09-15T09:02:31Z").length); + assertEquals(1, getHits("modified=gt=2007-09-14T09:02:31", documentMetadata.getFieldTypes()).length); + assertEquals(0, getHits("modified=gt=2007-09-16T09:02:31", documentMetadata.getFieldTypes()).length); + assertEquals(0, getHits("modified=lt=2007-09-01T09:02:31", documentMetadata.getFieldTypes()).length); } private ScoreDoc[] getHits(final String expression) throws IOException { + return getHits(expression, new HashMap<String, Class<?>>()); + } + + private ScoreDoc[] getHits(final String expression, final Map< String, Class<?> > fieldTypes) throws IOException { IndexReader reader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); try { LuceneQueryVisitor<SearchBean> visitor = new LuceneQueryVisitor<SearchBean>("ct", "contents"); + visitor.setPrimitiveFieldTypeMap(fieldTypes); visitor.visit(parser.parse(expression)); ScoreDoc[] hits = searcher.search(visitor.getQuery(), null, 1000).scoreDocs;
