Thanks Andi! I'll give it a try tomorrow. I'll also ask Julian to provide some
test and sample code for you.
Thank you very much for the great work and effort you're putting into jcc,
Andi. As far as I can tell, the linking between classes from PyLucene and
bobo-browse works flawlessly now - cool :)
I have attached the latest version of the Makefile that Christian has modified
from PyLucene as well as a simple sample test-case based on
http://snaprojects.jira.com/wiki/display/BOBO/Getting+Started#GettingStarted-Example%3A
Julian
# Makefile for building bobo-browse bindings for Python
#
# Based on PyLucene's Makefile
# Version of these bindings; used in the source archive name (ARCHIVE).
VERSION=2.5.0-rc1
# Subversion revision of bobo-browse to check out.
BOBO_SVN_VER=HEAD
# bobo-browse release; part of the checkout directory and jar file names.
BOBO_VER=2.5.0-rc1
BOBO_SVN=http://bobo-browse.googlecode.com/svn/trunk

# Directory make is running in. $(CURDIR) is maintained by make itself, so
# no `pwd` subshell is needed (the testfast recipe already relies on it).
PYBOBO:=$(CURDIR)

# Checkout directory and the jar that ant produces inside it.
BOBO_BROWSE=bobo-browse-$(BOBO_VER)
BOBO_BROWSE_JAR=$(BOBO_BROWSE)/dist/bobo-browse-$(BOBO_VER).jar

# ANT_OPTS is handed to ant through its environment; the commented line
# shows an example proxy configuration.
ANT_OPTS=
#ANT_OPTS=-Dhttp.proxyHost=192.168.1.1 -Dhttp.proxyPort=3128
ANT=ANT_OPTS="${ANT_OPTS}" ant

PYTHON=python2.6
JCC=$(PYTHON) -m jcc.__main__ --shared
# Value passed to JCC's --files option.
NUM_FILES=2

# When DEBUG is 1, --debug is added to the JCC build/install commands.
ifeq ($(DEBUG),1)
DEBUG_OPT=--debug
endif
# Targets that name actions rather than files. bobobrowse, bdist,
# install-test and test-clean are listed as well, so that a file or
# directory with one of those names cannot mask the rule.
.PHONY: generate compile install default all clean realclean \
        sources test testfast jars distrib \
        bobobrowse bdist install-test test-clean

default: all
# Check out the bobo-browse sources into the directory named by the target.
$(BOBO_BROWSE):
	svn co -r $(BOBO_SVN_VER) $(BOBO_SVN) $@

# Alias that only fetches the sources.
sources: $(BOBO_BROWSE)
#to-orig: sources
# mkdir -p $(LUCENE)-orig
# tar -C $(LUCENE) -cf - . | tar -C $(LUCENE)-orig -xvf -
#from-orig: $(LUCENE)-orig
# mkdir -p $(LUCENE)
# tar -C $(LUCENE)-orig -cf - . | tar -C $(LUCENE) -xvf -
# Force a rebuild of the bobo-browse jar: delete it, then re-run make on it.
bobobrowse:
	rm -f $(BOBO_BROWSE_JAR)
	$(MAKE) $(BOBO_BROWSE_JAR)

# Build the jar with ant inside the checkout.
# The checkout directory is an order-only prerequisite: it must exist, but
# its timestamp (which changes whenever an entry is added to it) must not
# trigger a rebuild. `&&` stops ant from running in the wrong directory
# when the cd fails.
$(BOBO_BROWSE_JAR): | $(BOBO_BROWSE)
	cd $(BOBO_BROWSE) && $(ANT) -Dversion=$(BOBO_VER) jars
# Jars handed to JCC with --jar.
JARS=$(BOBO_BROWSE_JAR)

# Jars handed to JCC with --include; all live in the checkout's lib/master.
INCLUDES=$(addprefix $(BOBO_BROWSE)/lib/master/, \
    servlet-api.jar \
    spring-webmvc.jar \
    spring.jar \
    log4j.jar \
    protobuf-java-2.2.0.jar \
    xstream.jar \
    fastutil.jar \
    kamikaze-2.0.0.jar)

# Jars handed to JCC with --classpath; same directory as above.
CLASSPATHS=$(addprefix $(BOBO_BROWSE)/lib/master/, \
    ant.jar \
    xercesImpl.jar \
    commons-collections.jar \
    commons-cli.jar \
    commons-configuration.jar \
    commons-logging.jar \
    commons-lang.jar \
    commons-digester.jar \
    commons-httpclient.jar \
    zoie-2.0.0-rc1.jar \
    json.jar \
    dwr.jar \
    xmlParserAPIs.jar)

# Build every jar in JARS.
jars: $(JARS)
# JCC command line shared by the generate, compile, install, bdist and
# install-test recipes: one --jar per entry of JARS, one --include per entry
# of INCLUDES, one --classpath per entry of CLASSPATHS, then the fixed
# options below.
GENERATE=$(JCC) \
    $(patsubst %,--jar %,$(JARS)) \
    $(patsubst %,--include %,$(INCLUDES)) \
    $(patsubst %,--classpath %,$(CLASSPATHS)) \
    --import lucene \
    --package java.lang \
        java.lang.System \
        java.lang.Runtime \
    --package java.util \
        java.util.Arrays \
        java.text.SimpleDateFormat \
    --package java.io \
        java.io.StringReader \
        java.io.InputStreamReader \
        java.io.FileInputStream \
    --python bobobrowse \
    --version $(BOBO_VER) \
    --files $(NUM_FILES)
# Every JCC-driven target needs the jars first.
generate compile install bdist: jars

# Run JCC without any build/install option.
generate:
	$(GENERATE)

# Run JCC with --build (plus --debug when DEBUG=1).
compile:
	$(GENERATE) --build $(DEBUG_OPT)

# Run JCC with --install; INSTALL_OPT may carry extra caller-supplied options.
install:
	$(GENERATE) --install $(DEBUG_OPT) $(INSTALL_OPT)

# Run JCC with --bdist.
bdist:
	$(GENERATE) --bdist

# Default build: fetch sources, build the jars, compile the bindings.
all: sources jars compile
	@echo build of bobo-browse complete
# Remove build products. `ant clean` is only run when the checkout (and its
# build.xml) exists. The whole shell `if` must sit on one recipe line because
# each recipe line runs in its own shell.
clean:
	if test -f $(BOBO_BROWSE)/build.xml; then cd $(BOBO_BROWSE) && $(ANT) clean; fi
	rm -rf build

# Remove the checkout itself as well as the build and dist directories.
realclean:
	rm -rf $(BOBO_BROWSE)
	rm -rf build dist
# Directory the bindings are installed into for testing.
BUILD_TEST:=$(PYBOBO)/build/test
# When the output of uname contains CYGWIN, wrap the path in a backquoted
# cygpath call; the backquotes are evaluated by the shell that runs a recipe.
ifneq ($(findstring CYGWIN,$(shell uname)),)
BUILD_TEST:=`cygpath -aw $(BUILD_TEST)`
endif
# Install the bindings into $(BUILD_TEST). The trailing backslash keeps
# --install-dir on the same command; without it the option would be run as a
# separate (failing) command and the install would go to the default location.
install-test:
	mkdir -p $(BUILD_TEST)
	PYTHONPATH=$(BUILD_TEST) $(GENERATE) --install $(DEBUG_OPT) \
		--install-dir $(BUILD_TEST)

# Remove the test installation.
test-clean:
	rm -rf $(BUILD_TEST)

# Run the test script against an existing test installation, passing the
# checkout directory as its single argument (continued on the next line).
testfast:
	PYTHONPATH=$(BUILD_TEST) $(PYTHON) test_bobobrowse.py \
		$(CURDIR)/$(BOBO_BROWSE)

# Install into the test directory, then run the tests.
test: install-test testfast
# Name of the source tarball; only the commented-out distrib/stage/release
# recipes refer to it.
ARCHIVE = bobo_browse-$(VERSION)-src.tar.gz
#distrib:
# mkdir -p distrib
# svn export . distrib/pylucene-$(VERSION)
# tar -cf - --exclude build $(LUCENE) | tar -C distrib/pylucene-$(VERSION) -xvf -
# mkdir distrib/pylucene-$(VERSION)/doc
# tar -C $(SITE) -cf - . | tar -C distrib/pylucene-$(VERSION)/doc -xvf -
# cd distrib; tar -cvzf $(ARCHIVE) pylucene-$(VERSION)
# cd distrib; gpg2 --armor --output $(ARCHIVE).asc --detach-sig $(ARCHIVE)
# cd distrib; openssl md5 < $(ARCHIVE) > $(ARCHIVE).md5
#
#stage:
# cd distrib; scp -p $(ARCHIVE) $(ARCHIVE).asc $(ARCHIVE).md5 \
# people.apache.org:public_html/staging_area
#
#release:
# cd distrib; scp -p $(ARCHIVE) $(ARCHIVE).asc $(ARCHIVE).md5 \
# people.apache.org:/www/www.apache.org/dist/lucene/pylucene
# Debug aid: `make print-FOO` echoes the name and value of variable FOO.
print-%: ; @echo $* = $($*)
#!/usr/bin/env python2.6
import os
import sys
import unittest
import lucene
import bobobrowse as bobo
# Directory containing this script.
HERE = os.path.dirname(os.path.abspath(__file__))

# Location of the bobo-browse checkout: the command-line argument when
# exactly one is given, otherwise the default directory next to this script.
BOBO = sys.argv[1] if len(sys.argv) == 2 else os.path.join(HERE, "bobo-browse-2.5.0-rc1")
class TestBoboBrowse(unittest.TestCase):
    """Tests for the ``bobobrowse`` bindings, run against the cartag index."""

    # Lucene index directory below the bobo-browse checkout (BOBO).
    cartag_index = os.path.join(BOBO, "cardata", "cartag")

    def openStore(self):
        """Return the cartag index opened via ``FSDirectory.getDirectory``."""
        return lucene.FSDirectory.getDirectory(self.cartag_index, False)

    def closeStore(self, store, *args):
        """Close every non-None object in ``args``, then close ``store``."""
        for arg in args:
            if arg is not None:
                arg.close()
        store.close()

    # NOTE: the test methods use comments instead of docstrings on purpose;
    # unittest's verbose output would otherwise show the docstring.

    def test_000bobobrowse(self):
        # Check the repr of two wrapped classes, then build a BoboBrowser on
        # top of the index.
        self.assertEqual(repr(bobo.BoboService.class_),
                         "<Class: class com.browseengine.bobo.service.BoboService>")
        self.assertEqual(repr(bobo.FacetSpec.FacetSortSpec.OrderHitsDesc.class_),
                         "<Class: class com.browseengine.bobo.api.FacetSpec$FacetSortSpec>")

        handler_field1 = bobo.MultiValueFacetHandler("field1")
        handler_field2 = bobo.MultiValueFacetHandler("field2")
        facet_handlers = lucene.JArray('object')(2, bobo.FacetHandler)
        facet_handlers[0] = handler_field1
        facet_handlers[1] = handler_field2

        store = self.openStore()
        reader = None
        try:
            reader = lucene.IndexReader.open(store, True)
            facet_handlers = bobo.Arrays.asList(facet_handlers)
            lucene_index_reader = bobo.BoboIndexReader.getInstance(reader, facet_handlers)
            bobo_index_reader = bobo.BoboIndexReader.getInstance(lucene_index_reader, facet_handlers)
            # Constructing the browser is the check; it is not used further.
            bobo_browser = bobo.BoboBrowser(bobo_index_reader)
        finally:
            self.closeStore(store, reader)

    def test_runBobobrowseExample(self):
        #This is the Example on http://snaprojects.jira.com/wiki/display/BOBO/Getting+Started translated into Python.
        # define facet handlers
        # color facet handler
        color_handler = bobo.SimpleFacetHandler("color")
        # category facet handler
        category_handler = bobo.SimpleFacetHandler("category")
        facet_handler_array = lucene.JArray('object')(2, bobo.FacetHandler)
        facet_handler_array[0] = color_handler
        facet_handler_array[1] = category_handler
        handler_list = lucene.Arrays.asList(facet_handler_array)

        # opening a lucene index
        idx = lucene.FSDirectory.open(lucene.File(self.cartag_index))
        reader = None
        # Everything that touches the index runs inside try/finally so that
        # the reader and the directory are closed even when an assertion
        # fails (same pattern as test_000bobobrowse).
        try:
            reader = lucene.IndexReader.open(idx, True)
            # decorate it with a bobo index reader
            boboReader = bobo.BoboIndexReader.getInstance(reader, handler_list)

            # creating a browse request
            br = bobo.BrowseRequest()
            br.setCount(10)
            br.setOffset(0)

            # add a selection
            sel = bobo.BrowseSelection("color")
            sel.addValue("red")
            br.addSelection(sel)

            # parse a query
            lucene_version = lucene.Version.LUCENE_29
            parser = lucene.QueryParser(lucene_version, "contents", lucene.StandardAnalyzer(lucene_version))
            q = parser.parse("cool car")
            br.setQuery(q)

            # add the facet output specs
            color_spec = bobo.FacetSpec()
            color_spec.setOrderBy(bobo.FacetSpec.FacetSortSpec.OrderHitsDesc)
            category_spec = bobo.FacetSpec()
            category_spec.setMinHitCount(2)
            category_spec.setOrderBy(bobo.FacetSpec.FacetSortSpec.OrderHitsDesc)
            br.setFacetSpec("color", color_spec)
            br.setFacetSpec("category", category_spec)

            # perform browse
            browser = bobo.BoboBrowser(boboReader)
            result = browser.browse(br) #type: BrowseResult
            total_hits = result.getNumHits()
            self.assertEqual(total_hits, 1316)

            hits = tuple(result.getHits()) #type: BrowseHit[]
            self.assertEqual(hits[0].getField('color'), "red")

            facet_map = result.getFacetMap() #type: Map<String,FacetAccessible>
            color_facets = facet_map.get("color") #type: FacetAccessible
            color_facet_vals = tuple(color_facets.getFacets()) #type: List<BrowseFacet>
            self.assertEqual(len(color_facet_vals), 1)
            self.assertEqual(color_facet_vals[0].getValue(), "red") # see 'add a selection' - there should only be red here

            category_facets = facet_map.get("category") #type: FacetAccessible
            category_facet_vals = tuple(category_facets.getFacets()) #type: List<BrowseFacet>
            self.assertEqual(category_facet_vals[0].getValue(), u'sports car')
            self.assertEqual(category_facet_vals[0].getHitCount(), 509)

            found_categories = set([e.getValue() for e in category_facet_vals])
            expected_categories = set((u'sports car', u'compact', u'exotic', u'suv', u'sedan', u'mini-van', u'truck', u'sub-compact', u'station wagon', u'van'))
            self.assertEqual(len(expected_categories.difference(found_categories)), 0) #we want at least the expected_categories
        finally:
            self.closeStore(idx, reader)
def initialize():
    """Call ``initVM()`` on ``lucene`` first and on ``bobo`` second."""
    for module in (lucene, bobo):
        module.initVM()
def test_main():
    """Run ``initialize()`` and return a suite holding the TestBoboBrowse tests."""
    initialize()
    return unittest.TestSuite([unittest.makeSuite(TestBoboBrowse)])
if __name__ == "__main__": # pragma: no cover
    # Report failures through the exit status so that callers (for example
    # a make recipe running this script) can detect a failing test run.
    result = unittest.TextTestRunner(verbosity=2).run(test_main())
    sys.exit(0 if result.wasSuccessful() else 1)