Author: ahorincar Date: Thu Aug 7 19:00:56 2014 New Revision: 1616567 URL: http://svn.apache.org/r1616567 Log: Fixed pagination, fixed wildcard searching, implemented ITemplateStreamFilter for more_like_this queries, added feature to generate schema
class BloodhoundSolrAdmin(Component):
    """Register the Solr related ``trac-admin`` commands of this plugin."""

    implements(IAdminCommandProvider)

    # IAdminCommandProvider methods
    def get_admin_commands(self):
        """Yield the ``bhsolr generate_schema`` command description.

        The yielded tuple holds, in order: the command name, its argument
        synopsis, the help text, the completion callback (none) and the
        callable that executes the command, which is the
        ``generate_schema`` method of the environment's ``SolrSchema``.
        """
        schema_generator = SolrSchema(self.env).generate_schema
        command = ('bhsolr generate_schema',
                   '<path>',
                   'Generate Solr schema',
                   None,
                   schema_generator)
        yield command
UNIQUE_ID = "unique_id"

# Fields passed to the highlighter; only the keys are meaningful.
HIGHLIGHTABLE_FIELDS = {"unique_id" : True,
                        "id" : True,
                        "type" : True,
                        "product" : True,
                        "milestone" : True,
                        "author" : True,
                        "component" : True,
                        "status" : True,
                        "resolution" : True,
                        "keywords" : True,
                        "summary" : True,
                        "content" : True,
                        "changes" : True,
                        "owner" : True,
                        "repository" : True,
                        "revision" : True,
                        "message" : True,
                        "name" : True}


class SolrBackend(Component):
    """bhsearch backend that indexes and searches documents through Solr.

    All communication with the server goes through the sunburnt
    ``SolrInterface`` created in ``__init__``.
    """
    implements(ISearchBackend)

    server_url = Option(
        BHSEARCH_CONFIG_SECTION,
        'solr_server_url',
        doc="""Url of the server running Solr instance.""",
        doc_domain='bhsearch')

    def __init__(self):
        """Create the Solr interface from the schema bundled with the plugin."""
        # pkg_resources resource names always use '/' as separator.
        schema_path = pkg_resources.resource_filename(
            __name__, "schemadoc/schema.xml")
        # NOTE(review): sunburnt parses the schema document while the
        # interface is constructed, so the file is closed right afterwards
        # instead of being leaked.
        with open(schema_path) as schema_file:
            self.solr_interface = SolrInterface(str(self.server_url),
                                                schemadoc=schema_file)

    def add_doc(self, doc, operation_context=None):
        """Index *doc* and commit straight away.

        *doc* is modified in place: empty values are dropped, remaining
        values are converted and the ``unique_id`` key is added.
        """
        self._reformat_doc(doc)
        doc[UNIQUE_ID] = self._create_unique_id(doc.get("product", ''),
                                                doc["type"],
                                                doc["id"])
        self.solr_interface.add(doc)
        self.solr_interface.commit()

    def delete_doc(self, product, doc_type, doc_id, operation_context=None):
        """Remove the document identified by product, type and id.

        The deletion is committed immediately, mirroring ``add_doc``.
        """
        unique_id = self._create_unique_id(product, doc_type, doc_id)
        self.solr_interface.delete(unique_id)
        self.solr_interface.commit()

    def optimize(self):
        """Ask Solr to optimize the index."""
        self.solr_interface.optimize()

    def query(self, query, query_string, sort=None, fields=None, filter=None,
              facets=None, pagenum=1, pagelen=20, highlight=False,
              highlight_fields=None, context=None):
        """Run a search and return a ``QueryResult``.

        Only ``query_string``, ``fields``, ``facets``, ``pagenum`` and
        ``pagelen`` are used at the moment; the remaining parameters are
        accepted for interface compatibility and ignored.

        :param query_string: raw text typed by the user; an empty value
            searches for everything.
        :param fields: names of the fields to copy into each result
            document, or ``None`` for all of them.
        :param pagenum: 1-based number of the requested page.
        :param pagelen: number of hits per page.
        """
        if not query_string:
            query_string = "*.*"

        query_chain = self._create_query_chain(query, query_string)
        solr_query = self.solr_interface.query(query_chain)
        solr_query = solr_query.facet_by(facets)
        solr_query = solr_query.highlight(HIGHLIGHTABLE_FIELDS)

        # Page N starts after the N - 1 preceding pages.
        start = (max(pagenum, 1) - 1) * pagelen

        paginated_query = solr_query.paginate(start=start, rows=pagelen)
        results = paginated_query.execute()
        mlt = self.query_more_like_this(paginated_query, fields="type",
                                        mindf=1, mintf=1)

        return self._create_query_result(results, fields, pagenum, pagelen,
                                         mlt)

    def query_more_like_this(self, query_chain, **kwargs):
        """Execute *query_chain* with MoreLikeThis enabled.

        *kwargs* are handed to the query's ``mlt`` method; the
        ``more_like_these`` attribute of the response is returned.
        """
        return query_chain.mlt(**kwargs).execute().more_like_these

    def _create_query_result(self, results, fields, pagenum, pagelen, mlt):
        """Convert a Solr response into a bhsearch ``QueryResult``."""
        total_num, total_page_count, page_num, offset = \
            self._prepare_query_result_attributes(results, pagenum, pagelen)

        query_results = QueryResult()
        query_results.hits = total_num
        query_results.total_page_count = total_page_count
        query_results.page_number = page_num
        query_results.offset = offset

        docs = []
        highlighting = []
        for retrieved_record in results:
            docs.append(self._process_record(fields, retrieved_record, mlt))
            # A hit without any highlighted snippet may carry no
            # 'solr_highlights' entry at all.
            highlighting.append(
                dict(retrieved_record.get('solr_highlights', {})))

        query_results.docs = docs
        query_results.highlighting = highlighting
        return query_results

    def _create_query_chain(self, query, query_string):
        """OR together one sub-query per distinct token of *query_string*.

        Tokens are runs of word characters and ``*``.  ``None`` is returned
        when the string contains no token.  *query* is unused.
        """
        tokens = set(re.findall(r'([\w\*]+)', query_string))

        final_query_chain = None
        for token in tokens:
            token_query_chain = self._search_fields_for_token(token)
            # Identity test: query objects overload their operators.
            if final_query_chain is None:
                final_query_chain = token_query_chain
            else:
                final_query_chain |= token_query_chain

        return final_query_chain

    def _process_record(self, fields, retrieved_record, mlt):
        """Copy the requested *fields* (or every field) of a hit into a dict.

        Datetime values are converted by ``_from_whoosh_format``.  *mlt* is
        currently unused.
        """
        if fields:
            selected = [(field, retrieved_record[field])
                        for field in fields if field in retrieved_record]
        else:
            selected = retrieved_record.items()

        return dict((key, self._from_whoosh_format(value))
                    for key, value in selected)

    def _from_whoosh_format(self, value):
        """Return datetimes as timezone-aware UTC values, others unchanged."""
        if isinstance(value, datetime):
            if value.tzinfo is None:
                value = utc.localize(value)
            else:
                # localize() only accepts naive datetimes.
                value = value.astimezone(utc)
        return value

    def _prepare_query_result_attributes(self, results, pagenum, pagelen):
        """Compute paging figures for *results*.

        :return: tuple ``(total hits, page count, page number, offset)``
            where the page number is clamped to the available pages and is
            never smaller than 1.
        """
        results_total_num = results.result.numFound
        # Ceiling division on integers; ceil(a / b) would floor first
        # under Python 2.
        total_page_count = (results_total_num + pagelen - 1) // pagelen
        pagenum = max(1, min(total_page_count, pagenum))
        offset = (pagenum - 1) * pagelen

        return results_total_num, total_page_count, pagenum, offset

    def is_index_outdated(self):
        """Always report the index as up to date."""
        return False

    def recreate_index(self):
        """Do nothing and report success."""
        return True

    @contextmanager
    def start_operation(self):
        """Context manager without any setup or teardown."""
        yield

    def _search_fields_for_token(self, token):
        """OR together a boosted query for *token* on every boosted field.

        The fields and boosts come from ``DefaultQueryParser``; the
        ``query_suggestion_basket`` and ``relations`` fields are skipped.
        """
        excluded_fields = ('query_suggestion_basket', 'relations')
        field_boosts = DefaultQueryParser(self.env).field_boosts

        query_chain = None
        for field, boost in field_boosts.items():
            if field in excluded_fields:
                continue
            field_query = self.solr_interface.Q(**{field: token}) ** boost
            if query_chain is None:
                query_chain = field_query
            else:
                query_chain |= field_query

        return query_chain

    def _reformat_doc(self, doc):
        """Drop ``None`` keys and empty values from *doc*, convert the rest.

        The dictionary is modified in place.
        """
        # Iterate over a snapshot because entries are deleted on the way.
        for key, value in list(doc.items()):
            is_empty_string = isinstance(value, basestring) and value == ""
            if key is None or value is None or is_empty_string:
                del doc[key]
            else:
                doc[key] = self._to_whoosh_format(value)

    def _to_whoosh_format(self, value):
        """Return strings as unicode and datetimes as naive UTC values."""
        if isinstance(value, basestring):
            value = unicode(value)
        elif isinstance(value, datetime):
            value = self._convert_date_to_tz_naive_utc(value)
        return value

    def _convert_date_to_tz_naive_utc(self, value):
        """Convert an aware datetime to UTC and strip its tzinfo.

        Naive datetimes are returned unchanged.
        """
        if value.tzinfo:
            utc_time = value.astimezone(utc)
            value = utc_time.replace(tzinfo=None)
        return value

    def _create_unique_id(self, product, doc_type, doc_id):
        """Build ``product:type:id``, or ``type:id`` without a product."""
        if product:
            return u"%s:%s:%s" % (product, doc_type, doc_id)
        else:
            return u"%s:%s" % (doc_type, doc_id)

    def getInstance(self):
        """Return the underlying ``SolrInterface``."""
        return self.solr_interface
class SolrSchema(Component):
    """Generate a Solr ``schema.xml`` from the bhsearch Whoosh schema."""

    instance = None

    # Fields written with required="true"; only the keys are meaningful.
    REQUIRED_FIELDS = {"id": True,
                       "unique_id": True,
                       "type": True}

    # Whoosh field class name -> Solr field type name.
    FIELDS_TYPE_DICT = {"ID": "string",
                        "DATETIME": "date",
                        "KEYWORD": "string",
                        "TEXT": "text_general"
                        }

    def __init__(self):
        """Remember the Whoosh schema and prepare an empty schema document."""
        self.schema = WhooshBackend.SCHEMA
        self.path = None
        self._init_schema_tree()

    @staticmethod
    def _add_element(parent, tag, attributes=()):
        """Append a *tag* child to *parent* and return it.

        *attributes* is a sequence of ``(name, value)`` pairs, set in the
        given order.
        """
        element = etree.SubElement(parent, tag)
        for name, value in attributes:
            element.set(name, value)
        return element

    def _init_schema_tree(self):
        """(Re)create the XML skeleton and the fields that are always present."""
        self.schema_element = etree.Element("schema")
        self.schema_element.set("name", "Bloodhound Solr Schema")
        self.schema_element.set("version", "1")

        self.fields_element = etree.SubElement(self.schema_element, "fields")
        self.unique_key_element = etree.SubElement(self.schema_element,
                                                   "uniqueKey")
        self.unique_key_element.text = "unique_id"

        self._add_element(self.fields_element, "field",
                          [("name", "_version_"),
                           ("type", "long"),
                           ("indexed", "true"),
                           ("stored", "true")])
        self._add_element(self.fields_element, "field",
                          [("name", "_root_"),
                           ("type", "string"),
                           ("indexed", "true"),
                           ("stored", "false")])
        self._add_element(self.fields_element, "field",
                          [("name", "_stored_name"),
                           ("type", "string"),
                           ("indexed", "true"),
                           ("stored", "true"),
                           ("required", "false"),
                           ("multiValued", "false")])

    def generate_schema(self, path=None):
        """Write ``schema.xml`` into directory *path*.

        :param path: target directory; the current working directory is
            used when it is empty or ``None``.

        The location of the written file is stored in ``self.path``.
        """
        if not path:
            path = os.getcwd()

        # Start from a fresh tree so that calling this method again on the
        # same instance does not duplicate fields and type definitions.
        self._init_schema_tree()
        self.add_all_fields()
        self.add_type_definitions()
        doc = etree.ElementTree(self.schema_element)

        self.path = os.path.join(path, 'schema.xml')

        # The serializer emits encoded bytes, hence the binary mode.
        with open(self.path, 'wb') as out_file:
            doc.write(out_file, xml_declaration=True, encoding='UTF-8',
                      pretty_print=True)

    def add_field(self, field_name, name_attr, type_attr, indexed_attr,
                  stored_attr, required_attr, multivalued_attr):
        """Append an element named *field_name* to the ``<fields>`` section.

        The remaining arguments are the string values of its ``name``,
        ``type``, ``indexed``, ``stored``, ``required`` and ``multiValued``
        attributes.
        """
        self._add_element(self.fields_element, field_name,
                          [("name", name_attr),
                           ("type", type_attr),
                           ("indexed", indexed_attr),
                           ("stored", stored_attr),
                           ("required", required_attr),
                           ("multiValued", multivalued_attr)])

    def add_all_fields(self):
        """Add one ``<field>`` per field of the Whoosh schema.

        :raises TracError: when a field uses a Whoosh type that has no entry
            in ``FIELDS_TYPE_DICT``.
        """
        for field_name, field_attrs in self.schema.items():
            whoosh_type = str(field_attrs.__class__.__name__)
            try:
                type_attr = SolrSchema.FIELDS_TYPE_DICT[whoosh_type]
            except KeyError:
                raise TracError(
                    "Cannot generate Solr schema: field '%s' has Whoosh "
                    "type '%s', which has no Solr equivalent"
                    % (field_name, whoosh_type))

            indexed_attr = str(field_attrs.indexed).lower()
            stored_attr = str(field_attrs.stored).lower()
            if field_name in SolrSchema.REQUIRED_FIELDS:
                required_attr = "true"
            else:
                required_attr = "false"

            self.add_field("field", field_name, type_attr, indexed_attr,
                           stored_attr, required_attr, "false")

    def add_type_definitions(self):
        """Append the ``<types>`` section with every field type definition."""
        self.types_element = etree.SubElement(self.schema_element, "types")
        self._add_string_type_definition()
        self._add_text_general_type_definition()
        self._add_date_type_definition()
        self._add_long_type_definition()
        self._add_lowercase_type_definition()

    def _add_string_type_definition(self):
        """Define the ``string`` field type."""
        self._add_element(self.types_element, "fieldType",
                          [("name", "string"),
                           ("class", "solr.StrField"),
                           ("sortMissingLast", "true")])

    def _add_text_general_type_definition(self):
        """Define ``text_general`` with separate index and query analyzers."""
        field_type = self._add_element(self.types_element, "fieldType",
                                       [("name", "text_general"),
                                        ("class", "solr.TextField"),
                                        ("positionIncrementGap", "100")])
        stop_filter = [("class", "solr.StopFilterFactory"),
                       ("ignoreCase", "true"),
                       ("words", "stopwords.txt")]
        lowercase_filter = [("class", "solr.LowerCaseFilterFactory")]
        tokenizer = [("class", "solr.StandardTokenizerFactory")]

        analyzer_index = self._add_element(field_type, "analyzer",
                                           [("type", "index")])
        self._add_element(analyzer_index, "tokenizer", tokenizer)
        self._add_element(analyzer_index, "filter", stop_filter)
        self._add_element(analyzer_index, "filter", lowercase_filter)

        analyzer_query = self._add_element(field_type, "analyzer",
                                           [("type", "query")])
        self._add_element(analyzer_query, "tokenizer", tokenizer)
        self._add_element(analyzer_query, "filter", stop_filter)
        self._add_element(analyzer_query, "filter",
                          [("class", "solr.SynonymFilterFactory"),
                           ("synonyms", "synonyms.txt"),
                           ("ignoreCase", "true"),
                           ("expand", "true")])
        self._add_element(analyzer_query, "filter", lowercase_filter)

    def _add_date_type_definition(self):
        """Define the ``date`` field type."""
        self._add_element(self.types_element, "fieldType",
                          [("name", "date"),
                           ("class", "solr.TrieDateField"),
                           ("precisionStep", "0"),
                           ("positionIncrementGap", "0")])

    def _add_long_type_definition(self):
        """Define the ``long`` field type."""
        self._add_element(self.types_element, "fieldType",
                          [("name", "long"),
                           ("class", "solr.TrieLongField"),
                           ("precisionStep", "0"),
                           ("positionIncrementGap", "0")])

    def _add_lowercase_type_definition(self):
        """Define ``lowercase``: keyword tokenizer plus lower-case filter."""
        field_type = self._add_element(self.types_element, "fieldType",
                                       [("name", "lowercase"),
                                        ("class", "solr.TextField"),
                                        ("positionIncrementGap", "100")])
        analyzer = self._add_element(field_type, "analyzer")
        self._add_element(analyzer, "tokenizer",
                          [("class", "solr.KeywordTokenizerFactory")])
        self._add_element(analyzer, "filter",
                          [("class", "solr.LowerCaseFilterFactory")])
indexed="true" stored="true" required="true" multiValued="false" /> <field name="type" type="string" indexed="true" stored="true" required="true" multiValued="false"/> <field name="product" type="string" indexed="true" stored="true" required="false" multiValued="false"/> <field name="milestone" type="string" indexed="true" stored="true" required="false" multiValued="false"/> @@ -31,14 +31,12 @@ <field name="required_permission" type="string" indexed="true" stored="true" required="false" multiValued="false"/> <field name="name" type="text_general" indexed="true" stored="true" required="false" multiValued="false"/> <field name="_stored_name" type="string" indexed="true" stored="true" required="false" multiValued="false"/> - <!-- <field name="query_suggestion_basket" type="text_general" indexed="true" stored="true" required="true" multiValued="false"/> --> - <!-- <field name="relations" type="lowercase" indexed="true" stored="true" required="true" multiValued="false"/> --> + <field name="relations" type="lowercase" indexed="true" stored="true" required="true" multiValued="false"/> + <field name="query_suggestion_basket" type="text_general" indexed="true" stored="true" required="true" multiValued="false"/> </fields> <uniqueKey>unique_id</uniqueKey> -<!-- <copyField source="name" dest="text"/> --> - <types> <!-- Field type definitions --> <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> @@ -69,4 +67,3 @@ </types> </schema> - Added: bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py URL: http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py?rev=1616567&view=auto ============================================================================== --- bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py (added) +++ bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py Thu Aug 7 19:00:56 2014 @@ -0,0 +1,17 @@ +from trac.web.api import ITemplateStreamFilter +from 
class BloodhoundSolrTemplate(Component):
    """Add a "More like this" link to the entries of the search results page."""
    implements(ITemplateStreamFilter)

    def filter_stream(self, req, method, filename, stream, data):
        """Append the link to the date ``<span>`` of every search result.

        Streams of requests whose path does not start with ``/bhsearch``
        are returned untouched.
        """
        if not re.match(r'/bhsearch', req.path_info):
            return stream

        # Parsed only for search pages; this filter sees every template.
        # NOTE(review): the "porc" link target looks like a placeholder.
        more_like_this_link = HTML(u'''<br></br><a href="porc" class="btn" style="margin: 10px 10px 10px 0px;">More like this</a>''')

        result_dates = Transformer(
            '//dl[@id="results"]/dd/span[@class="date"]')
        stream |= result_dates.append(more_like_this_link)

        return stream