Author: ahorincar
Date: Thu Aug  7 19:00:56 2014
New Revision: 1616567

URL: http://svn.apache.org/r1616567
Log:
Fixed pagination, fixed wildcard searching, implemented ITemplateStreamFilter 
for more_like_this queries, added feature to generate schema

Added:
    bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/admin.py
    bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/backend.py
    bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schema.py
    bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py
Removed:
    bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/solr_backend.py
Modified:
    
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schemadoc/schema.xml
    bloodhound/branches/bep_0014_solr/bloodhound_solr/setup.py

Added: bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/admin.py
URL: 
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/admin.py?rev=1616567&view=auto
==============================================================================
--- bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/admin.py (added)
+++ bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/admin.py Thu Aug  
7 19:00:56 2014
@@ -0,0 +1,15 @@
+from trac.core import Component, implements
+from bhsolr.schema import SolrSchema
+from trac.admin import IAdminCommandProvider
+
+class BloodhoundSolrAdmin(Component):
+
+    implements(IAdminCommandProvider)
+
+    # IAdminCommandProvider methods
+    def get_admin_commands(self):
+        yield ('bhsolr generate_schema', '<path>',
+               'Generate Solr schema',
+               None, SolrSchema(self.env).generate_schema)
+
+

Added: bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/backend.py
URL: 
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/backend.py?rev=1616567&view=auto
==============================================================================
--- bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/backend.py (added)
+++ bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/backend.py Thu Aug 
 7 19:00:56 2014
@@ -0,0 +1,229 @@
+from bhsearch import BHSEARCH_CONFIG_SECTION
+from bhsearch.api import ISearchBackend, SCORE, QueryResult
+from bhsearch.query_parser import DefaultQueryParser
+from bhsearch.search_resources.ticket_search import TicketIndexer
+from trac.core import Component, implements, TracError
+from trac.config import Option
+from trac.ticket.model import Ticket
+from trac.ticket.api import TicketSystem
+from trac.util.datefmt import utc
+from datetime import datetime
+from sunburnt import SolrInterface
+from contextlib import contextmanager
+from math import ceil
+import re
+import pkg_resources
+from bhsolr.schema import SolrSchema
+
+# Name of the document field that stores the identifier built by
+# SolrBackend._create_unique_id.
+UNIQUE_ID = "unique_id"
+
+# Fields handed to the Solr query's highlight() call in SolrBackend.query.
+HIGHLIGHTABLE_FIELDS = {"unique_id" : True,
+                        "id" : True,
+                        "type" : True,
+                        "product" : True,
+                        "milestone" : True,
+                        "author" : True,
+                        "component" : True,
+                        "status" : True,
+                        "resolution" : True,
+                        "keywords" : True,
+                        "summary" : True,
+                        "content" : True,
+                        "changes" : True,
+                        "owner" : True,
+                        "repository" : True,
+                        "revision" : True,
+                        "message" : True,
+                        "name" : True}
+
+class SolrBackend(Component):
+  implements(ISearchBackend)
+
+  server_url = Option(
+      BHSEARCH_CONFIG_SECTION,
+      'solr_server_url',
+      doc="""Url of the server running Solr instance.""",
+      doc_domain='bhsearch')
+
+
+  def __init__(self):
+    resource_filename = pkg_resources.resource_filename
+    path = resource_filename(__name__, "schemadoc")
+    file_obj = open(path + "/schema.xml")
+    # print SolrSchema(self.env).getInstance(self.env).path
+    # file_obj = open(SolrSchema.getInstance(self.env).path)
+    self.solr_interface = SolrInterface(str(self.server_url), 
schemadoc=file_obj)
+
+  def add_doc(self, doc, operation_context=None):
+    self._reformat_doc(doc)
+    doc[UNIQUE_ID] = self._create_unique_id(doc.get("product", ''),
+                                            doc["type"],
+                                            doc["id"])
+    self.solr_interface.add(doc)
+    self.solr_interface.commit()
+
+
+  def delete_doc(product, doc_type, doc_id, operation_context=None):
+    unique_id = self._create_unique_id(product, doc_type, doc_id)
+    self.solr_interface.delete(unique_id)
+
+
+  def optimize(self):
+    self.solr_interface.optimize()
+
+  def query(self, query, query_string, sort = None, fields = None, filter = 
None,
+            facets = None, pagenum = 1, pagelen = 20, highlight = False,
+            highlight_fields = None, context = None):
+
+    if not query_string:
+      query_string = "*.*"
+
+    final_query_chain = self._create_query_chain(query, query_string)
+    solr_query = self.solr_interface.query(final_query_chain)
+    faceted_solr_query = solr_query.facet_by(facets)
+    self.highlighted_solr_query = 
faceted_solr_query.highlight(HIGHLIGHTABLE_FIELDS)
+
+    start = 0 if pagenum == 1 else pagelen * pagenum
+
+    paginated_solr_query = self.highlighted_solr_query.paginate(start=start, 
rows=pagelen)
+    results = paginated_solr_query.execute()
+    mlt = self.query_more_like_this(paginated_solr_query, fields="type", 
mindf=1, mintf=1)
+
+    return self._create_query_result(results, fields, pagenum, pagelen, mlt)
+
+  def query_more_like_this(self, query_chain, **kwargs):
+    mlt_results = query_chain.mlt(**kwargs).execute().more_like_these
+    return mlt_results
+
+  def _create_query_result(self, results, fields, pagenum, pagelen, mlt):
+    total_num, total_page_count, page_num, offset = \
+                self._prepare_query_result_attributes(results, pagenum, 
pagelen)
+
+    query_results = QueryResult()
+    query_results.hits = total_num
+    query_results.total_page_count = total_page_count
+    query_results.page_number = page_num
+    query_results.offset = offset
+
+    docs = []
+    highlighting = []
+
+    for retrieved_record in results:
+      result_doc = self._process_record(fields, retrieved_record, mlt)
+      docs.append(result_doc)
+
+      result_highlights = dict(retrieved_record['solr_highlights'])
+
+      highlighting.append(result_highlights)
+      query_results.docs = docs
+      query_results.highlighting = highlighting
+
+    return query_results
+
+  def _create_query_chain(self, query, query_string):
+    matches = re.findall(re.compile(r'([\w\*]+)'), query_string)
+    tokens = set([match for match in matches])
+
+    final_query_chain = None
+    for token in tokens:
+      token_query_chain = self._search_fields_for_token(token)
+      if final_query_chain == None:
+        final_query_chain = token_query_chain
+      else:
+        final_query_chain |= token_query_chain
+
+    return final_query_chain
+
+
+  def _process_record(self, fields, retrieved_record, mlt):
+    result_doc = dict()
+    if fields:
+      for field in fields:
+        if field in retrieved_record:
+          result_doc[field] = retrieved_record[field]
+    else:
+      for key, value in retrieved_record.items():
+        result_doc[key] = value
+
+    for key, value in result_doc.iteritems():
+      result_doc[key] = self._from_whoosh_format(value)
+
+    return result_doc
+
+  def _from_whoosh_format(self, value):
+    if isinstance(value, datetime):
+      value = utc.localize(value)
+    return value
+
+  def _prepare_query_result_attributes(self, results, pagenum, pagelen):
+    results_total_num = self.highlighted_solr_query.execute().result.numFound
+    total_page_count = int(ceil(results_total_num / pagelen))
+    pagenum = min(total_page_count, pagenum)
+
+    offset = (pagenum - 1) * pagelen
+    if (offset + pagelen) > results_total_num:
+        pagelen = results_total_num - offset
+
+    return results_total_num, total_page_count, pagenum, offset
+
+  def is_index_outdated(self):
+    """Always report the index as up to date (returns ``False``)."""
+    return False
+
+  def recreate_index(self):
+    """Do nothing and return ``True``."""
+    return True
+
+  @contextmanager
+  def start_operation(self):
+    """Context manager that yields once without any setup or teardown."""
+    yield
+
+  def _search_fields_for_token(self, token):
+    query_chain = None
+    field_boosts = DefaultQueryParser(self.env).field_boosts
+
+    for field, boost in field_boosts.iteritems():
+      if field != 'query_suggestion_basket' and field != 'relations':
+        field_token_dict = {field: token}
+        if query_chain == None:
+          query_chain = self.solr_interface.Q(**field_token_dict)**boost
+        else:
+          query_chain |= self.solr_interface.Q(**field_token_dict)**boost
+
+    return query_chain
+
+  def _reformat_doc(self, doc):
+    for key, value in doc.items():
+      if key is None:
+        del doc[None]
+      elif value is None:
+        del doc[key]
+      elif isinstance(value, basestring) and value == "":
+        del doc[key]
+      else:
+        doc[key] = self._to_whoosh_format(value)
+
+  def _to_whoosh_format(self, value):
+    if isinstance(value, basestring):
+      value = unicode(value)
+    elif isinstance(value, datetime):
+      value = self._convert_date_to_tz_naive_utc(value)
+    return value
+
+
+  def _convert_date_to_tz_naive_utc(self, value):
+    if value.tzinfo:
+      utc_time = value.astimezone(utc)
+      value = utc_time.replace(tzinfo=None)
+    return value
+
+
+  def _create_unique_id(self, product, doc_type, doc_id):
+    if product:
+      return u"%s:%s:%s" % (product, doc_type, doc_id)
+    else:
+      return u"%s:%s" % (doc_type, doc_id)
+
+  def getInstance(self):
+    """Return the ``SolrInterface`` object created in ``__init__``."""
+    return self.solr_interface
+
+
+

Added: bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schema.py
URL: 
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schema.py?rev=1616567&view=auto
==============================================================================
--- bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schema.py (added)
+++ bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schema.py Thu Aug  
7 19:00:56 2014
@@ -0,0 +1,169 @@
+from lxml import etree
+from bhsearch.whoosh_backend import WhooshBackend
+from trac.core import Component, implements, TracError
+import os
+
+class SolrSchema(Component):
+  instance = None
+
+  REQUIRED_FIELDS = {"id": True,
+                     "unique_id": True,
+                     "type": True}
+
+  FIELDS_TYPE_DICT = {"ID": "string",
+                      "DATETIME": "date",
+                      "KEYWORD": "string",
+                      "TEXT": "text_general"
+                      }
+
+  def __init__(self):
+    self.schema = WhooshBackend.SCHEMA
+    self.schema_element = etree.Element("schema")
+    self.schema_element.set("name", "Bloodhound Solr Schema")
+    self.schema_element.set("version", "1")
+
+    self.path = None
+    self.fields_element = etree.SubElement(self.schema_element, "fields")
+    self.unique_key_element = etree.SubElement(self.schema_element, 
"uniqueKey")
+    self.unique_key_element.text = "unique_id"
+
+    version_field = etree.SubElement(self.fields_element, "field")
+    version_field.set("name", "_version_")
+    version_field.set("type", "long")
+    version_field.set("indexed", "true")
+    version_field.set("stored", "true")
+
+    root_field = etree.SubElement(self.fields_element, "field")
+    root_field.set("name", "_root_")
+    root_field.set("type", "string")
+    root_field.set("indexed", "true")
+    root_field.set("stored", "false")
+
+    stored_name = etree.SubElement(self.fields_element, "field")
+    stored_name.set("name", "_stored_name")
+    stored_name.set("type", "string")
+    stored_name.set("indexed", "true")
+    stored_name.set("stored", "true")
+    stored_name.set("required", "false")
+    stored_name.set("multivalued", "false")
+
+  # @classmethod
+  # def getInstance(self, env):
+  #   if not self.instance:
+  #     self.instance = SolrSchema(env)
+  #   return self.instance
+
+  def generate_schema(self, path=None):
+    if not path:
+      path = os.getcwd()
+
+    self.add_all_fields()
+    self.add_type_definitions()
+    doc = etree.ElementTree(self.schema_element)
+
+    self.path = os.path.join(path, 'schema.xml')
+
+    out_file = open(os.path.join(path, 'schema.xml'), 'w')
+    doc.write(out_file, xml_declaration=True, encoding='UTF-8', 
pretty_print=True)
+    out_file.close()
+
+  def add_field(self, field_name, name_attr, type_attr, indexed_attr, 
stored_attr, required_attr, multivalued_attr):
+    field = etree.SubElement(self.fields_element, field_name)
+    field.set("name", name_attr)
+    field.set("type", type_attr)
+    field.set("indexed", indexed_attr)
+    field.set("stored", stored_attr)
+    field.set("required", required_attr)
+    field.set("multivalued", multivalued_attr)
+
+  def add_all_fields(self):
+    for (field_name, field_attrs) in self.schema.items():
+      type_attr = 
SolrSchema.FIELDS_TYPE_DICT[str(field_attrs.__class__.__name__)]
+      indexed_attr = str(field_attrs.indexed).lower()
+      stored_attr = str(field_attrs.stored).lower()
+      if field_name in SolrSchema.REQUIRED_FIELDS:
+        required_attr = "true"
+      else:
+        required_attr = "false"
+
+      self.add_field("field", field_name, type_attr, indexed_attr, 
stored_attr, required_attr, "false")
+
+
+  def add_type_definitions(self):
+    self.types_element = etree.SubElement(self.schema_element, "types")
+    self._add_string_type_definition()
+    self._add_text_general_type_definition()
+    self._add_date_type_definition()
+    self._add_long_type_definition()
+    self._add_lowercase_type_definition()
+
+
+  def _add_string_type_definition(self):
+    field_type = etree.SubElement(self.types_element, "fieldType")
+    field_type.set("name", "string")
+    field_type.set("class", "solr.StrField")
+    field_type.set("sortMissingLast", "true")
+
+
+  def _add_text_general_type_definition(self):
+    field_type = etree.SubElement(self.types_element, "fieldType")
+    field_type.set("name", "text_general")
+    field_type.set("class", "solr.TextField")
+    field_type.set("positionIncrementGap", "100")
+
+    analyzer_index = etree.SubElement(field_type, "analyzer")
+    analyzer_index.set("type", "index")
+
+    tokenizer_index = etree.SubElement(analyzer_index, "tokenizer")
+    tokenizer_index.set("class", "solr.StandardTokenizerFactory")
+    filter1 = etree.SubElement(analyzer_index, "filter")
+    filter1.set("class", "solr.StopFilterFactory")
+    filter1.set("ignoreCase", "true")
+    filter1.set("words", "stopwords.txt")
+
+    filter2 = etree.SubElement(analyzer_index, "filter")
+    filter2.set("class", "solr.LowerCaseFilterFactory")
+
+    analyzer_query = etree.SubElement(field_type, "analyzer")
+    analyzer_query.set("type", "query")
+    tokenizer_query = etree.SubElement(analyzer_query, "tokenizer")
+    tokenizer_query.set("class", "solr.StandardTokenizerFactory")
+    filter3 = etree.SubElement(analyzer_query, "filter")
+    filter3.set("class", "solr.StopFilterFactory")
+    filter3.set("ignoreCase", "true")
+    filter3.set("words", "stopwords.txt")
+
+    filter4 = etree.SubElement(analyzer_query, "filter")
+    filter4.set("class", "solr.SynonymFilterFactory")
+    filter4.set("synonyms", "synonyms.txt")
+    filter4.set("ignoreCase", "true")
+    filter4.set("expand", "true")
+
+    filter5 = etree.SubElement(analyzer_query, "filter")
+    filter5.set("class", "solr.LowerCaseFilterFactory")
+
+  def _add_date_type_definition(self):
+    field_type = etree.SubElement(self.types_element, "fieldType")
+    field_type.set("name", "date")
+    field_type.set("class", "solr.TrieDateField")
+    field_type.set("precisionStep", "0")
+    field_type.set("positionIncrementGap", "0")
+
+  def _add_long_type_definition(self):
+    field_type = etree.SubElement(self.types_element, "fieldType")
+    field_type.set("name", "long")
+    field_type.set("class", "solr.TrieLongField")
+    field_type.set("precisionStep", "0")
+    field_type.set("positionIncrementGap", "0")
+
+  def _add_lowercase_type_definition(self):
+    field_type = etree.SubElement(self.types_element, "fieldType")
+    field_type.set("name", "lowercase")
+    field_type.set("class", "solr.TextField")
+    field_type.set("positionIncrementGap", "100")
+
+    analyzer = etree.SubElement(field_type, "analyzer")
+    tokenizer = etree.SubElement(analyzer, "tokenizer")
+    tokenizer.set("class", "solr.KeywordTokenizerFactory")
+    filter_lowercase = etree.SubElement(analyzer, "filter")
+    filter_lowercase.set("class", "solr.LowerCaseFilterFactory")

Modified: 
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schemadoc/schema.xml
URL: 
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schemadoc/schema.xml?rev=1616567&r1=1616566&r2=1616567&view=diff
==============================================================================
--- 
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schemadoc/schema.xml 
(original)
+++ 
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schemadoc/schema.xml 
Thu Aug  7 19:00:56 2014
@@ -8,8 +8,8 @@
   <field name="_root_" type="string" indexed="true" stored="false"/>
 
   <!-- BH fields -->
-  <field name="id" type="string" indexed="true" stored="true" required="true" 
multiValued="false" />
   <field name="unique_id" type="string" indexed="true" stored="true" 
required="true" multiValued="false" />
+  <field name="id" type="string" indexed="true" stored="true" required="true" 
multiValued="false" />
   <field name="type" type="string" indexed="true" stored="true" 
required="true" multiValued="false"/>
   <field name="product" type="string" indexed="true" stored="true" 
required="false" multiValued="false"/>
   <field name="milestone" type="string" indexed="true" stored="true" 
required="false" multiValued="false"/>
@@ -31,14 +31,12 @@
   <field name="required_permission" type="string" indexed="true" stored="true" 
required="false" multiValued="false"/>
   <field name="name" type="text_general" indexed="true" stored="true" 
required="false" multiValued="false"/>
   <field name="_stored_name" type="string" indexed="true" stored="true" 
required="false" multiValued="false"/>
-  <!-- <field name="query_suggestion_basket" type="text_general" 
indexed="true" stored="true" required="true" multiValued="false"/> -->
-  <!-- <field name="relations" type="lowercase" indexed="true" stored="true" 
required="true" multiValued="false"/> -->
+  <field name="relations" type="lowercase" indexed="true" stored="true" 
required="true" multiValued="false"/>
+  <field name="query_suggestion_basket" type="text_general" indexed="true" 
stored="true" required="true" multiValued="false"/>
 </fields>
 
 <uniqueKey>unique_id</uniqueKey>
 
-<!-- <copyField source="name" dest="text"/> -->
-
 <types>
   <!-- Field type definitions -->
   <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
@@ -69,4 +67,3 @@
 </types>
 
 </schema>
-

Added: bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py
URL: 
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py?rev=1616567&view=auto
==============================================================================
--- bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py (added)
+++ bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py Thu Aug  
7 19:00:56 2014
@@ -0,0 +1,17 @@
+from trac.web.api import ITemplateStreamFilter
+from genshi.filters import Transformer
+import re
+from trac.core import Component, implements, TracError
+from genshi.input import HTML
+
+class BloodhoundSolrTemplate(Component):
+  implements (ITemplateStreamFilter)
+
+  def filter_stream(self, req, method, filename, stream, data):
+    html = HTML(u'''<br></br><a href="porc" class="btn" style="margin: 10px 
10px 10px 0px;">More like this</a>''')
+
+    if re.match(r'/bhsearch', req.path_info):
+      filter = Transformer('//dl[@id="results"]/dd/span[@class="date"]')
+      stream |= filter.append(html)
+
+    return stream

Modified: bloodhound/branches/bep_0014_solr/bloodhound_solr/setup.py
URL: 
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/setup.py?rev=1616567&r1=1616566&r2=1616567&view=diff
==============================================================================
--- bloodhound/branches/bep_0014_solr/bloodhound_solr/setup.py (original)
+++ bloodhound/branches/bep_0014_solr/bloodhound_solr/setup.py Thu Aug  7 
19:00:56 2014
@@ -8,9 +8,10 @@ PKG_INFO = {'bhsolr': ['schemadoc/*.xml'
 
 ENTRY_POINTS = {
           'trac.plugins': [
-          'bhsolr.api = bhsolr.api',
           'bhsolr.admin = bhsolr.admin',
-          'bhsolr.solr_backend = bhsolr.solr_backend',
+          'bhsolr.schema = bhsolr.schema',
+          'bhsolr.backend = bhsolr.backend',
+          'bhsolr.web_ui = bhsolr.web_ui',
           'bhsolr.search_resources.ticket_search = 
bhsolr.search_resources.ticket_search',
           'bhsolr.search_resources.milestone_search = 
bhsolr.search_resources.milestone_search',
           'bhsolr.search_resources.changeset_search = 
bhsolr.search_resources.changeset_search',


Reply via email to