(allura) 02/05: clean up unused solr fields

gcruz Fri, 23 Aug 2024 08:58:47 -0700

This is an automated email from the ASF dual-hosted git repository.

gcruz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/allura.git


commit 760dede19609f818410b6f6c9a37b754337f3ac9
Author: Dave Brondsema <dbronds...@slashdotmedia.com>
AuthorDate: Wed Aug 14 09:57:25 2024 -0400

    clean up unused solr fields
---
 Allura/allura/lib/solr.py          |  7 ++++
 solr_config/allura/conf/schema.xml | 69 --------------------------------------
 2 files changed, 7 insertions(+), 69 deletions(-)

diff --git a/Allura/allura/lib/solr.py b/Allura/allura/lib/solr.py
index 49c76afb3..7635b36d6 100644
--- a/Allura/allura/lib/solr.py
+++ b/Allura/allura/lib/solr.py
@@ -154,6 +154,13 @@ class MockSOLR:
         for o in objects:
             o['text'] = ''.join(o['text'])
             json.dumps(o)  # ensure no errors (since pysolr 3.9+ uses json API to solr)
+            for k in o.keys():
+                if k.endswith(('_i', '_s', '_l', '_t', '_b', '_f', '_d', '_dt', '_ws')):
+                    continue
+                elif k in ('id', 'text', 'title'):
+                    continue
+                else:
+                    raise ValueError(f'Unexpected solr field {k!r}, probably not in schema.xml')
             self.db[o['id']] = o
 
     def commit(self):
diff --git a/solr_config/allura/conf/schema.xml b/solr_config/allura/conf/schema.xml
index afca49639..fcb1ced3b 100644
--- a/solr_config/allura/conf/schema.xml
+++ b/solr_config/allura/conf/schema.xml
@@ -111,72 +111,7 @@
    <field name="text" type="text_general" indexed="true" stored="true" multiValued="true"/>
    <!-- END of required field values -->
 
-   <field name="description" type="text_general" indexed="true" stored="true" />
-   <field name="group_id" type="tint" indexed="true" stored="true"/>
-   <field name="group_ranking" type="tint" indexed="true" stored="true"/>
-   <field name="has_file" type="tint" indexed="true" stored="true"/>
-   <field name="help_wanted" type="boolean" indexed="true" stored="true" />
-   <field name="latest_file_date" type="date" indexed="true" stored="true" />
-   <field name="license" type="string" indexed="true" stored="true" />
-   <field name="license_other" type="string" indexed="true" stored="true" />
-   <field name="name" type="text_general" indexed="true" stored="true"/>
-   <field name="num_developers" type="tint" indexed="true" stored="true"/>
-   <field name="num_downloads" type="tint" indexed="true" stored="true"/>
-   <field name="num_downloads_week" type="tint" indexed="true" stored="true"/>
-   <field name="num_services" type="tint" indexed="true" stored="true"/>
-   <field name="percentile" type="float" indexed="true" stored="true"/>
-   <field name="project_type" type="tint" indexed="true" stored="true" />
-   <field name="project_doc_id" type="string" indexed="true" stored="true" />
-   <field name="registration_date" type="date" indexed="true" stored="true" />
-   <field name="screenshot_url" type="string" indexed="true" stored="true" />
-   <field name="trove" type="text_general" indexed="true" stored="true" />
-   <field name="unix_group_name" type="string" indexed="true" stored="true" />
-   <field name="source" type="string" indexed="true" stored="true" />
-   <field name="rating" type="float" indexed="true" stored="true" />
-   <field name="review_count" type="tint" indexed="true" stored="true" />
-
-   <!-- Common metadata fields, named specifically to match up with
-     SolrCell metadata when parsing rich documents such as Word, PDF.
-     Some fields are multiValued only because Tika currently may return
-     multiple values for them. Some metadata is parsed from the documents,
-     but there are some which come from the client context:
-       "content_type": From the HTTP headers of incoming stream
-       "resourcename": From SolrCell request param resource.name
-   -->
    <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
-   <field name="subject" type="text_general" indexed="true" stored="true"/>
-   <field name="comments" type="text_general" indexed="true" stored="true"/>
-   <field name="author" type="text_general" indexed="true" stored="true"/>
-   <field name="keywords" type="text_general" indexed="true" stored="true"/>
-   <field name="category" type="text_general" indexed="true" stored="true"/>
-   <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
-   <field name="last_modified" type="date" indexed="true" stored="true"/>
-   <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
-
-   <!-- Main body of document extracted by SolrCell.
-        NOTE: This field is not indexed by default, since it is also copied to "text"
-        using copyField below. This is to save space. Use this field for returning and
-        highlighting document content. Use the "text" field to search the content. -->
-
-
-   <!-- catchall field, containing all other searchable text fields (implemented
-        via copyField further on in this schema  -->
-
-   <!-- catchall text field that indexes tokens both normally and in reverse for efficient
-        leading wildcard queries. -->
-
-   <!-- non-tokenized version of manufacturer to make it easier to sort or group
-        results by manufacturer.  copied from "manu" via copyField -->
-   <!--
-     Some fields such as popularity and manu_exact could be modified to
-     leverage doc values:
-     <field name="popularity" type="int" indexed="true" stored="true" docValues="true" default="0" />
-     <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" default="" />
-
-     Although it would make indexing slightly slower and the index bigger, it
-     would also make the index faster to load, more memory-efficient and more
-     NRT-friendly.
-     -->
 
    <!-- Dynamic field definitions allow using convention over configuration
        for fields via the specification of patterns to match field names.
@@ -184,10 +119,6 @@
        RESTRICTION: the glob-like pattern in the name attribute must have
        a "*" only at the start or the end.  -->
 
-   <!-- Type used to index the lat and lon components for the "location" FieldType -->
-
-   <!-- some trie-coded dynamic fields for faster range queries -->
-
    <!-- uncomment the following to ignore any fields that don't already match an existing
         field name or dynamic field, rather than reporting them as an error.
         alternately, change the type="ignored" to some other type e.g. "text" if you want

(allura) 02/05: clean up unused solr fields

Reply via email to