RE: [EXT] Re: Nutch 1.13 release and Solr 6.6

Hiran CHAUDHURI Thu, 14 Sep 2017 20:49:44 -0700

Attachments are apparently not forwarded by the list manager, so I will try 
posting the patch inline.


So whoever wants to follow this tutorial:
https://wiki.apache.org/nutch/NutchTutorial#Setup_Solr_for_search

Make sure you apply this patch file or Solr 6.6 won't like it (you get various 
error messages when creating the core):

-----8<-------------------------------------------------------------
--- dist/apache-nutch-1.13/conf/schema.xml      2017-03-29 00:05:21.000000000 -0400
+++ solr/schema.xml     2017-09-05 16:14:12.515950452 -0400
@@ -56,7 +56,9 @@
     <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tlongs" class="solr.TrieLongField" docValues="true" 
precisionStep="8" positionIncrementGap="0" multiValued="true"/>
     <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tdoubles" class="solr.TrieDoubleField" docValues="true" 
precisionStep="8" positionIncrementGap="0" multiValued="true"/>
 
     <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, 
and
          is a more restricted form of the canonical representation of dateTime
@@ -86,6 +88,7 @@
     
     <!-- A Trie based date field for faster date range queries and date 
faceting. -->
     <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" 
precisionStep="6" positionIncrementGap="0"/>
+    <fieldType name="tdates" class="solr.TrieDateField" docValues="true" 
precisionStep="6" positionIncrementGap="0" multiValued="true"/>
 
 
     <!-- solr.TextField allows the specification of custom text analyzers
@@ -108,7 +111,7 @@
     <fieldType name="text_general" class="solr.TextField" 
positionIncrementGap="100">
       <analyzer type="index">
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" />
         <!-- in this example, we will only use synonyms at query time
         <filter class="solr.SynonymFilterFactory" 
synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
         -->
@@ -116,7 +119,7 @@
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" />
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
ignoreCase="true" expand="true"/>
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
@@ -140,7 +143,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPossessiveFilterFactory"/>
@@ -156,7 +158,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPossessiveFilterFactory"/>
@@ -193,7 +194,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" 
splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -206,7 +206,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" 
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" 
splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -237,7 +236,7 @@
     <fieldType name="text_general_rev" class="solr.TextField" 
positionIncrementGap="100">
       <analyzer type="index">
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" />
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
            maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
@@ -245,7 +244,7 @@
       <analyzer type="query">
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.StopFilterFactory" ignoreCase="true" 
words="stopwords.txt" />
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
     </fieldType>
@@ -303,7 +302,9 @@
     <fieldtype name="ignored" stored="false" indexed="false" 
multiValued="true" class="solr.StrField" />
 
         <!-- boolean type: "true" or "false" -->
-        <fieldType name="boolean" class="solr.BoolField" 
sortMissingLast="true"/>
+       <fieldType name="boolean" class="solr.BoolField" 
sortMissingLast="true"/>
+
+       <fieldType name="booleans" class="solr.BoolField" 
sortMissingLast="true" multiValued="true"/>
 
          <!-- sortMissingLast and sortMissingFirst attributes are optional 
attributes are
          currently supported on types that are sorted internally as strings
@@ -414,8 +415,7 @@
 
  </fields>
  <uniqueKey>id</uniqueKey>
- <defaultSearchField>text</defaultSearchField>
- <solrQueryParser defaultOperator="OR"/>
+ <solrQueryParser/>
 
   <!-- copyField commands copy one field to another at the time a document
         is added to the index.  It's used either to index the same field 
differently,
-------------------------------------------------->8----------------

Hiran

-----Original Message-----
From: BlackIce [mailto:blackice...@gmail.com] 
Sent: 14 September 2017 21:47
To: user@nutch.apache.org
Subject: [EXT] Re: Nutch 1.13 release and Solr 6.6

Sure, that would be most excellent!

On Sep 14, 2017 9:41 PM, "Hiran CHAUDHURI" <hiran.chaudh...@amadeus.com>
wrote:

> Hi there.
>
>
>
> When I tried to set up Nutch 1.13 to connect to Solr 6.6 I found out 
> that the Nutch schema shipped in .../conf/schema.xml needs quite a bit of 
> tweaking before Solr can use it.
>
> The reason may be that Nutch is developed against a different (older?) 
> version of Solr. It might be nice if that information were shared, as 
> otherwise it is quite frustrating - especially for new users to that 
> technology.
>
>
>
> Meanwhile I can provide a diff of the tweaks needed to make Solr 6.6 
> spin up - if someone's interested?
>
>
>
> Hiran
>
>
>
>
>
> *Hiran Chaudhuri*
> * Principal Support Engineer*
>
> Service Reliability Engineering - Custom
>
> Amadeus Data Processing GmbH
> Berghamer Strasse 6
> <https://maps.google.com/?q=Berghamer+Strasse+6+%0D+85435+Erding&entry
> =gmail&source=g>
> 85435 Erding
> T: +49-8122-43x3662 <+49%208122%2043>
> hiran.chaudh...@amadeus.com
> * http://amadeus.com <http://amadeus.com/>*
>
>
>

RE: [EXT] Re: Nutch 1.13 release and Solr 6.6

Reply via email to