CHUKWA-798. Added retry logic for SolrWriter and update to Solr 5.5.0 API (Eric Yang)
Project: http://git-wip-us.apache.org/repos/asf/chukwa/repo Commit: http://git-wip-us.apache.org/repos/asf/chukwa/commit/21b24284 Tree: http://git-wip-us.apache.org/repos/asf/chukwa/tree/21b24284 Diff: http://git-wip-us.apache.org/repos/asf/chukwa/diff/21b24284 Branch: refs/heads/master Commit: 21b24284255d15fdc359ada9fc68d3a59fb60f7f Parents: fc70665 Author: Eric Yang <[email protected]> Authored: Sat Mar 19 10:09:36 2016 -0700 Committer: Eric Yang <[email protected]> Committed: Sat Mar 19 10:09:36 2016 -0700 ---------------------------------------------------------------------- conf/chukwa-agent-conf.xml | 2 +- conf/chukwa-common.xml | 2 +- contrib/solr/chukwa/README.txt | 51 + contrib/solr/logs/README.txt | 82 - .../_schema_analysis_stopwords_english.json | 38 - .../conf/_schema_analysis_synonyms_english.json | 11 - contrib/solr/logs/conf/admin-extra.html | 24 - .../solr/logs/conf/admin-extra.menu-bottom.html | 25 - .../solr/logs/conf/admin-extra.menu-top.html | 25 - .../clustering/carrot2/kmeans-attributes.xml | 35 - .../clustering/carrot2/lingo-attributes.xml | 40 - .../conf/clustering/carrot2/stc-attributes.xml | 35 - contrib/solr/logs/conf/currency.xml | 67 - contrib/solr/logs/conf/elevate.xml | 38 - contrib/solr/logs/conf/lang/contractions_ca.txt | 22 - contrib/solr/logs/conf/lang/contractions_fr.txt | 29 - contrib/solr/logs/conf/lang/contractions_ga.txt | 19 - contrib/solr/logs/conf/lang/contractions_it.txt | 37 - contrib/solr/logs/conf/lang/hyphenations_ga.txt | 19 - contrib/solr/logs/conf/lang/stemdict_nl.txt | 20 - contrib/solr/logs/conf/lang/stoptags_ja.txt | 434 -- contrib/solr/logs/conf/lang/stopwords_ar.txt | 139 - contrib/solr/logs/conf/lang/stopwords_bg.txt | 207 - contrib/solr/logs/conf/lang/stopwords_ca.txt | 234 -- contrib/solr/logs/conf/lang/stopwords_ckb.txt | 150 - contrib/solr/logs/conf/lang/stopwords_cz.txt | 186 - contrib/solr/logs/conf/lang/stopwords_da.txt | 110 - contrib/solr/logs/conf/lang/stopwords_de.txt | 294 -- 
contrib/solr/logs/conf/lang/stopwords_el.txt | 92 - contrib/solr/logs/conf/lang/stopwords_en.txt | 54 - contrib/solr/logs/conf/lang/stopwords_es.txt | 356 -- contrib/solr/logs/conf/lang/stopwords_eu.txt | 113 - contrib/solr/logs/conf/lang/stopwords_fa.txt | 313 -- contrib/solr/logs/conf/lang/stopwords_fi.txt | 97 - contrib/solr/logs/conf/lang/stopwords_fr.txt | 186 - contrib/solr/logs/conf/lang/stopwords_ga.txt | 124 - contrib/solr/logs/conf/lang/stopwords_gl.txt | 175 - contrib/solr/logs/conf/lang/stopwords_hi.txt | 235 -- contrib/solr/logs/conf/lang/stopwords_hu.txt | 211 - contrib/solr/logs/conf/lang/stopwords_hy.txt | 60 - contrib/solr/logs/conf/lang/stopwords_id.txt | 373 -- contrib/solr/logs/conf/lang/stopwords_it.txt | 303 -- contrib/solr/logs/conf/lang/stopwords_ja.txt | 141 - contrib/solr/logs/conf/lang/stopwords_lv.txt | 186 - contrib/solr/logs/conf/lang/stopwords_nl.txt | 119 - contrib/solr/logs/conf/lang/stopwords_no.txt | 194 - contrib/solr/logs/conf/lang/stopwords_pt.txt | 253 -- contrib/solr/logs/conf/lang/stopwords_ro.txt | 247 -- contrib/solr/logs/conf/lang/stopwords_ru.txt | 243 -- contrib/solr/logs/conf/lang/stopwords_sv.txt | 133 - contrib/solr/logs/conf/lang/stopwords_th.txt | 133 - contrib/solr/logs/conf/lang/stopwords_tr.txt | 226 -- contrib/solr/logs/conf/lang/userdict_ja.txt | 43 - contrib/solr/logs/conf/mapping-FoldToASCII.txt | 3813 ------------------ .../solr/logs/conf/mapping-ISOLatin1Accent.txt | 246 -- contrib/solr/logs/conf/protwords.txt | 21 - contrib/solr/logs/conf/schema.xml | 375 -- contrib/solr/logs/conf/scripts.conf | 24 - contrib/solr/logs/conf/solrconfig.xml | 1878 --------- contrib/solr/logs/conf/spellings.txt | 16 - contrib/solr/logs/conf/stopwords.txt | 14 - contrib/solr/logs/conf/synonyms.txt | 29 - contrib/solr/logs/conf/update-script.js | 70 - contrib/solr/logs/conf/velocity/README.txt | 101 - .../logs/conf/velocity/VM_global_library.vm | 191 - contrib/solr/logs/conf/velocity/browse.vm | 49 - 
contrib/solr/logs/conf/velocity/cluster.vm | 35 - .../solr/logs/conf/velocity/cluster_results.vm | 47 - contrib/solr/logs/conf/velocity/debug.vm | 44 - contrib/solr/logs/conf/velocity/did_you_mean.vm | 25 - contrib/solr/logs/conf/velocity/error.vm | 27 - contrib/solr/logs/conf/velocity/facet_fields.vm | 39 - contrib/solr/logs/conf/velocity/facet_pivot.vm | 28 - .../solr/logs/conf/velocity/facet_queries.vm | 28 - contrib/solr/logs/conf/velocity/facet_ranges.vm | 39 - contrib/solr/logs/conf/velocity/facets.vm | 26 - contrib/solr/logs/conf/velocity/footer.vm | 59 - contrib/solr/logs/conf/velocity/head.vm | 51 - contrib/solr/logs/conf/velocity/header.vm | 23 - contrib/solr/logs/conf/velocity/hit.vm | 41 - contrib/solr/logs/conf/velocity/hit_grouped.vm | 59 - contrib/solr/logs/conf/velocity/hit_plain.vm | 41 - contrib/solr/logs/conf/velocity/join_doc.vm | 36 - .../logs/conf/velocity/jquery.autocomplete.css | 65 - .../logs/conf/velocity/jquery.autocomplete.js | 763 ---- contrib/solr/logs/conf/velocity/layout.vm | 40 - contrib/solr/logs/conf/velocity/main.css | 247 -- .../solr/logs/conf/velocity/mime_type_lists.vm | 84 - .../logs/conf/velocity/pagination_bottom.vm | 38 - .../solr/logs/conf/velocity/pagination_top.vm | 45 - contrib/solr/logs/conf/velocity/product_doc.vm | 48 - contrib/solr/logs/conf/velocity/query.vm | 58 - contrib/solr/logs/conf/velocity/query_form.vm | 80 - contrib/solr/logs/conf/velocity/query_group.vm | 59 - .../solr/logs/conf/velocity/query_spatial.vm | 91 - contrib/solr/logs/conf/velocity/results_list.vm | 38 - contrib/solr/logs/conf/velocity/richtext_doc.vm | 169 - contrib/solr/logs/conf/velocity/suggest.vm | 24 - contrib/solr/logs/conf/velocity/tabs.vm | 66 - contrib/solr/logs/conf/xslt/example.xsl | 132 - contrib/solr/logs/conf/xslt/example_atom.xsl | 67 - contrib/solr/logs/conf/xslt/example_rss.xsl | 66 - contrib/solr/logs/conf/xslt/luke.xsl | 337 -- contrib/solr/logs/conf/xslt/updateXml.xsl | 70 - contrib/solr/logs/core.properties | 15 - 
.../datacollection/writer/solr/SolrWriter.java | 29 +- .../web/hicc/ajax-solr/chukwa/js/logsearch.js | 2 +- .../web/hicc/ajax-solr/chukwa/js/logviewer.js | 2 +- .../writer/solr/TestSolrWriter.java | 1 + 109 files changed, 77 insertions(+), 17019 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/conf/chukwa-agent-conf.xml ---------------------------------------------------------------------- diff --git a/conf/chukwa-agent-conf.xml b/conf/chukwa-agent-conf.xml index 9eb51fa..faaddda 100644 --- a/conf/chukwa-agent-conf.xml +++ b/conf/chukwa-agent-conf.xml @@ -116,6 +116,6 @@ <property> <name>solr.collection</name> - <value>logs</value> + <value>chukwa</value> </property> </configuration> http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/conf/chukwa-common.xml ---------------------------------------------------------------------- diff --git a/conf/chukwa-common.xml b/conf/chukwa-common.xml index c1d1ee0..53b6412 100644 --- a/conf/chukwa-common.xml +++ b/conf/chukwa-common.xml @@ -35,7 +35,7 @@ <property> <name>chukwa.solr.url</name> - <value>http://localhost:7574/solr</value> + <value>http://localhost:8983/solr</value> <description>Solr cloud URL</description> </property> http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/chukwa/README.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/chukwa/README.txt b/contrib/solr/chukwa/README.txt new file mode 100644 index 0000000..b5be1cc --- /dev/null +++ b/contrib/solr/chukwa/README.txt @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Chukwa SolrCloud Integration + +Usage +----- + +- Extract Solr tarball and create solr collections + + tar fxv solr-5.5.0.tar.gz + bin/solr start -cloud -z [zookeeper_host]:2181 + bin/solr create_collection -c chukwa -n chukwa + +- Configure chukwa-agent-conf.xml with pipeline that includes SolrWriter. + + <property> + <name>chukwa.pipeline</name> + <value>org.apache.hadoop.chukwa.datacollection.writer.solr.SolrWriter</value> + <description>Configure agent to write to solr</description> + </property> + + <property> + <name>solr.cloud.address</name> + <value>localhost:2181</value> + <description>Solr cloud zookeeper address</description> + </property> + + <property> + <name>solr.collection</name> + <value>chukwa</value> + <description>Solr Cloud collection name</description> + </property> + +- Restart Chukwa Agent and point browser to: + + http://localhost:8983/solr/chukwa/select?q=*:*&wt=json&indent=true + +This REST API will display all collected log entries. 
http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/README.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/README.txt b/contrib/solr/logs/README.txt deleted file mode 100644 index 5b986b5..0000000 --- a/contrib/solr/logs/README.txt +++ /dev/null @@ -1,82 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -Chukwa SolrCore Instance Directory -============================= - -This directory is provided as an example of what an "Instance Directory" -should look like for Chukwa SolrCore - -Basic Directory Structure -------------------------- - -The Solr Home directory typically contains the following sub-directories... - - conf/ - This directory is mandatory and must contain your solrconfig.xml - and schema.xml. Any other optional configuration files would also - be kept here. - - data/ - This directory is the default location where Solr will keep your - index, and is used by the replication scripts for dealing with - snapshots. You can override this location in the - conf/solrconfig.xml. Solr will create this directory if it does not - already exist. - - lib/ - This directory is optional. 
If it exists, Solr will load any Jars - found in this directory and use them to resolve any "plugins" - specified in your solrconfig.xml or schema.xml (ie: Analyzers, - Request Handlers, etc...). Alternatively you can use the <lib> - syntax in conf/solrconfig.xml to direct Solr to your plugins. See - the example conf/solrconfig.xml file for details. - -Usage ------ - -- Symlink this directory to solr-4.9.0/examples/solr/logs. -- Start solr cloud with: - - java -Dbootstrap_confdir=chukwa-0.6.0/etc/solr/logs/conf \ - -Dcollection.configName=myconf -Djetty.port=7574 \ - -DzkHost=localhost:2181 -jar start.jar - -- Configure chukwa-agent-conf.xml with pipeline that includes SolrWriter. - - <property> - <name>chukwa.pipeline</name> - <value>org.apache.hadoop.chukwa.datacollection.writer.solr.SolrWriter</value> - <description>Configure agent to write to solr</description> - </property> - - <property> - <name>solr.cloud.address</name> - <value>localhost:2181</value> - <description>Solr cloud zookeeper address</description> - </property> - - <property> - <name>solr.collection</name> - <value>logs</value> - <description>SolrCore Instance name</description> - </property> - -- Restart Chukwa Agent and point browser to: - - http://localhost:7574/solr/logs/select?q=*:*&wt=json&indent=true - -This REST API will display all collected log entries. 
http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/_schema_analysis_stopwords_english.json ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/_schema_analysis_stopwords_english.json b/contrib/solr/logs/conf/_schema_analysis_stopwords_english.json deleted file mode 100644 index a694e5c..0000000 --- a/contrib/solr/logs/conf/_schema_analysis_stopwords_english.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "initArgs":{"ignoreCase":true}, - "managedList":[ - "a", - "an", - "and", - "are", - "as", - "at", - "be", - "but", - "by", - "for", - "if", - "in", - "into", - "is", - "it", - "no", - "not", - "of", - "on", - "or", - "stopworda", - "stopwordb", - "such", - "that", - "the", - "their", - "then", - "there", - "these", - "they", - "this", - "to", - "was", - "will", - "with"]} http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/_schema_analysis_synonyms_english.json ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/_schema_analysis_synonyms_english.json b/contrib/solr/logs/conf/_schema_analysis_synonyms_english.json deleted file mode 100644 index 869bdce..0000000 --- a/contrib/solr/logs/conf/_schema_analysis_synonyms_english.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "initArgs":{ - "ignoreCase":true, - "format":"solr" - }, - "managedMap":{ - "GB":["GiB","Gigabyte"], - "happy":["glad","joyful"], - "TV":["Television"] - } -} http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/admin-extra.html ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/admin-extra.html b/contrib/solr/logs/conf/admin-extra.html deleted file mode 100644 index fecab20..0000000 --- a/contrib/solr/logs/conf/admin-extra.html +++ /dev/null @@ -1,24 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license 
agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!-- The content of this page will be statically included into the top- -right box of the cores overview page. Uncomment this as an example to -see there the content will show up. - -<img src="img/ico/construction.png"> This line will appear at the top- -right box on collection1's Overview ---> http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/admin-extra.menu-bottom.html ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/admin-extra.menu-bottom.html b/contrib/solr/logs/conf/admin-extra.menu-bottom.html deleted file mode 100644 index 3359a46..0000000 --- a/contrib/solr/logs/conf/admin-extra.menu-bottom.html +++ /dev/null @@ -1,25 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!-- admin-extra.menu-bottom.html --> -<!-- -<li> - <a href="#" style="background-image: url(img/ico/construction.png);"> - LAST ITEM - </a> -</li> ---> http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/admin-extra.menu-top.html ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/admin-extra.menu-top.html b/contrib/solr/logs/conf/admin-extra.menu-top.html deleted file mode 100644 index 0886cee..0000000 --- a/contrib/solr/logs/conf/admin-extra.menu-top.html +++ /dev/null @@ -1,25 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> - -<!-- admin-extra.menu-top.html --> -<!-- -<li> - <a href="#" style="background-image: url(img/ico/construction.png);"> - FIRST ITEM - </a> -</li> ---> http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/clustering/carrot2/kmeans-attributes.xml ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/clustering/carrot2/kmeans-attributes.xml b/contrib/solr/logs/conf/clustering/carrot2/kmeans-attributes.xml deleted file mode 100644 index 6f315a8..0000000 --- a/contrib/solr/logs/conf/clustering/carrot2/kmeans-attributes.xml +++ /dev/null @@ -1,35 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> -<!-- - Default configuration for the bisecting k-means clustering algorithm. - - This file can be loaded (and saved) by Carrot2 Workbench. 
- http://project.carrot2.org/download.html ---> -<attribute-sets default="attributes"> - <attribute-set id="attributes"> - <value-set> - <label>attributes</label> - <attribute key="MultilingualClustering.defaultLanguage"> - <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/> - </attribute> - <attribute key="MultilingualClustering.languageAggregationStrategy"> - <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/> - </attribute> - </value-set> - </attribute-set> -</attribute-sets> http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/clustering/carrot2/lingo-attributes.xml ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/clustering/carrot2/lingo-attributes.xml b/contrib/solr/logs/conf/clustering/carrot2/lingo-attributes.xml deleted file mode 100644 index 3e45bd0..0000000 --- a/contrib/solr/logs/conf/clustering/carrot2/lingo-attributes.xml +++ /dev/null @@ -1,40 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> -<!-- - Default configuration for the Lingo clustering algorithm. - - This file can be loaded (and saved) by Carrot2 Workbench. 
- http://project.carrot2.org/download.html ---> -<attribute-sets default="attributes"> - <attribute-set id="attributes"> - <value-set> - <label>attributes</label> - <!-- - The language to assume for clustered documents. - For a list of allowed values, see: - http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage - --> - <attribute key="MultilingualClustering.defaultLanguage"> - <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/> - </attribute> - <attribute key="LingoClusteringAlgorithm.desiredClusterCountBase"> - <value type="java.lang.Integer" value="20"/> - </attribute> - </value-set> - </attribute-set> -</attribute-sets> http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/clustering/carrot2/stc-attributes.xml ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/clustering/carrot2/stc-attributes.xml b/contrib/solr/logs/conf/clustering/carrot2/stc-attributes.xml deleted file mode 100644 index 9604d36..0000000 --- a/contrib/solr/logs/conf/clustering/carrot2/stc-attributes.xml +++ /dev/null @@ -1,35 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> -<!-- - Default configuration for the STC clustering algorithm. - - This file can be loaded (and saved) by Carrot2 Workbench. - http://project.carrot2.org/download.html ---> -<attribute-sets default="attributes"> - <attribute-set id="attributes"> - <value-set> - <label>attributes</label> - <attribute key="MultilingualClustering.defaultLanguage"> - <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/> - </attribute> - <attribute key="MultilingualClustering.languageAggregationStrategy"> - <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/> - </attribute> - </value-set> - </attribute-set> -</attribute-sets> http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/currency.xml ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/currency.xml b/contrib/solr/logs/conf/currency.xml deleted file mode 100644 index 3a9c58a..0000000 --- a/contrib/solr/logs/conf/currency.xml +++ /dev/null @@ -1,67 +0,0 @@ -<?xml version="1.0" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> - -<!-- Example exchange rates file for CurrencyField type named "currency" in example schema --> - -<currencyConfig version="1.0"> - <rates> - <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 --> - <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" /> - <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" /> - <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" /> - <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" /> - <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" /> - <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" /> - <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" /> - <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" /> - <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" /> - <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" /> - <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" /> - <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" /> - <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" /> - <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. 
FUND SDR" /> - <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" /> - <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" /> - <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" /> - <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" /> - <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" /> - <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" /> - <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" /> - <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" /> - <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" /> - <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" /> - <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" /> - <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" /> - <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" /> - <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" /> - <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" /> - <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" /> - <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" /> - <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" /> - <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" /> - <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" /> - <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. 
Dirham" /> - <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" /> - <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" /> - - <!-- Cross-rates for some common currencies --> - <rate from="EUR" to="GBP" rate="0.869914" /> - <rate from="EUR" to="NOK" rate="7.800095" /> - <rate from="GBP" to="NOK" rate="8.966508" /> - </rates> -</currencyConfig> http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/elevate.xml ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/elevate.xml b/contrib/solr/logs/conf/elevate.xml deleted file mode 100644 index 25d5ceb..0000000 --- a/contrib/solr/logs/conf/elevate.xml +++ /dev/null @@ -1,38 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!-- If this file is found in the config directory, it will only be - loaded once at startup. If it is found in Solr's data - directory, it will be re-loaded every commit. 
- - See http://wiki.apache.org/solr/QueryElevationComponent for more info - ---> -<elevate> - <query text="foo bar"> - <doc id="1" /> - <doc id="2" /> - <doc id="3" /> - </query> - - <query text="ipod"> - <doc id="MA147LL/A" /> <!-- put the actual ipod at the top --> - <doc id="IW-02" exclude="true" /> <!-- exclude this cable --> - </query> - -</elevate> http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/contractions_ca.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/contractions_ca.txt b/contrib/solr/logs/conf/lang/contractions_ca.txt deleted file mode 100644 index 4a3b6ff..0000000 --- a/contrib/solr/logs/conf/lang/contractions_ca.txt +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Set of Catalan contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -l -m -n -s -t http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/contractions_fr.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/contractions_fr.txt b/contrib/solr/logs/conf/lang/contractions_fr.txt deleted file mode 100644 index dbe0677..0000000 --- a/contrib/solr/logs/conf/lang/contractions_fr.txt +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Set of French contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -l -m -t -qu -n -s -j -d -c -jusqu -quoiqu -lorsqu -puisqu http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/contractions_ga.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/contractions_ga.txt b/contrib/solr/logs/conf/lang/contractions_ga.txt deleted file mode 100644 index 99e9c97..0000000 --- a/contrib/solr/logs/conf/lang/contractions_ga.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Set of Irish contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -m -b http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/contractions_it.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/contractions_it.txt b/contrib/solr/logs/conf/lang/contractions_it.txt deleted file mode 100644 index c2d2c1e..0000000 --- a/contrib/solr/logs/conf/lang/contractions_it.txt +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Set of Italian contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -c -l -all -dall -dell -nell -sull -coll -pell -gl -agl -dagl -degl -negl -sugl -un -m -t -s -v -d http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/hyphenations_ga.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/hyphenations_ga.txt b/contrib/solr/logs/conf/lang/hyphenations_ga.txt deleted file mode 100644 index 31de8b8..0000000 --- a/contrib/solr/logs/conf/lang/hyphenations_ga.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Set of Irish hyphenations for StopFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -h -n -t http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stemdict_nl.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stemdict_nl.txt b/contrib/solr/logs/conf/lang/stemdict_nl.txt deleted file mode 100644 index a8a1a48..0000000 --- a/contrib/solr/logs/conf/lang/stemdict_nl.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Set of overrides for the dutch stemmer -# TODO: load this as a resource from the analyzer and sync it in build.xml -fiets fiets -bromfiets bromfiets -ei eier -kind kinder http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stoptags_ja.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stoptags_ja.txt b/contrib/solr/logs/conf/lang/stoptags_ja.txt deleted file mode 100644 index 7eafe2e..0000000 --- a/contrib/solr/logs/conf/lang/stoptags_ja.txt +++ /dev/null @@ -1,434 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. -# -# Any token with a part-of-speech tag that exactly matches those defined in this -# file are removed from the token stream. -# -# Set your own stoptags by uncommenting the lines below. Note that comments are -# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, -# etc. that can be useful for building you own stoptag set. -# -# The entire possible tagset is provided below for convenience. 
-# -##### -# noun: unclassified nouns -#åè© -# -# noun-common: Common nouns or nouns where the sub-classification is undefined -#åè©-ä¸è¬ -# -# noun-proper: Proper nouns where the sub-classification is undefined -#åè©-åºæåè© -# -# noun-proper-misc: miscellaneous proper nouns -#åè©-åºæåè©-ä¸è¬ -# -# noun-proper-person: Personal names where the sub-classification is undefined -#åè©-åºæåè©-人å -# -# noun-proper-person-misc: names that cannot be divided into surname and -# given name; foreign names; names where the surname or given name is unknown. -# e.g. ãå¸ã®æ¹ -#åè©-åºæåè©-人å-ä¸è¬ -# -# noun-proper-person-surname: Mainly Japanese surnames. -# e.g. å±±ç° -#åè©-åºæåè©-人å-å§ -# -# noun-proper-person-given_name: Mainly Japanese given names. -# e.g. 太é -#åè©-åºæåè©-人å-å -# -# noun-proper-organization: Names representing organizations. -# e.g. éç£ç, NHK -#åè©-åºæåè©-çµç¹ -# -# noun-proper-place: Place names where the sub-classification is undefined -#åè©-åºæåè©-å°å -# -# noun-proper-place-misc: Place names excluding countries. -# e.g. ã¢ã¸ã¢, ãã«ã»ãã, äº¬é½ -#åè©-åºæåè©-å°å-ä¸è¬ -# -# noun-proper-place-country: Country names. -# e.g. æ¥æ¬, ãªã¼ã¹ãã©ãªã¢ -#åè©-åºæåè©-å°å-å½ -# -# noun-pronoun: Pronouns where the sub-classification is undefined -#åè©-代åè© -# -# noun-pronoun-misc: miscellaneous pronouns: -# e.g. ãã, ãã, ããã¤, ããªã, ãã¡ãã¡, ããã¤, ã©ãã, ãªã«, ã¿ãªãã, ã¿ããª, ãããã, ãããã -#åè©-代åè©-ä¸è¬ -# -# noun-pronoun-contraction: Spoken language contraction made by combining a -# pronoun and the particle 'wa'. -# e.g. ããã, ããã, ãããã, ããã, ãããã -#åè©-代åè©-ç¸®ç´ -# -# noun-adverbial: Temporal nouns such as names of days or months that behave -# like adverbs. Nouns that represent amount or ratios and can be used adverbially, -# e.g. éæ, 䏿, åå¾, å°é -#åè©-å¯è©å¯è½ -# -# noun-verbal: Nouns that take arguments with case and can appear followed by -# 'suru' and related verbs (ãã, ã§ãã, ãªãã, ãã ãã) -# e.g. 
ã¤ã³ããã, æç, æªå, æªæ¦è¦é, ä¸å®å¿, ä¸åã -#åè©-ãµå¤æ¥ç¶ -# -# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na") -# e.g. å¥åº·, 宿, é§ç®, ã ã -#åè©-形容åè©èªå¹¹ -# -# noun-numeric: Arabic numbers, Chinese numerals, and counters like ä½ (å), æ°. -# e.g. 0, 1, 2, ä½, æ°, å¹¾ -#åè©-æ° -# -# noun-affix: noun affixes where the sub-classification is undefined -#åè©-éèªç« -# -# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that -# attach to the base form of inflectional words, words that cannot be classified -# into any of the other categories below. This category includes indefinite nouns. -# e.g. ããã¤ã, æ, ãã, ç²æ, æ°, ããã, å«ã, ãã, ç, ãã¨, äº, ãã¨, æ¯, ãã ã, 次第, -# é , ãã, æçº, ã¤ãã§, åºã§, ã¤ãã, ç©ãã, ç¹, ã©ãã, ã®, ã¯ã, ç, ã¯ãã¿, å¼¾ã¿, -# æå, ãµã, ãµã, æ¯ã, ã»ã, æ¹, æ¨, ãã®, ç©, è , ãã, æ , ããã, æä»¥, ãã, 訳, -# ãã, å²ã, å², ã-å£èª/, ãã-å£èª/ -#åè©-éèªç«-ä¸è¬ -# -# noun-affix-adverbial: noun affixes that that can behave as adverbs. -# e.g. ããã , é, ããã, æãå¥, ãã¨, å¾, ä½ã, 以å¤, 以é, 以å¾, 以ä¸, 以å, 䏿¹, ãã, -# ä¸, ãã¡, å , ãã, æã, ããã, éã, ãã, ã£ãã, çµæ, ãã, é , ãã, é, æä¸, ããªã, -# æä¸, ããã, èªä½, ãã³, 度, ãã, çº, ã¤ã©, é½åº¦, ã¨ãã, éã, ã¨ã, æ, ã¨ãã, æ, -# ã¨ãã, é端, ãªã, ä¸, ã®ã¡, å¾, ã°ãã, å ´å, æ¥, ã¶ã, å, ã»ã, ä», ã¾ã, å, ã¾ã¾, -# å, ä¾, ã¿ãã, ç¢å -#åè©-éèªç«-å¯è©å¯è½ -# -# noun-affix-aux: noun affixes treated as å©åè© ("auxiliary verb") in school grammars -# with the stem ãã(ã ) ("you(da)"). -# e.g. ãã, ãã, æ§ (ãã) -#åè©-éèªç«-å©åè©èªå¹¹ -# -# noun-affix-adjective-base: noun affixes that can connect to the indeclinable -# connection form 㪠(aux "da"). -# e.g. ã¿ãã, ãµã -#åè©-éèªç«-形容åè©èªå¹¹ -# -# noun-special: special nouns where the sub-classification is undefined. -#åè©-ç¹æ® -# -# noun-special-aux: The ããã ("souda") stem form that is used for reporting news, is -# treated as å©åè© ("auxiliary verb") in school grammars, and attach to the base -# form of inflectional words. -# e.g. 
ãã -#åè©-ç¹æ®-å©åè©èªå¹¹ -# -# noun-suffix: noun suffixes where the sub-classification is undefined. -#åè©-æ¥å°¾ -# -# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect -# to ã¬ã« or ã¿ã¤ and can combine into compound nouns, words that cannot be classified into -# any of the other categories below. In general, this category is more inclusive than -# æ¥å°¾èª ("suffix") and is usually the last element in a compound noun. -# e.g. ãã, ãã, æ¹, ç²æ (ãã), ããã, ãã¿, æ°å³, ããã¿, (ï½ãã) ã, 次第, æ¸ (ã) ã¿, -# ãã, (ã§ã)ã£ã, æ, 観, æ§, å¦, é¡, é¢, ç¨ -#åè©-æ¥å°¾-ä¸è¬ -# -# noun-suffix-person: Suffixes that form nouns and attach to person names more often -# than other nouns. -# e.g. å, æ§, è -#åè©-æ¥å°¾-人å -# -# noun-suffix-place: Suffixes that form nouns and attach to place names more often -# than other nouns. -# e.g. çº, å¸, ç -#åè©-æ¥å°¾-å°å -# -# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that -# can appear before ã¹ã« ("suru"). -# e.g. å, è¦, åã, å ¥ã, è½ã¡, è²·ã -#åè©-æ¥å°¾-ãµå¤æ¥ç¶ -# -# noun-suffix-aux: The stem form of ããã (æ§æ ) that is used to indicate conditions, -# is treated as å©åè© ("auxiliary verb") in school grammars, and attach to the -# conjunctive form of inflectional words. -# e.g. ãã -#åè©-æ¥å°¾-å©åè©èªå¹¹ -# -# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive -# form of inflectional words and appear before the copula ã ("da"). -# e.g. ç, ã, ãã¡ -#åè©-æ¥å°¾-形容åè©èªå¹¹ -# -# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. -# e.g. å¾ (ã), 以å¾, 以é, 以å, åå¾, ä¸, æ«, ä¸, æ (ã) -#åè©-æ¥å°¾-å¯è©å¯è½ -# -# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category -# is more inclusive than 婿°è© ("classifier") and includes common nouns that attach -# to numbers. -# e.g. 
å, ã¤, æ¬, å, ãã¼ã»ã³ã, cm, kg, ã«æ, ãå½, åºç», æé, æå -#åè©-æ¥å°¾-婿°è© -# -# noun-suffix-special: Special suffixes that mainly attach to inflecting words. -# e.g. (楽ã) ã, (èã) æ¹ -#åè©-æ¥å°¾-ç¹æ® -# -# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words -# together. -# e.g. (æ¥æ¬) 対 (ã¢ã¡ãªã«), 対 (ã¢ã¡ãªã«), (3) 対 (5), (女åª) å ¼ (主婦) -#åè©-æ¥ç¶è©ç -# -# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are -# semantically verb-like. -# e.g. ããã, ã覧, 御覧, é æ´ -#åè©-åè©éèªç«ç -# -# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, -# dialects, English, etc. Currently, the only entry for åè© å¼ç¨æåå ("noun quotation") -# is ããã ("iwaku"). -#åè©-å¼ç¨æåå -# -# noun-nai_adjective: Words that appear before the auxiliary verb ãªã ("nai") and -# behave like an adjective. -# e.g. ç³ã訳, 仿¹, ã¨ãã§ã, éã -#åè©-ãã¤å½¢å®¹è©èªå¹¹ -# -##### -# prefix: unclassified prefixes -#æ¥é è© -# -# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) -# excluding numerical expressions. -# e.g. ã (æ°´), æ (æ°), å (社), æ (ï½æ°), é« (å質), ã (è¦äº), ã (ç«æ´¾) -#æ¥é è©-åè©æ¥ç¶ -# -# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb -# in conjunctive form followed by ãªã/ãªãã/ãã ãã. -# e.g. ã (èªã¿ãªãã), ã (座ã) -#æ¥é è©-åè©æ¥ç¶ -# -# prefix-adjectival: Prefixes that attach to adjectives. -# e.g. ã (å¯ãã§ããã), ãã« (ã§ãã) -#æ¥é è©-å½¢å®¹è©æ¥ç¶ -# -# prefix-numerical: Prefixes that attach to numerical expressions. -# e.g. 
ç´, ããã, æ¯æ -#æ¥é è©-æ°æ¥ç¶ -# -##### -# verb: unclassified verbs -#åè© -# -# verb-main: -#åè©-èªç« -# -# verb-auxiliary: -#åè©-éèªç« -# -# verb-suffix: -#åè©-æ¥å°¾ -# -##### -# adjective: unclassified adjectives -#å½¢å®¹è© -# -# adjective-main: -#形容è©-èªç« -# -# adjective-auxiliary: -#形容è©-éèªç« -# -# adjective-suffix: -#形容è©-æ¥å°¾ -# -##### -# adverb: unclassified adverbs -#å¯è© -# -# adverb-misc: Words that can be segmented into one unit and where adnominal -# modification is not possible. -# e.g. ãããããã, å¤å -#å¯è©-ä¸è¬ -# -# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«, -# ãª, ãã, ã , etc. -# e.g. ãããªã«, ãããªã«, ãããªã«, ãªã«ã, ãªãã§ã -#å¯è©-å©è©é¡æ¥ç¶ -# -##### -# adnominal: Words that only have noun-modifying forms. -# e.g. ãã®, ãã®, ãã®, ã©ã®, ãããã, ãªãããã®, ä½ããã®, ããããª, ãããã, ãããã, ãããã, -# ã©ããã, ãããª, ãããª, ãããª, ã©ããª, 大ããª, å°ããª, ããããª, ã»ãã®, ãããã, -# ã(, ã) ãã (ãã¨ãªãã)ã, å¾®ã ãã, å ã ãã, åãªã, ãããªã, æãããåã, 亡ã -#é£ä½è© -# -##### -# conjunction: Conjunctions that can occur independently. -# e.g. ã, ããã©ã, ããã¦, ããã, ããã©ããã -æ¥ç¶è© -# -##### -# particle: unclassified particles. -å©è© -# -# particle-case: case particles where the subclassification is undefined. -å©è©-æ ¼å©è© -# -# particle-case-misc: Case particles. -# e.g. ãã, ã, ã§, ã¨, ã«, ã¸, ãã, ã, ã®, ã«ã¦ -å©è©-æ ¼å©è©-ä¸è¬ -# -# particle-case-quote: the "to" that appears after nouns, a personâs speech, -# quotation marks, expressions of decisions from a meeting, reasons, judgements, -# conjectures, etc. -# e.g. ( ã ) 㨠(è¿°ã¹ã.), ( ã§ãã) 㨠(ãã¦å·è¡ç¶äº...) -å©è©-æ ¼å©è©-å¼ç¨ -# -# particle-case-compound: Compounds of particles and verbs that mainly behave -# like case particles. -# e.g. 
ã¨ãã, ã¨ãã£ã, ã¨ããã, ã¨ãã¦, ã¨ã¨ãã«, ã¨å ±ã«, ã§ãã£ã¦, ã«ããã£ã¦, ã«å½ãã£ã¦, ã«å½ã£ã¦, -# ã«ããã, ã«å½ãã, ã«å½ã, ã«å½ãã, ã«ããã, ã«ããã¦, ã«æ¼ãã¦,ã«æ¼ã¦, ã«ããã, ã«æ¼ãã, -# ã«ãã, ã«ããã¦, ã«ããã, ã«é¢ã, ã«ãããã¦, ã«é¢ãã¦, ã«ãããã, ã«é¢ãã, ã«éã, -# ã«éãã¦, ã«ãããã, ã«å¾ã, ã«å¾ã, ã«ãããã£ã¦, ã«å¾ã£ã¦, ã«ããã, ã«å¯¾ã, ã«ãããã¦, -# ã«å¯¾ãã¦, ã«ãããã, ã«å¯¾ãã, ã«ã¤ãã¦, ã«ã¤ã, ã«ã¤ã, ã«ã¤ãã¦, ã«ã¤ã, ã«ã¤ãã¦, ã«ã¨ã£ã¦, -# ã«ã¨ã, ã«ã¾ã¤ãã, ã«ãã£ã¦, ã«ä¾ã£ã¦, ã«å ã£ã¦, ã«ãã, ã«ä¾ã, ã«å ã, ã«ãã, ã«ä¾ã, ã«å ã, -# ã«ããã£ã¦, ã«ããã, ããã£ã¦, ã以ã£ã¦, ãéã, ãéãã¦, ãéãã¦, ãããã£ã¦, ãããã, ãããã, -# ã£ã¦-å£èª/, ã¡ã ã-é¢è¥¿å¼ãã¨ããã/, (ä½) ã¦ãã (人)-å£èª/, ã£ã¦ãã-å£èª/, ã¨ããµ, ã¨ãããµ -å©è©-æ ¼å©è©-é£èª -# -# particle-conjunctive: -# e.g. ãã, ããã«ã¯, ã, ããã©, ããã©ã, ãã©, ã, ã¤ã¤, ã¦, ã§, ã¨, ã¨ããã, ã©ããã, ã¨ã, ã©ã, -# ãªãã, ãªã, ã®ã§, ã®ã«, ã°, ãã®ã®, ã ( ãã), ãããªã, (ããã) ãã(ãããªã)-å£èª/, -# (è¡ã£) ã¡ã(ãããªã)-å£èª/, (è¨ã£) ãã£ã¦ (ãããããªã)-å£èª/, (ããããªã)ã£ãã£ã¦ (å¹³æ°)-å£èª/ -å©è©-æ¥ç¶å©è© -# -# particle-dependency: -# e.g. ãã, ãã, ãã, ãã, ã¯, ã, ã -å©è©-ä¿å©è© -# -# particle-adverbial: -# e.g. ãã¦ã, ãã, ããã, ä½, ããã, ãã, (妿 ¡) ãã(ãããæµè¡ã£ã¦ãã)-å£èª/, -# (ãã)ããã (ãããªã)-å£èª/, ãã¤, (ç§) ãªã, ãªã©, (ç§) ãªã (ã«), (å ç) ãªãã (大å«ã)-å£èª/, -# (ç§) ãªãã, (å ç) ãªã㦠(大å«ã)-å£èª/, ã®ã¿, ã ã, (ç§) ã ã£ã¦-å£èª/, ã ã«, -# (å½¼)ã£ãã-å£èª/, (ãè¶) ã§ã (ããã), ç (ã¨ã), (ä»å¾) ã¨ã, ã°ãã, ã°ã£ã-å£èª/, ã°ã£ãã-å£èª/, -# ã»ã©, ç¨, ã¾ã§, è¿, (誰) ã (ã)([å©è©-æ ¼å©è©] ããã³ [å©è©-ä¿å©è©] ã®åã«ä½ç½®ããããã) -å©è©-å¯å©è© -# -# particle-interjective: particles with interjective grammatical roles. -# e.g. (æ¾å³¶) ã -å©è©-éæå©è© -# -# particle-coordinate: -# e.g. ã¨, ãã, ã ã®, ã ã, ã¨ã, ãªã, ã, ãã -å©è©-並ç«å©è© -# -# particle-final: -# e.g. 
ãã, ããã, ã, ã, (ã )ã£ã-å£èª/, (ã¨ã¾ã£ã¦ã) ã§-æ¹è¨/, ãª, ã, ãªã-å£èª/, ã, ã, ã, -# ãã-å£èª/, ãã-å£èª/, ãã-æ¹è¨/, ã®, ã®ã-å£èª/, ã, ã, ã¨, ãã-å£èª/, ã, ãã-å£èª/ -å©è©-çµå©è© -# -# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is -# adverbial, conjunctive, or sentence final. For example: -# (a) ãA ã B ãã. Ex:ã(å½å ã§éç¨ãã) ã,(æµ·å¤ã§éç¨ãã) ã (.)ã -# (b) Inside an adverb phrase. Ex:ã(幸ãã¨ãã) ã (, æ»è ã¯ããªãã£ã.)ã -# ã(ç¥ããå±ãããã) ã (, 試é¨ã«åæ ¼ãã.)ã -# (c) ããã®ããã«ã. Ex:ã(ä½ããªãã£ã) ã (ã®ããã«æ¯ãèã£ã.)ã -# e.g. ã -å©è©-å¯å©è©ï¼ä¸¦ç«å©è©ï¼çµå©è© -# -# particle-adnominalizer: The "no" that attaches to nouns and modifies -# non-inflectional words. -å©è©-é£ä½å -# -# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs -# that are giongo, giseigo, or gitaigo. -# e.g. ã«, 㨠-å©è©-å¯è©å -# -# particle-special: A particle that does not fit into one of the above classifications. -# This includes particles that are used in Tanka, Haiku, and other poetry. -# e.g. ããª, ãã, ( ããã ãã) ã«, (ããã) ã«ã(ãããã), (俺) ã (å®¶) -å©è©-ç¹æ® -# -##### -# auxiliary-verb: -å©åè© -# -##### -# interjection: Greetings and other exclamations. -# e.g. ãã¯ãã, ãã¯ãããããã¾ã, ããã«ã¡ã¯, ããã°ãã¯, ãããã¨ã, ã©ãããããã¨ã, ãããã¨ããããã¾ã, -# ããã ãã¾ã, ãã¡ãããã¾, ãããªã, ããããªã, ã¯ã, ããã, ããã, ããããªãã -#æåè© -# -##### -# symbol: unclassified Symbols. -è¨å· -# -# symbol-misc: A general symbol not in one of the categories below. -# e.g. [ââ@$ãâ+] -è¨å·-ä¸è¬ -# -# symbol-comma: Commas -# e.g. [,ã] -è¨å·-èªç¹ -# -# symbol-period: Periods and full stops. -# e.g. [.ï¼ã] -è¨å·-å¥ç¹ -# -# symbol-space: Full-width whitespace. -è¨å·-ç©ºç½ -# -# symbol-open_bracket: -# e.g. [({ââãã] -è¨å·-æ¬å¼§é -# -# symbol-close_bracket: -# e.g. [)}ââããã] -è¨å·-æ¬å¼§é -# -# symbol-alphabetic: -#è¨å·-ã¢ã«ãã¡ããã -# -##### -# other: unclassified other -#ãã®ä» -# -# other-interjection: Words that are hard to classify as noun-suffixes or -# sentence-final particles. -# e.g. 
(ã )ã¡ -ãã®ä»-éæ -# -##### -# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. -# e.g. ãã®, ããã¨, ã㨠-ãã£ã©ã¼ -# -##### -# non-verbal: non-verbal sound. -éè¨èªé³ -# -##### -# fragment: -#èªæç -# -##### -# unknown: unknown part of speech. -#æªç¥èª -# -##### End of file http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_ar.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_ar.txt b/contrib/solr/logs/conf/lang/stopwords_ar.txt deleted file mode 100644 index 4890c4f..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_ar.txt +++ /dev/null @@ -1,139 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -# Cleaned on October 11, 2009 (not normalized, so use before normalization) -# This means that when modifying this list, you might need to add some -# redundant entries, for example containing forms with both Ø£ and ا -Ù Ù -ÙÙ Ù -Ù ÙÙØ§ -Ù ÙÙ -ÙÙ -ÙÙÙ -ÙÙÙØ§ -ÙÙÙ -Ù -Ù -ث٠-ا٠-Ø£Ù -ب -Ø¨ÙØ§ -ب٠-ا -Ø£ -ا٠-ا٠-Ø£Ù -Ø£Ù -ÙØ§ -ÙÙØ§ -Ø§ÙØ§ -Ø£ÙØ§ -Ø¥ÙØ§ -ÙÙÙ -٠ا -Ù٠ا -Ù٠ا -Ù٠ا -ع٠-٠ع -اذا -إذا -ا٠-Ø£Ù -Ø¥Ù -اÙÙØ§ -Ø£ÙÙØ§ -Ø¥ÙÙØ§ -اÙÙ -Ø£ÙÙ -Ø¥ÙÙ -با٠-بأ٠-ÙØ§Ù -ÙØ£Ù -ÙØ§Ù -ÙØ£Ù -ÙØ¥Ù -Ø§ÙØªÙ -Ø§ÙØªÙ -Ø§ÙØ°Ù -Ø§ÙØ°Ù -Ø§ÙØ°ÙÙ -اÙÙ -اÙÙ -Ø¥ÙÙ -Ø¥ÙÙ -عÙÙ -عÙÙÙØ§ -عÙÙÙ -ا٠ا -أ٠ا -إ٠ا -Ø§ÙØ¶Ø§ -Ø£ÙØ¶Ø§ -ÙÙ -ÙÙÙ -ÙÙ -ÙÙÙ -ÙÙ -ÙÙÙ -ÙÙ -ÙÙ -ÙÙ -ÙÙÙ -ÙÙÙ -ÙÙÙ -ÙÙÙ -ÙÙÙ -ÙÙÙ -Ø§ÙØª -Ø£ÙØª -ÙÙ -ÙÙØ§ -ÙÙ -ÙØ°Ù -ÙØ°Ø§ -تÙÙ -ذÙÙ -ÙÙØ§Ù -ÙØ§Ùت -ÙØ§Ù -ÙÙÙÙ -تÙÙÙ -ÙÙØ§Ùت -ÙÙØ§Ù -ØºÙØ± -بعض -ÙØ¯ -ÙØÙ -بÙÙ -بÙÙ٠ا -Ù ÙØ° -ض٠٠-ØÙØ« -Ø§ÙØ§Ù -Ø§ÙØ¢Ù -Ø®ÙØ§Ù -بعد -ÙØ¨Ù -ØØªÙ -Ø¹ÙØ¯ -Ø¹ÙØ¯Ù ا -ÙØ¯Ù -Ø¬Ù ÙØ¹ http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_bg.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_bg.txt b/contrib/solr/logs/conf/lang/stopwords_bg.txt deleted file mode 100644 index 1d2af6c..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_bg.txt +++ /dev/null @@ -1,207 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -а -аз -ако -ала -бе -без -беÑе -би -бил -била -били -било -близо -бÑÐ´Ð°Ñ -бÑде -бÑÑ Ð° -в -Ð²Ð°Ñ -Ð²Ð°Ñ -ваÑа -веÑоÑÑно -веÑе -взема -ви -вие -винаги -вÑе -вÑеки -вÑиÑки -вÑиÑко -вÑÑка -вÑв -вÑпÑеки -вÑÑÑ Ñ -г -ги -главно -го -д -да -дали -до -докаÑо -докога -доÑи -доÑега -доÑÑа -е -едва -един -еÑо -за -зад -заедно -заÑади -заÑега -заÑова -заÑо -заÑоÑо -и -из -или -им -има -Ð¸Ð¼Ð°Ñ -иÑка -й -каза -как -каква -какво -какÑо -какÑв -каÑо -кога -когаÑо -коеÑо -коиÑо -кой -койÑо -колко -коÑÑо -кÑде -кÑдеÑо -кÑм -ли -м -ме -Ð¼ÐµÐ¶Ð´Ñ -мен -ми -мнозина -мога -Ð¼Ð¾Ð³Ð°Ñ -може -Ð¼Ð¾Ð»Ñ -моменÑа -Ð¼Ñ -н -на -над -назад -най -напÑави -напÑед -напÑÐ¸Ð¼ÐµÑ -Ð½Ð°Ñ -не -него -Ð½ÐµÑ -ни -ние -никой -ниÑо -но -нÑкои -нÑкой -нÑма -обаÑе -около -оÑвен -оÑобено -Ð¾Ñ -оÑгоÑе -оÑново -оÑе -пак -по -повеÑе -повеÑеÑо -под -поне -поÑади -поÑле -поÑÑи -пÑави -пÑед -пÑеди -пÑез -пÑи -пÑк -пÑÑво -Ñ -Ñа -Ñамо -Ñе -Ñега -Ñи -ÑкоÑо -Ñлед -Ñме -ÑпоÑед -ÑÑед -ÑÑеÑÑ -ÑÑе -ÑÑм -ÑÑÑ -ÑÑÑо -Ñ -Ñази -Ñака -Ñакива -ÑакÑв -Ñам -Ñвой -Ñе -Ñези -Ñи -Ñн -Ñо -Ñова -Ñогава -Ñози -Ñой -Ñолкова -ÑоÑно -ÑÑÑбва -ÑÑк -ÑÑй -ÑÑ -ÑÑÑ -Ñ -Ñ Ð°ÑеÑва -Ñ -Ñе -ÑеÑÑо -ÑÑез -Ñе -Ñом -Ñ http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_ca.txt ---------------------------------------------------------------------- diff --git 
a/contrib/solr/logs/conf/lang/stopwords_ca.txt b/contrib/solr/logs/conf/lang/stopwords_ca.txt deleted file mode 100644 index 795b6a9..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_ca.txt +++ /dev/null @@ -1,234 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) -a -abans -acà -ah -aixà -això -al -als -aleshores -algun -alguna -algunes -alguns -alhora -allà -allà -allò -altra -altre -altres -amb -ambdós -ambdues -apa -aquell -aquella -aquelles -aquells -aquest -aquesta -aquestes -aquests -aquà -baix -cada -cadascú -cadascuna -cadascunes -cadascuns -com -contra -d'un -d'una -d'unes -d'uns -dalt -de -del -dels -des -després -dins -dintre -donat -doncs -durant -e -eh -el -els -em -en -encara -ens -entre -érem -eren -éreu -es -és -esta -està -està vem -estaven -està veu -esteu -et -etc -ets -fins -fora -gairebé -ha -han -has -havia -he -hem -heu -hi -ho -i -igual -iguals -ja -l'hi -la -les -li -li'n -llavors -m'he -ma -mal -malgrat -mateix -mateixa -mateixes -mateixos -me -mentre -més -meu -meus -meva -meves -molt -molta -moltes -molts -mon -mons -n'he -n'hi -ne -ni -no -nogensmenys -només -nosaltres -nostra -nostre -nostres -o -oh -oi -on -pas -pel -pels -per -però -perquè -poc -poca -pocs -poques -potser -propi -qual -quals -quan -quant -que -què -quelcom -qui -quin -quina -quines -quins -s'ha -s'han -sa -semblant -semblants -ses -seu -seus -seva -seva -seves -si -sobre -sobretot -sóc -solament -sols -son -són -sons -sota -sou -t'ha -t'han -t'he -ta -tal -també -tampoc -tan -tant -tanta -tantes -teu -teus -teva -teves -ton -tons -tot -tota -totes -tots -un -una -unes -uns -us -va -vaig -vam -van -vas -veu -vosaltres -vostra -vostre -vostres http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_ckb.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_ckb.txt b/contrib/solr/logs/conf/lang/stopwords_ckb.txt deleted file mode 100644 index e9b5b8d..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_ckb.txt +++ /dev/null @@ -1,150 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license 
agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# set of kurdish stopwords -# note these have been normalized with our scheme (e represented with U+06D5, etc) -# constructed from: -# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al) -# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston) -# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc - -# and -Ù -# which -Ú©Û -# of -Û -# made/did -کرد -# that/which -ئÛÙÛÛ -# on/head -Ø³ÛØ± -# two -دÙÙ -# also -ÙÛØ±ÙÛÙØ§ -# from/that -ÙÛÙ -# makes/does -دÛکات -# some -ÚÛÙØ¯ -# every -ÙÛØ± - -# demonstratives -# that -ئÛÙ -# this -ئÛÙ - -# personal pronouns -# I -Ù Ù -# we -ئÛÙ Û -# you -ØªÛ -# you -ئÛÙÛ -# he/she/it -ئÛÙ -# they -ئÛÙØ§Ù - -# prepositions -# to/with/by -Ø¨Û -Ù¾Û -# without -Ø¨ÛØ¨Û -# along with/while/during -Ø¨ÛØ¯ÛÙ -# in the opinion of -بÛÙØ§Û -# according to -بÛÙ¾ÛÛ -# before -Ø¨ÛØ±ÙÛ -# in the direction of -Ø¨ÛØ±ÛÙÛ -# in front of/toward -Ø¨ÛØ±ÛÙÛ -# before/in the face of -Ø¨ÛØ±Ø¯ÛÙ -# without -Ø¨Û -# except for -Ø¨ÛØ¬Ú¯Û -# for -Ø¨Û -# on/in -Ø¯Û -ØªÛ -# with -دÛÚ¯ÛÚµ -# after -Ø¯ÙØ§Û -# except for/aside from -Ø¬Ú¯Û -# in/from -ÙÛ -ÙÛ -# in front of/before/because of -ÙÛØ¨Ûر -# between/among -ÙÛØ¨ÛÛÙÛ -# concerning/about -ÙÛØ¨Ø§Ø¨Ûت -# concerning -ÙÛØ¨Ø§Ø±ÛÛ -# instead of -ÙÛØ¨Ø§ØªÛ -# beside 
-ÙÛØ¨Ù -# instead of -ÙÛØ¨Ø±ÛØªÛ -# behind -ÙÛØ¯ÛÙ -# with/together with -ÙÛÚ¯ÛÚµ -# by -ÙÛÙØ§ÛÛÙ -# within -ÙÛÙØ§Ù -# between/among -ÙÛÙÛÙ -# for the sake of -ÙÛÙ¾ÛÙØ§ÙÛ -# with respect to -ÙÛØ±ÛÙÛ -# by means of/for -ÙÛØ±Û -# for the sake of -ÙÛØ±Ûگا -# on/on top of/according to -ÙÛØ³Ûر -# under -ÙÛÚÛØ± -# between/among -ÙØ§Ù -# between/among -ÙÛÙØ§Ù -# after -پاش -# before -Ù¾ÛØ´ -# like -ÙÛÚ© http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_cz.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_cz.txt b/contrib/solr/logs/conf/lang/stopwords_cz.txt deleted file mode 100644 index a876049..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_cz.txt +++ /dev/null @@ -1,186 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-a -s -k -o -i -u -v -z -dnes -cz -tÃmto -budeÅ¡ -budem -byli -jseÅ¡ -můj -svým -ta -tomto -tohle -tuto -tyto -jej -zda -proÄ -máte -tato -kam -tohoto -kdo -kteÅà -mi -nám -tom -tomuto -mÃt -nic -proto -kterou -byla -toho -protože -asi -ho -naÅ¡i -napiÅ¡te -re -což -tÃm -takže -svých -jejà -svými -jste -aj -tu -tedy -teto -bylo -kde -ke -pravé -ji -nad -nejsou -Äi -pod -téma -mezi -pÅes -ty -pak -vám -ani -když -vÅ¡ak -neg -jsem -tento -Älánku -Älánky -aby -jsme -pÅed -pta -jejich -byl -jeÅ¡tÄ -až -bez -také -pouze -prvnà -vaÅ¡e -která -nás -nový -tipy -pokud -může -strana -jeho -své -jiné -zprávy -nové -nenà -vás -jen -podle -zde -už -být -vÃce -bude -již -než -který -by -které -co -nebo -ten -tak -má -pÅi -od -po -jsou -jak -dalšà -ale -si -se -ve -to -jako -za -zpÄt -ze -do -pro -je -na -atd -atp -jakmile -pÅiÄemž -já -on -ona -ono -oni -ony -my -vy -jà -ji -mÄ -mne -jemu -tomu -tÄm -tÄmu -nÄmu -nÄmuž -jehož -jÞ -jelikož -jež -jakož -naÄež http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_da.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_da.txt b/contrib/solr/logs/conf/lang/stopwords_da.txt deleted file mode 100644 index 42e6145..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_da.txt +++ /dev/null @@ -1,110 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Danish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. 
- - -og | and -i | in -jeg | I -det | that (dem. pronoun)/it (pers. pronoun) -at | that (in front of a sentence)/to (with infinitive) -en | a/an -den | it (pers. pronoun)/that (dem. pronoun) -til | to/at/for/until/against/by/of/into, more -er | present tense of "to be" -som | who, as -pÃ¥ | on/upon/in/on/at/to/after/of/with/for, on -de | they -med | with/by/in, along -han | he -af | of/by/from/off/for/in/with/on, off -for | at/for/to/from/by/of/ago, in front/before, because -ikke | not -der | who/which, there/those -var | past tense of "to be" -mig | me/myself -sig | oneself/himself/herself/itself/themselves -men | but -et | a/an/one, one (number), someone/somebody/one -har | present tense of "to have" -om | round/about/for/in/a, about/around/down, if -vi | we -min | my -havde | past tense of "to have" -ham | him -hun | she -nu | now -over | over/above/across/by/beyond/past/on/about, over/past -da | then, when/as/since -fra | from/off/since, off, since -du | you -ud | out -sin | his/her/its/one's -dem | them -os | us/ourselves -op | up -man | you/one -hans | his -hvor | where -eller | or -hvad | what -skal | must/shall etc. -selv | myself/youself/herself/ourselves etc., even -her | here -alle | all/everyone/everybody etc. 
-vil | will (verb) -blev | past tense of "to stay/to remain/to get/to become" -kunne | could -ind | in -nÃ¥r | when -være | present tense of "to be" -dog | however/yet/after all -noget | something -ville | would -jo | you know/you see (adv), yes -deres | their/theirs -efter | after/behind/according to/for/by/from, later/afterwards -ned | down -skulle | should -denne | this -end | than -dette | this -mit | my/mine -ogsÃ¥ | also -under | under/beneath/below/during, below/underneath -have | have -dig | you -anden | other -hende | her -mine | my -alt | everything -meget | much/very, plenty of -sit | his, her, its, one's -sine | his, her, its, one's -vor | our -mod | against -disse | these -hvis | if -din | your/yours -nogle | some -hos | by/at -blive | be/become -mange | many -ad | by/through -bliver | present tense of "to be/to become" -hendes | her/hers -været | be -thi | for (conj) -jer | you -sÃ¥dan | such, like this/like that http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_de.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_de.txt b/contrib/solr/logs/conf/lang/stopwords_de.txt deleted file mode 100644 index 86525e7..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_de.txt +++ /dev/null @@ -1,294 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A German stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | The number of forms in this list is reduced significantly by passing it - | through the German stemmer. 
- - -aber | but - -alle | all -allem -allen -aller -alles - -als | than, as -also | so -am | an + dem -an | at - -ander | other -andere -anderem -anderen -anderer -anderes -anderm -andern -anderr -anders - -auch | also -auf | on -aus | out of -bei | by -bin | am -bis | until -bist | art -da | there -damit | with it -dann | then - -der | the -den -des -dem -die -das - -daà | that - -derselbe | the same -derselben -denselben -desselben -demselben -dieselbe -dieselben -dasselbe - -dazu | to that - -dein | thy -deine -deinem -deinen -deiner -deines - -denn | because - -derer | of those -dessen | of him - -dich | thee -dir | to thee -du | thou - -dies | this -diese -diesem -diesen -dieser -dieses - - -doch | (several meanings) -dort | (over) there - - -durch | through - -ein | a -eine -einem -einen -einer -eines - -einig | some -einige -einigem -einigen -einiger -einiges - -einmal | once - -er | he -ihn | him -ihm | to him - -es | it -etwas | something - -euer | your -eure -eurem -euren -eurer -eures - -für | for -gegen | towards -gewesen | p.p. 
of sein -hab | have -habe | have -haben | have -hat | has -hatte | had -hatten | had -hier | here -hin | there -hinter | behind - -ich | I -mich | me -mir | to me - - -ihr | you, to her -ihre -ihrem -ihren -ihrer -ihres -euch | to you - -im | in + dem -in | in -indem | while -ins | in + das -ist | is - -jede | each, every -jedem -jeden -jeder -jedes - -jene | that -jenem -jenen -jener -jenes - -jetzt | now -kann | can - -kein | no -keine -keinem -keinen -keiner -keines - -können | can -könnte | could -machen | do -man | one - -manche | some, many a -manchem -manchen -mancher -manches - -mein | my -meine -meinem -meinen -meiner -meines - -mit | with -muss | must -musste | had to -nach | to(wards) -nicht | not -nichts | nothing -noch | still, yet -nun | now -nur | only -ob | whether -oder | or -ohne | without -sehr | very - -sein | his -seine -seinem -seinen -seiner -seines - -selbst | self -sich | herself - -sie | they, she -ihnen | to them - -sind | are -so | so - -solche | such -solchem -solchen -solcher -solches - -soll | shall -sollte | should -sondern | but -sonst | else -über | over -um | about, around -und | and - -uns | us -unse -unsem -unsen -unser -unses - -unter | under -viel | much -vom | von + dem -von | from -vor | before -während | while -war | was -waren | were -warst | wast -was | what -weg | away, off -weil | because -weiter | further - -welche | which -welchem -welchen -welcher -welches - -wenn | when -werde | will -werden | will -wie | how -wieder | again -will | want -wir | we -wird | will -wirst | willst -wo | where -wollen | want -wollte | wanted -würde | would -würden | would -zu | to -zum | zu + dem -zur | zu + der -zwar | indeed -zwischen | between - http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_el.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_el.txt b/contrib/solr/logs/conf/lang/stopwords_el.txt deleted file 
mode 100644 index b8b12e8..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_el.txt +++ /dev/null @@ -1,92 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Lucene Greek Stopwords list -# Note: by default this file is used after GreekLowerCaseFilter, -# so when modifying this file use 'Ï' instead of 'Ï' -ο -η -Ïο -οι -Ïα -ÏÎ¿Ï -ÏÎ·Ï -ÏÏν -Ïον -Ïην -και -κι -κ -ειμαι -ειÏαι -ειναι -ειμαÏÏε -ειÏÏε -ÏÏο -ÏÏον -ÏÏη -ÏÏην -μα -αλλα -αÏο -για -ÏÏÎ¿Ï -με -Ïε -ÏÏ -ÏαÏα -ανÏι -καÏα -μεÏα -θα -να -δε -δεν -μη -μην -εÏι -ÎµÎ½Ï -εαν -αν -ÏοÏε -ÏÎ¿Ï -ÏÏÏ -ÏÎ¿Î¹Î¿Ï -Ïοια -Ïοιο -Ïοιοι -ÏÎ¿Î¹ÎµÏ -ÏοιÏν -ÏÎ¿Î¹Î¿Ï Ï -Î±Ï ÏÎ¿Ï -Î±Ï Ïη -Î±Ï Ïο -Î±Ï Ïοι -Î±Ï ÏÏν -Î±Ï ÏÎ¿Ï Ï -Î±Ï ÏÎµÏ -Î±Ï Ïα -ÎµÎºÎµÎ¹Î½Î¿Ï -εκεινη -εκεινο -εκεινοι -ÎµÎºÎµÎ¹Î½ÎµÏ -εκεινα -εκεινÏν -ÎµÎºÎµÎ¹Î½Î¿Ï Ï -οÏÏÏ -ομÏÏ -ιÏÏÏ -οÏο -οÏι http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_en.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_en.txt b/contrib/solr/logs/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software 
Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with
