http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_es.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_es.txt b/contrib/solr/logs/conf/lang/stopwords_es.txt deleted file mode 100644 index 487d78c..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_es.txt +++ /dev/null @@ -1,356 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Spanish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | from, of -la | the, her -que | who, that -el | the -en | in -y | and -a | to -los | the, them -del | de + el -se | himself, from him etc -las | the, them -por | for, by, etc -un | a -para | for -con | with -no | no -una | a -su | his, her -al | a + el - | es from SER -lo | him -como | how -más | more -pero | but -sus | su plural -le | to him, her -ya | already -o | or - | fue from SER -este | this - | ha from HABER -sí | himself etc -porque | because -esta | this - | son from SER -entre | between - | está from ESTAR -cuando | when -muy | very -sin | without -sobre | on - | ser from SER - | tiene from TENER -también | also -me | me -hasta | until -hay | there is/are -donde | where - | han from HABER -quien | whom, that - | están from ESTAR - | estado from ESTAR -desde | from -todo | all -nos | us -durante | during - | estados from ESTAR -todos | all -uno | a -les | to them -ni | nor -contra | against -otros | other - | fueron from SER -ese | that -eso | that - | había from HABER -ante | before -ellos | they -e | and (variant of y) -esto | this -mí | me -antes | before -algunos | some -qué | what? 
-unos | a -yo | I -otro | other -otras | other -otra | other -él | he -tanto | so much, many -esa | that -estos | these -mucho | much, many -quienes | who -nada | nothing -muchos | many -cual | who - | sea from SER -poco | few -ella | she -estar | to be - | haber from HABER -estas | these - | estaba from ESTAR - | estamos from ESTAR -algunas | some -algo | something -nosotros | we - - | other forms - -mi | me -mis | mi plural -tú | thou -te | thee -ti | thee -tu | thy -tus | tu plural -ellas | they -nosotras | we -vosotros | you -vosotras | you -os | you -mÃo | mine -mÃa | -mÃos | -mÃas | -tuyo | thine -tuya | -tuyos | -tuyas | -suyo | his, hers, theirs -suya | -suyos | -suyas | -nuestro | ours -nuestra | -nuestros | -nuestras | -vuestro | yours -vuestra | -vuestros | -vuestras | -esos | those -esas | those - - | forms of estar, to be (not including the infinitive): -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estarÃa -estarÃas -estarÃamos -estarÃais -estarÃan -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad - - | forms of haber, to have (not including the infinitive): -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habrÃa -habrÃas -habrÃamos -habrÃais -habrÃan -habÃa -habÃas -habÃamos -habÃais -habÃan -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas - - | forms of ser, to be (not including the infinitive): -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -serÃa 
-serÃas -serÃamos -serÃais -serÃan -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido - | sed also means 'thirst' - - | forms of tener, to have (not including the infinitive): -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendrÃa -tendrÃas -tendrÃamos -tendrÃais -tendrÃan -tenÃa -tenÃas -tenÃamos -tenÃais -tenÃan -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened -
http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_eu.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_eu.txt b/contrib/solr/logs/conf/lang/stopwords_eu.txt deleted file mode 100644 index 687c945..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_eu.txt +++ /dev/null @@ -1,113 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# example set of basque stopwords -al -anitz -arabera -asko -baina -bat -batean -batek -bati -batzuei -batzuek -batzuetan -batzuk -bera -beraiek -berau -berauek -bere -berori -beroriek -beste -bezala -da -dago -dira -ditu -du -dute -edo -egin -ere -eta -eurak -ez -gainera -gu -gutxi -guzti -haiei -haiek -haietan -hainbeste -hala -han -handik -hango -hara -hari -hark -hartan -hau -hauei -hauek -hauetan -hemen -hemendik -hemengo -hi -hona -honek -honela -honetan -honi -hor -hori -horiei -horiek -horietan -horko -horra -horrek -horrela -horretan -horri -hortik -hura -izan -ni -noiz -nola -non -nondik -nongo -nor -nora -ze -zein -zen -zenbait -zenbat -zer -zergatik -ziren -zituen -zu -zuek -zuen -zuten http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_fa.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_fa.txt b/contrib/solr/logs/conf/lang/stopwords_fa.txt deleted file mode 100644 index 723641c..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_fa.txt +++ /dev/null @@ -1,313 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -# Note: by default this file is used after normalization, so when adding entries -# to this file, use the arabic 'Ù' instead of 'Û' -Ø§ÙØ§Ù -ÙØ¯Ø§Ø´ØªÙ -سراسر -Ø®ÙØ§Ù -Ø§ÙØ´Ø§Ù -ÙÙ -تاÙÙÙÙ -Ø¨ÙØ´ØªØ±Ù -دÙÙ -پس -ÙØ§Ø´Ù -ÙÚ¯Ù -ÙØ§ -Ø¯Ø§Ø´ØªÙØ¯ -سپس -ÙÙگا٠-ÙØ±Ú¯Ø² -Ù¾ÙØ¬ -ÙØ´Ø§Ù -ا٠سا٠-دÙگر -گرÙÙÙ -Ø´Ø¯ÙØ¯ -ÚØ·Ùر -د٠-Ù -د٠-ÙØ®Ø³ØªÙÙ -ÙÙÙ -ÚØ±Ø§ -ÚÙ -ÙØ³Ø· -Ù -ÙØ¯Ø§Ù -ÙØ§Ø¨Ù -ÙÙ -Ø±ÙØª -ÙÙØª -ÙÙ ÚÙÙÙ -در -ÙØ²Ø§Ø± -بÙÙ -بÙÙ -Ø´Ø§ÙØ¯ -ا٠ا -Ø´ÙØ§Ø³Ù -Ú¯Ø±ÙØªÙ -Ø¯ÙØ¯ -داشت٠-Ø¯Ø§ÙØ³Øª -داشت٠-Ø®ÙØ§ÙÙÙ -Ù ÙÙÙØ§Ø±Ø¯ -ÙÙØªÙÙÙ -ا٠د -Ø®ÙØ§Ùد -جز -Ø§ÙØ±Ø¯Ù -شد٠-بÙÙÙ -خد٠ات -شد٠-برخ٠-ÙØ¨Ùد -Ø¨Ø³ÙØ§Ø±Ù -جÙÙÚ¯ÙØ±Ù -ØÙ -ÙØ±Ø¯Ùد -ÙÙØ¹Ù -بعر٠-ÙÙØ±Ø¯Ù -ÙØ¸Ùر -ÙØ¨Ø§Ùد -Ø¨ÙØ¯Ù -Ø¨ÙØ¯Ù -داد -Ø§ÙØ±Ø¯ -ÙØ³Øª -جاÙÙ -Ø´ÙØ¯ -Ø¯ÙØ¨Ø§Ù -داد٠-Ø¨Ø§ÙØ¯ -ساب٠-ÙÙÚ -Ù٠ا٠-Ø§ÙØ¬Ø§ -Ù٠تر -ÙØ¬Ø§Ø³Øª -گردد -ÙØ³Ù -تر -٠رد٠-تا٠-داد٠-Ø¨ÙØ¯Ùد -سر٠-جدا -ÙØ¯Ø§Ø±Ùد -٠گر -ÙÙØ¯Ùگر -دارد -دÙÙØ¯ -Ø¨ÙØ§Ø¨Ø±Ø§ÙÙ -ÙÙگا٠٠-س٠ت -جا -اÙÚÙ -Ø®ÙØ¯ -Ø¯Ø§Ø¯ÙØ¯ -Ø²ÙØ§Ø¯ -Ø¯Ø§Ø±ÙØ¯ -اثر -بدÙÙ -Ø¨ÙØªØ±ÙÙ -Ø¨ÙØ´ØªØ± -Ø§ÙØ¨ØªÙ -ب٠-براساس -Ø¨ÙØ±ÙÙ -ÙØ±Ø¯ -بعض٠-Ú¯Ø±ÙØª -تÙÙ -ا٠-Ù ÙÙÙÙÙ -ا٠-Ø¬Ø±ÙØ§Ù -تÙÙ -بر -٠اÙÙØ¯ -برابر -باشÙÙ -٠دت٠-Ú¯ÙÙÙØ¯ -اÙÙÙÙ -تا -تÙÙØ§ -Ø¬Ø¯ÙØ¯ -ÚÙØ¯ -ب٠-ÙØ´Ø¯Ù -ÙØ±Ø¯Ù -ÙØ±Ø¯Ù -Ú¯ÙÙØ¯ -ÙØ±Ø¯Ù -ÙÙÙÙ -ÙÙ Ù -ÙØ²Ø¯ -رÙÙ -ÙØµØ¯ -ÙÙØ· -Ø¨Ø§ÙØ§Ù -دÙگرا٠-اÙÙ -Ø¯ÙØ±Ùز -ØªÙØ³Ø· -سÙÙ -اÙÙ -داÙÙØ¯ -سÙÙ -Ø§Ø³ØªÙØ§Ø¯Ù -ش٠ا -ÙÙØ§Ø± -دارÙÙ -ساخت٠-Ø·ÙØ± -ا٠د٠-Ø±ÙØªÙ -ÙØ®Ø³Øª -Ø¨ÙØ³Øª -ÙØ²Ø¯ÙÙ -Ø·Ù -ÙÙÙØ¯ -از -اÙÙØ§ -ت٠ا٠٠-داشت -ÙÙÙ -طرÙÙ -اش -ÚÙØ³Øª -Ø±ÙØ¨ -ÙÙ Ø§ÙØ¯ -Ú¯ÙØª -ÚÙØ¯ÙÙ -ÚÙØ²Ù -ØªÙØ§Ùد -ا٠-Ø§ÙØ§ -با -ا٠-Ø§ÙØ¯ -ترÙÙ -اÙÙÙÙ -دÙگر٠-را٠-ÙØ§ÙÙ -Ø¨Ø±ÙØ² -ÙÙ ÚÙØ§Ù -پاعÙÙ -ÙØ³ -ØØ¯Ùد -٠ختÙÙ -Ù ÙØ§Ø¨Ù -ÚÙØ² -Ú¯ÙØ±Ø¯ -ÙØ¯Ø§Ø±Ø¯ -ضد -ÙÙ ÚÙÙ -ساز٠-شا٠-Ù ÙØ±Ø¯ -بار٠-٠رس٠-Ø®ÙÙØ´ -Ø¨Ø±Ø®ÙØ±Ø¯Ø§Ø± -ÚÙÙ -خارج -شش -ÙÙÙØ² -ØªØØª -ض٠٠-ÙØ³ØªÙÙ -Ú¯ÙØªÙ -ÙÙØ± -Ø¨Ø³ÙØ§Ø± -Ù¾ÙØ´ -برا٠-Ø±ÙØ²Ùا٠-اÙÙÙ -ÙØ®ÙØ§ÙØ¯ -Ø¨Ø§ÙØ§ -ÙÙ -ÙÙØªÙ -ÙÙ -ÚÙÙÙ -ÙÙ -Ú¯ÙØ±Ù -ÙÙØ³Øª -است -ÙØ¬Ø§ -ÙÙØ¯ -ÙÙØ² -ÙØ§Ø¨Ø¯ -Ø¨ÙØ¯Ù -ØØªÙ -ØªÙØ§ÙÙØ¯ -Ø¹ÙØ¨ -Ø®ÙØ§Ø³Øª -ÙÙÙØ¯ -بÙÙ -ت٠ا٠-ÙÙ Ù -٠ا -Ø¨Ø§Ø´ÙØ¯ -٠ث٠-شد 
-ار٠-باشد -ار٠-طب٠-بعد -اگر -ØµÙØ±Øª -ØºÙØ± -جا٠-Ø¨ÙØ´ -Ø±ÙØ²Ù -Ø§ÙØ¯ -Ø²ÙØ±Ø§ -ÚÚ¯ÙÙÙ -بار -ÙØ·Ùا -Ù Ù -دربار٠-Ù Ù -Ø¯ÙØ¯Ù -ÙÙ ÙÙ -گذار٠-بردار٠-Ø¹ÙØª -گذاشت٠-ÙÙ -ÙÙÙ -ÙÙ -ÙØ§ -Ø´ÙÙØ¯ -اباد -ÙÙ ÙØ§Ø±Ù -ÙØ± -اÙÙ -Ø®ÙØ§ÙÙØ¯ -ÚÙØ§Ø± -ÙØ§Ù -Ø§Ù Ø±ÙØ² -٠ا٠-ÙØ§Ù -ÙØ¨Ù -ÙÙÙ -سع٠-تاز٠-را -ÙØ³ØªÙد -Ø²ÙØ± -جÙÙÙ -عÙÙØ§Ù -Ø¨ÙØ¯ http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_fi.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_fi.txt b/contrib/solr/logs/conf/lang/stopwords_fi.txt deleted file mode 100644 index 4372c9a..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_fi.txt +++ /dev/null @@ -1,97 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. 
- | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| forms of BE - -olla -olen -olet -on -olemme -olette -ovat -ole | negative form - -oli -olisi -olisit -olisin -olisimme -olisitte -olisivat -olit -olin -olimme -olitte -olivat -ollut -olleet - -en | negation -et -ei -emme -ette -eivät - -|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans -minä minun minut minua minussa minusta minuun minulla minulta minulle | I -sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you -hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she -me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we -te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you -he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they - -tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this -tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that -se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it -nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these -nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those -ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they - -kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who -ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) -mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what -mitkä | (pl) - -joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which -jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) - -| conjunctions - -että | that -ja | and -jos | if -koska | because -kuin | than -mutta | but -niin | so -sekä | and -sillä | for -tai | or -vaan | but -vai | or -vaikka | although - - -| prepositions - -kanssa | with -mukaan | according to -noin | about 
-poikki | across -yli | over, across - -| other - -kun | when -niin | so -nyt | now -itse | self - http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_fr.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_fr.txt b/contrib/solr/logs/conf/lang/stopwords_fr.txt deleted file mode 100644 index 749abae..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_fr.txt +++ /dev/null @@ -1,186 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A French stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. 
- -au | a + le -aux | a + les -avec | with -ce | this -ces | these -dans | in -de | of -des | de + les -du | de + le -elle | she -en | `of them' etc -et | and -eux | them -il | he -je | I -la | the -le | the -leur | their -lui | him -ma | my (fem) -mais | but -me | me -même | same; as in moi-même (myself) etc -mes | my (pl) -moi | me -mon | my (masc) -ne | not -nos | our (pl) -notre | our -nous | we -on | one -ou | or -par | by -pas | not -pour | for -qu | que before vowel -que | that -qui | who -sa | his, her (fem) -se | oneself -ses | his (pl) -son | his, her (masc) -sur | on -ta | thy (fem) -te | thee -tes | thy (pl) -toi | thee -ton | thy (masc) -tu | thou -un | a -une | a -vos | your (pl) -votre | your -vous | you - - | single letter forms - -c | c' -d | d' -j | j' -l | l' -à | to, at -m | m' -n | n' -s | s' -t | t' -y | there - - | forms of être (not including the infinitive): -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent - - | forms of avoir (not including the infinitive): -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent - - | Later additions (from Jean-Christophe Deschamps) -ceci | this -cela | that -celà | that -cet | this -cette | this -ici | here -ils | they -les | the (pl) -leurs | their (pl) -quel | which -quels | which -quelle | which -quelles | which -sans | without -soi | oneself - http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_ga.txt ---------------------------------------------------------------------- 
diff --git a/contrib/solr/logs/conf/lang/stopwords_ga.txt b/contrib/solr/logs/conf/lang/stopwords_ga.txt deleted file mode 100644 index 1666f87..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_ga.txt +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -a -ach -ag -agus -an -aon -ar -arna -as -b' -ba -beirt -bhúr -caoga -ceathair -ceathrar -chomh -chtó -chuig -chun -cois -céad -cúig -cúigear -d' -daichead -dar -de -deich -deichniúr -den -dhá -do -don -dtà -dá -dár -dó -faoi -faoin -faoina -faoinár -fara -fiche -gach -gan -go -gur -haon -hocht -i -iad -idir -in -ina -ins -inár -is -le -leis -lena -lenár -m' -mar -mo -mé -na -nach -naoi -naonúr -ná -nà -nÃor -nó -nócha -ocht -ochtar -os -roimh -sa -seacht -seachtar -seachtó -seasca -seisear -siad -sibh -sinn -sna -sé -sà -tar -thar -thú -triúr -trà -trÃna -trÃnár -trÃocha -tú -um -ár -é -éis -à -ó -ón -óna -ónár http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_gl.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_gl.txt b/contrib/solr/logs/conf/lang/stopwords_gl.txt deleted file mode 100644 index 4874f5c..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_gl.txt +++ /dev/null @@ -1,175 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Galician stopwords -a -aínda -alí -aquel -aquela -aquelas -aqueles -aquilo -aquí -ao -aos -as -así -á -ben -cando -che -co -coa -comigo -con -connosco -contigo -convosco -coas -cos -cun -cuns -cunha -cunhas -da -dalgunha -dalgunhas -dalgún -dalgúns -das -de -del -dela -delas -deles -desde -deste -do -dos -dun -duns -dunha -dunhas -e -el -ela -elas -eles -en -era -eran -esa -esas -ese -eses -esta -estar -estaba -está -están -este -estes -estiven -estou -eu -é -facer -foi -foron -fun -había -hai -iso -isto -la -las -lle -lles -lo -los -mais -me -meu -meus -min -miña -miñas -moi -na -nas -neste -nin -no -non -nos -nosa -nosas -noso -nosos -nós -nun -nunha -nuns -nunhas -o -os -ou -ó -ós -para -pero -pode -pois -pola -polas -polo -polos -por -que -se -senón -ser -seu -seus -sexa -sido -sobre -súa -súas -tamén -tan -te -ten -teñen -teño -ter -teu -teus -ti -tido -tiña -tiven -túa -túas -un -unha -unhas -uns -vos -vosa -vosas -voso -vosos -vós http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_hi.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_hi.txt b/contrib/solr/logs/conf/lang/stopwords_hi.txt deleted file mode 100644 index 86286bb..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_hi.txt +++ /dev/null @@ -1,235 +0,0 @@ -# Also see http://www.opensource.org/licenses/bsd-license.html -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# Note: by default this file also contains forms normalized by HindiNormalizer -# for spelling variation (see section below), such that it can be used whether or -# not you enable that feature. When adding additional entries to this list, -# please add the normalized form as well. 
-ठà¤à¤¦à¤° -ठत -ठपना -ठपनॠ-ठपनॠ-ठà¤à¥ -à¤à¤¦à¤¿ -à¤à¤ª -à¤à¤¤à¥à¤¯à¤¾à¤¦à¤¿ -à¤à¤¨ -à¤à¤¨à¤à¤¾ -à¤à¤¨à¥à¤¹à¥à¤ -à¤à¤¨à¥à¤¹à¥à¤ -à¤à¤¨à¥à¤¹à¥à¤ -à¤à¤¸ -à¤à¤¸à¤à¤¾ -à¤à¤¸à¤à¥ -à¤à¤¸à¤à¥ -à¤à¤¸à¤®à¥à¤ -à¤à¤¸à¥ -à¤à¤¸à¥ -à¤à¤¨ -à¤à¤¨à¤à¤¾ -à¤à¤¨à¤à¥ -à¤à¤¨à¤à¥ -à¤à¤¨à¤à¥ -à¤à¤¨à¥à¤¹à¥à¤ -à¤à¤¨à¥à¤¹à¥à¤ -à¤à¤¨à¥à¤¹à¥à¤ -à¤à¤¸ -à¤à¤¸à¤à¥ -à¤à¤¸à¥ -à¤à¤¸à¥ -à¤à¤ -à¤à¤µà¤ -à¤à¤¸ -à¤à¤¸à¥ -à¤à¤° -à¤à¤ -à¤à¤° -à¤à¤°à¤¤à¤¾ -à¤à¤°à¤¤à¥ -à¤à¤°à¤¨à¤¾ -à¤à¤°à¤¨à¥ -à¤à¤°à¥à¤ -à¤à¤¹à¤¤à¥ -à¤à¤¹à¤¾ -à¤à¤¾ -à¤à¤¾à¥à¥ -à¤à¤¿ -à¤à¤¿à¤¤à¤¨à¤¾ -à¤à¤¿à¤¨à¥à¤¹à¥à¤ -à¤à¤¿à¤¨à¥à¤¹à¥à¤ -à¤à¤¿à¤¯à¤¾ -à¤à¤¿à¤° -à¤à¤¿à¤¸ -à¤à¤¿à¤¸à¥ -à¤à¤¿à¤¸à¥ -à¤à¥ -à¤à¥à¤ -à¤à¥à¤² -à¤à¥ -à¤à¥ -à¤à¥à¤ -à¤à¥à¤¨ -à¤à¥à¤¨à¤¸à¤¾ -à¤à¤¯à¤¾ -à¤à¤° -à¤à¤¬ -à¤à¤¹à¤¾à¤ -à¤à¤¾ -à¤à¤¿à¤¤à¤¨à¤¾ -à¤à¤¿à¤¨ -à¤à¤¿à¤¨à¥à¤¹à¥à¤ -à¤à¤¿à¤¨à¥à¤¹à¥à¤ -à¤à¤¿à¤¸ -à¤à¤¿à¤¸à¥ -à¤à¥à¤§à¤° -à¤à¥à¤¸à¤¾ -à¤à¥à¤¸à¥ -à¤à¥ -तठ-तब -तरह -तिन -तिनà¥à¤¹à¥à¤ -तिनà¥à¤¹à¥à¤ -तिस -तिसॠ-तॠ-था -थॠ-थॠ-दबारा -दिया -दà¥à¤¸à¤°à¤¾ -दà¥à¤¸à¤°à¥ -दॠ-दà¥à¤µà¤¾à¤°à¤¾ -न -नहà¥à¤ -ना -निहायत -नà¥à¤à¥ -नॠ-पर -पर -पहलॠ-पà¥à¤°à¤¾ -पॠ-फिर -बनॠ-बहॠ-बहà¥à¤¤ -बाद -बाला -बिलà¤à¥à¤² -à¤à¥ -à¤à¥à¤¤à¤° -मà¤à¤° -मानॠ-मॠ-मà¥à¤ -यदि -यह -यहाठ-यहॠ-या -यिह -यॠ-रà¤à¥à¤ -रहा -रहॠ-ऱà¥à¤µà¤¾à¤¸à¤¾ -लिठ-लियॠ-लà¥à¤à¤¿à¤¨ -व -वरà¥à¤ -वह -वह -वहाठ-वहà¥à¤ -वालॠ-वà¥à¤¹ -वॠ-वà¥à¥à¤°à¤¹ -सà¤à¤ -सà¤à¤¤à¤¾ -सà¤à¤¤à¥ -सबसॠ-सà¤à¥ -साथ -साबà¥à¤¤ -साठ-सारा -सॠ-सॠ-हॠ-हà¥à¤ -हà¥à¤ -हà¥à¤ -हॠ-हà¥à¤ -हॠ-हà¥à¤¤à¤¾ -हà¥à¤¤à¥ -हà¥à¤¤à¥ -हà¥à¤¨à¤¾ -हà¥à¤¨à¥ -# additional normalized forms of the above -ठपनि -à¤à¥à¤¸à¥ -हà¥à¤¤à¤¿ -सà¤à¤¿ -तिà¤à¤¹à¥à¤ -à¤à¤à¤¹à¥à¤ -दवारा -à¤à¤¸à¤¿ -à¤à¤¿à¤à¤¹à¥à¤ -थि -à¤à¤à¤¹à¥à¤ -à¤à¤° -à¤à¤¿à¤à¤¹à¥à¤ -वहिठ-ठà¤à¤¿ -बनि -हि -à¤à¤à¤¹à¤¿à¤ -à¤à¤à¤¹à¥à¤ -हà¥à¤ -वà¤à¥à¤°à¤¹ -à¤à¤¸à¥ -रवासा -à¤à¥à¤¨ -निà¤à¥ -à¤à¤¾à¤«à¤¿ -à¤à¤¸à¤¿ -पà¥à¤°à¤¾ -à¤à¤¿à¤¤à¤° -हॠ-बहि -वहाठ-à¤à¥à¤ -यहाठ-à¤à¤¿à¤à¤¹à¥à¤ -तिà¤à¤¹à¥à¤ -à¤à¤¿à¤¸à¤¿ -à¤à¤ -यहि -à¤à¤à¤¹à¤¿à¤ -à¤à¤¿à¤§à¤° -à¤à¤à¤¹à¥à¤ -ठदि -à¤à¤¤à¤¯à¤¾à¤¦à¤¿ -हà¥à¤ -à¤à¥à¤¨à¤¸à¤¾ -à¤à¤¸à¤à¤¿ -दà¥à¤¸à¤°à¥ -à¤à¤¹à¤¾à¤ -ठप -à¤à¤¿à¤à¤¹à¥à¤ 
-à¤à¤¨à¤à¤¿ -à¤à¤¿ -वरठ-हà¥à¤ -à¤à¥à¤¸à¤¾ -नहिठhttp://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_hu.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_hu.txt b/contrib/solr/logs/conf/lang/stopwords_hu.txt deleted file mode 100644 index 37526da..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_hu.txt +++ /dev/null @@ -1,211 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| Hungarian stop word list -| prepared by Anna Tordai - -a -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amÃg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -bár -be -belül -benne -cikk -cikkek -cikkeket -csak -de -e -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elÅ -elÅször -elÅtt -elsÅ -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -fel -felé -hanem -hiszen -hogy -hogyan -igen -Ãgy -illetve -ill. 
-ill -ilyen -ilyenkor -ison -ismét -itt -jó -jól -jobban -kell -kellett -keresztül -keressünk -ki -kÃvül -között -közül -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -mÃg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -olyan -ott -össze -Å -Åk -Åket -pedig -persze -rá -s -saját -sem -semmi -sok -sokat -sokkal -számára -szemben -szerint -szinte -talán -tehát -teljes -tovább -továbbá -több -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -vagy -vagyis -valaki -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_hy.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_hy.txt b/contrib/solr/logs/conf/lang/stopwords_hy.txt deleted file mode 100644 index 9b70202..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_hy.txt +++ /dev/null @@ -1,60 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# example set of Armenian stopwords. -Õ¡ÕµÕ¤ -Õ¡ÕµÕ¬ -Õ¡ÕµÕ¶ -Õ¡ÕµÕ½ -Õ¤Õ¸Ö -Õ¤Õ¸ÖÖ -Õ¥Õ´ -Õ¥Õ¶ -Õ¥Õ¶Ö -Õ¥Õ½ -Õ¥Ö -Õ§ -Õ§Õ« -Õ§Õ«Õ¶ -Õ§Õ«Õ¶Ö -Õ§Õ«Ö -Õ§Õ«Ö -Õ§Ö -Õ¨Õ½Õ¿ -Õ© -Õ« -Õ«Õ¶ -Õ«Õ½Õ¯ -Õ«Ö -Õ¯Õ¡Õ´ -Õ°Õ¡Õ´Õ¡Ö -Õ°Õ¥Õ¿ -Õ°Õ¥Õ¿Õ¸ -Õ´Õ¥Õ¶Ö -Õ´Õ¥Õ» -Õ´Õ« -Õ¶ -Õ¶Õ¡ -Õ¶Õ¡Ö -Õ¶ÖÕ¡ -Õ¶ÖÕ¡Õ¶Ö -Õ¸Ö -Õ¸ÖÕ¨ -Õ¸ÖÕ¸Õ¶Ö -Õ¸ÖÕºÕ¥Õ½ -Õ¸Ö -Õ¸ÖÕ´ -ÕºÕ«Õ¿Õ« -Õ¾ÖÕ¡ -Ö http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_id.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_id.txt b/contrib/solr/logs/conf/lang/stopwords_id.txt deleted file mode 100644 index b1816da..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_id.txt +++ /dev/null @@ -1,373 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# from appendix D of: A Study of Stemming Effects on Information -# Retrieval in Bahasa Indonesia -ada -adanya -adalah -adapun -agak -agaknya -agar -akan -akankah -akhirnya -aku -akulah -amat -amatlah -anda -andalah -antar -diantaranya -antara -antaranya -diantara -apa -apaan -mengapa -apabila -apakah -apalagi -apatah -atau -ataukah -ataupun -bagai -bagaikan -sebagai -sebagainya -bagaimana -bagaimanapun -sebagaimana -bagaimanakah -bagi -bahkan -bahwa -bahwasanya -sebaliknya -banyak -sebanyak -beberapa -seberapa -begini -beginian -beginikah -beginilah -sebegini -begitu -begitukah -begitulah -begitupun -sebegitu -belum -belumlah -sebelum -sebelumnya -sebenarnya -berapa -berapakah -berapalah -berapapun -betulkah -sebetulnya -biasa -biasanya -bila -bilakah -bisa -bisakah -sebisanya -boleh -bolehkah -bolehlah -buat -bukan -bukankah -bukanlah -bukannya -cuma -percuma -dahulu -dalam -dan -dapat -dari -daripada -dekat -demi -demikian -demikianlah -sedemikian -dengan -depan -di -dia -dialah -dini -diri -dirinya -terdiri -dong -dulu -enggak -enggaknya -entah -entahlah -terhadap -terhadapnya -hal -hampir -hanya -hanyalah -harus -haruslah -harusnya -seharusnya -hendak -hendaklah -hendaknya -hingga -sehingga -ia -ialah -ibarat -ingin -inginkah -inginkan -ini -inikah -inilah -itu -itukah -itulah -jangan -jangankan -janganlah -jika -jikalau -juga -justru -kala -kalau -kalaulah -kalaupun -kalian -kami -kamilah -kamu -kamulah -kan -kapan -kapankah -kapanpun -dikarenakan -karena -karenanya -ke -kecil -kemudian -kenapa -kepada -kepadanya -ketika -seketika -khususnya -kini -kinilah -kiranya -sekiranya -kita -kitalah -kok -lagi -lagian -selagi -lah -lain -lainnya -melainkan -selaku -lalu -melalui -terlalu -lama -lamanya -selama -selama -selamanya -lebih -terlebih -bermacam -macam -semacam -maka -makanya -makin -malah -malahan -mampu -mampukah -mana -manakala -manalagi -masih -masihkah -semasih -masing -mau -maupun -semaunya -memang -mereka -merekalah -meski -meskipun -semula -mungkin 
-mungkinkah -nah -namun -nanti -nantinya -nyaris -oleh -olehnya -seorang -seseorang -pada -padanya -padahal -paling -sepanjang -pantas -sepantasnya -sepantasnyalah -para -pasti -pastilah -per -pernah -pula -pun -merupakan -rupanya -serupa -saat -saatnya -sesaat -saja -sajalah -saling -bersama -sama -sesama -sambil -sampai -sana -sangat -sangatlah -saya -sayalah -se -sebab -sebabnya -sebuah -tersebut -tersebutlah -sedang -sedangkan -sedikit -sedikitnya -segala -segalanya -segera -sesegera -sejak -sejenak -sekali -sekalian -sekalipun -sesekali -sekaligus -sekarang -sekarang -sekitar -sekitarnya -sela -selain -selalu -seluruh -seluruhnya -semakin -sementara -sempat -semua -semuanya -sendiri -sendirinya -seolah -seperti -sepertinya -sering -seringnya -serta -siapa -siapakah -siapapun -disini -disinilah -sini -sinilah -sesuatu -sesuatunya -suatu -sesudah -sesudahnya -sudah -sudahkah -sudahlah -supaya -tadi -tadinya -tak -tanpa -setelah -telah -tentang -tentu -tentulah -tentunya -tertentu -seterusnya -tapi -tetapi -setiap -tiap -setidaknya -tidak -tidakkah -tidaklah -toh -waduh -wah -wahai -sewaktu -walau -walaupun -wong -yaitu -yakni -yang http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_it.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_it.txt b/contrib/solr/logs/conf/lang/stopwords_it.txt deleted file mode 100644 index 1219cc7..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_it.txt +++ /dev/null @@ -1,303 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | An Italian stop word list. 
Comments begin with vertical bar. Each stop - | word is at the start of a line. - -ad | a (to) before vowel -al | a + il -allo | a + lo -ai | a + i -agli | a + gli -all | a + l' -agl | a + gl' -alla | a + la -alle | a + le -con | with -col | con + il -coi | con + i (forms collo, cogli etc are now very rare) -da | from -dal | da + il -dallo | da + lo -dai | da + i -dagli | da + gli -dall | da + l' -dagl | da + gl' -dalla | da + la -dalle | da + le -di | of -del | di + il -dello | di + lo -dei | di + i -degli | di + gli -dell | di + l' -degl | di + gl' -della | di + la -delle | di + le -in | in -nel | in + il -nello | in + lo -nei | in + i -negli | in + gli -nell | in + l' -negl | in + gl' -nella | in + la -nelle | in + le -su | on -sul | su + il -sullo | su + lo -sui | su + i -sugli | su + gli -sull | su + l' -sugl | su + gl' -sulla | su + la -sulle | su + le -per | through, by -tra | among -contro | against -io | I -tu | thou -lui | he -lei | she -noi | we -voi | you -loro | they -mio | my -mia | -miei | -mie | -tuo | -tua | -tuoi | thy -tue | -suo | -sua | -suoi | his, her -sue | -nostro | our -nostra | -nostri | -nostre | -vostro | your -vostra | -vostri | -vostre | -mi | me -ti | thee -ci | us, there -vi | you, there -lo | him, the -la | her, the -li | them -le | them, the -gli | to him, the -ne | from there etc -il | the -un | a -uno | a -una | a -ma | but -ed | and -se | if -perché | why, because -anche | also -come | how -dov | where (as dov') -dove | where -che | who, that -chi | who -cui | whom -non | not -più | more -quale | who, that -quanto | how much -quanti | -quanta | -quante | -quello | that -quelli | -quella | -quelle | -questo | this -questi | -questa | -queste | -si | yes -tutto | all -tutti | all - - | single letter forms: - -a | at -c | as c' for ce or ci -e | and -i | the -l | as l' -o | or - - | forms of avere, to have (not including the infinitive): - -ho -hai -ha -abbiamo -avete -hanno -abbia -abbiate -abbiano -avrò -avrai -avrà -avremo 
-avrete -avranno -avrei -avresti -avrebbe -avremmo -avreste -avrebbero -avevo -avevi -aveva -avevamo -avevate -avevano -ebbi -avesti -ebbe -avemmo -aveste -ebbero -avessi -avesse -avessimo -avessero -avendo -avuto -avuta -avuti -avute - - | forms of essere, to be (not including the infinitive): -sono -sei -è -siamo -siete -sia -siate -siano -sarò -sarai -sarà -saremo -sarete -saranno -sarei -saresti -sarebbe -saremmo -sareste -sarebbero -ero -eri -era -eravamo -eravate -erano -fui -fosti -fu -fummo -foste -furono -fossi -fosse -fossimo -fossero -essendo - - | forms of fare, to do (not including the infinitive, fa, fat-): -faccio -fai -facciamo -fanno -faccia -facciate -facciano -farò -farai -farà -faremo -farete -faranno -farei -faresti -farebbe -faremmo -fareste -farebbero -facevo -facevi -faceva -facevamo -facevate -facevano -feci -facesti -fece -facemmo -faceste -fecero -facessi -facesse -facessimo -facessero -facendo - - | forms of stare, to be (not including the infinitive): -sto -stai -sta -stiamo -stanno -stia -stiate -stiano -starò -starai -starà -staremo -starete -staranno -starei -staresti -starebbe -staremmo -stareste -starebbero -stavo -stavi -stava -stavamo -stavate -stavano -stetti -stesti -stette -stemmo -steste -stettero -stessi -stesse -stessimo -stessero -stando http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_ja.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_ja.txt b/contrib/solr/logs/conf/lang/stopwords_ja.txt deleted file mode 100644 index edb67c1..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_ja.txt +++ /dev/null @@ -1,141 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# This file defines a stopword set for Japanese. -# -# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. -# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 -# for frequency lists, etc. that can be useful for making your own set (if desired) -# -# Note that there is an overlap between these stopwords and the terms stopped when used -# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note -# that comments are not allowed on the same line as stopwords. -# -# Also note that stopping is done in a case-insensitive manner. Change your StopFilter -# configuration if you need case-sensitive stopping. Lastly, note that stopping is done -# using the same character width as the entries in this file. Since this StopFilter is -# normally done after a CJKWidthFilter in your chain, you would usually want your romaji -# entries to be in half-width and your kana entries to be in full-width. 
-# -ã® -ã« -㯠-ã -ã -ã -ã§ -㦠-㨠-ã -ã -ã -ãã -ãã -ã -ãã -ãã -㪠-ã㨠-ã¨ã㦠-ã -ã -ãã -ãªã© -ãªã£ -ãªã -ãã® -ãã -ãã® -ã㣠-ãã -ã¾ã -ãã® -ã¨ãã -ãã -ã¾ã§ -ãã -ãªã -㸠-ã -ã -ãã -ã«ãã£ã¦ -ã«ãã -ãã -ãã -ã«ãã -ã -ãªã -ããã -ã«ãã㦠-ã° -ãªã㣠-ãªã -ããã -ã«ã¤ã㦠-ã -ã 㣠-ãã®å¾ -ã§ãã -ãã -ã -ã®ã§ -ãªã -ã®ã¿ -ã§ã -ã -㤠-ã«ããã -ããã³ -ãã -ããã« -ã§ã -ã -ãã -ãã®ä» -ã«é¢ãã -ãã¡ -ã¾ã -ã -ãªã -ã«å¯¾ã㦠-ç¹ã« -ãã -åã³ -ããã -ã¨ã -ã§ã¯ -ã«ã¦ -ã»ã -ãªãã -ãã¡ -ãã㦠-ã¨ã¨ãã« -ãã ã -ãã¤ã¦ -ãããã -ã¾ã㯠-ã -ã»ã© -ãã®ã® -ã«å¯¾ãã -ã»ã¨ãã© -ã¨å ±ã« -ã¨ãã£ã -ã§ã -ã¨ã -ã¨ãã -ãã -##### End of file http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_lv.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_lv.txt b/contrib/solr/logs/conf/lang/stopwords_lv.txt deleted file mode 100644 index 92bdf4a..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_lv.txt +++ /dev/null @@ -1,186 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins -# the original list of over 800 forms was refined: -# pronouns, adverbs, interjections were removed -# -# prepositions -aiz -ap -ar -apakÅ¡ -Ärpus -augÅ¡pus -bez -caur -dÄļ -gar -iekÅ¡ -iz -kopÅ¡ -labad -lejpus -lÄ«dz -no -otrpus -pa -par -pÄr -pÄc -pie -pirms -pret -priekÅ¡ -starp -Å¡aipus -uz -viÅpus -virs -virspus -zem -apakÅ¡pus -# Conjunctions -un -bet -jo -ja -ka -lai -tomÄr -tikko -turpretÄ« -arÄ« -kaut -gan -tÄdÄļ -tÄ -ne -tikvien -vien -kÄ -ir -te -vai -kamÄr -# Particles -ar -diezin -droÅ¡i -diemžÄl -nebÅ«t -ik -it -taÄu -nu -pat -tiklab -iekÅ¡pus -nedz -tik -nevis -turpretim -jeb -iekam -iekÄm -iekÄms -kolÄ«dz -lÄ«dzko -tiklÄ«dz -jebÅ¡u -tÄlab -tÄpÄc -nekÄ -itin -jÄ -jau -jel -nÄ -nezin -tad -tikai -vis -tak -iekams -vien -# modal verbs -bÅ«t -biju -biji -bija -bijÄm -bijÄt -esmu -esi -esam -esat -būšu -bÅ«si -bÅ«s -bÅ«sim -bÅ«siet -tikt -tiku -tiki -tika -tikÄm -tikÄt -tieku -tiec -tiek -tiekam -tiekat -tikÅ¡u -tiks -tiksim -tiksiet -tapt -tapi -tapÄt -topat -tapÅ¡u -tapsi -taps -tapsim -tapsiet -kļūt -kļuvu -kļuvi -kļuva -kļuvÄm -kļuvÄt -kļūstu -kļūsti -kļūst -kļūstam -kļūstat -kļūšu -kļūsi -kļūs -kļūsim -kļūsiet -# verbs -varÄt -varÄju -varÄjÄm -varÄÅ¡u -varÄsim -var -varÄji -varÄjÄt -varÄsi -varÄsiet -varat -varÄja -varÄs http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_nl.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_nl.txt b/contrib/solr/logs/conf/lang/stopwords_nl.txt deleted file mode 100644 index 47a2aea..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_nl.txt +++ /dev/null @@ -1,119 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Dutch stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large sample of Dutch text. - - | Dutch stop words frequently exhibit homonym clashes. These are indicated - | clearly below. - -de | the -en | and -van | of, from -ik | I, the ego -te | (1) chez, at etc, (2) to, (3) too -dat | that, which -die | that, those, who, which -in | in, inside -een | a, an, one -hij | he -het | the, it -niet | not, nothing, naught -zijn | (1) to be, being, (2) his, one's, its -is | is -was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river -op | on, upon, at, in, up, used up -aan | on, upon, to (as dative) -met | with, by -als | like, such as, when -voor | (1) before, in front of, (2) furrow -had | had, past tense all persons sing. of 'hebben' (have) -er | there -maar | but, only -om | round, about, for etc -hem | him -dan | then -zou | should/would, past tense all persons sing. of 'zullen' -of | or, whether, if -wat | what, something, anything -mijn | possessive and noun 'mine' -men | people, 'one' -dit | this -zo | so, thus, in this way -door | through by -over | over, across -ze | she, her, they, them -zich | oneself -bij | (1) a bee, (2) by, near, at -ook | also, too -tot | till, until -je | you -mij | me -uit | out of, from -der | Old Dutch form of 'van der' still found in surnames -daar | (1) there, (2) because -haar | (1) her, their, them, (2) hair -naar | (1) unpleasant, unwell etc, (2) towards, (3) as -heb | present first person sing. of 'to have' -hoe | how, why -heeft | present third person sing. 
of 'to have' -hebben | 'to have' and various parts thereof -deze | this -u | you -want | (1) for, (2) mitten, (3) rigging -nog | yet, still -zal | 'shall', first and third person sing. of verb 'zullen' (will) -me | me -zij | she, they -nu | now -ge | 'thou', still used in Belgium and south Netherlands -geen | none -omdat | because -iets | something, somewhat -worden | to become, grow, get -toch | yet, still -al | all, every, each -waren | (1) 'were' (2) to wander, (3) wares, (3) -veel | much, many -meer | (1) more, (2) lake -doen | to do, to make -toen | then, when -moet | noun 'spot/mote' and present form of 'to must' -ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' -zonder | without -kan | noun 'can' and present form of 'to be able' -hun | their, them -dus | so, consequently -alles | all, everything, anything -onder | under, beneath -ja | yes, of course -eens | once, one day -hier | here -wie | who -werd | imperfect third person sing. of 'become' -altijd | always -doch | yet, but etc -wordt | present third person sing. 
of 'become' -wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans -kunnen | to be able -ons | us/our -zelf | self -tegen | against, towards, at -na | after, near -reeds | already -wil | (1) present tense of 'want', (2) 'will', noun, (3) fender -kon | could; past tense of 'to be able' -niets | nothing -uw | your -iemand | somebody -geweest | been; past participle of 'be' -andere | other http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_no.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_no.txt b/contrib/solr/logs/conf/lang/stopwords_no.txt deleted file mode 100644 index a7a2c28..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_no.txt +++ /dev/null @@ -1,194 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Norwegian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This stop word list is for the dominant bokmÃ¥l dialect. Words unique - | to nynorsk are marked *. - - | Revised by Jan Bruusgaard <[email protected]>, Jan 2005 - -og | and -i | in -jeg | I -det | it/this/that -at | to (w. inf.) 
-en | a/an -et | a/an -den | it/this/that -til | to -er | is/am/are -som | who/that -pÃ¥ | on -de | they / you(formal) -med | with -han | he -av | of -ikke | not -ikkje | not * -der | there -sÃ¥ | so -var | was/were -meg | me -seg | you -men | but -ett | one -har | have -om | about -vi | we -min | my -mitt | my -ha | have -hadde | had -hun | she -nÃ¥ | now -over | over -da | when/as -ved | by/know -fra | from -du | you -ut | out -sin | your -dem | them -oss | us -opp | up -man | you/one -kan | can -hans | his -hvor | where -eller | or -hva | what -skal | shall/must -selv | self (reflective) -sjøl | self (reflective) -her | here -alle | all -vil | will -bli | become -ble | became -blei | became * -blitt | have become -kunne | could -inn | in -nÃ¥r | when -være | be -kom | come -noen | some -noe | some -ville | would -dere | you -som | who/which/that -deres | their/theirs -kun | only/just -ja | yes -etter | after -ned | down -skulle | should -denne | this -for | for/because -deg | you -si | hers/his -sine | hers/his -sitt | hers/his -mot | against -Ã¥ | to -meget | much -hvorfor | why -dette | this -disse | these/those -uten | without -hvordan | how -ingen | none -din | your -ditt | your -blir | become -samme | same -hvilken | which -hvilke | which (plural) -sÃ¥nn | such a -inni | inside/within -mellom | between -vÃ¥r | our -hver | each -hvem | who -vors | us/ours -hvis | whose -bÃ¥de | both -bare | only/just -enn | than -fordi | as/because -før | before -mange | many -ogsÃ¥ | also -slik | just -vært | been -være | to be -bÃ¥e | both * -begge | both -siden | since -dykk | your * -dykkar | yours * -dei | they * -deira | them * -deires | theirs * -deim | them * -di | your (fem.) 
* -dÃ¥ | as/when * -eg | I * -ein | a/an * -eit | a/an * -eitt | a/an * -elles | or * -honom | he * -hjÃ¥ | at * -ho | she * -hoe | she * -henne | her -hennar | her/hers -hennes | hers -hoss | how * -hossen | how * -ikkje | not * -ingi | noone * -inkje | noone * -korleis | how * -korso | how * -kva | what/which * -kvar | where * -kvarhelst | where * -kven | who/whom * -kvi | why * -kvifor | why * -me | we * -medan | while * -mi | my * -mine | my * -mykje | much * -no | now * -nokon | some (masc./neut.) * -noka | some (fem.) * -nokor | some * -noko | some * -nokre | some * -si | his/hers * -sia | since * -sidan | since * -so | so * -somt | some * -somme | some * -um | about* -upp | up * -vere | be * -vore | was * -verte | become * -vort | become * -varte | became * -vart | became * - http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_pt.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_pt.txt b/contrib/solr/logs/conf/lang/stopwords_pt.txt deleted file mode 100644 index acfeb01..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_pt.txt +++ /dev/null @@ -1,253 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Portuguese stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | of, from -a | the; to, at; her -o | the; him -que | who, that -e | and -do | de + o -da | de + a -em | in -um | a -para | for - | é from SER -com | with -não | not, no -uma | a -os | the; them -no | em + o -se | himself etc -na | em + a -por | for -mais | more -as | the; them -dos | de + os -como | as, like -mas | but - | foi from SER -ao | a + o -ele | he -das | de + as - | tem from TER -à | a + a -seu | his -sua | her -ou | or - | ser from SER -quando | when -muito | much - | há from HAV -nos | em + os; us -já | already, now - | está from EST -eu | I -também | also -só | only, just -pelo | per + o -pela | per + a -até | up to -isso | that -ela | he -entre | between - | era from SER -depois | after -sem | without -mesmo | same -aos | a + os - | ter from TER -seus | his -quem | whom -nas | em + as -me | me -esse | that -eles | they - | estão from EST -você | you - | tinha from TER - | foram from SER -essa | that -num | em + um -nem | nor -suas | her -meu | my -à s | a + as -minha | my - | têm from TER -numa | em + uma -pelos | per + os -elas | they - | havia from HAV - | seja from SER -qual | which - | será from SER -nós | we - | tenho from TER -lhe | to him, her -deles | of them -essas | those -esses | those -pelas | per + as -este | this - | fosse from SER -dele | of him - - | other words. There are many contractions such as naquele = em+aquele, - | mo = me+o, but they are rare. - | Indefinite article plural forms are also rare. 
- -tu | thou -te | thee -vocês | you (plural) -vos | you -lhes | to them -meus | my -minhas -teu | thy -tua -teus -tuas -nosso | our -nossa -nossos -nossas - -dela | of her -delas | of them - -esta | this -estes | these -estas | these -aquele | that -aquela | that -aqueles | those -aquelas | those -isto | this -aquilo | that - - | forms of estar, to be (not including the infinitive): -estou -está -estamos -estão -estive -esteve -estivemos -estiveram -estava -estávamos -estavam -estivera -estivéramos -esteja -estejamos -estejam -estivesse -estivéssemos -estivessem -estiver -estivermos -estiverem - - | forms of haver, to have (not including the infinitive): -hei -há -havemos -hão -houve -houvemos -houveram -houvera -houvéramos -haja -hajamos -hajam -houvesse -houvéssemos -houvessem -houver -houvermos -houverem -houverei -houverá -houveremos -houverão -houveria -houverÃamos -houveriam - - | forms of ser, to be (not including the infinitive): -sou -somos -são -era -éramos -eram -fui -foi -fomos -foram -fora -fôramos -seja -sejamos -sejam -fosse -fôssemos -fossem -for -formos -forem -serei -será -seremos -serão -seria -serÃamos -seriam - - | forms of ter, to have (not including the infinitive): -tenho -tem -temos -tém -tinha -tÃnhamos -tinham -tive -teve -tivemos -tiveram -tivera -tivéramos -tenha -tenhamos -tenham -tivesse -tivéssemos -tivessem -tiver -tivermos -tiverem -terei -terá -teremos -terão -teria -terÃamos -teriam http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_ro.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_ro.txt b/contrib/solr/logs/conf/lang/stopwords_ro.txt deleted file mode 100644 index 460b0eb..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_ro.txt +++ /dev/null @@ -1,247 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -acea -aceasta -aceastÄ -aceea -acei -aceia -acel -acela -acele -acelea -acest -acesta -aceste -acestea -aceÅti -aceÅtia -acolo -acum -ai -aia -aibÄ -aici -al -Äla -ale -alea -Älea -altceva -altcineva -am -ar -are -aÅ -aÅadar -asemenea -asta -Ästa -astÄzi -astea -Ästea -ÄÅtia -asupra -aÅ£i -au -avea -avem -aveÅ£i -azi -bine -bucur -bunÄ -ca -cÄ -cÄci -când -care -cÄrei -cÄror -cÄrui -cât -câte -câţi -cÄtre -câtva -ce -cel -ceva -chiar -cînd -cine -cineva -cît -cîte -cîţi -cîtva -contra -cu -cum -cumva -curând -curînd -da -dÄ -dacÄ -dar -datoritÄ -de -deci -deja -deoarece -departe -deÅi -din -dinaintea -dintr -dintre -drept -dupÄ -ea -ei -el -ele -eram -este -eÅti -eu -face -fÄrÄ -fi -fie -fiecare -fii -fim -fiÅ£i -iar -ieri -îi -îl -îmi -împotriva -în -înainte -înaintea -încât -încît -încotro -între -întrucât -întrucît -îţi -la -lângÄ -le -li -lîngÄ -lor -lui -mÄ -mâine -mea -mei -mele -mereu -meu -mi -mine -mult -multÄ -mulÅ£i -ne -nicÄieri -nici -nimeni -niÅte -noastrÄ -noastre -noi -noÅtri -nostru -nu -ori -oricând -oricare -oricât -orice -oricînd -oricine -oricît -oricum -oriunde 
-pânÄ -pe -pentru -peste -pînÄ -poate -pot -prea -prima -primul -prin -printr -sa -sÄ -sÄi -sale -sau -sÄu -se -Åi -sînt -sîntem -sînteÅ£i -spre -sub -sunt -suntem -sunteÅ£i -ta -tÄi -tale -tÄu -te -Å£i -Å£ie -tine -toatÄ -toate -tot -toÅ£i -totuÅi -tu -un -una -unde -undeva -unei -unele -uneori -unor -vÄ -vi -voastrÄ -voastre -voi -voÅtri -vostru -vouÄ -vreo -vreun http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_ru.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_ru.txt b/contrib/solr/logs/conf/lang/stopwords_ru.txt deleted file mode 100644 index 5527140..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_ru.txt +++ /dev/null @@ -1,243 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | a russian stop word list. comments begin with vertical bar. each stop - | word is at the start of a line. - - | this is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | letter `Ñ' is translated to `е'. - -и | and -в | in/into -во | alternative form -не | not -ÑÑо | what/that -он | he -на | on/onto -Ñ | i -Ñ | from -Ñо | alternative form -как | how -а | milder form of `no' (but) -Ñо | conjunction and form of `that' -вÑе | all -она | she -Ñак | so, thus -его | him -но | but -да | yes/and -ÑÑ | thou -к | towards, by -Ñ | around, chez -же | intensifier particle -Ð²Ñ | you -за | beyond, behind -Ð±Ñ | conditional/subj. 
particle -по | up to, along -ÑолÑко | only -ее | her -мне | to me -бÑло | it was -Ð²Ð¾Ñ | here is/are, particle -Ð¾Ñ | away from -Ð¼ÐµÐ½Ñ | me -еÑе | still, yet, more -Ð½ÐµÑ | no, there isnt/arent -о | about -из | out of -ÐµÐ¼Ñ | to him -ÑепеÑÑ | now -когда | when -даже | even -Ð½Ñ | so, well -вдÑÑг | suddenly -ли | interrogative particle -еÑли | if -Ñже | already, but homonym of `narrower' -или | or -ни | neither -бÑÑÑ | to be -бÑл | he was -него | prepositional form of его -до | up to -Ð²Ð°Ñ | you accusative -нибÑÐ´Ñ | indef. suffix preceded by hyphen -опÑÑÑ | again -Ñж | already, but homonym of `adder' -вам | to you -Ñказал | he said -Ð²ÐµÐ´Ñ | particle `after all' -Ñам | there -поÑом | then -ÑÐµÐ±Ñ | oneself -ниÑего | nothing -ей | to her -Ð¼Ð¾Ð¶ÐµÑ | usually with `бÑÑÑ' as `maybe' -они | they -ÑÑÑ | here -где | where -еÑÑÑ | there is/are -надо | got to, must -ней | prepositional form of ей -Ð´Ð»Ñ | for -Ð¼Ñ | we -ÑÐµÐ±Ñ | thee -Ð¸Ñ | them, their -Ñем | than -бÑла | she was -Ñам | self -ÑÑоб | in order to -без | without -бÑдÑо | as if -Ñеловек | man, person, one -Ñего | genitive form of `what' -Ñаз | once -Ñоже | also -Ñебе | to oneself -под | beneath -Ð¶Ð¸Ð·Ð½Ñ | life -бÑÐ´ÐµÑ | will be -ж | short form of intensifer particle `же' -Ñогда | then -кÑо | who -ÑÑÐ¾Ñ | this -говоÑил | was saying -Ñого | genitive form of `that' -поÑÐ¾Ð¼Ñ | for that reason -ÑÑого | genitive form of `this' -какой | which -ÑовÑем | altogether -ним | prepositional form of `его', `они' -здеÑÑ | here -ÑÑом | prepositional form of `ÑÑоÑ' -один | one -поÑÑи | almost -мой | my -Ñем | instrumental/dative plural of `ÑоÑ', `Ñо' -ÑÑÐ¾Ð±Ñ | full form of `in order that' -нее | her (acc.) -кажеÑÑÑ | it seems -ÑейÑÐ°Ñ | now -бÑли | they were -кÑда | where to -заÑем | why -ÑказаÑÑ | to say -вÑÐµÑ | all (acc., gen. preposn. 
plural) -никогда | never -ÑÐµÐ³Ð¾Ð´Ð½Ñ | today -можно | possible, one can -пÑи | by -Ð½Ð°ÐºÐ¾Ð½ÐµÑ | finally -два | two -об | alternative form of `о', about -дÑÑгой | another -Ñ Ð¾ÑÑ | even -поÑле | after -над | above -болÑÑе | more -ÑÐ¾Ñ | that one (masc.) -ÑеÑез | across, in -ÑÑи | these -Ð½Ð°Ñ | us -пÑо | about -вÑего | in all, only, of all -Ð½Ð¸Ñ | prepositional form of `они' (they) -ÐºÐ°ÐºÐ°Ñ | which, feminine -много | lots -Ñазве | interrogative particle -Ñказала | she said -ÑÑи | three -ÑÑÑ | this, acc. fem. sing. -Ð¼Ð¾Ñ | my, feminine -впÑоÑем | moreover, besides -Ñ Ð¾ÑоÑо | good -ÑÐ²Ð¾Ñ | ones own, acc. fem. sing. -ÑÑой | oblique form of `ÑÑа', fem. `this' -пеÑед | in front of -иногда | sometimes -лÑÑÑе | better -ÑÑÑÑ | a little -Ñом | preposn. form of `that one' -нелÑÐ·Ñ | one must not -Ñакой | such a one -им | to them -более | more -вÑегда | always -конеÑно | of course -вÑÑ | acc. fem. sing of `all' -Ð¼ÐµÐ¶Ð´Ñ | between - - - | b: some paradigms - | - | personal pronouns - | - | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мноÑ] - | ÑÑ ÑÐµÐ±Ñ Ñебе Ñобой [ÑобоÑ] - | он его ÐµÐ¼Ñ Ð¸Ð¼ [него, немÑ, ним] - | она ее Ñи ÐµÑ [нее, нÑи, неÑ] - | оно его ÐµÐ¼Ñ Ð¸Ð¼ [него, немÑ, ним] - | - | Ð¼Ñ Ð½Ð°Ñ Ð½Ð°Ð¼ нами - | Ð²Ñ Ð²Ð°Ñ Ð²Ð°Ð¼ вами - | они Ð¸Ñ Ð¸Ð¼ ими [Ð½Ð¸Ñ , ним, ними] - | - | ÑÐµÐ±Ñ Ñебе Ñобой [ÑобоÑ] - | - | demonstrative pronouns: ÑÑÐ¾Ñ (this), ÑÐ¾Ñ (that) - | - | ÑÑÐ¾Ñ ÑÑа ÑÑо ÑÑи - | ÑÑого ÑÑÑ ÑÑо ÑÑи - | ÑÑого ÑÑой ÑÑого ÑÑÐ¸Ñ - | ÑÑÐ¾Ð¼Ñ ÑÑой ÑÑÐ¾Ð¼Ñ ÑÑим - | ÑÑим ÑÑой ÑÑим [ÑÑоÑ] ÑÑими - | ÑÑом ÑÑой ÑÑом ÑÑÐ¸Ñ - | - | ÑÐ¾Ñ Ñа Ñо Ñе - | Ñого ÑÑ Ñо Ñе - | Ñого Ñой Ñого ÑÐµÑ - | ÑÐ¾Ð¼Ñ Ñой ÑÐ¾Ð¼Ñ Ñем - | Ñем Ñой Ñем [ÑоÑ] Ñеми - | Ñом Ñой Ñом ÑÐµÑ - | - | determinative pronouns - | - | (a) веÑÑ (all) - | - | веÑÑ Ð²ÑÑ Ð²Ñе вÑе - | вÑего вÑÑ Ð²Ñе вÑе - | вÑего вÑей вÑего вÑÐµÑ - | вÑÐµÐ¼Ñ Ð²Ñей вÑÐµÐ¼Ñ Ð²Ñем - | вÑем вÑей вÑем [вÑеÑ] вÑеми - | вÑем вÑей вÑем вÑÐµÑ - | - | (b) Ñам (himself etc) - | - | Ñам Ñама Ñамо Ñами - | Ñамого ÑÐ°Ð¼Ñ Ñамо ÑÐ°Ð¼Ð¸Ñ - | Ñамого Ñамой 
Ñамого ÑÐ°Ð¼Ð¸Ñ - | ÑÐ°Ð¼Ð¾Ð¼Ñ Ñамой ÑÐ°Ð¼Ð¾Ð¼Ñ Ñамим - | Ñамим Ñамой Ñамим [ÑамоÑ] Ñамими - | Ñамом Ñамой Ñамом ÑÐ°Ð¼Ð¸Ñ - | - | stems of verbs `to be', `to have', `to do' and modal - | - | бÑÑÑ Ð±Ñ Ð±Ñд бÑв еÑÑÑ ÑÑÑÑ - | име - | дел - | мог мож моÑÑ - | Ñме - | Ñ Ð¾Ñ Ñ Ð¾Ñ - | долж - | можн - | нÑжн - | нелÑÐ·Ñ - http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_sv.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_sv.txt b/contrib/solr/logs/conf/lang/stopwords_sv.txt deleted file mode 100644 index 096f87f..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_sv.txt +++ /dev/null @@ -1,133 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Swedish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | Swedish stop words occasionally exhibit homonym clashes. For example - | sÃ¥ = so, but also seed. These are indicated clearly below. 
- -och | and -det | it, this/that -att | to (with infinitive) -i | in, at -en | a -jag | I -hon | she -som | who, that -han | he -pÃ¥ | on -den | it, this/that -med | with -var | where, each -sig | him(self) etc -för | for -sÃ¥ | so (also: seed) -till | to -är | is -men | but -ett | a -om | if; around, about -hade | had -de | they, these/those -av | of -icke | not, no -mig | me -du | you -henne | her -dÃ¥ | then, when -sin | his -nu | now -har | have -inte | inte nÃ¥gon = no one -hans | his -honom | him -skulle | 'sake' -hennes | her -där | there -min | my -man | one (pronoun) -ej | nor -vid | at, by, on (also: vast) -kunde | could -nÃ¥got | some etc -frÃ¥n | from, off -ut | out -när | when -efter | after, behind -upp | up -vi | we -dem | them -vara | be -vad | what -över | over -än | than -dig | you -kan | can -sina | his -här | here -ha | have -mot | towards -alla | all -under | under (also: wonder) -nÃ¥gon | some etc -eller | or (else) -allt | all -mycket | much -sedan | since -ju | why -denna | this/that -själv | myself, yourself etc -detta | this/that -Ã¥t | to -utan | without -varit | was -hur | how -ingen | no -mitt | my -ni | you -bli | to be, become -blev | from bli -oss | us -din | thy -dessa | these/those -nÃ¥gra | some etc -deras | their -blir | from bli -mina | my -samma | (the) same -vilken | who, that -er | you, your -sÃ¥dan | such a -vÃ¥r | our -blivit | from bli -dess | its -inom | within -mellan | between -sÃ¥dant | such a -varför | why -varje | each -vilka | who, that -ditt | thy -vem | who -vilket | who, that -sitta | his -sÃ¥dana | such a -vart | each -dina | thy -vars | whose -vÃ¥rt | our -vÃ¥ra | our -ert | your -era | your -vilkas | whose - http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_th.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_th.txt b/contrib/solr/logs/conf/lang/stopwords_th.txt deleted file mode 100644 
index f27ec57..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_th.txt +++ /dev/null @@ -1,133 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Thai stopwords from: -# "Opinion Detection in Thai Political News Columns -# Based on Subjectivity Analysis" -# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak -à¹à¸§à¹ -à¹à¸¡à¹ -à¹à¸ -à¹à¸à¹ -à¹à¸«à¹ -à¹à¸ -à¹à¸à¸¢ -à¹à¸«à¹à¸ -à¹à¸¥à¹à¸§ -à¹à¸¥à¸° -à¹à¸£à¸ -à¹à¸à¸ -à¹à¸à¹ -à¹à¸à¸ -à¹à¸«à¹à¸ -à¹à¸¥à¸¢ -à¹à¸£à¸´à¹à¸¡ -à¹à¸£à¸² -à¹à¸¡à¸·à¹à¸ -à¹à¸à¸·à¹à¸ -à¹à¸à¸£à¸²à¸° -à¹à¸à¹à¸à¸à¸²à¸£ -à¹à¸à¹à¸ -à¹à¸à¸´à¸à¹à¸à¸¢ -à¹à¸à¸´à¸ -à¹à¸à¸·à¹à¸à¸à¸à¸²à¸ -à¹à¸à¸µà¸¢à¸§à¸à¸±à¸ -à¹à¸à¸µà¸¢à¸§ -à¹à¸à¹à¸ -à¹à¸à¸à¸²à¸° -à¹à¸à¸¢ -à¹à¸à¹à¸² -à¹à¸à¸² -à¸à¸µà¸ -à¸à¸²à¸ -à¸à¸°à¹à¸£ -à¸à¸à¸ -à¸à¸¢à¹à¸²à¸ -à¸à¸¢à¸¹à¹ -à¸à¸¢à¸²à¸ -หาภ-หลาย -หลัà¸à¸à¸²à¸ -หลัภ-หรืภ-หà¸à¸¶à¹à¸ -สà¹à¸§à¸ -สà¹à¸ -สุภ-สà¹à¸²à¸«à¸£à¸±à¸ -วà¹à¸² -วัภ-ลภ-รà¹à¸§à¸¡ -ราย -รัภ-ระหวà¹à¸²à¸ -รวม -ยัภ-มี -มาภ-มา -à¸à¸£à¹à¸à¸¡ -à¸à¸ -à¸à¹à¸²à¸ -à¸à¸¥ -à¸à¸²à¸ -à¸à¹à¸² -à¸à¸µà¹ -à¸à¹à¸² -à¸à¸±à¹à¸ -à¸à¸±à¸ -à¸à¸à¸à¸à¸²à¸ -à¸à¸¸à¸ -à¸à¸µà¹à¸ªà¸¸à¸ -à¸à¸µà¹ -à¸à¹à¸²à¹à¸«à¹ -à¸à¹à¸² -à¸à¸²à¸ -à¸à¸±à¹à¸à¸à¸µà¹ -à¸à¸±à¹à¸ -à¸à¹à¸² -à¸à¸¹à¸ -à¸à¸¶à¸ -à¸à¹à¸à¸ -à¸à¹à¸²à¸à¹ -à¸à¹à¸²à¸ -à¸à¹à¸ -à¸à¸²à¸¡ -à¸à¸±à¹à¸à¹à¸à¹ -à¸à¸±à¹à¸ 
-à¸à¹à¸²à¸ -à¸à¹à¸§à¸¢ -à¸à¸±à¸ -à¸à¸¶à¹à¸ -à¸à¹à¸§à¸ -à¸à¸¶à¸ -à¸à¸²à¸ -à¸à¸±à¸ -à¸à¸° -à¸à¸·à¸ -à¸à¸§à¸²à¸¡ -à¸à¸£à¸±à¹à¸ -à¸à¸ -à¸à¸¶à¹à¸ -à¸à¸à¸ -à¸à¸ -à¸à¸à¸° -à¸à¹à¸à¸ -à¸à¹ -à¸à¸²à¸£ -à¸à¸±à¸ -à¸à¸±à¸ -à¸à¸§à¹à¸² -à¸à¸¥à¹à¸²à¸§ http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/stopwords_tr.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/stopwords_tr.txt b/contrib/solr/logs/conf/lang/stopwords_tr.txt deleted file mode 100644 index 1f03282..0000000 --- a/contrib/solr/logs/conf/lang/stopwords_tr.txt +++ /dev/null @@ -1,226 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Turkish stopwords from LUCENE-559 -# merged with the list from "Information Retrieval on Turkish Texts" -# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) -acaba -altmıŠ-altı -ama -ancak -arada -aslında -ayrıca -bana -bazı -belki -ben -benden -beni -benim -beri -beÅ -bile -bin -bir -birçok -biri -birkaç -birkez -birÅey -birÅeyi -biz -bize -bizden -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunlar -bunları -bunların -bunu -bunun -burada -çok -çünkü -da -daha -dahi -de -defa -deÄil -diÄer -diye -doksan -dokuz -dolayı -dolayısıyla -dört -edecek -eden -ederek -edilecek -ediliyor -edilmesi -ediyor -eÄer -elli -en -etmesi -etti -ettiÄi -ettiÄini -gibi -göre -halen -hangi -hatta -hem -henüz -hep -hepsi -her -herhangi -herkesin -hiç -hiçbir -için -iki -ile -ilgili -ise -iÅte -itibaren -itibariyle -kadar -karÅın -katrilyon -kendi -kendilerine -kendini -kendisi -kendisine -kendisini -kez -ki -kim -kimden -kime -kimi -kimse -kırk -milyar -milyon -mu -mü -mı -nasıl -ne -neden -nedenle -nerde -nerede -nereye -niye -niçin -o -olan -olarak -oldu -olduÄu -olduÄunu -olduklarını -olmadı -olmadıÄı -olmak -olması -olmayan -olmaz -olsa -olsun -olup -olur -olursa -oluyor -on -ona -ondan -onlar -onlardan -onları -onların -onu -onun -otuz -oysa -öyle -pek -raÄmen -sadece -sanki -sekiz -seksen -sen -senden -seni -senin -siz -sizden -sizi -sizin -Åey -Åeyden -Åeyi -Åeyler -Åöyle -Åu -Åuna -Åunda -Åundan -Åunları -Åunu -tarafından -trilyon -tüm -üç -üzere -var -vardı -ve -veya -ya -yani -yapacak -yapılan -yapılması -yapıyor -yapmak -yaptı -yaptıÄı -yaptıÄını -yaptıkları -yedi -yerine -yetmiÅ -yine -yirmi -yoksa -yüz -zaten http://git-wip-us.apache.org/repos/asf/chukwa/blob/21b24284/contrib/solr/logs/conf/lang/userdict_ja.txt ---------------------------------------------------------------------- diff --git a/contrib/solr/logs/conf/lang/userdict_ja.txt b/contrib/solr/logs/conf/lang/userdict_ja.txt deleted file mode 100644 index 621b1b2..0000000 --- 
a/contrib/solr/logs/conf/lang/userdict_ja.txt +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) -# -# Add entries to this file in order to override the statistical model in terms -# of segmentation, readings and part-of-speech tags. Notice that entries do -# not have weights since they are always used when found. This is by-design -# in order to maximize ease-of-use. -# -# Entries are defined using the following CSV format: -# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> -# -# Notice that a single half-width space separates tokens and readings, and -# that the number tokens and readings must match exactly. -# -# Also notice that multiple entries with the same <text> is undefined. -# -# Whitespace only lines are ignored. Comments are not allowed on entry lines. 
-# - -# Custom segmentation for kanji compounds -æ¥æ¬çµæ¸æ°è,æ¥æ¬ çµæ¸ æ°è,ããã³ ã±ã¤ã¶ã¤ ã·ã³ãã³,ã«ã¹ã¿ã åè© -é¢è¥¿å½é空港,é¢è¥¿ å½é 空港,ã«ã³ãµã¤ ã³ã¯ãµã¤ ã¯ã¦ã³ã¦,ã«ã¹ã¿ã åè© - -# Custom segmentation for compound katakana -ãã¼ãããã°,ãã¼ã ããã°,ãã¼ã ããã°,ããã«ãåè© -ã·ã§ã«ãã¼ããã°,ã·ã§ã«ãã¼ ããã°,ã·ã§ã«ãã¼ ããã°,ããã«ãåè© - -# Custom reading for former sumo wrestler -æéé¾,æéé¾,ã¢ãµã·ã§ã¦ãªã¥ã¦,ã«ã¹ã¿ã 人å
