Author: rwesten
Date: Wed Sep 28 13:42:36 2011
New Revision: 1176881

URL: http://svn.apache.org/viewvc?rev=1176881&view=rev
Log:
deactivated the German Tokenizer because it sometimes creates wrong tokens, such 
as including leading and trailing " in words (e.g. "Kung Fu Panda" -> ["Kung, 
Fu, Panda"])

Modified:
    incubator/stanbol/trunk/data/opennlp/lang/de/download_models.xml

Modified: incubator/stanbol/trunk/data/opennlp/lang/de/download_models.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/data/opennlp/lang/de/download_models.xml?rev=1176881&r1=1176880&r2=1176881&view=diff
==============================================================================
--- incubator/stanbol/trunk/data/opennlp/lang/de/download_models.xml (original)
+++ incubator/stanbol/trunk/data/opennlp/lang/de/download_models.xml Wed Sep 28 
13:42:36 2011
@@ -21,10 +21,17 @@
     Plugin to download OpenNLP Models from the Web
   </description>
    
+  <!-- Removed the German Tokenizer because it sometimes creates "wrong"
+       tokens. In the absence of this model, the SimpleTokenizer will be used by
+       default. -->
   <target name="download">
-    <copy todir="${target.directory}" flatten="true">
+    <!-- Added this delete to ensure that already downloaded versions of the 
+         German Tokenizer models are removed. -->
+    <delete dir="${target.directory}" includes="de-token.bin" />
+       <copy todir="${target.directory}" flatten="true">
       <resources>
-        <url url="${model.url}/de-token.bin"/>
+        <!-- url url="${model.url}/de-token.bin"/>
+              -->
         <url url="${model.url}/de-sent.bin"/>
         <url url="${model.url}/de-pos-perceptron.bin"/>
         <!-- no Chunker for german


Reply via email to