Author: rwesten
Date: Mon Sep 26 11:12:50 2011
New Revision: 1175796

URL: http://svn.apache.org/viewvc?rev=1175796&view=rev
Log:
Moved OpenNLP models for Swedish from "se" to "sv".

This was necessary because OpenNLP uses "se" as the prefix for such models, 
whereas the official language key is "sv".

Because the models are looked up via the language key, the build file was 
also updated to rename the models accordingly.
See the readme for details.

Added:
    incubator/stanbol/trunk/data/opennlp/lang/sv/
    incubator/stanbol/trunk/data/opennlp/lang/sv/README.md
    incubator/stanbol/trunk/data/opennlp/lang/sv/download_models.xml   (with 
props)
    incubator/stanbol/trunk/data/opennlp/lang/sv/pom.xml   (with props)
    incubator/stanbol/trunk/data/opennlp/lang/sv/src/
    incubator/stanbol/trunk/data/opennlp/lang/sv/src/main/
    incubator/stanbol/trunk/data/opennlp/lang/sv/src/main/resources/
    incubator/stanbol/trunk/data/opennlp/lang/sv/src/main/resources/org/
    incubator/stanbol/trunk/data/opennlp/lang/sv/src/main/resources/org/apache/
    
incubator/stanbol/trunk/data/opennlp/lang/sv/src/main/resources/org/apache/stanbol/
    
incubator/stanbol/trunk/data/opennlp/lang/sv/src/main/resources/org/apache/stanbol/data/
    
incubator/stanbol/trunk/data/opennlp/lang/sv/src/main/resources/org/apache/stanbol/data/opennlp/
Removed:
    incubator/stanbol/trunk/data/opennlp/lang/se/
Modified:
    incubator/stanbol/trunk/data/pom.xml

Added: incubator/stanbol/trunk/data/opennlp/lang/sv/README.md
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/data/opennlp/lang/sv/README.md?rev=1175796&view=auto
==============================================================================
--- incubator/stanbol/trunk/data/opennlp/lang/sv/README.md (added)
+++ incubator/stanbol/trunk/data/opennlp/lang/sv/README.md Mon Sep 26 11:12:50 
2011
@@ -0,0 +1,31 @@
+# Data files Bundles for OpenNLP
+
+This source repository only holds the pom.xml file and folder structure of 
this bundle.
+
+To avoid loading the subversion repository with large binary files, this artifact 
has to be built and deployed manually to retrieve precomputed models from other 
sites.
+
+
+## Downloading the OpenNLP statistical model 
+
+The OpenNLP models are downloaded from 
+
+    http://opennlp.sourceforge.net/models-1.5
+
+The download URL used by the build is defined by the 'opennlp.model.home' property in the 'pom.xml'.
+The list of downloaded files is defined within the 'download_models.xml'.
+
+## NOTE
+
+Using this bundle is only an alternative to manually copying the required 
OpenNLP models to the '{stanbol-installation}/sling/datafiles'. However, note 
that OpenNLP uses 'se' as prefix for Swedish, whereas 
+the official ISO language code is 'sv'! Therefore the original model files 
need 
+to be renamed from
+
+    se-**
+    
+to
+
+    sv-**
+    
+The build process of this bundle does this by default. However when copying
+the model files to the '{stanbol-installation}/sling/datafiles' this MUST BE 
done
+manually!

Added: incubator/stanbol/trunk/data/opennlp/lang/sv/download_models.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/data/opennlp/lang/sv/download_models.xml?rev=1175796&view=auto
==============================================================================
--- incubator/stanbol/trunk/data/opennlp/lang/sv/download_models.xml (added)
+++ incubator/stanbol/trunk/data/opennlp/lang/sv/download_models.xml Mon Sep 26 
11:12:50 2011
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project name="OpenNLP Model Download Helper" default="download" basedir=".">
+  <description>
+    Contains only a singel target that is used by the Maven Ant
+    Plugin to download OpenNLP Models from the Web
+  </description>
+  <!-- OpenNLP publishes the Swedish models with the prefix "se-" while
+       the language key used for the look-up is "sv". Each <copy> below
+       therefore uses a nested <mergemapper> to store its single
+       downloaded file under the corresponding "sv-*" name.
+   -->
+  <target name="download">
+      <copy toDir="${target.directory}/">
+          <resources>
+              <url url="${model.url}/se-token.bin"/>
+          </resources>
+          <mergemapper to="sv-token.bin"/>
+      </copy>
+      <copy toDir="${target.directory}/">
+          <resources>
+              <url url="${model.url}/se-sent.bin"/>
+          </resources>
+          <mergemapper to="sv-sent.bin"/>
+      </copy>
+      <copy toDir="${target.directory}/">
+          <resources>
+              <url url="${model.url}/se-pos-perceptron.bin"/>
+          </resources>
+          <mergemapper to="sv-pos-perceptron.bin"/>
+      </copy>
+  </target>
+</project>
\ No newline at end of file

Propchange: incubator/stanbol/trunk/data/opennlp/lang/sv/download_models.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/data/opennlp/lang/sv/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/data/opennlp/lang/sv/pom.xml?rev=1175796&view=auto
==============================================================================
--- incubator/stanbol/trunk/data/opennlp/lang/sv/pom.xml (added)
+++ incubator/stanbol/trunk/data/opennlp/lang/sv/pom.xml Mon Sep 26 11:12:50 
2011
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>org.apache.stanbol.data.parent</artifactId>
+    <version>0.9.0-incubating-SNAPSHOT</version>
+    <relativePath>../../../parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.data.opennlp.lang.se</artifactId> <!-- NOTE(review): id still ends in "se" although the module moved to "sv"; renaming requires updating all dependents - confirm -->
+  <version>1.0.0-incubating</version>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Data: OpenNLP Models for Swedish</name>
+  <description>
+    Bundle containing all necessary/available models for parsing Swedish 
language texts. 
+    This does not include Models for named entity recocnition (NER).
+    NOTE: OpenNLP modles use incorrectly "se" instead of "sv" as language code.
+    This is corrected by renaming the downloaded files accordingly.
+  </description>
+  <inceptionYear>2011</inceptionYear>
+
+  <scm>
+    <connection>
+      
scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/data/opennlp/lang/sv
+    </connection>
+    <developerConnection>
+      
scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/data/opennlp/lang/sv
+    </developerConnection>
+    <url>http://incubator.apache.org/stanbol/</url>
+  </scm>
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <!-- resource path of the models inside the bundle and the base URL they are downloaded from -->
+    <opennlp.model.path>org/apache/stanbol/data/opennlp</opennlp.model.path>
+    
<opennlp.model.home>http://dev.iks-project.eu/downloads/opennlp/models-1.5/</opennlp.model.home>
+  </properties>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <configuration>
+          <instructions>
+            <_versionpolicy>$${version;===;${@}}</_versionpolicy>
+
+            <!-- 
+              Extension used to provide files in that directory to the
+              DataFileProvider
+              -->
+            <Data-Files>${opennlp.model.path}</Data-Files>
+            <!-- 
+              Use a priority lower than 0 to allow providers without a
+              defined ranking to override this default data.
+             -->
+            <Data-Files-Priority>
+              -100
+            </Data-Files-Priority>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <!-- 
+          Ant is used to download the models from the location
+          configured by the opennlp.model.home property.
+        -->
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-antrun-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>compile</id>
+            <phase>compile</phase>
+            <configuration>
+              <!--
+                TODO: I would like to add an "unless" constraint to the
+                target that prevents execution if Maven operates in offline
+                mode. However I was not able to find out how to obtain this
+                information. ${settings.offline} (as noted by several
+                resources) does not work.
+                Until fixed, builds will fail if no internet connection is
+                available!
+              -->
+              <target>
+                <property name="target.directory" 
value="${project.basedir}/src/main/resources/${opennlp.model.path}"/>
+                <property name="model.url" value="${opennlp.model.home}"/>
+                                
+                <echo message="copy OpenNLP models"/>
+                <echo message="  FROM ${model.url} "/>
+                <echo message="  TO ${target.directory}"/>
+
+                <ant antfile="${basedir}/download_models.xml">
+                  <target name="download"/>
+                </ant>
+              </target>
+            </configuration>
+            <goals>
+              <goal>run</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>

Propchange: incubator/stanbol/trunk/data/opennlp/lang/sv/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/stanbol/trunk/data/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/data/pom.xml?rev=1175796&r1=1175795&r2=1175796&view=diff
==============================================================================
--- incubator/stanbol/trunk/data/pom.xml (original)
+++ incubator/stanbol/trunk/data/pom.xml Mon Sep 26 11:12:50 2011
@@ -88,7 +88,7 @@
         <module>opennlp/lang/en</module>
         <module>opennlp/lang/nl</module>
         <module>opennlp/lang/pt</module>
-        <module>opennlp/lang/se</module>
+        <module>opennlp/lang/sv</module>
         <module>opennlp/ner/en</module>
         <module>opennlp/ner/es</module>
         <module>opennlp/ner/nl</module>


Reply via email to