Author: rwesten
Date: Thu Oct 17 13:31:18 2013
New Revision: 1533075

URL: http://svn.apache.org/r1533075
Log:
STANBOL-1143: Applied the patch provided by A. Soroka. The Urify utility - 
formerly well hidden within the genericrdf indexing tool - now has an entry in 
the README.md and also supports a help option. This also includes import 
organization of the indexing.core module, removal of unused members, and a 
.close() call on the BnD tool in the OsgiConfigurationUtil; the log level of 
FST configurations was increased to info.

Modified:
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/Urify.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConfig.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConstants.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/destination/OsgiConfigurationUtil.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/FinishedEntityDaemon.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerConstants.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerImpl.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathPostProcessor.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathSourceProcessor.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/EntityIneratorToScoreProviderAdapter.java
    
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
    
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
    
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
    
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
    
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
    
stanbol/trunk/entityhub/indexing/destination/solryard/src/main/java/org/apache/stanbol/entityhub/indexing/destination/solryard/fst/FstConfig.java
    stanbol/trunk/entityhub/indexing/genericrdf/README.md

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/Urify.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/Urify.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/Urify.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/Urify.java
 Thu Oct 17 13:31:18 2013
@@ -16,6 +16,9 @@
 */
 package org.apache.stanbol.entityhub.indexing;
 
+import static java.lang.System.exit;
+import static java.lang.System.out;
+
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
@@ -77,12 +80,14 @@ public class Urify implements Runnable{
     private static Logger log = LoggerFactory.getLogger(Urify.class);
     
     private static final Options options;
+    
     static {
         options = new Options();
         options.addOption("h", "help", false, "display this help and exit");
         options.addOption("p","prefix",true, 
             "The URI prefix used for wrapping the bNode Id");
         options.addOption("e","encoding",true, "the char encodinf (default: 
UTF-8)");
+        options.addOption("o","outputFilePrefix",true, "The prefix to add to 
output files, defaults to \"uf_\"");
     }
     /**
      * @param args
@@ -92,39 +97,53 @@ public class Urify implements Runnable{
         CommandLineParser parser = new PosixParser();
         CommandLine line = parser.parse(options, args);
         args = line.getArgs();
+        if (line.hasOption('h')) {
+                       out.println("Processes RDF files to translate blank 
nodes into prefixed URI nodes.");
+                       out.println("-h/--help: Print this help and exit.");
+                       out.println("-p/--prefix: Required: The prefix to add 
to blank nodes to make them URIs.");
+                       out.println("-e/--encoding: The text encoding to expect 
in the RDF, defaults to UTF-8.");
+                       out.println("-o/--outputFilePrefix: The prefix to add 
to output files, defaults to \"uf_\".");
+                       exit(0);
+        }
         if(!line.hasOption('p')){
             log.error("Missing parameter 'prefix' ('p)!");
-            System.exit(1);
+            exit(1);
         }
         String prefix = "<"+line.getOptionValue('p');
-        log.info("prfix: {} ",line.getOptionValue('p'));
+        log.info("Using prefix: {} ",line.getOptionValue('p'));
         Charset charset;
         if(line.hasOption('e')){
             charset = Charset.forName(line.getOptionValue('e'));
             if(charset == null){
                 log.error("Unsupported encoding 
'{}'!",line.getOptionValue('e'));
-                System.exit(1);
+                exit(1);
             }
         } else {
             charset = Charset.forName("UTF-8");
         }
+
         log.info("charset: {} ",charset.name());
-        Urify urify = new Urify(Arrays.asList(args), charset, prefix);
-        urify.run(); //TODO: this could support processing multiple files in 
parallel
+               Urify urify = new Urify(Arrays.asList(args), charset, prefix,
+                               line.hasOption('o') ? line.getOptionValue('o') 
: "uf_");
+               urify.run(); //TODO: this could support processing multiple 
files in parallel
     }
 
     private final Charset charset;
     private final String prefix;
+    private final String outputFilePrefix;
     protected long start = System.currentTimeMillis();
     protected long uf_count = 0;
 
     private List<String> resources;
 
-    public Urify(List<String> resources, Charset charset, String prefix) 
throws IOException {
-        this.charset = charset;
-        this.prefix = prefix;
-        this.resources = Collections.synchronizedList(new 
ArrayList<String>(resources));
-    }
+       public Urify(List<String> resources, Charset charset, String prefix,
+                       final String outputFilePrefix) throws IOException {
+               this.charset = charset;
+               this.prefix = prefix;
+               this.outputFilePrefix = outputFilePrefix;
+               this.resources = Collections.synchronizedList(new 
ArrayList<String>(
+                               resources));
+       }
     
     public void run() {
         String source;
@@ -148,7 +167,7 @@ public class Urify implements Runnable{
         if(source.isFile()){
             String path = FilenameUtils.getFullPathNoEndSeparator(resource);
             String name = FilenameUtils.getName(resource);
-            File target = new File(path,"uf_"+name);
+            File target = new File(path, outputFilePrefix + name);
             int i=0;
             while(target.exists()){
                 i++;

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConfig.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConfig.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConfig.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConfig.java
 Thu Oct 17 13:31:18 2013
@@ -77,9 +77,7 @@ public class IndexingConfig {
     private static final String SOURCE_FOLDER = "resources";
     private static final String SOURCE_PATH = 
DEFAULT_ROOT_PATH+File.separatorChar+SOURCE_FOLDER;
     private static final String DESTINATION_FOLDER = "destination";
-    private static final String DESTINATION_PATH = 
DEFAULT_ROOT_PATH+File.separatorChar+DESTINATION_FOLDER;
     private static final String DISTRIBUTION_FOLDER = "dist";
-    private static final String DISTRIBUTION_PATH = 
DEFAULT_ROOT_PATH+File.separatorChar+DISTRIBUTION_FOLDER;
     private static final String INDEXING_PROPERTIES = "indexing.properties";
     private static final String CONFIG_PARAM = "config";
     public static final String KEY_INDEXING_CONFIG = "indexingConfig";

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConstants.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConstants.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConstants.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConstants.java
 Thu Oct 17 13:31:18 2013
@@ -16,7 +16,6 @@
 */
 package org.apache.stanbol.entityhub.indexing.core.config;
 
-import java.io.File;
 
 /**
  * Constants defines/used for Indexing.

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/destination/OsgiConfigurationUtil.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/destination/OsgiConfigurationUtil.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/destination/OsgiConfigurationUtil.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/destination/OsgiConfigurationUtil.java
 Thu Oct 17 13:31:18 2013
@@ -256,6 +256,7 @@ public final class OsgiConfigurationUtil
             builder.addClasspath(new 
File(config.getDestinationFolder(),CONFIG_ROOT));
         } catch (IOException e) {
             log.warn("Unable to build OSGI Bundle for Indexed Referenced Site 
"+config.getName(),e);
+            builder.close();
             return;
         }
         Jar jar;
@@ -263,8 +264,10 @@ public final class OsgiConfigurationUtil
             jar = builder.build();
         } catch (Exception e) {
             log.warn("Unable to build OSGI Bundle for Indexed Referenced Site 
"+config.getName(),e);
-            return;
-        }
+                       return;
+               } finally {
+                       builder.close();
+               }
         try {
             jar.write(new File(config.getDistributionFolder(),
                 CONFIG_PACKAGE+config.getName()+"-1.0.0.jar"));

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/FinishedEntityDaemon.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/FinishedEntityDaemon.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/FinishedEntityDaemon.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/FinishedEntityDaemon.java
 Thu Oct 17 13:31:18 2013
@@ -21,9 +21,7 @@ import static org.apache.stanbol.entityh
 import static 
org.apache.stanbol.entityhub.indexing.core.impl.IndexerConstants.SOURCE_STARTED;
 import static 
org.apache.stanbol.entityhub.indexing.core.impl.IndexerConstants.STORE_DURATION;
 
-import java.io.BufferedReader;
 import java.io.BufferedWriter;
-import java.io.FileOutputStream;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.nio.charset.Charset;

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerConstants.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerConstants.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerConstants.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerConstants.java
 Thu Oct 17 13:31:18 2013
@@ -81,8 +81,8 @@ public interface IndexerConstants {
      */
     //ignore the Type safety because the item is of
     //INDEXING_COMPLETED_QUEUE_ITEM is anyway null
-    @SuppressWarnings("unchecked")
-    QueueItem INDEXING_COMPLETED_QUEUE_ITEM = new QueueItem(null);
+    @SuppressWarnings("rawtypes")
+    QueueItem INDEXING_COMPLETED_QUEUE_ITEM = new QueueItem<Void>(null);
 
     /**
      * The sequence number for {@link IndexingDaemon}s that read from the 

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerImpl.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerImpl.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerImpl.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerImpl.java
 Thu Oct 17 13:31:18 2013
@@ -56,7 +56,6 @@ import org.apache.stanbol.entityhub.inde
 import org.apache.stanbol.entityhub.indexing.core.Indexer;
 import org.apache.stanbol.entityhub.indexing.core.IndexingComponent;
 import org.apache.stanbol.entityhub.indexing.core.IndexingDestination;
-import 
org.apache.stanbol.entityhub.indexing.core.destination.OsgiConfigurationUtil;
 import org.apache.stanbol.entityhub.indexing.core.event.IndexingEvent;
 import org.apache.stanbol.entityhub.indexing.core.event.IndexingListener;
 import 
org.apache.stanbol.entityhub.indexing.core.impl.IndexingDaemon.IndexingDaemonEventObject;
@@ -382,7 +381,7 @@ public class IndexerImpl implements Inde
             if(state == State.POSTPROCESSING){ //if state > INITIALISED
                 throw new IllegalStateException("Unable to skip post 
processing if postprocessing is already in progress!");
             }
-            if(state.ordinal() >= state.POSTPROCESSED.ordinal()){
+            if(state.ordinal() >= State.POSTPROCESSED.ordinal()){
                 return; //already post processed
             }
             setState(State.POSTPROCESSED);
@@ -564,7 +563,7 @@ public class IndexerImpl implements Inde
             if(state == State.INDEXING){ 
                 throw new IllegalStateException("Unable to skip indexing if 
indexing is already in progress!");
             }
-            if(state.ordinal() >= state.INDEXED.ordinal()){ //if state > 
INDEXING
+            if(state.ordinal() >= State.INDEXED.ordinal()){ //if state > 
INDEXING
                 return; //already in INDEXED state
             }
             setState(State.INDEXED);

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
 Thu Oct 17 13:31:18 2013
@@ -124,7 +124,8 @@ public class FieldValueFilter implements
     /**
      * @param value
      */
-    private void parseFilterConfig(Object value) {
+    @SuppressWarnings("unchecked")
+       private void parseFilterConfig(Object value) {
         Collection<String> values; 
         if(value instanceof String){
             values = Arrays.asList(value.toString().split(";"));

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathPostProcessor.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathPostProcessor.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathPostProcessor.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathPostProcessor.java
 Thu Oct 17 13:31:18 2013
@@ -22,7 +22,6 @@ import org.apache.stanbol.entityhub.inde
 import org.apache.stanbol.entityhub.indexing.core.IndexingDestination;
 import org.apache.stanbol.entityhub.ldpath.EntityhubLDPath;
 import org.apache.stanbol.entityhub.ldpath.backend.YardBackend;
-import org.apache.stanbol.entityhub.servicesapi.model.Representation;
 import org.apache.stanbol.entityhub.servicesapi.yard.Yard;
 
 import at.newmedialab.ldpath.api.backend.RDFBackend;

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathSourceProcessor.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathSourceProcessor.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathSourceProcessor.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathSourceProcessor.java
 Thu Oct 17 13:31:18 2013
@@ -21,23 +21,18 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
-import java.math.BigDecimal;
-import java.math.BigInteger;
 import java.net.URI;
 import java.nio.charset.Charset;
 import java.util.Collection;
-import java.util.Date;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Map.Entry;
-import java.util.concurrent.ThreadPoolExecutor;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
 import org.apache.stanbol.entityhub.indexing.core.EntityProcessor;
 import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
 import 
org.apache.stanbol.entityhub.ldpath.EntityhubLDPath.EntityhubConfiguration;
-import org.apache.stanbol.entityhub.ldpath.backend.SingleRepresentationBackend;
 import org.apache.stanbol.entityhub.servicesapi.model.Representation;
 import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
 import org.slf4j.Logger;

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/EntityIneratorToScoreProviderAdapter.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/EntityIneratorToScoreProviderAdapter.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/EntityIneratorToScoreProviderAdapter.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/EntityIneratorToScoreProviderAdapter.java
 Thu Oct 17 13:31:18 2013
@@ -20,8 +20,8 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.stanbol.entityhub.indexing.core.EntityIterator;
-import org.apache.stanbol.entityhub.indexing.core.EntityScoreProvider;
 import org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore;
+import org.apache.stanbol.entityhub.indexing.core.EntityScoreProvider;
 import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
 import org.apache.stanbol.entityhub.servicesapi.model.Representation;
 

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
 Thu Oct 17 13:31:18 2013
@@ -28,9 +28,9 @@ import java.util.EnumSet;
 import java.util.Enumeration;
 import java.util.HashSet;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeMap;
-import java.util.Map.Entry;
 
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
 import org.apache.commons.compress.archivers.zip.ZipFile;

Modified: 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
 Thu Oct 17 13:31:18 2013
@@ -16,6 +16,10 @@
 */
 package org.apache.stanbol.entityhub.indexing.core;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
 import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
@@ -29,9 +33,6 @@ import org.apache.stanbol.entityhub.inde
 import org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser;
 import org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser;
 import 
org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator;
-
-import static org.junit.Assert.*;
-
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;

Modified: 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
 Thu Oct 17 13:31:18 2013
@@ -16,6 +16,11 @@
 */
 package org.apache.stanbol.entityhub.indexing.core;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -25,8 +30,8 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Set;
 import java.util.Map.Entry;
+import java.util.Set;
 
 import org.apache.commons.io.FilenameUtils;
 import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
@@ -37,13 +42,11 @@ import org.apache.stanbol.entityhub.serv
 import org.apache.stanbol.entityhub.servicesapi.model.Text;
 import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
 import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
-import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import static org.junit.Assert.*;
 
 
 public class IndexerTest {

Modified: 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
 Thu Oct 17 13:31:18 2013
@@ -16,6 +16,11 @@
 */
 package org.apache.stanbol.entityhub.indexing.core;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
@@ -32,7 +37,6 @@ import org.junit.BeforeClass;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import static org.junit.Assert.*;
 
 public class ResourceLoaderTest {
     /**

Modified: 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
 Thu Oct 17 13:31:18 2013
@@ -17,7 +17,6 @@
 package org.apache.stanbol.entityhub.indexing.core.processor;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.util.HashMap;
 import java.util.Map;
 

Modified: 
stanbol/trunk/entityhub/indexing/destination/solryard/src/main/java/org/apache/stanbol/entityhub/indexing/destination/solryard/fst/FstConfig.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/destination/solryard/src/main/java/org/apache/stanbol/entityhub/indexing/destination/solryard/fst/FstConfig.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/destination/solryard/src/main/java/org/apache/stanbol/entityhub/indexing/destination/solryard/fst/FstConfig.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/destination/solryard/src/main/java/org/apache/stanbol/entityhub/indexing/destination/solryard/fst/FstConfig.java
 Thu Oct 17 13:31:18 2013
@@ -117,7 +117,7 @@ public class FstConfig {
                             CorpusCreationInfo fstInfo = new 
CorpusCreationInfo(language, 
                                 fieldInfo.name, storeFieldName,  
                                 fieldType, fstFile);
-                            log.debug(" ... init {} ", fstInfo);
+                            log.info(" ... init {} ", fstInfo);
                             addCorpus(fstInfo);
                         }
                     } else {

Modified: stanbol/trunk/entityhub/indexing/genericrdf/README.md
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/genericrdf/README.md?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- stanbol/trunk/entityhub/indexing/genericrdf/README.md (original)
+++ stanbol/trunk/entityhub/indexing/genericrdf/README.md Thu Oct 17 13:31:18 
2013
@@ -229,6 +229,20 @@ the Bundle described above. To install t
 {name} denotes to the value you configured for the "name" property within the
 "indexing.properties" file.
 
+### A note about blank nodes
 
+If your input data sets contain large numbers of blank nodes, you may find that
+you have problems running out of heap space during indexing. This is because 
Jena
+(like many semantic stores) keeps a store of blank nodes in core memory while 
+importing. Keeping in mind that EntityHub does not support the use of blank 
nodes,
+there is a means of indexing such data sets nonetheless. You can convert them 
to
+named nodes and then index. There is a convenient tool packaged with Stanbol 
for
+this purpose, called "Urify" (org.apache.stanbol.entityhub.indexing.Urify).
+It is available in the runnable JAR file built by this indexer. To use it, put 
that
+JAR on your classpath, and you can execute Urify, giving it a list of files to 
process.
+Use the "-h" or "--help" flag to see options for Urify:
 
-
+    java -Xmx1024m -cp org.apache.stanbol.entityhub.indexing.genericrdf-*.jar \
+    org.apache.stanbol.entityhub.indexing.Urify --help
+    
+    
\ No newline at end of file


Reply via email to