Revision: 18160
          http://sourceforge.net/p/gate/code/18160
Author:   johann_p
Date:     2014-07-07 16:31:10 +0000 (Mon, 07 Jul 2014)
Log Message:
-----------
Completely re-worked the gcp-direct.sh command: it now
takes command line arguments to be more flexible and can either
use a config file or take inputDir, outputDir, pipeline
file etc. as arguments. See gcp-direct.sh -h

Modified Paths:
--------------
    gcp/trunk/build.xml
    gcp/trunk/gcp-direct.sh
    gcp/trunk/src/gate/cloud/batch/BatchRunner.java

Added Paths:
-----------
    gcp/trunk/lib/commons-cli-1.2.jar

Modified: gcp/trunk/build.xml
===================================================================
--- gcp/trunk/build.xml 2014-07-06 01:19:58 UTC (rev 18159)
+++ gcp/trunk/build.xml 2014-07-07 16:31:10 UTC (rev 18160)
@@ -81,6 +81,7 @@
       <zipfileset file="gcp.jar"  prefix="gcp-${version}/lib"/>
       <zipfileset file="dist/gcp.sh" filemode="755" prefix="gcp-${version}"/>
       <zipfileset file="gcp-cli.jar" filemode="755" prefix="gcp-${version}"/>
+      <zipfileset file="gcp-direct.sh" filemode="755" prefix="gcp-${version}"/>
       <zipfileset dir="lib" includes="*.jar" prefix="gcp-${version}/lib"/>
       <mappedresources>
         <mappedresources>

Modified: gcp/trunk/gcp-direct.sh
===================================================================
--- gcp/trunk/gcp-direct.sh     2014-07-06 01:19:58 UTC (rev 18159)
+++ gcp/trunk/gcp-direct.sh     2014-07-07 16:31:10 UTC (rev 18160)
@@ -41,17 +41,17 @@
 do
 if [ "$1" == "-h" ]
 then
+    gcpparams=( "${gcpparams[@]}" $1 )
     cat <<EOF
 Run GCP
-The following options can be passed immediately after the command name:
-  N        ... number of threads to use
-  file     ... config file 
-  -h       ... show this help
 All options starting with -X or -D will be passed on to the "java" command, 
for example:
   -Djava.io.tmpdir=<somedir>
   -Xmx<memorysize>
+All other arguments will be passed to the program. The program can be invoked 
in two
+ways:
+1) giving it the number of threads and a config file (GCP-CLI mode)
+2) giving it more flexible arguments:
 EOF
-    exit 0
 else 
   if [[ "$1" =~ -D.* ]] || [[ "$1" =~ -X.* ]] 
   then 
@@ -63,9 +63,5 @@
 shift
 done
 
-echo gcpparams "${gcpparams[@]}"
-echo jvmparams "${jvmparams[@]}"
-  
-"$JAVA_HOME/bin/java" -Dgate.home.override="${GATE_HOME}" 
-Dgcp.home="${SCRIPTDIR}" 
-Djava.protocol.handler.pkgs=gate.cloud.util.protocols $JAVA_OPTS -cp 
"${SCRIPTDIR}"/gcp.jar:"${SCRIPTDIR}"/'lib/*':"${SCRIPTDIR}"/conf:"$GATE_HOME"/bin/gate.jar:"$GATE_HOME"/'lib/*'
 "${jvmparams[@]}"  gate.cloud.batch.BatchRunner "${gcpparams[@]}"
+"$JAVA_HOME/bin/java" -Dgate.home="${GATE_HOME}" -Dgcp.home="${SCRIPTDIR}" 
-Djava.protocol.handler.pkgs=gate.cloud.util.protocols $JAVA_OPTS -cp 
"${SCRIPTDIR}"/gcp.jar:"${SCRIPTDIR}"/'lib/*':"${SCRIPTDIR}"/conf:"$GATE_HOME"/bin/gate.jar:"$GATE_HOME"/'lib/*'
 "${jvmparams[@]}"  gate.cloud.batch.BatchRunner "${gcpparams[@]}"
 
-

Added: gcp/trunk/lib/commons-cli-1.2.jar
===================================================================
(Binary files differ)

Index: gcp/trunk/lib/commons-cli-1.2.jar
===================================================================
--- gcp/trunk/lib/commons-cli-1.2.jar   2014-07-06 01:19:58 UTC (rev 18159)
+++ gcp/trunk/lib/commons-cli-1.2.jar   2014-07-07 16:31:10 UTC (rev 18160)

Property changes on: gcp/trunk/lib/commons-cli-1.2.jar
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property
Modified: gcp/trunk/src/gate/cloud/batch/BatchRunner.java
===================================================================
--- gcp/trunk/src/gate/cloud/batch/BatchRunner.java     2014-07-06 01:19:58 UTC 
(rev 18159)
+++ gcp/trunk/src/gate/cloud/batch/BatchRunner.java     2014-07-07 16:31:10 UTC 
(rev 18160)
@@ -47,6 +47,17 @@
 import org.apache.log4j.Logger;
 
 import com.sun.jna.Platform;
+import gate.CorpusController;
+import gate.cloud.io.DocumentEnumerator;
+import gate.cloud.io.IOConstants;
+import gate.cloud.io.InputHandler;
+import gate.cloud.io.OutputHandler;
+import gate.util.persistence.PersistenceManager;
+import java.util.LinkedList;
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
 
 /**
  * This class is a Batch Runner, i.e. it manages the execution of batch jobs,
@@ -497,11 +508,95 @@
   }
   
   /**
-   * Main entry point.  Expects two parameters, a number of threads and a batch
+   * Main entry point.  This can be invoked in one of two ways: the "legacy" 
+   * mode which expects two parameters and a command line mode which allows
+   * specifying various options and is more flexible. The "legacy" mode is 
intended
+   * to be used with the gcp-cli program and should work exactly as it did 
before.
+   * In legacy mode, this program expects two parameters, a number of threads 
and a batch
    * file location, and runs the batch in a thread pool of the specified size,
    * exiting when the batch is complete.
-   */
+   * In command line mode, the commons-cli option parser is used .. see its
+   * option definitions for which arguments exactly can be provided.
+   */  
   public static void main(String[] args) {
+    Options options = new Options();
+    // there are two ways of how this program can get invoked: from the 
+    // GCP-CLI program or from the command line. 
+    // The GCP-CLI way to invoke is "nthreads configfile" with no 
+    // option flags while the command line invocation always includes
+    // required option flags.
+    
+    // Options for the command line invocation
+    // TODO: may be useful to be able to override the default user config and
+    // session files here?
+    options.addOption("b","batchFile",true,"Batch file (required, replaces -i, 
-o, -x, -r, -I)");
+    options.addOption("i","inputDirectory",true,"Input directory (required, 
unless -b given)");
+    options.addOption("f","outputFormat",true,"Output format, optional, one of 
'xml' or 'finf', default is 'finf'");
+    options.addOption("o","outputDirectory",true,"Output directory (requried, 
unless -b given)");
+    options.addOption("x","executePipeline",true,"Pipeline/application file to 
execute (required, unless -b given)");
+    options.addOption("r","reportFile",true,"Report file (optional, default: 
report.xml");
+    options.addOption("t","numberThreads",true,"Number of threads to use 
(required)");
+    options.addOption("I","batchId",true,"Batch ID (optional, default: GCP");
+    options.addOption("h","help",false,"Print this help information");
+    BasicParser parser = new BasicParser();
+    
+    int numThreads = 0;
+    File batchFile = null;  
+    boolean invokedByGcpCli = true;
+    
+    CommandLine line = null;
+    try {
+      line = parser.parse(options, args);
+    } catch (Exception ex) {
+      log.error("Could not parse command line arguments: "+ex.getMessage());
+      System.exit(1);
+    }
+    if(args.length == 2 && line.getArgs().length == 2) {
+      numThreads = Integer.parseInt(args[0]);
+      batchFile = new File(args[1]);
+      if(!batchFile.exists()){
+        log.error("The provided file (" + batchFile + ") does not exist!");
+        System.exit(1);
+      }
+      if(!batchFile.isFile()){
+        log.error("The provided file (" + batchFile + ") is not a file!");
+        System.exit(1);
+      }      
+    } else {
+      invokedByGcpCli = false;
+      if(line.hasOption('h')) {
+        HelpFormatter helpFormatter = new HelpFormatter();
+        helpFormatter.printHelp("gcp-direct.sh [options]", options);
+        System.exit(0);
+      }
+      if(!line.hasOption('t') || 
+         (!line.hasOption('b') && (!line.hasOption('i') || 
!line.hasOption('o') || !line.hasOption('x')))) {
+        log.error("Required argument missing!");
+        HelpFormatter helpFormatter = new HelpFormatter();
+        helpFormatter.printHelp("gcp-direct.sh [options]", options);
+        System.exit(1);
+      }     
+      if(line.hasOption('b')) {
+        batchFile = new File(line.getOptionValue('b'));
+      }
+      if(line.hasOption('f')) {
+        String format = line.getOptionValue('f');
+        if(!format.equals("xml") && !format.equals("finf")) {
+          log.error("Output format (option 'f') must be either 'xml' or 
'finf'");
+          System.exit(1);
+        }
+      }
+      numThreads = Integer.parseInt(line.getOptionValue('t'));
+    }
+    if(batchFile != null) {
+      try {
+        batchFile = batchFile.getCanonicalFile();
+      } catch (IOException ex) {
+        log.error("Could not get canonical file name for "+batchFile+": 
"+ex.getMessage());
+        System.exit(1);
+      }
+    }
+
     // write PID file if requested
     String pidFileName = System.getProperty("gcp.pid.file");
     if(pidFileName != null) {
@@ -526,22 +621,9 @@
         log.warn("PID file not supported on Windows");
       }
     }
-    
     File gcpHome = new File(System.getProperty("gcp.home", "."));
-    if(args.length != 2) {
-      System.err.println("Need 2 arguments: number of threads and batch file");
-      System.exit(1);
-    }
-    File batchFile = new File(args[1]);
-    if(!batchFile.exists()){
-      log.error("The provided file (" + batchFile + ") does not exist!");
-      System.exit(1);
-    }
-    if(!batchFile.isFile()){
-      log.error("The provided file (" + batchFile + ") is not a file!");
-      System.exit(1);
-    }
-    
+    log.info("Using GCP home directory "+gcpHome);
+
     // exit the whole GCP process if an Error (such as OOM) occurs, rather than
     // just killing the thread in which the error occurred.
     final Thread.UncaughtExceptionHandler defaultExceptionHandler =
@@ -563,29 +645,134 @@
     });
     
     try {
-      int numThreads = Integer.parseInt(args[0]);
-
-      String gateDotHome = System.getProperty("gate.home.override");
+      // we set gate home differently depending on which invocation mode
+      // we have: if we got invoked from gcpcli, we want to use the gate
+      // home that is in the gcp home, otherwise we want to use the one
+      // provided by the gate.home property or otherwise the one in gcp home
+      File gateHome = null;
+      // If we use the GCP gate, this is false, otherwise this is true
+      boolean useOwnGate = false; 
       File gcpGate = new File(gcpHome,"gate-home");
-      File gateHome = gcpGate;
-      if(gateDotHome != null) {
-        gateHome = new File(gateDotHome);
+      if(invokedByGcpCli) {
+        gateHome = gcpGate;
+      } else {
+        if(System.getProperty("gate.home") != null) {
+          useOwnGate = true;
+          gateHome = new File(System.getProperty("gate.home"));
+        } else {
+          gateHome = new File(gcpHome,"gate-home");
+        }
       }
       Gate.setGateHome(gateHome);
-      if(gateDotHome != null) { // if we use our own GATE
+      // depending on which gate we ended up using, we set the config/session 
files
+      if(useOwnGate) { 
+        // if we use our own GATE, we have to set the site wide config file
+        // and we have to set the user config to the one in gcpHome
         Gate.setSiteConfigFile(new File(gcpGate,"gate.xml"));
-        Gate.setUserConfigFile(new File(gcpGate, "user-gate.xml"));
-        Gate.setUserSessionFile(new File(gcpGate, "empty.session"));
-      } else { // we use the GCP GATE
-        Gate.setUserConfigFile(new File(gateHome, "user-gate.xml"));
-        Gate.setUserSessionFile(new File(gateHome, "empty.session"));
       }
+      // we always set the user config file to the one in gcp gate
+      Gate.setUserConfigFile(new File(gcpGate, "user-gate.xml"));
+      // we always set the session file to a non-existent file in gcpGate
+      // This should never get created anyway since the user config
+      // file we use disables the session file.
+      Gate.setUserSessionFile(new File(gcpGate, "empty.session"));
       Gate.init();
       
       BatchRunner instance = new BatchRunner(numThreads);
 
-      batchFile = batchFile.getCanonicalFile();
-      Batch aBatch = XMLBatchParser.fromXml(batchFile);
+      // depending on how we got invoked, create the batch from either 
+      // the xml file or the info we got via the command line arguments
+      Batch aBatch = null;
+      if(invokedByGcpCli) {
+        aBatch = XMLBatchParser.fromXml(batchFile);        
+      } else {
+        if(batchFile != null) {
+          aBatch = XMLBatchParser.fromXml(batchFile);
+        } else {
+          // collect the various parts of the batch based on the command line
+          // settings
+          aBatch = new Batch();
+          if(line.hasOption('b')) {            
+            aBatch.setBatchId(line.getOptionValue('b'));
+          }
+          aBatch.setGateApplication(
+                    (CorpusController)PersistenceManager.loadObjectFromFile(
+                            new File(line.getOptionValue('x'))));
+          String reportFileName;
+          if(line.hasOption('r')) {
+            reportFileName = line.getOptionValue('r');
+          } else {
+            reportFileName = "report.xml";
+          }
+          File reportFile = new File(reportFileName);
+          reportFile = reportFile.getCanonicalFile();
+          aBatch.setReportFile(reportFile);
+          if(line.hasOption('I')) {
+            aBatch.setBatchId(line.getOptionValue('I'));
+          } else {
+            aBatch.setBatchId("GcpBatchId");
+          }
+          // set the input Handler
+          String inputHandlerClassName = "gate.cloud.io.file.FileInputHandler";
+          Map<String,String> configData = new HashMap<String, String>();
+          configData.put(IOConstants.PARAM_DOCUMENT_ROOT, 
line.getOptionValue('i'));
+          configData.put(IOConstants.PARAM_COMPRESSION,"none");
+          configData.put(IOConstants.PARAM_ENCODING, "UTF-8");
+          configData.put(IOConstants.PARAM_FILE_EXTENSION,"");
+          Class<? extends InputHandler> inputHandlerClass =
+                Class.forName(inputHandlerClassName, true, 
Gate.getClassLoader())
+                        .asSubclass(InputHandler.class);
+          InputHandler inputHandler = inputHandlerClass.newInstance();
+          inputHandler.config(configData);
+          inputHandler.init();
+          log.info("Have input handler: "+inputHandler);
+          aBatch.setInputHandler(inputHandler);
+          // set the output Handler
+          String outputHandlerClassName;
+          if(!line.hasOption('f') || line.getOptionValue('f').equals("finf")) {
+            outputHandlerClassName = 
"gate.cloud.io.file.FastInfosetOutputHandler";
+          } else {
+            outputHandlerClassName = "gate.cloud.io.file.FileOutputHandler";
+          }
+          configData = new HashMap<String, String>();
+          configData.put(IOConstants.PARAM_DOCUMENT_ROOT, 
line.getOptionValue('o'));
+          configData.put(IOConstants.PARAM_COMPRESSION,"none");
+          configData.put(IOConstants.PARAM_ENCODING, "UTF-8");
+          configData.put(IOConstants.PARAM_FILE_EXTENSION,"");
+          Class<? extends OutputHandler> ouputHandlerClass =
+          Class.forName(outputHandlerClassName, true, Gate.getClassLoader())
+                 .asSubclass(OutputHandler.class);
+          OutputHandler outHandler = ouputHandlerClass.newInstance();
+          outHandler.config(configData);
+          List<AnnotationSetDefinition> asDefs = new 
ArrayList<AnnotationSetDefinition>();
+          outHandler.setAnnSetDefinitions(asDefs);
+          outHandler.init();
+          log.info("Have output handler: "+outHandler);
+          List<OutputHandler> outHandlers = new ArrayList<OutputHandler>();
+          outHandlers.add(outHandler);
+          aBatch.setOutputHandlers(outHandlers);
+          String enumeratorClassName = 
"gate.cloud.io.file.FileDocumentEnumerator";
+          Class<? extends DocumentEnumerator> enumeratorClass =
+                Class.forName(enumeratorClassName, true, Gate.getClassLoader())
+                        .asSubclass(DocumentEnumerator.class);
+          configData = new HashMap<String, String>();
+          configData.put(IOConstants.PARAM_DOCUMENT_ROOT, 
line.getOptionValue('i'));          
+          List<DocumentID> docIds = new LinkedList<DocumentID>();
+          DocumentEnumerator enumerator = enumeratorClass.newInstance();
+          enumerator.config(configData);
+          enumerator.init();
+          while(enumerator.hasNext()) {
+            DocumentID id = enumerator.next();
+            log.info("Adding document: "+id);
+            docIds.add(id);
+          } 
+          log.info("Number of document ids: "+docIds.size());
+          aBatch.setDocumentIDs(docIds.toArray(new DocumentID[docIds.size()]));
+          log.info("The document ids: "+aBatch.getDocumentIDs());
+          aBatch.init();
+        }
+      }
+      
       log.info("Launching batch:\n" + aBatch);
       instance.runBatch(aBatch);
       instance.shutdownWhenFinished(true);
@@ -594,5 +781,8 @@
       log.error("Error starting up batch " + batchFile, e);
       System.exit(1);
     }
+  
   }
+  
+  
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Open source business process management suite built on Java and Eclipse
Turn processes into business applications with Bonita BPM Community Edition
Quickly connect people, data, and systems into organized workflows
Winner of BOSSIE, CODIE, OW2 and Gartner awards
http://p.sf.net/sfu/Bonitasoft
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to