Revision: 19874
          http://sourceforge.net/p/gate/code/19874
Author:   johann_p
Date:     2016-12-19 18:46:15 +0000 (Mon, 19 Dec 2016)
Log Message:
-----------
Add option to also set the document name from some CSV file column.

Modified Paths:
--------------
    gate/trunk/plugins/Format_CSV/src/gate/corpora/CSVImporter.java

Modified: gate/trunk/plugins/Format_CSV/src/gate/corpora/CSVImporter.java
===================================================================
--- gate/trunk/plugins/Format_CSV/src/gate/corpora/CSVImporter.java	2016-12-19 02:22:07 UTC (rev 19873)
+++ gate/trunk/plugins/Format_CSV/src/gate/corpora/CSVImporter.java	2016-12-19 18:46:15 UTC (rev 19874)
@@ -64,6 +64,8 @@
   private static JComponent dialog = null;
 
   private static SpinnerNumberModel textColModel = null;
+  
+  private static SpinnerNumberModel nameColModel = null;
 
   private static JCheckBox cboFeatures = null;
 
@@ -105,6 +107,8 @@
     
     txtEncoding = new JTextField("UTF-8");
 
+    nameColModel = new SpinnerNumberModel(-1, -1,
+        Integer.MAX_VALUE, 1);
     
     dialog.setLayout(new GridBagLayout());
 
@@ -208,8 +212,29 @@
     constraints.gridy = 4;
     constraints.gridwidth = GridBagConstraints.RELATIVE;
     constraints.anchor = GridBagConstraints.NORTHWEST;
+    constraints.insets = new Insets(0, 0, 15, 0);
     dialog.add(cboDocuments, constraints);
 
+    constraints = new GridBagConstraints();
+    constraints.gridx = GridBagConstraints.RELATIVE;
+    constraints.gridy = 5;
+    constraints.gridwidth = 3;
+    constraints.anchor = GridBagConstraints.NORTHWEST;
+    dialog.add(new JLabel("Document Name From Column (-1=none)"), constraints);
+
+    constraints = new GridBagConstraints();
+    constraints.gridx = GridBagConstraints.RELATIVE;
+    constraints.gridy = 5;
+    constraints.gridwidth = 3;
+    constraints.anchor = GridBagConstraints.NORTHWEST;
+    dialog.add(new JSpinner(nameColModel), constraints);
+    
+    constraints = new GridBagConstraints();
+    constraints.gridx = GridBagConstraints.RELATIVE;
+    constraints.gridy = 6;
+    constraints.anchor = GridBagConstraints.NORTHWEST;
+    dialog.add(new JLabel("(Ignored if single document is created)"), constraints);
+    
     btnCSVURL.addActionListener(new ActionListener() {
       @Override
       public void actionPerformed(ActionEvent e) {
@@ -301,7 +326,8 @@
                         // options from the GUI
                         populate((Corpus)handle.getTarget(), f.toURI().toURL(), txtEncoding.getText(),
                             (Integer)textColModel.getValue(),
-                            cboFeatures.isSelected(), separator, quote);
+                            cboFeatures.isSelected(), separator, quote,
+                            (Integer)nameColModel.getValue());
                       } else {
                         // if we are creating a single document from a single
                         // file
@@ -324,7 +350,8 @@
                       populate((Corpus)handle.getTarget(),
                           new URL(txtURL.getText()), txtEncoding.getText(),
                           (Integer)textColModel.getValue(),
-                          cboFeatures.isSelected(), separator, quote);
+                          cboFeatures.isSelected(), separator, quote,
+                          (Integer)nameColModel.getValue());
                     } else {
                       // if we are creating a single document from a single file
                       // then call the createDoc method passing through all the
@@ -356,8 +383,13 @@
 
   public static void populate(Corpus corpus, URL csv, String encoding, int column,
       boolean colLabels) {
-    populate(corpus, csv, encoding, column, colLabels, ',', '"');
+    populate(corpus, csv, encoding, column, colLabels, ',', '"',-1);
   }
+  
+  public static void populate(Corpus corpus, URL csv, String encoding, int column,
+      boolean colLabels, int nameColumn) {
+    populate(corpus, csv, encoding, column, colLabels, ',', '"',nameColumn);
+  }
 
   /**
    * Create a new document from each row and push it into the specified corpus
@@ -366,6 +398,8 @@
    *          the Corpus to add documents to
    * @param csv
    *          the URL of the CSV file to processes
+   * @param encoding
+   *          the encoding of the CSV input file
    * @param column
    *          the (zero index based) column which contains the text content
    * @param colLabels
@@ -375,9 +409,13 @@
    * @param quote
    *          the character used to quote data that includes the column
    *          separator (usually ")
+   * @param nameColumn
+   *          the (zero index based) column which contains the document name to use. If this is
+   *          less than 0 then the default name GATE assigns to a new document is used instead.
+   *          The default name is also used if a line does not contain that column.
    */
   public static void populate(Corpus corpus, URL csv, String encoding, int column,
-      boolean colLabels, char separator, char quote) {
+      boolean colLabels, char separator, char quote, int nameColumn) {
     CSVReader reader = null;
     try {
       // open a CSVReader over the URL
@@ -419,7 +457,9 @@
         Document doc =
             (Document)Factory.createResource(
                 gate.corpora.DocumentImpl.class.getName(), params, fmap);
-
+        if(nameColumn > -1 && nameColumn < nextLine.length) {
+          doc.setName(nextLine[nameColumn]);
+        }
         // add the document to the corpus
         corpus.add(doc);
 
@@ -457,6 +497,8 @@
    *          the Corpus to add documents to
    * @param csv
    *          the URL of the CSV file to processes
+   * @param encoding
+   *          the character encoding of the CSV input file content
    * @param column
    *          the (zero index based) column which contains the text content
    * @param colLabels

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Developer Access Program for Intel Xeon Phi Processors
Access to Intel Xeon Phi processor-based developer platforms.
With one year of Intel Parallel Studio XE.
Training and support from Colfax.
Order your platform today. http://sdm.link/intel
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to