Revision: 19874
http://sourceforge.net/p/gate/code/19874
Author: johann_p
Date: 2016-12-19 18:46:15 +0000 (Mon, 19 Dec 2016)
Log Message:
-----------
Add option to also set the document name from some CSV file column.
Modified Paths:
--------------
gate/trunk/plugins/Format_CSV/src/gate/corpora/CSVImporter.java
Modified: gate/trunk/plugins/Format_CSV/src/gate/corpora/CSVImporter.java
===================================================================
--- gate/trunk/plugins/Format_CSV/src/gate/corpora/CSVImporter.java	2016-12-19 02:22:07 UTC (rev 19873)
+++ gate/trunk/plugins/Format_CSV/src/gate/corpora/CSVImporter.java	2016-12-19 18:46:15 UTC (rev 19874)
@@ -64,6 +64,8 @@
private static JComponent dialog = null;
private static SpinnerNumberModel textColModel = null;
+
+ private static SpinnerNumberModel nameColModel = null;
private static JCheckBox cboFeatures = null;
@@ -105,6 +107,8 @@
txtEncoding = new JTextField("UTF-8");
+ nameColModel = new SpinnerNumberModel(-1, -1,
+ Integer.MAX_VALUE, 1);
dialog.setLayout(new GridBagLayout());
@@ -208,8 +212,29 @@
constraints.gridy = 4;
constraints.gridwidth = GridBagConstraints.RELATIVE;
constraints.anchor = GridBagConstraints.NORTHWEST;
+ constraints.insets = new Insets(0, 0, 15, 0);
dialog.add(cboDocuments, constraints);
+ constraints = new GridBagConstraints();
+ constraints.gridx = GridBagConstraints.RELATIVE;
+ constraints.gridy = 5;
+ constraints.gridwidth = 3;
+ constraints.anchor = GridBagConstraints.NORTHWEST;
+ dialog.add(new JLabel("Document Name From Column (-1=none)"), constraints);
+
+ constraints = new GridBagConstraints();
+ constraints.gridx = GridBagConstraints.RELATIVE;
+ constraints.gridy = 5;
+ constraints.gridwidth = 3;
+ constraints.anchor = GridBagConstraints.NORTHWEST;
+ dialog.add(new JSpinner(nameColModel), constraints);
+
+ constraints = new GridBagConstraints();
+ constraints.gridx = GridBagConstraints.RELATIVE;
+ constraints.gridy = 6;
+ constraints.anchor = GridBagConstraints.NORTHWEST;
+ dialog.add(new JLabel("(Ignored if single document is created)"), constraints);
+
btnCSVURL.addActionListener(new ActionListener() {
@Override
public void actionPerformed(ActionEvent e) {
@@ -301,7 +326,8 @@
// options from the GUI
populate((Corpus)handle.getTarget(),
f.toURI().toURL(), txtEncoding.getText(),
(Integer)textColModel.getValue(),
- cboFeatures.isSelected(), separator, quote);
+ cboFeatures.isSelected(), separator, quote,
+ (Integer)nameColModel.getValue());
} else {
// if we are creating a single document from a single
// file
@@ -324,7 +350,8 @@
populate((Corpus)handle.getTarget(),
new URL(txtURL.getText()), txtEncoding.getText(),
(Integer)textColModel.getValue(),
- cboFeatures.isSelected(), separator, quote);
+ cboFeatures.isSelected(), separator, quote,
+ (Integer)nameColModel.getValue());
} else {
// if we are creating a single document from a single file
// then call the createDoc method passing through all the
@@ -356,8 +383,13 @@
public static void populate(Corpus corpus, URL csv, String encoding, int column,
boolean colLabels) {
- populate(corpus, csv, encoding, column, colLabels, ',', '"');
+ populate(corpus, csv, encoding, column, colLabels, ',', '"',-1);
}
+
+ public static void populate(Corpus corpus, URL csv, String encoding, int column,
+ boolean colLabels, int nameColumn) {
+ populate(corpus, csv, encoding, column, colLabels, ',', '"',nameColumn);
+ }
/**
* Create a new document from each row and push it into the specified corpus
@@ -366,6 +398,8 @@
* the Corpus to add documents to
* @param csv
* the URL of the CSV file to processes
+ * @param encoding
+ * the encoding of the CSV input file
* @param column
* the (zero index based) column which contains the text content
* @param colLabels
@@ -375,9 +409,13 @@
* @param quote
* the character used to quote data that includes the column
* separator (usually ")
+ * @param nameColumn
+ * the (zero index based) column which contains the document name to use. If this is
+ * less than 0 then the default name GATE assigns to a new document is used instead.
+ * The default name is also used if a line does not contain that column.
*/
public static void populate(Corpus corpus, URL csv, String encoding, int column,
- boolean colLabels, char separator, char quote) {
+ boolean colLabels, char separator, char quote, int nameColumn) {
CSVReader reader = null;
try {
// open a CSVReader over the URL
@@ -419,7 +457,9 @@
Document doc =
(Document)Factory.createResource(
gate.corpora.DocumentImpl.class.getName(), params, fmap);
-
+ if(nameColumn > -1 && nameColumn < nextLine.length) {
+ doc.setName(nextLine[nameColumn]);
+ }
// add the document to the corpus
corpus.add(doc);
@@ -457,6 +497,8 @@
* the Corpus to add documents to
* @param csv
* the URL of the CSV file to processes
+ * @param encoding
+ * the character encoding of the CSV input file content
* @param column
* the (zero index based) column which contains the text content
* @param colLabels
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Developer Access Program for Intel Xeon Phi Processors
Access to Intel Xeon Phi processor-based developer platforms.
With one year of Intel Parallel Studio XE.
Training and support from Colfax.
Order your platform today. http://sdm.link/intel
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs