Revision: 18970
http://sourceforge.net/p/gate/code/18970
Author: adamfunk
Date: 2015-10-26 14:46:14 +0000 (Mon, 26 Oct 2015)
Log Message:
-----------
You can now specify a document feature to use as the identifier in
termbank listings (falling back to the sourceURL, and then to getName(),
if the feature is missing/blank). Added the corpus index in [] after the
identifier to make uniqueness even more likely.
Cleared the remaining Eclipse warnings.
Modified Paths:
--------------
gate/trunk/plugins/TermRaider/.classpath
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java
gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java
Modified: gate/trunk/plugins/TermRaider/.classpath
===================================================================
--- gate/trunk/plugins/TermRaider/.classpath 2015-10-26 10:31:39 UTC (rev
18969)
+++ gate/trunk/plugins/TermRaider/.classpath 2015-10-26 14:46:14 UTC (rev
18970)
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
- <classpathentry kind="con"
path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
<classpathentry combineaccessrules="false" exported="true" kind="src"
path="/GATE"/>
+ <classpathentry kind="con"
path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="output" path="classes"/>
</classpath>
Modified:
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
===================================================================
---
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
2015-10-26 10:31:39 UTC (rev 18969)
+++
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
2015-10-26 14:46:14 UTC (rev 18970)
@@ -145,7 +145,7 @@
boolean wasLoaded = corpus.isDocumentLoaded(i);
Document document = (Document) corpus.get(i);
- addData(document);
+ addData(document, i);
// datastore safety
if (! wasLoaded) {
@@ -166,7 +166,7 @@
/* BEHOLD THE GUBBINS to distinguish the various (potential) types of
Pairbanks*/
- protected abstract void addData(Document document);
+ protected abstract void addData(Document document, int index);
protected abstract void calculateScores();
Modified:
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
---
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
2015-10-26 10:31:39 UTC (rev 18969)
+++
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
2015-10-26 14:46:14 UTC (rev 18970)
@@ -46,6 +46,7 @@
// additional CREOLE init parameters
protected Set<String> inputAnnotationTypes;
+ protected String idDocumentFeature;
// transient to allow serialization
protected transient List<Action> actionsList;
@@ -225,7 +226,7 @@
boolean wasLoaded = corpus.isDocumentLoaded(i);
Document document = (Document) corpus.get(i);
- processDocument(document);
+ processDocument(document, i);
// datastore safety
if (! wasLoaded) {
@@ -241,7 +242,7 @@
protected abstract void resetScores();
- protected abstract void processDocument(Document document);
+ protected abstract void processDocument(Document document, int index);
/**
* This also needs to fill types and languages
@@ -332,4 +333,17 @@
return this.inputAnnotationTypes;
}
+
+ @CreoleParameter(comment = "doc feature to use for identification (blank =
use sourceURL)",
+ defaultValue = "")
+ public void setIdDocumentFeature(String name) {
+ this.idDocumentFeature = name;
+ }
+
+ public String getIdDocumentFeature() {
+ return this.idDocumentFeature;
+ }
+
+
+
}
Modified:
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
===================================================================
---
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
2015-10-26 10:31:39 UTC (rev 18969)
+++
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
2015-10-26 14:46:14 UTC (rev 18970)
@@ -50,9 +50,9 @@
private ScoreType rawScoreST, termFrequencyST, localDocFrequencyST;
- protected void processDocument(Document document) {
+ protected void processDocument(Document document, int index) {
documentCount++;
- String documentSource = Utilities.sourceOrName(document);
+ String documentSource = Utilities.docIdentifier(document,
idDocumentFeature, index);
AnnotationSet candidates =
document.getAnnotations(inputASName).get(inputAnnotationTypes);
for (Annotation candidate : candidates) {
Modified:
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
---
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
2015-10-26 10:31:39 UTC (rev 18969)
+++
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
2015-10-26 14:46:14 UTC (rev 18970)
@@ -134,18 +134,18 @@
}
- protected void processDocument(Document document) {
+ protected void processDocument(Document document, int index) {
if (this.segmentAnnotationType.isEmpty() || (this.segmentAnnotationType ==
null)) {
- processWholeDocument(document);
+ processWholeDocument(document, index);
}
else {
- processDocumentSegments(document);
+ processDocumentSegments(document, index);
}
}
- protected void processDocumentSegments(Document document) {
- String documentSource = Utilities.sourceOrName(document);
+ protected void processDocumentSegments(Document document, int index) {
+ String documentSource = Utilities.docIdentifier(document,
idDocumentFeature, index);
AnnotationSet segments =
document.getAnnotations(inputASName).get(segmentAnnotationType);
AnnotationSet candidates =
document.getAnnotations(inputASName).get(inputAnnotationTypes);
@@ -166,9 +166,9 @@
}
- protected void processWholeDocument(Document document) {
+ protected void processWholeDocument(Document document, int index) {
documentCount++;
- String documentSource = Utilities.sourceOrName(document);
+ String documentSource = Utilities.docIdentifier(document,
idDocumentFeature, index);
AnnotationSet candidates =
document.getAnnotations(inputASName).get(inputAnnotationTypes);
Set<Term> documentTerms = new HashSet<Term>();
Modified:
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
===================================================================
---
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
2015-10-26 10:31:39 UTC (rev 18969)
+++
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
2015-10-26 14:46:14 UTC (rev 18970)
@@ -73,9 +73,9 @@
}
- protected void processDocument(Document document) {
+ protected void processDocument(Document document, int index) {
documentCount++;
- String documentSource = Utilities.sourceOrName(document);
+ String documentSource = Utilities.docIdentifier(document,
idDocumentFeature, index);
AnnotationSet candidates =
document.getAnnotations(inputASName).get(inputAnnotationTypes);
for (Annotation candidate : candidates) {
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java
2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java
2015-10-26 14:46:14 UTC (rev 18970)
@@ -55,8 +55,9 @@
- protected void addData(Document document) {
- String documentSource = Utilities.sourceOrName(document);
+ protected void addData(Document document, int index) {
+ // TODO: add support for the doc ID feature
+ String documentSource = Utilities.docIdentifier(document, null, index);
/** Collocations that have already been processed in this document
* (each collocation is a pair of IDs for a Token annotation), to avoid
counting
* them again. */
Modified:
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
2015-10-26 14:46:14 UTC (rev 18970)
@@ -55,9 +55,9 @@
- protected void processDocument(Document document) {
+ protected void processDocument(Document document, int index) {
documentCount++;
- String documentSource = Utilities.sourceOrName(document);
+ String documentSource = Utilities.docIdentifier(document,
idDocumentFeature, index);
AnnotationSet candidates =
document.getAnnotations(inputASName).get(inputAnnotationTypes);
for (Annotation candidate : candidates) {
Modified:
gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java
2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java
2015-10-26 14:46:14 UTC (rev 18970)
@@ -83,7 +83,7 @@
private TermbankTableModel termbankTableModel;
private XHTMLPanel termCloud = new XHTMLPanel();
- private JComboBox cloudType;
+ private JComboBox<ScoreType> cloudType;
private JSlider cloudSize = new JSlider();
private List<ScoreType> scoreTypes;
@@ -177,7 +177,7 @@
cloudBar.setFloatable(false);
JButton btnExport = new JButton(MainFrame.getIcon("Download"));
- cloudType = new JComboBox();
+ cloudType = new JComboBox<ScoreType>();
Hashtable<Integer, JLabel> labelTable = new Hashtable<Integer,JLabel>();
labelTable.put(0, new JLabel(MainFrame.getIcon("Sunny")));
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java
2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java
2015-10-26 14:46:14 UTC (rev 18970)
@@ -131,14 +131,28 @@
}
- public static String sourceOrName(Document document) {
- URL url = document.getSourceUrl();
- if (url == null) {
- return document.getName();
+ public static String docIdentifier(Document document, String feature, int
index) {
+ String identifier = null;
+ if ( (feature != null) && (! feature.isEmpty() ) &&
+ document.getFeatures().containsKey(feature) ) {
+ Object value = document.getFeatures().get(feature);
+ if (value != null) {
+ identifier = value.toString();
+ }
}
+
+ if (identifier == null) {
+ URL url = document.getSourceUrl();
+ if (url != null) {
+ identifier = url.toString();
+ }
+ }
- //implied else
- return url.toString();
+ if (identifier == null) {
+ identifier = document.getName();
+ }
+
+ return String.format("%s [%d]", identifier, index);
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs