Revision: 17373
http://sourceforge.net/p/gate/code/17373
Author: adamfunk
Date: 2014-02-20 16:20:31 +0000 (Thu, 20 Feb 2014)
Log Message:
-----------
DFB & even the viewer appear to work so far
Modified Paths:
--------------
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
Added Paths:
-----------
gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java	2014-02-20 15:54:13 UTC (rev 17372)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java	2014-02-20 16:20:31 UTC (rev 17373)
@@ -22,11 +22,15 @@
import gate.Resource;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
+import gate.creole.metadata.CreoleResource;
import gate.gui.ActionsPublisher;
import gate.termraider.gui.ActionSaveCsv;
import gate.termraider.util.*;
import gate.util.GateException;
+@CreoleResource(name = "DocumentFrequencyBank",
+icon = "termbank-lr.png",
+comment = "Document frequency counter derived from corpora and other DFBs")
public class DocumentFrequencyBank extends AbstractBank
implements ActionsPublisher{
@@ -41,6 +45,7 @@
private int documentTotal;
private Map<Term, Integer> documentFrequencies;
private int minFrequency, maxFrequency;
+ private Map<String, Set<Term>> stringLookupTable;
// transient to allow serialization
protected transient List<Action> actionsList;
@@ -73,6 +78,7 @@
documentFrequencies = new HashMap<Term, Integer>();
languages = new HashSet<String>();
types = new HashSet<String>();
+ stringLookupTable = new HashMap<String, Set<Term>>();
}
@@ -96,7 +102,7 @@
for (DocumentFrequencyBank bank : inputBanks) {
this.documentTotal += bank.documentTotal;
for (Term term : bank.getTerms()) {
- increment(term, bank.getFrequency(term));
+ increment(term, bank.getFrequencyStrict(term));
}
}
}
@@ -106,7 +112,7 @@
for (int i=0 ; i < corpus.size() ; i++) {
boolean wasLoaded = corpus.isDocumentLoaded(i);
Document document = (Document) corpus.get(i);
- addData(document);
+ processDocument(document);
// datastore safety
if (! wasLoaded) {
corpus.unloadDocument(document);
@@ -116,7 +122,7 @@
}
- protected void addData(Document document) {
+ protected void processDocument(Document document) {
documentTotal++;
AnnotationSet candidates =
document.getAnnotations(inputASName).get(inputAnnotationTypes);
@@ -132,14 +138,20 @@
private void churnData() {
- minFrequency = this.getFrequency(this.getTerms().iterator().next());
+ if (this.getTerms().size() > 0) {
+ minFrequency = this.getFrequencyStrict(this.getTerms().iterator().next());
+ }
+ else {
+ minFrequency = 0;
+ }
maxFrequency = 0;
for (Term term : this.getTerms()) {
- int freq = this.getFrequency(term);
+ int freq = this.getFrequencyStrict(term);
maxFrequency = Math.max(maxFrequency, freq);
minFrequency = Math.min(minFrequency, freq);
this.types.add(term.getType());
this.languages.add(term.getLanguageCode());
+ storeStringLookup(term);
}
}
@@ -148,7 +160,7 @@
return documentFrequencies.keySet();
}
- public int getFrequency(Term term) {
+ public int getFrequencyStrict(Term term) {
if (documentFrequencies.containsKey(term)) {
return documentFrequencies.get(term).intValue();
}
@@ -157,8 +169,27 @@
}
+ public int getFrequencyLax(Term term) {
+ // Try for an exact match first
+ if (documentFrequencies.containsKey(term)) {
+ return documentFrequencies.get(term).intValue();
+ }
+
+ // Now see if there's one with a blank language code
+ String termString = term.getTermString();
+ if (stringLookupTable.containsKey(termString)) {
+ for (Term testTerm : stringLookupTable.get(termString)) {
+ if (testTerm.closeMatch(term)) {
+ return documentFrequencies.get(testTerm).intValue();
+ }
+ }
+ }
+
+ return 0;
+ }
+
@CreoleParameter(comment = "Other DFBs to compile into the new one")
public void setInputBanks(Set<DocumentFrequencyBank> inputBanks) {
this.inputBanks = inputBanks;
@@ -191,17 +222,25 @@
return new Double(this.maxFrequency);
}
+
+ public int getMinFrequency() {
+ return this.minFrequency;
+ }
+
+ public int getMaxFrequency() {
+ return this.maxFrequency;
+ }
@Override
public void saveAsCsv(double threshold, File file) throws GateException {
+ System.out.println("CSV output has not yet been implemented.");
// TODO Auto-generated method stub
-
}
@Override
public void saveAsCsv(File file) throws GateException {
- // TODO Auto-generated method stub
+ saveAsCsv(0.0, file);
}
@@ -245,4 +284,26 @@
documentFrequencies.put(term, count);
}
+
+ private void storeStringLookup(Term term) {
+ String termString = term.getTermString();
+ Set<Term> terms;
+ if (stringLookupTable.containsKey(termString)) {
+ terms = stringLookupTable.get(termString);
+ }
+ else {
+ terms = new HashSet<Term>();
+ }
+ terms.add(term);
+ stringLookupTable.put(termString, terms);
+ }
+
+
+ public Map<Term, Integer> getDocFrequencies() {
+ return this.documentFrequencies;
+ }
+
+ public int getTotalDocs() {
+ return this.documentTotal;
+ }
}
Copied: gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java (from rev 17354, gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java)
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java	(rev 0)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java	2014-02-20 16:20:31 UTC (rev 17373)
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2008--2012, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * $Id$
+ */
+package gate.termraider.gui;
+
+import gate.Resource;
+import gate.creole.ANNIEConstants;
+import gate.creole.AbstractVisualResource;
+import gate.creole.metadata.CreoleResource;
+import gate.creole.metadata.GuiType;
+import gate.event.ProgressListener;
+import gate.termraider.bank.*;
+import gate.termraider.util.*;
+import java.awt.BorderLayout;
+import java.util.*;
+import javax.swing.JPanel;
+import javax.swing.JScrollPane;
+import javax.swing.JTabbedPane;
+import javax.swing.JTable;
+import javax.swing.JTextField;
+import javax.swing.table.AbstractTableModel;
+
+
+@CreoleResource(name = "Document Frequency Viewer",
+ comment = "viewer for the TermRaider DocumentFrequencyBank",
+ guiType = GuiType.LARGE,
+ mainViewer = true,
+ resourceDisplayed = "gate.termraider.bank.DocumentFrequencyBank")
+public class DocumentFrequencyViewer
+ extends AbstractVisualResource
+ implements ANNIEConstants, ProgressListener {
+
+ private static final long serialVersionUID = 5632849477601995493L;
+
+ private JScrollPane freqScrollPane;
+ private DocumentFrequencyBank dfb;
+ private JTable freqTable;
+ private JTabbedPane tabbedPane;
+ private DFTableModel freqTableModel;
+ private JTextField docsField;
+
+ @Override
+ public Resource init() {
+ initGuiComponents();
+ return this;
+ }
+
+
+ private void initGuiComponents() {
+ setLayout(new BorderLayout());
+ tabbedPane = new JTabbedPane();
+ JPanel tableTab = new JPanel(new BorderLayout());
+ tabbedPane.addTab("Document Frequency", tableTab);
+
+ docsField = new JTextField("...");
+ tableTab.add(docsField, BorderLayout.NORTH);
+
+ freqTableModel = new DFTableModel();
+ freqTable = new JTable(freqTableModel);
+ freqTable.setAutoCreateRowSorter(true);
+ freqScrollPane = new JScrollPane(freqTable,
+ JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED,
+ JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
+ tableTab.add(freqScrollPane, BorderLayout.CENTER);
+
+ this.add(tabbedPane, BorderLayout.CENTER);
+ tabbedPane.validate();
+ tabbedPane.repaint();
+ }
+
+
+ private void setDocsField() {
+ docsField.setText("Doc count = " + this.dfb.getTotalDocs());
+ }
+
+
+ public void processFinished() {
+ setTarget(dfb);
+ }
+
+ public void progressChanged(int i) {
+ // nothing?
+ }
+
+ public void setTarget(Object target) {
+ if(target == null || ! (target instanceof DocumentFrequencyBank)) {
+ throw new IllegalArgumentException("This Viewer cannot show a "
+ + (target == null ? "null" : target.getClass().toString()));
+ }
+
+ dfb = (DocumentFrequencyBank) target;
+ setDocsField();
+ freqTableModel.setBank(this.dfb);
+ }
+}
+
+
+class DFTableModel extends AbstractTableModel {
+ private static final long serialVersionUID = -7654670667296912991L;
+ private List<Term> terms;
+ private String[] columnNames = {"term", "doc frequency"};
+ private Map<Term, Integer> docFrequencies;
+
+ public DFTableModel() {
+ this.docFrequencies = new HashMap<Term, Integer>();
+ this.terms = new ArrayList<Term>();
+ }
+
+ public void setBank(DocumentFrequencyBank termbank) {
+ this.docFrequencies = termbank.getDocFrequencies();
+ this.terms = new ArrayList<Term>(docFrequencies.keySet());
+ Collections.sort(this.terms, new TermComparator());
+ }
+
+ public int getColumnCount() {
+ return 2;
+ }
+
+ public int getRowCount() {
+ return this.terms.size();
+ }
+
+ public Object getValueAt(int row, int col) {
+ Term term = this.terms.get(row);
+ if (col == 0) {
+ return term.toString();
+ }
+ // implied else
+ if (this.docFrequencies.containsKey(term)) {
+ return this.docFrequencies.get(term);
+ }
+ return 0;
+ }
+
+ public Class<?> getColumnClass(int col) {
+ if (col == 0) {
+ return String.class;
+ }
+ // implied else
+ return Integer.class;
+ }
+
+ public String getColumnName(int col) {
+ return columnNames[col];
+ }
+
+}
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java	2014-02-20 15:54:13 UTC (rev 17372)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java	2014-02-20 16:20:31 UTC (rev 17373)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008--2012, The University of Sheffield. See the file
+ * Copyright (c) 2008--2014, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -27,6 +27,8 @@
private JSlider slider;
+
+
public SliderPanel(AbstractBank scoredbank, String verb, boolean startLeft,
TermbankViewer viewer) {
this.scoredbank = scoredbank;
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Managing the Performance of Cloud-Based Applications
Take advantage of what the Cloud has to offer - Avoid Common Pitfalls.
Read the Whitepaper.
http://pubads.g.doubleclick.net/gampad/clk?id=121054471&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs