Revision: 17373
          http://sourceforge.net/p/gate/code/17373
Author:   adamfunk
Date:     2014-02-20 16:20:31 +0000 (Thu, 20 Feb 2014)
Log Message:
-----------
DFB & even the viewer appear to work so far

Modified Paths:
--------------
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java

Added Paths:
-----------
    gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java      2014-02-20 15:54:13 UTC (rev 17372)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java      2014-02-20 16:20:31 UTC (rev 17373)
@@ -22,11 +22,15 @@
 import gate.Resource;
 import gate.creole.ResourceInstantiationException;
 import gate.creole.metadata.CreoleParameter;
+import gate.creole.metadata.CreoleResource;
 import gate.gui.ActionsPublisher;
 import gate.termraider.gui.ActionSaveCsv;
 import gate.termraider.util.*;
 import gate.util.GateException;
 
+@CreoleResource(name = "DocumentFrequencyBank",
+icon = "termbank-lr.png",
+comment = "Document frequency counter derived from corpora and other DFBs")
 public class DocumentFrequencyBank extends AbstractBank
 implements ActionsPublisher{
   
@@ -41,6 +45,7 @@
   private int documentTotal;
   private Map<Term, Integer> documentFrequencies;
   private int minFrequency, maxFrequency;
+  private Map<String, Set<Term>> stringLookupTable;
 
   // transient to allow serialization
   protected transient List<Action> actionsList;
@@ -73,6 +78,7 @@
     documentFrequencies = new HashMap<Term, Integer>();
     languages = new HashSet<String>();
     types = new HashSet<String>();
+    stringLookupTable = new HashMap<String, Set<Term>>();
   }
 
   
@@ -96,7 +102,7 @@
     for (DocumentFrequencyBank bank : inputBanks) {
       this.documentTotal += bank.documentTotal;
       for (Term term : bank.getTerms()) {
-        increment(term, bank.getFrequency(term));
+        increment(term, bank.getFrequencyStrict(term));
       }
     }
   }
@@ -106,7 +112,7 @@
     for (int i=0 ; i < corpus.size() ; i++) {
       boolean wasLoaded = corpus.isDocumentLoaded(i);
       Document document = (Document) corpus.get(i);
-      addData(document);
+      processDocument(document);
       // datastore safety
       if (! wasLoaded) {
         corpus.unloadDocument(document);
@@ -116,7 +122,7 @@
   }
 
   
-  protected void addData(Document document) {
+  protected void processDocument(Document document) {
     documentTotal++;
     AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes);
 
@@ -132,14 +138,20 @@
 
   
   private void churnData() {
-    minFrequency = this.getFrequency(this.getTerms().iterator().next());
+    if (this.getTerms().size() > 0) {
+      minFrequency = this.getFrequencyStrict(this.getTerms().iterator().next());
+    }
+    else {
+      minFrequency = 0;
+    }
     maxFrequency = 0;
     for (Term term : this.getTerms()) {
-      int freq = this.getFrequency(term);
+      int freq = this.getFrequencyStrict(term);
       maxFrequency = Math.max(maxFrequency, freq);
       minFrequency = Math.min(minFrequency, freq);
       this.types.add(term.getType());
       this.languages.add(term.getLanguageCode());
+      storeStringLookup(term);
     }
   }
   
@@ -148,7 +160,7 @@
     return documentFrequencies.keySet();
   }
   
-  public int getFrequency(Term term) {
+  public int getFrequencyStrict(Term term) {
     if (documentFrequencies.containsKey(term)) {
       return documentFrequencies.get(term).intValue();
     }
@@ -157,8 +169,27 @@
   }
   
   
+  public int getFrequencyLax(Term term) {
+    // Try for an exact match first
+    if (documentFrequencies.containsKey(term)) {
+      return documentFrequencies.get(term).intValue();
+    }
+    
+    // Now see if there's one with a blank language code
+    String termString = term.getTermString();
+    if (stringLookupTable.containsKey(termString)) {
+      for (Term testTerm : stringLookupTable.get(termString)) {
+        if (testTerm.closeMatch(term)) {
+          return documentFrequencies.get(testTerm).intValue();
+        }
+      }
+    }
+    
+    return 0;
+  }
   
   
+  
   @CreoleParameter(comment = "Other DFBs to compile into the new one")
   public void setInputBanks(Set<DocumentFrequencyBank> inputBanks) {
     this.inputBanks = inputBanks;
@@ -191,17 +222,25 @@
     return new Double(this.maxFrequency);
   }
 
+  
+  public int getMinFrequency() {
+    return this.minFrequency;
+  }
+  
+  public int getMaxFrequency() {
+    return this.maxFrequency;
+  }
 
   @Override
   public void saveAsCsv(double threshold, File file) throws GateException {
+    System.out.println("CSV output has not yet been implemented.");
     // TODO Auto-generated method stub
-    
   }
 
 
   @Override
   public void saveAsCsv(File file) throws GateException {
-    // TODO Auto-generated method stub
+    saveAsCsv(0.0, file);
   }
 
   
@@ -245,4 +284,26 @@
     documentFrequencies.put(term, count);
   }
   
+  
+  private void storeStringLookup(Term term) {
+    String termString = term.getTermString();
+    Set<Term> terms;
+    if (stringLookupTable.containsKey(termString)) {
+      terms = stringLookupTable.get(termString);
+    }
+    else {
+      terms = new HashSet<Term>();
+    }
+    terms.add(term);
+    stringLookupTable.put(termString, terms);
+  }
+  
+  
+  public Map<Term, Integer> getDocFrequencies() {
+    return this.documentFrequencies;
+  }
+  
+  public int getTotalDocs() {
+    return this.documentTotal;
+  }
 }

Copied: gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java (from rev 17354, gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java)
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java      (rev 0)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java      2014-02-20 16:20:31 UTC (rev 17373)
@@ -0,0 +1,155 @@
+/*
+ *  Copyright (c) 2008--2012, The University of Sheffield. See the file
+ *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ *  This file is part of GATE (see http://gate.ac.uk/), and is free
+ *  software, licenced under the GNU Library General Public License,
+ *  Version 2, June 1991 (in the distribution as file licence.html,
+ *  and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ *  $Id$
+ */
+package gate.termraider.gui;
+
+import gate.Resource;
+import gate.creole.ANNIEConstants;
+import gate.creole.AbstractVisualResource;
+import gate.creole.metadata.CreoleResource;
+import gate.creole.metadata.GuiType;
+import gate.event.ProgressListener;
+import gate.termraider.bank.*;
+import gate.termraider.util.*;
+import java.awt.BorderLayout;
+import java.util.*;
+import javax.swing.JPanel;
+import javax.swing.JScrollPane;
+import javax.swing.JTabbedPane;
+import javax.swing.JTable;
+import javax.swing.JTextField;
+import javax.swing.table.AbstractTableModel;
+
+
+@CreoleResource(name = "Document Frequency Viewer",
+        comment = "viewer for the TermRaider DocumentFrequencyBank",
+        guiType = GuiType.LARGE,
+        mainViewer = true,
+        resourceDisplayed = "gate.termraider.bank.DocumentFrequencyBank")
+public class DocumentFrequencyViewer 
+  extends AbstractVisualResource 
+  implements ANNIEConstants, ProgressListener {
+
+  private static final long serialVersionUID = 5632849477601995493L;
+  
+  private JScrollPane freqScrollPane;
+  private DocumentFrequencyBank dfb;
+  private JTable freqTable;
+  private JTabbedPane tabbedPane;
+  private DFTableModel freqTableModel;
+  private JTextField docsField;
+  
+  @Override
+  public Resource init() {
+    initGuiComponents();
+    return this;
+  }
+
+
+  private void initGuiComponents() {
+    setLayout(new BorderLayout());
+    tabbedPane = new JTabbedPane();
+    JPanel tableTab = new JPanel(new BorderLayout());
+    tabbedPane.addTab("Document Frequency", tableTab);
+    
+    docsField = new JTextField("...");
+    tableTab.add(docsField, BorderLayout.NORTH);
+
+    freqTableModel = new DFTableModel();
+    freqTable = new JTable(freqTableModel);
+    freqTable.setAutoCreateRowSorter(true);
+    freqScrollPane = new JScrollPane(freqTable, 
+            JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED, 
+            JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
+    tableTab.add(freqScrollPane, BorderLayout.CENTER);
+    
+    this.add(tabbedPane, BorderLayout.CENTER);
+    tabbedPane.validate();
+    tabbedPane.repaint();
+  }
+  
+  
+  private void setDocsField() {
+    docsField.setText("Doc count = " + this.dfb.getTotalDocs());
+  }
+  
+  
+  public void processFinished() {
+    setTarget(dfb);
+  }
+
+  public void progressChanged(int i) {
+    // nothing?
+  }  
+
+  public void setTarget(Object target) {
+    if(target == null || ! (target instanceof DocumentFrequencyBank)) {
+      throw new IllegalArgumentException("This Viewer cannot show a "
+              + (target == null ? "null" : target.getClass().toString()));
+    }
+    
+    dfb = (DocumentFrequencyBank) target;
+    setDocsField();
+    freqTableModel.setBank(this.dfb);
+  }
+}
+
+
+class DFTableModel extends AbstractTableModel {
+  private static final long serialVersionUID = -7654670667296912991L;
+  private List<Term> terms;
+  private String[] columnNames = {"term", "doc frequency"};
+  private Map<Term, Integer> docFrequencies; 
+
+  public DFTableModel() {
+    this.docFrequencies = new HashMap<Term, Integer>();
+    this.terms = new ArrayList<Term>();
+  }
+  
+  public void setBank(DocumentFrequencyBank termbank) {
+    this.docFrequencies = termbank.getDocFrequencies();
+    this.terms = new ArrayList<Term>(docFrequencies.keySet());
+    Collections.sort(this.terms, new TermComparator());
+  }
+  
+  public int getColumnCount() {
+    return 2;
+  }
+
+  public int getRowCount() {
+    return this.terms.size();
+  }
+
+  public Object getValueAt(int row, int col) {
+    Term term = this.terms.get(row); 
+    if (col == 0) {
+      return term.toString();
+    }
+    // implied else
+    if (this.docFrequencies.containsKey(term)) {
+      return this.docFrequencies.get(term);
+    }
+    return 0;
+  }
+  
+  public Class<?> getColumnClass(int col) {
+    if (col == 0) {
+      return String.class;
+    }
+    // implied else
+    return Integer.class;
+  }
+  
+  public String getColumnName(int col) {
+    return columnNames[col];
+  }
+
+}

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java      2014-02-20 15:54:13 UTC (rev 17372)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java      2014-02-20 16:20:31 UTC (rev 17373)
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2008--2012, The University of Sheffield. See the file
+ *  Copyright (c) 2008--2014, The University of Sheffield. See the file
  *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
  *
  *  This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -27,6 +27,8 @@
   private JSlider slider;
   
   
+  
+  
   public SliderPanel(AbstractBank scoredbank, String verb, boolean startLeft,
           TermbankViewer viewer) {
     this.scoredbank = scoredbank;

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Managing the Performance of Cloud-Based Applications
Take advantage of what the Cloud has to offer - Avoid Common Pitfalls.
Read the Whitepaper.
http://pubads.g.doubleclick.net/gampad/clk?id=121054471&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to