Revision: 17458
http://sourceforge.net/p/gate/code/17458
Author: adamfunk
Date: 2014-02-26 20:51:10 +0000 (Wed, 26 Feb 2014)
Log Message:
-----------
Refactoring & a bunch of changes.
If I'd known where I was going, I would've started somewhere else.
Modified Paths:
--------------
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/ActionSaveCsv.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/CsvFileSelectionActionListener.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/IdfCalculation.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/MergingMode.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/TfCalculation.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java
Added Paths:
-----------
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/
Removed Paths:
-------------
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java
Copied:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java
(from rev 17445,
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java)
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java
(rev 0)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2010--2014, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * $Id$
+ */
+
+package gate.termraider.bank;
+
+import gate.*;
+import gate.creole.AbstractLanguageResource;
+import gate.creole.metadata.CreoleParameter;
+import gate.util.GateException;
+import java.io.File;
+import java.util.*;
+import gate.termraider.util.*;
+
+
+/**
+ * A thing that has a score name, can be saved as CSV, and
+ * can be used to generate a SliderPanel (which needs
+ * min & max scores).
+ */
+public abstract class AbstractBank extends AbstractLanguageResource {
+ private static final long serialVersionUID = -9168657973312733783L;
+
+ protected Set<String> languages, types;
+ protected int documentCount;
+
+ public abstract Number getMinScore();
+
+ public abstract Number getMaxScore();
+
+ public int getDocumentCount() {
+ return this.documentCount;
+ }
+
+ public abstract void saveAsCsv(double threshold, File file)
+ throws GateException;
+
+ public abstract void saveAsCsv(File file)
+ throws GateException;
+
+ public Set<String> getLanguages() {
+ return this.languages;
+ }
+
+ public Set<String> getTypes() {
+ return this.types;
+ }
+
+ public Term makeTerm(Annotation annotation, Document document) {
+ return new Term(annotation, document,
+ this.languageFeature, this.inputAnnotationFeature);
+ }
+
+
+ /* CREOLE */
+
+ protected String scoreProperty;
+ protected String languageFeature;
+ protected String inputAnnotationFeature;
+ protected Set<Corpus> corpora;
+ protected boolean debugMode;
+ protected String inputASName;
+
+
+
+
+ /* Default value is overridden in the implementations */
+ @CreoleParameter(comment = "name of main score",
+ defaultValue = "score")
+ public void setScoreProperty(String name) {
+ this.scoreProperty = name;
+ }
+
+ public String getScoreProperty() {
+ return this.scoreProperty;
+ }
+
+
+ @CreoleParameter(comment = "language feature on term candidates",
+ defaultValue = "lang")
+ public void setLanguageFeature(String name) {
+ this.languageFeature = name;
+ }
+ public String getLanguageFeature() {
+ return this.languageFeature;
+ }
+
+
+ @CreoleParameter(comment = "input annotation feature",
+ defaultValue = "canonical")
+ public void setInputAnnotationFeature(String name) {
+ this.inputAnnotationFeature = name;
+ }
+ public String getInputAnnotationFeature() {
+ return this.inputAnnotationFeature;
+ }
+
+ @CreoleParameter(comment = "Processed corpora to analyse")
+ public void setCorpora(Set<Corpus> corpora) {
+ this.corpora = corpora;
+ }
+
+ public Set<Corpus> getCorpora() {
+ return this.corpora;
+ }
+
+ @CreoleParameter(comment = "print debugging information during initialization",
+ defaultValue = "false")
+ public void setDebugMode(Boolean debug) {
+ this.debugMode = debug;
+ }
+
+ public Boolean getDebugMode() {
+ return this.debugMode;
+ }
+
+ @CreoleParameter(comment = "input AS name",
+ defaultValue = "")
+ public void setInputASName(String name) {
+ this.inputASName = name;
+ }
+ public String getInputASName() {
+ return this.inputASName;
+ }
+
+}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -12,7 +12,6 @@
package gate.termraider.bank;
import gate.creole.*;
-import gate.creole.metadata.*;
import gate.gui.ActionsPublisher;
import gate.util.*;
import gate.*;
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -16,11 +16,14 @@
import gate.gui.ActionsPublisher;
import gate.util.*;
import gate.*;
+
import java.io.*;
import java.util.*;
+
import gate.termraider.output.*;
import gate.termraider.util.*;
import gate.termraider.gui.*;
+
import javax.swing.Action;
@@ -40,9 +43,9 @@
protected Map<Term, Double> termScores;
protected Map<Term, Double> rawTermScores;
- protected List<Term> termsByDescendingScore, termsByDescendingFrequency,
- termsByDescendingDocFrequency;
+ protected List<Term> termsByDescendingScore;
protected Map<Term, Integer> termFrequencies, docFrequencies;
+ protected boolean descendingScoresDone = false;
public static final String freqProperty = "frequency";
@@ -95,17 +98,16 @@
public List<Term> getTermsByDescendingScore() {
+ // lazy calculation
+ if (! descendingScoresDone) {
+ termsByDescendingScore = new ArrayList<Term>(this.getTerms());
+ Collections.sort(termsByDescendingScore, new TermComparatorByDescendingScore(scores.get(this.getDefaultScoreType())));
+ descendingScoresDone = true;
+ }
return this.termsByDescendingScore;
}
- public List<Term> getTermsByDescendingFrequency() {
- return this.termsByDescendingFrequency;
- }
-
- public List<Term> getTermsByDescendingDocFrequency() {
- return this.termsByDescendingDocFrequency;
- }
-
+
public Map<Term, Double> getTermScores() {
return this.termScores;
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -15,8 +15,10 @@
import gate.gui.ActionsPublisher;
import gate.*;
import gate.termraider.util.*;
-import gate.termraider.bank.modes.*;
+import gate.termraider.modes.*;
+
import java.util.*;
+
import org.apache.commons.lang.StringEscapeUtils;
@@ -34,7 +36,7 @@
/* EXTRA DATA FOR ANALYSIS */
private Map<Term, List<Double>> termIndividualScores;
-
+ private ScoreType termFrequencyST, localDocFrequencyST;
protected void processDocument(Document document) {
@@ -48,17 +50,15 @@
incrementTermFreq(term, 1);
double score = ((Number) fm.get(inputScoreFeature)).doubleValue();
+ Utilities.addToMapSet(termDocuments, term, documentSource);
+
if (termIndividualScores.containsKey(term)) {
List<Double> scoreList = termIndividualScores.get(term);
scoreList.add(score);
- termDocuments.get(term).add(documentSource);
}
else {
List<Double> scoreList = new ArrayList<Double>();
scoreList.add(score);
- Set<String> docNames = new HashSet<String>();
- docNames.add(documentSource);
- termDocuments.put(term, docNames);
termIndividualScores.put(term, scoreList);
}
}
@@ -67,26 +67,13 @@
public void calculateScores() {
- double score;
-
- for (Term term : termIndividualScores.keySet()) {
- if (mergingMode == MergingMode.MAXIMUM) {
- score = Collections.max(termIndividualScores.get(term));
- }
- else if (mergingMode == MergingMode.MINIMUM) {
- score = Collections.min(termIndividualScores.get(term));
- }
- else { // must be MEAN
- score = Utilities.meanDoubleList(termIndividualScores.get(term));
- }
-
- rawTermScores.put(term, score);
- termScores.put(term, Normalization.normalizeScore(score));
- }
-
- termsByDescendingScore = new ArrayList<Term>(termScores.keySet());
- Collections.sort(termsByDescendingScore, new TermComparatorByDescendingScore(termScores));
-
+ Double score;
+
+ for (Term term : termIndividualScores.keySet()) {
+ score = MergingMode.calculate(mergingMode, termIndividualScores.get(term));
+ Utilities.setScoreTermValue(scores, getDefaultScoreType(), term, score);
+ }
+
if (debugMode) {
System.out.println("Termbank: nbr of terms = " +
termsByDescendingScore.size());
}
@@ -94,16 +81,12 @@
protected void resetScores() {
+ scores = new HashMap<ScoreType, Map<Term,Number>>();
+ for (ScoreType type : scoreTypes) {
+ scores.put(type, new HashMap<Term, Number>());
+ }
termIndividualScores = new HashMap<Term, List<Double>>();
- termDocuments = new HashMap<Term, Set<String>>();
- termScores = new HashMap<Term, Double>();
- rawTermScores = new HashMap<Term, Double>();
- termsByDescendingScore = new ArrayList<Term>();
- termsByDescendingScore = new ArrayList<Term>();
- termsByDescendingFrequency = new ArrayList<Term>();
- termsByDescendingDocFrequency = new ArrayList<Term>();
- termFrequencies = new HashMap<Term, Integer>();
- docFrequencies = new HashMap<Term, Integer>();
+ termDocuments = new HashMap<Term, Set<String>>();
}
@@ -142,11 +125,10 @@
protected void initializeScoreTypes() {
this.scoreTypes = new ArrayList<ScoreType>();
this.scoreTypes.add(new ScoreType(scoreProperty));
- // TODO Do we need any of this stuff here?
- //this.termFrequencyST = new ScoreType("termFrequency");
- //this.scoreTypes.add(termFrequencyST);
- //this.localDocFrequencyST = new ScoreType("localDocFrequency");
- //this.scoreTypes.add(localDocFrequencyST);
+ this.termFrequencyST = new ScoreType("termFrequency");
+ this.scoreTypes.add(termFrequencyST);
+ this.localDocFrequencyST = new ScoreType("localDocFrequency");
+ this.scoreTypes.add(localDocFrequencyST);
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -42,7 +42,6 @@
private Set<DocumentFrequencyBank> inputBanks;
// note: corpora inherited from AbstractBank
- private int documentTotal;
private Map<Term, Integer> documentFrequencies;
private int minFrequency, maxFrequency;
private Map<String, Set<Term>> stringLookupTable;
@@ -78,7 +77,7 @@
}
protected void resetScores() {
- documentTotal = 0;
+ documentCount = 0;
documentFrequencies = new HashMap<Term, Integer>();
termFrequencies = new HashMap<Term, Integer>();
languages = new HashSet<String>();
@@ -105,7 +104,7 @@
protected void processInputBanks() {
for (DocumentFrequencyBank bank : inputBanks) {
- this.documentTotal += bank.documentTotal;
+ this.documentCount += bank.documentCount;
for (Term term : bank.getTerms()) {
increment(term, bank.getFrequencyStrict(term));
}
@@ -128,7 +127,7 @@
protected void processDocument(Document document) {
- documentTotal++;
+ documentCount++;
AnnotationSet candidates =
document.getAnnotations(inputASName).get(inputAnnotationTypes);
Set<Term> documentTerms = new HashSet<Term>();
@@ -290,10 +289,7 @@
return this.documentFrequencies;
}
- public int getTotalDocs() {
- return this.documentTotal;
- }
-
+
protected void initializeScoreTypes() {
// Whatever this is called, it must be the reference
// document frequency, so we will only need
@@ -333,7 +329,7 @@
sb.append(',').append(StringEscapeUtils.escapeCsv("_TOTAL_DOCS_"));
sb.append(',').append(StringEscapeUtils.escapeCsv(""));
sb.append(',').append(StringEscapeUtils.escapeCsv(""));
- sb.append(',').append(StringEscapeUtils.escapeCsv(Integer.toString(this.getTotalDocs())));
+ sb.append(',').append(StringEscapeUtils.escapeCsv(Integer.toString(this.getDocumentCount())));
return sb.toString();
}
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -14,7 +14,7 @@
import gate.creole.metadata.*;
import gate.gui.ActionsPublisher;
import gate.*;
-import gate.termraider.bank.modes.*;
+import gate.termraider.modes.*;
import gate.termraider.util.*;
import org.apache.commons.lang.StringEscapeUtils;
import java.util.*;
@@ -75,25 +75,13 @@
}
}
- addToMapSet(termDocuments, term, documentSource);
- addToMapSet(termHeads, term, head);
+ Utilities.addToMapSet(termDocuments, term, documentSource);
+ Utilities.addToMapSet(termHeads, term, head);
incrementTermFreq(term, 1);
}
}
- private void addToMapSet(Map<Term, Set<String>> map, Term key, String value) {
- Set<String> valueSet;
- if (map.containsKey(key)) {
- valueSet = map.get(key);
- }
- else {
- valueSet = new HashSet<String>();
- }
-
- valueSet.add(value);
- map.put(key, valueSet);
- }
private Set<String> getSetFromMap(Map<Term, Set<String>> map, Term key) {
if (map.containsKey(key)) {
@@ -124,7 +112,7 @@
for (String headI : headsI) {
for (String headJ : headsJ) {
if (headI.endsWith(headJ)) {
- addToMapSet(termHyponyms, termI, termJ.getTermString());
+ Utilities.addToMapSet(termHyponyms, termI, termJ.getTermString());
break hyponymLoop;
}
}
@@ -143,12 +131,6 @@
termsByDescendingScore = new ArrayList<Term>(termScores.keySet());
Collections.sort(termsByDescendingScore, new
TermComparatorByDescendingScore(termScores));
- termsByDescendingFrequency = new ArrayList<Term>(termScores.keySet());
- Collections.sort(termsByDescendingFrequency, new TermComparatorByDescendingScore(termFrequencies));
-
- termsByDescendingDocFrequency = new ArrayList<Term>(termScores.keySet());
- Collections.sort(termsByDescendingFrequency, new TermComparatorByDescendingScore(docFrequencies));
-
if (debugMode) {
System.out.println("Termbank: nbr of terms = " +
termsByDescendingScore.size());
}
@@ -162,8 +144,6 @@
termScores = new HashMap<Term, Double>();
rawTermScores = new HashMap<Term, Double>();
termsByDescendingScore = new ArrayList<Term>();
- termsByDescendingFrequency = new ArrayList<Term>();
- termsByDescendingDocFrequency = new ArrayList<Term>();
termFrequencies = new HashMap<Term, Integer>();
docFrequencies = new HashMap<Term, Integer>();
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -15,9 +15,11 @@
import gate.creole.metadata.*;
import gate.gui.ActionsPublisher;
import gate.*;
-import gate.termraider.bank.modes.*;
+import gate.termraider.modes.*;
import gate.termraider.util.*;
+
import java.util.*;
+
import org.apache.commons.lang.StringEscapeUtils;
@@ -37,7 +39,6 @@
private DocumentFrequencyBank docFreqSource;
/* EXTRA DATA */
- private int documentCount;
private ScoreType termFrequencyST, localDocFrequencyST, refDocFrequencyST;
@@ -50,14 +51,7 @@
Term term = makeTerm(candidate, document);
incrementTermFreq(term, 1);
- if (termDocuments.containsKey(term)) {
- termDocuments.get(term).add(documentSource);
- }
- else {
- Set<String> docNames = new HashSet<String>();
- docNames.add(documentSource);
- termDocuments.put(term, docNames);
- }
+ Utilities.addToMapSet(termDocuments, term, documentSource);
}
}
@@ -83,7 +77,7 @@
for (Term term : termFrequencies.keySet()) {
int tf = termFrequencies.get(term);
int df = getRefDocFrequency(term);
- int n = docFreqSource.getTotalDocs();
+ int n = docFreqSource.getDocumentCount();
double score = TfCalculation.calculate(tfCalculation, tf) *
IdfCalculation.calculate(idfCalculation, df, n);
rawTermScores.put(term, Double.valueOf(score));
termScores.put(term, Normalization.normalizeScore(score));
@@ -103,8 +97,6 @@
termScores = new HashMap<Term, Double>();
rawTermScores = new HashMap<Term, Double>();
termsByDescendingScore = new ArrayList<Term>();
- termsByDescendingFrequency = new ArrayList<Term>();
- termsByDescendingDocFrequency = new ArrayList<Term>();
termFrequencies = new HashMap<Term, Integer>();
docFrequencies = new HashMap<Term, Integer>();
documentCount = 0;
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/ActionSaveCsv.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/ActionSaveCsv.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/ActionSaveCsv.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -17,6 +17,7 @@
import javax.swing.*;
import gate.termraider.gui.CsvFileSelectionActionListener.Mode;
import gate.termraider.util.*;
+import gate.termraider.bank.*;
import javax.swing.filechooser.FileNameExtensionFilter;
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/CsvFileSelectionActionListener.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/CsvFileSelectionActionListener.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/CsvFileSelectionActionListener.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -16,6 +16,7 @@
import java.awt.event.ActionListener;
import javax.swing.*;
import gate.termraider.util.*;
+import gate.termraider.bank.*;
import java.io.*;
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -116,7 +116,7 @@
}
dfb = (DocumentFrequencyBank) target;
- docsField.setText("Doc count = " + this.dfb.getTotalDocs());
+ docsField.setText("Doc count = " + this.dfb.getDocumentCount());
freqTableModel.setBank(this.dfb);
typeTableModel.setList(dfb.getTypes());
langTableModel.setList(dfb.getLanguages());
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -16,7 +16,7 @@
import javax.swing.*;
import javax.swing.border.*;
import javax.swing.event.*;
-import gate.termraider.util.*;
+import gate.termraider.bank.*;
public class SliderPanel extends JPanel {
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/IdfCalculation.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/IdfCalculation.java
2014-02-26 16:44:09 UTC (rev 17445)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/IdfCalculation.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -9,7 +9,7 @@
*
* $Id$
*/
-package gate.termraider.bank.modes;
+package gate.termraider.modes;
import gate.termraider.util.Utilities;
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/MergingMode.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/MergingMode.java
2014-02-26 16:44:09 UTC (rev 17445)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/MergingMode.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, The University of Sheffield. See the file
+ * Copyright (c) 2012--2014, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -9,10 +9,28 @@
*
* $Id$
*/
-package gate.termraider.bank.modes;
+package gate.termraider.modes;
+import gate.termraider.util.Utilities;
+import java.util.*;
+
+
public enum MergingMode {
MINIMUM,
MEAN,
- MAXIMUM
+ MAXIMUM;
+
+ public static Double calculate(MergingMode mode, List<Double> list) {
+ if (mode == MAXIMUM) {
+ return Collections.max(list);
+ }
+
+ if (mode == MINIMUM) {
+ return Collections.min(list);
+ }
+
+ // must be MEAN
+ return Utilities.meanDoubleList(list);
+ }
+
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/Normalization.java
2014-02-26 16:44:09 UTC (rev 17445)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -1,4 +1,15 @@
-package gate.termraider.bank.modes;
+/*
+ * Copyright (c) 2013--2014, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * $Id$
+ */
+package gate.termraider.modes;
public enum Normalization {
None,
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/TfCalculation.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/TfCalculation.java
2014-02-26 16:44:09 UTC (rev 17445)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/TfCalculation.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -9,7 +9,7 @@
*
* $Id$
*/
-package gate.termraider.bank.modes;
+package gate.termraider.modes;
import gate.termraider.util.Utilities;
Deleted:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2010--2014, The University of Sheffield. See the file
- * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
- *
- * This file is part of GATE (see http://gate.ac.uk/), and is free
- * software, licenced under the GNU Library General Public License,
- * Version 2, June 1991 (in the distribution as file licence.html,
- * and also available at http://gate.ac.uk/gate/licence.html).
- *
- * $Id$
- */
-
-package gate.termraider.util;
-
-import gate.*;
-import gate.creole.AbstractLanguageResource;
-import gate.creole.metadata.CreoleParameter;
-import gate.util.GateException;
-import java.io.File;
-import java.util.*;
-
-
-/**
- * A thing that has a score name, can be saved as CSV, and
- * can be used to generate a SliderPanel (which needs
- * min & max scores).
- */
-public abstract class AbstractBank extends AbstractLanguageResource {
- private static final long serialVersionUID = -9168657973312733783L;
-
- protected Set<String> languages, types;
-
- public abstract Number getMinScore();
-
- public abstract Number getMaxScore();
-
- public abstract void saveAsCsv(double threshold, File file)
- throws GateException;
-
- public abstract void saveAsCsv(File file)
- throws GateException;
-
- public Set<String> getLanguages() {
- return this.languages;
- }
-
- public Set<String> getTypes() {
- return this.types;
- }
-
- public Term makeTerm(Annotation annotation, Document document) {
- return new Term(annotation, document,
- this.languageFeature, this.inputAnnotationFeature);
- }
-
-
- /* CREOLE */
-
- protected String scoreProperty;
- protected String languageFeature;
- protected String inputAnnotationFeature;
- protected Set<Corpus> corpora;
- protected boolean debugMode;
- protected String inputASName;
-
-
-
-
- /* Default value is overridden in the implementations */
- @CreoleParameter(comment = "name of main score",
- defaultValue = "score")
- public void setScoreProperty(String name) {
- this.scoreProperty = name;
- }
-
- public String getScoreProperty() {
- return this.scoreProperty;
- }
-
-
- @CreoleParameter(comment = "language feature on term candidates",
- defaultValue = "lang")
- public void setLanguageFeature(String name) {
- this.languageFeature = name;
- }
- public String getLanguageFeature() {
- return this.languageFeature;
- }
-
-
- @CreoleParameter(comment = "input annotation feature",
- defaultValue = "canonical")
- public void setInputAnnotationFeature(String name) {
- this.inputAnnotationFeature = name;
- }
- public String getInputAnnotationFeature() {
- return this.inputAnnotationFeature;
- }
-
- @CreoleParameter(comment = "Processed corpora to analyse")
- public void setCorpora(Set<Corpus> corpora) {
- this.corpora = corpora;
- }
-
- public Set<Corpus> getCorpora() {
- return this.corpora;
- }
-
- @CreoleParameter(comment = "print debugging information during initialization",
- defaultValue = "false")
- public void setDebugMode(Boolean debug) {
- this.debugMode = debug;
- }
-
- public Boolean getDebugMode() {
- return this.debugMode;
- }
-
- @CreoleParameter(comment = "input AS name",
- defaultValue = "")
- public void setInputASName(String name) {
- this.inputASName = name;
- }
- public String getInputASName() {
- return this.inputASName;
- }
-
-}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java
===================================================================
---
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java
2014-02-26 20:43:01 UTC (rev 17457)
+++
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java
2014-02-26 20:51:10 UTC (rev 17458)
@@ -13,9 +13,11 @@
import gate.*;
import gate.creole.ANNIEConstants;
+
import java.io.*;
import java.net.*;
import java.util.*;
+
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.WordUtils;
@@ -165,5 +167,61 @@
return Math.log10(input) / log10of2;
}
+
+ public static void addToMapSet(Map<Term, Set<String>> map, Term key, String value) {
+ Set<String> valueSet;
+ if (map.containsKey(key)) {
+ valueSet = map.get(key);
+ }
+ else {
+ valueSet = new HashSet<String>();
+ }
+
+ valueSet.add(value);
+ map.put(key, valueSet);
+ }
+
+
+ public static void setScoreTermValue(Map<ScoreType, Map<Term, Number>> map, ScoreType type, Term term, Number value) {
+ Map<Term, Number> submap;
+ if (map.containsKey(type)) {
+ submap = map.get(type);
+ }
+ else {
+ submap = new HashMap<Term, Number>();
+ }
+
+ submap.put(term, value);
+ map.put(type, submap);
+ }
+
+
+ /**
+ * Forces the ultimate value to be Integer.
+ */
+ public static void incrementScoreTermValue(Map<ScoreType, Map<Term, Number>> map,
+ ScoreType type, Term term, Integer increment) {
+ Map<Term, Number> submap;
+ if (map.containsKey(type)) {
+ submap = map.get(type);
+ }
+ else {
+ submap = new HashMap<Term, Number>();
+ }
+
+ int count;
+ if (submap.containsKey(term)) {
+ count = submap.get(term).intValue();
+ }
+ else {
+ count = 0;
+ }
+
+ count += increment.intValue();
+ submap.put(term, count);
+ map.put(type, submap);
+ }
+
+
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Flow-based real-time traffic analytics software. Cisco certified tool.
Monitor traffic, SLAs, QoS, Medianet, WAAS etc. with NetFlow Analyzer
Customize your own dashboards, set traffic alerts and generate reports.
Network behavioral analysis & security monitoring. All-in-one tool.
http://pubads.g.doubleclick.net/gampad/clk?id=126839071&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs