Revision: 18331
http://sourceforge.net/p/gate/code/18331
Author: adamfunk
Date: 2014-09-12 20:35:28 +0000 (Fri, 12 Sep 2014)
Log Message:
-----------
WIP on the file location memory & the empty document issue.
Modified Paths:
--------------
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
Modified:
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
===================================================================
---
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
2014-09-12 15:45:46 UTC (rev 18330)
+++
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
2014-09-12 20:35:28 UTC (rev 18331)
@@ -25,6 +25,7 @@
import gate.gui.ResourceHelper;
import gate.util.InvalidOffsetException;
import java.awt.event.ActionEvent;
+import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
@@ -68,10 +69,12 @@
*/
public static void populateCorpus(final Corpus corpus, URL inputUrl, String
encoding, List<String> contentKeys,
List<String> featureKeys, int tweetsPerDoc) throws
ResourceInstantiationException {
+
+ InputStream input = null;
try {
- InputStream input = inputUrl.openStream();
+ input = inputUrl.openStream();
- // TODO detect & handle gzipped input
+ // TODO Detect & handle gzipped input.
TweetStreamIterable tweetSource = new TweetStreamIterable(input,
contentKeys, featureKeys, false);
int tweetCounter = 0;
@@ -79,6 +82,8 @@
StringBuilder content = new StringBuilder();
Map<PreAnnotation, Integer> annotandaOffsets = new
HashMap<PreAnnotation, Integer>();
+ // TODO Suppress empty documents (generated by 0-tweet files).
+
for (Tweet tweet : tweetSource) {
if ( (tweetsPerDoc > 0) && (tweetCounter > 0) && ((tweetCounter %
tweetsPerDoc) == 0) ) {
closeDocument(document, content, annotandaOffsets, corpus);
@@ -97,6 +102,9 @@
tweetCounter++;
} // end of Tweet loop
+ System.out.println("CL = " + content.length());
+ System.out.println("TC = " + tweetCounter);
+
if (content.length() > 0) {
closeDocument(document, content, annotandaOffsets, corpus);
}
@@ -112,6 +120,18 @@
catch (Exception e) {
throw new ResourceInstantiationException(e);
}
+ finally {
+ if (input != null) {
+ try {
+ input.close();
+ }
+ catch(IOException e) {
+ logger.warn("Error in Twitter Population", e);
+ }
+ }
+
+ }
+
}
Modified:
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java
===================================================================
---
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java
2014-09-12 15:45:46 UTC (rev 18330)
+++
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java
2014-09-12 20:35:28 UTC (rev 18331)
@@ -14,7 +14,6 @@
import gate.Gate;
import gate.swing.XJFileChooser;
-//TODO Get GATE to remember last location.
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
@@ -152,6 +151,8 @@
@Override
public void actionPerformed(ActionEvent arg0) {
XJFileChooser chooser = new XJFileChooser();
+ //TODO Get GATE to remember last location.
+ //chooser.setResource(PopulationConfig.class.getName());
chooser.setDialogTitle("Load XML configuration");
chooser.setFileSelectionMode(XJFileChooser.FILES_ONLY);
int chosen = chooser.showOpenDialog(this.wrapper.dialog);
Modified:
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java
===================================================================
---
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java
2014-09-12 15:45:46 UTC (rev 18330)
+++
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java
2014-09-12 20:35:28 UTC (rev 18331)
@@ -14,7 +14,6 @@
import gate.gui.ListEditorDialog;
import gate.gui.MainFrame;
import gate.swing.XJFileChooser;
-// TODO Get GATE to remember last location.
import gate.util.ExtensionFileFilter;
import gate.util.Strings;
import java.awt.Window;
@@ -110,6 +109,8 @@
dialog.add(Box.createVerticalStrut(2));
chooser = new XJFileChooser();
+ // TODO Fix this to get GATE to remember last location.
+ //chooser.setResource(PopulationDialogWrapper.class.getName());
chooser.setFileSelectionMode(XJFileChooser.FILES_ONLY);
chooser.setMultiSelectionEnabled(true);
chooser.setDialogTitle("Select a Twitter JSON file");
Modified:
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
===================================================================
---
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
2014-09-12 15:45:46 UTC (rev 18330)
+++
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
2014-09-12 20:35:28 UTC (rev 18331)
@@ -75,11 +75,13 @@
@Override
public boolean hasNext() {
+ // Should that be iterator.hasNextValue() ?
return this.iterator.hasNext() ||
- ( (this.nestedStatuses != null) && this.nestedStatuses.hasNext());
- // should that be iterator.hasNextValue() ?
+ (this.nested && (this.nestedStatuses != null) &&
this.nestedStatuses.hasNext());
+ // Belt & braces: this.nested should suffice.
}
+
@Override
public Tweet next() {
Tweet result = null;
@@ -98,12 +100,12 @@
if (isSearchResultList(json)) {
this.nestedStatuses = getStatuses(json).iterator();
this.nested = this.nestedStatuses.hasNext();
- // Set the nested flag according as there is anything in
- // the statuses value array (it could be empty).
+ // Set the nested flag according as there is anything left
+ // in the statuses value array (which could be empty).
}
- // Test nested now: true IFF we are in a search result thingy AND
- // the statuses array is non-empty.
+ // Now let's test nested: true IFF we are in a search result thingy AND
+ // the thingy's statuses array is non-empty.
if (this.nested) {
result = Tweet.readTweet(this.nestedStatuses.next(), contentKeys,
featureKeys);
// Set the nested flag again for the next call to next()
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Want excitement?
Manually upgrade your production database.
When you want reliability, choose Perforce
Perforce version control. Predictably reliable.
http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs