Revision: 18335
http://sourceforge.net/p/gate/code/18335
Author: adamfunk
Date: 2014-09-15 09:25:03 +0000 (Mon, 15 Sep 2014)
Log Message:
-----------
Fixed the hasNext functionality so we can detect & delete empty doc if 0 tweets
in statuses array.
Modified Paths:
--------------
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
Modified:
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
===================================================================
--- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java 2014-09-15 01:20:33 UTC (rev 18334)
+++ gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java 2014-09-15 09:25:03 UTC (rev 18335)
@@ -102,9 +102,6 @@
tweetCounter++;
} // end of Tweet loop
- System.out.println("CL = " + content.length());
- System.out.println("TC = " + tweetCounter);
-
if (content.length() > 0) {
closeDocument(document, content, annotandaOffsets, corpus);
}
Modified:
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
===================================================================
--- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java 2014-09-15 01:20:33 UTC (rev 18334)
+++ gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java 2014-09-15 09:25:03 UTC (rev 18335)
@@ -39,8 +39,9 @@
private JsonParser jsonParser;
private MappingIterator<JsonNode> iterator;
private List<String> contentKeys, featureKeys;
- private boolean nested;
+ private boolean nested, hasNextNode;
private Iterator<JsonNode> nestedStatuses;
+ private JsonNode nextNode;
public TweetStreamIterator(InputStream input, List<String> contentKeys,
@@ -70,13 +71,20 @@
iterator = objectMapper.readValues(jsonParser, JsonNode.class);
this.nested = false;
this.nestedStatuses = null;
+ this.hasNextNode = this.iterator.hasNext();
+ if (this.hasNextNode) {
+ this.nextNode = this.iterator.next();
+ }
}
@Override
public boolean hasNext() {
- // Should that be iterator.hasNextValue() ?
- return this.iterator.hasNext() ||
+ /* Using this.iterator.hasNext() did not work for search result format, because
+ * it returns true if there is a JSON node with an empty statuses array. So we
+ * have to read ahead a bit in order to let the loop in Population *not* run in
+ * that case (so we can suppress the empty document). */
+ return (this.hasNextNode && nonEmpty(this.nextNode)) ||
(this.nested && (this.nestedStatuses != null) &&
this.nestedStatuses.hasNext());
// Belt & braces: this.nested should suffice.
}
@@ -94,11 +102,9 @@
this.nested = this.nestedStatuses.hasNext();
}
- else if (iterator.hasNextValue()) {
- JsonNode json = iterator.nextValue();
-
- if (isSearchResultList(json)) {
- this.nestedStatuses = getStatuses(json).iterator();
+ else if (this.hasNext()) {
+ if (isSearchResultList(this.nextNode)) {
+ this.nestedStatuses = getStatuses(this.nextNode).iterator();
this.nested = this.nestedStatuses.hasNext();
// Set the nested flag according as there is anything left
// in thee statuses value array (which could be empty).
@@ -112,9 +118,14 @@
this.nested = this.nestedStatuses.hasNext();
}
else {
- result = Tweet.readTweet(json, contentKeys, featureKeys);
+ result = Tweet.readTweet(this.nextNode, contentKeys, featureKeys);
}
}
+
+ if (! this.nested) {
+ hasNextNode = this.iterator.hasNext();
+ nextNode = hasNextNode ? this.iterator.next() : null;
+ }
}
catch (IOException e) {
logger.warn("Internal error in TweetStreamIterator", e);
@@ -149,5 +160,21 @@
}
+ public static boolean nonEmpty(JsonNode json) {
+ boolean result = false;
+ if (isSearchResultList(json)) {
+ try {
+ result = (getStatuses(json).size() > 0);
+ }
+ catch (IOException e) {
+ logger.warn("Internal error in TweetStreamIterator", e);
+ }
+ }
+ else {
+ result = true;
+ }
+ return result;
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Want excitement?
Manually upgrade your production database.
When you want reliability, choose Perforce
Perforce version control. Predictably reliable.
http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs