Revision: 18319
http://sourceforge.net/p/gate/code/18319
Author: adamfunk
Date: 2014-09-11 19:51:08 +0000 (Thu, 11 Sep 2014)
Log Message:
-----------
WIP
Added Paths:
-----------
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterable.java
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
Added:
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterable.java
===================================================================
---
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterable.java
(rev 0)
+++
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterable.java
2014-09-11 19:51:08 UTC (rev 18319)
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 1995-2014, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * $Id$
+ */
+package gate.corpora.twitter;
+
+import java.io.InputStream;
+import java.util.Iterator;
+
+/**
+ * Iterable version, just to make loops easier.
+ * @author adam
+ *
+ */
+public class TweetStreamIterable implements Iterable<Tweet> {
+
+ InputStream input;
+
+ public TweetStreamIterable(InputStream input) {
+ this.input = input;
+ }
+
+ @Override
+ public Iterator<Tweet> iterator() {
+ return new TweetStreamIterator(input);
+ }
+
+}
Property changes on:
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterable.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added:
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
===================================================================
---
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
(rev 0)
+++
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
2014-09-11 19:51:08 UTC (rev 18319)
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 1995-2014, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * $Id$
+ */
+package gate.corpora.twitter;
+
+import gate.Document;
+import gate.Factory;
+import gate.FeatureMap;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.List;
+
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonParser.Feature;
+import com.fasterxml.jackson.core.JsonPointer;
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.MappingIterator;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Iterator over the tweets contained in a JSON input stream.  The stream
+ * may be either a single JSON array of objects or a plain sequence of JSON
+ * objects; each object is converted to a {@link Tweet} with
+ * {@code Tweet.readTweet}.
+ * <p>
+ * Call {@link #close()} when finished to release the JSON parser and the
+ * underlying stream.
+ */
+public class TweetStreamIterator implements Iterator<Tweet> {
+
+  // Borrowed from gcp IOConstants
+  public static final String ID_POINTER = "/id_str";
+
+  private final ObjectMapper objectMapper;
+  private final JsonParser jsonParser;
+  private final MappingIterator<JsonNode> iterator;
+  private final boolean gzip;
+  private final List<String> contentKeys;
+  private final List<String> featureKeys;
+  protected JsonPointer idPointer;
+
+  /**
+   * Sets up a JSON parser on the given stream and positions it on the
+   * first tweet.
+   *
+   * @param input the stream to read the JSON from
+   * @param contentKeys passed to {@code Tweet.readTweet} for every tweet
+   * @param featureKeys passed to {@code Tweet.readTweet} for every tweet
+   * @param gzip must be {@code false}; compressed input is not yet
+   *          supported
+   * @throws IllegalArgumentException if {@code gzip} is {@code true}
+   * @throws JsonParseException if the start of the stream is not valid JSON
+   * @throws IOException if the stream cannot be read
+   */
+  public TweetStreamIterator(InputStream input, List<String> contentKeys,
+          List<String> featureKeys, boolean gzip) throws JsonParseException,
+          IOException {
+    // TODO support compression
+    if(gzip) {
+      throw new IllegalArgumentException("gzip not yet supported!");
+    }
+    this.contentKeys = contentKeys;
+    this.featureKeys = featureKeys;
+    this.gzip = gzip;
+
+    // Following borrowed from gcp JSONStreamingInputHandler
+    idPointer = JsonPointer.compile(ID_POINTER);
+    objectMapper = new ObjectMapper();
+    // AUTO_CLOSE_SOURCE: closing the parser also closes the input stream.
+    jsonParser = objectMapper.getFactory().createParser(input)
+            .enable(Feature.AUTO_CLOSE_SOURCE);
+    // If the first token in the stream is the start of an array ("[")
+    // then assume the stream as a whole is an array of objects, one per
+    // document.  To handle this, simply clear the token - the
+    // MappingIterator returned by readValues will cope with the rest in
+    // either form.
+    if(jsonParser.nextToken() == JsonToken.START_ARRAY) {
+      jsonParser.clearCurrentToken();
+    }
+    iterator = objectMapper.readValues(jsonParser, JsonNode.class);
+  }
+
+  /**
+   * Reports whether another tweet is available.
+   *
+   * @return {@code true} if {@link #next()} can return another tweet
+   */
+  @Override
+  public boolean hasNext() {
+    // MappingIterator.hasNext() is the unchecked counterpart of
+    // hasNextValue(): it performs the same check but rethrows the checked
+    // exceptions as runtime exceptions, which is what the Iterator
+    // interface requires here.
+    return iterator.hasNext();
+  }
+
+  /**
+   * Reads the next JSON object from the stream and converts it to a
+   * {@link Tweet}.
+   *
+   * @return the next tweet
+   * @throws java.util.NoSuchElementException if the stream has no more
+   *           tweets
+   * @throws RuntimeException if reading or parsing the next JSON value
+   *           fails; the original {@link IOException} is kept as the cause
+   */
+  @Override
+  public Tweet next() {
+    try {
+      // A single check is enough: exactly one value is consumed per call.
+      if(!iterator.hasNextValue()) {
+        throw new java.util.NoSuchElementException(
+                "No more tweets in the stream");
+      }
+      JsonNode json = iterator.nextValue();
+      return Tweet.readTweet(json, contentKeys, featureKeys);
+    }
+    catch(IOException e) {
+      // Do not hide the failure behind a null return value: the caller
+      // could not tell it apart from a normal result.
+      throw new RuntimeException(
+              "Unable to read the next tweet from the JSON stream", e);
+    }
+  }
+
+  /**
+   * Not supported: tweets cannot be removed from an input stream.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void remove() {
+    throw new UnsupportedOperationException(
+            "Cannot remove tweets from an input stream");
+  }
+
+  /**
+   * Closes the JSON parser.  Because the parser was created with
+   * {@code AUTO_CLOSE_SOURCE} enabled, this also closes the input stream
+   * given to the constructor.
+   *
+   * @throws RuntimeException if closing fails; the original
+   *           {@link IOException} is kept as the cause
+   */
+  public void close() {
+    try {
+      jsonParser.close();
+    }
+    catch(IOException e) {
+      throw new RuntimeException("Unable to close the JSON tweet stream", e);
+    }
+  }
+
+}
Property changes on:
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Want excitement?
Manually upgrade your production database.
When you want reliability, choose Perforce
Perforce version control. Predictably reliable.
http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs