[
https://issues.apache.org/jira/browse/AVRO-2214?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16635734#comment-16635734
]
ASF GitHub Bot commented on AVRO-2214:
--
thiru-apache closed pull request #328: AVRO-2214 Support sync and seek in C++ DataFileReader
URL: https://github.com/apache/avro/pull/328
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/lang/c++/api/DataFile.hh b/lang/c++/api/DataFile.hh
index bff309770..4236d3537 100644
--- a/lang/c++/api/DataFile.hh
+++ b/lang/c++/api/DataFile.hh
@@ -172,12 +172,14 @@ public:
*/
class AVRO_DECL DataFileReaderBase : boost::noncopyable {
const std::string filename_;
-const std::auto_ptr<InputStream> stream_;
+const std::auto_ptr<SeekableInputStream> stream_;
const DecoderPtr decoder_;
int64_t objectCount_;
bool eof_;
Codec codec_;
-
+int64_t blockStart_;
+int64_t blockEnd_;
+
ValidSchema readerSchema_;
ValidSchema dataSchema_;
DecoderPtr dataDecoder_;
@@ -247,6 +249,29 @@ public:
* Closes the reader. No further operation is possible on this reader.
*/
void close();
+
+/**
+ * Move to a specific, known synchronization point, for example one returned
+ * from tell() after sync().
+ */
+void seek(int64_t position);
+
+/**
+ * Move to the next synchronization point after a position. To process a
+ * range of file entries, call this with the starting position, then check
+ * pastSync() with the end point before each use of decoder().
+ */
+void sync(int64_t position);
+
+/**
+ * Return true if past the next synchronization point after a position.
+ */
+bool pastSync(int64_t position);
+
+/**
+ * Return the last synchronization point before our current position.
+ */
+int64_t previousSync();
};
/**
@@ -330,6 +355,29 @@ public:
* Closes the reader. No further operation is possible on this reader.
*/
void close() { return base_->close(); }
+
+/**
+ * Move to a specific, known synchronization point, for example one returned
+ * from previousSync().
+ */
+void seek(int64_t position) { base_->seek(position); }
+
+/**
+ * Move to the next synchronization point after a position. To process a
+ * range of file entries, call this with the starting position, then check
+ * pastSync() with the end point before each call to read().
+ */
+void sync(int64_t position) { base_->sync(position); }
+
+/**
+ * Return true if past the next synchronization point after a position.
+ */
+bool pastSync(int64_t position) { return base_->pastSync(position); }
+
+/**
+ * Return the last synchronization point before our current position.
+ */
+int64_t previousSync() { return base_->previousSync(); }
};
} // namespace avro
diff --git a/lang/c++/api/Stream.hh b/lang/c++/api/Stream.hh
index 92b2334d2..42ccf0a00 100644
--- a/lang/c++/api/Stream.hh
+++ b/lang/c++/api/Stream.hh
@@ -75,6 +75,31 @@ public:
virtual size_t byteCount() const = 0;
};
+/**
+ * An InputStream which also supports seeking to a specific offset.
+ */
+class AVRO_DECL SeekableInputStream : public InputStream {
+protected:
+
+/**
+ * An empty constructor.
+ */
+SeekableInputStream() { }
+
+public:
+/**
+ * Destructor.
+ */
+virtual ~SeekableInputStream() { }
+
+/**
+ * Seek to a specific position in the stream. This may invalidate pointers
+ * returned from next(). This will also reset byteCount() to the given
+ * position.
+ */
+virtual void seek(int64_t position) = 0;
+};
+
/**
* A no-copy output stream.
*/
@@ -161,8 +186,10 @@ AVRO_DECL std::auto_ptr<OutputStream> fileOutputStream(const char* filename,
* Returns a new InputStream whose contents come from the given file.
* Data is read in chunks of given buffer size.
*/
-AVRO_DECL std::auto_ptr<InputStream> fileInputStream(const char* filename,
-size_t bufferSize = 8 * 1024);
+AVRO_DECL std::auto_ptr<InputStream> fileInputStream(
+const char *filename, size_t bufferSize = 8 * 1024);
+AVRO_DECL std::auto_ptr<SeekableInputStream> fileSeekableInputStream(
+const char *filename, size_t bufferSize = 8 * 1024);
/**
* Returns a new OutputStream whose contents will be sent to the given
@@ -177,8 +204,8 @@ AVRO_DECL std::auto_ptr<OutputStream> ostreamOutputStream(std::ostream& os,
* std::istream. The std::istream object should outlive the returned
* InputStream.
*/
-AVRO_DECL std::auto_ptr<InputStream> istreamInputStream(std::istream& in,
-size_t bufferSize = 8 * 1024);
+AVRO_DECL std::auto_ptr<InputStream> istreamInputStream(
+std::istream &in, size_t bufferSize = 8 * 1024);
/** A convenience class for reading from an InputStream */
struct