This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch orc
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/orc by this push:
new a7c0af50 [feature](orc) add getNumberOfRows in RowReader (#122)
a7c0af50 is described below
commit a7c0af50f8ca8ff7cddaf8675473a037f8b13143
Author: Mingyu Chen <[email protected]>
AuthorDate: Fri Sep 15 16:01:09 2023 +0800
[feature](orc) add getNumberOfRows in RowReader (#122)
Add `getNumberOfRows()` so that we can get the number of rows in the given
range.
---
c++/include/orc/Reader.hh | 5 +++++
c++/src/Reader.cc | 6 ++++++
c++/src/Reader.hh | 4 ++++
3 files changed, 15 insertions(+)
diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
index 81e3bf1d..5843d88c 100644
--- a/c++/include/orc/Reader.hh
+++ b/c++/include/orc/Reader.hh
@@ -695,6 +695,11 @@ namespace orc {
* @param rowNumber the next row the reader should return
*/
virtual void seekToRow(uint64_t rowNumber) = 0;
+
+ /**
+ * Get the total number of rows in the stripes selected by this reader's range.
+ */
+ virtual uint64_t getNumberOfRows() const = 0;
};
} // namespace orc
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index f69f27db..80a5cfd4 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -268,6 +268,7 @@ namespace orc {
numRowGroupsInStripeRange = 0;
useTightNumericVector = opts.getUseTightNumericVector();
uint64_t rowTotal = 0;
+ rowTotalInRange = 0;
firstRowOfStripe.resize(numberOfStripes);
for (size_t i = 0; i < numberOfStripes; ++i) {
@@ -277,6 +278,7 @@ namespace orc {
bool isStripeInRange = stripeInfo.offset() >= opts.getOffset() &&
stripeInfo.offset() < opts.getOffset() +
opts.getLength();
if (isStripeInRange) {
+ rowTotalInRange += stripeInfo.numberofrows();
if (i < currentStripe) {
currentStripe = i;
}
@@ -502,6 +504,10 @@ namespace orc {
}
}
+ uint64_t RowReaderImpl::getNumberOfRows() const {
+ return rowTotalInRange;
+ }
+
void RowReaderImpl::loadStripeIndex() {
// reset all previous row indexes
rowIndexes.clear();
diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
index 2ff3bbe8..c0f891ef 100644
--- a/c++/src/Reader.hh
+++ b/c++/src/Reader.hh
@@ -184,6 +184,8 @@ namespace orc {
uint64_t rowsInCurrentStripe;
// number of row groups between first stripe and last stripe
uint64_t numRowGroupsInStripeRange;
+ // number of rows in the stripes that fall within the requested range
+ uint64_t rowTotalInRange;
proto::StripeInformation currentStripeInfo;
proto::StripeFooter currentStripeFooter;
std::unique_ptr<ColumnReader> reader;
@@ -291,6 +293,8 @@ namespace orc {
void seekToRow(uint64_t rowNumber) override;
+ uint64_t getNumberOfRows() const override;
+
const FileContents& getFileContents() const;
bool getThrowOnHive11DecimalOverflow() const;
bool getIsDecimalAsLong() const;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]