wgtmac commented on code in PR #2144:
URL: https://github.com/apache/orc/pull/2144#discussion_r1974640449
##########
c++/include/orc/Reader.hh:
##########
@@ -495,10 +495,17 @@ namespace orc {
*/
virtual uint64_t getNumberOfStripeStatistics() const = 0;
+ /**
+ * Get the statistics about a stripe .
+ * @param stripeIndex the index of the stripe (0 to N-1) to get statistics
about
+ * @return the statistics about that stripe without reading row group
index statistics
+ */
+ virtual std::unique_ptr<Statistics> getStripeStatisticsOnly(uint64_t
stripeIndex) const = 0;
Review Comment:
I'd prefer not to introduce a new API, especially when the name of
`getStripeStatisticsOnly` is a little bit misleading. This is not your fault,
`getStripeStatistics` does not clearly state it carries row index statistics as
well :(
What about changing the existing API like below:
```
virtual std::unique_ptr<StripeStatistics> getStripeStatistics(uint64_t
stripeIndex, bool includeRowIndex = true) const = 0;
```
Then return different `StripeStatistics` subclasses depending on the value
of `includeRowIndex`.
```
class StripeStatisticsImpl : public StripeStatistics {
private:
std::unique_ptr<StatisticsImpl> columnStats_;
// DELIBERATELY NOT IMPLEMENTED
StripeStatisticsImpl(const StripeStatisticsImpl&);
StripeStatisticsImpl& operator=(const StripeStatisticsImpl&);
public:
StripeStatisticsImpl(const proto::StripeStatistics& stripeStats,
const StatContext& statContext);
virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId)
const override {
return columnStats_->getColumnStatistics(columnId);
}
uint32_t getNumberOfColumns() const override {
return columnStats_->getNumberOfColumns();
}
virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId,
uint32_t rowIndex)
const override {
throw NotImplementedYet("getRowIndexStatistics");
}
virtual ~StripeStatisticsImpl() override;
uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override {
throw NotImplementedYet("getNumberOfRowIndexStats");
}
};
class StripeStatisticsWithRowIndexImpl : public StripeStatisticsImpl {
private:
std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > >
rowIndexStats_;
public:
StripeStatisticsWithRowIndexImpl(const proto::StripeStatistics&
stripeStats,
std::vector<std::vector<proto::ColumnStatistics> >&
indexStats,
const StatContext& statContext);
virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId,
uint32_t rowIndex)
const override {
// check id indices are valid
return rowIndexStats_[columnId][rowIndex].get();
}
virtual ~StripeStatisticsWithRowIndexImpl() override;
uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override {
return static_cast<uint32_t>(rowIndexStats_[columnId].size());
}
};
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]