wgtmac commented on a change in pull request #1073:
URL: https://github.com/apache/orc/pull/1073#discussion_r837270955



##########
File path: c++/src/Reader.cc
##########
@@ -764,14 +763,14 @@ namespace orc {
 
   std::unique_ptr<StripeStatistics>
   ReaderImpl::getStripeStatistics(uint64_t stripeIndex) const {
-    if (!isMetadataLoaded) {
+    if (!contents->metadata) {

Review comment:
       We may keep isMetadataLoaded flag to avoid repeated calls of 
readMetadata() if missing.

##########
File path: c++/src/sargs/SargsApplier.cc
##########
@@ -122,4 +124,58 @@ namespace orc {
     return mHasSelected;
   }
 
+  bool SargsApplier::evaluateColumnStatistics(
+                                    const PbColumnStatistics& colStats) const {
+    const auto& leaves =
+      dynamic_cast<const SearchArgumentImpl *>(mSearchArgument)->getLeaves();
+    std::vector<TruthValue> leafValues(
+      leaves.size(), TruthValue::YES_NO_NULL);
+
+    for (size_t pred = 0; pred != leaves.size(); ++pred) {
+      uint64_t columnId = mFilterColumns[pred];
+      if (columnId == INVALID_COLUMN_ID) {

Review comment:
       We can simplify logic here since default value is YES_NO_NULL

##########
File path: c++/src/sargs/SargsApplier.cc
##########
@@ -122,4 +124,58 @@ namespace orc {
     return mHasSelected;
   }
 
+  bool SargsApplier::evaluateColumnStatistics(
+                                    const PbColumnStatistics& colStats) const {
+    const auto& leaves =
+      dynamic_cast<const SearchArgumentImpl *>(mSearchArgument)->getLeaves();
+    std::vector<TruthValue> leafValues(
+      leaves.size(), TruthValue::YES_NO_NULL);
+
+    for (size_t pred = 0; pred != leaves.size(); ++pred) {
+      uint64_t columnId = mFilterColumns[pred];
+      if (columnId == INVALID_COLUMN_ID) {
+        // this column does not exist in current file
+        leafValues[pred] = TruthValue::YES_NO_NULL;
+      } else if (colStats.size() <= static_cast<int>(columnId)) {
+        // column stats does not exist
+        leafValues[pred] = TruthValue::YES_NO_NULL;
+      } else {
+        leafValues[pred] = leaves[pred].evaluate(
+          mWriterVersion, colStats.Get(static_cast<int>(columnId)), nullptr);
+      }
+    }
+
+    return isNeeded(mSearchArgument->evaluate(leafValues));
+  }
+
+  bool SargsApplier::evaluateStripeStatistics(
+                            uint64_t rowsInStripe,
+                            const proto::StripeStatistics& stripeStats) {
+    if (stripeStats.colstats_size() == 0) {
+      return true;
+    }
+
+    bool ret = evaluateColumnStatistics(stripeStats.colstats());
+    if (!ret) {
+      // allocate evaluation result for row groups
+      uint64_t groupsInStripe =
+        (rowsInStripe + mRowIndexStride - 1) / mRowIndexStride;

Review comment:
       Make sure mRowIndexStride is not zero here.

##########
File path: c++/src/Reader.cc
##########
@@ -1050,16 +1060,26 @@ namespace orc {
       rowsInCurrentStripe = currentStripeInfo.numberofrows();
 
       if (sargsApplier) {
-        // read row group statistics and bloom filters of current stripe
-        loadStripeIndex();
-
-        // select row groups to read in the current stripe
-        sargsApplier->pickRowGroups(rowsInCurrentStripe,
-                                    rowIndexes,
-                                    bloomFilterIndex);
-        if (sargsApplier->hasSelectedFrom(currentRowInStripe)) {
-          // current stripe has at least one row group matching the predicate
-          break;
+        bool isStripeNeeded = true;
+        if (contents->metadata && !sargsApplier->evaluateStripeStatistics(
+          rowsInCurrentStripe,

Review comment:
       fix the alignment

##########
File path: c++/src/sargs/SargsApplier.cc
##########
@@ -122,4 +124,58 @@ namespace orc {
     return mHasSelected;
   }
 
+  bool SargsApplier::evaluateColumnStatistics(
+                                    const PbColumnStatistics& colStats) const {
+    const auto& leaves =
+      dynamic_cast<const SearchArgumentImpl *>(mSearchArgument)->getLeaves();

Review comment:
       validate null or use static_cast




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@orc.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to