DomGarguilo commented on code in PR #5615:
URL: https://github.com/apache/accumulo/pull/5615#discussion_r2646141238


##########
core/src/main/java/org/apache/accumulo/core/metadata/schema/filters/TabletMetadataFilter.java:
##########
@@ -18,25 +18,115 @@
  */
 package org.apache.accumulo.core.metadata.schema.filters;
 
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Collection;
 import java.util.EnumSet;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.function.Predicate;
 
+import org.apache.accumulo.core.client.RowIterator;
+import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.iterators.IteratorAdapter;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
-import org.apache.accumulo.core.iterators.user.RowFilter;
+import org.apache.accumulo.core.iterators.WrappingIterator;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata;
 
-public abstract class TabletMetadataFilter extends RowFilter {
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterators;
+
+public abstract class TabletMetadataFilter extends WrappingIterator {
+  private Iterator<TabletMetadata> filteredTablets;
+  private SortedKeyValueIterator<Key,Value> seachIterator;
+  private Collection<ByteSequence> seekFamilies;
+  private boolean seekInclusive;
+  private boolean hasTop = false;
+  private Map<String,String> options;
+
+  @Override
+  public void init(SortedKeyValueIterator<Key,Value> source, 
Map<String,String> options,
+      IteratorEnvironment env) throws IOException {
+    super.init(source, options, env);
+    this.seachIterator = source.deepCopy(env);
+    this.options = Map.copyOf(options);
+  }
+
+  @Override
+  public boolean hasTop() {
+    return hasTop;
+  }
+
+  @Override
+  public void next() throws IOException {
+    super.next();
+    if (!super.hasTop()) {
+      // the main iterator was exhausted for the current row its on, see if 
the search iterator has
+      // anything.
+      seekToNextTablet();
+    }
+  }
+
+  @Override
+  public void seek(Range range, Collection<ByteSequence> columnFamilies, 
boolean inclusive)
+      throws IOException {
+    this.seekFamilies = columnFamilies;
+    this.seekInclusive = inclusive;
+
+    // Setup a search iterator that only uses the column families needed for 
filtering and not all
+    // the columns passed to this function. This optimizes a case like where 
we want only tablets
+    // that have external compactions, but when we find a tablet we want the 
ecomp and prev row
+    // columns. So when searching for tablets only seek on the ecomp column 
and avoid the seek with
+    // the prev row column until a tablet is found.
+    var resolvedFamilies = 
TabletMetadata.ColumnType.resolveFamilies(getColumns());
+    Preconditions.checkState(!resolvedFamilies.isEmpty());
+    seachIterator.seek(range, resolvedFamilies, true);
+    var rowsIter = new RowIterator(new IteratorAdapter(seachIterator));
+    var cols = EnumSet.copyOf(getColumns());
+    var tabletsIter = Iterators.transform(rowsIter,
+        rowIter -> TabletMetadata.convertRow(rowIter, cols, true, false));
+    var predicate = acceptTablet();
+    filteredTablets = Iterators.filter(tabletsIter, predicate::test);
+    seekToNextTablet();
+  }
+
+  private void seekToNextTablet() throws IOException {
+    if (filteredTablets.hasNext()) {
+      var nextTablet = filteredTablets.next();
+      var row = nextTablet.getKeyValues().get(0).getKey().getRow();
+      // now that a row was found by our serach iterator, seek the main 
iterator with all the
+      // columns for that row range
+      super.seek(new Range(row), seekFamilies, seekInclusive);

Review Comment:
   I'm not sure if I'm interpreting things correctly here, but could seeking to 
a row potentially ignore the `range` that was passed to the main `seek()` 
method? For example, if the caller supplied a range that begins or ends 
mid-row, could this code end up returning data outside the requested range? 
This may not be an issue in practice; I just wanted to double-check.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to