This is an automated email from the ASF dual-hosted git repository.
junhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new e60fe053ed [core] Introduce basic structure for global index read
(#6626)
e60fe053ed is described below
commit e60fe053edffad73189a1ae4a82e7f53e0c3581e
Author: Jingsong Lee <[email protected]>
AuthorDate: Tue Nov 18 17:07:02 2025 +0800
[core] Introduce basic structure for global index read (#6626)
---
.../paimon/globalindex/GlobalFileReader.java | 29 +++++
.../apache/paimon/globalindex/GlobalIndexMeta.java | 53 ++++++++++
.../paimon/globalindex/GlobalIndexReader.java | 44 ++++++++
.../paimon/globalindex/GlobalIndexResult.java | 45 ++++++++
.../globalindex/bitmap/BitmapGlobalIndex.java | 65 ++++++++++++
.../bitmap/BitmapIndexResultWrapper.java | 60 +++++++++++
.../globalindex/wrap/FileIndexReaderWrapper.java | 117 +++++++++++++++++++++
.../main/java/org/apache/paimon/utils/Range.java | 90 ++++++++++++++++
8 files changed, 503 insertions(+)
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalFileReader.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalFileReader.java
new file mode 100644
index 0000000000..7524d1e1f9
--- /dev/null
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalFileReader.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.globalindex;
+
+import org.apache.paimon.fs.SeekableInputStream;
+
+import java.io.IOException;
+
+/**
+ * File reader for global index.
+ *
+ * <p>Opens a global index file by name and exposes it as a {@link SeekableInputStream}.
+ */
+public interface GlobalFileReader {
+
+ /**
+ * Creates a seekable input stream for the given file name.
+ *
+ * <p>NOTE(review): presumably the caller owns the returned stream and must close it; {@code
+ * BitmapGlobalIndex#createReader} hands it to the reader it returns, which closes it.
+ *
+ * @param fileName name of the global index file to open
+ * @return a seekable stream over that file
+ * @throws IOException if the stream cannot be created
+ */
+ SeekableInputStream create(String fileName) throws IOException;
+}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexMeta.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexMeta.java
new file mode 100644
index 0000000000..e4c26420a1
--- /dev/null
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexMeta.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.globalindex;
+
+import org.apache.paimon.utils.Range;
+
+/**
+ * Index meta for global index.
+ *
+ * <p>Immutable holder describing one global index file: its name, its size, the row id range it
+ * is associated with, and an opaque metadata payload. All values are stored exactly as passed to
+ * the constructor; no validation or copying is performed.
+ */
+public class GlobalIndexMeta {
+
+ private final String fileName;
+ private final long fileSize;
+ private final Range rowIdRange;
+ private final byte[] metadata;
+
+ /**
+ * @param fileName name of the index file
+ * @param fileSize size of the index file (NOTE(review): presumably in bytes; confirm)
+ * @param rowIdRange row id range associated with this index file
+ * @param metadata opaque metadata bytes; the array is stored by reference, not copied
+ */
+ public GlobalIndexMeta(String fileName, long fileSize, Range rowIdRange,
byte[] metadata) {
+ this.fileName = fileName;
+ this.fileSize = fileSize;
+ this.rowIdRange = rowIdRange;
+ this.metadata = metadata;
+ }
+
+ /** Returns the name of the index file. */
+ public String fileName() {
+ return fileName;
+ }
+
+ /** Returns the size of the index file. */
+ public long fileSize() {
+ return fileSize;
+ }
+
+ /** Returns the row id range associated with this index file. */
+ public Range rowIdRange() {
+ return rowIdRange;
+ }
+
+ /**
+ * Returns the metadata bytes. This is the internal array itself, not a copy, so changes made
+ * by the caller are visible through this object.
+ */
+ public byte[] metadata() {
+ return metadata;
+ }
+}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexReader.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexReader.java
new file mode 100644
index 0000000000..2ab40805f8
--- /dev/null
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexReader.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.globalindex;
+
+import org.apache.paimon.predicate.FunctionVisitor;
+import org.apache.paimon.predicate.TransformPredicate;
+
+import java.io.Closeable;
+import java.util.List;
+
+/**
+ * Index reader for global index, return {@link GlobalIndexResult}.
+ *
+ * <p>A reader is a {@link FunctionVisitor} producing a {@link GlobalIndexResult} for each leaf
+ * predicate function, and is {@link Closeable}. The three overrides below reject compound and
+ * transform predicates by default: each throws {@link UnsupportedOperationException}.
+ * NOTE(review): presumably AND/OR combination is performed by the caller rather than by the
+ * reader; confirm against the evaluation code.
+ */
+public interface GlobalIndexReader extends FunctionVisitor<GlobalIndexResult>,
Closeable {
+
+ /** Not supported by default; always throws {@link UnsupportedOperationException}. */
+ @Override
+ default GlobalIndexResult visitAnd(List<GlobalIndexResult> children) {
+ throw new UnsupportedOperationException("Should not invoke this");
+ }
+
+ /** Not supported by default; always throws {@link UnsupportedOperationException}. */
+ @Override
+ default GlobalIndexResult visitOr(List<GlobalIndexResult> children) {
+ throw new UnsupportedOperationException("Should not invoke this");
+ }
+
+ /** Not supported by default; always throws {@link UnsupportedOperationException}. */
+ @Override
+ default GlobalIndexResult visit(TransformPredicate predicate) {
+ throw new UnsupportedOperationException("Should not invoke this");
+ }
+}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexResult.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexResult.java
new file mode 100644
index 0000000000..f1ac2213cb
--- /dev/null
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexResult.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.globalindex;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Global index result represents row ids.
+ *
+ * <p>Iterating a result yields the matching row ids as {@link Long} values.
+ *
+ * <p>TODO introduce ranges interface
+ */
+public interface GlobalIndexResult extends Iterable<Long> {
+
+    /**
+     * Creates a result that contains no row ids.
+     *
+     * @return a result whose iterator never has a next element and whose {@code next()} throws
+     *     {@link NoSuchElementException}
+     */
+    static GlobalIndexResult createEmpty() {
+        // The exhausted iterator carries no state, so every iterator() call can hand out the
+        // same instance.
+        Iterator<Long> exhausted =
+                new Iterator<Long>() {
+                    @Override
+                    public boolean hasNext() {
+                        return false;
+                    }
+
+                    @Override
+                    public Long next() {
+                        throw new NoSuchElementException();
+                    }
+                };
+        return () -> exhausted;
+    }
+}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/bitmap/BitmapGlobalIndex.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/bitmap/BitmapGlobalIndex.java
new file mode 100644
index 0000000000..6798dfe5d6
--- /dev/null
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/bitmap/BitmapGlobalIndex.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.globalindex.bitmap;
+
+import org.apache.paimon.fileindex.FileIndexReader;
+import org.apache.paimon.fileindex.FileIndexResult;
+import org.apache.paimon.fileindex.bitmap.BitmapFileIndex;
+import org.apache.paimon.fileindex.bitmap.BitmapIndexResult;
+import org.apache.paimon.fs.SeekableInputStream;
+import org.apache.paimon.globalindex.GlobalFileReader;
+import org.apache.paimon.globalindex.GlobalIndexMeta;
+import org.apache.paimon.globalindex.GlobalIndexReader;
+import org.apache.paimon.globalindex.GlobalIndexResult;
+import org.apache.paimon.globalindex.wrap.FileIndexReaderWrapper;
+import org.apache.paimon.utils.Range;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.apache.paimon.utils.Preconditions.checkArgument;
+
+/**
+ * Bitmap global index.
+ *
+ * <p>Adapts a {@link BitmapFileIndex} so that it can be read through the global index API: the
+ * file-local row positions produced by the bitmap index are shifted by the start of the row id
+ * range recorded in the {@link GlobalIndexMeta}.
+ */
+public class BitmapGlobalIndex {
+
+    private final BitmapFileIndex index;
+
+    public BitmapGlobalIndex(BitmapFileIndex index) {
+        this.index = index;
+    }
+
+    /**
+     * Creates a reader over a single bitmap index file.
+     *
+     * <p>The returned reader owns the opened stream and closes it in its {@code close()}.
+     *
+     * @param fileReader used to open the index file by name
+     * @param files index files to read; must contain exactly one element (enforced with {@code
+     *     checkArgument})
+     * @return a reader whose results are expressed as global row ids
+     * @throws IOException if the index file cannot be opened
+     * @throws ArithmeticException if the recorded file size does not fit in an {@code int}
+     */
+    public GlobalIndexReader createReader(GlobalFileReader fileReader, List<GlobalIndexMeta> files)
+            throws IOException {
+        checkArgument(files.size() == 1);
+        GlobalIndexMeta indexMeta = files.get(0);
+        // The file index API takes an int length. Fail loudly instead of silently truncating an
+        // oversized file, and do it before opening the stream so nothing needs to be cleaned up.
+        int length = Math.toIntExact(indexMeta.fileSize());
+        SeekableInputStream input = fileReader.create(indexMeta.fileName());
+        try {
+            FileIndexReader reader = index.createReader(input, 0, length);
+            return new FileIndexReaderWrapper(
+                    reader, r -> toGlobalResult(indexMeta.rowIdRange(), r), input);
+        } catch (Throwable t) {
+            // Nobody else holds the stream yet, so release it here before propagating.
+            try {
+                input.close();
+            } catch (Throwable closeFailure) {
+                t.addSuppressed(closeFailure);
+            }
+            throw t;
+        }
+    }
+
+    /**
+     * Converts a file-level result into a global one: {@code REMAIN} becomes every row of the
+     * range, {@code SKIP} becomes the empty result, and any other result is treated as a {@link
+     * BitmapIndexResult} whose positions are offset by {@code range.from}.
+     */
+    private GlobalIndexResult toGlobalResult(Range range, FileIndexResult result) {
+        if (FileIndexResult.REMAIN == result) {
+            return BitmapIndexResultWrapper.fromRange(range);
+        }
+        if (FileIndexResult.SKIP == result) {
+            return GlobalIndexResult.createEmpty();
+        }
+        return new BitmapIndexResultWrapper((BitmapIndexResult) result, range.from);
+    }
+}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/bitmap/BitmapIndexResultWrapper.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/bitmap/BitmapIndexResultWrapper.java
new file mode 100644
index 0000000000..6b10f010d3
--- /dev/null
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/bitmap/BitmapIndexResultWrapper.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.globalindex.bitmap;
+
+import org.apache.paimon.fileindex.bitmap.BitmapIndexResult;
+import org.apache.paimon.globalindex.GlobalIndexResult;
+import org.apache.paimon.utils.Range;
+
+import java.util.Iterator;
+
+import static org.apache.paimon.utils.RoaringBitmap32.bitmapOfRange;
+
+/**
+ * A {@link GlobalIndexResult} wrapper for {@link BitmapIndexResult}.
+ *
+ * <p>The wrapped bitmap holds positions relative to the start of a row id range; iteration adds
+ * {@code start} to each position to produce global row ids.
+ */
+public class BitmapIndexResultWrapper implements GlobalIndexResult {
+
+    private final BitmapIndexResult result;
+    private final long start;
+
+    /**
+     * @param result bitmap result holding positions relative to {@code start}
+     * @param start offset added to every position when iterating
+     */
+    public BitmapIndexResultWrapper(BitmapIndexResult result, long start) {
+        this.result = result;
+        this.start = start;
+    }
+
+    /** Iterates the bitmap positions, each shifted by {@code start}. */
+    @Override
+    public Iterator<Long> iterator() {
+        Iterator<Integer> rowIds = result.get().iterator();
+        return new Iterator<Long>() {
+            @Override
+            public boolean hasNext() {
+                return rowIds.hasNext();
+            }
+
+            @Override
+            public Long next() {
+                return rowIds.next() + start;
+            }
+        };
+    }
+
+    /**
+     * Creates a result that contains every row id of {@code range}.
+     *
+     * @param range row id range, inclusive on both ends
+     * @return a result iterating {@code range.from} through {@code range.to}
+     */
+    public static BitmapIndexResultWrapper fromRange(Range range) {
+        // Range is inclusive on both ends, while bitmapOfRange takes an exclusive upper bound
+        // (RoaringBitmap's [min, max) convention), hence the + 1. Without it the last row id
+        // of the range would be dropped and a single-row range would produce nothing.
+        long rowCount = range.to - range.from + 1;
+        return new BitmapIndexResultWrapper(
+                new BitmapIndexResult(() -> bitmapOfRange(0, rowCount)), range.from);
+    }
+}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/wrap/FileIndexReaderWrapper.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/wrap/FileIndexReaderWrapper.java
new file mode 100644
index 0000000000..8a10911ee9
--- /dev/null
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/wrap/FileIndexReaderWrapper.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.globalindex.wrap;
+
+import org.apache.paimon.fileindex.FileIndexReader;
+import org.apache.paimon.fileindex.FileIndexResult;
+import org.apache.paimon.globalindex.GlobalIndexReader;
+import org.apache.paimon.globalindex.GlobalIndexResult;
+import org.apache.paimon.predicate.FieldRef;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.List;
+import java.util.function.Function;
+
+/**
+ * A {@link GlobalIndexReader} wrapper for {@link FileIndexReader}.
+ *
+ * <p>Every visit call is forwarded to the wrapped file index reader, and the {@link
+ * FileIndexResult} it returns is passed through the supplied transform to obtain a {@link
+ * GlobalIndexResult}. Closing this reader closes the supplied {@link Closeable}.
+ */
+public class FileIndexReaderWrapper implements GlobalIndexReader {
+
+    private final FileIndexReader reader;
+    private final Function<FileIndexResult, GlobalIndexResult> transform;
+    private final Closeable closeable;
+
+    /**
+     * @param reader file index reader that evaluates the predicates
+     * @param transform converts each file-level result into a global result
+     * @param closeable resource released by {@link #close()}
+     */
+    public FileIndexReaderWrapper(
+            FileIndexReader reader,
+            Function<FileIndexResult, GlobalIndexResult> transform,
+            Closeable closeable) {
+        this.reader = reader;
+        this.transform = transform;
+        this.closeable = closeable;
+    }
+
+    /** Applies the configured transform to a result produced by the wrapped reader. */
+    private GlobalIndexResult convert(FileIndexResult fileResult) {
+        return transform.apply(fileResult);
+    }
+
+    // ------------------------------ null checks ------------------------------
+
+    @Override
+    public GlobalIndexResult visitIsNotNull(FieldRef fieldRef) {
+        FileIndexResult fileResult = reader.visitIsNotNull(fieldRef);
+        return convert(fileResult);
+    }
+
+    @Override
+    public GlobalIndexResult visitIsNull(FieldRef fieldRef) {
+        FileIndexResult fileResult = reader.visitIsNull(fieldRef);
+        return convert(fileResult);
+    }
+
+    // ---------------------------- string matching ----------------------------
+
+    @Override
+    public GlobalIndexResult visitStartsWith(FieldRef fieldRef, Object literal) {
+        FileIndexResult fileResult = reader.visitStartsWith(fieldRef, literal);
+        return convert(fileResult);
+    }
+
+    @Override
+    public GlobalIndexResult visitEndsWith(FieldRef fieldRef, Object literal) {
+        FileIndexResult fileResult = reader.visitEndsWith(fieldRef, literal);
+        return convert(fileResult);
+    }
+
+    @Override
+    public GlobalIndexResult visitContains(FieldRef fieldRef, Object literal) {
+        FileIndexResult fileResult = reader.visitContains(fieldRef, literal);
+        return convert(fileResult);
+    }
+
+    // ------------------------------ comparisons ------------------------------
+
+    @Override
+    public GlobalIndexResult visitLessThan(FieldRef fieldRef, Object literal) {
+        FileIndexResult fileResult = reader.visitLessThan(fieldRef, literal);
+        return convert(fileResult);
+    }
+
+    @Override
+    public GlobalIndexResult visitGreaterOrEqual(FieldRef fieldRef, Object literal) {
+        FileIndexResult fileResult = reader.visitGreaterOrEqual(fieldRef, literal);
+        return convert(fileResult);
+    }
+
+    @Override
+    public GlobalIndexResult visitNotEqual(FieldRef fieldRef, Object literal) {
+        FileIndexResult fileResult = reader.visitNotEqual(fieldRef, literal);
+        return convert(fileResult);
+    }
+
+    @Override
+    public GlobalIndexResult visitLessOrEqual(FieldRef fieldRef, Object literal) {
+        FileIndexResult fileResult = reader.visitLessOrEqual(fieldRef, literal);
+        return convert(fileResult);
+    }
+
+    @Override
+    public GlobalIndexResult visitEqual(FieldRef fieldRef, Object literal) {
+        FileIndexResult fileResult = reader.visitEqual(fieldRef, literal);
+        return convert(fileResult);
+    }
+
+    @Override
+    public GlobalIndexResult visitGreaterThan(FieldRef fieldRef, Object literal) {
+        FileIndexResult fileResult = reader.visitGreaterThan(fieldRef, literal);
+        return convert(fileResult);
+    }
+
+    // ------------------------------ membership -------------------------------
+
+    @Override
+    public GlobalIndexResult visitIn(FieldRef fieldRef, List<Object> literals) {
+        FileIndexResult fileResult = reader.visitIn(fieldRef, literals);
+        return convert(fileResult);
+    }
+
+    @Override
+    public GlobalIndexResult visitNotIn(FieldRef fieldRef, List<Object> literals) {
+        FileIndexResult fileResult = reader.visitNotIn(fieldRef, literals);
+        return convert(fileResult);
+    }
+
+    /** Closes the resource supplied at construction time. */
+    @Override
+    public void close() throws IOException {
+        closeable.close();
+    }
+}
diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/Range.java
b/paimon-common/src/main/java/org/apache/paimon/utils/Range.java
new file mode 100644
index 0000000000..57a35e59d6
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/Range.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.utils;
+
+import java.util.Objects;
+
+/** Range represents from (inclusive) and to (inclusive). */
+public class Range {
+
+    public final long from;
+    public final long to;
+
+    /**
+     * Creates a range of [from, to] (from and to are inclusive; empty ranges are not valid).
+     *
+     * <p>The {@code from <= to} requirement is checked with an {@code assert} only, so it is
+     * enforced only when assertions are enabled.
+     */
+    public Range(long from, long to) {
+        assert from <= to;
+        this.from = from;
+        this.to = to;
+    }
+
+    /** Returns the number of values in the range, i.e. {@code to - from + 1}. */
+    public long count() {
+        return to - from + 1;
+    }
+
+    /** Returns true if this range ends strictly before {@code other} starts. */
+    public boolean isBefore(Range other) {
+        return to < other.from;
+    }
+
+    /** Returns true if this range starts strictly after {@code other} ends. */
+    public boolean isAfter(Range other) {
+        return from > other.to;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        Range range = (Range) o;
+        return from == range.from && to == range.to;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(from, to);
+    }
+
+    @Override
+    public String toString() {
+        return "[" + from + ", " + to + ']';
+    }
+
+    /**
+     * Returns the union of the two ranges or null if there are elements between them.
+     *
+     * <p>Overlapping ranges and directly adjacent ranges (e.g. [1, 2] and [3, 4]) are merged.
+     */
+    public static Range union(Range left, Range right) {
+        Range first = left.from <= right.from ? left : right;
+        Range second = first == left ? right : left;
+        // Overlap is tested without arithmetic and adjacency with equality, so a range ending
+        // at Long.MAX_VALUE is handled correctly: "first.to + 1 >= second.from" would overflow
+        // and wrongly report a gap.
+        if (first.to >= second.from || first.to + 1 == second.from) {
+            return new Range(first.from, Math.max(left.to, right.to));
+        }
+        return null;
+    }
+
+    /** Returns the intersection of the two ranges or null if they are not overlapped. */
+    public static Range intersection(Range left, Range right) {
+        Range first = left.from <= right.from ? left : right;
+        Range second = first == left ? right : left;
+        if (first.to >= second.from) {
+            return new Range(second.from, Math.min(left.to, right.to));
+        }
+        return null;
+    }
+}
+}