This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new d87373f9c5 [core] Enable global index external path (#6994)
d87373f9c5 is described below
commit d87373f9c5e3900de2c1f222d162b16ac483b3b8
Author: YeJunHao <[email protected]>
AuthorDate: Tue Jan 13 10:26:20 2026 +0800
[core] Enable global index external path (#6994)
---
.../shortcodes/generated/core_configuration.html | 6 +++
.../main/java/org/apache/paimon/CoreOptions.java | 20 +++++++++
.../paimon/globalindex/GlobalIndexIOMeta.java | 16 +++----
.../globalindex/bitmap/BitmapGlobalIndex.java | 2 +-
.../paimon/globalindex/btree/BTreeIndexReader.java | 4 +-
.../globalindex/btree/LazyFilteredBTreeReader.java | 7 ++--
.../globalindex/io/GlobalIndexFileReader.java | 6 +--
.../bitmapindex/BitmapGlobalIndexTest.java | 15 +------
.../globalindex/btree/AbstractIndexReaderTest.java | 18 ++------
.../btree/BTreeFileMetaSelectorTest.java | 14 ++++---
paimon-core/pom.xml | 4 ++
.../java/org/apache/paimon/AbstractFileStore.java | 3 +-
.../globalindex/GlobalIndexFileReadWrite.java | 13 ++----
.../globalindex/RowRangeGlobalIndexScanner.java | 15 ++++---
.../org/apache/paimon/index/HashIndexFile.java | 3 +-
.../org/apache/paimon/index/IndexFileMeta.java | 5 ++-
.../paimon/table/format/FormatTableFileWriter.java | 3 +-
.../sink/CommitMessageLegacyV2Serializer.java | 1 +
.../apache/paimon/utils/FileStorePathFactory.java | 24 +++++++++--
.../paimon/index/IndexFileMetaSerializerTest.java | 1 +
.../apache/paimon/io/DataFileIndexWriterTest.java | 3 +-
.../paimon/io/KeyValueFileReadWriteTest.java | 6 ++-
...festCommittableSerializerCompatibilityTest.java | 6 ++-
.../paimon/manifest/ManifestFileMetaTestBase.java | 3 +-
.../apache/paimon/manifest/ManifestFileTest.java | 3 +-
.../apache/paimon/manifest/ManifestListTest.java | 3 +-
.../paimon/table/DataEvolutionTableTest.java | 47 +++++++++++++++++++++
.../apache/paimon/table/GlobalIndexTableTest.java | 10 +++--
.../paimon/utils/FileStorePathFactoryTest.java | 6 ++-
.../faiss/index/FaissVectorGlobalIndexReader.java | 2 +-
.../index/FaissVectorGlobalIndexScanTest.java | 6 ++-
.../faiss/index/FaissVectorGlobalIndexTest.java | 29 ++++---------
.../flink/source/TestChangelogDataReadWrite.java | 3 +-
.../index/LuceneVectorGlobalIndexReader.java | 2 +-
.../index/LuceneVectorGlobalIndexScanTest.java | 3 +-
.../lucene/index/LuceneVectorGlobalIndexTest.java | 17 +++-----
.../spark/globalindex/GlobalIndexBuilderUtils.java | 15 ++++++-
.../apache/paimon/spark/SparkFileIndexITCase.java | 3 +-
.../procedure/CreateGlobalIndexProcedureTest.scala | 49 ++++++++++++++++++++++
39 files changed, 269 insertions(+), 127 deletions(-)
diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html
index f4e0a6d1cd..8f352e39b3 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -602,6 +602,12 @@ under the License.
<td>Boolean</td>
<td>Whether to enable global index for scan.</td>
</tr>
+ <tr>
+ <td><h5>global-index.external-path</h5></td>
+ <td style="word-wrap: break-word;">(none)</td>
+ <td>String</td>
+ <td>Global index root directory, if not set, the global index files will be stored under the <table-root-directory>/index.</td>
+ </tr>
<tr>
<td><h5>global-index.row-count-per-shard</h5></td>
<td style="word-wrap: break-word;">100000</td>
diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
index ade2ac9c37..de8a9fc206 100644
--- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
@@ -2013,6 +2013,13 @@ public class CoreOptions implements Serializable {
.defaultValue(false)
.withDescription("Whether index file in data file directory.");
+ public static final ConfigOption<String> GLOBAL_INDEX_EXTERNAL_PATH =
+ key("global-index.external-path")
+ .stringType()
+ .noDefaultValue()
+ .withDescription(
+ "Global index root directory, if not set, the global index files will be stored under the <table-root-directory>/index.");
+
public static final ConfigOption<MemorySize> LOOKUP_MERGE_BUFFER_SIZE =
key("lookup.merge-buffer-size")
.memoryType()
@@ -2703,6 +2710,19 @@ public class CoreOptions implements Serializable {
return options.get(INDEX_FILE_IN_DATA_FILE_DIR);
}
+ public Path globalIndexExternalPath() {
+ String pathString = options.get(GLOBAL_INDEX_EXTERNAL_PATH);
+ if (pathString == null || pathString.isEmpty()) {
+ return null;
+ }
+ Path path = new Path(pathString);
+ String scheme = path.toUri().getScheme();
+ if (scheme == null) {
+ throw new IllegalArgumentException("scheme should not be null: " + path);
+ }
+ return path;
+ }
+
public LookupStrategy lookupStrategy() {
return LookupStrategy.from(
mergeEngine().equals(MergeEngine.FIRST_ROW),
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexIOMeta.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexIOMeta.java
index a09d2437c1..40931255a2 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexIOMeta.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexIOMeta.java
@@ -18,24 +18,26 @@
package org.apache.paimon.globalindex;
+import org.apache.paimon.fs.Path;
+
import java.util.Arrays;
import java.util.Objects;
/** Index meta for global index. */
public class GlobalIndexIOMeta {
- private final String fileName;
+ private final Path filePath;
private final long fileSize;
private final byte[] metadata;
- public GlobalIndexIOMeta(String fileName, long fileSize, byte[] metadata) {
- this.fileName = fileName;
+ public GlobalIndexIOMeta(Path filePath, long fileSize, byte[] metadata) {
+ this.filePath = filePath;
this.fileSize = fileSize;
this.metadata = metadata;
}
- public String fileName() {
- return fileName;
+ public Path filePath() {
+ return filePath;
}
public long fileSize() {
@@ -55,14 +57,14 @@ public class GlobalIndexIOMeta {
return false;
}
GlobalIndexIOMeta that = (GlobalIndexIOMeta) o;
- return Objects.equals(fileName, that.fileName)
+ return Objects.equals(filePath, that.filePath)
&& fileSize == that.fileSize
&& Arrays.equals(metadata, that.metadata);
}
@Override
public int hashCode() {
- int result = Objects.hash(fileName, fileSize);
+ int result = Objects.hash(filePath, fileSize);
result = 31 * result + Arrays.hashCode(metadata);
return result;
}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/bitmap/BitmapGlobalIndex.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/bitmap/BitmapGlobalIndex.java
index b8a64602c2..2f87f89d6e 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/bitmap/BitmapGlobalIndex.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/bitmap/BitmapGlobalIndex.java
@@ -61,7 +61,7 @@ public class BitmapGlobalIndex implements GlobalIndexer {
GlobalIndexFileReader fileReader, List<GlobalIndexIOMeta> files)
throws IOException {
checkArgument(files.size() == 1);
GlobalIndexIOMeta indexMeta = files.get(0);
- SeekableInputStream input =
fileReader.getInputStream(indexMeta.fileName());
+ SeekableInputStream input = fileReader.getInputStream(indexMeta);
FileIndexReader reader = index.createReader(input, 0, (int)
indexMeta.fileSize());
return new FileIndexReaderWrapper(reader, this::toGlobalResult, input);
}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
index adee790a7d..69cf7ceaa4 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
@@ -75,11 +75,11 @@ public class BTreeIndexReader implements GlobalIndexReader {
this.minKey = null;
this.maxKey = null;
}
- this.input = fileReader.getInputStream(globalIndexIOMeta.fileName());
+ this.input = fileReader.getInputStream(globalIndexIOMeta);
// prepare file footer
long fileSize = globalIndexIOMeta.fileSize();
- Path filePath = fileReader.filePath(globalIndexIOMeta.fileName());
+ Path filePath = globalIndexIOMeta.filePath();
BlockCache blockCache = new BlockCache(filePath, input, cacheManager);
BTreeFileFooter footer = readFooter(blockCache, fileSize);
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeReader.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeReader.java
index cbfb452b2d..12b7065ea7 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeReader.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeReader.java
@@ -18,6 +18,7 @@
package org.apache.paimon.globalindex.btree;
+import org.apache.paimon.fs.Path;
import org.apache.paimon.globalindex.GlobalIndexIOMeta;
import org.apache.paimon.globalindex.GlobalIndexReader;
import org.apache.paimon.globalindex.GlobalIndexResult;
@@ -42,7 +43,7 @@ public class LazyFilteredBTreeReader implements
GlobalIndexReader {
private final BTreeFileMetaSelector fileSelector;
private final List<GlobalIndexIOMeta> files;
- private final Map<String, GlobalIndexReader> readerCache;
+ private final Map<Path, GlobalIndexReader> readerCache;
private final KeySerializer keySerializer;
private final CacheManager cacheManager;
private final GlobalIndexFileReader fileReader;
@@ -259,7 +260,7 @@ public class LazyFilteredBTreeReader implements
GlobalIndexReader {
for (GlobalIndexIOMeta meta : files) {
readers.add(
readerCache.computeIfAbsent(
- meta.fileName(),
+ meta.filePath(),
name -> {
try {
return new BTreeIndexReader(
@@ -276,7 +277,7 @@ public class LazyFilteredBTreeReader implements
GlobalIndexReader {
@Override
public void close() throws IOException {
IOException exception = null;
- for (Map.Entry<String, GlobalIndexReader> entry :
this.readerCache.entrySet()) {
+ for (Map.Entry<Path, GlobalIndexReader> entry :
this.readerCache.entrySet()) {
try {
entry.getValue().close();
} catch (IOException ioe) {
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/io/GlobalIndexFileReader.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/io/GlobalIndexFileReader.java
index be0a2c18b9..b2030f4c79 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/io/GlobalIndexFileReader.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/io/GlobalIndexFileReader.java
@@ -18,15 +18,13 @@
package org.apache.paimon.globalindex.io;
-import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.SeekableInputStream;
+import org.apache.paimon.globalindex.GlobalIndexIOMeta;
import java.io.IOException;
/** File reader for global index. */
public interface GlobalIndexFileReader {
- SeekableInputStream getInputStream(String fileName) throws IOException;
-
- Path filePath(String fileName);
+ SeekableInputStream getInputStream(GlobalIndexIOMeta meta) throws
IOException;
}
diff --git
a/paimon-common/src/test/java/org/apache/paimon/globalindex/bitmapindex/BitmapGlobalIndexTest.java
b/paimon-common/src/test/java/org/apache/paimon/globalindex/bitmapindex/BitmapGlobalIndexTest.java
index 18f0a75e2d..cd0277fcdc 100644
---
a/paimon-common/src/test/java/org/apache/paimon/globalindex/bitmapindex/BitmapGlobalIndexTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/globalindex/bitmapindex/BitmapGlobalIndexTest.java
@@ -23,7 +23,6 @@ import org.apache.paimon.fileindex.bitmap.BitmapFileIndex;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.PositionOutputStream;
-import org.apache.paimon.fs.SeekableInputStream;
import org.apache.paimon.fs.local.LocalFileIO;
import org.apache.paimon.globalindex.GlobalIndexIOMeta;
import org.apache.paimon.globalindex.GlobalIndexReader;
@@ -243,19 +242,9 @@ public class BitmapGlobalIndexTest {
long fileSize = fileIO.getFileSize(path);
GlobalIndexFileReader fileReader =
- new GlobalIndexFileReader() {
- @Override
- public SeekableInputStream getInputStream(String fileName)
throws IOException {
- return fileIO.newInputStream(new
Path(tempDir.toString(), fileName));
- }
-
- @Override
- public Path filePath(String fileName) {
- return new Path(tempDir.toString(), fileName);
- }
- };
+ meta -> fileIO.newInputStream(new Path(tempDir.toString(),
meta.filePath()));
- GlobalIndexIOMeta globalIndexMeta = new GlobalIndexIOMeta(fileName,
fileSize, null);
+ GlobalIndexIOMeta globalIndexMeta = new GlobalIndexIOMeta(path,
fileSize, null);
return bitmapGlobalIndex.createReader(
fileReader, Collections.singletonList(globalIndexMeta));
diff --git
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/AbstractIndexReaderTest.java
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/AbstractIndexReaderTest.java
index 1160aa0c11..8a4c37437c 100644
---
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/AbstractIndexReaderTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/AbstractIndexReaderTest.java
@@ -24,7 +24,6 @@ import org.apache.paimon.data.Timestamp;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.PositionOutputStream;
-import org.apache.paimon.fs.SeekableInputStream;
import org.apache.paimon.fs.local.LocalFileIO;
import org.apache.paimon.globalindex.GlobalIndexIOMeta;
import org.apache.paimon.globalindex.GlobalIndexParallelWriter;
@@ -109,18 +108,9 @@ public class AbstractIndexReaderTest {
}
};
fileReader =
- new GlobalIndexFileReader() {
- @Override
- public SeekableInputStream getInputStream(String fileName)
throws IOException {
- return fileIO.newInputStream(
- new Path(new Path(tempPath.toUri()),
fileName));
- }
-
- @Override
- public Path filePath(String fileName) {
- return new Path(new Path(tempPath.toUri()), fileName);
- }
- };
+ meta ->
+ fileIO.newInputStream(
+ new Path(new Path(tempPath.toUri()),
meta.filePath()));
options = new Options();
options.set(BTreeIndexOptions.BTREE_INDEX_CACHE_SIZE,
MemorySize.ofMebiBytes(8));
globalIndexer = new BTreeGlobalIndexer(new DataField(1, "testField",
dataType), options);
@@ -147,7 +137,7 @@ public class AbstractIndexReaderTest {
ResultEntry resultEntry = results.get(0);
String fileName = resultEntry.fileName();
return new GlobalIndexIOMeta(
- fileName,
+ new Path(new Path(tempPath.toUri()), fileName),
fileIO.getFileSize(new Path(new Path(tempPath.toUri()),
fileName)),
resultEntry.meta());
}
diff --git
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelectorTest.java
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelectorTest.java
index 51f72e4346..394f3dacb5 100644
---
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelectorTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelectorTest.java
@@ -18,6 +18,7 @@
package org.apache.paimon.globalindex.btree;
+import org.apache.paimon.fs.Path;
import org.apache.paimon.globalindex.GlobalIndexIOMeta;
import org.apache.paimon.memory.MemorySliceOutput;
import org.apache.paimon.predicate.FieldRef;
@@ -54,11 +55,11 @@ public class BTreeFileMetaSelectorTest {
files =
Arrays.asList(
- new GlobalIndexIOMeta("file1", 1, meta1.serialize()),
- new GlobalIndexIOMeta("file2", 1, meta2.serialize()),
- new GlobalIndexIOMeta("file3", 1, meta3.serialize()),
- new GlobalIndexIOMeta("file4", 1, meta4.serialize()),
- new GlobalIndexIOMeta("file5", 1, meta5.serialize()));
+ new GlobalIndexIOMeta(new Path("file1"), 1,
meta1.serialize()),
+ new GlobalIndexIOMeta(new Path("file2"), 1,
meta2.serialize()),
+ new GlobalIndexIOMeta(new Path("file3"), 1,
meta3.serialize()),
+ new GlobalIndexIOMeta(new Path("file4"), 1,
meta4.serialize()),
+ new GlobalIndexIOMeta(new Path("file5"), 1,
meta5.serialize()));
}
@Test
@@ -147,7 +148,8 @@ public class BTreeFileMetaSelectorTest {
private void assertFiles(List<GlobalIndexIOMeta> files, List<String>
expected) {
Assertions.assertThat(
files.stream()
- .map(GlobalIndexIOMeta::fileName)
+ .map(GlobalIndexIOMeta::filePath)
+ .map(Path::toString)
.collect(Collectors.toList()))
.containsExactlyInAnyOrderElementsOf(expected);
}
diff --git a/paimon-core/pom.xml b/paimon-core/pom.xml
index c2f5103a63..a3bf3ccf0e 100644
--- a/paimon-core/pom.xml
+++ b/paimon-core/pom.xml
@@ -139,6 +139,10 @@ under the License.
<version>${hadoop.version}</version>
<scope>test</scope>
<exclusions>
+ <exclusion>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ </exclusion>
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
diff --git a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
index 6944c0283f..33a60e73d7 100644
--- a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
+++ b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
@@ -137,7 +137,8 @@ abstract class AbstractFileStore<T> implements FileStore<T>
{
options.dataFilePathDirectory(),
createExternalPaths(),
options.externalPathStrategy(),
- options.indexFileInDataFileDir());
+ options.indexFileInDataFileDir(),
+ options.globalIndexExternalPath());
}
private List<Path> createExternalPaths() {
diff --git
a/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexFileReadWrite.java
b/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexFileReadWrite.java
index 80c6a41ffc..1df2171354 100644
---
a/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexFileReadWrite.java
+++
b/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexFileReadWrite.java
@@ -19,7 +19,6 @@
package org.apache.paimon.globalindex;
import org.apache.paimon.fs.FileIO;
-import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.PositionOutputStream;
import org.apache.paimon.fs.SeekableInputStream;
import org.apache.paimon.globalindex.io.GlobalIndexFileReader;
@@ -44,21 +43,15 @@ public class GlobalIndexFileReadWrite implements
GlobalIndexFileReader, GlobalIn
return prefix + "-" + "global-index-" + UUID.randomUUID() + ".index";
}
- @Override
- public Path filePath(String fileName) {
- return indexPathFactory.toPath(fileName);
- }
-
public long fileSize(String fileName) throws IOException {
- return fileIO.getFileSize(filePath(fileName));
+ return fileIO.getFileSize(indexPathFactory.toPath(fileName));
}
public PositionOutputStream newOutputStream(String fileName) throws
IOException {
return fileIO.newOutputStream(indexPathFactory.toPath(fileName), true);
}
- public SeekableInputStream getInputStream(String fileName) throws
IOException {
- Path path = indexPathFactory.toPath(fileName);
- return fileIO.newInputStream(path);
+ public SeekableInputStream getInputStream(GlobalIndexIOMeta meta) throws
IOException {
+ return fileIO.newInputStream(meta.filePath());
}
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/globalindex/RowRangeGlobalIndexScanner.java
b/paimon-core/src/main/java/org/apache/paimon/globalindex/RowRangeGlobalIndexScanner.java
index f378669652..ad2ccc3498 100644
---
a/paimon-core/src/main/java/org/apache/paimon/globalindex/RowRangeGlobalIndexScanner.java
+++
b/paimon-core/src/main/java/org/apache/paimon/globalindex/RowRangeGlobalIndexScanner.java
@@ -19,6 +19,8 @@
package org.apache.paimon.globalindex;
import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.globalindex.io.GlobalIndexFileReader;
import org.apache.paimon.index.GlobalIndexMeta;
import org.apache.paimon.index.IndexFileMeta;
import org.apache.paimon.index.IndexPathFactory;
@@ -54,6 +56,7 @@ public class RowRangeGlobalIndexScanner implements Closeable {
private final Options options;
private final GlobalIndexEvaluator globalIndexEvaluator;
+ private final IndexPathFactory indexPathFactory;
public RowRangeGlobalIndexScanner(
Options options,
@@ -76,8 +79,9 @@ public class RowRangeGlobalIndexScanner implements Closeable {
+ ")");
}
- GlobalIndexFileReadWrite indexFileReadWrite =
- new GlobalIndexFileReadWrite(fileIO, indexPathFactory);
+ this.indexPathFactory = indexPathFactory;
+
+ GlobalIndexFileReader indexFileReader = meta ->
fileIO.newInputStream(meta.filePath());
Map<Integer, Map<String, Map<Range, List<IndexFileMeta>>>> indexMetas
= new HashMap<>();
for (IndexManifestEntry entry : entries) {
@@ -97,7 +101,7 @@ public class RowRangeGlobalIndexScanner implements Closeable
{
IntFunction<Collection<GlobalIndexReader>> readersFunction =
fieldId ->
createReaders(
- indexFileReadWrite,
+ indexFileReader,
indexMetas.get(fieldId),
rowType.getField(fieldId));
this.globalIndexEvaluator = new GlobalIndexEvaluator(rowType,
readersFunction);
@@ -109,7 +113,7 @@ public class RowRangeGlobalIndexScanner implements
Closeable {
}
private Collection<GlobalIndexReader> createReaders(
- GlobalIndexFileReadWrite indexFileReadWrite,
+ GlobalIndexFileReader indexFileReadWrite,
Map<String, Map<Range, List<IndexFileMeta>>> indexMetas,
DataField dataField) {
if (indexMetas == null) {
@@ -154,7 +158,8 @@ public class RowRangeGlobalIndexScanner implements
Closeable {
private GlobalIndexIOMeta toGlobalMeta(IndexFileMeta meta) {
GlobalIndexMeta globalIndex = meta.globalIndexMeta();
checkNotNull(globalIndex);
- return new GlobalIndexIOMeta(meta.fileName(), meta.fileSize(),
globalIndex.indexMeta());
+ Path filePath = indexPathFactory.toPath(meta);
+ return new GlobalIndexIOMeta(filePath, meta.fileSize(),
globalIndex.indexMeta());
}
@Override
diff --git
a/paimon-core/src/main/java/org/apache/paimon/index/HashIndexFile.java
b/paimon-core/src/main/java/org/apache/paimon/index/HashIndexFile.java
index 3457ac30d6..b6956a4756 100644
--- a/paimon-core/src/main/java/org/apache/paimon/index/HashIndexFile.java
+++ b/paimon-core/src/main/java/org/apache/paimon/index/HashIndexFile.java
@@ -54,7 +54,8 @@ public class HashIndexFile extends IndexFile {
fileSize(path),
count,
null,
- isExternalPath() ? path.toString() : null);
+ isExternalPath() ? path.toString() : null,
+ null);
}
public IndexFileMeta write(int[] ints) throws IOException {
diff --git
a/paimon-core/src/main/java/org/apache/paimon/index/IndexFileMeta.java
b/paimon-core/src/main/java/org/apache/paimon/index/IndexFileMeta.java
index 41332fafc4..a7c257a46e 100644
--- a/paimon-core/src/main/java/org/apache/paimon/index/IndexFileMeta.java
+++ b/paimon-core/src/main/java/org/apache/paimon/index/IndexFileMeta.java
@@ -103,8 +103,9 @@ public class IndexFileMeta {
String fileName,
long fileSize,
long rowCount,
- @Nullable GlobalIndexMeta globalIndexMeta) {
- this(indexType, fileName, fileSize, rowCount, null, null,
globalIndexMeta);
+ @Nullable GlobalIndexMeta globalIndexMeta,
+ @Nullable String externalPath) {
+ this(indexType, fileName, fileSize, rowCount, null, externalPath,
globalIndexMeta);
}
public String indexType() {
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableFileWriter.java
b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableFileWriter.java
index 4edb603523..c241eb7d18 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableFileWriter.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableFileWriter.java
@@ -69,7 +69,8 @@ public class FormatTableFileWriter {
options.dataFilePathDirectory(),
null,
CoreOptions.ExternalPathStrategy.NONE,
- options.indexFileInDataFileDir());
+ options.indexFileInDataFileDir(),
+ null);
}
public void withWriteType(RowType writeType) {
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/sink/CommitMessageLegacyV2Serializer.java
b/paimon-core/src/main/java/org/apache/paimon/table/sink/CommitMessageLegacyV2Serializer.java
index 5c3976af81..f60415bc80 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/sink/CommitMessageLegacyV2Serializer.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/sink/CommitMessageLegacyV2Serializer.java
@@ -192,6 +192,7 @@ public class CommitMessageLegacyV2Serializer {
row.getLong(2),
row.getLong(3),
null,
+ null,
null);
}
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/utils/FileStorePathFactory.java
b/paimon-core/src/main/java/org/apache/paimon/utils/FileStorePathFactory.java
index 37ed1b1fa7..6ebe6438ed 100644
---
a/paimon-core/src/main/java/org/apache/paimon/utils/FileStorePathFactory.java
+++
b/paimon-core/src/main/java/org/apache/paimon/utils/FileStorePathFactory.java
@@ -24,6 +24,7 @@ import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.fs.ExternalPathProvider;
import org.apache.paimon.fs.Path;
+import org.apache.paimon.index.IndexFileMeta;
import org.apache.paimon.index.IndexInDataFileDirPathFactory;
import org.apache.paimon.index.IndexPathFactory;
import org.apache.paimon.io.ChainReadContext;
@@ -37,6 +38,7 @@ import javax.annotation.concurrent.ThreadSafe;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
@@ -81,6 +83,7 @@ public class FileStorePathFactory {
private final AtomicInteger statsFileCount;
private final List<Path> externalPaths;
private final ExternalPathStrategy strategy;
+ @Nullable private final Path globalIndexExternalRootDir;
public FileStorePathFactory(
Path root,
@@ -95,7 +98,8 @@ public class FileStorePathFactory {
@Nullable String dataFilePathDirectory,
List<Path> externalPaths,
ExternalPathStrategy strategy,
- boolean indexFileInDataFileDir) {
+ boolean indexFileInDataFileDir,
+ @Nullable Path globalIndexExternalRootDir) {
this.root = root;
this.dataFilePathDirectory = dataFilePathDirectory;
this.indexFileInDataFileDir = indexFileInDataFileDir;
@@ -116,6 +120,7 @@ public class FileStorePathFactory {
this.statsFileCount = new AtomicInteger(0);
this.externalPaths = externalPaths;
this.strategy = strategy;
+ this.globalIndexExternalRootDir = globalIndexExternalRootDir;
}
public Path root() {
@@ -130,6 +135,10 @@ public class FileStorePathFactory {
return new Path(root, INDEX_PATH);
}
+ public Path globalIndexRootDir() {
+ return globalIndexExternalRootDir != null ? globalIndexExternalRootDir
: indexPath();
+ }
+
public Path statisticsPath() {
return new Path(root, STATISTICS_PATH);
}
@@ -318,7 +327,7 @@ public class FileStorePathFactory {
return new IndexPathFactory() {
@Override
public Path toPath(String fileName) {
- return new Path(indexPath(), fileName);
+ return new Path(globalIndexRootDir(), fileName);
}
@Override
@@ -326,9 +335,18 @@ public class FileStorePathFactory {
return toPath(INDEX_PREFIX + uuid + "-" +
indexFileCount.getAndIncrement());
}
+ @Override
+ public Path toPath(IndexFileMeta file) {
+ return Optional.ofNullable(file.externalPath())
+ .map(Path::new)
+ // If external path is null, use the index path (not global index root dir,
+ // because the root dir may change by alter table)
+ .orElse(new Path(indexPath(), file.fileName()));
+ }
+
@Override
public boolean isExternalPath() {
- return false;
+ return globalIndexExternalRootDir != null;
}
};
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/index/IndexFileMetaSerializerTest.java
b/paimon-core/src/test/java/org/apache/paimon/index/IndexFileMetaSerializerTest.java
index c5ad3ae8fc..7e4fe92c8d 100644
---
a/paimon-core/src/test/java/org/apache/paimon/index/IndexFileMetaSerializerTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/index/IndexFileMetaSerializerTest.java
@@ -55,6 +55,7 @@ public class IndexFileMetaSerializerTest extends
ObjectSerializerTestBase<IndexF
rnd.nextInt(),
rnd.nextInt(),
null,
+ null,
null);
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/io/DataFileIndexWriterTest.java
b/paimon-core/src/test/java/org/apache/paimon/io/DataFileIndexWriterTest.java
index 9fbe7b9510..1739a27154 100644
---
a/paimon-core/src/test/java/org/apache/paimon/io/DataFileIndexWriterTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/io/DataFileIndexWriterTest.java
@@ -173,7 +173,8 @@ public class DataFileIndexWriterTest {
null,
null,
CoreOptions.ExternalPathStrategy.NONE,
- false);
+ false,
+ null);
Table table = fileSystemCatalog.getTable(Identifier.create(tableName,
tableName));
ReadBuilder readBuilder = table.newReadBuilder();
diff --git
a/paimon-core/src/test/java/org/apache/paimon/io/KeyValueFileReadWriteTest.java
b/paimon-core/src/test/java/org/apache/paimon/io/KeyValueFileReadWriteTest.java
index 375ca7805f..025f5849c5 100644
---
a/paimon-core/src/test/java/org/apache/paimon/io/KeyValueFileReadWriteTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/io/KeyValueFileReadWriteTest.java
@@ -240,7 +240,8 @@ public class KeyValueFileReadWriteTest {
null,
null,
CoreOptions.ExternalPathStrategy.NONE,
- false);
+ false,
+ null);
int suggestedFileSize = ThreadLocalRandom.current().nextInt(8192) +
1024;
FileIO fileIO = FileIOFinder.find(path);
Options options = new Options();
@@ -263,7 +264,8 @@ public class KeyValueFileReadWriteTest {
null,
null,
CoreOptions.ExternalPathStrategy.NONE,
- false);
+ false,
+ null);
}
};
diff --git
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestCommittableSerializerCompatibilityTest.java
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestCommittableSerializerCompatibilityTest.java
index bc7775d4af..c3a6ef9649 100644
---
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestCommittableSerializerCompatibilityTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestCommittableSerializerCompatibilityTest.java
@@ -281,7 +281,8 @@ public class ManifestCommittableSerializerCompatibilityTest
{
Arrays.asList("asdf", "qwer", "zxcv"));
List<DataFileMeta> dataFiles = Collections.singletonList(dataFile);
IndexFileMeta hashIndexFile =
- new IndexFileMeta("my_index_type", "my_index_file", 1024 *
100, 1002, null, null);
+ new IndexFileMeta(
+ "my_index_type", "my_index_file", 1024 * 100, 1002,
null, null, null);
LinkedHashMap<String, DeletionVectorMeta> dvRanges = new
LinkedHashMap<>();
dvRanges.put("dv_key1", new DeletionVectorMeta("dv_key1", 1, 2, 3L));
@@ -1060,7 +1061,8 @@ public class
ManifestCommittableSerializerCompatibilityTest {
List<DataFileMeta> dataFiles = Collections.singletonList(dataFile);
IndexFileMeta indexFile =
- new IndexFileMeta("my_index_type", "my_index_file", 1024 *
100, 1002, null, null);
+ new IndexFileMeta(
+ "my_index_type", "my_index_file", 1024 * 100, 1002,
null, null, null);
List<IndexFileMeta> indexFiles = Collections.singletonList(indexFile);
CommitMessageImpl commitMessage =
diff --git
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
index ead73499ff..4aa696a46a 100644
---
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
+++
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
@@ -156,7 +156,8 @@ public abstract class ManifestFileMetaTestBase {
null,
null,
CoreOptions.ExternalPathStrategy.NONE,
- false),
+ false,
+ null),
Long.MAX_VALUE,
null)
.create();
diff --git
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
index fc19f6262d..dbf11aa8ff 100644
--- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
@@ -144,7 +144,8 @@ public class ManifestFileTest {
null,
null,
CoreOptions.ExternalPathStrategy.NONE,
- false);
+ false,
+ null);
int suggestedFileSize = ThreadLocalRandom.current().nextInt(8192) +
1024;
FileIO fileIO = FileIOFinder.find(path);
return new ManifestFile.Factory(
diff --git
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestListTest.java
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestListTest.java
index d719a6d5cd..a4b3ceec5a 100644
--- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestListTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestListTest.java
@@ -176,7 +176,8 @@ public class ManifestListTest {
null,
null,
CoreOptions.ExternalPathStrategy.NONE,
- false);
+ false,
+ null);
}
private ManifestList createManifestList(String pathStr) {
diff --git
a/paimon-core/src/test/java/org/apache/paimon/table/DataEvolutionTableTest.java
b/paimon-core/src/test/java/org/apache/paimon/table/DataEvolutionTableTest.java
index 4fcee005d6..39a78a97a9 100644
---
a/paimon-core/src/test/java/org/apache/paimon/table/DataEvolutionTableTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/table/DataEvolutionTableTest.java
@@ -24,7 +24,10 @@ import
org.apache.paimon.append.dataevolution.DataEvolutionCompactTask;
import org.apache.paimon.data.BinaryString;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.data.InternalRow;
+import org.apache.paimon.fs.Path;
import org.apache.paimon.globalindex.IndexedSplit;
+import org.apache.paimon.index.IndexFileMeta;
+import org.apache.paimon.index.IndexPathFactory;
import org.apache.paimon.io.DataFileMeta;
import org.apache.paimon.manifest.ManifestEntry;
import org.apache.paimon.manifest.ManifestFileMeta;
@@ -853,4 +856,48 @@ public class DataEvolutionTableTest extends
DataEvolutionTestBase {
assertThat(entries.get(0).file().nonNullFirstRowId()).isEqualTo(0);
assertThat(entries.get(0).file().rowCount()).isEqualTo(500000L);
}
+
+ @Test
+ public void testIndexPath() throws Exception {
+ createTableDefault();
+ FileStoreTable table = getTableDefault();
+ IndexPathFactory indexPathFactory = table.store().pathFactory().globalIndexFileFactory();
+
+ Path path0 = indexPathFactory.toPath("test-file");
+ assertThat(path0.toString().contains(warehouse.toString())).isTrue();
+ String testExternalpath = "file:/external/path/test-file-dir";
+ Path path1 =
+ indexPathFactory.toPath(
+ new IndexFileMeta(
+ "test-type",
+ "test-file",
+ 1024L,
+ 100L,
+ null,
+ testExternalpath,
+ null));
+ assertThat(path1.toString()).isEqualTo(testExternalpath);
+
+ table =
+ table.copy(
+ Collections.singletonMap(
+ CoreOptions.GLOBAL_INDEX_EXTERNAL_PATH.key(), testExternalpath));
+
+ indexPathFactory = table.store().pathFactory().globalIndexFileFactory();
+ Path path3 = indexPathFactory.toPath("test-file");
+ assertThat(path3.toString()).isEqualTo(testExternalpath + "/test-file");
+
+ String testExternalpath2 = "file:/external/path2/test-file";
+ Path path4 =
+ indexPathFactory.toPath(
+ new IndexFileMeta(
+ "test-type",
+ "test-file",
+ 1024L,
+ 100L,
+ null,
+ testExternalpath2,
+ null));
+ assertThat(path4.toString()).isEqualTo(testExternalpath2);
+ }
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/table/GlobalIndexTableTest.java
b/paimon-core/src/test/java/org/apache/paimon/table/GlobalIndexTableTest.java
index e283198cf9..b375cea8d6 100644
---
a/paimon-core/src/test/java/org/apache/paimon/table/GlobalIndexTableTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/table/GlobalIndexTableTest.java
@@ -283,7 +283,7 @@ public class GlobalIndexTableTest extends
DataEvolutionTestBase {
ResultEntry result = results.get(0);
String fileName = result.fileName();
- long fileSize =
fileIO.getFileSize(indexFileReadWrite.filePath(fileName));
+ long fileSize = indexFileReadWrite.fileSize(fileName);
GlobalIndexMeta globalIndexMeta =
new GlobalIndexMeta(0, result.rowCount() - 1, indexField.id(),
null, result.meta());
return Collections.singletonList(
@@ -292,7 +292,8 @@ public class GlobalIndexTableTest extends
DataEvolutionTestBase {
fileName,
fileSize,
result.rowCount(),
- globalIndexMeta));
+ globalIndexMeta,
+ null));
}
private List<IndexFileMeta> createBTreeIndex(
@@ -375,7 +376,7 @@ public class GlobalIndexTableTest extends
DataEvolutionTestBase {
ResultEntry entry = entries.get(0);
String fileName = entry.fileName();
- long fileSize =
fileIO.getFileSize(indexFileReadWrite.filePath(fileName));
+ long fileSize = indexFileReadWrite.fileSize(fileName);
GlobalIndexMeta globalIndexMeta =
new GlobalIndexMeta(range.from, range.to, indexField.id(),
null, entry.meta());
@@ -385,7 +386,8 @@ public class GlobalIndexTableTest extends
DataEvolutionTestBase {
fileName,
fileSize,
entry.rowCount(),
- globalIndexMeta));
+ globalIndexMeta,
+ null));
}
return indexFileMetas;
diff --git
a/paimon-core/src/test/java/org/apache/paimon/utils/FileStorePathFactoryTest.java
b/paimon-core/src/test/java/org/apache/paimon/utils/FileStorePathFactoryTest.java
index eb7e9f88b6..146a62cae6 100644
---
a/paimon-core/src/test/java/org/apache/paimon/utils/FileStorePathFactoryTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/utils/FileStorePathFactoryTest.java
@@ -95,7 +95,8 @@ public class FileStorePathFactoryTest {
null,
null,
CoreOptions.ExternalPathStrategy.NONE,
- false);
+ false,
+ null);
assertPartition("20211224", 16, pathFactory, "/dt=20211224/hr=16");
assertPartition("20211224", null, pathFactory,
"/dt=20211224/hr=default");
@@ -140,6 +141,7 @@ public class FileStorePathFactoryTest {
null,
null,
CoreOptions.ExternalPathStrategy.NONE,
- false);
+ false,
+ null);
}
}
diff --git
a/paimon-faiss/paimon-faiss-index/src/main/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexReader.java
b/paimon-faiss/paimon-faiss-index/src/main/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexReader.java
index 3aa1fca16c..f33bce9cd1 100644
---
a/paimon-faiss/paimon-faiss-index/src/main/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexReader.java
+++
b/paimon-faiss/paimon-faiss-index/src/main/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexReader.java
@@ -304,7 +304,7 @@ public class FaissVectorGlobalIndexReader implements
GlobalIndexReader {
private void loadIndexAt(int position) throws IOException {
GlobalIndexIOMeta ioMeta = ioMetas.get(position);
FaissIndex index = null;
- try (SeekableInputStream in =
fileReader.getInputStream(ioMeta.fileName())) {
+ try (SeekableInputStream in = fileReader.getInputStream(ioMeta)) {
index = loadIndex(in);
if (indices.size() <= position) {
while (indices.size() < position) {
diff --git
a/paimon-faiss/paimon-faiss-index/src/test/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexScanTest.java
b/paimon-faiss/paimon-faiss-index/src/test/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexScanTest.java
index d0b9fe68d4..df5f94e07f 100644
---
a/paimon-faiss/paimon-faiss-index/src/test/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexScanTest.java
+++
b/paimon-faiss/paimon-faiss-index/src/test/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexScanTest.java
@@ -237,7 +237,8 @@ public class FaissVectorGlobalIndexScanTest {
entry.fileName(),
fileSize,
entry.rowCount(),
- globalMeta));
+ globalMeta,
+ null));
}
// Commit index
@@ -323,7 +324,8 @@ public class FaissVectorGlobalIndexScanTest {
entry.fileName(),
fileSize,
entry.rowCount(),
- globalMeta));
+ globalMeta,
+ null));
}
return metas;
}
diff --git
a/paimon-faiss/paimon-faiss-index/src/test/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexTest.java
b/paimon-faiss/paimon-faiss-index/src/test/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexTest.java
index 7311d95322..0352f3f111 100644
---
a/paimon-faiss/paimon-faiss-index/src/test/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexTest.java
+++
b/paimon-faiss/paimon-faiss-index/src/test/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexTest.java
@@ -23,7 +23,6 @@ import org.apache.paimon.faiss.FaissException;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.PositionOutputStream;
-import org.apache.paimon.fs.SeekableInputStream;
import org.apache.paimon.fs.local.LocalFileIO;
import org.apache.paimon.globalindex.GlobalIndexIOMeta;
import org.apache.paimon.globalindex.GlobalIndexResult;
@@ -109,17 +108,7 @@ public class FaissVectorGlobalIndexTest {
}
private GlobalIndexFileReader createFileReader(Path path) {
- return new GlobalIndexFileReader() {
- @Override
- public SeekableInputStream getInputStream(String fileName) throws
IOException {
- return fileIO.newInputStream(new Path(path, fileName));
- }
-
- @Override
- public Path filePath(String fileName) {
- return new Path(path, fileName);
- }
- };
+ return meta -> fileIO.newInputStream(new Path(path, meta.filePath()));
}
@Test
@@ -149,7 +138,7 @@ public class FaissVectorGlobalIndexTest {
List<GlobalIndexIOMeta> metas = new ArrayList<>();
metas.add(
new GlobalIndexIOMeta(
- result.fileName(),
+ new Path(metricIndexPath, result.fileName()),
fileIO.getFileSize(new Path(metricIndexPath,
result.fileName())),
result.meta()));
@@ -190,7 +179,7 @@ public class FaissVectorGlobalIndexTest {
List<GlobalIndexIOMeta> metas = new ArrayList<>();
metas.add(
new GlobalIndexIOMeta(
- result.fileName(),
+ new Path(typeIndexPath, result.fileName()),
fileIO.getFileSize(new Path(typeIndexPath,
result.fileName())),
result.meta()));
@@ -227,7 +216,7 @@ public class FaissVectorGlobalIndexTest {
List<GlobalIndexIOMeta> metas = new ArrayList<>();
metas.add(
new GlobalIndexIOMeta(
- result.fileName(),
+ new Path(dimIndexPath, result.fileName()),
fileIO.getFileSize(new Path(dimIndexPath,
result.fileName())),
result.meta()));
@@ -283,7 +272,7 @@ public class FaissVectorGlobalIndexTest {
for (ResultEntry result : results) {
metas.add(
new GlobalIndexIOMeta(
- result.fileName(),
+ new Path(indexPath, result.fileName()),
fileIO.getFileSize(new Path(indexPath,
result.fileName())),
result.meta()));
}
@@ -349,7 +338,7 @@ public class FaissVectorGlobalIndexTest {
for (ResultEntry result : results) {
metas.add(
new GlobalIndexIOMeta(
- result.fileName(),
+ new Path(indexPath, result.fileName()),
fileIO.getFileSize(new Path(indexPath,
result.fileName())),
result.meta()));
}
@@ -391,9 +380,7 @@ public class FaissVectorGlobalIndexTest {
Path filePath = new Path(indexPath, result.fileName());
assertThat(fileIO.exists(filePath)).isTrue();
assertThat(fileIO.getFileSize(filePath)).isGreaterThan(0);
- metas.add(
- new GlobalIndexIOMeta(
- result.fileName(), fileIO.getFileSize(filePath),
result.meta()));
+ metas.add(new GlobalIndexIOMeta(filePath, fileIO.getFileSize(filePath), result.meta()));
}
// Search for vectors from different files
@@ -452,7 +439,7 @@ public class FaissVectorGlobalIndexTest {
for (ResultEntry result : results) {
metas.add(
new GlobalIndexIOMeta(
- result.fileName(),
+ new Path(indexPath, result.fileName()),
fileIO.getFileSize(new Path(indexPath,
result.fileName())),
result.meta()));
}
diff --git
a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/source/TestChangelogDataReadWrite.java
b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/source/TestChangelogDataReadWrite.java
index d2fbf63eea..dcd734cad7 100644
---
a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/source/TestChangelogDataReadWrite.java
+++
b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/source/TestChangelogDataReadWrite.java
@@ -113,7 +113,8 @@ public class TestChangelogDataReadWrite {
null,
null,
CoreOptions.ExternalPathStrategy.NONE,
- false);
+ false,
+ null);
this.snapshotManager = newSnapshotManager(LocalFileIO.create(), new
Path(root));
this.commitUser = UUID.randomUUID().toString();
}
diff --git
a/paimon-lucene/src/main/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexReader.java
b/paimon-lucene/src/main/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexReader.java
index c6acf36f69..b8e965d6df 100644
---
a/paimon-lucene/src/main/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexReader.java
+++
b/paimon-lucene/src/main/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexReader.java
@@ -227,7 +227,7 @@ public class LuceneVectorGlobalIndexReader implements
GlobalIndexReader {
synchronized (this) {
if (!indicesLoaded) {
for (GlobalIndexIOMeta meta : files) {
- try (SeekableInputStream in =
fileReader.getInputStream(meta.fileName())) {
+ try (SeekableInputStream in = fileReader.getInputStream(meta)) {
LuceneIndexMMapDirectory directory = null;
IndexReader reader = null;
try {
diff --git
a/paimon-lucene/src/test/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexScanTest.java
b/paimon-lucene/src/test/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexScanTest.java
index e9e476ede6..32c79772cd 100644
---
a/paimon-lucene/src/test/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexScanTest.java
+++
b/paimon-lucene/src/test/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexScanTest.java
@@ -186,7 +186,8 @@ public class LuceneVectorGlobalIndexScanTest {
entry.fileName(),
fileSize,
entry.rowCount(),
- globalMeta));
+ globalMeta,
+ null));
}
return metas;
}
diff --git
a/paimon-lucene/src/test/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexTest.java
b/paimon-lucene/src/test/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexTest.java
index 2848a04f72..ab30d0cbc7 100644
---
a/paimon-lucene/src/test/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexTest.java
+++
b/paimon-lucene/src/test/java/org/apache/paimon/lucene/index/LuceneVectorGlobalIndexTest.java
@@ -93,13 +93,8 @@ public class LuceneVectorGlobalIndexTest {
private GlobalIndexFileReader createFileReader(Path path) {
return new GlobalIndexFileReader() {
@Override
- public SeekableInputStream getInputStream(String fileName) throws
IOException {
- return fileIO.newInputStream(new Path(path, fileName));
- }
-
- @Override
- public Path filePath(String fileName) {
- return new Path(path, fileName);
+ public SeekableInputStream getInputStream(GlobalIndexIOMeta meta) throws IOException {
+ return fileIO.newInputStream(new Path(path, meta.filePath()));
}
};
}
@@ -131,7 +126,7 @@ public class LuceneVectorGlobalIndexTest {
List<GlobalIndexIOMeta> metas = new ArrayList<>();
metas.add(
new GlobalIndexIOMeta(
- result.fileName(),
+ new Path(metricIndexPath, result.fileName()),
fileIO.getFileSize(new Path(metricIndexPath,
result.fileName())),
result.meta()));
@@ -168,7 +163,7 @@ public class LuceneVectorGlobalIndexTest {
List<GlobalIndexIOMeta> metas = new ArrayList<>();
metas.add(
new GlobalIndexIOMeta(
- result.fileName(),
+ new Path(dimIndexPath, result.fileName()),
fileIO.getFileSize(new Path(dimIndexPath,
result.fileName())),
result.meta()));
@@ -225,7 +220,7 @@ public class LuceneVectorGlobalIndexTest {
for (ResultEntry result : results) {
metas.add(
new GlobalIndexIOMeta(
- result.fileName(),
+ new Path(indexPath, result.fileName()),
fileIO.getFileSize(new Path(indexPath,
result.fileName())),
result.meta()));
}
@@ -293,7 +288,7 @@ public class LuceneVectorGlobalIndexTest {
for (ResultEntry result : results) {
metas.add(
new GlobalIndexIOMeta(
- result.fileName(),
+ new Path(indexPath, result.fileName()),
fileIO.getFileSize(new Path(indexPath,
result.fileName())),
result.meta()));
}
diff --git
a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/globalindex/GlobalIndexBuilderUtils.java
b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/globalindex/GlobalIndexBuilderUtils.java
index 24388cfd81..cd26dbfaf4 100644
---
a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/globalindex/GlobalIndexBuilderUtils.java
+++
b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/globalindex/GlobalIndexBuilderUtils.java
@@ -18,6 +18,7 @@
package org.apache.paimon.spark.globalindex;
+import org.apache.paimon.fs.Path;
import org.apache.paimon.globalindex.GlobalIndexFileReadWrite;
import org.apache.paimon.globalindex.GlobalIndexWriter;
import org.apache.paimon.globalindex.GlobalIndexer;
@@ -51,9 +52,21 @@ public class GlobalIndexBuilderUtils {
long fileSize = readWrite.fileSize(fileName);
GlobalIndexMeta globalIndexMeta =
new GlobalIndexMeta(range.from, range.to, indexFieldId,
null, entry.meta());
+
+ Path externalPathDir = table.coreOptions().globalIndexExternalPath();
+ String externalPathString = null;
+ if (externalPathDir != null) {
+ Path externalPath = new Path(externalPathDir, fileName);
+ externalPathString = externalPath.toString();
+ }
IndexFileMeta indexFileMeta =
new IndexFileMeta(
- indexType, fileName, fileSize, entry.rowCount(), globalIndexMeta);
+ indexType,
+ fileName,
+ fileSize,
+ entry.rowCount(),
+ globalIndexMeta,
+ externalPathString);
results.add(indexFileMeta);
}
return results;
diff --git
a/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkFileIndexITCase.java
b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkFileIndexITCase.java
index 364a33934a..20d78a76c3 100644
---
a/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkFileIndexITCase.java
+++
b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkFileIndexITCase.java
@@ -211,7 +211,8 @@ public class SparkFileIndexITCase extends SparkWriteITCase {
null,
null,
CoreOptions.ExternalPathStrategy.NONE,
- false);
+ false,
+ null);
Table table = fileSystemCatalog.getTable(Identifier.create("db",
tableName));
ReadBuilder readBuilder = table.newReadBuilder();
diff --git
a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateGlobalIndexProcedureTest.scala
b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateGlobalIndexProcedureTest.scala
index 23ed2921c6..8fd0d541a5 100644
---
a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateGlobalIndexProcedureTest.scala
+++
b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateGlobalIndexProcedureTest.scala
@@ -24,8 +24,11 @@ import org.apache.paimon.spark.PaimonSparkTestBase
import org.apache.paimon.types.VarCharType
import org.apache.paimon.utils.Range
+import org.apache.spark.sql.paimon.Utils
import org.apache.spark.sql.streaming.StreamTest
+import java.io.File
+
import scala.collection.JavaConverters._
import scala.collection.immutable
@@ -297,6 +300,52 @@ class CreateGlobalIndexProcedureTest extends
PaimonSparkTestBase with StreamTest
}
}
+ test("create bitmap global index with external path") {
+ withTable("T") {
+ val tempIndexDir: File = Utils.createTempDir
+ val indexPath = "file:" + tempIndexDir.toString
+ spark.sql(s"""
+ |CREATE TABLE T (id INT, name STRING)
+ |TBLPROPERTIES (
+ | 'bucket' = '-1',
+ | 'global-index.row-count-per-shard' = '10000',
+ | 'global-index.external-path' = '$indexPath',
+ | 'row-tracking.enabled' = 'true',
+ | 'data-evolution.enabled' = 'true')
+ |""".stripMargin)
+
+ val values =
+ (0 until 100000).map(i => s"($i, 'name_$i')").mkString(",")
+ spark.sql(s"INSERT INTO T VALUES $values")
+
+ val output =
+ spark
+ .sql("CALL sys.create_global_index(table => 'test.T', index_column => 'name', index_type => 'bitmap')")
+ .collect()
+ .head
+
+ assert(output.getBoolean(0))
+
+ val table = loadTable("T")
+ val bitmapEntries = table
+ .store()
+ .newIndexFileHandler()
+ .scanEntries()
+ .asScala
+ .filter(_.indexFile().indexType() == "bitmap")
+ assert(bitmapEntries.nonEmpty)
+ val totalRowCount = bitmapEntries.map(_.indexFile().rowCount()).sum
+ assert(totalRowCount == 100000L)
+ for (entry <- bitmapEntries) {
+ assert(
+ entry
+ .indexFile()
+ .externalPath()
+ .startsWith(indexPath + "/" + entry.indexFile().fileName()))
+ }
+ }
+ }
+
private def assertMultiplePartitionsResult(
tableName: String,
rowCount: Long,