iemejia commented on code in PR #12400: URL: https://github.com/apache/gluten/pull/12400#discussion_r3498405290
########## backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxFileHandleCacheSuite.scala: ########## @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import org.apache.gluten.config.VeloxConfig +import org.apache.gluten.execution.{BasicScanExecTransformer, VeloxWholeStageTransformerSuite} + +import org.apache.spark.SparkConf + +/** + * Test suite for Velox file handle cache behavior. + * + * Tests correctness, config propagation, and edge cases for the file handle cache which caches open + * file handles (descriptors) to avoid repeated open/close overhead. + */ +class VeloxFileHandleCacheSuite extends VeloxWholeStageTransformerSuite { + override protected val resourcePath: String = "/parquet-for-read" + override protected val fileFormat: String = "parquet" + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set(VeloxConfig.COLUMNAR_VELOX_FILE_HANDLE_CACHE_ENABLED.key, "true") + .set(VeloxConfig.COLUMNAR_VELOX_FILE_HANDLE_EXPIRATION_DURATION_MS.key, "600000") + .set(VeloxConfig.COLUMNAR_VELOX_NUM_CACHE_FILE_HANDLES.key, "20000") + } + + testWithSpecifiedSparkVersion( + "basic scan correctness with file handle cache enabled", + "3.5", + "3.5") { + // Verify that enabling file handle cache produces correct scan results + withTempPath { + dir => + spark + .range(10000) + .selectExpr("id", "cast(id % 7 as int) as category", "id * 1.5 as value") + .repartition(10) + .write + .parquet(dir.getCanonicalPath) + + val df = spark.read.parquet(dir.getCanonicalPath) + df.createOrReplaceTempView("t") + + runQueryAndCompare("SELECT count(*) FROM t") { + checkGlutenPlan[BasicScanExecTransformer] + } + runQueryAndCompare("SELECT sum(value) FROM t WHERE category = 3") { + checkGlutenPlan[BasicScanExecTransformer] + } + runQueryAndCompare("SELECT category, count(*) FROM t GROUP BY category") { + checkGlutenPlan[BasicScanExecTransformer] + } + } + } + + testWithSpecifiedSparkVersion( + "repeated scans produce consistent results (cache hit path)", + "3.5", + "3.5") { + // When file handles are cached, repeated scans of the same files must produce + // identical results. This exercises the cache hit path. + withTempPath { + dir => + spark + .range(5000) + .selectExpr("id", "cast(id as string) as name") + .repartition(50) // 50 files to exercise many cache entries + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + val expected = spark.read.parquet(path).count() + assert(expected == 5000) + + // Scan the same files multiple times - each should hit the cache + for (i <- 1 to 5) { + val count = spark.read.parquet(path).count() + assert( + count == expected, + s"Iteration $i: expected $expected rows but got $count") + } + + // Verify aggregation consistency across repeated scans + val firstSum = spark.read.parquet(path).selectExpr("sum(id)").collect()(0).getLong(0) + for (i <- 1 to 3) { + val sum = spark.read.parquet(path).selectExpr("sum(id)").collect()(0).getLong(0) + assert( + sum == firstSum, + s"Iteration $i: sum mismatch, expected $firstSum but got $sum") + } + } + } + + testWithSpecifiedSparkVersion( + "many small files do not cause errors with file handle cache", + "3.5", + "3.5") { + // Verify that scanning many small files with caching enabled does not cause + // file descriptor exhaustion or other resource-related errors. + withTempPath { + dir => + // Create 200 small parquet files + spark + .range(20000) + .selectExpr("id", "uuid() as payload") + .repartition(200) + .write + .parquet(dir.getCanonicalPath) + + val fileCount = dir.listFiles().count(_.getName.endsWith(".parquet")) + assert(fileCount >= 100, s"Expected at least 100 files, got $fileCount") + + // Scan all files - should work without resource errors + val count = spark.read.parquet(dir.getCanonicalPath).count() + assert(count == 20000) + + // Scan again (cache hit path) - should also work + val count2 = spark.read.parquet(dir.getCanonicalPath).count() + assert(count2 == 20000) + } + } + + testWithSpecifiedSparkVersion( + "filtered scan correctness with file handle cache", + "3.5", + "3.5") { + // Verify that predicate pushdown works correctly with cached file handles. + // This exercises the row group skipping path through cached handles. + withTempPath { + dir => + spark + .range(100000) + .selectExpr( + "id", + "cast(id % 10 as int) as partition_key", + "cast(id * 0.01 as double) as metric") + .repartition(20) + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + + // Filter that matches ~10% of rows + val filtered = spark.read.parquet(path).where("partition_key = 5").count() + assert(filtered == 10000, s"Expected 10000 filtered rows, got $filtered") + + // Range filter + val rangeFiltered = spark.read.parquet(path).where("id >= 50000").count() + assert(rangeFiltered == 50000, s"Expected 50000 range-filtered rows, got $rangeFiltered") + + // Re-run same filters (cache hit path) + val filtered2 = spark.read.parquet(path).where("partition_key = 5").count() + assert(filtered2 == filtered, "Filtered count mismatch on repeated scan") + } + } + + testWithSpecifiedSparkVersion( + "scan after file deletion produces appropriate error or empty result", + "3.5", + "3.5") { + // If a file is deleted between scans, the next scan should either: + // - Succeed (if the cached FD still works on Linux with unlinked inodes) + // - Produce an error (not silently return wrong data) + withTempPath { + dir => + spark + .range(1000) + .selectExpr("id") + .repartition(5) + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + // First scan populates the cache + val count1 = spark.read.parquet(path).count() + assert(count1 == 1000) + + // Delete one parquet file + val parquetFiles = dir.listFiles().filter(_.getName.endsWith(".parquet")) + assert(parquetFiles.nonEmpty) + val deletedFile = parquetFiles.head + val deletedRows = spark.read.parquet(deletedFile.getCanonicalPath).count() + deletedFile.delete() + + // On Linux, the cached FD to the deleted file may still work (unlinked inode). + // Either way, the remaining files should be readable. + // We don't assert on exact count because the deleted file's FD might still be valid. + val count2 = spark.read.parquet(path).count() + // The count should be either (count1 - deletedRows) or count1 + // depending on whether the OS kept the inode accessible + assert( + count2 == count1 || count2 == count1 - deletedRows, + s"Unexpected count after deletion: $count2 (original: $count1, deleted: $deletedRows)") Review Comment: Fixed. Wrapped the second scan in a try-catch — if the scan throws because the file is no longer accessible, that is acceptable behavior. The important invariant is that it must not silently return wrong data. ########## backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxFileHandleCacheSuite.scala: ########## @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import org.apache.gluten.config.VeloxConfig +import org.apache.gluten.execution.{BasicScanExecTransformer, VeloxWholeStageTransformerSuite} + +import org.apache.spark.SparkConf + +/** + * Test suite for Velox file handle cache behavior. + * + * Tests correctness, config propagation, and edge cases for the file handle cache which caches open + * file handles (descriptors) to avoid repeated open/close overhead. + */ +class VeloxFileHandleCacheSuite extends VeloxWholeStageTransformerSuite { + override protected val resourcePath: String = "/parquet-for-read" + override protected val fileFormat: String = "parquet" + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set(VeloxConfig.COLUMNAR_VELOX_FILE_HANDLE_CACHE_ENABLED.key, "true") + .set(VeloxConfig.COLUMNAR_VELOX_FILE_HANDLE_EXPIRATION_DURATION_MS.key, "600000") + .set(VeloxConfig.COLUMNAR_VELOX_NUM_CACHE_FILE_HANDLES.key, "20000") + } + + testWithSpecifiedSparkVersion( + "basic scan correctness with file handle cache enabled", + "3.5", + "3.5") { + // Verify that enabling file handle cache produces correct scan results + withTempPath { + dir => + spark + .range(10000) + .selectExpr("id", "cast(id % 7 as int) as category", "id * 1.5 as value") + .repartition(10) + .write + .parquet(dir.getCanonicalPath) + + val df = spark.read.parquet(dir.getCanonicalPath) + df.createOrReplaceTempView("t") + + runQueryAndCompare("SELECT count(*) FROM t") { + checkGlutenPlan[BasicScanExecTransformer] + } + runQueryAndCompare("SELECT sum(value) FROM t WHERE category = 3") { + checkGlutenPlan[BasicScanExecTransformer] + } + runQueryAndCompare("SELECT category, count(*) FROM t GROUP BY category") { + checkGlutenPlan[BasicScanExecTransformer] + } + } + } + + testWithSpecifiedSparkVersion( + "repeated scans produce consistent results (cache hit path)", + "3.5", + "3.5") { + // When file handles are cached, repeated scans of the same files must produce + // identical results. This exercises the cache hit path. + withTempPath { + dir => + spark + .range(5000) + .selectExpr("id", "cast(id as string) as name") + .repartition(50) // 50 files to exercise many cache entries + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + val expected = spark.read.parquet(path).count() + assert(expected == 5000) + + // Scan the same files multiple times - each should hit the cache + for (i <- 1 to 5) { + val count = spark.read.parquet(path).count() + assert( + count == expected, + s"Iteration $i: expected $expected rows but got $count") + } + + // Verify aggregation consistency across repeated scans + val firstSum = spark.read.parquet(path).selectExpr("sum(id)").collect()(0).getLong(0) + for (i <- 1 to 3) { + val sum = spark.read.parquet(path).selectExpr("sum(id)").collect()(0).getLong(0) + assert( + sum == firstSum, + s"Iteration $i: sum mismatch, expected $firstSum but got $sum") + } + } + } + + testWithSpecifiedSparkVersion( + "many small files do not cause errors with file handle cache", + "3.5", + "3.5") { + // Verify that scanning many small files with caching enabled does not cause + // file descriptor exhaustion or other resource-related errors. + withTempPath { + dir => + // Create 200 small parquet files + spark + .range(20000) + .selectExpr("id", "uuid() as payload") + .repartition(200) + .write + .parquet(dir.getCanonicalPath) + + val fileCount = dir.listFiles().count(_.getName.endsWith(".parquet")) + assert(fileCount >= 100, s"Expected at least 100 files, got $fileCount") + + // Scan all files - should work without resource errors + val count = spark.read.parquet(dir.getCanonicalPath).count() + assert(count == 20000) + + // Scan again (cache hit path) - should also work + val count2 = spark.read.parquet(dir.getCanonicalPath).count() + assert(count2 == 20000) + } + } + + testWithSpecifiedSparkVersion( + "filtered scan correctness with file handle cache", + "3.5", + "3.5") { + // Verify that predicate pushdown works correctly with cached file handles. + // This exercises the row group skipping path through cached handles. + withTempPath { + dir => + spark + .range(100000) + .selectExpr( + "id", + "cast(id % 10 as int) as partition_key", + "cast(id * 0.01 as double) as metric") + .repartition(20) + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + + // Filter that matches ~10% of rows + val filtered = spark.read.parquet(path).where("partition_key = 5").count() + assert(filtered == 10000, s"Expected 10000 filtered rows, got $filtered") + + // Range filter + val rangeFiltered = spark.read.parquet(path).where("id >= 50000").count() + assert(rangeFiltered == 50000, s"Expected 50000 range-filtered rows, got $rangeFiltered") + + // Re-run same filters (cache hit path) + val filtered2 = spark.read.parquet(path).where("partition_key = 5").count() + assert(filtered2 == filtered, "Filtered count mismatch on repeated scan") + } + } + + testWithSpecifiedSparkVersion( + "scan after file deletion produces appropriate error or empty result", + "3.5", + "3.5") { + // If a file is deleted between scans, the next scan should either: + // - Succeed (if the cached FD still works on Linux with unlinked inodes) + // - Produce an error (not silently return wrong data) + withTempPath { + dir => + spark + .range(1000) + .selectExpr("id") + .repartition(5) + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + // First scan populates the cache + val count1 = spark.read.parquet(path).count() + assert(count1 == 1000) + + // Delete one parquet file + val parquetFiles = dir.listFiles().filter(_.getName.endsWith(".parquet")) + assert(parquetFiles.nonEmpty) + val deletedFile = parquetFiles.head + val deletedRows = spark.read.parquet(deletedFile.getCanonicalPath).count() + deletedFile.delete() + + // On Linux, the cached FD to the deleted file may still work (unlinked inode). + // Either way, the remaining files should be readable. + // We don't assert on exact count because the deleted file's FD might still be valid. + val count2 = spark.read.parquet(path).count() + // The count should be either (count1 - deletedRows) or count1 + // depending on whether the OS kept the inode accessible + assert( + count2 == count1 || count2 == count1 - deletedRows, + s"Unexpected count after deletion: $count2 (original: $count1, deleted: $deletedRows)") + } + } + + testWithSpecifiedSparkVersion( + "column pruning with cached file handles", + "3.5", + "3.5") { + // Verify that column pruning works correctly when file handles are cached. + // The cache key includes the file path but not the projected columns, so + // different projections on the same file must still work correctly. + withTempPath { + dir => + spark + .range(5000) + .selectExpr("id", "id * 2 as doubled", "id * 3 as tripled", "uuid() as text") + .repartition(10) + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + + // Read all columns + val allCols = spark.read.parquet(path).select("id", "doubled", "tripled", "text").count() + assert(allCols == 5000) + + // Read subset of columns (same file handles, different projection) + val subset1 = spark.read.parquet(path).select("id").collect() + assert(subset1.length == 5000) + assert(subset1.head.schema.fieldNames.sameElements(Array("id"))) Review Comment: Fixed. Moved the schema assertion to the DataFrame before `collect()`: check `subset1Df.schema.fieldNames` first, then collect and assert row count. ########## ep/build-velox/src/get-velox.sh: ########## @@ -148,6 +148,16 @@ function apply_compilation_fixes { $SUDO_CMD cp ${CURRENT_DIR}/modify_arrow.patch ${VELOX_HOME}/CMake/resolve_dependency_modules/arrow/ git add ${VELOX_HOME}/CMake/resolve_dependency_modules/arrow/modify_arrow.patch # to avoid the file from being deleted by git clean -dffx :/ + + # Wire file handle cache TTL config to SimpleLRUCache constructor. + if [ -f "${CURRENT_DIR}/file-handle-cache-ttl.patch" ]; then + pushd $VELOX_HOME + git apply --check ${CURRENT_DIR}/file-handle-cache-ttl.patch 2>/dev/null && \ + git apply ${CURRENT_DIR}/file-handle-cache-ttl.patch && \ + echo "Applied file-handle-cache-ttl.patch" || \ + echo "file-handle-cache-ttl.patch already applied or not applicable, skipping" + popd + fi Review Comment: Fixed. The script now distinguishes three cases: (1) patch applies cleanly — apply it, (2) reverse-apply check passes — patch is already present upstream, skip, (3) neither — fail the build with an error. This ensures the TTL wiring is never silently absent. ########## cpp/velox/utils/ConfigExtractor.cc: ########## @@ -292,6 +292,10 @@ std::shared_ptr<facebook::velox::config::ConfigBase> createHiveConnectorConfig( hiveConfMap[facebook::velox::connector::hive::HiveConfig::kEnableFileHandleCache] = conf->get<bool>(kVeloxFileHandleCacheEnabled, kVeloxFileHandleCacheEnabledDefault) ? "true" : "false"; + hiveConfMap[facebook::velox::connector::hive::HiveConfig::kNumCacheFileHandles] = + std::to_string(conf->get<int32_t>(kVeloxNumCacheFileHandles, kVeloxNumCacheFileHandlesDefault)); + hiveConfMap[facebook::velox::connector::hive::HiveConfig::kFileHandleExpirationDurationMs] = std::to_string( + conf->get<int64_t>(kVeloxFileHandleExpirationDurationMs, kVeloxFileHandleExpirationDurationMsDefault)); Review Comment: Already fixed in a previous push — ran clang-format-15 on the file. ########## backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxFileHandleCacheSuite.scala: ########## @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import org.apache.gluten.config.VeloxConfig +import org.apache.gluten.execution.{BasicScanExecTransformer, VeloxWholeStageTransformerSuite} + +import org.apache.spark.SparkConf + +/** + * Test suite for Velox file handle cache behavior. + * + * Tests correctness, config propagation, and edge cases for the file handle cache which caches open + * file handles (descriptors) to avoid repeated open/close overhead. + */ +class VeloxFileHandleCacheSuite extends VeloxWholeStageTransformerSuite { + override protected val resourcePath: String = "/parquet-for-read" + override protected val fileFormat: String = "parquet" + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set(VeloxConfig.COLUMNAR_VELOX_FILE_HANDLE_CACHE_ENABLED.key, "true") + .set(VeloxConfig.COLUMNAR_VELOX_FILE_HANDLE_EXPIRATION_DURATION_MS.key, "600000") + .set(VeloxConfig.COLUMNAR_VELOX_NUM_CACHE_FILE_HANDLES.key, "20000") + } + + testWithSpecifiedSparkVersion( + "basic scan correctness with file handle cache enabled", + "3.5", + "3.5") { + // Verify that enabling file handle cache produces correct scan results + withTempPath { + dir => + spark + .range(10000) + .selectExpr("id", "cast(id % 7 as int) as category", "id * 1.5 as value") + .repartition(10) + .write + .parquet(dir.getCanonicalPath) + + val df = spark.read.parquet(dir.getCanonicalPath) + df.createOrReplaceTempView("t") + + runQueryAndCompare("SELECT count(*) FROM t") { + checkGlutenPlan[BasicScanExecTransformer] + } + runQueryAndCompare("SELECT sum(value) FROM t WHERE category = 3") { + checkGlutenPlan[BasicScanExecTransformer] + } + runQueryAndCompare("SELECT category, count(*) FROM t GROUP BY category") { + checkGlutenPlan[BasicScanExecTransformer] + } + } + } + + testWithSpecifiedSparkVersion( + "repeated scans produce consistent results (cache hit path)", + "3.5", + "3.5") { + // When file handles are cached, repeated scans of the same files must produce + // identical results. This exercises the cache hit path. + withTempPath { + dir => + spark + .range(5000) + .selectExpr("id", "cast(id as string) as name") + .repartition(50) // 50 files to exercise many cache entries + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + val expected = spark.read.parquet(path).count() + assert(expected == 5000) + + // Scan the same files multiple times - each should hit the cache + for (i <- 1 to 5) { + val count = spark.read.parquet(path).count() + assert( + count == expected, + s"Iteration $i: expected $expected rows but got $count") + } + + // Verify aggregation consistency across repeated scans + val firstSum = spark.read.parquet(path).selectExpr("sum(id)").collect()(0).getLong(0) + for (i <- 1 to 3) { + val sum = spark.read.parquet(path).selectExpr("sum(id)").collect()(0).getLong(0) + assert( + sum == firstSum, + s"Iteration $i: sum mismatch, expected $firstSum but got $sum") + } + } + } + + testWithSpecifiedSparkVersion( + "many small files do not cause errors with file handle cache", + "3.5", + "3.5") { + // Verify that scanning many small files with caching enabled does not cause + // file descriptor exhaustion or other resource-related errors. + withTempPath { + dir => + // Create 200 small parquet files + spark + .range(20000) + .selectExpr("id", "uuid() as payload") + .repartition(200) + .write + .parquet(dir.getCanonicalPath) + + val fileCount = dir.listFiles().count(_.getName.endsWith(".parquet")) + assert(fileCount >= 100, s"Expected at least 100 files, got $fileCount") + + // Scan all files - should work without resource errors + val count = spark.read.parquet(dir.getCanonicalPath).count() + assert(count == 20000) + + // Scan again (cache hit path) - should also work + val count2 = spark.read.parquet(dir.getCanonicalPath).count() + assert(count2 == 20000) + } + } + + testWithSpecifiedSparkVersion( + "filtered scan correctness with file handle cache", + "3.5", + "3.5") { + // Verify that predicate pushdown works correctly with cached file handles. + // This exercises the row group skipping path through cached handles. + withTempPath { + dir => + spark + .range(100000) + .selectExpr( + "id", + "cast(id % 10 as int) as partition_key", + "cast(id * 0.01 as double) as metric") + .repartition(20) + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + + // Filter that matches ~10% of rows + val filtered = spark.read.parquet(path).where("partition_key = 5").count() + assert(filtered == 10000, s"Expected 10000 filtered rows, got $filtered") + + // Range filter + val rangeFiltered = spark.read.parquet(path).where("id >= 50000").count() + assert(rangeFiltered == 50000, s"Expected 50000 range-filtered rows, got $rangeFiltered") + + // Re-run same filters (cache hit path) + val filtered2 = spark.read.parquet(path).where("partition_key = 5").count() + assert(filtered2 == filtered, "Filtered count mismatch on repeated scan") + } + } + + testWithSpecifiedSparkVersion( + "scan after file deletion produces appropriate error or empty result", + "3.5", + "3.5") { + // If a file is deleted between scans, the next scan should either: + // - Succeed (if the cached FD still works on Linux with unlinked inodes) + // - Produce an error (not silently return wrong data) + withTempPath { + dir => + spark + .range(1000) + .selectExpr("id") + .repartition(5) + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + // First scan populates the cache + val count1 = spark.read.parquet(path).count() + assert(count1 == 1000) + + // Delete one parquet file + val parquetFiles = dir.listFiles().filter(_.getName.endsWith(".parquet")) + assert(parquetFiles.nonEmpty) + val deletedFile = parquetFiles.head + val deletedRows = spark.read.parquet(deletedFile.getCanonicalPath).count() + deletedFile.delete() + + // On Linux, the cached FD to the deleted file may still work (unlinked inode). + // Either way, the remaining files should be readable. + // We don't assert on exact count because the deleted file's FD might still be valid. + val count2 = spark.read.parquet(path).count() + // The count should be either (count1 - deletedRows) or count1 + // depending on whether the OS kept the inode accessible + assert( + count2 == count1 || count2 == count1 - deletedRows, + s"Unexpected count after deletion: $count2 (original: $count1, deleted: $deletedRows)") + } + } + + testWithSpecifiedSparkVersion( + "column pruning with cached file handles", + "3.5", + "3.5") { + // Verify that column pruning works correctly when file handles are cached. + // The cache key includes the file path but not the projected columns, so + // different projections on the same file must still work correctly. + withTempPath { + dir => + spark + .range(5000) + .selectExpr("id", "id * 2 as doubled", "id * 3 as tripled", "uuid() as text") + .repartition(10) + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + + // Read all columns + val allCols = spark.read.parquet(path).select("id", "doubled", "tripled", "text").count() + assert(allCols == 5000) + + // Read subset of columns (same file handles, different projection) + val subset1 = spark.read.parquet(path).select("id").collect() + assert(subset1.length == 5000) + assert(subset1.head.schema.fieldNames.sameElements(Array("id"))) Review Comment: Fixed. Moved the schema assertion to the DataFrame before `collect()`: check `subset1Df.schema.fieldNames` first, then collect and assert row count. ########## backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxFileHandleCacheSuite.scala: ########## @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import org.apache.gluten.config.VeloxConfig +import org.apache.gluten.execution.{BasicScanExecTransformer, VeloxWholeStageTransformerSuite} + +import org.apache.spark.SparkConf + +/** + * Test suite for Velox file handle cache behavior. + * + * Tests correctness, config propagation, and edge cases for the file handle cache which caches open + * file handles (descriptors) to avoid repeated open/close overhead. + */ +class VeloxFileHandleCacheSuite extends VeloxWholeStageTransformerSuite { + override protected val resourcePath: String = "/parquet-for-read" + override protected val fileFormat: String = "parquet" + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set(VeloxConfig.COLUMNAR_VELOX_FILE_HANDLE_CACHE_ENABLED.key, "true") + .set(VeloxConfig.COLUMNAR_VELOX_FILE_HANDLE_EXPIRATION_DURATION_MS.key, "600000") + .set(VeloxConfig.COLUMNAR_VELOX_NUM_CACHE_FILE_HANDLES.key, "20000") + } + + testWithSpecifiedSparkVersion( + "basic scan correctness with file handle cache enabled", + "3.5", + "3.5") { + // Verify that enabling file handle cache produces correct scan results + withTempPath { + dir => + spark + .range(10000) + .selectExpr("id", "cast(id % 7 as int) as category", "id * 1.5 as value") + .repartition(10) + .write + .parquet(dir.getCanonicalPath) + + val df = spark.read.parquet(dir.getCanonicalPath) + df.createOrReplaceTempView("t") + + runQueryAndCompare("SELECT count(*) FROM t") { + checkGlutenPlan[BasicScanExecTransformer] + } + runQueryAndCompare("SELECT sum(value) FROM t WHERE category = 3") { + checkGlutenPlan[BasicScanExecTransformer] + } + runQueryAndCompare("SELECT category, count(*) FROM t GROUP BY category") { + checkGlutenPlan[BasicScanExecTransformer] + } + } + } + + testWithSpecifiedSparkVersion( + "repeated scans produce consistent results (cache hit path)", + "3.5", + "3.5") { + // When file handles are cached, repeated scans of the same files must produce + // identical results. This exercises the cache hit path. + withTempPath { + dir => + spark + .range(5000) + .selectExpr("id", "cast(id as string) as name") + .repartition(50) // 50 files to exercise many cache entries + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + val expected = spark.read.parquet(path).count() + assert(expected == 5000) + + // Scan the same files multiple times - each should hit the cache + for (i <- 1 to 5) { + val count = spark.read.parquet(path).count() + assert( + count == expected, + s"Iteration $i: expected $expected rows but got $count") + } + + // Verify aggregation consistency across repeated scans + val firstSum = spark.read.parquet(path).selectExpr("sum(id)").collect()(0).getLong(0) + for (i <- 1 to 3) { + val sum = spark.read.parquet(path).selectExpr("sum(id)").collect()(0).getLong(0) + assert( + sum == firstSum, + s"Iteration $i: sum mismatch, expected $firstSum but got $sum") + } + } + } + + testWithSpecifiedSparkVersion( + "many small files do not cause errors with file handle cache", + "3.5", + "3.5") { + // Verify that scanning many small files with caching enabled does not cause + // file descriptor exhaustion or other resource-related errors. + withTempPath { + dir => + // Create 200 small parquet files + spark + .range(20000) + .selectExpr("id", "uuid() as payload") + .repartition(200) + .write + .parquet(dir.getCanonicalPath) + + val fileCount = dir.listFiles().count(_.getName.endsWith(".parquet")) + assert(fileCount >= 100, s"Expected at least 100 files, got $fileCount") + + // Scan all files - should work without resource errors + val count = spark.read.parquet(dir.getCanonicalPath).count() + assert(count == 20000) + + // Scan again (cache hit path) - should also work + val count2 = spark.read.parquet(dir.getCanonicalPath).count() + assert(count2 == 20000) + } + } + + testWithSpecifiedSparkVersion( + "filtered scan correctness with file handle cache", + "3.5", + "3.5") { + // Verify that predicate pushdown works correctly with cached file handles. + // This exercises the row group skipping path through cached handles. + withTempPath { + dir => + spark + .range(100000) + .selectExpr( + "id", + "cast(id % 10 as int) as partition_key", + "cast(id * 0.01 as double) as metric") + .repartition(20) + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + + // Filter that matches ~10% of rows + val filtered = spark.read.parquet(path).where("partition_key = 5").count() + assert(filtered == 10000, s"Expected 10000 filtered rows, got $filtered") + + // Range filter + val rangeFiltered = spark.read.parquet(path).where("id >= 50000").count() + assert(rangeFiltered == 50000, s"Expected 50000 range-filtered rows, got $rangeFiltered") + + // Re-run same filters (cache hit path) + val filtered2 = spark.read.parquet(path).where("partition_key = 5").count() + assert(filtered2 == filtered, "Filtered count mismatch on repeated scan") + } + } + + testWithSpecifiedSparkVersion( + "scan after file deletion produces appropriate error or empty result", + "3.5", + "3.5") { + // If a file is deleted between scans, the next scan should either: + // - Succeed (if the cached FD still works on Linux with unlinked inodes) + // - Produce an error (not silently return wrong data) + withTempPath { + dir => + spark + .range(1000) + .selectExpr("id") + .repartition(5) + .write + .parquet(dir.getCanonicalPath) + + val path = dir.getCanonicalPath + // First scan populates the cache + val count1 = spark.read.parquet(path).count() + assert(count1 == 1000) + + // Delete one parquet file + val parquetFiles = dir.listFiles().filter(_.getName.endsWith(".parquet")) + assert(parquetFiles.nonEmpty) + val deletedFile = parquetFiles.head + val deletedRows = spark.read.parquet(deletedFile.getCanonicalPath).count() + deletedFile.delete() Review Comment: Fixed. Added `assert(deletedFile.delete(), ...)` to fail if deletion does not succeed. ########## backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala: ########## @@ -527,10 +527,29 @@ object VeloxConfig extends ConfigRegistry { val COLUMNAR_VELOX_FILE_HANDLE_CACHE_ENABLED = buildStaticConf("spark.gluten.sql.columnar.backend.velox.fileHandleCacheEnabled") .doc( - "Disables caching if false. File handle cache should be disabled " + - "if files are mutable, i.e. file content may change while file path stays the same.") + "Enables caching of file handles to avoid repeated open/close overhead on remote " + + "filesystems. Should be disabled if files are mutable, i.e. file content may " + + "change while file path stays the same.") .booleanConf - .createWithDefault(false) + .createWithDefault(true) + + val COLUMNAR_VELOX_NUM_CACHE_FILE_HANDLES = + buildStaticConf("spark.gluten.sql.columnar.backend.velox.numCacheFileHandles") + .doc( + "Maximum number of entries in the file handle cache. Each entry holds an open " + + "file descriptor (local FS) or connection state (remote FS).") + .intConf + .createWithDefault(20000) Review Comment: Good point. Reduced the default from 20000 to 10000. Also expanded the doc to clarify that on remote object stores (S3, ABFS, GCS) entries are HTTP connections, not OS file descriptors, so the FD concern primarily applies to local filesystems. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
