This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 414c25032f [log] Add detailed failure log and test for query metadata columns (#5479)
414c25032f is described below

commit 414c25032f3a280aec18f1e9337fc76ecef9343c
Author: askwang <[email protected]>
AuthorDate: Mon Apr 21 09:13:12 2025 +0800

    [log] Add detailed failure log and test for query metadata columns (#5479)
---
 docs/content/spark/sql-query.md                      |  4 ++++
 .../paimon/spark/PaimonRecordReaderIterator.scala    |  3 ++-
 .../apache/paimon/spark/sql/PaimonQueryTest.scala    | 20 ++++++++++++++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/docs/content/spark/sql-query.md b/docs/content/spark/sql-query.md
index f1597b9347..9f737d67e9 100644
--- a/docs/content/spark/sql-query.md
+++ b/docs/content/spark/sql-query.md
@@ -49,6 +49,10 @@ Paimon also supports reading some hidden metadata columns, currently supporting
 SELECT *, __paimon_file_path, __paimon_partition, __paimon_bucket, __paimon_row_index FROM t;
 ```
 
+{{< hint info >}}
+Note: only append tables or deletion vector tables support querying metadata columns.
+{{< /hint >}}
+
 ### Batch Time Travel
 
 Paimon batch reads with time travel can specify a snapshot or a tag and read the corresponding data.
diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonRecordReaderIterator.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonRecordReaderIterator.scala
index 6d6c1ca47c..4a71cdff88 100644
--- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonRecordReaderIterator.scala
+++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonRecordReaderIterator.scala
@@ -50,7 +50,8 @@ case class PaimonRecordReaderIterator(
     if (needMetadata) {
       if (!isFileRecordIterator || !split.isInstanceOf[DataSplit]) {
         throw new RuntimeException(
-          "There need be FileRecordIterator when metadata columns are 
required.")
+          "There need be FileRecordIterator when metadata columns are 
required. " +
+            "Only append table or deletion vector table support querying 
metadata columns.")
       }
     }
   }
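
As context for the new message: per the docs hint above, a primary-key table can still serve metadata columns when deletion vectors are enabled. A minimal sketch of that working configuration (assumes a Spark session with the Paimon catalog configured; the table name is illustrative, and 'deletion-vectors.enabled' is the Paimon table option this sketch relies on):

    spark.sql("""
      |CREATE TABLE dv_t (id INT, name STRING)
      |TBLPROPERTIES ('primary-key' = 'id', 'bucket' = '1', 'deletion-vectors.enabled' = 'true')
      |""".stripMargin)
    spark.sql("INSERT INTO dv_t VALUES (1, 'a')")
    spark.sql("INSERT INTO dv_t VALUES (2, 'b')")
    // With deletion vectors enabled, reads should stay on the FileRecordIterator
    // path, so metadata columns resolve even with several files in one bucket.
    spark.sql("SELECT *, __paimon_file_path FROM dv_t").show()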
diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala
index ae90b5b1f3..d7a2ecfd93 100644
--- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala
+++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala
@@ -21,7 +21,9 @@ package org.apache.paimon.spark.sql
 import org.apache.paimon.spark.PaimonSparkTestBase
 import org.apache.paimon.table.source.DataSplit
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.{Row, SparkSession}
+import org.assertj.core.api.Assertions.assertThat
 import org.junit.jupiter.api.Assertions
 
 import java.util
@@ -387,6 +389,24 @@ class PaimonQueryTest extends PaimonSparkTestBase {
     )
   }
 
+  test("Paimon Query: not support querying metadata columns for pk table") {
+    spark.sql("""
+                |CREATE TABLE T (id INT, name STRING)
+                |TBLPROPERTIES ('primary-key' = 'id', 'bucket' = '1')
+                |""".stripMargin)
+
+    spark.sql("INSERT INTO T VALUES(1,'a')")
+    assertThat(spark.sql("SELECT *,__paimon_file_path FROM T").collect()).hasSize(1)
+
+    // the query fails once a bucket contains more than one file
+    spark.sql("INSERT INTO T VALUES(2,'b')")
+    assert(
+      intercept[SparkException] {
+        spark.sql("SELECT *,__paimon_file_path FROM T").collect()
+      }.getMessage
+        .contains("Only append table or deletion vector table support querying 
metadata columns."))
+  }
+
   private def getAllFiles(
       tableName: String,
       partitions: Seq[String],

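For completeness, the happy path described in the docs hunk above, as a hedged sketch (an append table, i.e. one without a primary key; assumes the same Spark/Paimon setup as the test, with an illustrative table name):

    spark.sql("CREATE TABLE append_t (id INT, name STRING)")
    spark.sql("INSERT INTO append_t VALUES (1, 'a'), (2, 'b')")
    // Append tables read through a FileRecordIterator, so all four metadata
    // columns listed in the docs should resolve:
    spark.sql(
      "SELECT *, __paimon_file_path, __paimon_partition, __paimon_bucket, __paimon_row_index FROM append_t"
    ).show()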