[ 
https://issues.apache.org/jira/browse/HUDI-9690?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Mansi Patel updated HUDI-9690:
------------------------------
    Description: 
Hudi 1.0.2 with the GLOBAL_BLOOM index: a record inserted via Spark SQL (INSERT INTO) is not returned by subsequent SELECT queries, while the same record written through the DataFrame writer (upsert) shows up as expected.
{code:java}
spark-shell --jars 
/home/hadoop/hudi-spark3-bundle_2.12-1.0.2-amzn-0-SNAPSHOT.jar \
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \    
--conf 
"spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
  \
--conf 
"spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"


import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode
val df1 = Seq(
 ("100", "2015-01-01", "event_name_900", "2015-01-01T13:51:39.340396Z", 
"type1"),
 ("101", "2015-01-01", "event_name_546", "2015-01-01T12:14:58.597216Z", 
"type2"),
 ("102", "2015-01-01", "event_name_345", "2015-01-01T13:51:40.417052Z", 
"type3"),
 ("103", "2015-01-01", "event_name_234", "2015-01-01T13:51:40.519832Z", 
"type4"),
 ("104", "2015-01-01", "event_name_123", "2015-01-01T12:15:00.512679Z", 
"type1"),
 ("105", "2015-01-01", "event_name_678", "2015-01-01T13:51:42.248818Z", 
"type2"),
 ("106", "2015-01-01", "event_name_890", "2015-01-01T13:51:44.735360Z", 
"type3"),
 ("107", "2015-01-01", "event_name_944", "2015-01-01T13:51:45.019544Z", 
"type4"),
 ("108", "2015-01-01", "event_name_456", "2015-01-01T13:51:45.208007Z", 
"type1"),
 ("109", "2015-01-01", "event_name_567", "2015-01-01T13:51:45.369689Z", 
"type2"),
 ("110", "2015-01-01", "event_name_789", "2015-01-01T12:15:05.664947Z", 
"type3"),
 ("111", "2015-01-01", "event_name_322", "2015-01-01T13:51:47.388239Z", "type4")
 ).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")

var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
var tablePath = "s3://<path>/" + tableName + "/"
df1.write.format("hudi")
 .option("hoodie.metadata.enable", "true")
 .option("hoodie.table.name", tableName)
 .option("hoodie.database.name", "default")
 .option("hoodie.datasource.write.operation", "upsert")
 .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
 .option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
 .option("hoodie.datasource.write.partitionpath.field", "event_type") 
 .option("hoodie.datasource.write.precombine.field", "event_ts")
 .option("hoodie.datasource.write.keygenerator.class", 
"org.apache.hudi.keygen.ComplexKeyGenerator")
 .option("hoodie.datasource.hive_sync.enable", "true")
 .option("hoodie.datasource.meta.sync.enable", "true")
 .option("hoodie.index.type", "GLOBAL_BLOOM")
 .option("hoodie.datasource.hive_sync.mode", "hms")
 .option("hoodie.datasource.hive_sync.database", "default")
 .option("hoodie.datasource.hive_sync.table", tableName)
 .option("hoodie.datasource.hive_sync.partition_fields", "event_type")
 .option("hoodie.datasource.hive_sync.partition_extractor_class", 
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
 .mode(SaveMode.Overwrite)
 .save(tablePath)

# SELECT
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test 
order by event_id").show();

+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|  
_hoodie_record_key|_hoodie_partition_path|   
_hoodie_file_name|event_id|event_date|    event_name|            
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|  20250805000927187|20250805000927187...|event_id:100,even...|                 
type1|db420273-0543-4c0...|     
100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:101,even...|                 
type2|d888bc7e-09c8-44a...|     
101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
|  20250805000927187|20250805000927187...|event_id:102,even...|                 
type3|198de9d0-7df0-4c5...|     
102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:103,even...|                 
type4|72c26c48-802d-473...|     
103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:104,even...|                 
type1|db420273-0543-4c0...|     
104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
|  20250805000927187|20250805000927187...|event_id:105,even...|                 
type2|d888bc7e-09c8-44a...|     
105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:106,even...|                 
type3|198de9d0-7df0-4c5...|     
106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:107,even...|                 
type4|72c26c48-802d-473...|     
107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:108,even...|                 
type1|db420273-0543-4c0...|     
108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:109,even...|                 
type2|d888bc7e-09c8-44a...|     
109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:110,even...|                 
type3|198de9d0-7df0-4c5...|     
110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
|  20250805000927187|20250805000927187...|event_id:111,even...|                 
type4|72c26c48-802d-473...|     
111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+


# INSERT new data
spark.sql("INSERT INTO mansipp_hudi_102_cow_fta_write_lf_table_update_test 
(event_id, event_date, event_name, event_ts, event_type) VALUES('112', 
DATE('2015-01-01'), 'event_name_123', TIMESTAMP('2015-01-01 13:51:45'), 
'type5')")

# SELECT - Not showing new record
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test 
order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|  
_hoodie_record_key|_hoodie_partition_path|   
_hoodie_file_name|event_id|event_date|    event_name|            
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|  20250805000927187|20250805000927187...|event_id:100,even...|                 
type1|db420273-0543-4c0...|     
100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:101,even...|                 
type2|d888bc7e-09c8-44a...|     
101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
|  20250805000927187|20250805000927187...|event_id:102,even...|                 
type3|198de9d0-7df0-4c5...|     
102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:103,even...|                 
type4|72c26c48-802d-473...|     
103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:104,even...|                 
type1|db420273-0543-4c0...|     
104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
|  20250805000927187|20250805000927187...|event_id:105,even...|                 
type2|d888bc7e-09c8-44a...|     
105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:106,even...|                 
type3|198de9d0-7df0-4c5...|     
106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:107,even...|                 
type4|72c26c48-802d-473...|     
107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:108,even...|                 
type1|db420273-0543-4c0...|     
108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:109,even...|                 
type2|d888bc7e-09c8-44a...|     
109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:110,even...|                 
type3|198de9d0-7df0-4c5...|     
110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
|  20250805000927187|20250805000927187...|event_id:111,even...|                 
type4|72c26c48-802d-473...|     
111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+


# df works
import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode

var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
var tablePath = "s3://<path>/" + tableName + "/"

val newData = Seq(
("113", "2015-01-01", "event_name_999", "2015-01-01T14:00:00.000000Z", "type5")
).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")

newData.write.format("hudi")
.option("hoodie.metadata.enable", "true")
.option("hoodie.table.name", tableName)
.option("hoodie.database.name", "default")
.option("hoodie.datasource.write.operation", "upsert")
.option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
.option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
.option("hoodie.datasource.write.partitionpath.field", "event_type")
.option("hoodie.datasource.write.precombine.field", "event_ts")
.option("hoodie.datasource.write.keygenerator.class", 
"org.apache.hudi.keygen.ComplexKeyGenerator")
.option("hoodie.index.type", "GLOBAL_BLOOM")
.option("hoodie.datasource.hive_sync.enable", "true")
.option("hoodie.datasource.meta.sync.enable", "true")
.option("hoodie.datasource.hive_sync.mode", "hms")
.option("hoodie.datasource.hive_sync.database", "default")
.option("hoodie.datasource.hive_sync.table", tableName)
.option("hoodie.datasource.hive_sync.partition_fields", "event_type")
.option("hoodie.datasource.hive_sync.partition_extractor_class", 
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
.mode(SaveMode.Append)
.save(tablePath)

# SELECT
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test 
order by event_id").show();

+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|  
_hoodie_record_key|_hoodie_partition_path|   
_hoodie_file_name|event_id|event_date|    event_name|            
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|  20250805000927187|20250805000927187...|event_id:100,even...|                 
type1|db420273-0543-4c0...|     
100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:101,even...|                 
type2|d888bc7e-09c8-44a...|     
101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
|  20250805000927187|20250805000927187...|event_id:102,even...|                 
type3|198de9d0-7df0-4c5...|     
102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:103,even...|                 
type4|72c26c48-802d-473...|     
103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:104,even...|                 
type1|db420273-0543-4c0...|     
104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
|  20250805000927187|20250805000927187...|event_id:105,even...|                 
type2|d888bc7e-09c8-44a...|     
105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:106,even...|                 
type3|198de9d0-7df0-4c5...|     
106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:107,even...|                 
type4|72c26c48-802d-473...|     
107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:108,even...|                 
type1|db420273-0543-4c0...|     
108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:109,even...|                 
type2|d888bc7e-09c8-44a...|     
109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:110,even...|                 
type3|198de9d0-7df0-4c5...|     
110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
|  20250805000927187|20250805000927187...|event_id:111,even...|                 
type4|72c26c48-802d-473...|     
111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
|  20250805001335884|20250805001335884...|event_id:113,even...|                 
type5|6f74bbeb-e7cd-4e2...|     
113|2015-01-01|event_name_999|2015-01-01T14:00:...|     type5|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+{code}

  was:
Hudi 1.0.2 SQL Global_Bloom index not returning newly inserted data.
{code:java}
spark-shell --jars 
/home/hadoop/hudi-spark3-bundle_2.12-1.0.2-amzn-0-SNAPSHOT.jar \
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \    
--conf 
"spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
  \
--conf 
"spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"


import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode
val df1 = Seq(
 ("100", "2015-01-01", "event_name_900", "2015-01-01T13:51:39.340396Z", 
"type1"),
 ("101", "2015-01-01", "event_name_546", "2015-01-01T12:14:58.597216Z", 
"type2"),
 ("102", "2015-01-01", "event_name_345", "2015-01-01T13:51:40.417052Z", 
"type3"),
 ("103", "2015-01-01", "event_name_234", "2015-01-01T13:51:40.519832Z", 
"type4"),
 ("104", "2015-01-01", "event_name_123", "2015-01-01T12:15:00.512679Z", 
"type1"),
 ("105", "2015-01-01", "event_name_678", "2015-01-01T13:51:42.248818Z", 
"type2"),
 ("106", "2015-01-01", "event_name_890", "2015-01-01T13:51:44.735360Z", 
"type3"),
 ("107", "2015-01-01", "event_name_944", "2015-01-01T13:51:45.019544Z", 
"type4"),
 ("108", "2015-01-01", "event_name_456", "2015-01-01T13:51:45.208007Z", 
"type1"),
 ("109", "2015-01-01", "event_name_567", "2015-01-01T13:51:45.369689Z", 
"type2"),
 ("110", "2015-01-01", "event_name_789", "2015-01-01T12:15:05.664947Z", 
"type3"),
 ("111", "2015-01-01", "event_name_322", "2015-01-01T13:51:47.388239Z", "type4")
 ).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")

var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
var tablePath = "s3://<path>/" + tableName + "/"
df1.write.format("hudi")
 .option("hoodie.metadata.enable", "true")
 .option("hoodie.table.name", tableName)
 .option("hoodie.database.name", "default")
 .option("hoodie.datasource.write.operation", "upsert")
 .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
 .option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
 .option("hoodie.datasource.write.partitionpath.field", "event_type") 
 .option("hoodie.datasource.write.precombine.field", "event_ts")
 .option("hoodie.datasource.write.keygenerator.class", 
"org.apache.hudi.keygen.ComplexKeyGenerator")
 .option("hoodie.datasource.hive_sync.enable", "true")
 .option("hoodie.datasource.meta.sync.enable", "true")
 .option("hoodie.index.type", "GLOBAL_BLOOM")
 .option("hoodie.datasource.hive_sync.mode", "hms")
 .option("hoodie.datasource.hive_sync.database", "default")
 .option("hoodie.datasource.hive_sync.table", tableName)
 .option("hoodie.datasource.hive_sync.partition_fields", "event_type")
 .option("hoodie.datasource.hive_sync.partition_extractor_class", 
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
 .mode(SaveMode.Overwrite)
 .save(tablePath)

# SELECT
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test 
order by event_id").show();

+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|  
_hoodie_record_key|_hoodie_partition_path|   
_hoodie_file_name|event_id|event_date|    event_name|            
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|  20250805000927187|20250805000927187...|event_id:100,even...|                 
type1|db420273-0543-4c0...|     
100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:101,even...|                 
type2|d888bc7e-09c8-44a...|     
101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
|  20250805000927187|20250805000927187...|event_id:102,even...|                 
type3|198de9d0-7df0-4c5...|     
102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:103,even...|                 
type4|72c26c48-802d-473...|     
103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:104,even...|                 
type1|db420273-0543-4c0...|     
104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
|  20250805000927187|20250805000927187...|event_id:105,even...|                 
type2|d888bc7e-09c8-44a...|     
105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:106,even...|                 
type3|198de9d0-7df0-4c5...|     
106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:107,even...|                 
type4|72c26c48-802d-473...|     
107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:108,even...|                 
type1|db420273-0543-4c0...|     
108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:109,even...|                 
type2|d888bc7e-09c8-44a...|     
109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:110,even...|                 
type3|198de9d0-7df0-4c5...|     
110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
|  20250805000927187|20250805000927187...|event_id:111,even...|                 
type4|72c26c48-802d-473...|     
111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+


# INSERT new data
spark.sql("INSERT INTO mansipp_hudi_102_cow_fta_write_lf_table_update_test 
(event_id, event_date, event_name, event_ts, event_type) VALUES('112', 
DATE('2015-01-01'), 'event_name_123', TIMESTAMP('2015-01-01 13:51:45'), 
'type5')")

# SELECT - Not showing new record
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test 
order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|  
_hoodie_record_key|_hoodie_partition_path|   
_hoodie_file_name|event_id|event_date|    event_name|            
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|  20250805000927187|20250805000927187...|event_id:100,even...|                 
type1|db420273-0543-4c0...|     
100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:101,even...|                 
type2|d888bc7e-09c8-44a...|     
101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
|  20250805000927187|20250805000927187...|event_id:102,even...|                 
type3|198de9d0-7df0-4c5...|     
102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:103,even...|                 
type4|72c26c48-802d-473...|     
103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:104,even...|                 
type1|db420273-0543-4c0...|     
104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
|  20250805000927187|20250805000927187...|event_id:105,even...|                 
type2|d888bc7e-09c8-44a...|     
105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:106,even...|                 
type3|198de9d0-7df0-4c5...|     
106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:107,even...|                 
type4|72c26c48-802d-473...|     
107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:108,even...|                 
type1|db420273-0543-4c0...|     
108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:109,even...|                 
type2|d888bc7e-09c8-44a...|     
109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:110,even...|                 
type3|198de9d0-7df0-4c5...|     
110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
|  20250805000927187|20250805000927187...|event_id:111,even...|                 
type4|72c26c48-802d-473...|     
111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+


# df works
import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode

var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
var tablePath = "s3://<path>/" + tableName + "/"

val newData = Seq(
("113", "2015-01-01", "event_name_999", "2015-01-01T14:00:00.000000Z", "type5")
).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")

newData.write.format("hudi")
.option("hoodie.metadata.enable", "true")
.option("hoodie.table.name", tableName)
.option("hoodie.database.name", "default")
.option("hoodie.datasource.write.operation", "upsert")
.option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
.option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
.option("hoodie.datasource.write.partitionpath.field", "event_type")
.option("hoodie.datasource.write.precombine.field", "event_ts")
.option("hoodie.datasource.write.keygenerator.class", 
"org.apache.hudi.keygen.ComplexKeyGenerator")
.option("hoodie.index.type", "GLOBAL_BLOOM")
.option("hoodie.datasource.hive_sync.enable", "true")
.option("hoodie.datasource.meta.sync.enable", "true")
.option("hoodie.datasource.hive_sync.mode", "hms")
.option("hoodie.datasource.hive_sync.database", "default")
.option("hoodie.datasource.hive_sync.table", tableName)
.option("hoodie.datasource.hive_sync.partition_fields", "event_type")
.option("hoodie.datasource.hive_sync.partition_extractor_class", 
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
.mode(SaveMode.Append)
.save(tablePath)

# SELECT
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test 
order by event_id").show();

+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|  
_hoodie_record_key|_hoodie_partition_path|   
_hoodie_file_name|event_id|event_date|    event_name|            
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|  20250805000927187|20250805000927187...|event_id:100,even...|                 
type1|db420273-0543-4c0...|     
100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:101,even...|                 
type2|d888bc7e-09c8-44a...|     
101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
|  20250805000927187|20250805000927187...|event_id:102,even...|                 
type3|198de9d0-7df0-4c5...|     
102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:103,even...|                 
type4|72c26c48-802d-473...|     
103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:104,even...|                 
type1|db420273-0543-4c0...|     
104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
|  20250805000927187|20250805000927187...|event_id:105,even...|                 
type2|d888bc7e-09c8-44a...|     
105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:106,even...|                 
type3|198de9d0-7df0-4c5...|     
106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
|  20250805000927187|20250805000927187...|event_id:107,even...|                 
type4|72c26c48-802d-473...|     
107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
|  20250805000927187|20250805000927187...|event_id:108,even...|                 
type1|db420273-0543-4c0...|     
108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
|  20250805000927187|20250805000927187...|event_id:109,even...|                 
type2|d888bc7e-09c8-44a...|     
109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
|  20250805000927187|20250805000927187...|event_id:110,even...|                 
type3|198de9d0-7df0-4c5...|     
110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
|  20250805000927187|20250805000927187...|event_id:111,even...|                 
type4|72c26c48-802d-473...|     
111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
|  20250805001335884|20250805001335884...|event_id:113,even...|                 
type5|6f74bbeb-e7cd-4e2...|     
113|2015-01-01|event_name_999|2015-01-01T14:00:...|     type5|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+{code}


> Hudi 1.0.2 SQL Global_Bloom index not returning newly inserted data
> -------------------------------------------------------------------
>
>                 Key: HUDI-9690
>                 URL: https://issues.apache.org/jira/browse/HUDI-9690
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: spark-sql
>            Reporter: Mansi Patel
>            Priority: Major
>              Labels: 1.0.2
>
> Hudi 1.0.2 with the GLOBAL_BLOOM index: a record inserted via Spark SQL (INSERT INTO) is not returned by subsequent SELECT queries, while the same record written through the DataFrame writer (upsert) shows up as expected.
> {code:java}
> spark-shell --jars 
> /home/hadoop/hudi-spark3-bundle_2.12-1.0.2-amzn-0-SNAPSHOT.jar \
> --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \    
> --conf 
> "spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
>   \
> --conf 
> "spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"
> import org.apache.hudi.DataSourceWriteOptions
> import org.apache.spark.sql.SaveMode
> val df1 = Seq(
>  ("100", "2015-01-01", "event_name_900", "2015-01-01T13:51:39.340396Z", 
> "type1"),
>  ("101", "2015-01-01", "event_name_546", "2015-01-01T12:14:58.597216Z", 
> "type2"),
>  ("102", "2015-01-01", "event_name_345", "2015-01-01T13:51:40.417052Z", 
> "type3"),
>  ("103", "2015-01-01", "event_name_234", "2015-01-01T13:51:40.519832Z", 
> "type4"),
>  ("104", "2015-01-01", "event_name_123", "2015-01-01T12:15:00.512679Z", 
> "type1"),
>  ("105", "2015-01-01", "event_name_678", "2015-01-01T13:51:42.248818Z", 
> "type2"),
>  ("106", "2015-01-01", "event_name_890", "2015-01-01T13:51:44.735360Z", 
> "type3"),
>  ("107", "2015-01-01", "event_name_944", "2015-01-01T13:51:45.019544Z", 
> "type4"),
>  ("108", "2015-01-01", "event_name_456", "2015-01-01T13:51:45.208007Z", 
> "type1"),
>  ("109", "2015-01-01", "event_name_567", "2015-01-01T13:51:45.369689Z", 
> "type2"),
>  ("110", "2015-01-01", "event_name_789", "2015-01-01T12:15:05.664947Z", 
> "type3"),
>  ("111", "2015-01-01", "event_name_322", "2015-01-01T13:51:47.388239Z", 
> "type4")
>  ).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
> var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
> var tablePath = "s3://<path>/" + tableName + "/"
> df1.write.format("hudi")
>  .option("hoodie.metadata.enable", "true")
>  .option("hoodie.table.name", tableName)
>  .option("hoodie.database.name", "default")
>  .option("hoodie.datasource.write.operation", "upsert")
>  .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
>  .option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
>  .option("hoodie.datasource.write.partitionpath.field", "event_type") 
>  .option("hoodie.datasource.write.precombine.field", "event_ts")
>  .option("hoodie.datasource.write.keygenerator.class", 
> "org.apache.hudi.keygen.ComplexKeyGenerator")
>  .option("hoodie.datasource.hive_sync.enable", "true")
>  .option("hoodie.datasource.meta.sync.enable", "true")
>  .option("hoodie.index.type", "GLOBAL_BLOOM")
>  .option("hoodie.datasource.hive_sync.mode", "hms")
>  .option("hoodie.datasource.hive_sync.database", "default")
>  .option("hoodie.datasource.hive_sync.table", tableName)
>  .option("hoodie.datasource.hive_sync.partition_fields", "event_type")
>  .option("hoodie.datasource.hive_sync.partition_extractor_class", 
> "org.apache.hudi.hive.MultiPartKeysValueExtractor")
>  .mode(SaveMode.Overwrite)
>  .save(tablePath)
> # SELECT
> spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test 
> order by event_id").show();
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> |_hoodie_commit_time|_hoodie_commit_seqno|  
> _hoodie_record_key|_hoodie_partition_path|   
> _hoodie_file_name|event_id|event_date|    event_name|            
> event_ts|event_type|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> |  20250805000927187|20250805000927187...|event_id:100,even...|               
>   type1|db420273-0543-4c0...|     
> 100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
> |  20250805000927187|20250805000927187...|event_id:101,even...|               
>   type2|d888bc7e-09c8-44a...|     
> 101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
> |  20250805000927187|20250805000927187...|event_id:102,even...|               
>   type3|198de9d0-7df0-4c5...|     
> 102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
> |  20250805000927187|20250805000927187...|event_id:103,even...|               
>   type4|72c26c48-802d-473...|     
> 103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
> |  20250805000927187|20250805000927187...|event_id:104,even...|               
>   type1|db420273-0543-4c0...|     
> 104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
> |  20250805000927187|20250805000927187...|event_id:105,even...|               
>   type2|d888bc7e-09c8-44a...|     
> 105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
> |  20250805000927187|20250805000927187...|event_id:106,even...|               
>   type3|198de9d0-7df0-4c5...|     
> 106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
> |  20250805000927187|20250805000927187...|event_id:107,even...|               
>   type4|72c26c48-802d-473...|     
> 107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
> |  20250805000927187|20250805000927187...|event_id:108,even...|               
>   type1|db420273-0543-4c0...|     
> 108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
> |  20250805000927187|20250805000927187...|event_id:109,even...|               
>   type2|d888bc7e-09c8-44a...|     
> 109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
> |  20250805000927187|20250805000927187...|event_id:110,even...|               
>   type3|198de9d0-7df0-4c5...|     
> 110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
> |  20250805000927187|20250805000927187...|event_id:111,even...|               
>   type4|72c26c48-802d-473...|     
> 111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> # INSERT new data
> spark.sql("INSERT INTO mansipp_hudi_102_cow_fta_write_lf_table_update_test 
> (event_id, event_date, event_name, event_ts, event_type) VALUES('112', 
> DATE('2015-01-01'), 'event_name_123', TIMESTAMP('2015-01-01 13:51:45'), 
> 'type5')")
> # SELECT - the newly inserted record (event_id 112) is NOT returned
> spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test 
> order by event_id").show();
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> |_hoodie_commit_time|_hoodie_commit_seqno|  
> _hoodie_record_key|_hoodie_partition_path|   
> _hoodie_file_name|event_id|event_date|    event_name|            
> event_ts|event_type|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> |  20250805000927187|20250805000927187...|event_id:100,even...|               
>   type1|db420273-0543-4c0...|     
> 100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
> |  20250805000927187|20250805000927187...|event_id:101,even...|               
>   type2|d888bc7e-09c8-44a...|     
> 101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
> |  20250805000927187|20250805000927187...|event_id:102,even...|               
>   type3|198de9d0-7df0-4c5...|     
> 102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
> |  20250805000927187|20250805000927187...|event_id:103,even...|               
>   type4|72c26c48-802d-473...|     
> 103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
> |  20250805000927187|20250805000927187...|event_id:104,even...|               
>   type1|db420273-0543-4c0...|     
> 104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
> |  20250805000927187|20250805000927187...|event_id:105,even...|               
>   type2|d888bc7e-09c8-44a...|     
> 105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
> |  20250805000927187|20250805000927187...|event_id:106,even...|               
>   type3|198de9d0-7df0-4c5...|     
> 106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
> |  20250805000927187|20250805000927187...|event_id:107,even...|               
>   type4|72c26c48-802d-473...|     
> 107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
> |  20250805000927187|20250805000927187...|event_id:108,even...|               
>   type1|db420273-0543-4c0...|     
> 108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
> |  20250805000927187|20250805000927187...|event_id:109,even...|               
>   type2|d888bc7e-09c8-44a...|     
> 109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
> |  20250805000927187|20250805000927187...|event_id:110,even...|               
>   type3|198de9d0-7df0-4c5...|     
> 110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
> |  20250805000927187|20250805000927187...|event_id:111,even...|               
>   type4|72c26c48-802d-473...|     
> 111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> # By contrast, writing via the DataFrame API works (new record is returned)
> import org.apache.hudi.DataSourceWriteOptions
> import org.apache.spark.sql.SaveMode
> var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
> var tablePath = "s3://<path>/" + tableName + "/"
> val newData = Seq(
> ("113", "2015-01-01", "event_name_999", "2015-01-01T14:00:00.000000Z", 
> "type5")
> ).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
> newData.write.format("hudi")
> .option("hoodie.metadata.enable", "true")
> .option("hoodie.table.name", tableName)
> .option("hoodie.database.name", "default")
> .option("hoodie.datasource.write.operation", "upsert")
> .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
> .option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
> .option("hoodie.datasource.write.partitionpath.field", "event_type")
> .option("hoodie.datasource.write.precombine.field", "event_ts")
> .option("hoodie.datasource.write.keygenerator.class", 
> "org.apache.hudi.keygen.ComplexKeyGenerator")
> .option("hoodie.index.type", "GLOBAL_BLOOM")
> .option("hoodie.datasource.hive_sync.enable", "true")
> .option("hoodie.datasource.meta.sync.enable", "true")
> .option("hoodie.datasource.hive_sync.mode", "hms")
> .option("hoodie.datasource.hive_sync.database", "default")
> .option("hoodie.datasource.hive_sync.table", tableName)
> .option("hoodie.datasource.hive_sync.partition_fields", "event_type")
> .option("hoodie.datasource.hive_sync.partition_extractor_class", 
> "org.apache.hudi.hive.MultiPartKeysValueExtractor")
> .mode(SaveMode.Append)
> .save(tablePath)
> # SELECT
> spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test 
> order by event_id").show();
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> |_hoodie_commit_time|_hoodie_commit_seqno|  
> _hoodie_record_key|_hoodie_partition_path|   
> _hoodie_file_name|event_id|event_date|    event_name|            
> event_ts|event_type|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> |  20250805000927187|20250805000927187...|event_id:100,even...|               
>   type1|db420273-0543-4c0...|     
> 100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
> |  20250805000927187|20250805000927187...|event_id:101,even...|               
>   type2|d888bc7e-09c8-44a...|     
> 101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
> |  20250805000927187|20250805000927187...|event_id:102,even...|               
>   type3|198de9d0-7df0-4c5...|     
> 102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
> |  20250805000927187|20250805000927187...|event_id:103,even...|               
>   type4|72c26c48-802d-473...|     
> 103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
> |  20250805000927187|20250805000927187...|event_id:104,even...|               
>   type1|db420273-0543-4c0...|     
> 104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
> |  20250805000927187|20250805000927187...|event_id:105,even...|               
>   type2|d888bc7e-09c8-44a...|     
> 105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
> |  20250805000927187|20250805000927187...|event_id:106,even...|               
>   type3|198de9d0-7df0-4c5...|     
> 106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
> |  20250805000927187|20250805000927187...|event_id:107,even...|               
>   type4|72c26c48-802d-473...|     
> 107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
> |  20250805000927187|20250805000927187...|event_id:108,even...|               
>   type1|db420273-0543-4c0...|     
> 108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
> |  20250805000927187|20250805000927187...|event_id:109,even...|               
>   type2|d888bc7e-09c8-44a...|     
> 109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
> |  20250805000927187|20250805000927187...|event_id:110,even...|               
>   type3|198de9d0-7df0-4c5...|     
> 110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
> |  20250805000927187|20250805000927187...|event_id:111,even...|               
>   type4|72c26c48-802d-473...|     
> 111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
> |  20250805001335884|20250805001335884...|event_id:113,even...|               
>   type5|6f74bbeb-e7cd-4e2...|     
> 113|2015-01-01|event_name_999|2015-01-01T14:00:...|     type5|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to