[ 
https://issues.apache.org/jira/browse/HUDI-6582?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Danny Chen updated HUDI-6582:
-----------------------------
    Fix Version/s: 0.14.0

> Table create schema's name should be set accordingly
> ----------------------------------------------------
>
>                 Key: HUDI-6582
>                 URL: https://issues.apache.org/jira/browse/HUDI-6582
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: voon
>            Assignee: voon
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 0.14.0
>
>
> When recreating a hudi table with an existing hoodie.properties, the record 
> name in the table's create schema (hoodie.table.create.schema) will be 
> changed to "topLevelRecord".
>  
> This ticket is a follow-up to HUDI-6145 to ensure that the name/namespace of 
> the table's create schema is standardised accordingly.
>  
> {code:java}
> test("Test Create Hoodie Table with existing hoodie.properties") {
>   withTempDir { tmp =>
>     val tableName = generateTableName
>     val tablePath = s"${tmp.getCanonicalPath}"
>     spark.sql(
>       s"""
>          |create table $tableName (
>          |  id int,
>          |  name string,
>          |  price double,
>          |  ts long
>          |) using hudi
>          | location '$tablePath'
>          | tblproperties (
>          |  primaryKey ='id',
>          |  type = 'cow',
>          |  preCombineField = 'ts'
>          | )
>      """.stripMargin)
>     // 
> hoodie.table.create.schema={"type":"record","name":"h0_record","namespace":"hoodie.h0","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]},
>  but got 
> {"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
>     // drop the table without purging hdfs directory
>     spark.sql(s"drop table $tableName".stripMargin)
>     val tableSchemaAfterCreate1 = HoodieTableMetaClient.builder()
>       .setConf(spark.sparkContext.hadoopConfiguration)
>       .setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
>     // avro schema name and namespace should not change
>     spark.newSession().sql(
>       s"""
>          |create table $tableName (
>          |  id int,
>          |  name string,
>          |  price double,
>          |  ts long
>          |) using hudi
>          | location '$tablePath'
>          | tblproperties (
>          |  primaryKey ='id',
>          |  type = 'cow',
>          |  preCombineField = 'ts'
>          | )
>      """.stripMargin)
>     /// 
> hoodie.table.create.schema={"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
>     val tableSchemaAfterCreate2 = HoodieTableMetaClient.builder()
>       .setConf(spark.sparkContext.hadoopConfiguration)
>       .setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
>     assertResult(tableSchemaAfterCreate1.get)(tableSchemaAfterCreate2.get)
>   } {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to