voon created HUDI-6582:
--------------------------

             Summary: Table create schema's name should be set accordingly
                 Key: HUDI-6582
                 URL: https://issues.apache.org/jira/browse/HUDI-6582
             Project: Apache Hudi
          Issue Type: Bug
            Reporter: voon


When recreating a Hudi table on top of an existing hoodie.properties, the 
record name in the table's create schema (hoodie.table.create.schema) is 
changed to "topLevelRecord" and its namespace is dropped.

 

This ticket is a follow-up to HUDI-8587 to ensure that the record 
name/namespace of the table's create schema is standardised accordingly.

 
{code:java}
test("Test Create Hoodie Table with existing hoodie.properties") {
  withTempDir { tmp =>
    val tableName = generateTableName
    val tablePath = s"${tmp.getCanonicalPath}"
    spark.sql(
      s"""
         |create table $tableName (
         |  id int,
         |  name string,
         |  price double,
         |  ts long
         |) using hudi
         | location '$tablePath'
         | tblproperties (
         |  primaryKey ='id',
         |  type = 'cow',
         |  preCombineField = 'ts'
         | )
     """.stripMargin)
    // hoodie.table.create.schema={"type":"record","name":"h0_record","namespace":"hoodie.h0","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]},
    // but got
    // {"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}

    // drop the table without purging hdfs directory
    spark.sql(s"drop table $tableName".stripMargin)

    val tableSchemaAfterCreate1 = HoodieTableMetaClient.builder()
      .setConf(spark.sparkContext.hadoopConfiguration)
      .setBasePath(tablePath).build().getTableConfig.getTableCreateSchema

    // avro schema name and namespace should not change
    spark.newSession().sql(
      s"""
         |create table $tableName (
         |  id int,
         |  name string,
         |  price double,
         |  ts long
         |) using hudi
         | location '$tablePath'
         | tblproperties (
         |  primaryKey ='id',
         |  type = 'cow',
         |  preCombineField = 'ts'
         | )
     """.stripMargin)
    // hoodie.table.create.schema={"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}

    val tableSchemaAfterCreate2 = HoodieTableMetaClient.builder()
      .setConf(spark.sparkContext.hadoopConfiguration)
      .setBasePath(tablePath).build().getTableConfig.getTableCreateSchema

    assertResult(tableSchemaAfterCreate1.get)(tableSchemaAfterCreate2.get)
  } {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to