Sagar Sumit created HUDI-8198:
---------------------------------
Summary: DELETE_PARTITION does not work when table is partitioned
by multiple fields
Key: HUDI-8198
URL: https://issues.apache.org/jira/browse/HUDI-8198
Project: Apache Hudi
Issue Type: Task
Reporter: Sagar Sumit
Fix For: 1.0.0
{code:java}
// Reproduction for HUDI-8198: write a table partitioned by two fields ("dt,hh"),
// then issue a DELETE_PARTITION write that targets a single dt/hh partition.
//
// Writer configuration: record key "id", precombine field "version",
// two partition path fields "dt,hh", INSERT as the initial operation, and
// URL encoding of partition values turned off.
// NOTE(review): getWriterOptions() is defined outside this snippet; any base
// options it sets (e.g. key generator, hive-style partitioning) are not visible here.
Map<String, String> writeOptions = getWriterOptions();
writeOptions.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "id");
writeOptions.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "version");
writeOptions.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "dt,hh");
writeOptions.put(DataSourceWriteOptions.OPERATION().key(),
DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL());
writeOptions.put(DataSourceWriteOptions.URL_ENCODE_PARTITIONING().key(),
"false");
// generate dataset with multiple partition fields
// Ten rows (i = 0..9) shaped as (id=i, name="a"+i, price=10+i, version=10000, dt, hh).
// dt is "2021-03-01" for even i and "2021-03-02" for odd i; hh is always "10",
// so the data spans two distinct (dt, hh) combinations.
List<Row> rows = IntStream.range(0, 10)
.mapToObj(i -> RowFactory.create(i, "a" + i, 10 + i, 10000, "2021-03-0" +
(i % 2 + 1), "10"))
.collect(Collectors.toList());
// Schema matching the row layout above; every column is declared non-nullable.
StructType schema = new StructType(new StructField[]{
new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
new StructField("name", DataTypes.StringType, false, Metadata.empty()),
new StructField("price", DataTypes.IntegerType, false, Metadata.empty()),
new StructField("version", DataTypes.IntegerType, false, Metadata.empty()),
new StructField("dt", DataTypes.StringType, false, Metadata.empty()),
new StructField("hh", DataTypes.StringType, false, Metadata.empty())
});
// Step 1: initial write of the generated rows to basePath using SaveMode.Overwrite.
Dataset<Row> inputDF1 = sparkSession.createDataFrame(rows, schema);
inputDF1.write().format("hudi")
.options(writeOptions)
.mode(SaveMode.Overwrite)
.save(basePath);
// Step 2: write an empty DataFrame with the operation set to DELETE_PARTITION and
// PARTITIONS_TO_DELETE set to "dt=2021-03-01/hh=10". The operation option is applied
// after options(writeOptions), so it is meant to replace the INSERT operation above.
// NOTE(review): the partition to delete is given in key=value form; whether that
// matches the partition paths actually written depends on settings from
// getWriterOptions() that are not shown here - confirm.
// NOTE(review): `Append` is unqualified here while SaveMode.Overwrite is qualified
// above; presumably a static import of SaveMode.Append exists - confirm.
Dataset<Row> df = sparkSession.emptyDataFrame();
df.write().format("hudi")
.options(writeOptions)
.option(DataSourceWriteOptions.OPERATION().key(),
WriteOperationType.DELETE_PARTITION.value())
.option(DataSourceWriteOptions.PARTITIONS_TO_DELETE().key(),
"dt=2021-03-01/hh=10")
.mode(Append)
.save(basePath); {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)