lw309637554 commented on a change in pull request #2127:
URL: https://github.com/apache/hudi/pull/2127#discussion_r499162390
##########
File path: hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java
##########
@@ -68,61 +71,62 @@ public void tearDown() throws IOException {
     cleanupResources();
   }
 
-  @Test
-  public void testSchemaEvolutionOnUpdate() throws Exception {
+  private WriteStatus prepareFirstCommitData(List<String> recordsStrs) throws IOException {
     // Create a bunch of records with a old version of schema
     final HoodieWriteConfig config = makeHoodieClientConfig("/exampleSchema.txt");
     final HoodieSparkTable table = HoodieSparkTable.create(config, context);
-    final List<WriteStatus> statuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
-      String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
-          + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
-      String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\","
-          + "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
-      String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\","
-          + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
       List<HoodieRecord> insertRecords = new ArrayList<>();
-      RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1);
-      insertRecords
-          .add(new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1));
-      RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2);
-      insertRecords
-          .add(new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2));
-      RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3);
-      insertRecords
-          .add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3));
-
+      for (String recordStr : recordsStrs) {
+        RawTripTestPayload rowChange = new RawTripTestPayload(recordStr);
+        insertRecords
+            .add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange));
+      }
       Map<String, HoodieRecord> insertRecordMap = insertRecords.stream()
           .collect(Collectors.toMap(r -> r.getRecordKey(), Function.identity()));
       HoodieCreateHandle createHandle =
-          new HoodieCreateHandle(config, "100", table, rowChange1.getPartitionPath(), "f1-0", insertRecordMap, supplier);
+          new HoodieCreateHandle(config, "100", table, insertRecords.get(0).getPartitionPath(), "f1-0", insertRecordMap, supplier);
       createHandle.write();
       return createHandle.close();
     }).collect();
     final Path commitFile = new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100"));
     FSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile);
+    return statuses.get(0);
+  }
+
+  @Test
+  public void testSchemaEvolutionOnUpdateSuccessWithAddColumnHaveDefault() throws Exception {
+    List<String> recordsStrs = new ArrayList<>();
+    String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
+        + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
+    String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\","
+        + "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
+    String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\","
+        + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
+    recordsStrs.add(recordStr1);
+    recordsStrs.add(recordStr2);
+    recordsStrs.add(recordStr3);

Review comment:
   Thanks. These 3 local vars will be used by the prepareFirstCommitData method; I will move them into a method.
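   A minimal sketch of the refactor described above, assuming the three record
   strings move into a dedicated helper on the test class. The method name
   generateExampleSchemaRecords is hypothetical (the PR may choose a different
   name), and java.util.List/ArrayList are assumed to be imported already:

   // Hypothetical helper (name illustrative only): gathers the three
   // example-schema rows so each test can feed them to prepareFirstCommitData
   // instead of rebuilding the JSON strings inline.
   private List<String> generateExampleSchemaRecords() {
     List<String> recordsStrs = new ArrayList<>();
     recordsStrs.add("{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
         + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
     recordsStrs.add("{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\","
         + "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}");
     recordsStrs.add("{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\","
         + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}");
     return recordsStrs;
   }

   Each test could then call prepareFirstCommitData(generateExampleSchemaRecords())
   rather than declaring the three locals itself.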
##########
File path: hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java
##########
@@ -68,61 +71,62 @@ public void tearDown() throws IOException {
     cleanupResources();
   }
 
-  @Test
-  public void testSchemaEvolutionOnUpdate() throws Exception {
+  private WriteStatus prepareFirstCommitData(List<String> recordsStrs) throws IOException {
     // Create a bunch of records with a old version of schema
     final HoodieWriteConfig config = makeHoodieClientConfig("/exampleSchema.txt");
     final HoodieSparkTable table = HoodieSparkTable.create(config, context);
-    final List<WriteStatus> statuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
-      String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
-          + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
-      String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\","
-          + "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
-      String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\","
-          + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
       List<HoodieRecord> insertRecords = new ArrayList<>();
-      RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1);
-      insertRecords
-          .add(new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1));
-      RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2);
-      insertRecords
-          .add(new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2));
-      RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3);
-      insertRecords
-          .add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3));
-
+      for (String recordStr : recordsStrs) {
+        RawTripTestPayload rowChange = new RawTripTestPayload(recordStr);
+        insertRecords
+            .add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange));
+      }
       Map<String, HoodieRecord> insertRecordMap = insertRecords.stream()
           .collect(Collectors.toMap(r -> r.getRecordKey(), Function.identity()));
       HoodieCreateHandle createHandle =
-          new HoodieCreateHandle(config, "100", table, rowChange1.getPartitionPath(), "f1-0", insertRecordMap, supplier);
+          new HoodieCreateHandle(config, "100", table, insertRecords.get(0).getPartitionPath(), "f1-0", insertRecordMap, supplier);
       createHandle.write();
       return createHandle.close();
     }).collect();
     final Path commitFile = new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100"));
     FSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile);
+    return statuses.get(0);
+  }
+
+  @Test
+  public void testSchemaEvolutionOnUpdateSuccessWithAddColumnHaveDefault() throws Exception {
+    List<String> recordsStrs = new ArrayList<>();
+    String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
+        + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
+    String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\","
+        + "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
+    String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\","
+        + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
+    recordsStrs.add(recordStr1);
+    recordsStrs.add(recordStr2);
+    recordsStrs.add(recordStr3);
     // Now try an update with an evolved schema
     // Evolved schema does not have guarantee on preserving the original field ordering
     final HoodieWriteConfig config2 = makeHoodieClientConfig("/exampleEvolvedSchema.txt");
-    final WriteStatus insertResult = statuses.get(0);
+    final WriteStatus insertResult = prepareFirstCommitData(recordsStrs);
     String fileId = insertResult.getFileId();
-    final HoodieSparkTable table2 = HoodieSparkTable.create(config, context);
+    final HoodieSparkTable table2 = HoodieSparkTable.create(config2, context);

Review comment:
   ok

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org
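   For context on the fix acknowledged above, a short sketch restating the
   one-line change from the hunk. All identifiers come from the quoted diff;
   only the explanatory comments are added here:

   // table2 must be created from config2, which carries the evolved schema
   // ("/exampleEvolvedSchema.txt"). Creating it from the original config
   // would exercise the update against the old schema instead.
   final HoodieWriteConfig config2 = makeHoodieClientConfig("/exampleEvolvedSchema.txt");
   final HoodieSparkTable table2 = HoodieSparkTable.create(config2, context); // was: create(config, context)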