n3nash commented on a change in pull request #2927: URL: https://github.com/apache/hudi/pull/2927#discussion_r630725241
########## File path: hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala ########## @@ -248,6 +249,24 @@ private[hudi] object HoodieSparkSqlWriter { } } + /** + * Checks if schema needs upgrade (if incoming records's schema is old while table schema got evolved). + * @param fs instance of FileSystem. + * @param basePath base path. + * @param sparkContext instance of spark context. + * @param schema incoming record's schema. + * @return Pair of(boolean, table schema), where first entry will be true only if schema conversion is required. + */ + def getLatestSchema(fs: FileSystem, basePath: Path, sparkContext: SparkContext, schema: Schema) : Schema = { + var latestSchema: Schema = schema + if(fs.exists(new Path(basePath.toString + "/" + HoodieTableMetaClient.METAFOLDER_NAME))) { + val tableMetaClient = HoodieTableMetaClient.builder.setConf(sparkContext.hadoopConfiguration).setBasePath(basePath.toString).build() + val tableSchemaResolver = new TableSchemaResolver(tableMetaClient) + latestSchema = tableSchemaResolver.getLatestSchema(schema, false, null); Review comment: The last argument is passed as "null"; will that not cause a NullPointerException? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org