pvary commented on a change in pull request #2038:
URL: https://github.com/apache/iceberg/pull/2038#discussion_r559661520



##########
File path: 
mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
##########
@@ -206,33 +207,42 @@ public static PartitionSpec spec(Configuration config) {
    * @param config The target configuration to store to
    * @param table The table which we want to store to the configuration
    */
+
   @VisibleForTesting
-  static void put(Configuration config, Table table) {
-    // The Table contains a FileIO and the FileIO serializes the configuration 
so we might end up recursively
-    // serializing the objects. To avoid this unset the values for now before 
serializing.
-    config.unset(InputFormatConfig.SERIALIZED_TABLE);
-    config.unset(InputFormatConfig.FILE_IO);
-    config.unset(InputFormatConfig.LOCATION_PROVIDER);
-    config.unset(InputFormatConfig.ENCRYPTION_MANAGER);
-    config.unset(InputFormatConfig.TABLE_LOCATION);
-    config.unset(InputFormatConfig.TABLE_SCHEMA);
-    config.unset(InputFormatConfig.PARTITION_SPEC);
-
-    String base64Table = table instanceof Serializable ? 
SerializationUtil.serializeToBase64(table) : null;
-    String base64Io = SerializationUtil.serializeToBase64(table.io());
-    String base64LocationProvider = 
SerializationUtil.serializeToBase64(table.locationProvider());
-    String base64EncryptionManager = 
SerializationUtil.serializeToBase64(table.encryption());
-
-    if (base64Table != null) {
-      config.set(InputFormatConfig.SERIALIZED_TABLE, base64Table);
+  static void overlayTableProperties(Configuration configuration, TableDesc 
tableDesc, Map<String, String> map) {
+    Properties props = tableDesc.getProperties();
+    Table table = Catalogs.loadTable(configuration, props);
+    String schemaJson = SchemaParser.toJson(table.schema());
+
+    Map<String, String> original = new HashMap<>(map);
+    map.clear();
+
+    map.putAll(Maps.fromProperties(props));
+    map.putAll(original);
+
+    map.put(InputFormatConfig.TABLE_IDENTIFIER, 
props.getProperty(Catalogs.NAME));
+    map.put(InputFormatConfig.TABLE_LOCATION, table.location());
+    map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+    map.put(InputFormatConfig.PARTITION_SPEC, 
PartitionSpecParser.toJson(table.spec()));
+    String formatString = PropertyUtil.propertyAsString(table.properties(), 
DEFAULT_FILE_FORMAT,
+        DEFAULT_FILE_FORMAT_DEFAULT);
+    map.put(InputFormatConfig.WRITE_FILE_FORMAT, 
formatString.toUpperCase(Locale.ENGLISH));
+    map.put(InputFormatConfig.WRITE_TARGET_FILE_SIZE,
+        table.properties().getOrDefault(WRITE_TARGET_FILE_SIZE_BYTES,
+            String.valueOf(WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT)));
+
+    if (table instanceof Serializable) {
+      map.put(InputFormatConfig.SERIALIZED_TABLE, 
SerializationUtil.serializeToBase64(table));
     }
 
-    config.set(InputFormatConfig.FILE_IO, base64Io);
-    config.set(InputFormatConfig.LOCATION_PROVIDER, base64LocationProvider);
-    config.set(InputFormatConfig.ENCRYPTION_MANAGER, base64EncryptionManager);
+    map.put(InputFormatConfig.FILE_IO, 
SerializationUtil.serializeToBase64(table.io()));
+    map.put(InputFormatConfig.LOCATION_PROVIDER, 
SerializationUtil.serializeToBase64(table.locationProvider()));
+    map.put(InputFormatConfig.ENCRYPTION_MANAGER, 
SerializationUtil.serializeToBase64(table.encryption()));
+    // We need to remove this otherwise the job.xml will be invalid
+    map.remove("columns.comments");

Review comment:
       Added more comments to explain what is happening




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to