GitHub user fszabo2 commented on a diff in the pull request:
https://github.com/apache/sqoop/pull/60#discussion_r238693735
--- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
@@ -83,27 +89,58 @@ public static String toHiveType(int sqlType) {
}
}
- public static String toHiveType(Schema.Type avroType) {
- switch (avroType) {
- case BOOLEAN:
- return HIVE_TYPE_BOOLEAN;
- case INT:
- return HIVE_TYPE_INT;
- case LONG:
- return HIVE_TYPE_BIGINT;
- case FLOAT:
- return HIVE_TYPE_FLOAT;
- case DOUBLE:
- return HIVE_TYPE_DOUBLE;
- case STRING:
- case ENUM:
- return HIVE_TYPE_STRING;
- case BYTES:
- case FIXED:
- return HIVE_TYPE_BINARY;
- default:
- return null;
+ public static String toHiveType(Schema schema, SqoopOptions options) {
+ if (schema.getType() == Schema.Type.UNION) {
+ for (Schema subSchema : schema.getTypes()) {
+ if (subSchema.getType() != Schema.Type.NULL) {
+ return toHiveType(subSchema, options);
+ }
+ }
+ }
+
+ Schema.Type avroType = schema.getType();
+ switch (avroType) {
+ case BOOLEAN:
+ return HIVE_TYPE_BOOLEAN;
+ case INT:
+ return HIVE_TYPE_INT;
+ case LONG:
+ return HIVE_TYPE_BIGINT;
+ case FLOAT:
+ return HIVE_TYPE_FLOAT;
+ case DOUBLE:
+ return HIVE_TYPE_DOUBLE;
+ case STRING:
+ case ENUM:
+ return HIVE_TYPE_STRING;
+ case BYTES:
+ return mapToDecimalOrBinary(schema, options);
+ case FIXED:
+ return HIVE_TYPE_BINARY;
+ default:
+ throw new RuntimeException(String.format("There is no Hive type
mapping defined for the Avro type of: %s ", avroType.getName()));
+ }
+ }
+
+ private static String mapToDecimalOrBinary(Schema schema, SqoopOptions
options) {
+ boolean logicalTypesEnabled =
options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL,
false);
+ if (logicalTypesEnabled && schema.getLogicalType() != null &&
schema.getLogicalType() instanceof Decimal) {
+ Decimal decimal = (Decimal) schema.getLogicalType();
+
+ // trimming precision and scale to Hive's maximum values.
+ int precision = Math.min(HiveDecimal.MAX_PRECISION,
decimal.getPrecision());
+ if (precision < decimal.getPrecision()) {
+ LOG.warn("Warning! Precision in the Hive table definition will be
smaller than the actual precision of the column on storage! Hive may not be
able to read data from this column.");
--- End diff --
Do you think we should remove this warning? (I think that, even if it's
redundant, it's still useful to write it out.)
---