Markus Kemper created SQOOP-3089:
------------------------------------

             Summary: Sqoop import + --as-parquetfile + Oracle + BINARY_DOUBLE fails with insufficient message
                 Key: SQOOP-3089
                 URL: https://issues.apache.org/jira/browse/SQOOP-3089
             Project: Sqoop
          Issue Type: Bug
            Reporter: Markus Kemper


The ask in this JIRA is to enhance the error message to be more informative about how to map the data so the operation can be performed successfully. A full test case is below.

*Sqoop Error (current)*
16/12/21 12:16:08 ERROR tool.ImportTool: Imported Failed: Cannot convert SQL type 101

*Sqoop Error (requested)*
16/12/21 12:16:08 ERROR tool.ImportTool: Imported Failed: Cannot convert SQL type 101, please try using --map-column-java <column>=<type>
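
For illustration only, here is a minimal sketch of the requested wording, assuming the message comes from the IllegalArgumentException thrown where JDBC types are mapped for Avro/Parquet imports; the class and method names below are illustrative stand-ins, not the actual Sqoop code:

{noformat}
import java.sql.Types;

// Hypothetical sketch only (not the actual Sqoop patch): keep the existing
// "Cannot convert SQL type" text and append the --map-column-java hint when a
// JDBC type has no default mapping, e.g. Oracle BINARY_DOUBLE (type code 101).
public class TypeMappingHintSketch {

  // Illustrative stand-in for a toAvroType-style switch over java.sql.Types.
  public static String toAvroTypeName(int sqlType) {
    switch (sqlType) {
      case Types.NUMERIC:
      case Types.DECIMAL:
        return "string"; // matches the schema above, where NUMBER column C1 becomes string
      case Types.DOUBLE:
      case Types.FLOAT:
        return "double";
      case Types.VARCHAR:
      case Types.CHAR:
        return "string";
      default:
        // Requested behaviour: same error text, plus a hint pointing at the workaround.
        throw new IllegalArgumentException("Cannot convert SQL type " + sqlType
            + ", please try using --map-column-java <column>=<type>");
    }
  }

  public static void main(String[] args) {
    System.out.println(toAvroTypeName(Types.DOUBLE)); // prints "double"
    // 101 is Oracle's vendor-specific code for BINARY_DOUBLE; it is not in
    // java.sql.Types, so it falls through to the default branch and throws
    // with the enhanced message.
    toAvroTypeName(101);
  }
}
{noformat}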

{noformat}
#################
# STEP 01 - Setup Table and Data
#################

export MYCONN=jdbc:oracle:thin:@oracle.cloudera.com:1521/orcl12c;
export MYUSER=sqoop
export MYPSWD=cloudera

sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "drop table t1_oracle"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "create table t1_oracle (c1 int, c2 binary_double)"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "insert into t1_oracle values (1, 1.1)"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select * from t1_oracle"

Output:
-----------------------------------
| C1                   | C2       | 
-----------------------------------
| 1                    | 1.1      | 
-----------------------------------

#################
# STEP 02 - Import Data as Parquet (reproduction)
#################

sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --table T1_ORACLE --target-dir /user/user1/t1_oracle_parquet --delete-target-dir --num-mappers 1 --as-parquetfile --verbose

Output:
16/12/21 12:16:08 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM T1_ORACLE t WHERE 1=0
16/12/21 12:16:08 DEBUG manager.SqlManager: Found column C1 of type [2, 38, 0]
16/12/21 12:16:08 DEBUG manager.SqlManager: Found column C2 of type [101, 0, 0]
16/12/21 12:16:08 DEBUG manager.OracleManager$ConnCache: Caching released connection for jdbc:oracle:thin:@host-10-17-101-252.coe.cloudera.com:1521/orcl12c/sqoop
16/12/21 12:16:08 DEBUG util.ClassLoaderStack: Restoring classloader: java.net.FactoryURLClassLoader@55465b1f
16/12/21 12:16:08 ERROR tool.ImportTool: Imported Failed: Cannot convert SQL type 101

#################
# STEP 03 - Import Data as Parquet using option (--map-column-java C2=Double)
#################

sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --table T1_ORACLE --target-dir /user/user1/t1_oracle_parquet --delete-target-dir --num-mappers 1 --as-parquetfile --verbose --map-column-java C2=Double

Output:
16/12/21 12:34:36 INFO mapreduce.ImportJobBase: Transferred 1.6406 KB in 77.989 seconds (21.5415 bytes/sec)
16/12/21 12:34:36 INFO mapreduce.ImportJobBase: Retrieved 1 records.

#################
# STEP 04 - View Data using Parquet-tools
#################

hdfs dfs -ls /user/user1/t1_oracle_parquet/*.parquet
parquet-tools schema -d hdfs://host-10-17-103-169.coe.cloudera.com/user/user1/t1_oracle_parquet/e35dab0d-899f-46e7-88b4-cf1c4d24cae9.parquet
parquet-tools cat --json hdfs://host-10-17-103-169.coe.cloudera.com/user/user1/t1_oracle_parquet/e35dab0d-899f-46e7-88b4-cf1c4d24cae9.parquet

Output:
-rw-r--r--   3 user1 user1        612 2016-12-21 12:34 /user/user1/t1_oracle_parquet/e35dab0d-899f-46e7-88b4-cf1c4d24cae9.parquet
---
message T1_ORACLE {
  optional binary C1 (UTF8);
  optional double C2;
}
creator: parquet-mr version 1.5.0-cdh5.8.3 (build ${buildNumber})
extra: parquet.avro.schema = {"type":"record","name":"T1_ORACLE","doc":"Sqoop import of T1_ORACLE","fields":[{"name":"C1","type":["null","string"],"default":null,"columnName":"C1","sqlType":"2"},{"name":"C2","type":["null","double"],"default":null,"columnName":"C2","sqlType":"101"}],"tableName":"T1_ORACLE"}
---
{"C1":"1","C2":1.1}

#################
# STEP 05 - Import Data as Parquet using option (--map-column-java C2=String)
#################

sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --table T1_ORACLE --target-dir /user/user1/t1_oracle_parquet --delete-target-dir --num-mappers 1 --as-parquetfile --verbose --map-column-java C2=String

Output:
16/12/21 12:24:36 INFO mapreduce.ImportJobBase: Transferred 1.6221 KB in 48.9326 seconds (33.9447 bytes/sec)
16/12/21 12:24:36 INFO mapreduce.ImportJobBase: Retrieved 1 records.

#################
# STEP 06 - View Data using Parquet-tools
#################

hdfs dfs -ls /user/user1/t1_oracle_parquet/*.parquet
parquet-tools schema -d hdfs://host-10-17-103-169.coe.cloudera.com/user/user1/t1_oracle_parquet/1ac78d16-2d1e-4805-b7af-65c585085e9e.parquet
parquet-tools cat --json hdfs://host-10-17-103-169.coe.cloudera.com/user/user1/t1_oracle_parquet/1ac78d16-2d1e-4805-b7af-65c585085e9e.parquet

Output:
-rw-r--r--   3 user1 user1        593 2016-12-21 12:24 /user/user1/t1_oracle_parquet/1ac78d16-2d1e-4805-b7af-65c585085e9e.parquet
---
message T1_ORACLE {
  optional binary C1 (UTF8);
  optional binary C2 (UTF8);
}
creator: parquet-mr version 1.5.0-cdh5.8.3 (build ${buildNumber})
extra: parquet.avro.schema = {"type":"record","name":"T1_ORACLE","doc":"Sqoop import of T1_ORACLE","fields":[{"name":"C1","type":["null","string"],"default":null,"columnName":"C1","sqlType":"2"},{"name":"C2","type":["null","string"],"default":null,"columnName":"C2","sqlType":"101"}],"tableName":"T1_ORACLE"}
---
{"C1":"1","C2":"1.1"}
{noformat}



