All ,
Am trying to run a free form SQL from Sqoop to import data from Oracle 10g ,
every time I keep getting the split-by column mentioned is invalid . Can you
please help figure what am I missing .
Sqoop version : 1.4.3
Oracle 10g
Ojdbc6.jar
Error
14/03/14 08:55:16 INFO mapred.JobClient: Task Id :
attempt_201403100830_0118_m_000009_0, Status : FAILED
java.io.IOException: SQLException in nextKeyValue
at
org.apache.sqoop.mapreduce.db.DBRecordReader.nextKeyValue(DBRecordReader.java:265)
at
org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:530)
at
org.apache.hadoop.mapreduce.MapContext.nextKeyValue(MapContext.java:67)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at
org.apache.sqoop.mapreduce.AutoProgressMapper.run(AutoProgressMapper.java:64)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:363)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1232)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.sql.SQLSyntaxErrorException: ORA-00904:
"SI_CUSTOMER_XREF2"."CUSTOMER_ID": invalid identifier
QUERY
sqoop import --connect <<connection parameter>> -query
"
SELECT
SI_CUSTOMER_XREF2.GESC_CLIENT_NAME,
SI_CUSTOMER_XREF2.GESC_CLIENT_ID,
SI_F_L0_TRADE.DATA_SRC_SYS_CD,
SI_F_L0_TRADE.SI_TRADE_ID,
SI_CUSTOMER_XREF2.CUSTOMER_ID,
****
****
FROM
(
SELECT * FROM SI_CUSTOMER_XREF2 WHERE RUN_ID='209'
and \$CONDITIONS
) SI_CUSTOMER_XREF2 join
(
SELECT T.*
FROM SI_F_L0_TRADE T
WHERE T.REVENUE_DATE >= '01-JAN-2014' and \$CONDITIONS
AND
T.ORG_DC_SNAPSHOT_ID = (SELECT D.DC_SNAPSHOT_ID FROM DC_DATE_CARD D WHERE
D.RUN_ID = '209' and \$CONDITIONS
)
) SI_F_L0_TRADE ON (
SI_F_L0_TRADE.CUSTOMER_SGK=SI_CUSTOMER_XREF2.CUSTOMER_SGK )
join
(
SELECT * FROM SI_F_L0_TRADE_GROUP_ID G where RUN_ID='209' and \$CONDITIONS
AND REVENUE_DATE >= '01-JAN-2014'
AND G.ORG_DC_SNAPSHOT_ID = (SELECT D.DC_SNAPSHOT_ID FROM DC_DATE_CARD D WHERE
D.RUN_ID = '209' and \$CONDITIONS
)
) SI_F_L0_TRADE_GROUP_ID ON (
SI_F_L0_TRADE.TRADE_SGK=SI_F_L0_TRADE_GROUP_ID.TRADE_SGK )
join
( SELECT * FROM cpl_trade_instrument_ref WHERE RUN_ID= '209' -- and
\$CONDITIONS
) CPL_INSTRUMENT_REF ON (
CPL_INSTRUMENT_REF.INSTRUMENT_SGK=SI_F_L0_TRADE.INSTRUMENT_SGK )
JOIN
(
SELECT * FROM CPL_INSTRUMENT_REF_EXT WHERE RUN_ID='209' and \$CONDITIONS
) CPL_INSTRUMENT_REF_EXT ON (
CPL_INSTRUMENT_REF.INSTRUMENT_SGK=CPL_INSTRUMENT_REF_EXT.INSTRUMENT_SGK)
join
(
SELECT * FROM SI_BUSINESS_REGION WHERE RUN_ID='209' and \$CONDITIONS
) ISSUER_BUSINESS_REGION ON (
ISSUER_BUSINESS_REGION.ISO_CNTRY_3_CHAR_CD=CPL_INSTRUMENT_REF_EXT.ISSUER_COUNTRY_CD
)
join
(
SELECT (product_line_cd ||
CASE WHEN nvl(new_issue_ind, '0') = '1' THEN 'P' ELSE 'S'
END || '_' ||
nvl(business_line_cd, 'N') || '_' ||
nvl(trade_type_cd, 'N') || '_' ||
nvl(region_cd, 'N'
)) product_cd, SI_GROUP_ID_XREF.* FROM SI_GROUP_ID_XREF
WHERE RUN_ID = '209' and \$CONDITIONS
) SI_GROUP_ID_XREF ON (
SI_F_L0_TRADE_GROUP_ID.GROUP_ID=SI_GROUP_ID_XREF.GROUP_ID )
JOIN
(
SELECT * FROM SI_PRODUCT_LINE WHERE RUN_ID= '209' and \$CONDITIONS
) SI_PRODUCT_LINE ON (
SI_GROUP_ID_XREF.PRODUCT_LINE_CD=SI_PRODUCT_LINE.PRODUCT_LINE_L6_SGK )
join
(
SELECT * FROM SI_BUSINESS_REGION WHERE RUN_ID= '209' and \$CONDITIONS
) SI_BUSINESS_REGION ON (
SI_GROUP_ID_XREF.REGION_CD=SI_BUSINESS_REGION.REGION_L6_SGK )
JOIN
(
SELECT * FROM si_salesrep_xref WHERE run_id = '209' and \$CONDITIONS
) MARKETER ON ( MARKETER.SRC_SALESREP_ID=SI_F_L0_TRADE.SRC_SALES_REP_ID)
join
(
SELECT * FROM CDW_PRESENTATION.CPL_EMPLOYEE_REF WHERE run_id ='209' and
\$CONDITIONS
) MARKETER_EMPLOYEE_REF ON (
MARKETER.CS_EMPLOYEE_ID=MARKETER_EMPLOYEE_REF.EMPLOYEE_ID )
join
(
SELECT * FROM SI_BOOK WHERE RUN_ID= '209' and \$CONDITIONS
) SI_BOOK ON ( SI_BOOK.BOOK_SGK=SI_F_L0_TRADE.BOOK_SGK )
join
(SELECT * FROM cpl_sfo_country_xref ) DOMICILE_SFO_CNTRY_XREF ON (
SI_CUSTOMER_XREF2.DOMICILE_CNTRY_ISO_CD=DOMICILE_SFO_CNTRY_XREF.CNTRY_ISO_CD)
join
(
SELECT * FROM CPL_CLIENT_REF WHERE RUN_ID='209' and \$CONDITIONS
AND SRC_SYS_CD = 'GESC' ) CPL_CLIENT_REF
ON ( SI_CUSTOMER_XREF2.GESC_CLIENT_ID=CPL_CLIENT_REF.CLIENT_CD )
JOIN
(
select * from si_client_type where run_id = '209' and \$CONDITIONS
) CLIENT_TYPE ON ( CPL_CLIENT_REF.CLIENT_SGK=CLIENT_TYPE.CLIENT_SGK )
join
(
select * from si_client_types where run_id = '209' and \$CONDITIONS
) CLIENT_TYPES ON (
CLIENT_TYPE.CLIENT_TYPES_SGK=CLIENT_TYPES.CLIENT_TYPES_SGK )
WHERE MARKETER.SRC_SYS_CD=SI_F_L0_TRADE.SREP_SRC_SYS_CD and \$CONDITIONS
AND
(
( ( SI_F_L0_TRADE.IGNORE_REASON_CD ) IS NULL
AND ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID ) IS NOT NULL
AND ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID ) >0
AND SI_GROUP_ID_XREF.Can_Double_Count = 0
AND SI_GROUP_ID_XREF.Product_Line_Cd IS NOT NULL
AND SI_GROUP_ID_XREF.LOSS_IND >=0
)
AND
SI_PRODUCT_LINE.PRODUCT_LINE_L1_NAME = 'All (Equities)' )
GROUP BY
SI_CUSTOMER_XREF2.GESC_CLIENT_NAME,
SI_CUSTOMER_XREF2.GESC_CLIENT_ID,
SI_F_L0_TRADE.DATA_SRC_SYS_CD,
SI_F_L0_TRADE.SI_TRADE_ID,
SI_CUSTOMER_XREF2.CUSTOMER_ID,
****
***
" --split-by SI_CUSTOMER_XREF2.customer_id --boundary-query "select
min(CUSTOMER_ID),max(CUSTOMER_ID) from SI_CUSTOMER_XREF2 WHERE RUN_ID='209'"
-m 48 --fields-terminated-by "|" --target-dir "/SqoopImports/daily_eqt"
--verbose;
Suresh Sethuramaswamy
===============================================================================
Please access the attached hyperlink for an important electronic communications
disclaimer:
http://www.credit-suisse.com/legal/en/disclaimer_email_ib.html
===============================================================================