Hi sir, it seems that HSQLDB is reporting the column "STRING" as type CLOB. The parameters --(input-)null-(non-)string work only for string-based columns (CHAR, VARCHAR, NCHAR, ...) and not for a CLOB. You might be able to overcome this by using the --map-column-java parameter and forcing its type to String (and not a CLOB):
sqoop import --map-column-java STRING=String ... You can find more details about type mapping in Sqoop User Guide: http://sqoop.apache.org/docs/1.4.4/SqoopUserGuide.html#_controlling_type_mapping Jarcec On Thu, Nov 21, 2013 at 09:56:15PM -0800, redshift-etl-user wrote: > Jarek - class included below. Thanks! > > > // ORM class for table 'null' > // WARNING: This class is AUTO-GENERATED. Modify at your own risk. > // > // Debug information: > // Generated date: Thu Nov 21 21:52:32 PST 2013 > // For connector: org.apache.sqoop.manager.GenericJdbcManager > import org.apache.hadoop.io.BytesWritable; > import org.apache.hadoop.io.Text; > import org.apache.hadoop.io.Writable; > import org.apache.hadoop.mapred.lib.db.DBWritable; > import com.cloudera.sqoop.lib.JdbcWritableBridge; > import com.cloudera.sqoop.lib.DelimiterSet; > import com.cloudera.sqoop.lib.FieldFormatter; > import com.cloudera.sqoop.lib.RecordParser; > import com.cloudera.sqoop.lib.BooleanParser; > import com.cloudera.sqoop.lib.BlobRef; > import com.cloudera.sqoop.lib.ClobRef; > import com.cloudera.sqoop.lib.LargeObjectLoader; > import com.cloudera.sqoop.lib.SqoopRecord; > import java.sql.PreparedStatement; > import java.sql.ResultSet; > import java.sql.SQLException; > import java.io.DataInput; > import java.io.DataOutput; > import java.io.IOException; > import java.nio.ByteBuffer; > import java.nio.CharBuffer; > import java.sql.Date; > import java.sql.Time; > import java.sql.Timestamp; > import java.util.Arrays; > import java.util.Iterator; > import java.util.List; > import java.util.Map; > import java.util.TreeMap; > > public class gXoTNYSfULokaKs extends SqoopRecord implements DBWritable, > Writable { > private final int PROTOCOL_VERSION = 3; > public int getClassFormatVersion() { return PROTOCOL_VERSION; } > protected ResultSet __cur_result_set; > private Integer ID; > public Integer get_ID() { > return ID; > } > public void set_ID(Integer ID) { > this.ID = ID; > } > public gXoTNYSfULokaKs 
with_ID(Integer ID) { > this.ID = ID; > return this; > } > private com.cloudera.sqoop.lib.ClobRef STRING; > public com.cloudera.sqoop.lib.ClobRef get_STRING() { > return STRING; > } > public void set_STRING(com.cloudera.sqoop.lib.ClobRef STRING) { > this.STRING = STRING; > } > public gXoTNYSfULokaKs with_STRING(com.cloudera.sqoop.lib.ClobRef STRING) > { > this.STRING = STRING; > return this; > } > public boolean equals(Object o) { > if (this == o) { > return true; > } > if (!(o instanceof gXoTNYSfULokaKs)) { > return false; > } > gXoTNYSfULokaKs that = (gXoTNYSfULokaKs) o; > boolean equal = true; > equal = equal && (this.ID == null ? that.ID == null : > this.ID.equals(that.ID)); > equal = equal && (this.STRING == null ? that.STRING == null : > this.STRING.equals(that.STRING)); > return equal; > } > public void readFields(ResultSet __dbResults) throws SQLException { > this.__cur_result_set = __dbResults; > this.ID = JdbcWritableBridge.readInteger(1, __dbResults); > this.STRING = JdbcWritableBridge.readClobRef(2, __dbResults); > } > public void loadLargeObjects(LargeObjectLoader __loader) > throws SQLException, IOException, InterruptedException { > this.STRING = __loader.readClobRef(2, this.__cur_result_set); > } > public void write(PreparedStatement __dbStmt) throws SQLException { > write(__dbStmt, 0); > } > > public int write(PreparedStatement __dbStmt, int __off) throws > SQLException { > JdbcWritableBridge.writeInteger(ID, 1 + __off, 4, __dbStmt); > JdbcWritableBridge.writeClobRef(STRING, 2 + __off, 2005, __dbStmt); > return 2; > } > public void readFields(DataInput __dataIn) throws IOException { > if (__dataIn.readBoolean()) { > this.ID = null; > } else { > this.ID = Integer.valueOf(__dataIn.readInt()); > } > if (__dataIn.readBoolean()) { > this.STRING = null; > } else { > this.STRING = > com.cloudera.sqoop.lib.LobSerializer.readClobFields(__dataIn); > } > } > public void write(DataOutput __dataOut) throws IOException { > if (null == this.ID) { > 
__dataOut.writeBoolean(true); > } else { > __dataOut.writeBoolean(false); > __dataOut.writeInt(this.ID); > } > if (null == this.STRING) { > __dataOut.writeBoolean(true); > } else { > __dataOut.writeBoolean(false); > com.cloudera.sqoop.lib.LobSerializer.writeClob(this.STRING, __dataOut); > } > } > private final DelimiterSet __outputDelimiters = new DelimiterSet((char) > 44, (char) 10, (char) 34, (char) 92, true); > public String toString() { > return toString(__outputDelimiters, true); > } > public String toString(DelimiterSet delimiters) { > return toString(delimiters, true); > } > public String toString(boolean useRecordDelim) { > return toString(__outputDelimiters, useRecordDelim); > } > public String toString(DelimiterSet delimiters, boolean useRecordDelim) { > StringBuilder __sb = new StringBuilder(); > char fieldDelim = delimiters.getFieldsTerminatedBy(); > __sb.append(FieldFormatter.escapeAndEnclose(ID==null?"":"" + ID, > delimiters)); > __sb.append(fieldDelim); > __sb.append(FieldFormatter.escapeAndEnclose(STRING==null?"":"" + > STRING, delimiters)); > if (useRecordDelim) { > __sb.append(delimiters.getLinesTerminatedBy()); > } > return __sb.toString(); > } > private final DelimiterSet __inputDelimiters = new DelimiterSet((char) > 44, (char) 10, (char) 34, (char) 92, true); > private RecordParser __parser; > public void parse(Text __record) throws RecordParser.ParseError { > if (null == this.__parser) { > this.__parser = new RecordParser(__inputDelimiters); > } > List<String> __fields = this.__parser.parseRecord(__record); > __loadFromFields(__fields); > } > > public void parse(CharSequence __record) throws RecordParser.ParseError { > if (null == this.__parser) { > this.__parser = new RecordParser(__inputDelimiters); > } > List<String> __fields = this.__parser.parseRecord(__record); > __loadFromFields(__fields); > } > > public void parse(byte [] __record) throws RecordParser.ParseError { > if (null == this.__parser) { > this.__parser = new 
RecordParser(__inputDelimiters); > } > List<String> __fields = this.__parser.parseRecord(__record); > __loadFromFields(__fields); > } > > public void parse(char [] __record) throws RecordParser.ParseError { > if (null == this.__parser) { > this.__parser = new RecordParser(__inputDelimiters); > } > List<String> __fields = this.__parser.parseRecord(__record); > __loadFromFields(__fields); > } > > public void parse(ByteBuffer __record) throws RecordParser.ParseError { > if (null == this.__parser) { > this.__parser = new RecordParser(__inputDelimiters); > } > List<String> __fields = this.__parser.parseRecord(__record); > __loadFromFields(__fields); > } > > public void parse(CharBuffer __record) throws RecordParser.ParseError { > if (null == this.__parser) { > this.__parser = new RecordParser(__inputDelimiters); > } > List<String> __fields = this.__parser.parseRecord(__record); > __loadFromFields(__fields); > } > > private void __loadFromFields(List<String> fields) { > Iterator<String> __it = fields.listIterator(); > String __cur_str; > __cur_str = __it.next(); > if (__cur_str.equals("null") || __cur_str.length() == 0) { this.ID = > null; } else { > this.ID = Integer.valueOf(__cur_str); > } > > __cur_str = __it.next(); > if (__cur_str.equals("null") || __cur_str.length() == 0) { this.STRING > = null; } else { > this.STRING = ClobRef.parse(__cur_str); > } > > } > > public Object clone() throws CloneNotSupportedException { > gXoTNYSfULokaKs o = (gXoTNYSfULokaKs) super.clone(); > o.STRING = (o.STRING != null) ? 
(com.cloudera.sqoop.lib.ClobRef) > o.STRING.clone() : null; > return o; > } > > public Map<String, Object> getFieldMap() { > Map<String, Object> __sqoop$field_map = new TreeMap<String, Object>(); > __sqoop$field_map.put("ID", this.ID); > __sqoop$field_map.put("STRING", this.STRING); > return __sqoop$field_map; > } > > public void setField(String __fieldName, Object __fieldVal) { > if ("ID".equals(__fieldName)) { > this.ID = (Integer) __fieldVal; > } > else if ("STRING".equals(__fieldName)) { > this.STRING = (com.cloudera.sqoop.lib.ClobRef) __fieldVal; > } > else { > throw new RuntimeException("No such field: " + __fieldName); > } > } > } > > > On Wed, Nov 20, 2013 at 8:55 AM, Jarek Jarcec Cecho <[email protected]>wrote: > > > Thank you sir! > > > > Would you mind also sharing with the generated class? I do not see > > anything suspicious, so I would like to explore the generated code. > > > > Also please note that direct usage of Sqoop Java API is not recommended as > > Sqoop at that point expect that entire environment will be properly > > configured. I would strongly suggest you to use the sqoop binary shipped > > with Sqoop. > > > > Jarcec > > > > On Mon, Nov 18, 2013 at 04:48:57PM -0800, redshift-etl-user wrote: > > > Hi Jarek, > > > > > > Sure! Note that I'm running Sqoop through "Sqoop.runTool". Responses > > inline. > > > > > > On Sun, Nov 17, 2013 at 5:47 PM, Jarek Jarcec Cecho <[email protected] > > >wrote: > > > > > > > Hi sir, > > > > would you mind sharing with us more details about your use case? 
Sqoop > > and > > > > HSQLDB versions, > > > > > > > > > sqoop-1.4.4-hadoop100.jar > > > hsqldb-1.8.0.10.jar > > > > > > > > > > command that you're using, > > > > > > > > > import --connect jdbc:h2:mem:play-test-985978706 --username sa --password > > > sa --verbose --query SELECT id,string FROM test WHERE $CONDITIONS ORDER > > BY > > > id LIMIT 200 -m 1 --target-dir > > > /var/folders/mm/m69802p900d9pqwxw3l85wd80000gn/T/1384821266308-0/data > > > --fields-terminated-by , --escaped-by \ --enclosed-by " > > > --null-non-string *--null-string > > > asdf* --outdir > > > /var/folders/mm/m69802p900d9pqwxw3l85wd80000gn/T/1384821266308-0/classes > > > --class-name avyhOWkUKUQHvkr --driver org.hsqldb.jdbcDriver --split-by id > > > --verbose > > > > > > > > > > log generated by Sqoop with parameter --verbose. > > > > > > > > > 16:34:26.380 [ [33mwarn [0m] [pool-3-thread-1] > > > org.apache.sqoop.tool.SqoopTool - $SQOOP_CONF_DIR has not been set in the > > > environment. Cannot check for additional configuration. > > > 16:34:26.433 [ [33mwarn [0m] [pool-3-thread-1] > > > org.apache.sqoop.tool.BaseSqoopTool - Setting your password on the > > > command-line is insecure. Consider using -P instead. > > > 16:34:26.439 [ [33mwarn [0m] [pool-3-thread-1] > > org.apache.sqoop.ConnFactory > > > - $SQOOP_CONF_DIR has not been set in the environment. Cannot check for > > > additional configuration. > > > 16:34:26.456 [ [33mwarn [0m] [pool-3-thread-1] > > org.apache.sqoop.ConnFactory > > > - Parameter --driver is set to an explicit driver however appropriate > > > connection manager is not being set (via --connection-manager). Sqoop is > > > going to fall back to org.apache.sqoop.manager.GenericJdbcManager. Please > > > specify explicitly which connection manager should be used next time. > > > Note: > > > > > /tmp/sqoop-romming/compile/8541deacc9cf2714256c59d89dd9bf0a/avyhOWkUKUQHvkr.java > > > uses or overrides a deprecated API. 
> > > Note: Recompile with -Xlint:deprecation for details. > > > 2013-11-18 16:34:27.319 java[48163:13c07] Unable to load realm info from > > > SCDynamicStore > > > 16:34:27.397 [ [33mwarn [0m] [pool-3-thread-1] > > > org.apache.sqoop.mapreduce.JobBase - SQOOP_HOME is unset. May not be able > > > to find all job dependencies. > > > 16:34:27.423 [ [33mwarn [0m] [pool-3-thread-1] > > > o.a.hadoop.util.NativeCodeLoader - Unable to load native-hadoop library > > for > > > your platform... using builtin-java classes where applicable > > > > > > > > > > Perhaps even a simplified data that will cause this behaviour? > > > > > > CREATE TABLE IF NOT EXISTS test (id INTEGER, string TEXT, PRIMARY KEY > > (id)); > > > INSERT INTO test (id, string) VALUES (1, 'test'); > > > INSERT INTO test (id) VALUES (2); > > > INSERT INTO test (id, string) VALUES (3, 'test'); > > > This produces a file containing: > > > 1,test > > > 2, > > > 3,test > > > > > > When it really ought to be > > > 1,test > > > 2,asdf > > > 3,test > > > > > > > > > > > > > Jarcec > > > > > > > > On Fri, Nov 15, 2013 at 01:29:36PM -0800, redshift-etl-user wrote: > > > > > Hi, > > > > > > > > > > I'm using the "--null-string" option to control the value of null > > string > > > > > columns for imports. I've tested this with MySQL and Postgres and it > > > > seems > > > > > to work fine. However, when I try with HSQLDB, it seems to ignore > > this > > > > > option and just return an empty string for nulls. In fact, when the > > > > > "--null-string" option isn't present it's supposed to return the > > string > > > > > "null" according to the spec, and it returns an empty string in this > > case > > > > > as well. > > > > > > > > > > Could someone else confirm this behavior? Seems like a bug. > > > > > > > > > > Thanks! > > > > > >
signature.asc
Description: Digital signature
