Repository: incubator-hawq Updated Branches: refs/heads/master b59603125 -> 0bb0a2074
HAWQ-898. Revert copy checkinstall-good case as cases transactions and row_types need this to prepare data Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/0bb0a207 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/0bb0a207 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/0bb0a207 Branch: refs/heads/master Commit: 0bb0a207467ade1eb5d8b52a2465d23c1e803fda Parents: b596031 Author: YI JIN <y...@pivotal.io> Authored: Wed Jul 27 12:23:23 2016 +1000 Committer: YI JIN <y...@pivotal.io> Committed: Wed Jul 27 12:23:23 2016 +1000 ---------------------------------------------------------------------- src/test/regress/input/copy.source | 224 ++++++++++++++++++++++++++++++ src/test/regress/known_good_schedule | 1 + src/test/regress/output/copy.source | 214 ++++++++++++++++++++++++++++ 3 files changed, 439 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0bb0a207/src/test/regress/input/copy.source ---------------------------------------------------------------------- diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source new file mode 100755 index 0000000..ccab99b --- /dev/null +++ b/src/test/regress/input/copy.source @@ -0,0 +1,224 @@ +-- +-- COPY +-- +-- CLASS POPULATION +-- (any resemblance to real life is purely coincidental) +-- +COPY aggtest FROM '@abs_srcdir@/data/agg.data'; + +COPY onek FROM '@abs_srcdir@/data/onek.data'; + +COPY onek TO '@abs_builddir@/results/onek.data'; + +TRUNCATE onek; + +COPY onek FROM '@abs_builddir@/results/onek.data'; + +COPY tenk1 FROM '@abs_srcdir@/data/tenk.data'; + +COPY slow_emp4000 FROM '@abs_srcdir@/data/rect.data'; + +COPY person FROM '@abs_srcdir@/data/person.data'; + +COPY emp FROM '@abs_srcdir@/data/emp.data'; + +COPY student FROM '@abs_srcdir@/data/student.data'; + +COPY stud_emp FROM '@abs_srcdir@/data/stud_emp.data'; + +COPY road FROM '@abs_srcdir@/data/streets.data'; + +COPY real_city FROM '@abs_srcdir@/data/real_city.data'; + +COPY hash_i4_heap FROM '@abs_srcdir@/data/hash.data'; + +COPY hash_name_heap FROM '@abs_srcdir@/data/hash.data'; + +COPY hash_txt_heap FROM '@abs_srcdir@/data/hash.data'; + +COPY hash_f8_heap FROM '@abs_srcdir@/data/hash.data'; + +-- the data in this file has a lot of duplicates in the index key +-- fields, leading to long bucket chains and lots of table expansion. +-- this is therefore a stress test of the bucket overflow code (unlike +-- the data in hash.data, which has unique index keys). +-- +-- COPY hash_ovfl_heap FROM '@abs_srcdir@/data/hashovfl.data'; + +COPY bt_i4_heap FROM '@abs_srcdir@/data/desc.data'; + +COPY bt_name_heap FROM '@abs_srcdir@/data/hash.data'; + +COPY bt_txt_heap FROM '@abs_srcdir@/data/desc.data'; + +COPY bt_f8_heap FROM '@abs_srcdir@/data/hash.data'; + +COPY array_op_test FROM '@abs_srcdir@/data/array.data'; + +COPY array_index_op_test FROM '@abs_srcdir@/data/array.data'; + +--- test copying in CSV mode with various styles +--- of embedded line ending characters + +create temp table copytest ( + style text, + test text, + filler int); + +insert into copytest values('DOS',E'abc\r\ndef',1); +insert into copytest values('Unix',E'abc\ndef',2); +insert into copytest values('Mac',E'abc\rdef',3); +insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4); + +copy copytest to '@abs_builddir@/results/copytest.csv' csv; + +create temp table copytest2 (like copytest); + +copy copytest2 from '@abs_builddir@/results/copytest.csv' csv; + +select * from copytest except select * from copytest2 order by 1,2,3; + +truncate copytest2; + +--- same test but with an escape char different from quote char + +copy copytest to '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; + +copy copytest2 from '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; + +select * from copytest except select * from copytest2 order by 1,2,3; + + +-- test header line feature + +create temp table copytest3 ( + c1 int, + "col with , comma" text, + "col with "" quote" int) distributed by (c1); + +copy copytest3 from stdin csv header; +this is just a line full of junk that would error out if parsed +1,a,1 +2,b,2 +\. + +copy copytest3 to stdout csv header; +-- copy with error table +CREATE TABLE number (a INT) DISTRIBUTED BY (a); + +COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS; +these are invalid line should be insert into error table. +a +b +c +d +e +f +g +h +\. + +select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum; +select * from number; --should be empty +\d err_copy + +DROP TABLE err_copy; + +COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS; +these are invalid line should be insert into error table. +a +1 +b +2 +c +3 +d +4 +e +5 +f +6 +g +7 +h +\. + +select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum; +select count(*) from number; --should be 7 +DROP TABLE err_copy; + +TRUNCATE number; + +COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS; +these are invalid line should be insert into error table. +a +1 +b +2 +c +3 +d +4 +e +5 +f +6 +g +7 +h +i +\. + +select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum; -- should not exist +select count(*) from number; --should be empty + +TRUNCATE number; +CREATE TABLE err_copy (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea) distributed randomly; + +COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS; +these are invalid line should be insert into error table. +a +1 +b +2 +c +3 +d +4 +e +5 +f +6 +g +7 +h +\. + +select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum; +select count(*) from number; --should be 7 +DROP TABLE err_copy; + +-- invalid error table schema +TRUNCATE number; +create table invalid_error_table1 (a int) distributed randomly; +create table invalid_error_table3 (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea) + distributed by (cmdtime); + +COPY number FROM STDIN LOG ERRORS INTO invalid_error_table1 SEGMENT REJECT LIMIT 10 ROWS; -- should fail +these are invalid line should be insert into error table. +1 +\. + +; + +COPY number FROM STDIN LOG ERRORS INTO invalid_error_table3 SEGMENT REJECT LIMIT 10 ROWS; -- should fail +these are invalid line should be insert into error table. +1 +\. + +; + +DROP TABLE invalid_error_table1; +DROP TABLE invalid_error_table3; + +DROP TABLE number; http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0bb0a207/src/test/regress/known_good_schedule ---------------------------------------------------------------------- diff --git a/src/test/regress/known_good_schedule b/src/test/regress/known_good_schedule index dcb6a7a..8c9091f 100755 --- a/src/test/regress/known_good_schedule +++ b/src/test/regress/known_good_schedule @@ -44,6 +44,7 @@ ignore: horology ignore: create_type test: create_table_test ignore: create_function_2 +test: copy ignore: copyselect ignore: constraints ignore: triggers http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0bb0a207/src/test/regress/output/copy.source ---------------------------------------------------------------------- diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source new file mode 100755 index 0000000..d250166 --- /dev/null +++ b/src/test/regress/output/copy.source @@ -0,0 +1,214 @@ +-- +-- COPY +-- +-- CLASS POPULATION +-- (any resemblance to real life is purely coincidental) +-- +COPY aggtest FROM '@abs_srcdir@/data/agg.data'; +COPY onek FROM '@abs_srcdir@/data/onek.data'; +COPY onek TO '@abs_builddir@/results/onek.data'; +TRUNCATE onek; +COPY onek FROM '@abs_builddir@/results/onek.data'; +COPY tenk1 FROM '@abs_srcdir@/data/tenk.data'; +COPY slow_emp4000 FROM '@abs_srcdir@/data/rect.data'; +COPY person FROM '@abs_srcdir@/data/person.data'; +COPY emp FROM '@abs_srcdir@/data/emp.data'; +COPY student FROM '@abs_srcdir@/data/student.data'; +COPY stud_emp FROM '@abs_srcdir@/data/stud_emp.data'; +COPY road FROM '@abs_srcdir@/data/streets.data'; +COPY real_city FROM '@abs_srcdir@/data/real_city.data'; +COPY hash_i4_heap FROM '@abs_srcdir@/data/hash.data'; +COPY hash_name_heap FROM '@abs_srcdir@/data/hash.data'; +COPY hash_txt_heap FROM '@abs_srcdir@/data/hash.data'; +COPY hash_f8_heap FROM '@abs_srcdir@/data/hash.data'; +-- the data in this file has a lot of duplicates in the index key +-- fields, leading to long bucket chains and lots of table expansion. +-- this is therefore a stress test of the bucket overflow code (unlike +-- the data in hash.data, which has unique index keys). +-- +-- COPY hash_ovfl_heap FROM '@abs_srcdir@/data/hashovfl.data'; +COPY bt_i4_heap FROM '@abs_srcdir@/data/desc.data'; +COPY bt_name_heap FROM '@abs_srcdir@/data/hash.data'; +COPY bt_txt_heap FROM '@abs_srcdir@/data/desc.data'; +COPY bt_f8_heap FROM '@abs_srcdir@/data/hash.data'; +COPY array_op_test FROM '@abs_srcdir@/data/array.data'; +COPY array_index_op_test FROM '@abs_srcdir@/data/array.data'; +--- test copying in CSV mode with various styles +--- of embedded line ending characters +create temp table copytest ( + style text, + test text, + filler int); +insert into copytest values('DOS',E'abc\r\ndef',1); +insert into copytest values('Unix',E'abc\ndef',2); +insert into copytest values('Mac',E'abc\rdef',3); +insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4); +copy copytest to '@abs_builddir@/results/copytest.csv' csv; +create temp table copytest2 (like copytest); +copy copytest2 from '@abs_builddir@/results/copytest.csv' csv; +select * from copytest except select * from copytest2 order by 1,2,3; + style | test | filler +-------+------+-------- +(0 rows) + +truncate copytest2; +--- same test but with an escape char different from quote char +copy copytest to '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; +copy copytest2 from '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; +select * from copytest except select * from copytest2 order by 1,2,3; + style | test | filler +-------+------+-------- +(0 rows) + +-- test header line feature +create temp table copytest3 ( + c1 int, + "col with , comma" text, + "col with "" quote" int) distributed by (c1); +copy copytest3 from stdin csv header; +copy copytest3 to stdout csv header; +c1,"col with , comma","col with "" quote" +1,a,1 +2,b,2 +-- copy with error table +CREATE TABLE number (a INT) DISTRIBUTED BY (a); +COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS; +NOTICE: Error table "err_copy" does not exist. Auto generating an error table with the same name +WARNING: The error table was created in the same transaction as this operation. It will get dropped if transaction rolls back even if bad rows are present +HINT: To avoid this create the error table ahead of time using: CREATE TABLE <name> (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea) +NOTICE: Found 9 data formatting errors (9 or more input rows). Errors logged into error table "err_copy" +select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum; + relname | filename | linenum | bytenum | errmsg | rawdata | rawbytes +---------+----------+---------+---------+---------------------------------------------------------------------------------------------------------+-----------------------------------------------------------+---------- + number | <stdin> | 1 | | invalid input syntax for integer: "these are invalid line should be insert into error table.", column a | these are invalid line should be insert into error table. | + number | <stdin> | 2 | | invalid input syntax for integer: "a", column a | a | + number | <stdin> | 3 | | invalid input syntax for integer: "b", column a | b | + number | <stdin> | 4 | | invalid input syntax for integer: "c", column a | c | + number | <stdin> | 5 | | invalid input syntax for integer: "d", column a | d | + number | <stdin> | 6 | | invalid input syntax for integer: "e", column a | e | + number | <stdin> | 7 | | invalid input syntax for integer: "f", column a | f | + number | <stdin> | 8 | | invalid input syntax for integer: "g", column a | g | + number | <stdin> | 9 | | invalid input syntax for integer: "h", column a | h | +(9 rows) + +select * from number; --should be empty + a +--- +(0 rows) + +\d err_copy + Append-Only Table "public.err_copy" + Column | Type | Modifiers +----------+--------------------------+----------- + cmdtime | timestamp with time zone | + relname | text | + filename | text | + linenum | integer | + bytenum | integer | + errmsg | text | + rawdata | text | + rawbytes | bytea | +Compression Type: None +Compression Level: 0 +Block Size: 32768 +Checksum: f +Distributed randomly + +DROP TABLE err_copy; +COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS; +NOTICE: Error table "err_copy" does not exist. Auto generating an error table with the same name +WARNING: The error table was created in the same transaction as this operation. It will get dropped if transaction rolls back even if bad rows are present +HINT: To avoid this create the error table ahead of time using: CREATE TABLE <name> (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea) +NOTICE: Found 9 data formatting errors (9 or more input rows). Errors logged into error table "err_copy" +select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum; + relname | filename | linenum | bytenum | errmsg | rawdata | rawbytes +---------+----------+---------+---------+---------------------------------------------------------------------------------------------------------+-----------------------------------------------------------+---------- + number | <stdin> | 1 | | invalid input syntax for integer: "these are invalid line should be insert into error table.", column a | these are invalid line should be insert into error table. | + number | <stdin> | 2 | | invalid input syntax for integer: "a", column a | a | + number | <stdin> | 4 | | invalid input syntax for integer: "b", column a | b | + number | <stdin> | 6 | | invalid input syntax for integer: "c", column a | c | + number | <stdin> | 8 | | invalid input syntax for integer: "d", column a | d | + number | <stdin> | 10 | | invalid input syntax for integer: "e", column a | e | + number | <stdin> | 12 | | invalid input syntax for integer: "f", column a | f | + number | <stdin> | 14 | | invalid input syntax for integer: "g", column a | g | + number | <stdin> | 16 | | invalid input syntax for integer: "h", column a | h | +(9 rows) + +select count(*) from number; --should be 7 + count +------- + 7 +(1 row) + +DROP TABLE err_copy; +TRUNCATE number; +COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS; +NOTICE: Error table "err_copy" does not exist. Auto generating an error table with the same name +WARNING: The error table was created in the same transaction as this operation. It will get dropped if transaction rolls back even if bad rows are present +HINT: To avoid this create the error table ahead of time using: CREATE TABLE <name> (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea) +ERROR: Segment reject limit reached. Aborting operation. Last error was: invalid input syntax for integer: "i", column a +CONTEXT: COPY number, line 17, column a +select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum; -- should not exist +ERROR: relation "err_copy" does not exist +LINE 1: ...name,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy o... + ^ +select count(*) from number; --should be empty + count +------- + 0 +(1 row) + +TRUNCATE number; +CREATE TABLE err_copy (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea) distributed randomly; +COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS; +NOTICE: Found 9 data formatting errors (9 or more input rows). Errors logged into error table "err_copy" +select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum; + relname | filename | linenum | bytenum | errmsg | rawdata | rawbytes +---------+----------+---------+---------+---------------------------------------------------------------------------------------------------------+-----------------------------------------------------------+---------- + number | <stdin> | 1 | | invalid input syntax for integer: "these are invalid line should be insert into error table.", column a | these are invalid line should be insert into error table. | + number | <stdin> | 2 | | invalid input syntax for integer: "a", column a | a | + number | <stdin> | 4 | | invalid input syntax for integer: "b", column a | b | + number | <stdin> | 6 | | invalid input syntax for integer: "c", column a | c | + number | <stdin> | 8 | | invalid input syntax for integer: "d", column a | d | + number | <stdin> | 10 | | invalid input syntax for integer: "e", column a | e | + number | <stdin> | 12 | | invalid input syntax for integer: "f", column a | f | + number | <stdin> | 14 | | invalid input syntax for integer: "g", column a | g | + number | <stdin> | 16 | | invalid input syntax for integer: "h", column a | h | +(9 rows) + +select count(*) from number; --should be 7 + count +------- + 7 +(1 row) + +DROP TABLE err_copy; +-- invalid error table schema +TRUNCATE number; +create table invalid_error_table1 (a int) distributed randomly; +create table invalid_error_table3 (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea) + distributed by (cmdtime); + +COPY number FROM STDIN LOG ERRORS INTO invalid_error_table1 SEGMENT REJECT LIMIT 10 ROWS; -- should fail +ERROR: Relation "invalid_error_table1" already exists and is not of a valid error table format (expected 8 attributes, found 1) +these are invalid line should be insert into error table. +1 +\. +invalid command \. +; +ERROR: syntax error at or near "these" +LINE 1: these are invalid line should be insert into error table. + ^ +COPY number FROM STDIN LOG ERRORS INTO invalid_error_table3 SEGMENT REJECT LIMIT 10 ROWS; -- should fail +ERROR: Relation "invalid_error_table3" already exists and is not of a valid error table format. It appears to not distributed randomly +these are invalid line should be insert into error table. +1 +\. +invalid command \. +; +ERROR: syntax error at or near "these" +LINE 1: these are invalid line should be insert into error table. + ^ +DROP TABLE invalid_error_table1; +DROP TABLE invalid_error_table3; +DROP TABLE number;