maropu commented on a change in pull request #24923: [SPARK-28071][SQL][TEST] 
Port strings.sql
URL: https://github.com/apache/spark/pull/24923#discussion_r308531894
 
 

 ##########
 File path: sql/core/src/test/resources/sql-tests/inputs/pgSQL/strings.sql
 ##########
 @@ -0,0 +1,656 @@
+--
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+-- STRINGS
+-- -- 
https://github.com/postgres/postgres/blob/REL_12_BETA1/src/test/regress/sql/strings.sql
+-- Test various data entry syntaxes.
+--
+
+-- [SPARK-28073] ANSI SQL: Character literals
+-- SQL string continuation syntax
+-- E021-03 character string literals
+-- SELECT 'first line'
+-- ' - next line'
+--     ' - third line'
+--     AS "Three lines to one";
+
+-- illegal string continuation syntax
+-- SELECT 'first line'
+-- ' - next line' /* this comment is not allowed here */
+-- ' - third line'
+--     AS "Illegal comment within continuation";
+
+-- Unicode escapes
+-- SET standard_conforming_strings TO on;
+
+SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
+SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
+
+SELECT U&' \' UESCAPE '!' AS "tricky";
+SELECT 'tricky' AS U&"\" UESCAPE '!';
+
+SELECT U&'wrong: \061';
+SELECT U&'wrong: \+0061';
+SELECT U&'wrong: +0061' UESCAPE '+';
+
+-- SET standard_conforming_strings TO off;
+
+SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
+SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
+
+SELECT U&' \' UESCAPE '!' AS "tricky";
+SELECT 'tricky' AS U&"\" UESCAPE '!';
+
+SELECT U&'wrong: \061';
+SELECT U&'wrong: \+0061';
+SELECT U&'wrong: +0061' UESCAPE '+';
+
+-- RESET standard_conforming_strings;
+
+-- Spark SQL only support escape mode
+-- bytea
+-- SET bytea_output TO hex;
+-- SELECT E'\\xDeAdBeEf'::bytea;
+-- SELECT E'\\x De Ad Be Ef '::bytea;
+-- SELECT E'\\xDeAdBeE'::bytea;
+-- SELECT E'\\xDeAdBeEx'::bytea;
+-- SELECT E'\\xDe00BeEf'::bytea;
+-- SELECT E'DeAdBeEf'::bytea;
+-- SELECT E'De\\000dBeEf'::bytea;
+-- SELECT E'De\123dBeEf'::bytea;
+-- SELECT E'De\\123dBeEf'::bytea;
+-- SELECT E'De\\678dBeEf'::bytea;
+
+-- SET bytea_output TO escape;
+SELECT binary('\\xDeAdBeEf');
+SELECT binary('\\x De Ad Be Ef ');
+SELECT binary('\\xDe00BeEf');
+SELECT binary('DeAdBeEf');
+SELECT binary('De\\000dBeEf');
+SELECT binary('De\\123dBeEf');
+
+-- Skip these tests because we do not have char/varchar type
+--
+-- test conversions between various string types
+-- E021-10 implicit casting among the character data types
+--
+
+-- SELECT CAST(f1 AS text) AS "text(char)" FROM CHAR_TBL;
+
+-- SELECT CAST(f1 AS text) AS "text(varchar)" FROM VARCHAR_TBL;
+
+-- SELECT CAST(name 'namefield' AS text) AS "text(name)";
+
+-- since this is an explicit cast, it should truncate w/o error:
+-- SELECT CAST(f1 AS char(10)) AS "char(text)" FROM TEXT_TBL;
+-- note: implicit-cast case is tested in char.sql
+
+-- SELECT CAST(f1 AS char(20)) AS "char(text)" FROM TEXT_TBL;
+
+-- SELECT CAST(f1 AS char(10)) AS "char(varchar)" FROM VARCHAR_TBL;
+
+-- SELECT CAST(name 'namefield' AS char(10)) AS "char(name)";
+
+-- SELECT CAST(f1 AS varchar) AS "varchar(text)" FROM TEXT_TBL;
+
+-- SELECT CAST(f1 AS varchar) AS "varchar(char)" FROM CHAR_TBL;
+
+-- SELECT CAST(name 'namefield' AS varchar) AS "varchar(name)";
+
+--
+-- test SQL string functions
+-- E### and T### are feature reference numbers from SQL99
+--
+
+-- E021-09 trim function
+SELECT TRIM(BOTH FROM '  bunch o blanks  ') = 'bunch o blanks' AS `bunch o 
blanks`;
+
+SELECT TRIM(LEADING FROM '  bunch o blanks  ') = 'bunch o blanks  ' AS `bunch 
o blanks  `;
+
+SELECT TRIM(TRAILING FROM '  bunch o blanks  ') = '  bunch o blanks' AS `  
bunch o blanks`;
+
+SELECT TRIM(BOTH 'x' FROM 'xxxxxsome Xsxxxxx') = 'some Xs' AS `some Xs`;
+
+-- E021-06 substring expression
+SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS `34567890`;
+
+SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS `456`;
+
+-- [SPARK-28076] Support regular expression substring
+-- T581 regular expression substring (with SQL's bizarre regexp syntax)
+-- SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
+
+-- No match should return NULL
+-- SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
+
+-- Null inputs should return NULL
+-- SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
+-- SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
+-- SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
+
+-- The first and last parts should act non-greedy
+-- SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
+-- SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
+
+-- Vertical bar in any part affects only that part
+-- SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
+-- SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
+-- SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
+
+-- Can't have more than two part separators
+-- SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
+
+-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
+-- SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
+-- SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
+
+-- substring() with just two arguments is not allowed by SQL spec;
+-- we accept it, but we interpret the pattern as a POSIX regexp not SQL
+-- SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
+
+-- With a parenthesized subexpression, return only what matches the subexpr
+-- SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
+
+-- [SPARK-27952] String Functions: regexp_replace is not compatible
+-- PostgreSQL extension to allow using back reference in replace string;
+-- SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) 
\\2-\\3');
+-- SELECT regexp_replace('AAA   BBB   CCC   ', E'\\s+', ' ', 'g');
+-- SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
+-- SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
+-- invalid regexp option
+-- SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
+
+-- set so we can tell NULL from empty string
+-- \pset null '\\N'
+
+-- [SPARK-28078] Add support other 4 REGEXP functions
+-- return all matches from regexp
+-- SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+
+-- test case insensitive
+-- SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
+
+-- global option - more than one match
+-- SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 
'g');
+
+-- empty capture group (matched empty string)
+-- SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
+-- no match
+-- SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$);
+-- optional capture group did not match, null entry in array
+-- SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
+
+-- no capture groups
+-- SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
+
+-- start/end-of-line matches are of zero length
+-- SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || 
chr(10) || 'baz', '^', 'mg');
+-- SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || 
chr(10) || 'baz', '$', 'mg');
+-- SELECT regexp_matches('1' || chr(10) || '2' || chr(10) || '3' || chr(10) || 
'4' || chr(10), '^.?', 'mg');
+-- SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || 
chr(10) || '4' || chr(10), '.?$', 'mg');
+-- SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || 
chr(10) || '4', '.?$', 'mg');
+
+-- give me errors
+-- SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'gz');
+-- SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);
+-- SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$);
+
+-- split string on regexp
+-- SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox 
jumps over the lazy dog', $re$\s+$re$) AS foo;
+-- SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', 
$re$\s+$re$);
+
+-- SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox 
jumps over the lazy dog', $re$\s*$re$) AS foo;
+-- SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', 
$re$\s*$re$);
+-- SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox 
jumps over the lazy dog', '') AS foo;
+-- SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', 
'');
+-- case insensitive
+-- SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx 
jUMPs ovEr The lazy dOG', 'e', 'i') AS foo;
+-- -- SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy 
dOG', 'e', 'i');
+-- no match of pattern
+-- SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox 
jumps over the lazy dog', 'nomatch') AS foo;
+-- SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', 
'nomatch');
+-- some corner cases
+-- SELECT regexp_split_to_array('123456','1');
+-- SELECT regexp_split_to_array('123456','6');
+-- SELECT regexp_split_to_array('123456','.');
+-- SELECT regexp_split_to_array('123456','');
+-- SELECT regexp_split_to_array('123456','(?:)');
+-- SELECT regexp_split_to_array('1','');
+-- errors
+-- SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx 
jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
+-- SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 
'e', 'iz');
+-- global option meaningless for regexp_split
+-- SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx 
jUMPs ovEr The lazy dOG', 'e', 'g') AS foo;
+-- SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 
'e', 'g');
+
+-- change NULL-display back
+-- \pset null ''
+
+-- E021-11 position expression
+SELECT POSITION('4' IN '1234567890') = '4' AS `4`;
+
+SELECT POSITION('5' IN '1234567890') = '5' AS `5`;
+
+-- [SPARK-28077] Add support string functions: OVERLAY
+-- T312 character overlay function
+-- SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f";
+
+-- SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5) AS "yabadaba";
+
+-- SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0) AS "yabadabadoo";
+
+-- SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4) AS "bubba";
+
+--
+-- test LIKE
+-- Be sure to form every test as a LIKE/NOT LIKE pair.
+--
+
+-- simplest examples
+-- E061-04 like predicate
+SELECT 'hawkeye' LIKE 'h%' AS `true`;
+SELECT 'hawkeye' NOT LIKE 'h%' AS `false`;
+
+SELECT 'hawkeye' LIKE 'H%' AS `false`;
+SELECT 'hawkeye' NOT LIKE 'H%' AS `true`;
+
+SELECT 'hawkeye' LIKE 'indio%' AS `false`;
+SELECT 'hawkeye' NOT LIKE 'indio%' AS `true`;
+
+SELECT 'hawkeye' LIKE 'h%eye' AS `true`;
+SELECT 'hawkeye' NOT LIKE 'h%eye' AS `false`;
+
+SELECT 'indio' LIKE '_ndio' AS `true`;
+SELECT 'indio' NOT LIKE '_ndio' AS `false`;
+
+SELECT 'indio' LIKE 'in__o' AS `true`;
+SELECT 'indio' NOT LIKE 'in__o' AS `false`;
+
+SELECT 'indio' LIKE 'in_o' AS `false`;
+SELECT 'indio' NOT LIKE 'in_o' AS `true`;
+
+-- [SPARK-28083] ANSI SQL: LIKE predicate: ESCAPE clause
+-- unused escape character
+-- SELECT 'hawkeye' LIKE 'h%' ESCAPE '#' AS "true";
+-- SELECT 'hawkeye' NOT LIKE 'h%' ESCAPE '#' AS "false";
+
+-- SELECT 'indio' LIKE 'ind_o' ESCAPE '$' AS "true";
+-- SELECT 'indio' NOT LIKE 'ind_o' ESCAPE '$' AS "false";
+
+-- escape character
+-- E061-05 like predicate with escape clause
+-- SELECT 'h%' LIKE 'h#%' ESCAPE '#' AS "true";
+-- SELECT 'h%' NOT LIKE 'h#%' ESCAPE '#' AS "false";
+
+-- SELECT 'h%wkeye' LIKE 'h#%' ESCAPE '#' AS "false";
+-- SELECT 'h%wkeye' NOT LIKE 'h#%' ESCAPE '#' AS "true";
+
+-- SELECT 'h%wkeye' LIKE 'h#%%' ESCAPE '#' AS "true";
+-- SELECT 'h%wkeye' NOT LIKE 'h#%%' ESCAPE '#' AS "false";
+
+-- SELECT 'h%awkeye' LIKE 'h#%a%k%e' ESCAPE '#' AS "true";
+-- SELECT 'h%awkeye' NOT LIKE 'h#%a%k%e' ESCAPE '#' AS "false";
+
+-- SELECT 'indio' LIKE '_ndio' ESCAPE '$' AS "true";
+-- SELECT 'indio' NOT LIKE '_ndio' ESCAPE '$' AS "false";
+
+-- SELECT 'i_dio' LIKE 'i$_d_o' ESCAPE '$' AS "true";
+-- SELECT 'i_dio' NOT LIKE 'i$_d_o' ESCAPE '$' AS "false";
+
+-- SELECT 'i_dio' LIKE 'i$_nd_o' ESCAPE '$' AS "false";
+-- SELECT 'i_dio' NOT LIKE 'i$_nd_o' ESCAPE '$' AS "true";
+
+-- SELECT 'i_dio' LIKE 'i$_d%o' ESCAPE '$' AS "true";
+-- SELECT 'i_dio' NOT LIKE 'i$_d%o' ESCAPE '$' AS "false";
+
+-- escape character same as pattern character
+-- SELECT 'maca' LIKE 'm%aca' ESCAPE '%' AS "true";
+-- SELECT 'maca' NOT LIKE 'm%aca' ESCAPE '%' AS "false";
+
+-- SELECT 'ma%a' LIKE 'm%a%%a' ESCAPE '%' AS "true";
+-- SELECT 'ma%a' NOT LIKE 'm%a%%a' ESCAPE '%' AS "false";
+
+-- SELECT 'bear' LIKE 'b_ear' ESCAPE '_' AS "true";
+-- SELECT 'bear' NOT LIKE 'b_ear' ESCAPE '_' AS "false";
+
+-- SELECT 'be_r' LIKE 'b_e__r' ESCAPE '_' AS "true";
+-- SELECT 'be_r' NOT LIKE 'b_e__r' ESCAPE '_' AS "false";
+
+-- SELECT 'be_r' LIKE '__e__r' ESCAPE '_' AS "false";
+-- SELECT 'be_r' NOT LIKE '__e__r' ESCAPE '_' AS "true";
+
+--
+-- test ILIKE (case-insensitive LIKE)
+-- Be sure to form every test as an ILIKE/NOT ILIKE pair.
+--
+
+SELECT 'hawkeye' ILIKE 'h%' AS "true";
 
 Review comment:
   IMO we don't need to do cuz that's pg-specific.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to