comphead commented on code in PR #263: URL: https://github.com/apache/arrow-datafusion-comet/pull/263#discussion_r1566263894
########## doc/spark_coverage.txt: ########## @@ -0,0 +1,421 @@ ++---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +|name |details | ++---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +|! |{PASSED, [{SELECT ! true;, OK}]} | +|% |{PASSED, [{SELECT 2 % 1.8;, OK}]} | +|& |{PASSED, [{SELECT 3 & 5;, OK}]} | +|* |{PASSED, [{SELECT 2 * 3;, OK}]} | +|+ |{PASSED, [{SELECT 1 + 2;, OK}]} | +|- |{PASSED, [{SELECT 2 - 1;, OK}]} | +|/ |{PASSED, [{SELECT 3 / 2;, OK}]} | +|< |{PASSED, [{SELECT 1 < 2;, OK}]} | +|<= |{PASSED, [{SELECT 2 <= 2;, OK}]} | +|<=> |{PASSED, [{SELECT 2 <=> 2;, OK}]} | +|= |{PASSED, [{SELECT 2 = 2;, OK}]} | +|== |{PASSED, [{SELECT 2 == 2;, OK}]} | +|> |{PASSED, [{SELECT 2 > 1;, OK}]} | +|>= |{PASSED, [{SELECT 2 >= 1;, OK}]} | +|^ |{PASSED, [{SELECT 3 ^ 5;, OK}]} | +|abs |{PASSED, [{SELECT abs(-1);, OK}]} | +|acos |{PASSED, [{SELECT acos(1);, OK}]} | +|acosh |{PASSED, [{SELECT acosh(1);, OK}]} | +|add_months |{PASSED, [{SELECT add_months('2016-08-31', 1);, OK}]} | +|aes_decrypt |{PASSED, [{SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333');, OK}]} | +|aes_encrypt |{FAILED, [{SELECT hex(aes_encrypt('Spark', '0000111122223333'));, Failed on something else. 
Check query manually}]} | +|aggregate |{FAILED, [{SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x);, Unsupported}]} | +|and |{PASSED, [{SELECT true and true;, OK}]} | +|any |{FAILED, [{SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col);, Unsupported}]} | +|any_value |{FAILED, [{SELECT any_value(col) FROM VALUES (10), (5), (20) AS tab(col);, Unsupported}]} | +|approx_count_distinct |{FAILED, [{SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1);, Unsupported}]} | +|approx_percentile |{FAILED, [{SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col);, Unsupported}]} | +|array |{FAILED, [{SELECT array(1, 2, 3);, Unsupported}]} | +|array_agg |{FAILED, [{SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col);, Unsupported}]} | +|array_append |{FAILED, [{SELECT array_append(array('b', 'd', 'c', 'a'), 'd');, Unsupported}]} | +|array_compact |{FAILED, [{SELECT array_compact(array(1, 2, 3, null));, Unsupported}]} | +|array_contains |{PASSED, [{SELECT array_contains(array(1, 2, 3), 2);, OK}]} | +|array_distinct |{FAILED, [{SELECT array_distinct(array(1, 2, 3, null, 3));, Unsupported}]} | +|array_except |{FAILED, [{SELECT array_except(array(1, 2, 3), array(1, 3, 5));, Unsupported}]} | +|array_insert |{FAILED, [{SELECT array_insert(array(1, 2, 3, 4), 5, 5);, Unsupported}]} | +|array_intersect |{FAILED, [{SELECT array_intersect(array(1, 2, 3), array(1, 3, 5));, Unsupported}]} | +|array_join |{PASSED, [{SELECT array_join(array('hello', 'world'), ' ');, OK}]} | +|array_max |{PASSED, [{SELECT array_max(array(1, 20, null, 3));, OK}]} | +|array_min |{PASSED, [{SELECT array_min(array(1, 20, null, 3));, OK}]} | +|array_position |{PASSED, [{SELECT array_position(array(3, 2, 1), 1);, OK}]} | +|array_remove |{FAILED, [{SELECT array_remove(array(1, 2, 3, null, 3), 3);, Unsupported}]} | +|array_repeat |{FAILED, [{SELECT array_repeat('123', 2);, Unsupported}]} | +|array_size |{PASSED, 
[{SELECT array_size(array('b', 'd', 'c', 'a'));, OK}]} | +|array_sort |{FAILED, [{SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end);, Unsupported}]} | +|array_union |{FAILED, [{SELECT array_union(array(1, 2, 3), array(1, 3, 5));, Unsupported}]} | +|arrays_overlap |{PASSED, [{SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5));, OK}]} | +|arrays_zip |{FAILED, [{SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4));, Unsupported}]} | +|ascii |{PASSED, [{SELECT ascii('222');, OK}]} | +|asin |{PASSED, [{SELECT asin(0);, OK}]} | +|asinh |{PASSED, [{SELECT asinh(0);, OK}]} | +|assert_true |{PASSED, [{SELECT assert_true(0 < 1);, OK}]} | +|atan |{PASSED, [{SELECT atan(0);, OK}]} | +|atan2 |{PASSED, [{SELECT atan2(0, 0);, OK}]} | +|atanh |{PASSED, [{SELECT atanh(0);, OK}]} | +|avg |{FAILED, [{SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} | +|base64 |{PASSED, [{SELECT base64('Spark SQL');, OK}]} | +|bigint |{SKIPPED, []} | +|bin |{PASSED, [{SELECT bin(13);, OK}]} | +|binary |{SKIPPED, []} | +|bit_and |{FAILED, [{SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col);, Unsupported}]} | +|bit_count |{PASSED, [{SELECT bit_count(0);, OK}]} | +|bit_get |{PASSED, [{SELECT bit_get(11, 0);, OK}]} | +|bit_length |{PASSED, [{SELECT bit_length('Spark SQL');, OK}]} | +|bit_or |{FAILED, [{SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col);, Unsupported}]} | +|bit_xor |{FAILED, [{SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col);, Unsupported}]} | +|bool_and |{FAILED, [{SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col);, Unsupported}]} | +|bool_or |{FAILED, [{SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col);, Unsupported}]} | +|boolean |{SKIPPED, []} | +|bround |{PASSED, [{SELECT bround(2.5, 0);, OK}]} | +|btrim |{PASSED, [{SELECT btrim(' SparkSQL ');, OK}]} | +|cardinality |{PASSED, [{SELECT cardinality(array('b', 'd', 'c', 'a'));, OK}]} | +|cast |{PASSED, 
[{SELECT cast('10' as int);, OK}]} | +|cbrt |{PASSED, [{SELECT cbrt(27.0);, OK}]} | +|ceil |{PASSED, [{SELECT ceil(-0.1);, OK}]} | +|ceiling |{PASSED, [{SELECT ceiling(-0.1);, OK}]} | +|char |{PASSED, [{SELECT char(65);, OK}]} | +|char_length |{PASSED, [{SELECT char_length('Spark SQL ');, OK}]} | +|character_length |{PASSED, [{SELECT character_length('Spark SQL ');, OK}]} | +|chr |{PASSED, [{SELECT chr(65);, OK}]} | +|coalesce |{PASSED, [{SELECT coalesce(NULL, 1, NULL);, OK}]} | +|collect_list |{FAILED, [{SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col);, Unsupported}]} | +|collect_set |{FAILED, [{SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col);, Unsupported}]} | +|concat |{PASSED, [{SELECT concat('Spark', 'SQL');, OK}]} | +|concat_ws |{PASSED, [{SELECT concat_ws(' ', 'Spark', 'SQL');, OK}]} | +|contains |{PASSED, [{SELECT contains('Spark SQL', 'Spark');, OK}]} | +|conv |{PASSED, [{SELECT conv('100', 2, 10);, OK}]} | +|convert_timezone |{FAILED, [{SELECT convert_timezone('Europe/Brussels', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00');, Failed on native side}]} | +|corr |{FAILED, [{SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2);, Unsupported}]} | +|cos |{PASSED, [{SELECT cos(0);, OK}]} | +|cosh |{PASSED, [{SELECT cosh(0);, OK}]} | +|cot |{PASSED, [{SELECT cot(1);, OK}]} | +|count |{FAILED, [{SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col);, Unsupported}]} | +|count_if |{FAILED, [{SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col);, Unsupported}]} | +|count_min_sketch |{FAILED, [{SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col);, Unsupported}]} | +|covar_pop |{FAILED, [{SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2);, Unsupported}]} | +|covar_samp |{FAILED, [{SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2);, Unsupported}]} | +|crc32 |{PASSED, [{SELECT crc32('Spark');, OK}]} 
| +|csc |{PASSED, [{SELECT csc(1);, OK}]} | +|cume_dist |{FAILED, [{SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} | +|curdate |{PASSED, [{SELECT curdate();, OK}]} | +|current_catalog |{PASSED, [{SELECT current_catalog();, OK}]} | +|current_database |{PASSED, [{SELECT current_database();, OK}]} | +|current_date |{PASSED, [{SELECT current_date();, OK}]} | +|current_schema |{PASSED, [{SELECT current_schema();, OK}]} | +|current_timestamp |{FAILED, [{SELECT current_timestamp();, Failed on something else. Check query manually}]} | +|current_timezone |{PASSED, [{SELECT current_timezone();, OK}]} | +|current_user |{PASSED, [{SELECT current_user();, OK}]} | +|date |{SKIPPED, []} | +|date_add |{PASSED, [{SELECT date_add('2016-07-30', 1);, OK}]} | +|date_diff |{PASSED, [{SELECT date_diff('2009-07-31', '2009-07-30');, OK}]} | +|date_format |{PASSED, [{SELECT date_format('2016-04-08', 'y');, OK}]} | +|date_from_unix_date |{PASSED, [{SELECT date_from_unix_date(1);, OK}]} | +|date_part |{PASSED, [{SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456');, OK}]} | +|date_sub |{PASSED, [{SELECT date_sub('2016-07-30', 1);, OK}]} | +|date_trunc |{PASSED, [{SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359');, OK}]} | +|dateadd |{PASSED, [{SELECT dateadd('2016-07-30', 1);, OK}]} | +|datediff |{PASSED, [{SELECT datediff('2009-07-31', '2009-07-30');, OK}]} | +|datepart |{PASSED, [{SELECT datepart('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456');, OK}]} | +|day |{PASSED, [{SELECT day('2009-07-30');, OK}]} | +|dayofmonth |{PASSED, [{SELECT dayofmonth('2009-07-30');, OK}]} | +|dayofweek |{PASSED, [{SELECT dayofweek('2009-07-30');, OK}]} | +|dayofyear |{PASSED, [{SELECT dayofyear('2016-04-09');, OK}]} | +|decimal |{SKIPPED, []} | +|decode |{PASSED, [{SELECT decode(encode('abc', 'utf-8'), 'utf-8');, OK}]} | +|degrees |{PASSED, [{SELECT degrees(3.141592653589793);, OK}]} | +|dense_rank 
|{FAILED, [{SELECT a, b, dense_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} | +|div |{PASSED, [{SELECT 3 div 2;, OK}]} | +|double |{SKIPPED, []} | +|e |{PASSED, [{SELECT e();, OK}]} | +|element_at |{PASSED, [{SELECT element_at(array(1, 2, 3), 2);, OK}]} | +|elt |{FAILED, [{SELECT elt(1, 'scala', 'java');, Unsupported}]} | +|encode |{PASSED, [{SELECT encode('abc', 'utf-8');, OK}]} | +|endswith |{PASSED, [{SELECT endswith('Spark SQL', 'SQL');, OK}]} | +|equal_null |{PASSED, [{SELECT equal_null(3, 3);, OK}]} | +|every |{FAILED, [{SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col);, Unsupported}]} | +|exists |{FAILED, [{SELECT exists(array(1, 2, 3), x -> x % 2 == 0);, Unsupported}]} | +|exp |{PASSED, [{SELECT exp(0);, OK}]} | +|explode |{FAILED, [{SELECT explode(array(10, 20));, Unsupported}]} | +|explode_outer |{FAILED, [{SELECT explode_outer(array(10, 20));, Unsupported}]} | +|expm1 |{PASSED, [{SELECT expm1(0);, OK}]} | +|extract |{PASSED, [{SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456');, OK}]} | +|factorial |{PASSED, [{SELECT factorial(5);, OK}]} | +|filter |{FAILED, [{SELECT filter(array(1, 2, 3), x -> x % 2 == 1);, Unsupported}]} | +|find_in_set |{PASSED, [{SELECT find_in_set('ab','abc,b,ab,c,def');, OK}]} | +|first |{FAILED, [{SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col);, Unsupported}]} | +|first_value |{FAILED, [{SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col);, Unsupported}]} | +|flatten |{FAILED, [{SELECT flatten(array(array(1, 2), array(3, 4)));, Unsupported}]} | +|float |{SKIPPED, []} | +|floor |{PASSED, [{SELECT floor(-0.1);, OK}]} | +|forall |{FAILED, [{SELECT forall(array(1, 2, 3), x -> x % 2 == 0);, Unsupported}]} | +|format_number |{PASSED, [{SELECT format_number(12332.123456, 4);, OK}]} | +|format_string |{PASSED, [{SELECT format_string("Hello World %d %s", 100, "days");, OK}]} | +|from_csv |{FAILED, [{SELECT 
from_csv('1, 0.8', 'a INT, b DOUBLE');, Unsupported}]} | +|from_json |{FAILED, [{SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE');, Unsupported}]} | +|from_unixtime |{PASSED, [{SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss');, OK}]} | +|from_utc_timestamp |{PASSED, [{SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul');, OK}]} | +|get |{PASSED, [{SELECT get(array(1, 2, 3), 0);, OK}]} | +|get_json_object |{PASSED, [{SELECT get_json_object('{"a":"b"}', '$.a');, OK}]} | +|getbit |{PASSED, [{SELECT getbit(11, 0);, OK}]} | +|greatest |{PASSED, [{SELECT greatest(10, 9, 2, 4, 3);, OK}]} | +|grouping |{FAILED, [{SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name);, Failed on something else. Check query manually}]} | +|grouping_id |{FAILED, [{SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);, Failed on something else. Check query manually}]} | +|hash |{PASSED, [{SELECT hash('Spark', array(123), 2);, OK}]} | Review Comment: If I check something not supported, the plan is different, and I can see the fallback to Spark ``` == Physical Plan == *(1) Project [[2] AS ifnull(NULL, array(2))#9, x#7] +- *(1) ColumnarToRow +- CometScan parquet [x#7] Batched: true, DataFilters: [], Format: CometParquet, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/6f/_s1vnnd55zgfkx7zlwnrnv0h0000gn/T/spark-fa..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<x:string> ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org