Github user nsyca commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16915#discussion_r101374593
  
    --- Diff: 
sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out
 ---
    @@ -0,0 +1,595 @@
    +-- Automatically generated by SQLQueryTestSuite
    +-- Number of queries: 16
    +
    +
    +-- !query 0
    +create temporary view t1 as select * from values
    +  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:00:00.000', date '2014-04-04'),
    +  ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
    +  ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 
01:02:00.001', date '2014-06-04'),
    +  ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
    +  ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:02:00.001', date '2014-05-05'),
    +  ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', null),
    +  ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 
01:02:00.001', null),
    +  ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-04'),
    +  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 
01:02:00.001', date '2014-09-04'),
    +  ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
    +  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:02:00.001', date '2014-04-04'),
    +  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04')
    +  as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i)
    +-- !query 0 schema
    +struct<>
    +-- !query 0 output
    +
    +
    +
    +-- !query 1
    +create temporary view t2 as select * from values
    +  ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:01:00.000', date '2014-04-04'),
    +  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
    +  ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
    +  ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 
01:01:00.000', date '2016-05-04'),
    +  ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 
01:01:00.000', null),
    +  ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
    +  ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
    +  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
    +  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
    +  ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-05'),
    +  ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:01:00.000', date '2014-09-04'),
    +  ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:01:00.000', date '2014-10-04'),
    +  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', null)
    +  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i)
    +-- !query 1 schema
    +struct<>
    +-- !query 1 output
    +
    +
    +
    +-- !query 2
    +create temporary view t3 as select * from values
    +  ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:02:00.000', date '2014-04-04'),
    +  ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
    +  ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
    +  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
    +  ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:02:00.000', date '2014-06-04'),
    +  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:02:00.000', date '2014-07-04'),
    +  ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:02:00.000', date '2014-08-04'),
    +  ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:02:00.000', date '2014-09-05'),
    +  ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:02:00.000', null),
    +  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 
01:02:00.000', null),
    +  ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
    +  ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:02:00.000', date '2015-05-04')
    +  as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i)
    +-- !query 2 schema
    +struct<>
    +-- !query 2 output
    +
    +
    +
    +-- !query 3
    +SELECT t2a,
    +       t2b,
    +       t2c,
    +       t2h,
    +       t2i
    +FROM   (SELECT *
    +        FROM   t2
    +        WHERE  t2a IN (SELECT t1a
    +                       FROM   t1)
    +        UNION ALL
    +        SELECT *
    +        FROM   t3
    +        WHERE  t3a IN (SELECT t1a
    +                       FROM   t1)) AS t3
    +WHERE  t2i IS NOT NULL AND
    +       2 * t2b = t2c
    +ORDER  BY t2c DESC nulls first
    +-- !query 3 schema
    +struct<t2a:string,t2b:smallint,t2c:int,t2h:timestamp,t2i:date>
    +-- !query 3 output
    +val1b      8       16      2015-05-04 01:01:00     2015-05-04
    +val1b      8       16      2014-07-04 01:01:00     2014-07-04
    +val1b      8       16      2014-06-04 01:02:00     2014-06-04
    +val1b      8       16      2014-07-04 01:02:00     2014-07-04
    +
    +
    +-- !query 4
    +SELECT t2a,
    +       t2b,
    +       t2d,
    +       Count(DISTINCT( t2h )),
    +       t2i
    +FROM   (SELECT *
    +        FROM   t2
    +        WHERE  t2a IN (SELECT t1a
    +                       FROM   t1
    +                       WHERE  t2b = t1b)
    +        UNION
    +        SELECT *
    +        FROM   t1
    +        WHERE  t1a IN (SELECT t3a
    +                       FROM   t3
    +                       WHERE  t1c = t3c)) AS t3
    +GROUP  BY t2a,
    +          t2b,
    +          t2d,
    +          t2i
    +ORDER  BY t2d DESC
    +-- !query 4 schema
    +struct<t2a:string,t2b:smallint,t2d:bigint,count(DISTINCT 
t2h):bigint,t2i:date>
    +-- !query 4 output
    +val1b      8       119     1       2015-05-04
    +val1b      8       19      1       2014-07-04
    +val1b      8       19      1       2014-05-04
    +
    +
    +-- !query 5
    +SELECT t2a,
    +       t2b,
    +       t2c,
    +       Min(t2d)
    +FROM   t2
    +WHERE  t2a IN (SELECT t1a
    +               FROM   t1
    +               WHERE  t1b = t2b)
    +GROUP BY t2a, t2b, t2c
    +UNION ALL
    +SELECT t2a,
    +       t2b,
    +       t2c,
    +       Max(t2d)
    +FROM   t2
    +WHERE  t2a IN (SELECT t1a
    +               FROM   t1
    +               WHERE  t2c = t1c)
    +GROUP BY t2a, t2b, t2c
    +UNION
    +SELECT t3a,
    +       t3b,
    +       t3c,
    +       Min(t3d)
    +FROM   t3
    +WHERE  t3a IN (SELECT t2a
    +               FROM   t2
    +               WHERE  t3c = t2c)
    +GROUP BY t3a, t3b, t3c
    +UNION DISTINCT
    +SELECT t1a,
    +       t1b,
    +       t1c,
    +       Max(t1d)
    +FROM   t1
    +WHERE  t1a IN (SELECT t3a
    +               FROM   t3
    +               WHERE  t3d = t1d)
    +GROUP BY t1a, t1b, t1c
    +-- !query 5 schema
    +struct<t2a:string,t2b:smallint,t2c:int,min(t2d):bigint>
    +-- !query 5 output
    +val1b      10      12      19
    +val1b      8       16      119
    +val1b      8       16      19
    +val1b      NULL    16      19
    +val1b      NULL    16      319
    +val1c      12      16      219
    +
    +
    +-- !query 6
    +SELECT DISTINCT( t2a ),
    +               t2b,
    +               Count(t2c),
    +               t2d,
    +               t2h,
    +               t2i
    +FROM   t2
    +WHERE  t2a IN (SELECT t1a
    +               FROM   t1
    +               WHERE  t1b = t2b)
    +GROUP  BY t2a,
    +          t2b,
    +          t2c,
    +          t2d,
    +          t2h,
    +          t2i
    +UNION
    +SELECT DISTINCT( t2a ),
    +               t2b,
    +               Count(t2c),
    +               t2d,
    +               t2h,
    +               t2i
    +FROM   t2
    +WHERE  t2a IN (SELECT t1a
    +               FROM   t1
    +               WHERE  t2c = t1c)
    +GROUP  BY t2a,
    +          t2b,
    +          t2c,
    +          t2d,
    +          t2h,
    +          t2i
    +HAVING t2b IS NOT NULL
    +-- !query 6 schema
    
+struct<t2a:string,t2b:smallint,count(t2c):bigint,t2d:bigint,t2h:timestamp,t2i:date>
    +-- !query 6 output
    +val1b      8       1       119     2015-05-04 01:01:00     2015-05-04
    +val1b      8       1       19      2014-07-04 01:01:00     2014-07-04
    +val1c      12      1       19      2014-08-04 01:01:00     2014-08-05
    +val1c      12      1       219     2016-05-04 01:01:00     2016-05-04
    +
    +
    +-- !query 7
    +SELECT t2a,
    +               t2b,
    +               Count(t2c),
    +               t2d,
    +               t2h,
    +               t2i
    +FROM   t2
    +WHERE  t2a IN (SELECT DISTINCT(t1a)
    +               FROM   t1
    +               WHERE  t1b = t2b)
    +GROUP  BY t2a,
    +          t2b,
    +          t2c,
    +          t2d,
    +          t2h,
    +          t2i
    +
    +UNION
    +SELECT DISTINCT( t2a ),
    +               t2b,
    +               Count(t2c),
    +               t2d,
    +               t2h,
    +               t2i
    +FROM   t2
    +WHERE  t2b IN (SELECT Max(t1b)
    +               FROM   t1
    +               WHERE  t2c = t1c)
    +GROUP  BY t2a,
    +          t2b,
    +          t2c,
    +          t2d,
    +          t2h,
    +          t2i
    +HAVING t2b IS NOT NULL
    +UNION DISTINCT
    +SELECT t2a,
    +       t2b,
    +       t2c,
    +       t2d,
    +       t2h,
    +       t2i
    +FROM   t2
    +WHERE  t2d IN (SELECT min(t1d)
    +               FROM   t1
    +               WHERE  t2c = t1c)
    +-- !query 7 schema
    
+struct<t2a:string,t2b:smallint,count(t2c):bigint,t2d:bigint,t2h:timestamp,t2i:date>
    +-- !query 7 output
    +val1b      8       1       119     2015-05-04 01:01:00     2015-05-04
    +val1b      8       1       19      2014-07-04 01:01:00     2014-07-04
    +val1b      8       16      19      2014-07-04 01:01:00     2014-07-04
    +val1b      NULL    16      19      2014-05-04 01:01:00     NULL
    +val1c      12      16      19      2014-08-04 01:01:00     2014-08-05
    +
    +
    +-- !query 8
    +SELECT t2a,
    +       t2b,
    +       t2c,
    +       t2d
    +FROM   t2
    +WHERE  t2a IN (SELECT t1a
    +               FROM   t1
    +               WHERE  t1b = t2b AND
    +                      t1d < t2d)
    +INTERSECT
    +SELECT t2a,
    +       t2b,
    +       t2c,
    +       t2d
    +FROM   t2
    +WHERE  t2b IN (SELECT Max(t1b)
    +               FROM   t1
    +               WHERE  t2c = t1c)
    +EXCEPT
    +SELECT t2a,
    +       t2b,
    +       t2c,
    +       t2d
    +FROM   t2
    +WHERE  t2d IN (SELECT Min(t3d)
    +               FROM   t3
    +               WHERE  t2c = t3c)
    +UNION ALL
    +SELECT t2a,
    +       t2b,
    +       t2c,
    +       t2d
    +FROM   t2
    +WHERE  t2c IN (SELECT Max(t1c)
    +               FROM   t1
    +               WHERE t1d = t2d)
    +-- !query 8 schema
    +struct<t2a:string,t2b:smallint,t2c:int,t2d:bigint>
    +-- !query 8 output
    +val1b      8       16      119
    +val1b      8       16      19
    +val1b      NULL    16      19
    +val1c      12      16      19
    +
    +
    +-- !query 9
    +SELECT DISTINCT(t1a),
    +       t1b,
    +       t1c,
    +       t1d
    +FROM   t1
    +WHERE  t1a IN (SELECT t3a
    +               FROM   (SELECT t2a t3a
    +                       FROM   t2
    +                       UNION ALL
    +                       SELECT t2a t3a
    +                       FROM   t2) AS t3
    +               UNION
    +               SELECT t2a
    +               FROM   (SELECT t2a
    +                       FROM   t2
    +                       WHERE  t2b > 6
    +                       UNION
    +                       SELECT t2a
    +                       FROM   t2
    +                       WHERE  t2b > 6) AS t4
    +               UNION DISTINCT
    +               SELECT t2a
    +               FROM   (SELECT t2a
    +                       FROM   t2
    +                       WHERE  t2b > 6
    +                       UNION DISTINCT
    +                       SELECT t1a
    +                       FROM   t1
    +                       WHERE  t1b > 6) AS t5)
    +GROUP BY t1a, t1b, t1c, t1d
    +HAVING t1c IS NOT NULL AND t1b IS NOT NULL
    +ORDER BY t1c DESC
    +-- !query 9 schema
    +struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint>
    +-- !query 9 output
    +val1c      8       16      19
    +val1b      8       16      19
    +val1a      16      12      21
    +val1a      16      12      10
    +val1a      6       8       10
    +
    +
    +-- !query 10
    +SELECT t1a,
    +       t1b,
    +       t1c
    +FROM   t1
    +WHERE  t1b IN (SELECT t2b
    +               FROM   (SELECT t2b
    +                       FROM   t2
    +                       WHERE  t2b > 6
    +                       INTERSECT
    +                       SELECT t1b
    +                       FROM   t1
    +                       WHERE  t1b > 6) AS t3
    +               WHERE  t2b = t1b)
    +-- !query 10 schema
    +struct<t1a:string,t1b:smallint,t1c:int>
    +-- !query 10 output
    +val1b      8       16
    +val1c      8       16
    +val1d      10      NULL
    +val1e      10      NULL
    +val1e      10      NULL
    +val1e      10      NULL
    +
    +
    +-- !query 11
    +SELECT t1a,
    +       t1b,
    +       t1c
    +FROM   t1
    +WHERE  t1h IN (SELECT t2h
    +               FROM   (SELECT t2h
    +                       FROM   t2
    +                       EXCEPT
    +                       SELECT t3h
    +                       FROM   t3) AS t3)
    +ORDER BY t1b DESC NULLs first, t1c  DESC NULLs last
    +-- !query 11 schema
    +struct<t1a:string,t1b:smallint,t1c:int>
    +-- !query 11 output
    +val1d      NULL    16
    +val1a      16      12
    +val1e      10      NULL
    +val1d      10      NULL
    +val1e      10      NULL
    +val1b      8       16
    +
    +
    +-- !query 12
    +SELECT t1a,
    +       t1b,
    +       t1c
    +FROM   t1
    +WHERE  t1b IN
    +       (
    +              SELECT t2b
    +              FROM   (
    +                            SELECT t2b
    +                            FROM   t2
    +                            WHERE  t2b > 6
    +                            INTERSECT
    +                            SELECT t1b
    +                            FROM   t1
    +                            WHERE  t1b > 6) AS t3)
    +UNION DISTINCT
    +SELECT t1a,
    +       t1b,
    +       t1c
    +FROM   t1
    +WHERE  t1b IN
    +       (
    +              SELECT t2b
    +              FROM   (
    +                            SELECT t2b
    +                            FROM   t2
    +                            WHERE  t2b > 6
    +                            EXCEPT
    +                            SELECT t1b
    +                            FROM   t1
    +                            WHERE  t1b > 6) AS t4
    +              WHERE  t2b = t1b)
    +ORDER BY t1c DESC NULLS last
    +-- !query 12 schema
    +struct<t1a:string,t1b:smallint,t1c:int>
    +-- !query 12 output
    +val1c      8       16
    +val1b      8       16
    +val1e      10      NULL
    +val1d      10      NULL
    +
    +
    +-- !query 13
    +SELECT *
    +FROM   (SELECT *
    +        FROM   (SELECT *
    +                FROM   t2
    +                WHERE  t2h IN (SELECT t1h
    +                               FROM   t1
    +                               WHERE  t1a = t2a)
    +                UNION DISTINCT
    +                SELECT *
    +                FROM   t1
    +                WHERE  t1h IN (SELECT t3h
    +                               FROM   t3
    +                               UNION
    +                               SELECT t1h
    +                               FROM   t1)
    +                UNION
    +                SELECT *
    +                FROM   t3
    +                WHERE  t3a IN (SELECT t2a
    +                               FROM   t2
    +                               UNION ALL
    +                               SELECT t1a
    +                               FROM   t1
    +                               WHERE  t1b > 0)
    +               INTERSECT
    +               SELECT *
    +               FROM   T1
    +               WHERE  t1b IN (SELECT t3b
    +                              FROM   t3
    +                              UNION DISTINCT
    +                              SELECT t2b
    +                              FROM   t2
    +                               )
    +              EXCEPT
    +              SELECT *
    +              FROM   t2
    +              WHERE  t2h IN (SELECT t1i
    +                             FROM   t1)) t4
    +        WHERE  t4.t2b IN (SELECT Min(t3b)
    +                          FROM   t3
    +                          WHERE  t4.t2a = t3a))
    +-- !query 13 schema
    
+struct<t2a:string,t2b:smallint,t2c:int,t2d:bigint,t2e:float,t2f:double,t2g:decimal(2,-2),t2h:timestamp,t2i:date>
    +-- !query 13 output
    +val1b      8       16      19      17.0    25.0    2600    2014-05-04 
01:01:00     2014-05-04
    +
    +
    +-- !query 14
    +SELECT t2a,
    +       t2b,
    +       t2c,
    +       t2i
    +FROM   (SELECT *
    +        FROM   t2
    +        WHERE  t2a NOT IN (SELECT t1a
    +                           FROM   t1
    +                           UNION
    +                           SELECT t3a
    +                           FROM   t3)
    +        UNION ALL
    +        SELECT *
    +        FROM   t2
    +        WHERE  t2a NOT IN (SELECT t1a
    +                           FROM   t1
    +                           INTERSECT
    +                           SELECT t2a
    +                           FROM   t2)) AS t3
    +WHERE  t3.t2a NOT IN (SELECT t1a
    +                      FROM   t1
    +                      INTERSECT
    +                      SELECT t2a
    +                      FROM   t2)
    +       AND t2c IS NOT NULL
    +ORDER  BY t2a
    +-- !query 14 schema
    +struct<t2a:string,t2b:smallint,t2c:int,t2i:date>
    +-- !query 14 output
    +val2a      6       12      2014-04-04
    +val2a      6       12      2014-04-04
    +
    +
    +-- !query 15
    +SELECT   Count(DISTINCT(t1a)),
    +         t1b,
    +         t1c,
    +         t1i
    +FROM     t1
    +WHERE    t1b NOT IN
    +         (
    +                SELECT t2b
    +                FROM   (
    +                              SELECT t2b
    +                              FROM   t2
    +                              WHERE  t2b NOT IN
    +                                     (
    +                                            SELECT t1b
    +                                            FROM   t1)
    +                              UNION
    +                              SELECT t1b
    +                              FROM   t1
    +                              WHERE  t1b NOT IN
    +                                     (
    +                                            SELECT t3b
    +                                            FROM   t3)
    +                              UNION
    +                                    distinct SELECT t3b
    +                              FROM   t3
    +                              WHERE  t3b NOT IN
    +                                     (
    +                                            SELECT t2b
    +                                            FROM   t2)) AS t3
    +                WHERE  t2b = t1b)
    +GROUP BY t1a,
    +         t1b,
    +         t1c,
    +         t1i
    +HAVING   t1b NOT IN
    +         (
    +                SELECT t2b
    +                FROM   t2
    +                WHERE  t2c IS NULL
    +                EXCEPT
    +                SELECT t3b
    +                FROM   t3)
    +ORDER BY t1c DESC NULLS LAST
    +-- !query 15 schema
    +struct<count(DISTINCT t1a):bigint,t1b:smallint,t1c:int,t1i:date>
    +-- !query 15 output
    +1  8       16      2014-05-05
    +1  8       16      2014-05-04
    +1  16      12      2014-06-04
    +1  16      12      2014-07-04
    +1  6       8       2014-04-04
    +1  10      NULL    2014-08-04
    +1  10      NULL    2014-09-04
    +1  10      NULL    2015-05-04
    +1  10      NULL    2014-05-04
    --- End diff --
    
    All the results are equivalent with the ones from DB2.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to