wuchong commented on a change in pull request #10001: [FLINK-14535][table-planner-blink] Fix distinct key type for DecimalT… URL: https://github.com/apache/flink/pull/10001#discussion_r339445914
########## File path: flink-table/flink-table-planner-blink/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/AggregateITCase.scala ########## @@ -180,34 +180,68 @@ class AggregateITCase( @Test def testCountDistinct(): Unit = { val data = new mutable.MutableList[Row] - data.+=(Row.of(JInt.valueOf(1), JLong.valueOf(1L), "A")) - data.+=(Row.of(JInt.valueOf(2), JLong.valueOf(2L), "B")) - data.+=(Row.of(null, JLong.valueOf(2L), "B")) - data.+=(Row.of(JInt.valueOf(3), JLong.valueOf(2L), "B")) - data.+=(Row.of(JInt.valueOf(4), JLong.valueOf(3L), "C")) - data.+=(Row.of(JInt.valueOf(5), JLong.valueOf(3L), "C")) - data.+=(Row.of(JInt.valueOf(5), JLong.valueOf(3L), null)) - data.+=(Row.of(JInt.valueOf(6), JLong.valueOf(3L), "C")) - data.+=(Row.of(JInt.valueOf(7), JLong.valueOf(4L), "B")) - data.+=(Row.of(JInt.valueOf(8), JLong.valueOf(4L), "A")) - data.+=(Row.of(JInt.valueOf(9), JLong.valueOf(4L), "D")) - data.+=(Row.of(null, JLong.valueOf(4L), null)) - data.+=(Row.of(JInt.valueOf(10), JLong.valueOf(4L), "E")) - data.+=(Row.of(JInt.valueOf(11), JLong.valueOf(5L), "A")) - data.+=(Row.of(JInt.valueOf(12), JLong.valueOf(5L), "B")) - - val rowType: RowTypeInfo = new RowTypeInfo(Types.INT, Types.LONG, Types.STRING) - - val t = failingDataSource(data)(rowType).toTable(tEnv, 'a, 'b, 'c) + data.+=(Row.of(localDateTime("1970-01-01 00:00:01"), localDate("1970-01-01"), + mLocalTime("00:00:01"), BigDecimal(1).bigDecimal, JInt.valueOf(1), JLong.valueOf(1L), + Long.box(1L), "A")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:02"), localDate("1970-01-02"), + mLocalTime("00:00:02"), BigDecimal(2).bigDecimal, JInt.valueOf(2), JLong.valueOf(2L), + Long.box(2L), "B")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:03"), localDate("1970-01-03"), + mLocalTime("00:00:03"), BigDecimal(3).bigDecimal, null, JLong.valueOf(3L), + Long.box(2L), "B")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:04"), localDate("1970-01-04"), + mLocalTime("00:00:04"), BigDecimal(4).bigDecimal, 
JInt.valueOf(4), JLong.valueOf(4L), + Long.box(3L), "C")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:05"), localDate("1970-01-05"), + mLocalTime("00:00:05"), BigDecimal(5).bigDecimal, JInt.valueOf(5), JLong.valueOf(5L), + Long.box(3L), "C")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:06"), localDate("1970-01-06"), + mLocalTime("00:00:06"), BigDecimal(6).bigDecimal, JInt.valueOf(6), JLong.valueOf(6L), + Long.box(3L), "C")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:07"), localDate("1970-01-07"), + mLocalTime("00:00:07"), BigDecimal(7).bigDecimal, JInt.valueOf(7), JLong.valueOf(7L), + Long.box(4L), "B")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:08"), localDate("1970-01-08"), + mLocalTime("00:00:08"), BigDecimal(8).bigDecimal, JInt.valueOf(8), JLong.valueOf(8L), + Long.box(4L), "A")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:09"), localDate("1970-01-09"), + mLocalTime("00:00:09"), BigDecimal(9).bigDecimal, JInt.valueOf(9), JLong.valueOf(9L), + Long.box(4L), "D")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:10"), localDate("1970-01-10"), + mLocalTime("00:00:10"), BigDecimal(10).bigDecimal, null, JLong.valueOf(10L), + Long.box(4L), "E")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:11"), localDate("1970-01-11"), + mLocalTime("00:00:11"), BigDecimal(11).bigDecimal, JInt.valueOf(11), JLong.valueOf(11L), + Long.box(5L), "A")) + data.+=(Row.of(localDateTime("1970-01-01 00:00:12"), localDate("1970-01-12"), + mLocalTime("00:00:12"), BigDecimal(12).bigDecimal, JInt.valueOf(12), JLong.valueOf(12L), + Long.box(5L), "B")) Review comment: We can improve this a bit more, to make it more readable and maintainable, and to cover more aspects: 1) please make sure the test data contains null values for each type; 2) please make sure there are some duplicate values per group.
how about: ```scala @Test def testCountDistinct(): Unit = { val ids = List( 1, 2, 2, 3, 3, 3, 4, 4, 4, 4) val datetimes = List( "1970-01-01 00:00:01", "1970-01-01 00:00:02", "1970-01-01 00:00:03", "1970-01-01 00:00:04", null, "1970-01-01 00:00:06", "1970-01-01 00:00:07", "1970-01-01 00:00:07", "1970-01-01 00:00:09", "1970-01-01 00:00:09") val dates = List( "1970-01-01", "1970-01-02", "1970-01-03", "1970-01-04", null, "1970-01-06", "1970-01-07", "1970-01-07", "1970-01-09", "1970-01-09") val data = new mutable.MutableList[Row] for (i <- ids.indices) { data += Row.of(Integer.valueOf(ids(i)), localDateTime(datetimes(i)), localDate(dates(i))) } val rowType = new RowTypeInfo(Types.INT(), Types.LOCAL_DATE_TIME(), Types.LOCAL_DATE()) val t = failingDataSource(data)(rowType).toTable( tEnv, 'id, 'a, 'b) tEnv.registerTable("T", t) val t1 = tEnv.sqlQuery( s""" |SELECT | id, | 'TIMESTAMP:' || CAST(COUNT(DISTINCT a) AS VARCHAR), | 'DATE:' || CAST(COUNT(DISTINCT b) AS VARCHAR) |FROM T |GROUP BY id """.stripMargin) val sink = new TestingRetractSink t1.toRetractStream[Row].addSink(sink) env.execute() val expected = List( "1,TIMESTAMP:1,DATE:1", "2,TIMESTAMP:2,DATE:2", "3,TIMESTAMP:2,DATE:2", "4,TIMESTAMP:2,DATE:2") assertEquals(expected.sorted, sink.getRetractResults.sorted) } ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services