[ https://issues.apache.org/jira/browse/PHOENIX-2315?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14997698#comment-14997698 ]
Alicia Ying Shu commented on PHOENIX-2315: ------------------------------------------ [~jamestaylor] Here is the unit test in GroupByIT.java that reproduces the issue. @Test public void testExpressionInGroupBy() throws Exception { Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); Connection conn = DriverManager.getConnection(getUrl(), props); String ddl = " create table tgb_counter(tgb_id integer NOT NULL,utc_date_epoch integer NOT NULL,tgb_name varchar(40),ack_success_count integer" + ",ack_success_one_ack_count integer, CONSTRAINT pk_tgb_counter PRIMARY KEY(tgb_id, utc_date_epoch))"; String query = "SELECT tgb_id, tgb_name, (utc_date_epoch/10)*10 AS utc_epoch_hour,SUM(ack_success_count + ack_success_one_ack_count) AS ack_tx_sum" + " FROM tgb_counter GROUP BY tgb_id, tgb_name, utc_epoch_hour"; createTestTable(getUrl(), ddl); try { String dml = "UPSERT INTO tgb_counter VALUES(?,?,?,?,?)"; PreparedStatement stmt = conn.prepareStatement(dml); stmt.setInt(1, 1); stmt.setInt(2, 1000); stmt.setString(3, "aaa"); stmt.setInt(4, 1); stmt.setInt(5, 1); stmt.execute(); stmt.setInt(1, 2); stmt.setInt(2, 2000); stmt.setString(3, "bbb"); stmt.setInt(4, 2); stmt.setInt(5, 2); stmt.execute(); conn.commit(); ResultSet rs = conn.createStatement().executeQuery(query); assertTrue(rs.next()); assertEquals(1,rs.getInt(1)); assertEquals("aaa",rs.getString(2)); assertEquals(1000,rs.getInt(3)); assertEquals(2,rs.getInt(4)); assertTrue(rs.next()); assertEquals(2,rs.getInt(1)); assertEquals("bbb",rs.getString(2)); assertEquals(2000,rs.getInt(3)); assertEquals(4,rs.getInt(4)); assertFalse(rs.next()); rs.close(); } finally { conn.createStatement().execute("drop table tgb_counter"); conn.close(); } } The error we got: java.lang.AssertionError: expected:<aaa> but was:<null> at org.junit.Assert.fail(Assert.java:88) at org.junit.Assert.failNotEquals(Assert.java:834) at org.junit.Assert.assertEquals(Assert.java:118) at org.junit.Assert.assertEquals(Assert.java:144) at org.apache.phoenix.end2end.GroupByIT.testExpressionInGroupBy(GroupByIT.java:681) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50) at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47) at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57) at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290) at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71) at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288) at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58) at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268) at org.junit.runners.ParentRunner.run(ParentRunner.java:363) at org.junit.runners.Suite.runChild(Suite.java:128) at org.junit.runners.Suite.runChild(Suite.java:27) at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290) at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71) at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288) at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58) at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268) at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) at org.junit.runners.ParentRunner.run(ParentRunner.java:363) at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50) at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:467) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:683) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:390) at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:197) ========================== What I found is that in GroupByCompiler.java, inside compiler() method Collections.sort(groupBys, new Comparator<Pair<Integer,Expression>>() is called. The group by fields are re-arranged and primary keys are put in the beginning. In my example, original "GROUP BY tgb_id, tgb_name, utc_epoch_hour" becomes "GROUP BY tgb_id, utc_epoch_hour, tgb_name". This introduced the wrong result NULL for tgb_name when it should be 'aaa'. Why the sorting of the group by fields is needed? Removing the sort will get us the correct results. > Group by fields of primary key columns together with non-primary key columns > can give wrong result > -------------------------------------------------------------------------------------------------- > > Key: PHOENIX-2315 > URL: https://issues.apache.org/jira/browse/PHOENIX-2315 > Project: Phoenix > Issue Type: Bug > Reporter: Alicia Ying Shu > Assignee: Alicia Ying Shu > > create table tgb_counter(tgb_id integer NOT NULL,utc_date_epoch integer NOT > NULL,tgb_name varchar(40),ack_success_count integer,ack_success_one_ack_count > integer, CONSTRAINT pk_tgb_counter PRIMARY KEY(tgb_id, utc_date_epoch)); > SELECT tgb_id, tgb_name, (utc_date_epoch/10)*10 AS > utc_epoch_hour,SUM(ack_success_count + ack_success_one_ack_count) AS > ack_tx_sum FROM tgb_counter GROUP BY tgb_id, tgb_name, utc_epoch_hour; > tgb_name returns NULL. -- This message was sent by Atlassian JIRA (v6.3.4#6332)