[ 
https://issues.apache.org/jira/browse/PHOENIX-2315?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14997698#comment-14997698
 ] 

Alicia Ying Shu commented on PHOENIX-2315:
------------------------------------------

[~jamestaylor] Here is the unit test in GroupByIT.java that reproduces the 
issue.
    @Test
    public void testExpressionInGroupBy() throws Exception {
        Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
        Connection conn = DriverManager.getConnection(getUrl(), props);
        String ddl = " create table tgb_counter(tgb_id integer NOT 
NULL,utc_date_epoch integer NOT NULL,tgb_name varchar(40),ack_success_count 
integer" +
                        ",ack_success_one_ack_count integer, CONSTRAINT 
pk_tgb_counter PRIMARY KEY(tgb_id, utc_date_epoch))";
        String query = "SELECT tgb_id, tgb_name, (utc_date_epoch/10)*10 AS 
utc_epoch_hour,SUM(ack_success_count + ack_success_one_ack_count) AS 
ack_tx_sum" +
                        " FROM tgb_counter GROUP BY tgb_id, tgb_name, 
utc_epoch_hour";

        createTestTable(getUrl(), ddl);
        try {
                String dml = "UPSERT INTO tgb_counter VALUES(?,?,?,?,?)";
                PreparedStatement stmt = conn.prepareStatement(dml);
                stmt.setInt(1, 1);
                stmt.setInt(2, 1000);
                stmt.setString(3, "aaa");
                stmt.setInt(4, 1);
                stmt.setInt(5, 1);
                stmt.execute();
                stmt.setInt(1, 2);
                stmt.setInt(2, 2000);
                stmt.setString(3, "bbb");
                stmt.setInt(4, 2);
                stmt.setInt(5, 2);
                stmt.execute();
                conn.commit();

                ResultSet rs = conn.createStatement().executeQuery(query);
                assertTrue(rs.next());
                assertEquals(1,rs.getInt(1));
                assertEquals("aaa",rs.getString(2));
                assertEquals(1000,rs.getInt(3));
                assertEquals(2,rs.getInt(4));
                assertTrue(rs.next());
                assertEquals(2,rs.getInt(1));
                assertEquals("bbb",rs.getString(2));
                assertEquals(2000,rs.getInt(3));
                assertEquals(4,rs.getInt(4));
                assertFalse(rs.next());
                rs.close();
        } finally {
                conn.createStatement().execute("drop table tgb_counter");
                conn.close();
        }
    }

The error we got: 
java.lang.AssertionError: expected:<aaa> but was:<null>
        at org.junit.Assert.fail(Assert.java:88)
        at org.junit.Assert.failNotEquals(Assert.java:834)
        at org.junit.Assert.assertEquals(Assert.java:118)
        at org.junit.Assert.assertEquals(Assert.java:144)
        at 
org.apache.phoenix.end2end.GroupByIT.testExpressionInGroupBy(GroupByIT.java:681)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
        at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
        at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
        at 
org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
        at 
org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
        at 
org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
        at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
        at 
org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
        at 
org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
        at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
        at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
        at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
        at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
        at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
        at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
        at org.junit.runners.Suite.runChild(Suite.java:128)
        at org.junit.runners.Suite.runChild(Suite.java:27)
        at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
        at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
        at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
        at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
        at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
        at 
org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
        at 
org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
        at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
        at 
org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)
        at 
org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
        at 
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:467)
        at 
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:683)
        at 
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:390)
        at 
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:197)

==========================

What I found is that in GroupByCompiler.java, inside compiler() method 
Collections.sort(groupBys, new Comparator<Pair<Integer,Expression>>() is 
called. The group by fields are re-arranged and primary keys are put in the 
beginning. In my example, original "GROUP BY tgb_id, tgb_name, utc_epoch_hour" 
becomes "GROUP BY tgb_id, utc_epoch_hour, tgb_name". This introduced the wrong 
result NULL for tgb_name when it should be 'aaa'. Why the sorting of the group 
by fields is needed? Removing the sort will get us the correct results. 


> Group by fields of primary key columns together with non-primary key columns 
> can give wrong result
> --------------------------------------------------------------------------------------------------
>
>                 Key: PHOENIX-2315
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-2315
>             Project: Phoenix
>          Issue Type: Bug
>            Reporter: Alicia Ying Shu
>            Assignee: Alicia Ying Shu
>
> create table tgb_counter(tgb_id integer NOT NULL,utc_date_epoch integer NOT 
> NULL,tgb_name varchar(40),ack_success_count integer,ack_success_one_ack_count 
> integer, CONSTRAINT pk_tgb_counter PRIMARY KEY(tgb_id, utc_date_epoch)); 
> SELECT tgb_id, tgb_name, (utc_date_epoch/10)*10 AS 
> utc_epoch_hour,SUM(ack_success_count + ack_success_one_ack_count) AS 
> ack_tx_sum FROM tgb_counter GROUP BY tgb_id, tgb_name, utc_epoch_hour;
> tgb_name returns NULL.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to