[ https://issues.apache.org/jira/browse/PHOENIX-5265?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Viraj Jasani updated PHOENIX-5265: ---------------------------------- Summary: [UMBRELLA] Phoenix Test should use gold files for result comparison instead of using hard-corded comparison. (was: (Umbrella)Phoenix Test should use gold files for result comparison instead of using hard-corded comparison.) > [UMBRELLA] Phoenix Test should use gold files for result comparison instead > of using hard-corded comparison. > ------------------------------------------------------------------------------------------------------------ > > Key: PHOENIX-5265 > URL: https://issues.apache.org/jira/browse/PHOENIX-5265 > Project: Phoenix > Issue Type: Improvement > Environment: {code:java} > // code placeholder > @Test > public void testWithMultiCF() throws Exception { > int nRows = 20; > Connection conn = getConnection(0); > PreparedStatement stmt; > conn.createStatement().execute( > "CREATE TABLE " + fullTableName > + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, c.v > INTEGER NULL, d.v INTEGER NULL) " > + tableDDLOptions ); > stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + " > VALUES(?,?, ?, ?, ?)"); > byte[] val = new byte[250]; > for (int i = 0; i < nRows; i++) { > stmt.setString(1, Character.toString((char)('a' + i)) + > Bytes.toString(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.setInt(4, i); > stmt.setInt(5, i); > stmt.executeUpdate(); > } > conn.commit(); > stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, c.v, > d.v) VALUES(?,?,?)"); > for (int i = 0; i < 5; i++) { > stmt.setString(1, Character.toString((char)('a' + 'z' + i)) + > Bytes.toString(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.executeUpdate(); > } > conn.commit(); > ResultSet rs; > String actualExplainPlan; > collectStatistics(conn, fullTableName); > List<KeyRange> keyRanges = getAllSplits(conn, fullTableName); > assertEquals(26, keyRanges.size()); > rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + > fullTableName); > actualExplainPlan = QueryUtil.getExplainPlan(rs); > assertEquals( > "CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? ( mutable ? "12530" > : "14190" ) : > (TransactionFactory.Provider.OMID.name().equals(transactionProvider)) ? > "25320" : "12420") + > " BYTES PARALLEL 1-WAY FULL SCAN OVER " + > physicalTableName, > actualExplainPlan); > ConnectionQueryServices services = > conn.unwrap(PhoenixConnection.class).getQueryServices(); > List<HRegionLocation> regions = > services.getAllTableRegions(Bytes.toBytes(physicalTableName)); > assertEquals(1, regions.size()); > collectStatistics(conn, fullTableName, Long.toString(1000)); > keyRanges = getAllSplits(conn, fullTableName); > boolean oneCellPerColFamilyStorageScheme = !mutable && columnEncoded; > boolean hasShadowCells = > TransactionFactory.Provider.OMID.name().equals(transactionProvider); > assertEquals(oneCellPerColFamilyStorageScheme ? 14 : hasShadowCells ? 24 > : 13, keyRanges.size()); > rs = conn > .createStatement() > .executeQuery( > "SELECT > COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(GUIDE_POSTS_WIDTH),COUNT(*) from > \"SYSTEM\".STATS where PHYSICAL_NAME = '" > + physicalTableName + "' GROUP BY COLUMN_FAMILY > ORDER BY COLUMN_FAMILY"); > assertTrue(rs.next()); > assertEquals("A", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 12530 : 14190 ) : hasShadowCells > ? 25320 : 12420, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 13 : hasShadowCells ? 23 > : 12, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("B", rs.getString(1)); > assertEquals(oneCellPerColFamilyStorageScheme ? 25 : 20, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 5600 : 7260 ) : hasShadowCells ? > 11260 : 5540, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 7 : hasShadowCells ? 10 : > 5, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("C", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCells ? > 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("D", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCells ? > 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertFalse(rs.next()); > // Disable stats > conn.createStatement().execute("ALTER TABLE " + fullTableName + > " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=0"); > collectStatistics(conn, fullTableName); > // Assert that there are no more guideposts > rs = conn.createStatement().executeQuery("SELECT count(1) FROM " + > PhoenixDatabaseMetaData.SYSTEM_STATS_NAME + > " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "='" + > physicalTableName + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS > NOT NULL"); > assertTrue(rs.next()); > assertEquals(0, rs.getLong(1)); > assertFalse(rs.next()); > rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + > fullTableName); > actualExplainPlan = QueryUtil.getExplainPlan(rs); > assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + > physicalTableName, actualExplainPlan); > } > {code} > Reporter: Bin Shi > Assignee: Viraj Jasani > Priority: Major > Labels: hardening, phoenix-hardening > > Currently, in Phoenix Test, the comparison of the returned result and the > expected result is always hard-coded, which is especially widely used by E2E > and Integration test for comparing the query result, including the result of > EXPLAIN query plan. This has significantly impaired the productivity and > efficiency of workin on Phoenix. > The right approach is, each test case should write the generated result to a > file under a new directory, then compares this file with a gold file under > "gold" directory. After we make some code changes and make sure the change is > correct by manually verifying several specific test cases, we can safely > replace the whole "gold" directory with the directory which contains all new > generated test files during test running. In this way, we can easily rebase > all the tests. > For example, in BaseStatsCollectorIT.java, we verify the estimated row size > in the returned result of EXPLAIN query plan, the row size is decided by many > factors, like the column is encoded or not, it is mutable or not, it uses > transaction provider or not, it uses TEPHRA or OMID as transaction provider, > etc. The code snippet "testWithMultiCF" shows a typical test case. The > comparisons of the result result and the expected result are hard-coded in > those asserts. Now imagine, if we change the way collecting stats or we > change column encoding scheme or we change the cell storage format for TEPHRA > or OMID, which is very likely to happen, then we need to manually change all > those hard-coded comparison everywhere, and it isn't trivial to re-calculate > all expected row sizes with the different conditions . Today you might need > one or two weeks to rebase all the tests, which is a huge waste. We should > use "gold" files here, so that we can rebase the test very easily. > BTW, the new generated test result files and gold files should be in XML or > JSON. The result of "EXPLAN" query should be in XML or JSON too, because the > file format matches the structure of a query plan. > {code:java} > // code placeholder > @Test > public void testWithMultiCF() throws Exception { > int nRows = 20; > Connection conn = getConnection(0); > PreparedStatement stmt; > conn.createStatement().execute( > "CREATE TABLE " + fullTableName > + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, c.v > INTEGER NULL, d.v INTEGER NULL) " > + tableDDLOptions ); > stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + " > VALUES(?,?, ?, ?, ?)"); > byte[] val = new byte[250]; > for (int i = 0; i < nRows; i++) { > stmt.setString(1, Character.toString((char)('a' + i)) + > Bytes.toString(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.setInt(4, i); > stmt.setInt(5, i); > stmt.executeUpdate(); > } > conn.commit(); > stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, c.v, > d.v) VALUES(?,?,?)"); > for (int i = 0; i < 5; i++) { > stmt.setString(1, Character.toString((char)('a' + 'z' + i)) + > Bytes.toString(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.executeUpdate(); > } > conn.commit(); > ResultSet rs; > String actualExplainPlan; > collectStatistics(conn, fullTableName); > List<KeyRange> keyRanges = getAllSplits(conn, fullTableName); > assertEquals(26, keyRanges.size()); > rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + > fullTableName); > actualExplainPlan = QueryUtil.getExplainPlan(rs); > assertEquals( > "CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? ( mutable ? "12530" > : "14190" ) : > (TransactionFactory.Provider.OMID.name().equals(transactionProvider)) ? > "25320" : "12420") + > " BYTES PARALLEL 1-WAY FULL SCAN OVER " + > physicalTableName, > actualExplainPlan); > ConnectionQueryServices services = > conn.unwrap(PhoenixConnection.class).getQueryServices(); > List<HRegionLocation> regions = > services.getAllTableRegions(Bytes.toBytes(physicalTableName)); > assertEquals(1, regions.size()); > collectStatistics(conn, fullTableName, Long.toString(1000)); > keyRanges = getAllSplits(conn, fullTableName); > boolean oneCellPerColFamilyStorageScheme = !mutable && columnEncoded; > boolean hasShadowCells = > TransactionFactory.Provider.OMID.name().equals(transactionProvider); > assertEquals(oneCellPerColFamilyStorageScheme ? 14 : hasShadowCells ? 24 > : 13, keyRanges.size()); > rs = conn > .createStatement() > .executeQuery( > "SELECT > COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(GUIDE_POSTS_WIDTH),COUNT(*) from > \"SYSTEM\".STATS where PHYSICAL_NAME = '" > + physicalTableName + "' GROUP BY COLUMN_FAMILY > ORDER BY COLUMN_FAMILY"); > assertTrue(rs.next()); > assertEquals("A", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 12530 : 14190 ) : hasShadowCells > ? 25320 : 12420, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 13 : hasShadowCells ? 23 > : 12, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("B", rs.getString(1)); > assertEquals(oneCellPerColFamilyStorageScheme ? 25 : 20, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 5600 : 7260 ) : hasShadowCells ? > 11260 : 5540, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 7 : hasShadowCells ? 10 : > 5, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("C", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCells ? > 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("D", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCells ? > 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertFalse(rs.next()); > // Disable stats > conn.createStatement().execute("ALTER TABLE " + fullTableName + > " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=0"); > collectStatistics(conn, fullTableName); > // Assert that there are no more guideposts > rs = conn.createStatement().executeQuery("SELECT count(1) FROM " + > PhoenixDatabaseMetaData.SYSTEM_STATS_NAME + > " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "='" + > physicalTableName + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS > NOT NULL"); > assertTrue(rs.next()); > assertEquals(0, rs.getLong(1)); > assertFalse(rs.next()); > rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + > fullTableName); > actualExplainPlan = QueryUtil.getExplainPlan(rs); > assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + > physicalTableName, actualExplainPlan); > } > {code} > -- This message was sent by Atlassian Jira (v8.3.4#803005)