dengzhhu653 commented on code in PR #5523: URL: https://github.com/apache/hive/pull/5523#discussion_r1857704711
########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/SQLGenerator.java: ########## @@ -183,55 +181,6 @@ public PreparedStatement prepareStmtWithParameters(Connection dbConn, String sql return pst; } - - - /** - * Oracle SQL query that creates or replaces view HMS_SUMMARY. - */ - private static final String CREATE_METADATASUMMARY_ORACLE = "CREATE OR REPLACE VIEW METADATASUMMARYALL AS SELECT a.TBL_ID, a.TBL_NAME, a.OWNER as \"CTLG\", a.TBL_TYPE, a.CREATE_TIME, a.DB_ID, a.SD_ID, b.NAME, c.INPUT_FORMAT, c.IS_COMPRESSED, c.LOCATION, c.OUTPUT_FORMAT,c.SERDE_ID, d.SLIB, TO_CHAR(e.PARAM_VALUE) as \"PARAM_VAL\", count(j.COLUMN_NAME) as \"TOTAL_COLUMN_COUNT\", jj.ARRAY_COLUMN_COUNT, jj.STRUCT_COLUMN_COUNT, jj.MAP_COLUMN_COUNT, k.PARTITION_KEY_NAME as \"PARTITION_COLUMN\", m.PARTITION_CNT, CAST(CAST(q.NUM_FILES AS VARCHAR2(200)) AS NUMBER) as \"num_files\", CAST(q.TOTAL_SIZE AS NUMBER) as \"total_size\", CAST(q.NUM_ROWS AS NUMBER) as \"num_rows\", q.WRITE_FORMAT_DEFAULT, q.TRANSACTIONAL_PROPERTIES FROM TBLS a LEFT JOIN DBS b on a.DB_ID = b.DB_ID LEFT JOIN SDS c on a.SD_ID = c.SD_ID LEFT JOIN SERDES d on c.SERDE_ID = d.SERDE_ID LEFT JOIN (select SERDE_ID,PARAM_KEY,PARAM_VALUE from SERDE_PARAMS where PARAM_KEY = 'field.delim') e on c.SERDE_ID = e.SERDE_ID LEFT JOIN CO LUMNS_V2 j on c.CD_ID = j.CD_ID LEFT JOIN (SELECT CD_ID, sum(CASE WHEN TYPE_NAME like 'array%' THEN 1 ELSE 0 END) AS \"ARRAY_COLUMN_COUNT\", sum(CASE WHEN TYPE_NAME like 'struct%' THEN 1 ELSE 0 END) AS \"STRUCT_COLUMN_COUNT\", sum(CASE WHEN TYPE_NAME like 'map%' THEN 1 ELSE 0 END) AS \"MAP_COLUMN_COUNT\" from COLUMNS_V2 group by CD_ID) jj on jj.CD_ID=c.CD_ID LEFT JOIN (select TBL_ID, LISTAGG(PKEY_NAME, ',') as PARTITION_KEY_NAME from PARTITION_KEYS group by TBL_ID) k on a.TBL_ID = k.TBL_ID LEFT JOIN (select SERDE_ID,PARAM_KEY,PARAM_VALUE from SERDE_PARAMS where PARAM_KEY = 'serialization.format') f on c.SERDE_ID = f.SERDE_ID LEFT JOIN (select TBL_ID,PARAM_KEY,PARAM_VALUE from TABLE_PARAMS where PARAM_KEY = 'comment') g on a.TBL_ID = g.TBL_ID LEFT JOIN (select TBL_ID, PARAM_KEY,PARAM_VALUE from TABLE_PARAMS where PARAM_KEY = 'transient_lastDdlTime') h on a.TBL_ID = h.TBL_ID LEFT JOIN (select TBL_ID,COUNT(PART_ID) as PARTITION_CNT from PARTITIONS group by TBL_ID) m on a.TBL_ID = m.TBL _ID LEFT JOIN (SELECT aa.TBL_ID, aa.NUM_FILES + case when bb.NUM_FILES is not null then bb.NUM_FILES else 0 end AS \"NUM_FILES\", aa.NUM_ROWS + case when bb.NUM_ROWS is not null then bb.NUM_ROWS else 0 end AS \"NUM_ROWS\", aa.TOTAL_SIZE + case when bb.TOTAL_SIZE is not null then bb.TOTAL_SIZE else 0 end AS \"TOTAL_SIZE\", aa.WRITE_FORMAT_DEFAULT, aa.TRANSACTIONAL_PROPERTIES from (select u.TBL_ID, NUM_FILES, NUM_ROWS, TOTAL_SIZE, WRITE_FORMAT_DEFAULT, TRANSACTIONAL_PROPERTIES from (select TBL_ID, max(CASE PARAM_KEY WHEN 'numFiles' THEN CAST(CAST(PARAM_VALUE AS VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"NUM_FILES\", max(CASE PARAM_KEY WHEN 'numRows' THEN CAST(CAST(PARAM_VALUE AS VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"NUM_ROWS\", max(CASE PARAM_KEY WHEN 'totalSize' THEN CAST(CAST(PARAM_VALUE AS VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"TOTAL_SIZE\" from TABLE_PARAMS group by TBL_ID) u left join (select TBL_ID, CAST(PARAM_VALUE AS VARCHAR2(200)) as \"WRITE_FORMAT_DEFAULT\" from TAB LE_PARAMS where PARAM_KEY = 'write.format.default') v on u.TBL_ID = v.TBL_ID left join (select TBL_ID, CAST(PARAM_VALUE AS VARCHAR2(200)) as \"TRANSACTIONAL_PROPERTIES\" from TABLE_PARAMS where PARAM_KEY = 'transactional_properties') w on u.TBL_ID = w.TBL_ID) aa left join (SELECT y.TBL_ID, SUM(x.NUM_FILES) AS \"NUM_FILES\", SUM(x.NUM_ROWS) AS \"NUM_ROWS\", SUM(x.TOTAL_SIZE) AS \"TOTAL_SIZE\" FROM PARTITIONS y left join (SELECT PART_ID, max(CASE PARAM_KEY WHEN 'numFiles' THEN CAST(CAST(PARAM_VALUE AS VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"NUM_FILES\", max(CASE PARAM_KEY WHEN 'numRows' THEN CAST(CAST(PARAM_VALUE AS CHAR(200)) AS NUMBER) ELSE 0 END) AS \"NUM_ROWS\", max(CASE PARAM_KEY WHEN 'totalSize' THEN CAST(CAST(PARAM_VALUE AS VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"TOTAL_SIZE\" FROM PARTITION_PARAMS group by PART_ID) x ON y.PART_ID = x.PART_ID group by y.TBL_ID) bb on aa.TBL_ID = bb.TBL_ID) q on a.TBL_ID = q.TBL_ID group by a.TBL_ID, a.TBL_NAME, a.OWNER, a.TBL_TYPE, a.CREAT E_TIME, a.DB_ID, a.SD_ID, b.NAME, c.INPUT_FORMAT, c.IS_COMPRESSED, c.LOCATION, c.OUTPUT_FORMAT,c.SERDE_ID, d.SLIB, TO_CHAR(e.PARAM_VALUE), jj.ARRAY_COLUMN_COUNT, jj.STRUCT_COLUMN_COUNT,jj.MAP_COLUMN_COUNT, k.PARTITION_KEY_NAME, m.PARTITION_CNT,q.NUM_FILES, q.TOTAL_SIZE, q.NUM_ROWS, q.WRITE_FORMAT_DEFAULT, q.TRANSACTIONAL_PROPERTIES;"; Review Comment: Tested on an env with millions of the partition and ten thousands of the table, the performance of collecting summary doesn't degrade afterwards(in about 2 min). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org