dengzhhu653 commented on code in PR #5523:
URL: https://github.com/apache/hive/pull/5523#discussion_r1857704711
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/SQLGenerator.java:
##########
@@ -183,55 +181,6 @@ public PreparedStatement
prepareStmtWithParameters(Connection dbConn, String sql
return pst;
}
-
-
- /**
- * Oracle SQL query that creates or replaces view HMS_SUMMARY.
- */
- private static final String CREATE_METADATASUMMARY_ORACLE = "CREATE OR
REPLACE VIEW METADATASUMMARYALL AS SELECT a.TBL_ID, a.TBL_NAME, a.OWNER as
\"CTLG\", a.TBL_TYPE, a.CREATE_TIME, a.DB_ID, a.SD_ID, b.NAME, c.INPUT_FORMAT,
c.IS_COMPRESSED, c.LOCATION, c.OUTPUT_FORMAT,c.SERDE_ID, d.SLIB,
TO_CHAR(e.PARAM_VALUE) as \"PARAM_VAL\", count(j.COLUMN_NAME) as
\"TOTAL_COLUMN_COUNT\", jj.ARRAY_COLUMN_COUNT, jj.STRUCT_COLUMN_COUNT,
jj.MAP_COLUMN_COUNT, k.PARTITION_KEY_NAME as \"PARTITION_COLUMN\",
m.PARTITION_CNT, CAST(CAST(q.NUM_FILES AS VARCHAR2(200)) AS NUMBER) as
\"num_files\", CAST(q.TOTAL_SIZE AS NUMBER) as \"total_size\", CAST(q.NUM_ROWS
AS NUMBER) as \"num_rows\", q.WRITE_FORMAT_DEFAULT, q.TRANSACTIONAL_PROPERTIES
FROM TBLS a LEFT JOIN DBS b on a.DB_ID = b.DB_ID LEFT JOIN SDS c on a.SD_ID =
c.SD_ID LEFT JOIN SERDES d on c.SERDE_ID = d.SERDE_ID LEFT JOIN (select
SERDE_ID,PARAM_KEY,PARAM_VALUE from SERDE_PARAMS where PARAM_KEY =
'field.delim') e on c.SERDE_ID = e.SERDE_ID LEFT JOIN CO
LUMNS_V2 j on c.CD_ID = j.CD_ID LEFT JOIN (SELECT CD_ID, sum(CASE WHEN
TYPE_NAME like 'array%' THEN 1 ELSE 0 END) AS \"ARRAY_COLUMN_COUNT\", sum(CASE
WHEN TYPE_NAME like 'struct%' THEN 1 ELSE 0 END) AS \"STRUCT_COLUMN_COUNT\",
sum(CASE WHEN TYPE_NAME like 'map%' THEN 1 ELSE 0 END) AS \"MAP_COLUMN_COUNT\"
from COLUMNS_V2 group by CD_ID) jj on jj.CD_ID=c.CD_ID LEFT JOIN (select
TBL_ID, LISTAGG(PKEY_NAME, ',') as PARTITION_KEY_NAME from PARTITION_KEYS group
by TBL_ID) k on a.TBL_ID = k.TBL_ID LEFT JOIN (select
SERDE_ID,PARAM_KEY,PARAM_VALUE from SERDE_PARAMS where PARAM_KEY =
'serialization.format') f on c.SERDE_ID = f.SERDE_ID LEFT JOIN (select
TBL_ID,PARAM_KEY,PARAM_VALUE from TABLE_PARAMS where PARAM_KEY = 'comment') g
on a.TBL_ID = g.TBL_ID LEFT JOIN (select TBL_ID, PARAM_KEY,PARAM_VALUE from
TABLE_PARAMS where PARAM_KEY = 'transient_lastDdlTime') h on a.TBL_ID =
h.TBL_ID LEFT JOIN (select TBL_ID,COUNT(PART_ID) as PARTITION_CNT from
PARTITIONS group by TBL_ID) m on a.TBL_ID = m.TBL
_ID LEFT JOIN (SELECT aa.TBL_ID, aa.NUM_FILES + case when bb.NUM_FILES is not
null then bb.NUM_FILES else 0 end AS \"NUM_FILES\", aa.NUM_ROWS + case when
bb.NUM_ROWS is not null then bb.NUM_ROWS else 0 end AS \"NUM_ROWS\",
aa.TOTAL_SIZE + case when bb.TOTAL_SIZE is not null then bb.TOTAL_SIZE else 0
end AS \"TOTAL_SIZE\", aa.WRITE_FORMAT_DEFAULT, aa.TRANSACTIONAL_PROPERTIES
from (select u.TBL_ID, NUM_FILES, NUM_ROWS, TOTAL_SIZE, WRITE_FORMAT_DEFAULT,
TRANSACTIONAL_PROPERTIES from (select TBL_ID, max(CASE PARAM_KEY WHEN
'numFiles' THEN CAST(CAST(PARAM_VALUE AS VARCHAR2(200)) AS NUMBER) ELSE 0 END)
AS \"NUM_FILES\", max(CASE PARAM_KEY WHEN 'numRows' THEN CAST(CAST(PARAM_VALUE
AS VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"NUM_ROWS\", max(CASE PARAM_KEY
WHEN 'totalSize' THEN CAST(CAST(PARAM_VALUE AS VARCHAR2(200)) AS NUMBER) ELSE 0
END) AS \"TOTAL_SIZE\" from TABLE_PARAMS group by TBL_ID) u left join (select
TBL_ID, CAST(PARAM_VALUE AS VARCHAR2(200)) as \"WRITE_FORMAT_DEFAULT\" from TAB
LE_PARAMS where PARAM_KEY = 'write.format.default') v on u.TBL_ID = v.TBL_ID
left join (select TBL_ID, CAST(PARAM_VALUE AS VARCHAR2(200)) as
\"TRANSACTIONAL_PROPERTIES\" from TABLE_PARAMS where PARAM_KEY =
'transactional_properties') w on u.TBL_ID = w.TBL_ID) aa left join (SELECT
y.TBL_ID, SUM(x.NUM_FILES) AS \"NUM_FILES\", SUM(x.NUM_ROWS) AS \"NUM_ROWS\",
SUM(x.TOTAL_SIZE) AS \"TOTAL_SIZE\" FROM PARTITIONS y left join (SELECT
PART_ID, max(CASE PARAM_KEY WHEN 'numFiles' THEN CAST(CAST(PARAM_VALUE AS
VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"NUM_FILES\", max(CASE PARAM_KEY WHEN
'numRows' THEN CAST(CAST(PARAM_VALUE AS CHAR(200)) AS NUMBER) ELSE 0 END) AS
\"NUM_ROWS\", max(CASE PARAM_KEY WHEN 'totalSize' THEN CAST(CAST(PARAM_VALUE AS
VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"TOTAL_SIZE\" FROM PARTITION_PARAMS
group by PART_ID) x ON y.PART_ID = x.PART_ID group by y.TBL_ID) bb on aa.TBL_ID
= bb.TBL_ID) q on a.TBL_ID = q.TBL_ID group by a.TBL_ID, a.TBL_NAME, a.OWNER,
a.TBL_TYPE, a.CREAT
E_TIME, a.DB_ID, a.SD_ID, b.NAME, c.INPUT_FORMAT, c.IS_COMPRESSED, c.LOCATION,
c.OUTPUT_FORMAT,c.SERDE_ID, d.SLIB, TO_CHAR(e.PARAM_VALUE),
jj.ARRAY_COLUMN_COUNT, jj.STRUCT_COLUMN_COUNT,jj.MAP_COLUMN_COUNT,
k.PARTITION_KEY_NAME, m.PARTITION_CNT,q.NUM_FILES, q.TOTAL_SIZE, q.NUM_ROWS,
q.WRITE_FORMAT_DEFAULT, q.TRANSACTIONAL_PROPERTIES;";
Review Comment:
Tested on an env with millions of the partition and ten thousands of the
table, the performance of collecting summary doesn't degrade afterwards(in
about 2 min).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]