[ https://issues.apache.org/jira/browse/HIVE-25842?focusedWorklogId=712178&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-712178 ]
ASF GitHub Bot logged work on HIVE-25842: ----------------------------------------- Author: ASF GitHub Bot Created on: 20/Jan/22 16:36 Start Date: 20/Jan/22 16:36 Worklog Time Spent: 10m Work Description: lcspinter commented on a change in pull request #2916: URL: https://github.com/apache/hive/pull/2916#discussion_r788944777 ########## File path: ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/metrics/DeltaFilesMetricReporter.java ########## @@ -310,81 +143,6 @@ private static String getDeltaCountKey(String dbName, String tableName, String p return key.toString(); } - private static void logDeltaDirMetrics(AcidDirectory dir, Configuration conf, int numObsoleteDeltas, int numDeltas, - int numSmallDeltas) { - long loggerFrequency = HiveConf - .getTimeVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ACID_METRICS_LOGGER_FREQUENCY, TimeUnit.MILLISECONDS); - if (loggerFrequency <= 0) { - return; - } - long currentTime = System.currentTimeMillis(); - if (lastSuccessfulLoggingTime == 0 || currentTime >= lastSuccessfulLoggingTime + loggerFrequency) { - lastSuccessfulLoggingTime = currentTime; - if (numDeltas >= HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ACTIVE_DELTA_DIR_THRESHOLD)) { - LOG.warn("Directory " + dir.getPath() + " contains " + numDeltas + " active delta directories. This can " + - "cause performance degradation."); - } - - if (numObsoleteDeltas >= - HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_OBSOLETE_DELTA_DIR_THRESHOLD)) { - LOG.warn("Directory " + dir.getPath() + " contains " + numDeltas + " obsolete delta directories. This can " + - "indicate compaction cleaner issues."); - } - - if (numSmallDeltas >= HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_SMALL_DELTA_DIR_THRESHOLD)) { - LOG.warn("Directory " + dir.getPath() + " contains " + numDeltas + " small delta directories. 
This can " + - "indicate performance degradation and there might be a problem with your streaming setup."); - } - } - } - - private static int getNumObsoleteDeltas(AcidDirectory dir, long checkThresholdInSec) throws IOException { - int numObsoleteDeltas = 0; - for (Path obsolete : dir.getObsolete()) { - FileStatus stat = dir.getFs().getFileStatus(obsolete); - if (System.currentTimeMillis() - stat.getModificationTime() >= checkThresholdInSec * 1000) { - numObsoleteDeltas++; - } - } - return numObsoleteDeltas; - } - - public static void createCountersForAcidMetrics(TezCounters tezCounters, JobConf jobConf) { - if (HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED) && - MetastoreConf.getBoolVar(jobConf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON)) { - - Arrays.stream(DeltaFilesMetricType.values()) - .filter(type -> jobConf.get(type.name()) != null) - .forEach(type -> - Splitter.on(ENTRY_SEPARATOR).withKeyValueSeparator(KEY_VALUE_SEPARATOR).split(jobConf.get(type.name())).forEach( - (path, cnt) -> tezCounters.findCounter(type.value, path).setValue(Long.parseLong(cnt)) - ) - ); - } - } - - public static void addAcidMetricsToConfObj(EnumMap<DeltaFilesMetricType, - Queue<Pair<String, Integer>>> deltaFilesStats, Configuration conf) { - try { - deltaFilesStats.forEach((type, value) -> conf - .set(type.name(), Joiner.on(ENTRY_SEPARATOR).withKeyValueSeparator(KEY_VALUE_SEPARATOR).join(value))); - - } catch (Exception e) { - LOG.warn("Couldn't add Delta metrics to conf object", e); - } - } - - public static void backPropagateAcidMetrics(JobConf jobConf, Configuration conf) { - if (HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED) && - MetastoreConf.getBoolVar(jobConf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON)) { - try { - Arrays.stream(DeltaFilesMetricType.values()).filter(type -> conf.get(type.name()) != null) - .forEach(type -> jobConf.set(type.name(), conf.get(type.name()))); - } catch (Exception e) { 
- LOG.warn("Couldn't back propagate Delta metrics to jobConf object", e); - } - } - } private static long getBaseSize(AcidDirectory dir) throws IOException { long baseSize = 0; Review comment: removed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 712178) Time Spent: 5.5h (was: 5h 20m) > Reimplement delta file metric collection > ---------------------------------------- > > Key: HIVE-25842 > URL: https://issues.apache.org/jira/browse/HIVE-25842 > Project: Hive > Issue Type: Improvement > Reporter: László Pintér > Assignee: László Pintér > Priority: Major > Labels: pull-request-available > Time Spent: 5.5h > Remaining Estimate: 0h > > FUNCTIONALITY: Metrics are collected only when a Tez query runs against a table > (select * and select count( * ) don't update the metrics) > Metrics aren't updated after compaction or cleaning after compaction, so > users will probably see "issues" with compaction (like many active or > obsolete or small deltas) that don't exist. > RISK: Metrics are collected during queries – we tried to put a try-catch > around each method in DeltaFilesMetricReporter but of course this isn't > foolproof. This is a HUGE performance and functionality liability. Tests > caught some issues, but our tests aren't perfect. -- This message was sent by Atlassian Jira (v8.20.1#820001)