xiaohui-sun commented on a change in pull request #5208: [TE] Disable alerts if
it has no success run within 30 days
URL: https://github.com/apache/incubator-pinot/pull/5208#discussion_r407146873
##########
File path:
thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/anomaly/monitor/MonitorTaskRunner.java
##########
@@ -120,11 +134,62 @@ private void executeMonitorUpdate(MonitorTaskInfo
monitorTaskInfo) {
// update detection health
updateDetectionHealth();
+
+ // disable alerts that failed consecutively for a long time
+ disableLongFailedAlerts();
+
} catch (Exception e) {
LOG.error("Exception in monitor update task", e);
}
}
+ /**
+ * Disable the alert if it was last updated more than {@code MAX_FAILED_DISABLE_DAYS} days ago
and has had no successful run within the last {@code MAX_FAILED_DISABLE_DAYS} days.
+ */
+ private void disableLongFailedAlerts() {
+ DetectionConfigManager detectionDAO =
DAO_REGISTRY.getDetectionConfigManager();
+ List<DetectionConfigDTO> detectionConfigs = detectionDAO.findAllActive();
+ long currentTimeMills = System.currentTimeMillis();
+ long maxTaskFailMills = TimeUnit.DAYS.toMillis(MAX_FAILED_DISABLE_DAYS);
+ for (DetectionConfigDTO config : detectionConfigs) {
+ try {
+ Timestamp updateTime = config.getUpdateTime();
+ if (updateTime != null && config.getHealth() != null &&
config.getHealth().getDetectionTaskStatus() != null) {
+ long lastTaskExecutionTime =
config.getHealth().getDetectionTaskStatus().getLastTaskExecutionTime();
+ if (updateTime.getTime() <= currentTimeMills - maxTaskFailMills &&
(lastTaskExecutionTime == -1L
+ || lastTaskExecutionTime <= currentTimeMills -
maxTaskFailMills)) {
+ config.setActive(false);
+ detectionDAO.update(config);
+ sendDisableAlertNotificationEmail(config);
+ LOG.info("Disabled alert " + config.getId() + " since it failed
more than " + MAX_FAILED_DISABLE_DAYS + " days");
+ LOG.info("Task last update time: " + config.getUpdateTime());
+ LOG.info("Last success task execution time: " +
lastTaskExecutionTime);
+ }
+ }
+ } catch (Exception e) {
+ LOG.error("Exception in disabling alert ", config.getId());
Review comment:
fixed
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]