zentol commented on a change in pull request #6850: [FLINK-10252] Handle oversized metric messages URL: https://github.com/apache/flink/pull/6850#discussion_r226616012
########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/metrics/dump/MetricDumpSerialization.java ########## @@ -124,55 +146,125 @@ public MetricSerializationResult serialize( Map<Counter, Tuple2<QueryScopeInfo, String>> counters, Map<Gauge<?>, Tuple2<QueryScopeInfo, String>> gauges, Map<Histogram, Tuple2<QueryScopeInfo, String>> histograms, - Map<Meter, Tuple2<QueryScopeInfo, String>> meters) { + Map<Meter, Tuple2<QueryScopeInfo, String>> meters, + long maximumFramesize, + MetricQueryService queryService) { - buffer.clear(); + boolean unregisterRemainingMetrics = false; + countersBuffer.clear(); int numCounters = 0; for (Map.Entry<Counter, Tuple2<QueryScopeInfo, String>> entry : counters.entrySet()) { + if (unregisterRemainingMetrics) { + queryService.unregister(entry.getKey()); + continue; + } + try { - serializeCounter(buffer, entry.getValue().f0, entry.getValue().f1, entry.getKey()); + serializeCounter(countersBuffer, entry.getValue().f0, entry.getValue().f1, entry.getKey()); numCounters++; + if (countersBuffer.length() > maximumFramesize) { + LOG.warn("The serialized counter metric is larger than the maximum frame size, " + + " so maybe not all metrics would be reported."); + unregisterRemainingMetrics = true; + //clear all, because we can not revoke the latest metrics which caused overflow + queryService.unregister(entry.getKey()); + countersBuffer.clear(); + numCounters = 0; + } } catch (Exception e) { LOG.debug("Failed to serialize counter.", e); } } + gaugesBuffer.clear(); int numGauges = 0; for (Map.Entry<Gauge<?>, Tuple2<QueryScopeInfo, String>> entry : gauges.entrySet()) { + if (unregisterRemainingMetrics) { + queryService.unregister(entry.getKey()); + continue; + } + try { - serializeGauge(buffer, entry.getValue().f0, entry.getValue().f1, entry.getKey()); + serializeGauge(gaugesBuffer, entry.getValue().f0, entry.getValue().f1, entry.getKey()); numGauges++; + if (gaugesBuffer.length() > maximumFramesize) { + LOG.warn("The serialized 
gauge metric is larger than the maximum frame size, " + + " so maybe not all metrics would be reported."); + unregisterRemainingMetrics = true; + queryService.unregister(entry.getKey()); + gaugesBuffer.clear(); + numGauges = 0; + } } catch (Exception e) { LOG.debug("Failed to serialize gauge.", e); } } - int numHistograms = 0; - for (Map.Entry<Histogram, Tuple2<QueryScopeInfo, String>> entry : histograms.entrySet()) { + metersBuffer.clear(); + int numMeters = 0; + for (Map.Entry<Meter, Tuple2<QueryScopeInfo, String>> entry : meters.entrySet()) { + if (unregisterRemainingMetrics) { + queryService.unregister(entry.getKey()); + continue; + } + try { - serializeHistogram(buffer, entry.getValue().f0, entry.getValue().f1, entry.getKey()); - numHistograms++; + serializeMeter(metersBuffer, entry.getValue().f0, entry.getValue().f1, entry.getKey()); + numMeters++; + if (metersBuffer.length() > maximumFramesize) { + LOG.warn("The serialized meter metric is larger than the maximum frame size, " + + " so maybe not all metrics would be reported."); + unregisterRemainingMetrics = true; + queryService.unregister(entry.getKey()); + metersBuffer.clear(); + numMeters = 0; + } } catch (Exception e) { - LOG.debug("Failed to serialize histogram.", e); + LOG.debug("Failed to serialize meter.", e); } } - int numMeters = 0; - for (Map.Entry<Meter, Tuple2<QueryScopeInfo, String>> entry : meters.entrySet()) { + histogramsBuffer.clear(); + int numHistograms = 0; + for (Map.Entry<Histogram, Tuple2<QueryScopeInfo, String>> entry : histograms.entrySet()) { + if (unregisterRemainingMetrics) { + queryService.unregister(entry.getKey()); + continue; + } + try { - serializeMeter(buffer, entry.getValue().f0, entry.getValue().f1, entry.getKey()); - numMeters++; + serializeHistogram(histogramsBuffer, entry.getValue().f0, entry.getValue().f1, entry.getKey()); Review comment: The re-ordering is not necessary if the decision is left to the MetricQueryService (MQS).
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services