Maximilian Michels created FLINK-31345: ------------------------------------------
Summary: Trim autoscaler configMap to not exceed 1mb size limit Key: FLINK-31345 URL: https://issues.apache.org/jira/browse/FLINK-31345 Project: Flink Issue Type: Bug Components: Autoscaler, Kubernetes Operator Affects Versions: kubernetes-operator-1.4.0 Reporter: Maximilian Michels Fix For: kubernetes-operator-1.5.0 When the {{autoscaler-<deployment_name>}} ConfigMap which is used to persist scaling decisions and metrics becomes too large, the following error is thrown consistently: {noformat} io.fabric8.kubernetes.client.KubernetesClientException: Operation: [replace] for kind: [ConfigMap] with name: [deployment] in namespace: [namespace] failed. at io.fabric8.kubernetes.client.KubernetesClientException.launderThrowable(KubernetesClientException.java:159) at io.fabric8.kubernetes.client.dsl.internal.HasMetadataOperation.lambda$replace$0(HasMetadataOperation.java:169) at io.fabric8.kubernetes.client.dsl.internal.HasMetadataOperation.replace(HasMetadataOperation.java:172) at io.fabric8.kubernetes.client.dsl.internal.HasMetadataOperation.replace(HasMetadataOperation.java:113) at io.fabric8.kubernetes.client.dsl.internal.HasMetadataOperation.replace(HasMetadataOperation.java:41) at io.fabric8.kubernetes.client.extension.ResourceAdapter.replace(ResourceAdapter.java:252) at org.apache.flink.kubernetes.operator.autoscaler.AutoScalerInfo.replaceInKubernetes(AutoScalerInfo.java:167) at org.apache.flink.kubernetes.operator.autoscaler.JobAutoScalerImpl.scale(JobAutoScalerImpl.java:113) at org.apache.flink.kubernetes.operator.reconciler.deployment.AbstractFlinkResourceReconciler.reconcile(AbstractFlinkResourceReconciler.java:178) at org.apache.flink.kubernetes.operator.controller.FlinkDeploymentController.reconcile(FlinkDeploymentController.java:130) at org.apache.flink.kubernetes.operator.controller.FlinkDeploymentController.reconcile(FlinkDeploymentController.java:56) at io.javaoperatorsdk.operator.processing.Controller$1.execute(Controller.java:145) at io.javaoperatorsdk.operator.processing.Controller$1.execute(Controller.java:103) at org.apache.flink.kubernetes.operator.metrics.OperatorJosdkMetrics.timeControllerExecution(OperatorJosdkMetrics.java:80) at io.javaoperatorsdk.operator.processing.Controller.reconcile(Controller.java:102) at io.javaoperatorsdk.operator.processing.event.ReconciliationDispatcher.reconcileExecution(ReconciliationDispatcher.java:139) at io.javaoperatorsdk.operator.processing.event.ReconciliationDispatcher.handleReconcile(ReconciliationDispatcher.java:119) at io.javaoperatorsdk.operator.processing.event.ReconciliationDispatcher.handleDispatch(ReconciliationDispatcher.java:89) at io.javaoperatorsdk.operator.processing.event.ReconciliationDispatcher.handleExecution(ReconciliationDispatcher.java:62) at io.javaoperatorsdk.operator.processing.event.EventProcessor$ReconcilerExecutor.run(EventProcessor.java:406) at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) at java.base/java.lang.Thread.run(Unknown Source) Caused by: java.io.IOException: stream was reset: NO_ERROR at io.fabric8.kubernetes.client.dsl.internal.OperationSupport.waitForResult(OperationSupport.java:514) at io.fabric8.kubernetes.client.dsl.internal.OperationSupport.handleResponse(OperationSupport.java:551) at io.fabric8.kubernetes.client.dsl.internal.OperationSupport.handleUpdate(OperationSupport.java:347) at io.fabric8.kubernetes.client.dsl.internal.BaseOperation.handleUpdate(BaseOperation.java:680) at io.fabric8.kubernetes.client.dsl.internal.HasMetadataOperation.lambda$replace$0(HasMetadataOperation.java:167) ... 21 more Caused by: okhttp3.internal.http2.StreamResetException: stream was reset: NO_ERROR at okhttp3.internal.http2.Http2Stream.checkOutNotClosed$okhttp(Http2Stream.kt:646) at okhttp3.internal.http2.Http2Stream$FramingSink.emitFrame(Http2Stream.kt:557) at okhttp3.internal.http2.Http2Stream$FramingSink.write(Http2Stream.kt:532) at okio.ForwardingSink.write(ForwardingSink.kt:29) at okhttp3.internal.connection.Exchange$RequestBodySink.write(Exchange.kt:218) at okio.RealBufferedSink.emitCompleteSegments(RealBufferedSink.kt:255) at okio.RealBufferedSink.write(RealBufferedSink.kt:185) at okhttp3.RequestBody$Companion$toRequestBody$2.writeTo(RequestBody.kt:152) at okhttp3.internal.http.CallServerInterceptor.intercept(CallServerInterceptor.kt:59) at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.kt:109) at okhttp3.internal.connection.ConnectInterceptor.intercept(ConnectInterceptor.kt:34) at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.kt:109) at okhttp3.internal.cache.CacheInterceptor.intercept(CacheInterceptor.kt:95) at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.kt:109) at okhttp3.internal.http.BridgeInterceptor.intercept(BridgeInterceptor.kt:83) at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.kt:109) at okhttp3.internal.http.RetryAndFollowUpInterceptor.intercept(RetryAndFollowUpInterceptor.kt:76) at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.kt:109) at org.apache.flink.kubernetes.operator.metrics.KubernetesClientMetrics.intercept(KubernetesClientMetrics.java:130) at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.kt:109) at io.fabric8.kubernetes.client.okhttp.OkHttpClientBuilderImpl$InteceptorAdapter.intercept(OkHttpClientBuilderImpl.java:70) at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.kt:109) at io.fabric8.kubernetes.client.okhttp.OkHttpClientBuilderImpl$InteceptorAdapter.intercept(OkHttpClientBuilderImpl.java:70) at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.kt:109) at io.fabric8.kubernetes.client.okhttp.OkHttpClientBuilderImpl$InteceptorAdapter.intercept(OkHttpClientBuilderImpl.java:70) at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.kt:109) at okhttp3.internal.connection.RealCall.getResponseWithInterceptorChain$okhttp(RealCall.kt:201) at okhttp3.internal.connection.RealCall$AsyncCall.run(RealCall.kt:517) ... 3 more Suppressed: okhttp3.internal.http2.StreamResetException: stream was reset: NO_ERROR ... 31 more {noformat} -- This message was sent by Atlassian Jira (v8.20.10#820010)