Copilot commented on code in PR #8075:
URL: https://github.com/apache/hbase/pull/8075#discussion_r3284684898
##
hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/EWMABlockSizePredicator.java:
##
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * A {@link BlockCompressedSizePredicator} that uses an Exponentially Weighted Moving Average (EWMA)
+ * of the compression ratio to predict the uncompressed block size needed to produce compressed
+ * blocks close to the configured target block size.
+ */
+@InterfaceAudience.Private
+public class EWMABlockSizePredicator implements BlockCompressedSizePredicator, Configurable {
+
+ public static final String EWMA_ALPHA_KEY = "hbase.block.compressed.size.predicator.ewma.alpha";
+ static final double DEFAULT_ALPHA = 0.5;
+
+ private Configuration conf;
+ private double alpha = DEFAULT_ALPHA;
+ private double ewmaRatio;
+ private int adjustedBlockSize;
+ private int configuredMaxBlockSize;
+ private boolean initialized;
+
+ @Override
+ public void setConf(Configuration conf) {
+this.conf = conf;
+this.alpha = conf.getDouble(EWMA_ALPHA_KEY, DEFAULT_ALPHA);
+ }
Review Comment:
setConf assumes conf is non-null and does not validate the configured alpha.
If ReflectionUtils/setConf is ever called with null, this will NPE; and if
alpha is <= 0, > 1, NaN, or Infinity the EWMA formula becomes invalid and can
produce unstable/incorrect ratios. Consider defaulting to DEFAULT_ALPHA when
conf is null and clamping/validating alpha to (0, 1].
##
hbase-diagnostics/src/main/java/org/apache/hadoop/hbase/HFileBlockPerformanceEvaluation.java:
##
@@ -0,0 +1,716 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.ThreadLocalRandom;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.EWMABlockSizePredicator;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.io.hfile.PreviousBlockCompressionRatePredicator;
+import org.apache.hadoop.hbase.io.hfile.UncompressedBlockSizePredicator;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Performance evaluation utility for HFile block encoding, compression algorithms, and block size
+ * predicators ({@link BlockCompressedSizePredicator} implementations).
+ *
+ * Tests are