This is an automated email from the ASF dual-hosted git repository. stoty pushed a commit to branch 5.2 in repository https://gitbox.apache.org/repos/asf/phoenix.git
The following commit(s) were added to refs/heads/5.2 by this push: new 1a8ce3781d PHOENIX-7331 Fix incompatibilities with HBASE-28644 1a8ce3781d is described below commit 1a8ce3781d59a9472bbdfe98ba3e7992b04b2fb2 Author: Istvan Toth <st...@apache.org> AuthorDate: Tue Jun 18 07:27:30 2024 +0200 PHOENIX-7331 Fix incompatibilities with HBASE-28644 --- .../hbase/index/util/IndexManagementUtil.java | 13 +++--- .../apache/phoenix/util/PhoenixKeyValueUtil.java | 51 +++++++++++++++++++++- 2 files changed, 55 insertions(+), 9 deletions(-) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/hbase/index/util/IndexManagementUtil.java b/phoenix-core-client/src/main/java/org/apache/phoenix/hbase/index/util/IndexManagementUtil.java index 48894b5306..3d8c111d8e 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/hbase/index/util/IndexManagementUtil.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/hbase/index/util/IndexManagementUtil.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Put; @@ -43,6 +42,7 @@ import org.apache.phoenix.hbase.index.covered.Batch; import org.apache.phoenix.hbase.index.covered.data.LazyValueGetter; import org.apache.phoenix.hbase.index.covered.update.ColumnReference; import org.apache.phoenix.hbase.index.scanner.ScannerBuilder.CoveredDeleteScanner; +import org.apache.phoenix.util.PhoenixKeyValueUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -216,8 +216,7 @@ public class IndexManagementUtil { } /** - * Batch all the {@link KeyValue}s in a collection of kvs by timestamp. Updates any {@link KeyValue} with a - * timestamp == {@link HConstants#LATEST_TIMESTAMP} to the timestamp at the time the method is called. + * Batch all the {@link KeyValue}s in a collection of kvs by timestamp. * * @param kvs {@link KeyValue}s to break into batches * @param batches to update with the given kvs @@ -236,16 +235,16 @@ public class IndexManagementUtil { } /** - * Batch all the {@link KeyValue}s in a {@link Mutation} by timestamp. Updates any {@link KeyValue} with a timestamp - * == {@link HConstants#LATEST_TIMESTAMP} to the timestamp at the time the method is called. - * + * Batch all the {@link KeyValue}s in a {@link Mutation} by timestamp. + * * @param m {@link Mutation} from which to extract the {@link KeyValue}s * @return the mutation, broken into batches and sorted in ascending order (smallest first) */ public static Collection<Batch> createTimestampBatchesFromMutation(Mutation m) { Map<Long, Batch> batches = new HashMap<Long, Batch>(); for (List<Cell> family : m.getFamilyCellMap().values()) { - List<KeyValue> familyKVs = KeyValueUtil.ensureKeyValues(family); + // TODO do we really need this to be on-heap ? + List<KeyValue> familyKVs = PhoenixKeyValueUtil.ensureKeyValues(family); createTimestampBatchesFromKeyValues(familyKVs, batches); } // sort the batches diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/util/PhoenixKeyValueUtil.java b/phoenix-core-client/src/main/java/org/apache/phoenix/util/PhoenixKeyValueUtil.java index 23dfc2a31d..a2bced47ee 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/util/PhoenixKeyValueUtil.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/util/PhoenixKeyValueUtil.java @@ -17,6 +17,7 @@ */ package org.apache.phoenix.util; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; @@ -36,6 +37,8 @@ import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.common.base.Function; +import org.apache.hbase.thirdparty.com.google.common.collect.Lists; import org.apache.phoenix.compat.hbase.CompatUtil; import org.apache.phoenix.execute.MutationState.MultiRowMutationState; import org.apache.phoenix.execute.MutationState.RowMutationState; @@ -210,13 +213,55 @@ public class PhoenixKeyValueUtil { return size; } - public static KeyValue maybeCopyCell(Cell c) { + /** + * If c is not a KeyValue, cast it to KeyValue and return it. + * If c is a KeyValue, just return it + * + * @param c cell + * @return either c case to ExtendedCell, or its copy as a KeyValue + */ + public static KeyValue ensureKeyValue(Cell c) { // Same as KeyValueUtil, but HBase has deprecated this method. Avoid depending on something // that will likely be removed at some point in time. if (c == null) return null; // TODO Do we really want to return only KeyValues, or would it be enough to // copy ByteBufferExtendedCells to heap ? // i.e can we avoid copying on-heap cells like BufferedDataBlockEncoder.OnheapDecodedCell ? + if (c instanceof KeyValue) { + return (KeyValue) c; + } else { + return KeyValueUtil.copyToNewKeyValue(c); + } + } + + /** + * Replace non-KeyValue cells with their KeyValue copies + * + * This ensures that all Cells are copied on-heap, but will do + * extra work for any non-KeyValue on-heap cells + * + * @param cells modified, its elements are replaced + * @return the modified input cells object, for convenience + */ + public static List<KeyValue> ensureKeyValues(List<Cell> cells) { + List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell, KeyValue>() { + @Override + public KeyValue apply(Cell arg0) { + return ensureKeyValue(arg0); + } + }); + return new ArrayList<>(lazyList); + } + + public static KeyValue maybeCopyCell(Cell c) { + // Same as KeyValueUtil, but HBase has deprecated this method. Avoid depending on something + // that will likely be removed at some point in time. + if (c == null) { + return null; + } + // TODO Do we really want to return only KeyValues, or would it be enough to + // copy ByteBufferExtendedCells to heap ? + // i.e can we avoid copying on-heap cells like BufferedDataBlockEncoder.OnheapDecodedCell ? if (c instanceof KeyValue) { return (KeyValue) c; } @@ -234,6 +279,7 @@ public class PhoenixKeyValueUtil { ListIterator<Cell> cellsIt = cells.listIterator(); while (cellsIt.hasNext()) { Cell c = cellsIt.next(); + //FIXME this does not catch all off-heap cells if (c instanceof ByteBufferExtendedCell) { cellsIt.set(KeyValueUtil.copyToNewKeyValue(c)); } @@ -259,7 +305,8 @@ public class PhoenixKeyValueUtil { public static void setTimestamp(Mutation m, long timestamp) { byte[] tsBytes = Bytes.toBytes(timestamp); for (List<Cell> family : m.getFamilyCellMap().values()) { - List<KeyValue> familyKVs = org.apache.hadoop.hbase.KeyValueUtil.ensureKeyValues(family); + // TODO Do we really need to copy everything to the HEAP here ? + List<KeyValue> familyKVs = ensureKeyValues(family); for (KeyValue kv : familyKVs) { int tsOffset = kv.getTimestampOffset(); System.arraycopy(tsBytes, 0, kv.getBuffer(), tsOffset, Bytes.SIZEOF_LONG);