rmdmattingly commented on code in PR #7294: URL: https://github.com/apache/hbase/pull/7294#discussion_r2353625973
########## hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/PreSortedCellsReducer.java: ########## @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.mapreduce; + +import java.io.IOException; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.MapReduceExtendedCell; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.yetus.audience.InterfaceAudience; + [email protected] +public class PreSortedCellsReducer + extends Reducer<KeyOnlyCellComparable, Cell, ImmutableBytesWritable, Cell> { + + @Override + protected void reduce(KeyOnlyCellComparable key, Iterable<Cell> values, Context context) + throws IOException, InterruptedException { + + int index = 0; + for (Cell cell : values) { + context.write(new ImmutableBytesWritable(CellUtil.cloneRow(key.getCell())), + new MapReduceExtendedCell(PrivateCellUtil.ensureExtendedCell(cell))); Review Comment: It's probably worth the optimization of only building this ImmutableBytesWritable once? Or is it a bug that we're calling key#getCell within the loop here? ########## hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyOnlyCellComparable.java: ########## @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.mapreduce; + +import java.io.ByteArrayInputStream; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.IOException; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.ExtendedCell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.WritableComparator; +import org.apache.yetus.audience.InterfaceAudience; + [email protected] +public class KeyOnlyCellComparable implements WritableComparable<KeyOnlyCellComparable> { + + static { + WritableComparator.define(KeyOnlyCellComparable.class, new KeyOnlyCellComparator()); + } + + private ExtendedCell cell = null; + + public KeyOnlyCellComparable() { + } + + public KeyOnlyCellComparable(ExtendedCell cell) { + this.cell = cell; + } + + public ExtendedCell getCell() { + return cell; + } + + @Override + @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "EQ_COMPARETO_USE_OBJECT_EQUALS", + justification = "This is wrong, yes, but we should be purging Writables, not fixing them") + public int compareTo(KeyOnlyCellComparable o) { + return CellComparator.getInstance().compare(cell, o.cell); + } + + @Override + public void write(DataOutput out) throws IOException { + int keyLen = PrivateCellUtil.estimatedSerializedSizeOfKey(cell); + int valueLen = 0; // We avoid writing value here. So just serialize as if an empty value. + out.writeInt(keyLen + valueLen + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE); + out.writeInt(keyLen); + out.writeInt(valueLen); + PrivateCellUtil.writeFlatKey(cell, out); + out.writeLong(cell.getSequenceId()); + } + + @Override + public void readFields(DataInput in) throws IOException { + cell = KeyValue.create(in); + long seqId = in.readLong(); + cell.setSequenceId(seqId); + } + + public static class KeyOnlyCellComparator extends WritableComparator { + + @Override + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { Review Comment: You might want to reuse the CellComparator here — it's probably better practice, and probably faster, vs doing so much full deserialization. Something like ```java @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { int kvLen1 = Bytes.toInt(b1, s1); int keyLen1 = Bytes.toInt(b1, s1 + 4); int kvLen2 = Bytes.toInt(b2, s2); int keyLen2 = Bytes.toInt(b2, s2 + 4); int keyOffset1 = s1 + 12; // 4 + 4 + 4 bytes int keyOffset2 = s2 + 12; return CellComparator.getInstance().compareKeys( b1, keyOffset1, keyLen1, b2, keyOffset2, keyLen2 ); } ``` ########## hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/KeyOnlyCellComparable.java: ########## @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.mapreduce; + +import java.io.ByteArrayInputStream; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.IOException; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.ExtendedCell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.WritableComparator; +import org.apache.yetus.audience.InterfaceAudience; + [email protected] +public class KeyOnlyCellComparable implements WritableComparable<KeyOnlyCellComparable> { + + static { + WritableComparator.define(KeyOnlyCellComparable.class, new KeyOnlyCellComparator()); + } + + private ExtendedCell cell = null; + + public KeyOnlyCellComparable() { + } + + public KeyOnlyCellComparable(ExtendedCell cell) { + this.cell = cell; + } + + public ExtendedCell getCell() { + return cell; + } + + @Override + @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "EQ_COMPARETO_USE_OBJECT_EQUALS", + justification = "This is wrong, yes, but we should be purging Writables, not fixing them") + public int compareTo(KeyOnlyCellComparable o) { + return CellComparator.getInstance().compare(cell, o.cell); + } + + @Override + public void write(DataOutput out) throws IOException { + int keyLen = PrivateCellUtil.estimatedSerializedSizeOfKey(cell); + int valueLen = 0; // We avoid writing value here. So just serialize as if an empty value. + out.writeInt(keyLen + valueLen + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE); + out.writeInt(keyLen); + out.writeInt(valueLen); + PrivateCellUtil.writeFlatKey(cell, out); + out.writeLong(cell.getSequenceId()); + } + + @Override + public void readFields(DataInput in) throws IOException { + cell = KeyValue.create(in); + long seqId = in.readLong(); + cell.setSequenceId(seqId); + } + + public static class KeyOnlyCellComparator extends WritableComparator { + + @Override + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { + try { + KeyOnlyCellComparable kv1 = new KeyOnlyCellComparable(); + kv1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1))); + KeyOnlyCellComparable kv2 = new KeyOnlyCellComparable(); + kv2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2))); Review Comment: Do we need to close these DataInputStreams? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
