busbey commented on a change in pull request #921: HBASE-22749: Distributed MOB compactions URL: https://github.com/apache/hbase/pull/921#discussion_r375532118
########## File path: hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java ########## @@ -183,105 +271,184 @@ protected boolean performCompaction(FileDetails fd, InternalScanner scanner, Cel boolean hasMore; Path path = MobUtils.getMobFamilyPath(conf, store.getTableName(), store.getColumnFamilyName()); byte[] fileName = null; - StoreFileWriter mobFileWriter = null, delFileWriter = null; - long mobCells = 0, deleteMarkersCount = 0; + StoreFileWriter mobFileWriter = null; + /* + * mobCells are used only to decide if we need to commit or abort current MOB output file. + */ + long mobCells = 0; long cellsCountCompactedToMob = 0, cellsCountCompactedFromMob = 0; long cellsSizeCompactedToMob = 0, cellsSizeCompactedFromMob = 0; boolean finished = false; + ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build(); throughputController.start(compactionName); - KeyValueScanner kvs = (scanner instanceof KeyValueScanner)? (KeyValueScanner)scanner : null; - long shippedCallSizeLimit = (long) numofFilesToCompact * this.store.getColumnFamilyDescriptor().getBlocksize(); + KeyValueScanner kvs = (scanner instanceof KeyValueScanner) ? (KeyValueScanner) scanner : null; + long shippedCallSizeLimit = + (long) numofFilesToCompact * this.store.getColumnFamilyDescriptor().getBlocksize(); + + Cell mobCell = null; try { - try { - // If the mob file writer could not be created, directly write the cell to the store file. 
- mobFileWriter = mobStore.createWriterInTmp(new Date(fd.latestPutTs), fd.maxKeyCount, - compactionCompression, store.getRegionInfo().getStartKey(), true); - fileName = Bytes.toBytes(mobFileWriter.getPath().getName()); - } catch (IOException e) { - LOG.warn("Failed to create mob writer, " - + "we will continue the compaction by writing MOB cells directly in store files", e); - } - if (major) { - try { - delFileWriter = mobStore.createDelFileWriterInTmp(new Date(fd.latestPutTs), - fd.maxKeyCount, compactionCompression, store.getRegionInfo().getStartKey()); - } catch (IOException e) { - LOG.warn( - "Failed to create del writer, " - + "we will continue the compaction by writing delete markers directly in store files", - e); - } - } + + mobFileWriter = newMobWriter(fd); + fileName = Bytes.toBytes(mobFileWriter.getPath().getName()); + do { hasMore = scanner.next(cells, scannerContext); - if (LOG.isDebugEnabled()) { - now = EnvironmentEdgeManager.currentTime(); - } + now = EnvironmentEdgeManager.currentTime(); for (Cell c : cells) { - if (major && CellUtil.isDelete(c)) { - if (MobUtils.isMobReferenceCell(c) || delFileWriter == null) { - // Directly write it to a store file - writer.append(c); + if (compactMOBs) { + if (MobUtils.isMobReferenceCell(c)) { + String fName = MobUtils.getMobFileName(c); + Path pp = new Path(new Path(fs.getUri()), new Path(path, fName)); + + // Added to support migration + try { + mobCell = mobStore.resolve(c, true, false).getCell(); + } catch (FileNotFoundException fnfe) { + if (discardMobMiss) { + LOG.error("Missing MOB cell: file={} not found cell={}", fName, c); + continue; + } else { + throw fnfe; + } + } + + if (discardMobMiss && mobCell.getValueLength() == 0) { + LOG.error("Missing MOB cell value: file={} cell={}", pp, mobCell); + continue; + } else if (mobCell.getValueLength() == 0) { + String errMsg = String.format("Found 0 length MOB cell in a file=%s cell=%s", + fName, mobCell); + throw new IOException(errMsg); + } + + if 
(mobCell.getValueLength() > mobSizeThreshold) { + // put the mob data back to the MOB store file + PrivateCellUtil.setSequenceId(mobCell, c.getSequenceId()); + if (!ioOptimizedMode) { + mobFileWriter.append(mobCell); + mobCells++; + writer.append( + MobUtils.createMobRefCell(mobCell, fileName, this.mobStore.getRefCellTags())); + } else { + // I/O optimized mode + // Check if MOB cell origin file size is + // greater than threshold + Long size = mobLengthMap.get().get(fName); + if (size == null) { + // FATAL error, abort compaction + String msg = String.format( + "Found unreferenced MOB file during compaction %s, aborting compaction %s", + fName, getStoreInfo()); + throw new IOException(msg); + } + // Can not be null + if (size < maxMobFileSize) { + // If MOB cell origin file is below threshold + // it is get compacted + mobFileWriter.append(mobCell); + // Update number of mobCells in a current mob writer + mobCells++; + writer.append( + MobUtils.createMobRefCell(mobCell, fileName, this.mobStore.getRefCellTags())); + // Update total size of the output (we do not take into account + // file compression yet) + long len = mobFileWriter.getPos(); + if (len > maxMobFileSize) { + LOG.debug("Closing output MOB File, length={} file={}, store=", len, + Bytes.toString(fileName), getStoreInfo()); + commitOrAbortMobWriter(mobFileWriter, fd.maxSeqId, mobCells, major); + mobFileWriter = newMobWriter(fd); + fileName = Bytes.toBytes(mobFileWriter.getPath().getName()); + mobCells = 0; + } + } else { + // We leave large MOB file as is (is not compacted), Review comment: At this point we've already read the value out of the MOB file. Shouldn't we check this sooner to avoid that I/O? For example, we can get the expected value length from the reference cell; if it is above the MOB threshold and the referenced file is above the compaction cutoff, then we can do this step without reading any data.
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services