busbey commented on a change in pull request #921: HBASE-22749: Distributed MOB 
compactions
URL: https://github.com/apache/hbase/pull/921#discussion_r379050562
 
 

 ##########
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java
 ##########
 @@ -183,105 +271,184 @@ protected boolean performCompaction(FileDetails fd, 
InternalScanner scanner, Cel
     boolean hasMore;
     Path path = MobUtils.getMobFamilyPath(conf, store.getTableName(), 
store.getColumnFamilyName());
     byte[] fileName = null;
-    StoreFileWriter mobFileWriter = null, delFileWriter = null;
-    long mobCells = 0, deleteMarkersCount = 0;
+    StoreFileWriter mobFileWriter = null;
+    /*
+     * mobCells are used only to decide if we need to commit or abort current 
MOB output file.
+     */
+    long mobCells = 0;
     long cellsCountCompactedToMob = 0, cellsCountCompactedFromMob = 0;
     long cellsSizeCompactedToMob = 0, cellsSizeCompactedFromMob = 0;
     boolean finished = false;
+
     ScannerContext scannerContext =
         ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
     throughputController.start(compactionName);
-    KeyValueScanner kvs = (scanner instanceof KeyValueScanner)? 
(KeyValueScanner)scanner : null;
-    long shippedCallSizeLimit = (long) numofFilesToCompact * 
this.store.getColumnFamilyDescriptor().getBlocksize();
+    KeyValueScanner kvs = (scanner instanceof KeyValueScanner) ? 
(KeyValueScanner) scanner : null;
+    long shippedCallSizeLimit =
+        (long) numofFilesToCompact * 
this.store.getColumnFamilyDescriptor().getBlocksize();
+
+    Cell mobCell = null;
     try {
-      try {
-        // If the mob file writer could not be created, directly write the 
cell to the store file.
-        mobFileWriter = mobStore.createWriterInTmp(new Date(fd.latestPutTs), 
fd.maxKeyCount,
-          compactionCompression, store.getRegionInfo().getStartKey(), true);
-        fileName = Bytes.toBytes(mobFileWriter.getPath().getName());
-      } catch (IOException e) {
-        LOG.warn("Failed to create mob writer, "
-               + "we will continue the compaction by writing MOB cells 
directly in store files", e);
-      }
-      if (major) {
-        try {
-          delFileWriter = mobStore.createDelFileWriterInTmp(new 
Date(fd.latestPutTs),
-            fd.maxKeyCount, compactionCompression, 
store.getRegionInfo().getStartKey());
-        } catch (IOException e) {
-          LOG.warn(
-            "Failed to create del writer, "
-            + "we will continue the compaction by writing delete markers 
directly in store files",
-            e);
-        }
-      }
+
+      mobFileWriter = newMobWriter(fd);
+      fileName = Bytes.toBytes(mobFileWriter.getPath().getName());
+
       do {
         hasMore = scanner.next(cells, scannerContext);
-        if (LOG.isDebugEnabled()) {
-          now = EnvironmentEdgeManager.currentTime();
-        }
+        now = EnvironmentEdgeManager.currentTime();
         for (Cell c : cells) {
-          if (major && CellUtil.isDelete(c)) {
-            if (MobUtils.isMobReferenceCell(c) || delFileWriter == null) {
-              // Directly write it to a store file
-              writer.append(c);
+          if (compactMOBs) {
+            if (MobUtils.isMobReferenceCell(c)) {
+              String fName = MobUtils.getMobFileName(c);
+              Path pp = new Path(new Path(fs.getUri()), new Path(path, fName));
+
+              // Added to support migration
+              try {
+                mobCell = mobStore.resolve(c, true, false).getCell();
+              } catch (FileNotFoundException fnfe) {
+                if (discardMobMiss) {
+                  LOG.error("Missing MOB cell: file={} not found cell={}", 
fName, c);
+                  continue;
+                } else {
+                  throw fnfe;
+                }
+              }
+
+              if (discardMobMiss && mobCell.getValueLength() == 0) {
+                LOG.error("Missing MOB cell value: file={} cell={}", pp, 
mobCell);
+                continue;
+              } else if (mobCell.getValueLength() == 0) {
+                String errMsg = String.format("Found 0 length MOB cell in a 
file=%s cell=%s",
+                  fName, mobCell);
+                throw new IOException(errMsg);
+              }
+
+              if (mobCell.getValueLength() > mobSizeThreshold) {
+                // put the mob data back to the MOB store file
+                PrivateCellUtil.setSequenceId(mobCell, c.getSequenceId());
+                if (!ioOptimizedMode) {
+                  mobFileWriter.append(mobCell);
+                  mobCells++;
+                  writer.append(
+                    MobUtils.createMobRefCell(mobCell, fileName, 
this.mobStore.getRefCellTags()));
+                } else {
+                  // I/O optimized mode
+                  // Check if MOB cell origin file size is
+                  // greater than threshold
+                  Long size = mobLengthMap.get().get(fName);
+                  if (size == null) {
+                    // FATAL error, abort compaction
+                    String msg = String.format(
+                      "Found unreferenced MOB file during compaction %s, 
aborting compaction %s",
+                      fName, getStoreInfo());
+                    throw new IOException(msg);
+                  }
+                  // Can not be null
+                  if (size < maxMobFileSize) {
+                    // If MOB cell origin file is below threshold
+                    // it is get compacted
+                    mobFileWriter.append(mobCell);
+                    // Update number of mobCells in a current mob writer
+                    mobCells++;
+                    writer.append(
+                      MobUtils.createMobRefCell(mobCell, fileName, 
this.mobStore.getRefCellTags()));
+                    // Update total size of the output (we do not take into 
account
+                    // file compression yet)
+                    long len = mobFileWriter.getPos();
+                    if (len > maxMobFileSize) {
+                      LOG.debug("Closing output MOB File, length={} file={}, 
store=", len,
+                        Bytes.toString(fileName), getStoreInfo());
+                      commitOrAbortMobWriter(mobFileWriter, fd.maxSeqId, 
mobCells, major);
+                      mobFileWriter = newMobWriter(fd);
+                      fileName = 
Bytes.toBytes(mobFileWriter.getPath().getName());
+                      mobCells = 0;
+                    }
+                  } else {
+                    // We leave large MOB file as is (is not compacted),
 
 Review comment:
   I believe this block covers the case where we have a value that we're going 
to keep in the MOB area, but the mob hfile that currently holds that value is 
above the threshold that controls whether that hfile continues to 
participate in compactions.
   
   If that's correct, then two things:
   
   1) the use of `mobCell` to determine that the value size is over the mob 
threshold (around line 326) means we resolved the MOB value (looks like line 
~307). We could avoid the IO needed to pull that value up out of the backing 
file, because the size is stored in the reference cell `c`.
   
   2) Here in the block we store the name of the backing hfile in the metadata, 
but we write the whole mob cell into the non-mob hfile rather than the original 
reference cell `c`. I missed this when I was first thinking about (1) above, 
but I think writing the whole mob cell here is incorrect?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to