kevinrr888 commented on code in PR #5786:
URL: https://github.com/apache/accumulo/pull/5786#discussion_r2304856364
##########
server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java:
##########
@@ -80,17 +91,142 @@ public static <T extends AbstractId<T>> T getNextId(String
name, ServerContext c
static final Lock tableNameLock = new ReentrantLock();
+ private static KeyExtent findContaining(Ample ample, TableId tableId, Text
row) {
+ Objects.requireNonNull(row);
+ try (var tablets = ample.readTablets().forTable(tableId).overlapping(row,
true, row)
+ .fetch(TabletMetadata.ColumnType.PREV_ROW).build()) {
+ return
tablets.stream().collect(MoreCollectors.onlyElement()).getExtent();
+ }
+ }
+
+ /**
+ * Widen a range to the greatest table split that is before the range and
least table split that
+ * is after the range.
+ */
+ public static KeyExtent widen(Ample ample, KeyExtent extent) {
+ Text prevRowOfStartRowTablet = extent.prevEndRow();
+ Text endRowOfEndRowTablet = extent.endRow();
+
+ if (extent.prevEndRow() != null) {
+ // The startRow is not inclusive, so do not want to find the tablet
containing startRow but
+ // instead find the tablet that contains the next possible row after
startRow
+ Text nextPossibleRow = new
Key(extent.prevEndRow()).followingKey(PartialKey.ROW).getRow();
+ prevRowOfStartRowTablet =
+ findContaining(ample, extent.tableId(),
nextPossibleRow).prevEndRow();
+ }
+
+ if (extent.endRow() != null) {
+ // find the tablet containing endRow and use its endRow
+ endRowOfEndRowTablet = findContaining(ample, extent.tableId(),
extent.endRow()).endRow();
+ }
+
+ return new KeyExtent(extent.tableId(), endRowOfEndRowTablet,
prevRowOfStartRowTablet);
+ }
+
+ public static LockRange widen(Ample ample, TableId tableId, LockRange range,
TableOperation op,
+ boolean tableMustExists) throws AcceptableThriftTableOperationException {
+ if (range.isInfinite()) {
+ return range;
+ }
+ try {
+ var wext = widen(ample, new KeyExtent(tableId, range.getEndRow(),
range.getStartRow()));
+ return LockRange.of(wext.prevEndRow(), wext.endRow());
+ } catch (NoSuchElementException nse) {
+ if (tableMustExists) {
+ throw new AcceptableThriftTableOperationException(tableId.canonical(),
"", op,
+ TableOperationExceptionType.NOTFOUND, "Table does not exist");
+ } else {
+ log.debug("Attempted to widen range {} but no metadata entries found
for table {}", range,
+ tableId);
+ return LockRange.infinite();
+ }
+ }
+
+ }
+
public static long reserveTable(Manager env, TableId tableId, FateId fateId,
LockType lockType,
boolean tableMustExist, TableOperation op) throws Exception {
- if (getLock(env.getContext(), tableId, fateId, lockType).tryLock()) {
+ return reserveTable(env, tableId, fateId, lockType, tableMustExist, op,
LockRange.infinite());
+ }
+
+ public static long reserveTable(Manager env, TableId tableId, FateId fateId,
LockType lockType,
+ boolean tableMustExist, TableOperation op, final LockRange range) throws
Exception {
+ final LockRange widenedRange;
+ if (lockType == LockType.WRITE || op == TableOperation.COMPACT) {
+ /*
+ * Write locks are widened to table split points to avoid
non-overlapping ranges operating on
+ * the same tablet. For example assume a table has splits at c,e and two
fate operations need
+ * write locks on ranges (a1,c2] and (d1,d9]. The fate ranges do not
overlap, however both
+ * will operate on the tablet (c,e]. When the ranges are widened,
(a1,c2] turns into (null,e]
+ * and (d1,d9] turns into (c,e]. The widened ranges for the fate
operations overlap which
+ * prevents both from operating on the same tablet.
+ *
+ * Widening is done for write locks because those need to work on
mutually exclusive sets of
+ * tablets w/ other write or read locks. Read locks do not need to be
mutually exclusive w/
+ * each other and it does not matter if they operate on the same tablet
so widening is not
+ * needed for that case. Widening write locks is sufficient for
detecting overlap w/ any
+ * tablets that read locks need to use. For example if a write locks is
obtained on (a1,c2]
+ * and it widens to (null,e] then a read lock on (d1,d9] would overlap
with the widened range.
+ *
+ * Mostly widening for write locks is nice because fate operations that
get read locks are
+ * probably more frequent. So the work of widening is not done on most
fate operations. Also
+ * widening an infinite range is a quick operation, so create/delete
table will not be slowed
+ * down by widening.
+ *
+ * Widening is done for compactions because those operations widen their
range.
+ */
+ widenedRange = widen(env.getContext().getAmple(), tableId, range, op,
tableMustExist);
+ log.debug("{} widened write lock range from {} to {}", fateId, range,
widenedRange);
+ } else {
+ widenedRange = range;
+ }
+
+ var lock = getLock(env.getContext(), tableId, fateId, lockType,
widenedRange);
+ if (lockType == LockType.WRITE && !widenedRange.equals(lock.getRange())) {
+ // It is possible the range changed since the lock entry was created.
Pre existing locks are
+ // found using the fate id and could have a different range.
+ lock.unlock();
+ // This should be a rare event, log at info in case it happens a lot.
+ log.info(
+ "{} widened range {} differs from existing lock range {}, deleted
lock and will retry",
+ fateId, widenedRange, lock.getRange());
+ return 100;
+ } else if (lockType == LockType.READ) {
+ // Not expecting the lock range on a read lock to change.
+ Preconditions.checkState(widenedRange.equals(lock.getRange()));
+ }
+
+ if (lock.tryLock()) {
+ if (lockType == LockType.WRITE) {
+ // Now that table lock is acquired see if the range still widens to
the same thing. If not
+ // it means the table splits changed so release the lock and try again
later. Once the lock
+ // is acquired assuming the table splits in this range can not change,
so this recheck is
+ // done after getting the lock.
Review Comment:
```suggestion
// it means the table splits changed so release the lock and try
again later. The table splits in this range can not change once the lock is
acquired, so this recheck is done after getting the lock.
```
just a bit more clear
##########
server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java:
##########
@@ -80,17 +91,142 @@ public static <T extends AbstractId<T>> T getNextId(String
name, ServerContext c
static final Lock tableNameLock = new ReentrantLock();
+ private static KeyExtent findContaining(Ample ample, TableId tableId, Text
row) {
+ Objects.requireNonNull(row);
+ try (var tablets = ample.readTablets().forTable(tableId).overlapping(row,
true, row)
+ .fetch(TabletMetadata.ColumnType.PREV_ROW).build()) {
+ return
tablets.stream().collect(MoreCollectors.onlyElement()).getExtent();
+ }
+ }
+
+ /**
+ * Widen a range to the greatest table split that is before the range and
least table split that
+ * is after the range.
+ */
+ public static KeyExtent widen(Ample ample, KeyExtent extent) {
+ Text prevRowOfStartRowTablet = extent.prevEndRow();
+ Text endRowOfEndRowTablet = extent.endRow();
+
+ if (extent.prevEndRow() != null) {
+ // The startRow is not inclusive, so do not want to find the tablet
containing startRow but
+ // instead find the tablet that contains the next possible row after
startRow
+ Text nextPossibleRow = new
Key(extent.prevEndRow()).followingKey(PartialKey.ROW).getRow();
+ prevRowOfStartRowTablet =
+ findContaining(ample, extent.tableId(),
nextPossibleRow).prevEndRow();
+ }
+
+ if (extent.endRow() != null) {
+ // find the tablet containing endRow and use its endRow
+ endRowOfEndRowTablet = findContaining(ample, extent.tableId(),
extent.endRow()).endRow();
+ }
+
+ return new KeyExtent(extent.tableId(), endRowOfEndRowTablet,
prevRowOfStartRowTablet);
+ }
+
+ public static LockRange widen(Ample ample, TableId tableId, LockRange range,
TableOperation op,
+ boolean tableMustExists) throws AcceptableThriftTableOperationException {
+ if (range.isInfinite()) {
+ return range;
+ }
+ try {
+ var wext = widen(ample, new KeyExtent(tableId, range.getEndRow(),
range.getStartRow()));
+ return LockRange.of(wext.prevEndRow(), wext.endRow());
+ } catch (NoSuchElementException nse) {
+ if (tableMustExists) {
+ throw new AcceptableThriftTableOperationException(tableId.canonical(),
"", op,
+ TableOperationExceptionType.NOTFOUND, "Table does not exist");
+ } else {
+ log.debug("Attempted to widen range {} but no metadata entries found
for table {}", range,
+ tableId);
+ return LockRange.infinite();
+ }
+ }
+
+ }
+
public static long reserveTable(Manager env, TableId tableId, FateId fateId,
LockType lockType,
boolean tableMustExist, TableOperation op) throws Exception {
- if (getLock(env.getContext(), tableId, fateId, lockType).tryLock()) {
+ return reserveTable(env, tableId, fateId, lockType, tableMustExist, op,
LockRange.infinite());
+ }
+
+ public static long reserveTable(Manager env, TableId tableId, FateId fateId,
LockType lockType,
+ boolean tableMustExist, TableOperation op, final LockRange range) throws
Exception {
+ final LockRange widenedRange;
+ if (lockType == LockType.WRITE || op == TableOperation.COMPACT) {
+ /*
+ * Write locks are widened to table split points to avoid
non-overlapping ranges operating on
+ * the same tablet. For example assume a table has splits at c,e and two
fate operations need
+ * write locks on ranges (a1,c2] and (d1,d9]. The fate ranges do not
overlap, however both
+ * will operate on the tablet (c,e]. When the ranges are widened,
(a1,c2] turns into (null,e]
+ * and (d1,d9] turns into (c,e]. The widened ranges for the fate
operations overlap which
+ * prevents both from operating on the same tablet.
+ *
+ * Widening is done for write locks because those need to work on
mutually exclusive sets of
+ * tablets w/ other write or read locks. Read locks do not need to be
mutually exclusive w/
+ * each other and it does not matter if they operate on the same tablet
so widening is not
+ * needed for that case. Widening write locks is sufficient for
detecting overlap w/ any
+ * tablets that read locks need to use. For example if a write locks is
obtained on (a1,c2]
+ * and it widens to (null,e] then a read lock on (d1,d9] would overlap
with the widened range.
+ *
+ * Mostly widening for write locks is nice because fate operations that
get read locks are
+ * probably more frequent. So the work of widening is not done on most
fate operations. Also
+ * widening an infinite range is a quick operation, so create/delete
table will not be slowed
+ * down by widening.
+ *
+ * Widening is done for compactions because those operations widen their
range.
+ */
Review Comment:
Very good explanation. I wonder if there's a better/more visible spot for
this. This spot seems fine though
##########
core/src/main/java/org/apache/accumulo/core/fate/AdminUtil.java:
##########
@@ -417,9 +430,10 @@ public void
print(Map<FateInstanceType,ReadOnlyFateStore<T>> readOnlyFateStores,
for (TransactionStatus txStatus : fateStatus.getTransactions()) {
fmt.format(
- "%-15s fateId: %s status: %-18s locked: %-15s locking: %-15s op:
%-15s created: %s%n",
+ "%-15s fateId: %s status: %-18s locked: %-15s locking: %-15s op:
%-15s created: %s lock range: %s%n",
txStatus.getFateOp(), txStatus.getFateId(), txStatus.getStatus(),
txStatus.getHeldLocks(),
- txStatus.getWaitingLocks(), txStatus.getTop(),
txStatus.getTimeCreatedFormatted());
+ txStatus.getWaitingLocks(), txStatus.getTop(),
txStatus.getTimeCreatedFormatted(),
+ txStatus.getLockRange());
Review Comment:
would be good to confirm if any of the fate command descriptions should be
updated with this change.
##########
server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java:
##########
@@ -180,8 +316,9 @@ public static Lock getTableNameLock() {
return tableNameLock;
}
- public static Lock getReadLock(Manager env, AbstractId<?> id, FateId fateId)
{
- return Utils.getLock(env.getContext(), id, fateId, LockType.READ);
+ public static DistributedLock getReadLock(Manager env, AbstractId<?> id,
FateId fateId,
+ LockRange range) {
+ return Utils.getLock(env.getContext(), id, fateId, LockType.READ, range);
Review Comment:
Do `Utils.getReadLock` and
`Utils.reserveTable/Namespace(...DistributedReadWriteLock.LockType.READ...)`
seem to accomplish the same thing? Can these be combined?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]