szetszwo commented on code in PR #9796:
URL: https://github.com/apache/ozone/pull/9796#discussion_r2842421637
##########
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java:
##########
@@ -216,42 +221,152 @@ public OMRequest getLastRequestToSubmit() {
private OMResponse submitReadRequestToOM(OMRequest request)
throws ServiceException {
- // Read from leader or followers using linearizable read
- if (ozoneManager.getConfig().isFollowerReadLocalLeaseEnabled() &&
- allowFollowerReadLocalLease(omRatisServer.getServerDivision(),
- ozoneManager.getConfig().getFollowerReadLocalLeaseLagLimit(),
- ozoneManager.getConfig().getFollowerReadLocalLeaseTimeMs())) {
- ozoneManager.getMetrics().incNumFollowerReadLocalLeaseSuccess();
+ if (request.getCmdType().equals(PrepareStatus)) {
+ // PrepareStatus is an OM request that only target a single OM node.
+ // Therefore, all PrepareStatus requests should be served immediately
without failover regardless
+ // of the OM node leadership or the read consistency. See
PrepareSubCommand.
+ // The implementation is not ideal, but exists for compatibility reason.
return handler.handleReadRequest(request);
- }
- // Get current OM's role
- RaftServerStatus raftServerStatus = omRatisServer.getLeaderStatus();
- // === 1. Follower linearizable read ===
- if (raftServerStatus == NOT_LEADER && omRatisServer.isLinearizableRead()) {
- ozoneManager.getMetrics().incNumLinearizableRead();
- return ozoneManager.getOmExecutionFlow().submit(request, false);
}
- // === 2. Leader local read (skip ReadIndex if allowed) ===
- if (raftServerStatus == LEADER_AND_READY ||
request.getCmdType().equals(PrepareStatus)) {
- if (ozoneManager.getConfig().isAllowLeaderSkipLinearizableRead()) {
- ozoneManager.getMetrics().incNumLeaderSkipLinearizableRead();
- // leader directly serves local committed data
+
+ if (!request.hasReadConsistencyHint() ||
!request.getReadConsistencyHint().hasConsistencyType() ||
+ request.getReadConsistencyHint().getConsistencyType() ==
CONSISTENCY_TYPE_UNKNOWN) {
+ // Read from leader or followers using linearizable read
+ if (ozoneManager.getConfig().isFollowerReadLocalLeaseEnabled() &&
+ allowFollowerReadLocalLease(omRatisServer.getServerDivision(),
+ ozoneManager.getConfig().getFollowerReadLocalLeaseLagLimit(),
+ ozoneManager.getConfig().getFollowerReadLocalLeaseTimeMs())) {
+ ozoneManager.getMetrics().incNumFollowerReadLocalLeaseSuccess();
return handler.handleReadRequest(request);
}
- // otherwise use linearizable path when enabled
- if (omRatisServer.isLinearizableRead()) {
+ // Get current OM's role
+ RaftServerStatus raftServerStatus = omRatisServer.getLeaderStatus();
+ // === 1. Follower linearizable read ===
+ if (raftServerStatus == NOT_LEADER &&
omRatisServer.isLinearizableRead()) {
ozoneManager.getMetrics().incNumLinearizableRead();
return ozoneManager.getOmExecutionFlow().submit(request, false);
}
+ // === 2. Leader local read (skip ReadIndex if allowed) ===
+ if (raftServerStatus == LEADER_AND_READY) {
+ if (ozoneManager.getConfig().isAllowLeaderSkipLinearizableRead()) {
+ ozoneManager.getMetrics().incNumLeaderSkipLinearizableRead();
+ // leader directly serves local committed data
+ return handler.handleReadRequest(request);
+ }
+ // otherwise use linearizable path when enabled
+ if (omRatisServer.isLinearizableRead()) {
+ ozoneManager.getMetrics().incNumLinearizableRead();
+ return ozoneManager.getOmExecutionFlow().submit(request, false);
+ }
- // fallback to local read
- return handler.handleReadRequest(request);
+ // fallback to local read
+ return handler.handleReadRequest(request);
+ } else {
+ throw createLeaderErrorException(raftServerStatus);
+ }
} else {
- throw createLeaderErrorException(raftServerStatus);
+ // If read consistency hint is specified, we should try to respect it
although
+ // there is no guarantee since it depends on the OM node configuration
(e.g.
+ // whether OM Raft server enables linearizable read).
+ ReadConsistencyHint readConsistencyHint =
request.getReadConsistencyHint();
+ ReadConsistencyType consistencyType =
readConsistencyHint.getConsistencyType();
+ RaftServerStatus raftServerStatus;
+ switch (consistencyType) {
+ case STALE:
+ // Serve the stale read request immediately for both leader and
follower
+ ozoneManager.getMetrics().incNumStaleRead();
+ return handler.handleReadRequest(request);
+ case LOCAL_LEASE_FOLLOWER_READ:
+ raftServerStatus = omRatisServer.getLeaderStatus();
+ switch (raftServerStatus) {
+ case NOT_LEADER:
+ if (!ozoneManager.getConfig().isFollowerReadLocalLeaseEnabled()) {
+ throw createLeaderErrorException(raftServerStatus);
+ }
+ LocalLeaseContext localLeaseContext =
readConsistencyHint.getLocalLeaseContext();
+ long localLeaseLagLimit = localLeaseContext.getLagLimit() > 0 ?
+ localLeaseContext.getLagLimit() :
ozoneManager.getConfig().getFollowerReadLocalLeaseLagLimit();
+ long localLeaseLeaseTimeMs = localLeaseContext.getLeaseTimeMs() > 0 ?
+ localLeaseContext.getLeaseTimeMs() :
ozoneManager.getConfig().getFollowerReadLocalLeaseTimeMs();
+ if (allowFollowerReadLocalLease(omRatisServer.getServerDivision(),
+ localLeaseLagLimit, localLeaseLeaseTimeMs)) {
+ ozoneManager.getMetrics().incNumFollowerReadLocalLeaseSuccess();
+ return handler.handleReadRequest(request);
+ }
+ // The LocalLease lag is too high, trigger failover
+ throw createLeaderErrorException(raftServerStatus);
+ case LEADER_AND_NOT_READY:
Review Comment:
For LOCAL_LEASE_FOLLOWER_READ, the LEADER_AND_NOT_READY case should be
treated the same as the NOT_LEADER case, i.e., just consider the node a
follower. (Edited to fix the typo "LEADER_AND_READY".)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]