dcapwell commented on code in PR #3842:
URL: https://github.com/apache/cassandra/pull/3842#discussion_r1934243353
##########
src/java/org/apache/cassandra/service/accord/AccordService.java:
##########
@@ -410,48 +413,67 @@ public synchronized void startup()
}
/**
- * Queries peers to discover min epoch
+ * Queries peers to discover min epoch, and then fetches all topologies
between min and current epochs
*/
- private long fetchMinEpoch()
+ private List<Topology> fetchTopologies(long minEpoch, ClusterMetadata
metadata) throws ExecutionException, InterruptedException
{
- ClusterMetadata metadata = ClusterMetadata.current();
- Map<InetAddressAndPort, Set<TokenRange>> peers = new HashMap<>();
- for (KeyspaceMetadata keyspace : metadata.schema.getKeyspaces())
+ if (configService.maxEpoch() >= metadata.epoch.getEpoch())
{
- List<TableMetadata> tables =
keyspace.tables.stream().filter(TableMetadata::requiresAccordSupport).collect(Collectors.toList());
- if (tables.isEmpty())
- continue;
- DataPlacement current =
metadata.placements.get(keyspace.params.replication);
- DataPlacement settled =
metadata.writePlacementAllSettled(keyspace);
- Sets.SetView<InetAddressAndPort> alive =
Sets.intersection(settled.writes.byEndpoint().keySet(),
current.writes.byEndpoint().keySet());
- InetAddressAndPort self = FBUtilities.getBroadcastAddressAndPort();
- settled.writes.forEach((range, group) -> {
- if (group.endpoints().contains(self))
- {
- for (InetAddressAndPort peer : group.endpoints())
- {
- if (peer.equals(self) || !alive.contains(peer))
continue;
- for (TableMetadata table : tables)
- peers.computeIfAbsent(peer, i -> new
HashSet<>()).add(AccordTopology.fullRange(table.id));
- }
- }
- });
+ logger.info("Accord epoch {} matches TCM. All topologies are known
locally", metadata.epoch);
+ return
Collections.singletonList(AccordTopology.createAccordTopology(metadata));
}
+
+ Set<InetAddressAndPort> peers = new HashSet<>();
+ peers.addAll(metadata.directory.allAddresses());
+ peers.remove(FBUtilities.getBroadcastAddressAndPort());
+
+ // No peers: single node cluster or first node to boot
if (peers.isEmpty())
- return -1;
+ return
Collections.singletonList(AccordTopology.createAccordTopology(metadata));;
+
+ // Bootstrap, fetch min epoch
+ if (minEpoch == 0)
+ {
+ long fetched = findMinEpoch(SharedContext.Global.instance, peers);
+ // No other node has advanced epoch just yet
+ if (fetched == 0)
+ return
Collections.singletonList(AccordTopology.createAccordTopology(metadata));
+
+ minEpoch = fetched;
+ }
+
+ long maxEpoch = metadata.epoch.getEpoch();
+
+ // If we are behind minEpoch, catch up to at least minEpoch
+ if (metadata.epoch.getEpoch() < minEpoch)
+ {
+ minEpoch = metadata.epoch.getEpoch();
+ maxEpoch = minEpoch;
+ }
+
+ List<Future<Topology>> futures = new ArrayList<>();
+ logger.info("Discovered min epoch of {}. Proceeding to fetch epochs up
to {}.", minEpoch, maxEpoch);
+
+ for (long epoch = minEpoch; epoch <= maxEpoch; epoch++)
+ futures.add(FetchTopology.fetch(SharedContext.Global.instance,
peers, epoch));
Review Comment:
is there a reason not to make this a batch? if you know `N` you also know `N -
1`, so rather than doing many concurrent fetches we could just ask for the full
batch in a single request?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]