guozhangwang commented on a change in pull request #10609:
URL: https://github.com/apache/kafka/pull/10609#discussion_r644343550
##########
File path:
streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/SubscriptionInfo.java
##########
@@ -125,6 +130,29 @@ public int errorCode() {
return data.errorCode();
}
+ // For version > MIN_NAMED_TOPOLOGY_VERSION
+ private void
setTaskOffsetSumDataWithNamedTopologiesFromTaskOffsetSumMap(final Map<TaskId,
Long> taskOffsetSums) {
+ final Map<Integer, List<SubscriptionInfoData.PartitionToOffsetSum>>
topicGroupIdToPartitionOffsetSum = new HashMap<>();
+ for (final Map.Entry<TaskId, Long> taskEntry :
taskOffsetSums.entrySet()) {
+ final TaskId task = taskEntry.getKey();
+
topicGroupIdToPartitionOffsetSum.computeIfAbsent(task.topicGroupId, t -> new
ArrayList<>()).add(
+ new SubscriptionInfoData.PartitionToOffsetSum()
+ .setPartition(task.partition)
+ .setOffsetSum(taskEntry.getValue()));
+ }
+
+ data.setTaskOffsetSums(taskOffsetSums.entrySet().stream().map(t -> {
+ final SubscriptionInfoData.TaskOffsetSum taskOffsetSum = new
SubscriptionInfoData.TaskOffsetSum();
+ final TaskId task = t.getKey();
+ taskOffsetSum.setTopicGroupId(task.topicGroupId);
+ taskOffsetSum.setPartition(task.partition);
Review comment:
Thanks for the explanation!
##########
File path:
streams/src/main/java/org/apache/kafka/streams/processor/internals/StateDirectory.java
##########
@@ -462,39 +512,49 @@ private void cleanRemovedTasksCalledByUser() throws
Exception {
* List all of the task directories that are non-empty
* @return The list of all the non-empty local directories for stream tasks
*/
- File[] listNonEmptyTaskDirectories() {
- final File[] taskDirectories;
- if (!hasPersistentStores || !stateDir.exists()) {
- taskDirectories = new File[0];
- } else {
- taskDirectories =
- stateDir.listFiles(pathname -> {
- if (!pathname.isDirectory() ||
!TASK_DIR_PATH_NAME.matcher(pathname.getName()).matches()) {
- return false;
- } else {
- return !taskDirIsEmpty(pathname);
- }
- });
- }
-
- return taskDirectories == null ? new File[0] : taskDirectories;
+ List<TaskDirectory> listNonEmptyTaskDirectories() {
+ return listTaskDirectories(pathname -> {
+ if (!pathname.isDirectory() ||
!TASK_DIR_PATH_NAME.matcher(pathname.getName()).matches()) {
+ return false;
+ } else {
+ return !taskDirIsEmpty(pathname);
+ }
+ });
}
/**
- * List all of the task directories
+ * List all of the task directories along with their parent directory if
they belong to a named topology
* @return The list of all the existing local directories for stream tasks
*/
- File[] listAllTaskDirectories() {
- final File[] taskDirectories;
- if (!hasPersistentStores || !stateDir.exists()) {
- taskDirectories = new File[0];
- } else {
- taskDirectories =
- stateDir.listFiles(pathname -> pathname.isDirectory()
- &&
TASK_DIR_PATH_NAME.matcher(pathname.getName()).matches());
+ List<TaskDirectory> listAllTaskDirectories() {
+ return listTaskDirectories(pathname -> pathname.isDirectory() &&
TASK_DIR_PATH_NAME.matcher(pathname.getName()).matches());
+ }
+
+ private List<TaskDirectory> listTaskDirectories(final FileFilter filter) {
+ final List<TaskDirectory> taskDirectories = new ArrayList<>();
+ if (hasPersistentStores && stateDir.exists()) {
+ if (hasNamedTopologies) {
Review comment:
I was asking more for a semantic one -- as long as this is not expected,
then I'm happy with this piece as is :)
##########
File path:
streams/src/test/java/org/apache/kafka/streams/processor/internals/StateDirectoryTest.java
##########
@@ -593,6 +575,111 @@ public void shouldLogTempDirMessage() {
}
}
+ /************* Named Topology Tests *************/
+
+ @Test
+ public void shouldCreateTaskDirectoriesUnderNamedTopologyDirs() throws
IOException {
+ initializeStateDirectory(true, true);
+
+ directory.getOrCreateDirectoryForTask(new TaskId(0, 0, "topology1"));
+ directory.getOrCreateDirectoryForTask(new TaskId(0, 1, "topology1"));
+ directory.getOrCreateDirectoryForTask(new TaskId(0, 0, "topology2"));
+
+ assertThat(new File(appDir, "__topology1__").exists(), is(true));
+ assertThat(new File(appDir, "__topology1__").isDirectory(), is(true));
+ assertThat(new File(appDir, "__topology2__").exists(), is(true));
+ assertThat(new File(appDir, "__topology2__").isDirectory(), is(true));
+
+ assertThat(new File(new File(appDir, "__topology1__"),
"0_0").exists(), is(true));
+ assertThat(new File(new File(appDir, "__topology1__"),
"0_0").isDirectory(), is(true));
+ assertThat(new File(new File(appDir, "__topology1__"),
"0_1").exists(), is(true));
+ assertThat(new File(new File(appDir, "__topology1__"),
"0_1").isDirectory(), is(true));
+ assertThat(new File(new File(appDir, "__topology2__"),
"0_0").exists(), is(true));
+ assertThat(new File(new File(appDir, "__topology2__"),
"0_0").isDirectory(), is(true));
+ }
+
+ @Test
+ public void shouldOnlyListNonEmptyTaskDirectoriesInNamedTopologies()
throws IOException {
+ initializeStateDirectory(true, true);
+
+ TestUtils.tempDirectory(appDir.toPath(), "foo");
+ final TaskDirectory taskDir1 = new
TaskDirectory(directory.getOrCreateDirectoryForTask(new TaskId(0, 0,
"topology1")), "topology1");
+ final TaskDirectory taskDir2 = new
TaskDirectory(directory.getOrCreateDirectoryForTask(new TaskId(0, 1,
"topology1")), "topology1");
+ final TaskDirectory taskDir3 = new
TaskDirectory(directory.getOrCreateDirectoryForTask(new TaskId(0, 0,
"topology2")), "topology2");
+
+ final File storeDir = new File(taskDir1.file(), "store");
+ assertTrue(storeDir.mkdir());
+
+ assertThat(new HashSet<>(directory.listAllTaskDirectories()),
equalTo(mkSet(taskDir1, taskDir2, taskDir3)));
+ assertThat(directory.listNonEmptyTaskDirectories(),
equalTo(singletonList(taskDir1)));
+
+ Utils.delete(taskDir1.file());
+
+ assertThat(new HashSet<>(directory.listAllTaskDirectories()),
equalTo(mkSet(taskDir2, taskDir3)));
+ assertThat(directory.listNonEmptyTaskDirectories(),
equalTo(emptyList()));
+ }
+
+ @Test
+ public void shouldRemoveNonEmptyNamedTopologyDirsWhenCallingClean() throws
Exception {
+ initializeStateDirectory(true, true);
+ final File taskDir = directory.getOrCreateDirectoryForTask(new
TaskId(2, 0, "topology1"));
+ final File namedTopologyDir = new File(appDir, "__topology1__");
+
+ assertThat(taskDir.exists(), is(true));
+ assertThat(namedTopologyDir.exists(), is(true));
+ directory.clean();
+ assertThat(taskDir.exists(), is(false));
+ assertThat(namedTopologyDir.exists(), is(false));
+ }
+
+ @Test
+ public void shouldRemoveEmptyNamedTopologyDirsWhenCallingClean() throws
IOException {
+ initializeStateDirectory(true, true);
+ final File namedTopologyDir = new File(appDir, "__topology1__");
+ assertThat(namedTopologyDir.mkdir(), is(true));
+ assertThat(namedTopologyDir.exists(), is(true));
+ directory.clean();
+ assertThat(namedTopologyDir.exists(), is(false));
+ }
+
+ @Test
+ public void
shouldNotRemoveDirsThatDoNotMatchNamedTopologyDirsWhenCallingClean() throws
IOException {
+ initializeStateDirectory(true, true);
+ final File someDir = new File(appDir,
"_not-a-valid-named-topology_dir_name_");
+ assertThat(someDir.mkdir(), is(true));
+ assertThat(someDir.exists(), is(true));
+ directory.clean();
+ assertThat(someDir.exists(), is(true));
+ }
+
+ @Test
+ public void
shouldCleanupObsoleteTaskDirectoriesInNamedTopologiesAndDeleteTheParentDirectories()
throws IOException {
Review comment:
Could we add a test case to verify that, when both a named topology dir
and a non-named topology dir co-exist, we check for that at some step and
throw?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org