guozhangwang commented on a change in pull request #10609: URL: https://github.com/apache/kafka/pull/10609#discussion_r644343550
########## File path: streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/SubscriptionInfo.java ########## @@ -125,6 +130,29 @@ public int errorCode() { return data.errorCode(); } + // For version > MIN_NAMED_TOPOLOGY_VERSION + private void setTaskOffsetSumDataWithNamedTopologiesFromTaskOffsetSumMap(final Map<TaskId, Long> taskOffsetSums) { + final Map<Integer, List<SubscriptionInfoData.PartitionToOffsetSum>> topicGroupIdToPartitionOffsetSum = new HashMap<>(); + for (final Map.Entry<TaskId, Long> taskEntry : taskOffsetSums.entrySet()) { + final TaskId task = taskEntry.getKey(); + topicGroupIdToPartitionOffsetSum.computeIfAbsent(task.topicGroupId, t -> new ArrayList<>()).add( + new SubscriptionInfoData.PartitionToOffsetSum() + .setPartition(task.partition) + .setOffsetSum(taskEntry.getValue())); + } + + data.setTaskOffsetSums(taskOffsetSums.entrySet().stream().map(t -> { + final SubscriptionInfoData.TaskOffsetSum taskOffsetSum = new SubscriptionInfoData.TaskOffsetSum(); + final TaskId task = t.getKey(); + taskOffsetSum.setTopicGroupId(task.topicGroupId); + taskOffsetSum.setPartition(task.partition); Review comment: Thanks for the explanation! ########## File path: streams/src/main/java/org/apache/kafka/streams/processor/internals/StateDirectory.java ########## @@ -462,39 +512,49 @@ private void cleanRemovedTasksCalledByUser() throws Exception { * List all of the task directories that are non-empty * @return The list of all the non-empty local directories for stream tasks */ - File[] listNonEmptyTaskDirectories() { - final File[] taskDirectories; - if (!hasPersistentStores || !stateDir.exists()) { - taskDirectories = new File[0]; - } else { - taskDirectories = - stateDir.listFiles(pathname -> { - if (!pathname.isDirectory() || !TASK_DIR_PATH_NAME.matcher(pathname.getName()).matches()) { - return false; - } else { - return !taskDirIsEmpty(pathname); - } - }); - } - - return taskDirectories == null ? 
new File[0] : taskDirectories; + List<TaskDirectory> listNonEmptyTaskDirectories() { + return listTaskDirectories(pathname -> { + if (!pathname.isDirectory() || !TASK_DIR_PATH_NAME.matcher(pathname.getName()).matches()) { + return false; + } else { + return !taskDirIsEmpty(pathname); + } + }); } /** - * List all of the task directories + * List all of the task directories along with their parent directory if they belong to a named topology * @return The list of all the existing local directories for stream tasks */ - File[] listAllTaskDirectories() { - final File[] taskDirectories; - if (!hasPersistentStores || !stateDir.exists()) { - taskDirectories = new File[0]; - } else { - taskDirectories = - stateDir.listFiles(pathname -> pathname.isDirectory() - && TASK_DIR_PATH_NAME.matcher(pathname.getName()).matches()); + List<TaskDirectory> listAllTaskDirectories() { + return listTaskDirectories(pathname -> pathname.isDirectory() && TASK_DIR_PATH_NAME.matcher(pathname.getName()).matches()); + } + + private List<TaskDirectory> listTaskDirectories(final FileFilter filter) { + final List<TaskDirectory> taskDirectories = new ArrayList<>(); + if (hasPersistentStores && stateDir.exists()) { + if (hasNamedTopologies) { Review comment: I was asking more for a semantic one -- as long as this is not expected then I'm happy for this piece as is :) ########## File path: streams/src/test/java/org/apache/kafka/streams/processor/internals/StateDirectoryTest.java ########## @@ -593,6 +575,111 @@ public void shouldLogTempDirMessage() { } } + /************* Named Topology Tests *************/ + + @Test + public void shouldCreateTaskDirectoriesUnderNamedTopologyDirs() throws IOException { + initializeStateDirectory(true, true); + + directory.getOrCreateDirectoryForTask(new TaskId(0, 0, "topology1")); + directory.getOrCreateDirectoryForTask(new TaskId(0, 1, "topology1")); + directory.getOrCreateDirectoryForTask(new TaskId(0, 0, "topology2")); + + assertThat(new File(appDir, 
"__topology1__").exists(), is(true)); + assertThat(new File(appDir, "__topology1__").isDirectory(), is(true)); + assertThat(new File(appDir, "__topology2__").exists(), is(true)); + assertThat(new File(appDir, "__topology2__").isDirectory(), is(true)); + + assertThat(new File(new File(appDir, "__topology1__"), "0_0").exists(), is(true)); + assertThat(new File(new File(appDir, "__topology1__"), "0_0").isDirectory(), is(true)); + assertThat(new File(new File(appDir, "__topology1__"), "0_1").exists(), is(true)); + assertThat(new File(new File(appDir, "__topology1__"), "0_1").isDirectory(), is(true)); + assertThat(new File(new File(appDir, "__topology2__"), "0_0").exists(), is(true)); + assertThat(new File(new File(appDir, "__topology2__"), "0_0").isDirectory(), is(true)); + } + + @Test + public void shouldOnlyListNonEmptyTaskDirectoriesInNamedTopologies() throws IOException { + initializeStateDirectory(true, true); + + TestUtils.tempDirectory(appDir.toPath(), "foo"); + final TaskDirectory taskDir1 = new TaskDirectory(directory.getOrCreateDirectoryForTask(new TaskId(0, 0, "topology1")), "topology1"); + final TaskDirectory taskDir2 = new TaskDirectory(directory.getOrCreateDirectoryForTask(new TaskId(0, 1, "topology1")), "topology1"); + final TaskDirectory taskDir3 = new TaskDirectory(directory.getOrCreateDirectoryForTask(new TaskId(0, 0, "topology2")), "topology2"); + + final File storeDir = new File(taskDir1.file(), "store"); + assertTrue(storeDir.mkdir()); + + assertThat(new HashSet<>(directory.listAllTaskDirectories()), equalTo(mkSet(taskDir1, taskDir2, taskDir3))); + assertThat(directory.listNonEmptyTaskDirectories(), equalTo(singletonList(taskDir1))); + + Utils.delete(taskDir1.file()); + + assertThat(new HashSet<>(directory.listAllTaskDirectories()), equalTo(mkSet(taskDir2, taskDir3))); + assertThat(directory.listNonEmptyTaskDirectories(), equalTo(emptyList())); + } + + @Test + public void shouldRemoveNonEmptyNamedTopologyDirsWhenCallingClean() throws Exception { + 
initializeStateDirectory(true, true); + final File taskDir = directory.getOrCreateDirectoryForTask(new TaskId(2, 0, "topology1")); + final File namedTopologyDir = new File(appDir, "__topology1__"); + + assertThat(taskDir.exists(), is(true)); + assertThat(namedTopologyDir.exists(), is(true)); + directory.clean(); + assertThat(taskDir.exists(), is(false)); + assertThat(namedTopologyDir.exists(), is(false)); + } + + @Test + public void shouldRemoveEmptyNamedTopologyDirsWhenCallingClean() throws IOException { + initializeStateDirectory(true, true); + final File namedTopologyDir = new File(appDir, "__topology1__"); + assertThat(namedTopologyDir.mkdir(), is(true)); + assertThat(namedTopologyDir.exists(), is(true)); + directory.clean(); + assertThat(namedTopologyDir.exists(), is(false)); + } + + @Test + public void shouldNotRemoveDirsThatDoNotMatchNamedTopologyDirsWhenCallingClean() throws IOException { + initializeStateDirectory(true, true); + final File someDir = new File(appDir, "_not-a-valid-named-topology_dir_name_"); + assertThat(someDir.mkdir(), is(true)); + assertThat(someDir.exists(), is(true)); + directory.clean(); + assertThat(someDir.exists(), is(true)); + } + + @Test + public void shouldCleanupObsoleteTaskDirectoriesInNamedTopologiesAndDeleteTheParentDirectories() throws IOException { Review comment: Could we add a test case to verify that, when a named topology dir and a non-named topology dir co-exist, we check for this at some step and throw? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org