This is an automated email from the ASF dual-hosted git repository. lhotari pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/pulsar.git
commit e717447d6a8966649d82b31db4f408b87df35f1a Author: Lari Hotari <[email protected]> AuthorDate: Thu Jul 17 10:18:57 2025 +0300 [improve][misc] Optimize topic list hashing so that potentially large String allocation is avoided (#24525) (cherry picked from commit 06f424c4823399ae66608ad9fd0e2133a1055b1a) --- .../org/apache/pulsar/common/topics/TopicList.java | 20 ++++++++++++++------ .../apache/pulsar/common/topics/TopicListTest.java | 7 +++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/pulsar-common/src/main/java/org/apache/pulsar/common/topics/TopicList.java b/pulsar-common/src/main/java/org/apache/pulsar/common/topics/TopicList.java index 7a5659c33b5..1e0bde1b0c5 100644 --- a/pulsar-common/src/main/java/org/apache/pulsar/common/topics/TopicList.java +++ b/pulsar-common/src/main/java/org/apache/pulsar/common/topics/TopicList.java @@ -18,8 +18,10 @@ */ package org.apache.pulsar.common.topics; +import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -46,6 +48,7 @@ public class TopicList { Pattern topicsPattern = Pattern.compile(regex); return filterTopics(original, topicsPattern); } + public static List<String> filterTopics(List<String> original, Pattern topicsPattern) { final Pattern shortenedTopicsPattern = Pattern.compile(removeTopicDomainScheme(topicsPattern.toString())); @@ -68,14 +71,19 @@ public class TopicList { } public static String calculateHash(List<String> topics) { - return Hashing.crc32c().hashBytes(topics.stream() - .sorted() - .collect(Collectors.joining(",")) - .getBytes(StandardCharsets.UTF_8)).toString(); + Hasher hasher = Hashing.crc32c().newHasher(); + String[] sortedTopics = topics.toArray(new String[topics.size()]); + Arrays.sort(sortedTopics); + for (int i = 0; i < sortedTopics.length; i++) { + hasher.putString(sortedTopics[i], StandardCharsets.UTF_8); + // Skip the delimiter for the last item so that the hash format is compatible with previous versions + if (i < sortedTopics.length - 1) { + hasher.putByte((byte) ','); + } + } + return hasher.hash().toString(); } - - // get topics, which are contained in list1, and not in list2 public static Set<String> minus(Collection<String> list1, Collection<String> list2) { HashSet<String> s1 = new HashSet<>(list1); diff --git a/pulsar-common/src/test/java/org/apache/pulsar/common/topics/TopicListTest.java b/pulsar-common/src/test/java/org/apache/pulsar/common/topics/TopicListTest.java index b3a7536ebff..e7e5dec7e6c 100644 --- a/pulsar-common/src/test/java/org/apache/pulsar/common/topics/TopicListTest.java +++ b/pulsar-common/src/test/java/org/apache/pulsar/common/topics/TopicListTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.Lists; import org.testng.annotations.Test; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Set; import java.util.regex.Pattern; @@ -102,10 +103,16 @@ public class TopicListTest { String hash1 = TopicList.calculateHash(Arrays.asList(topicName3, topicName2, topicName1)); String hash2 = TopicList.calculateHash(Arrays.asList(topicName1, topicName3, topicName2)); assertEquals(hash1, hash2, "Hash must not depend on order of topics in the list"); + assertEquals(hash1, "90d4a04a", "Hash must be equal to the expected value"); String hash3 = TopicList.calculateHash(Arrays.asList(topicName1, topicName2)); assertNotEquals(hash1, hash3, "Different list must have different hashes"); + String hash4 = TopicList.calculateHash(Arrays.asList(topicName1)); + assertEquals(hash4, "0d0602ed", "Hash must be equal to the expected value"); + + String hash5 = TopicList.calculateHash(Collections.emptyList()); + assertEquals(hash5, "00000000", "Hash of empty list must be 0"); } @Test
