dajac commented on a change in pull request #9430: URL: https://github.com/apache/kafka/pull/9430#discussion_r573192881
########## File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala ########## @@ -20,102 +20,120 @@ package kafka.tools import java.util.Properties import joptsimple._ -import kafka.utils.{CommandLineUtils, Exit, ToolsUtils} +import kafka.utils.{CommandLineUtils, Exit, IncludeList, ToolsUtils} import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer} import org.apache.kafka.common.requests.ListOffsetsRequest import org.apache.kafka.common.{PartitionInfo, TopicPartition} import org.apache.kafka.common.serialization.ByteArrayDeserializer +import org.apache.kafka.common.utils.Utils +import java.util.regex.Pattern import scala.jdk.CollectionConverters._ import scala.collection.Seq +import scala.math.Ordering.Implicits.infixOrderingOps object GetOffsetShell { + private val TopicPartitionPattern = Pattern.compile( "([^:,]*)(?::(?:([0-9]*)|(?:([0-9]*)-([0-9]*))))?") def main(args: Array[String]): Unit = { + try { + fetchOffsets(args) + } catch { + case e: Exception => + System.err.println(s"Error occurred: $e.getMessage") Review comment: We could use `println` here to be consistent with the other command line tools. Moreover, you need to use curly braces around `e.getMessage`: s"Error: ${e.getMessage}". ########## File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala ########## @@ -132,23 +150,79 @@ object GetOffsetShell { } } - partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) => - println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}") + partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach { + case (tp, offset) => println(s"${tp.topic}:${tp.partition}:${Option(offset).getOrElse("")}") } + } + def compareTopicPartitions(a: TopicPartition, b: TopicPartition): Boolean = { + (a.topic(), a.partition()) < (b.topic(), b.partition()) } /** - * Return the partition infos for `topic`. If the topic does not exist, `None` is returned. + * Creates a topic-partition filter based on a list of patterns. + * Expected format: + * List: TopicPartitionPattern(, TopicPartitionPattern)* + * TopicPartitionPattern: TopicPattern(:PartitionPattern)? | :PartitionPattern + * TopicPattern: REGEX + * PartitionPattern: NUMBER | NUMBER-(NUMBER)? | -NUMBER */ - private def listPartitionInfos(consumer: KafkaConsumer[_, _], topic: String, partitionIds: Set[Int]): Option[Seq[PartitionInfo]] = { - val partitionInfos = consumer.listTopics.asScala.filter { case (k, _) => k == topic }.values.flatMap(_.asScala).toBuffer - if (partitionInfos.isEmpty) - None - else if (partitionIds.isEmpty) - Some(partitionInfos) + def createTopicPartitionFilterWithPatternList(topicPartitions: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = { + val ruleSpecs = topicPartitions.split(",") + val rules = ruleSpecs.map { ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics) } Review comment: `.map { ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics) }` => `.map(ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics))` - We usually don't use curly braces when the lambda is on one line. ########## File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala ########## @@ -20,102 +20,120 @@ package kafka.tools import java.util.Properties import joptsimple._ -import kafka.utils.{CommandLineUtils, Exit, ToolsUtils} +import kafka.utils.{CommandLineUtils, Exit, IncludeList, ToolsUtils} import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer} import org.apache.kafka.common.requests.ListOffsetsRequest import org.apache.kafka.common.{PartitionInfo, TopicPartition} import org.apache.kafka.common.serialization.ByteArrayDeserializer +import org.apache.kafka.common.utils.Utils +import java.util.regex.Pattern import scala.jdk.CollectionConverters._ import scala.collection.Seq +import scala.math.Ordering.Implicits.infixOrderingOps object GetOffsetShell { + private val TopicPartitionPattern = Pattern.compile( "([^:,]*)(?::(?:([0-9]*)|(?:([0-9]*)-([0-9]*))))?") Review comment: nit: The space before `"` could be removed. ########## File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala ########## @@ -132,23 +161,85 @@ object GetOffsetShell { } } - partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) => - println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}") + partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach { Review comment: No, that's fine as it is then. I missed this. ########## File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala ########## @@ -132,23 +150,79 @@ object GetOffsetShell { } } - partitionOffsets.toArray.sortBy { case (tp, _) => tp.partition }.foreach { case (tp, offset) => - println(s"$topic:${tp.partition}:${Option(offset).getOrElse("")}") + partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach { + case (tp, offset) => println(s"${tp.topic}:${tp.partition}:${Option(offset).getOrElse("")}") } + } + def compareTopicPartitions(a: TopicPartition, b: TopicPartition): Boolean = { + (a.topic(), a.partition()) < (b.topic(), b.partition()) } /** - * Return the partition infos for `topic`. If the topic does not exist, `None` is returned. + * Creates a topic-partition filter based on a list of patterns. + * Expected format: + * List: TopicPartitionPattern(, TopicPartitionPattern)* + * TopicPartitionPattern: TopicPattern(:PartitionPattern)? | :PartitionPattern + * TopicPattern: REGEX + * PartitionPattern: NUMBER | NUMBER-(NUMBER)? | -NUMBER */ - private def listPartitionInfos(consumer: KafkaConsumer[_, _], topic: String, partitionIds: Set[Int]): Option[Seq[PartitionInfo]] = { - val partitionInfos = consumer.listTopics.asScala.filter { case (k, _) => k == topic }.values.flatMap(_.asScala).toBuffer - if (partitionInfos.isEmpty) - None - else if (partitionIds.isEmpty) - Some(partitionInfos) + def createTopicPartitionFilterWithPatternList(topicPartitions: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = { + val ruleSpecs = topicPartitions.split(",") + val rules = ruleSpecs.map { ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics) } + tp => rules.exists { rule => rule.apply(tp) } + } + + def parseRuleSpec(ruleSpec: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = { + val matcher = TopicPartitionPattern.matcher(ruleSpec) + if (!matcher.matches()) + throw new IllegalArgumentException(s"Invalid rule specification: $ruleSpec") + + def group(group: Int): Option[String] = { + Option(matcher.group(group)).filter(s => s != null && s.nonEmpty) + } + + val topicFilter = IncludeList(group(1).getOrElse(".*")) + val partitionFilter = group(2).map(_.toInt) match { + case Some(partition) => + (p: Int) => p == partition + case None => + val lowerRange = group(3).map(_.toInt).getOrElse(0) + val upperRange = group(4).map(_.toInt).getOrElse(Int.MaxValue) + (p: Int) => p >= lowerRange && p < upperRange + } + + tp => topicFilter.isTopicAllowed(tp.topic, excludeInternalTopics) && partitionFilter(tp.partition()) Review comment: `tp.partition()` => `tp.partition` ########## File path: core/src/main/scala/kafka/tools/GetOffsetShell.scala ########## @@ -20,102 +20,131 @@ package kafka.tools import java.util.Properties import joptsimple._ -import kafka.utils.{CommandLineUtils, Exit, ToolsUtils} +import kafka.utils.{CommandLineUtils, Exit, IncludeList, ToolsUtils} import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer} import org.apache.kafka.common.requests.ListOffsetsRequest import org.apache.kafka.common.{PartitionInfo, TopicPartition} import org.apache.kafka.common.serialization.ByteArrayDeserializer +import org.apache.kafka.common.utils.Utils +import java.util.regex.Pattern import scala.jdk.CollectionConverters._ import scala.collection.Seq +import scala.math.Ordering.Implicits.infixOrderingOps object GetOffsetShell { + private val topicPartitionPattern = Pattern.compile( "([^:,]*)(?::([0-9]*)(?:-([0-9]*))?)?") def main(args: Array[String]): Unit = { + try { + fetchOffsets(args) + } catch { + case e: Exception => Exit.exit(1, Some(e.getMessage)) + } + } + + private def fetchOffsets(args: Array[String]): Unit = { val parser = new OptionParser(false) - val brokerListOpt = parser.accepts("broker-list", "REQUIRED: The list of hostname and port of the server to connect to.") + val brokerListOpt = parser.accepts("broker-list", "DEPRECATED, use --bootstrap-server instead; ignored if --bootstrap-server is specified. The server(s) to connect to in the form HOST1:PORT1,HOST2:PORT2.") .withRequiredArg - .describedAs("hostname:port,...,hostname:port") + .describedAs("HOST1:PORT1,...,HOST3:PORT3") .ofType(classOf[String]) - val topicOpt = parser.accepts("topic", "REQUIRED: The topic to get offset from.") + val bootstrapServerOpt = parser.accepts("bootstrap-server", "REQUIRED. The server(s) to connect to in the form HOST1:PORT1,HOST2:PORT2.") + .requiredUnless("broker-list") + .withRequiredArg + .describedAs("HOST1:PORT1,...,HOST3:PORT3") + .ofType(classOf[String]) + val topicPartitionsOpt = parser.accepts("topic-partitions", s"Comma separated list of topic-partition patterns to get the offsets for, with the format of '$topicPartitionPattern'." + + " The first group is an optional regex for the topic name, if omitted, it matches any topic name." + + " The section after ':' describes a 'partition' pattern, which can be: a number, a range in the format of 'NUMBER-NUMBER' (lower inclusive, upper exclusive), an inclusive lower bound in the format of 'NUMBER-', an exclusive upper bound in the format of '-NUMBER' or may be omitted to accept all partitions.") + .withRequiredArg + .describedAs("topic1:1,topic2:0-3,topic3,topic4:5-,topic5:-3") + .ofType(classOf[String]) + val topicOpt = parser.accepts("topic", s"The topic to get the offsets for. It also accepts a regular expression. If not present, all authorized topics are queried. Cannot be used if --topic-partitions is present.") .withRequiredArg .describedAs("topic") .ofType(classOf[String]) - val partitionOpt = parser.accepts("partitions", "comma separated list of partition ids. If not specified, it will find offsets for all partitions") + val partitionsOpt = parser.accepts("partitions", s"Comma separated list of partition ids to get the offsets for. If not present, all partitions of the authorized topics are queried. Cannot be used if --topic-partitions is present.") .withRequiredArg .describedAs("partition ids") .ofType(classOf[String]) - .defaultsTo("") - val timeOpt = parser.accepts("time", "timestamp of the offsets before that. [Note: No offset is returned, if the timestamp greater than recently commited record timestamp is given.]") + val timeOpt = parser.accepts("time", "timestamp of the offsets before that. [Note: No offset is returned, if the timestamp greater than recently committed record timestamp is given.]") .withRequiredArg .describedAs("timestamp/-1(latest)/-2(earliest)") .ofType(classOf[java.lang.Long]) .defaultsTo(-1L) - parser.accepts("offsets", "DEPRECATED AND IGNORED: number of offsets returned") - .withRequiredArg - .describedAs("count") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(1) - parser.accepts("max-wait-ms", "DEPRECATED AND IGNORED: The max amount of time each fetch request waits.") + val commandConfigOpt = parser.accepts("command-config", s"Property file containing configs to be passed to Consumer Client.") .withRequiredArg - .describedAs("ms") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(1000) + .describedAs("config file") + .ofType(classOf[String]) + val excludeInternalTopicsOpt = parser.accepts("exclude-internal-topics", s"By default, internal topics are included. If specified, internal topics are excluded.") - if (args.length == 0) - CommandLineUtils.printUsageAndDie(parser, "An interactive shell for getting topic offsets.") + if (args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "An interactive shell for getting topic-partition offsets.") val options = parser.parse(args : _*) - CommandLineUtils.checkRequiredArgs(parser, options, brokerListOpt, topicOpt) + val effectiveBrokerListOpt = if (options.has(bootstrapServerOpt)) + bootstrapServerOpt + else + brokerListOpt + + CommandLineUtils.checkRequiredArgs(parser, options, effectiveBrokerListOpt) val clientId = "GetOffsetShell" - val brokerList = options.valueOf(brokerListOpt) + val brokerList = options.valueOf(effectiveBrokerListOpt) + ToolsUtils.validatePortOrDie(parser, brokerList) - val topic = options.valueOf(topicOpt) - val partitionIdsRequested: Set[Int] = { - val partitionsString = options.valueOf(partitionOpt) - if (partitionsString.isEmpty) - Set.empty - else - partitionsString.split(",").map { partitionString => - try partitionString.toInt - catch { - case _: NumberFormatException => - System.err.println(s"--partitions expects a comma separated list of numeric partition ids, but received: $partitionsString") - Exit.exit(1) - } - }.toSet + val excludeInternalTopics = options.has(excludeInternalTopicsOpt) + + if (options.has(topicPartitionsOpt) && (options.has(topicOpt) || options.has(partitionsOpt))) { + throw new IllegalArgumentException("--topic-partitions cannot be used with --topic or --partitions") } + val listOffsetsTimestamp = options.valueOf(timeOpt).longValue - val config = new Properties + val topicPartitionFilter = if (options.has(topicPartitionsOpt)) { + createTopicPartitionFilterWithPatternList(options.valueOf(topicPartitionsOpt), excludeInternalTopics) + } else { + val partitionIdsRequested: Set[Int] = { + val partitionsString = options.valueOf(partitionsOpt) + if (partitionsString == null || partitionsString.isEmpty) + Set.empty + else + partitionsString.split(",").map { partitionString => + try partitionString.toInt + catch { + case _: NumberFormatException => + throw new IllegalArgumentException(s"--partitions expects a comma separated list of numeric " + + s"partition ids, but received: $partitionsString") + } + }.toSet + } + + createTopicPartitionFilterWithTopicAndPartitionPattern( + if (options.has(topicOpt)) Some(options.valueOf(topicOpt)) else None, + excludeInternalTopics, + partitionIdsRequested + ) + } + + val config = if (options.has(commandConfigOpt)) + Utils.loadProps(options.valueOf(commandConfigOpt)) + else + new Properties config.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList) config.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId) val consumer = new KafkaConsumer(config, new ByteArrayDeserializer, new ByteArrayDeserializer) Review comment: Could we address this one as well? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org