cloud-fan commented on a change in pull request #33422: URL: https://github.com/apache/spark/pull/33422#discussion_r673102271
########## File path: sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala ########## @@ -1947,6 +1947,31 @@ class Dataset[T] private[sql]( CollectMetrics(name, (expr +: exprs).map(_.named), logicalPlan) } + /** + * Observe (named) metrics through an `org.apache.spark.sql.Observation` instance. + * This is equivalent to calling `observe(String, Column, Column*)` but does not require + * adding `org.apache.spark.sql.util.QueryExecutionListener` to the spark session. + * This method does not support streaming datasets. + * + * A user can retrieve the metrics by accessing `org.apache.spark.sql.Observation.get`. + * + * {{{ + * // Observe row count (rows) and highest id (maxid) in the Dataset while writing it + * val observation = Observation("my_metrics") + * val observed_ds = ds.observe(observation, count(lit(1)).as("rows"), max($"id").as("maxid")) + * observed_ds.write.parquet("ds.parquet") + * val metrics = observation.get + * }}} + * + * @throws IllegalArgumentException If this is a streaming Dataset (this.isStreaming == true) + * + * @group typedrel + * @since 3.3.0 + */ + def observe(observation: Observation, expr: Column, exprs: Column*): Dataset[T] = { Review comment: if it's simply adding an annotation `@varargs`, shall we just do it in this PR? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org
########## File path: sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala ########## @@ -1947,6 +1947,31 @@ class Dataset[T] private[sql]( CollectMetrics(name, (expr +: exprs).map(_.named), logicalPlan) } + /** + * Observe (named) metrics through an `org.apache.spark.sql.Observation` instance. + * This is equivalent to calling `observe(String, Column, Column*)` but does not require + * adding `org.apache.spark.sql.util.QueryExecutionListener` to the spark session. + * This method does not support streaming datasets. + * + * A user can retrieve the metrics by accessing `org.apache.spark.sql.Observation.get`. + * + * {{{ + * // Observe row count (rows) and highest id (maxid) in the Dataset while writing it + * val observation = Observation("my_metrics") + * val observed_ds = ds.observe(observation, count(lit(1)).as("rows"), max($"id").as("maxid")) + * observed_ds.write.parquet("ds.parquet") + * val metrics = observation.get + * }}} + * + * @throws IllegalArgumentException If this is a streaming Dataset (this.isStreaming == true) + * + * @group typedrel + * @since 3.3.0 + */ + def observe(observation: Observation, expr: Column, exprs: Column*): Dataset[T] = { Review comment: if it's simply adding an annotation `@varargs`, shall we just do it in this PR? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org