ryan-johnson-databricks commented on code in PR #40545:
URL: https://github.com/apache/spark/pull/40545#discussion_r1150611339


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala:
##########
@@ -501,80 +506,93 @@ object FileSourceMetadataAttribute {
 
   val FILE_SOURCE_METADATA_COL_ATTR_KEY = "__file_source_metadata_col"
 
+  val METADATA: Metadata = new MetadataBuilder()
+    .withMetadata(MetadataAttribute.METADATA)
+    .putBoolean(METADATA_COL_ATTR_KEY, value = true)
+    .putBoolean(FILE_SOURCE_METADATA_COL_ATTR_KEY, value = true)
+    .build()
+
   /**
-   * Cleanup the internal metadata information of an attribute if it is
-   * a [[FileSourceConstantMetadataAttribute]] or [[FileSourceGeneratedMetadataAttribute]].
+   * Removes the internal field metadata.
    */
   def cleanupFileSourceMetadataInformation(attr: Attribute): Attribute =
-    removeInternalMetadata(attr)
+    attr.withMetadata(removeInternalMetadata(attr.metadata))

Review Comment:
   I only see a call site for the `StructField` overload. Now that `FileFormat` 
starts from a `StructType` and converts to `AttributeReference` from there, do 
we expect to still need the `Attribute` version of this method going forward?



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala:
##########
@@ -501,80 +506,93 @@ object FileSourceMetadataAttribute {
 
   val FILE_SOURCE_METADATA_COL_ATTR_KEY = "__file_source_metadata_col"
 
+  val METADATA: Metadata = new MetadataBuilder()
+    .withMetadata(MetadataAttribute.METADATA)
+    .putBoolean(METADATA_COL_ATTR_KEY, value = true)
+    .putBoolean(FILE_SOURCE_METADATA_COL_ATTR_KEY, value = true)
+    .build()
+
   /**
-   * Cleanup the internal metadata information of an attribute if it is
-   * a [[FileSourceConstantMetadataAttribute]] or [[FileSourceGeneratedMetadataAttribute]].
+   * Removes the internal field metadata.
    */
   def cleanupFileSourceMetadataInformation(attr: Attribute): Attribute =
-    removeInternalMetadata(attr)
+    attr.withMetadata(removeInternalMetadata(attr.metadata))
+
+  /**
+   * Removes the internal field metadata.
+   */
+  def cleanupFileSourceMetadataInformation(field: StructField): StructField =
+    field.copy(metadata = removeInternalMetadata(field.metadata))
 
   def apply(name: String, dataType: DataType, nullable: Boolean = false): AttributeReference =
-    AttributeReference(name, dataType, nullable = nullable,
-      new MetadataBuilder()
-        .putBoolean(METADATA_COL_ATTR_KEY, value = true)
-        .putBoolean(FILE_SOURCE_METADATA_COL_ATTR_KEY, value = true).build())()
+    AttributeReference(name, dataType, nullable = nullable, METADATA)()
 
   /** Matches if attr is any File source metadata attribute (including constant and generated). */
-  def unapply(attr: AttributeReference): Option[AttributeReference] =
-    attr match {
-      case MetadataAttribute(attr)
-        if attr.metadata.contains(FILE_SOURCE_METADATA_COL_ATTR_KEY)
-          && attr.metadata.getBoolean(FILE_SOURCE_METADATA_COL_ATTR_KEY) => Some(attr)
-      case _ => None
-    }
+  def unapply(attr: AttributeReference): Option[AttributeReference] = {

Review Comment:
   tiny nit: the method doesn't require `{}`, and removing them makes for a 
cleaner diff



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to