[GitHub] [spark] srielau commented on a diff in pull request #38344: [SPARK-40777][SQL][PROTOBUF] Protobuf import support and move error-classes.

2022-11-02 Thread GitBox


srielau commented on code in PR #38344:
URL: https://github.com/apache/spark/pull/38344#discussion_r1012385639


##
core/src/main/resources/error/error-classes.json:
##
@@ -742,6 +832,11 @@
    ],
    "sqlState" : "22023"
  },
+  "SQL_TYPE_TO_PROTOBUF_ENUM_TYPE_ERROR" : {

Review Comment:
   Rename without _ERROR



##
sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala:
##
@@ -3212,4 +3212,181 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
      messageParameters = Map("expression" -> toSQLExpr(expression))
    )
  }
+
+  def cannotConvertProtobufTypeToSqlTypeError(
+      protobufColumn: String,
+      sqlColumn: Seq[String],
+      protobufType: String,
+      sqlType: DataType): Throwable = {
+    new AnalysisException(
+      errorClass = "PROTOBUF_FIELD_TYPE_TO_SQL_TYPE_ERROR",
+      messageParameters = Map(
+        "protobufColumn" -> protobufColumn,
+        "sqlColumn" -> toSQLId(sqlColumn),
+        "protobufType" -> protobufType,
+        "sqlType" -> toSQLType(sqlType)))
+  }
+
+  def cannotConvertCatalystTypeToProtobufTypeError(
+      sqlColumn: Seq[String],
+      protobufColumn: String,
+      sqlType: DataType,
+      protobufType: String): Throwable = {
+    new AnalysisException(
+      errorClass = "CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE",
+      messageParameters = Map(
+        "sqlColumn" -> toSQLId(sqlColumn),
+        "protobufColumn" -> protobufColumn,
+        "sqlType" -> toSQLType(sqlType),
+        "protobufType" -> protobufType))
+  }
+
+  def cannotConvertCatalystTypeToProtobufEnumTypeError(
+      sqlColumn: Seq[String],
+      protobufColumn: String,
+      data: String,
+      enumString: String): Throwable = {
+    new AnalysisException(
+      errorClass = "SQL_TYPE_TO_PROTOBUF_ENUM_TYPE_ERROR",
+      messageParameters = Map(
+        "sqlColumn" -> toSQLId(sqlColumn),
+        "protobufColumn" -> protobufColumn,
+        "data" -> data,
+        "enumString" -> enumString))
+  }
+
+  def cannotConvertProtobufTypeToCatalystTypeError(
+      protobufType: String,
+      sqlType: DataType,
+      cause: Throwable): Throwable = {
+    new AnalysisException(
+      errorClass = "CANNOT_CONVERT_PROTOBUF_MESSAGE_TYPE_TO_SQL_TYPE",
+      messageParameters = Map(
+        "protobufType" -> protobufType,
+        "toType" -> toSQLType(sqlType)),
+      cause = Option(cause.getCause))
+  }
+
+  def cannotConvertSqlTypeToProtobufError(
+      protobufType: String,
+      sqlType: DataType,
+      cause: Throwable): Throwable = {
+    new AnalysisException(
+      errorClass = "UNABLE_TO_CONVERT_TO_PROTOBUF_MESSAGE_TYPE",
+      messageParameters = Map(
+        "protobufType" -> protobufType,
+        "toType" -> toSQLType(sqlType)),
+      cause = Option(cause.getCause))
+  }
+
+  def protobufTypeUnsupportedYetError(protobufType: String): Throwable = {
+    new AnalysisException(
+      errorClass = "PROTOBUF_TYPE_NOT_SUPPORT",
+      messageParameters = Map("protobufType" -> protobufType))
+  }
+
+  def unknownProtobufMessageTypeError(
+      descriptorName: String,
+      containingType: String): Throwable = {
+    new AnalysisException(
+      errorClass = "UNKNOWN_PROTOBUF_MESSAGE_TYPE",
+      messageParameters = Map(
+        "descriptorName" -> descriptorName,
+        "containingType" -> containingType))
+  }
+
+  def cannotFindCatalystTypeInProtobufSchemaError(catalystFieldPath: String): Throwable = {
+    new AnalysisException(
+      errorClass = "NO_SQL_TYPE_IN_PROTOBUF_SCHEMA",
+      messageParameters = Map("catalystFieldPath" -> catalystFieldPath))
+  }
+
+  def cannotFindProtobufFieldInCatalystError(field: String): Throwable = {
+    new AnalysisException(
+      errorClass = "PROTOBUF_FIELD_MISSING_IN_SQL_SCHEMA",
+      messageParameters = Map("field" -> field))
+  }
+
+  def protobufFieldMatchError(field: String,
+      protobufSchema: String,
+      matchSize: String,
+      matches: String): Throwable = {
+    new AnalysisException(
+      errorClass = "PROTOBUF_FIELD_MISSING",
+      messageParameters = Map(
+        "field" -> field,
+        "protobufSchema" -> protobufSchema,
+        "matchSize" -> matchSize,
+        "matches" -> matches))
+  }
+
+  def unableToLocateProtobufMessageError(messageName: String): Throwable = {
+    new AnalysisException(
+      errorClass = "PROTOBUF_MESSAGE_NOT_FOUND",
+      messageParameters = Map("messageName" -> messageName))
+  }
+
+  def descriptorParseError(descFilePath: String, cause: Throwable): Throwable = {
+    new AnalysisException(
+      errorClass = "CANNOT_PARSE_PROTOBUF_DESCRIPTOR",
+      messageParameters = Map("descFilePath" -> descFilePath),
+      cause = Option(cause.getCause))
+  }
+
+  def cannotFindDescriptorFileError(filePath: String, cause: Throwable): Throwable = {
+    new AnalysisException(
+      errorClass = "PROTOBUF_DESCRIPTOR_FILE_NOT_FOUND",
+      messageParameters 
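
For readers skimming the archive: the JSON entries and the Scala helpers quoted above are two halves of one mechanism. The error class name selects a message template in error-classes.json, and the `<param>` placeholders in that template are filled from the `messageParameters` map passed to `AnalysisException`. Below is a minimal, self-contained sketch of that substitution pattern; it is illustrative only (not Spark's implementation), the template wording is borrowed from one of the entries quoted later in the thread, and the sample field name is made up.

```scala
// Standalone sketch of the error-class pattern discussed in this thread.
// The substitution logic is illustrative, not Spark's actual formatter.
object ErrorClassSketch {
  // Templates keyed by error class, with <param> placeholders as in error-classes.json.
  private val templates: Map[String, String] = Map(
    "PROTOBUF_FIELD_MISSING_IN_SQL_SCHEMA" ->
      "Found <field> in Protobuf schema but there is no match in the SQL schema")

  // Replace each <key> placeholder with its value from messageParameters.
  def format(errorClass: String, messageParameters: Map[String, String]): String =
    messageParameters.foldLeft(templates(errorClass)) { case (msg, (key, value)) =>
      msg.replace(s"<$key>", value)
    }

  def main(args: Array[String]): Unit = {
    // "events.user_id" is a made-up field name used only for illustration.
    println(format("PROTOBUF_FIELD_MISSING_IN_SQL_SCHEMA", Map("field" -> "events.user_id")))
    // prints: Found events.user_id in Protobuf schema but there is no match in the SQL schema
  }
}
```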

[GitHub] [spark] srielau commented on a diff in pull request #38344: [SPARK-40777][SQL][PROTOBUF] Protobuf import support and move error-classes.

2022-10-28 Thread GitBox


srielau commented on code in PR #38344:
URL: https://github.com/apache/spark/pull/38344#discussion_r1008327776


##
core/src/main/resources/error/error-classes.json:
##
@@ -23,6 +23,11 @@
    ],
    "sqlState" : "42000"
  },
+  "CANNOT_FIND_PROTOBUF_DESCRIPTOR_FILE_ERROR" : {

Review Comment:
   Definitely remove _ERROR.
   It would be nice if we could gravitate towards a *_NOT_FOUND / *_ALREADY_EXISTS style naming convention:
   PROTOBUF_DESCRIPTOR_FILE_NOT_FOUND



##
core/src/main/resources/error/error-classes.json:
##
@@ -65,6 +70,11 @@
    ],
    "sqlState" : "22005"
  },
+  "CATALYST_TYPE_TO_PROTOBUF_ENUM_TYPE_ERROR" : {

Review Comment:
   I don't think we call out Catalyst anywhere else. How is that relevant? Can we be generic?



##
core/src/main/resources/error/error-classes.json:
##
@@ -65,6 +70,11 @@
    ],
    "sqlState" : "22005"
  },
+  "CATALYST_TYPE_TO_PROTOBUF_ENUM_TYPE_ERROR" : {
+    "message" : [
+      "Cannot convert SQL <sqlColumn> to Protobuf <protobufColumn> because <data> cannot be written since it's not defined in ENUM <enumString>"

Review Comment:
   That's a "because" squared... I don't have enough context to propose a rephrase.
   Also, if we have SQL here, maybe we can do CATALYST -> SQL for the name?



##
core/src/main/resources/error/error-classes.json:
##
@@ -579,6 +594,11 @@
      }
    }
  },
+  "MALFORMED_PROTOBUF_MESSAGE_ERROR" : {

Review Comment:
   Remove _ERROR



##
core/src/main/resources/error/error-classes.json:
##
@@ -629,11 +649,21 @@
    ],
    "sqlState" : "42000"
  },
+  "NO_CATALYST_TYPE_IN_PROTOBUF_SCHEMA" : {
+    "message" : [
+      "Cannot find <catalystFieldPath> in Protobuf schema"
+    ]
+  },
  "NO_HANDLER_FOR_UDAF" : {
    "message" : [
      "No handler for UDAF '<functionName>'. Use sparkSession.udf.register(...) instead."
    ]
  },
+  "NO_PROTOBUF_MESSAGE_TYPE_ERROR" : {

Review Comment:
   Remove _ERROR



##
core/src/main/resources/error/error-classes.json:
##
@@ -706,6 +736,66 @@
    ],
    "sqlState" : "42000"
  },
+  "PROTOBUF_CLASS_LOAD_ERROR" : {
+    "message" : [
+      "Could not load Protobuf class with name <protobufClassName>"
+    ]
+  },
+  "PROTOBUF_DEPENDENCY_ERROR" : {
+    "message" : [
+      "Could not find dependency: <dependencyName>"
+    ]
+  },
+  "PROTOBUF_DESCRIPTOR_ERROR" : {

Review Comment:
   ```suggestion
 "CANNOT_PARSE_PROTOBUF_DESCRIPTOR" : {
   ```
   
   Any hope we know why, and can tell the user? Right now this just says: "Bad Kitty".
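
In case it helps the discussion: one way to make the failure self-describing is to carry the protobuf runtime's own parse error along. Below is a hedged sketch under the assumption that the descriptor bytes are read with the standard protobuf-java `FileDescriptorSet.parseFrom` API; the object name and the plain `RuntimeException` are placeholders, not the PR's error-class plumbing.

```scala
import com.google.protobuf.DescriptorProtos.FileDescriptorSet
import com.google.protobuf.InvalidProtocolBufferException

object DescriptorParseSketch {
  // Sketch only: surface the concrete reason for the parse failure instead of a
  // generic "error parsing descriptor" message. `bytes` is the descriptor file content.
  def parseDescriptorSet(bytes: Array[Byte]): FileDescriptorSet =
    try {
      FileDescriptorSet.parseFrom(bytes)
    } catch {
      case e: InvalidProtocolBufferException =>
        // e.getMessage carries the parser's explanation; an error class could expose it
        // as a message parameter rather than wrapping it in a plain RuntimeException.
        throw new RuntimeException(s"Cannot parse Protobuf descriptor: ${e.getMessage}", e)
    }
}
```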



##
core/src/main/resources/error/error-classes.json:
##
@@ -706,6 +736,66 @@
    ],
    "sqlState" : "42000"
  },
+  "PROTOBUF_CLASS_LOAD_ERROR" : {
+    "message" : [
+      "Could not load Protobuf class with name <protobufClassName>"
+    ]
+  },
+  "PROTOBUF_DEPENDENCY_ERROR" : {
+    "message" : [
+      "Could not find dependency: <dependencyName>"
+    ]
+  },
+  "PROTOBUF_DESCRIPTOR_ERROR" : {
+    "message" : [
+      "Error parsing descriptor byte[] into Descriptor object"
+    ]
+  },
+  "PROTOBUF_DESCRIPTOR_PARSING_ERROR" : {

Review Comment:
   ```suggestion
 "CANNOT_CONSTRUCT_PROTOBUF_DESCRIPTOR" : {
   ```
   Same as above... Not sure how helpful this message is.



##
core/src/main/resources/error/error-classes.json:
##
@@ -706,6 +736,66 @@
    ],
    "sqlState" : "42000"
  },
+  "PROTOBUF_CLASS_LOAD_ERROR" : {
+    "message" : [
+      "Could not load Protobuf class with name <protobufClassName>"
+    ]
+  },
+  "PROTOBUF_DEPENDENCY_ERROR" : {
+    "message" : [
+      "Could not find dependency: <dependencyName>"
+    ]
+  },
+  "PROTOBUF_DESCRIPTOR_ERROR" : {
+    "message" : [
+      "Error parsing descriptor byte[] into Descriptor object"
+    ]
+  },
+  "PROTOBUF_DESCRIPTOR_PARSING_ERROR" : {
+    "message" : [
+      "Error constructing FileDescriptor"
+    ]
+  },
+  "PROTOBUF_FIELD_MISSING_ERROR" : {
+    "message" : [
+      "Searching for <field> in Protobuf schema at <protobufSchema> gave <matchSize> matches. Candidates: <matches>"
+    ]
+  },
+  "PROTOBUF_FIELD_MISSING_IN_CATALYST_SCHEMA" : {
+    "message" : [
+      "Found <field> in Protobuf schema but there is no match in the SQL schema"
+    ]
+  },
+  "PROTOBUF_FIELD_TYPE_MISMATCH" : {
+    "message" : [
+      "Type mismatch encountered for field: <field>"
+    ]
+  },
+  "PROTOBUF_MESSAGE_TYPE_ERROR" : {
+    "message" : [
+      "<protobufClassName> is not a Protobuf message type"
+    ]
+  },
+  "PROTOBUF_RECURSION_ERROR" : {

Review Comment:
   ```suggestion
 "RECURSIVE_PROTOBUF_SCHEMA" : {
   ```
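
For context on what this error class guards against: a Protobuf schema is recursive when a message type can reach itself again through message-typed fields, which cannot be mapped to a fixed SQL struct. Below is a small sketch of one way such a cycle could be detected with the protobuf-java `Descriptors` API; it is an assumed illustration, not the check the PR implements.

```scala
import com.google.protobuf.Descriptors.{Descriptor, FieldDescriptor}
import scala.jdk.CollectionConverters._

object RecursionCheckSketch {
  // Returns true if `root` can reach its own message type again via nested message fields.
  def isRecursive(root: Descriptor): Boolean = {
    def visit(d: Descriptor, seen: Set[String]): Boolean =
      if (seen.contains(d.getFullName)) true
      else d.getFields.asScala
        .filter(_.getJavaType == FieldDescriptor.JavaType.MESSAGE)
        .exists(f => visit(f.getMessageType, seen + d.getFullName))
    visit(root, Set.empty)
  }
}
```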



##
core/src/main/resources/error/error-classes.json:
##
@@ -760,6 +850,11 @@
 ],
 "sqlState" : "22023"
   },
+  "SQL_TYPE_TO_PROTOBUF_TYPE_ERROR" : {

Review Comment:
   ```suggestion
 "CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_TYPE" : {
   ```



##
core/src/main/resources/error/error-classes.json:
##
@@ -792,6 +887,21 @@
      "Unable to acquire <requestedBytes> bytes of memory, got <receivedBytes>"
    ]
  },
+  "UNABLE_TO_CONVERT_TO_PROTOBUF_TYPE" : {
+    "message" : [
+      "Unable to convert SQL type <toType> to Protobuf type <protobufType>."
+    ]
+  },
+  "UNABLE_TO_LOCATE_PROTOBUF_MES