[GitHub] [spark] srielau commented on a diff in pull request #38344: [SPARK-40777][SQL][PROTOBUF] Protobuf import support and move error-classes.
srielau commented on code in PR #38344: URL: https://github.com/apache/spark/pull/38344#discussion_r1012385639 ## core/src/main/resources/error/error-classes.json: ## @@ -742,6 +832,11 @@ ], "sqlState" : "22023" }, + "SQL_TYPE_TO_PROTOBUF_ENUM_TYPE_ERROR" : { Review Comment: Rename without _ERROR ## sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala: ## @@ -3212,4 +3212,181 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { messageParameters = Map("expression" -> toSQLExpr(expression)) ) } + + def cannotConvertProtobufTypeToSqlTypeError( + protobufColumn: String, + sqlColumn: Seq[String], + protobufType: String, + sqlType: DataType): Throwable = { +new AnalysisException( + errorClass = "PROTOBUF_FIELD_TYPE_TO_SQL_TYPE_ERROR", + messageParameters = Map( +"protobufColumn" -> protobufColumn, +"sqlColumn" -> toSQLId(sqlColumn), +"protobufType" -> protobufType, +"sqlType" -> toSQLType(sqlType))) + } + + def cannotConvertCatalystTypeToProtobufTypeError( + sqlColumn: Seq[String], + protobufColumn: String, + sqlType: DataType, + protobufType: String): Throwable = { +new AnalysisException( + errorClass = "CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE", + messageParameters = Map( +"sqlColumn" -> toSQLId(sqlColumn), +"protobufColumn" -> protobufColumn, +"sqlType" -> toSQLType(sqlType), +"protobufType" -> protobufType)) + } + + def cannotConvertCatalystTypeToProtobufEnumTypeError( + sqlColumn: Seq[String], + protobufColumn: String, + data: String, + enumString: String): Throwable = { +new AnalysisException( + errorClass = "SQL_TYPE_TO_PROTOBUF_ENUM_TYPE_ERROR", + messageParameters = Map( +"sqlColumn" -> toSQLId(sqlColumn), +"protobufColumn" -> protobufColumn, +"data" -> data, +"enumString" -> enumString)) + } + + def cannotConvertProtobufTypeToCatalystTypeError( + protobufType: String, + sqlType: DataType, + cause: Throwable): Throwable = { +new AnalysisException( + errorClass = "CANNOT_CONVERT_PROTOBUF_MESSAGE_TYPE_TO_SQL_TYPE", + messageParameters = Map( +"protobufType" -> protobufType, +"toType" -> toSQLType(sqlType)), + cause = Option(cause.getCause)) + } + + def cannotConvertSqlTypeToProtobufError( + protobufType: String, + sqlType: DataType, + cause: Throwable): Throwable = { +new AnalysisException( + errorClass = "UNABLE_TO_CONVERT_TO_PROTOBUF_MESSAGE_TYPE", + messageParameters = Map( +"protobufType" -> protobufType, +"toType" -> toSQLType(sqlType)), + cause = Option(cause.getCause)) + } + + def protobufTypeUnsupportedYetError(protobufType: String): Throwable = { +new AnalysisException( + errorClass = "PROTOBUF_TYPE_NOT_SUPPORT", + messageParameters = Map("protobufType" -> protobufType)) + } + + def unknownProtobufMessageTypeError( + descriptorName: String, + containingType: String): Throwable = { +new AnalysisException( + errorClass = "UNKNOWN_PROTOBUF_MESSAGE_TYPE", + messageParameters = Map( +"descriptorName" -> descriptorName, +"containingType" -> containingType)) + } + + def cannotFindCatalystTypeInProtobufSchemaError(catalystFieldPath: String): Throwable = { +new AnalysisException( + errorClass = "NO_SQL_TYPE_IN_PROTOBUF_SCHEMA", + messageParameters = Map("catalystFieldPath" -> catalystFieldPath)) + } + + def cannotFindProtobufFieldInCatalystError(field: String): Throwable = { +new AnalysisException( + errorClass = "PROTOBUF_FIELD_MISSING_IN_SQL_SCHEMA", + messageParameters = Map("field" -> field)) + } + + def protobufFieldMatchError(field: String, + protobufSchema: String, + matchSize: String, + matches: String): Throwable = { +new AnalysisException( + errorClass = "PROTOBUF_FIELD_MISSING", + messageParameters = Map( +"field" -> field, +"protobufSchema" -> protobufSchema, +"matchSize" -> matchSize, +"matches" -> matches)) + } + + def unableToLocateProtobufMessageError(messageName: String): Throwable = { +new AnalysisException( + errorClass = "PROTOBUF_MESSAGE_NOT_FOUND", + messageParameters = Map("messageName" -> messageName)) + } + + def descrioptorParseError(descFilePath: String, cause: Throwable): Throwable = { +new AnalysisException( + errorClass = "CANNOT_PARSE_PROTOBUF_DESCRIPTOR", + messageParameters = Map.empty("descFilePath" -> descFilePath), + cause = Option(cause.getCause)) + } + + def cannotFindDescriptorFileError(filePath: String, cause: Throwable): Throwable = { +new AnalysisException( + errorClass = "PROTOBUF_DESCRIPTOR_FILE_NOT_FOUND", + messageParameters
[GitHub] [spark] srielau commented on a diff in pull request #38344: [SPARK-40777][SQL][PROTOBUF] Protobuf import support and move error-classes.
srielau commented on code in PR #38344: URL: https://github.com/apache/spark/pull/38344#discussion_r1008327776 ## core/src/main/resources/error/error-classes.json: ## @@ -23,6 +23,11 @@ ], "sqlState" : "42000" }, + "CANNOT_FIND_PROTOBUF_DESCRIPTOR_FILE_ERROR" : { Review Comment: Definitely remove _ERROR. Would be nice if we could gravitate towards *_NOT_FOUND and *_ALREADY_EXIST style naming convention: PROTOBUF_DESCRIPTOR_FILE_NOT_FOUND ## core/src/main/resources/error/error-classes.json: ## @@ -65,6 +70,11 @@ ], "sqlState" : "22005" }, + "CATALYST_TYPE_TO_PROTOBUF_ENUM_TYPE_ERROR" : { Review Comment: I don't think we call out Catalyst anywhere else. How is that relevant? Can we be generic? ## core/src/main/resources/error/error-classes.json: ## @@ -65,6 +70,11 @@ ], "sqlState" : "22005" }, + "CATALYST_TYPE_TO_PROTOBUF_ENUM_TYPE_ERROR" : { +"message" : [ + "Cannot convert SQL to Protobuf because cannot be written since it's not defined in ENUM " Review Comment: That's a because "squared"... I don't have enough context to propose a rephrase.. Also If we have SQL here, maybe we can do CATALYST -> SQL for the name? ## core/src/main/resources/error/error-classes.json: ## @@ -579,6 +594,11 @@ } } }, + "MALFORMED_PROTOBUF_MESSAGE_ERROR" : { Review Comment: Remove _ERROR ## core/src/main/resources/error/error-classes.json: ## @@ -629,11 +649,21 @@ ], "sqlState" : "42000" }, + "NO_CATALYST_TYPE_IN_PROTOBUF_SCHEMA" : { +"message" : [ + "Cannot find in Protobuf schema" +] + }, "NO_HANDLER_FOR_UDAF" : { "message" : [ "No handler for UDAF ''. Use sparkSession.udf.register(...) instead." ] }, + "NO_PROTOBUF_MESSAGE_TYPE_ERROR" : { Review Comment: Remove _ERROR ## core/src/main/resources/error/error-classes.json: ## @@ -706,6 +736,66 @@ ], "sqlState" : "42000" }, + "PROTOBUF_CLASS_LOAD_ERROR" : { +"message" : [ + "Could not load Protobuf class with name " +] + }, + "PROTOBUF_DEPENDENCY_ERROR" : { +"message" : [ + "Could not find dependency: " +] + }, + "PROTOBUF_DESCRIPTOR_ERROR" : { Review Comment: ```suggestion "CANNOT_PARSE_PROTOBUF_DESCRIPTOR" : { ``` Any hope we know why and can tell? This just says: "Bad Kitty" ## core/src/main/resources/error/error-classes.json: ## @@ -706,6 +736,66 @@ ], "sqlState" : "42000" }, + "PROTOBUF_CLASS_LOAD_ERROR" : { +"message" : [ + "Could not load Protobuf class with name " +] + }, + "PROTOBUF_DEPENDENCY_ERROR" : { +"message" : [ + "Could not find dependency: " +] + }, + "PROTOBUF_DESCRIPTOR_ERROR" : { +"message" : [ + "Error parsing descriptor byte[] into Descriptor object" +] + }, + "PROTOBUF_DESCRIPTOR_PARSING_ERROR" : { Review Comment: ```suggestion "CANNOT_CONSTRUCT_PROTOBUF_DESCRIPTOR" : { ``` Same as above.. Not sure how helpful this message is. ## core/src/main/resources/error/error-classes.json: ## @@ -706,6 +736,66 @@ ], "sqlState" : "42000" }, + "PROTOBUF_CLASS_LOAD_ERROR" : { +"message" : [ + "Could not load Protobuf class with name " +] + }, + "PROTOBUF_DEPENDENCY_ERROR" : { +"message" : [ + "Could not find dependency: " +] + }, + "PROTOBUF_DESCRIPTOR_ERROR" : { +"message" : [ + "Error parsing descriptor byte[] into Descriptor object" +] + }, + "PROTOBUF_DESCRIPTOR_PARSING_ERROR" : { +"message" : [ + "Error constructing FileDescriptor" +] + }, + "PROTOBUF_FIELD_MISSING_ERROR" : { +"message" : [ + "Searching for in Protobuf schema at gave matches. Candidates: " +] + }, + "PROTOBUF_FIELD_MISSING_IN_CATALYST_SCHEMA" : { +"message" : [ + "Found in Protobuf schema but there is no match in the SQL schema" +] + }, + "PROTOBUF_FIELD_TYPE_MISMATCH" : { +"message" : [ + "Type mismatch encountered for field: " +] + }, + "PROTOBUF_MESSAGE_TYPE_ERROR" : { +"message" : [ + " is not a Protobuf message type" +] + }, + "PROTOBUF_RECURSION_ERROR" : { Review Comment: ```suggestion "RECURSIVE_PROTOBUF_SCHEMA" : { ``` ## core/src/main/resources/error/error-classes.json: ## @@ -760,6 +850,11 @@ ], "sqlState" : "22023" }, + "SQL_TYPE_TO_PROTOBUF_TYPE_ERROR" : { Review Comment: ```suggestion "CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_TYPE" : { ``` ## core/src/main/resources/error/error-classes.json: ## @@ -792,6 +887,21 @@ "Unable to acquire bytes of memory, got " ] }, + "UNABLE_TO_CONVERT_TO_PROTOBUF_TYPE" : { +"message" : [ + "Unable to convert SQL type to Protobuf type ." +] + }, + "UNABLE_TO_LOCATE_PROTOBUF_MES