heyihong commented on code in PR #42377: URL: https://github.com/apache/spark/pull/42377#discussion_r1329282029
########## connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala: ########## @@ -57,28 +69,105 @@ private[connect] object ErrorUtils extends Logging { classes.toSeq } - private def buildStatusFromThrowable(st: Throwable, stackTraceEnabled: Boolean): RPCStatus = { + private def serializeClasses(t: Throwable): String = { + JsonMethods.compact(JsonMethods.render(allClasses(t.getClass).map(_.getName))) + } + + private[connect] val NUM_ERRORS_LIMIT = 5 + + // We can get full exception messages and optionally stacktrace by + // a separate RPC call if enrichErrorEnabled is true. So imposing a smaller + // limit to reduce the probability of hitting the 8KB header limit. + private val MAX_MESSAGE_SIZE = 512 + + // Convert Throwable to a protobuf message FetchErrorDetailsResponse. + // Truncate error messages by default. + private[connect] def throwableToFetchErrorDetailsResponse( + st: Throwable, + isTruncated: Boolean = true, + serverStackTraceEnabled: Boolean = false, + pySparkJVMStackTraceEnabled: Boolean = false, + stackTraceInMessage: Boolean = false): FetchErrorDetailsResponse = { + val errorChain = traverseCauses(st).take(NUM_ERRORS_LIMIT) + + var lastErrorOpt: Option[ExceptionInfo] = None + + errorChain.reverse.map { case error => + val builder = ExceptionInfo + .newBuilder() + .setMessage(error.getMessage) + .addAllErrorTypeHierarchy(ErrorUtils.allClasses(error.getClass).map(_.getName).asJava) + + if (isTruncated) { + builder.setMessage(SparkConnectService.extractErrorMessage(error, MAX_MESSAGE_SIZE)) + } else { + if (stackTraceInMessage) { + if (serverStackTraceEnabled || pySparkJVMStackTraceEnabled) { + builder.setMessage( + s"${error.getMessage}\n\n" + + s"JVM stacktrace:\n${ExceptionUtils.getStackTrace(error)}") + } + } else { + if (serverStackTraceEnabled) { + builder.addAllStackTrace( + error.getStackTrace + .map { stackTraceElement => + FetchErrorDetailsResponse.StackTraceElement + .newBuilder() + 
.setDeclaringClass(stackTraceElement.getClassName) + .setMethodName(stackTraceElement.getMethodName) + .setFileName(stackTraceElement.getFileName) + .setLineNumber(stackTraceElement.getLineNumber) + .build() + } + .toIterable + .asJava) + } + } + } + + lastErrorOpt.foreach(builder.setCause(_)) + lastErrorOpt = Some(builder.build()) + } + + val responseBuilder = FetchErrorDetailsResponse.newBuilder() + + lastErrorOpt.foreach(responseBuilder.setExceptionInfo(_)) + + responseBuilder.build() + } + + private def buildStatusFromThrowable( + st: Throwable, + enrichErrorEnabled: Boolean, + userId: String, + sessionId: String): RPCStatus = { val errorInfo = ErrorInfo .newBuilder() .setReason(st.getClass.getName) .setDomain("org.apache.spark") - .putMetadata( - "classes", - JsonMethods.compact(JsonMethods.render(allClasses(st.getClass).map(_.getName)))) - - lazy val stackTrace = Option(ExceptionUtils.getStackTrace(st)) - val withStackTrace = if (stackTraceEnabled && stackTrace.nonEmpty) { - val maxSize = SparkEnv.get.conf.get(Connect.CONNECT_JVM_STACK_TRACE_MAX_SIZE) - errorInfo.putMetadata("stackTrace", StringUtils.abbreviate(stackTrace.get, maxSize)) - } else { - errorInfo + .putMetadata("classes", serializeClasses(st)) + + if (enrichErrorEnabled) { + val errorId = UUID.randomUUID().toString + + // The errorId of the exception. + errorInfo.putMetadata("errorId", errorId) + + SparkConnectService + .getOrCreateIsolatedSession(userId, sessionId) + .errorIdToError + .put(errorId, st) } RPCStatus .newBuilder() .setCode(RPCCode.INTERNAL_VALUE) - .addDetails(ProtoAny.pack(withStackTrace.build())) - .setMessage(SparkConnectService.extractErrorMessage(st)) + .addAllDetails( + Seq(errorInfo.build(), throwableToFetchErrorDetailsResponse(st)) Review Comment: In case the FetchErrorDetails RPC failed, fallback to an abbreviated FetchErrorDetailsResponse is consistent with the existing control. 
Otherwise, we may have to throw an Exception or fall back to an exception without cause support. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org