rangadi commented on code in PR #38344:
URL: https://github.com/apache/spark/pull/38344#discussion_r1006324273


##########
connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/utils/ProtobufUtils.scala:
##########
@@ -175,19 +175,26 @@ private[sql] object ProtobufUtils extends Logging {
       .asInstanceOf[Descriptor]
   }
 
+  // TODO: Revisit to ensure that messageName is searched through all imports

Review Comment:
   What is missing? Looks fairly complete to me.
   Better to state the problem here.



##########
connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/utils/ProtobufUtils.scala:
##########
@@ -177,47 +175,75 @@ private[sql] object ProtobufUtils extends Logging {
       .asInstanceOf[Descriptor]
   }
 
+  // TODO: Revisit to ensure that messageName is searched through all imports
   def buildDescriptor(descFilePath: String, messageName: String): Descriptor = 
{
-    val descriptor = 
parseFileDescriptor(descFilePath).getMessageTypes.asScala.find { desc =>
-      desc.getName == messageName || desc.getFullName == messageName
+    val descriptorList = parseFileDescriptor(descFilePath).map(fileDescriptor 
=> {
+      fileDescriptor.getMessageTypes.asScala.find { desc =>
+        desc.getName == messageName || desc.getFullName == messageName
+      }
+    }).filter(f => !f.isEmpty)
+
+    if (descriptorList.isEmpty) {
+      throw 
QueryCompilationErrors.noProtobufMessageTypeReturnError(messageName)
     }
 
-    descriptor match {
+    descriptorList.last match {
       case Some(d) => d
       case None =>
-        throw new RuntimeException(s"Unable to locate Message '$messageName' 
in Descriptor")
+        throw 
QueryCompilationErrors.unableToLocateProtobufMessageError(messageName)
     }
   }
 
-  private def parseFileDescriptor(descFilePath: String): 
Descriptors.FileDescriptor = {
+  private def parseFileDescriptor(descFilePath: String): 
List[Descriptors.FileDescriptor] = {

Review Comment:
   Rename to `parseFileDescriptorSet` (otherwise it sounds like it is parsing 
just one file descriptor). 



##########
connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/utils/ProtobufUtils.scala:
##########
@@ -175,19 +175,26 @@ private[sql] object ProtobufUtils extends Logging {
       .asInstanceOf[Descriptor]
   }
 
+  // TODO: Revisit to ensure that messageName is searched through all imports
   def buildDescriptor(descFilePath: String, messageName: String): Descriptor = 
{
-    val descriptor = 
parseFileDescriptor(descFilePath).getMessageTypes.asScala.find { desc =>
-      desc.getName == messageName || desc.getFullName == messageName
+    val descriptorList = parseFileDescriptor(descFilePath).map(fileDescriptor 
=> {
+      fileDescriptor.getMessageTypes.asScala.find { desc =>
+        desc.getName == messageName || desc.getFullName == messageName
+      }
+    }).filter(f => !f.isEmpty)
+
+    if (descriptorList.isEmpty) {
+      throw 
QueryCompilationErrors.noProtobufMessageTypeReturnError(messageName)
     }
 
-    descriptor match {
+    descriptorList.last match {

Review Comment:
   Could you add a comment on why we are picking the last one? Will be useful 
for future readers as well.



##########
connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/utils/ProtobufUtils.scala:
##########
@@ -177,47 +175,75 @@ private[sql] object ProtobufUtils extends Logging {
       .asInstanceOf[Descriptor]
   }
 
+  // TODO: Revisit to ensure that messageName is searched through all imports
   def buildDescriptor(descFilePath: String, messageName: String): Descriptor = 
{
-    val descriptor = 
parseFileDescriptor(descFilePath).getMessageTypes.asScala.find { desc =>
-      desc.getName == messageName || desc.getFullName == messageName
+    val descriptorList = parseFileDescriptor(descFilePath).map(fileDescriptor 
=> {

Review Comment:
   Style: use `find()` rather than map().filter(). 
   
   (you can use `findLast()` if there is a reason to use the last match). 
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to