sandip-db commented on code in PR #44163: URL: https://github.com/apache/spark/pull/44163#discussion_r1423270340
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala: ########## @@ -604,6 +606,24 @@ class XmlTokenizer( return Some(str) } } catch { Review Comment: Consider this to avoid reader.close repetitions: ``` def next(): Option[String] = { var nextString: Option[String] = None try { if (readUntilStartElement()) { buffer.append(currentStartTag) // Don't check whether the end element was found. Even if not, return everything // that was read, which will invariably cause a parse error later readUntilEndElement(currentStartTag.endsWith(">")) nextString = Some(buffer.toString()) buffer = new StringBuilder() } } catch { case e: FileNotFoundException if options.ignoreMissingFiles => logWarning( "Skipping the rest of" + " the content in the missing file during schema inference", e) case NonFatal(e) => ExceptionUtils.getRootCause(e) match { case _: RuntimeException | _: IOException if options.ignoreCorruptFiles => logWarning( "Skipping the rest of" + " the content in the corrupted file during schema inference", e ) } } finally { if (nextString == None) { reader.close() reader = null } } nextString } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org