shujingyang-db commented on code in PR #45411:
URL: https://github.com/apache/spark/pull/45411#discussion_r1520394446


##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/TestXmlData.scala:
##########
@@ -68,4 +68,444 @@ private[xml] trait TestXmlData {
     f(dir)
     fs.setVerifyChecksum(true)
   }
+
+  def primitiveFieldValueTypeConflict: Seq[String] =
+    """<ROW>
+          |  <num_num_1>11</num_num_1>
+          |  <num_num_2/>
+          |  <num_num_3>1.1</num_num_3>
+          |  <num_bool>true</num_bool>
+          |  <num_str>13.1</num_str>
+          |  <str_bool>str1</str_bool>
+          |</ROW>
+          |""".stripMargin ::
+    """
+           |<ROW>
+           |  <num_num_1/>
+           |  <num_num_2>21474836470.9</num_num_2>
+           |  <num_num_3/>
+           |  <num_bool>12</num_bool>
+           |  <num_str/>
+           |  <str_bool>true</str_bool>
+           |</ROW>""".stripMargin ::
+    """
+            |<ROW>
+            |  <num_num_1>21474836470</num_num_1>
+            |  <num_num_2>92233720368547758070</num_num_2>
+            |  <num_num_3>100</num_num_3>
+            |  <num_bool>false</num_bool>
+            |  <num_str>str1</num_str>
+            |  <str_bool>false</str_bool>
+            |</ROW>""".stripMargin ::
+    """
+            |<ROW>
+            |  <num_num_1>21474836570</num_num_1>
+            |  <num_num_2>1.1</num_num_2>
+            |  <num_num_3>21474836470</num_num_3>
+            |  <num_bool/>
+            |  <num_str>92233720368547758070</num_str>
+            |  <str_bool/>
+            |</ROW>""".stripMargin :: Nil
+
+  def xmlNullStruct: Seq[String] =
+    """<ROW>
+          |  <nullstr></nullstr>
+          |  <ip>27.31.100.29</ip>
+          |  <headers>
+          |    <Host>1.abc.com</Host>
+          |    <Charset>UTF-8</Charset>
+          |  </headers>
+          |</ROW>""".stripMargin ::
+    """<ROW>
+          |  <nullstr></nullstr>
+          |  <ip>27.31.100.29</ip>
+          |  <headers/>
+          |</ROW>""".stripMargin ::
+    """<ROW>
+          |  <nullstr></nullstr>
+          |  <ip>27.31.100.29</ip>
+          |  <headers></headers>
+          |</ROW>""".stripMargin ::
+    """<ROW>
+          |  <nullstr/>
+          |  <ip>27.31.100.29</ip>
+          |  <headers/>
+          |</ROW>""".stripMargin :: Nil
+
+  def complexFieldValueTypeConflict: Seq[String] =
+    """<ROW>
+      <num_struct>11</num_struct>
+      <str_array>1</str_array>
+      <str_array>2</str_array>
+      <str_array>3</str_array>
+      <array></array>
+      <struct_array></struct_array>
+      <struct></struct>
+    </ROW>""" ::
+    """<ROW>
+      <num_struct>
+        <field>false</field>
+      </num_struct>
+      <str_array/>
+      <array/>
+      <struct_array></struct_array>
+      <struct/>
+    </ROW>""" ::
+    """<ROW>
+      <num_struct/>
+      <str_array>str</str_array>
+      <array>4</array>
+      <array>5</array>
+      <array>6</array>
+      <struct_array>7</struct_array>
+      <struct_array>8</struct_array>
+      <struct_array>9</struct_array>
+      <struct>
+        <field/>
+      </struct>
+    </ROW>""" ::
+    """<ROW>
+      <num_struct></num_struct>
+      <str_array>str1</str_array>
+      <str_array>str2</str_array>
+      <str_array>33</str_array>
+      <array>7</array>
+      <struct_array>
+        <field>true</field>
+      </struct_array>
+      <struct>
+        <field>str</field>
+      </struct>
+    </ROW>""" :: Nil
+
+  def arrayElementTypeConflict: Seq[String] =
+    """
+          |<ROW>
+          |   <array1>
+          |      <element>1</element>
+          |      <element>1.1</element>
+          |      <element>true</element>
+          |      <element/>
+          |      <element>
+          |         <array/>
+          |      </element>
+          |      <element>
+          |         <object/>
+          |      </element>
+          |   </array1>
+          |   <array1>
+          |      <element>
+          |         <array>
+          |            <element>2</element>
+          |            <element>3</element>
+          |            <element>4</element>
+          |         </array>
+          |      </element>
+          |      <element>
+          |         <object>
+          |            <field>str</field>
+          |         </object>
+          |      </element>
+          |   </array1>
+          |   <array2>
+          |      <field>214748364700</field>
+          |   </array2>
+          |   <array2>
+          |      <field>1</field>
+          |   </array2>
+          |</ROW>
+          |""".stripMargin ::
+    """
+          |<ROW>
+          |  <array3>
+          |    <field>str</field>
+          |  </array3>
+          |  <array3>
+          |    <field>1</field>
+          |  </array3>
+          |</ROW>
+          |""".stripMargin ::
+    """
+          |<ROW>
+          |  <array3>1</array3>
+          |  <array3>2</array3>
+          |  <array3>3</array3>
+          |</ROW>
+          |""".stripMargin :: Nil
+
+  def missingFields: Seq[String] =
+    """
+        <ROW><a>true</a></ROW>
+        """ ::
+    """
+        <ROW><b>21474836470</b></ROW>
+        """ ::
+    """
+        <ROW><c>33</c><c>44</c></ROW>
+        """ ::
+    """
+        <ROW><d><field>true</field></d></ROW>
+        """ ::
+    """
+        <ROW><e>str</e></ROW>
+        """ :: Nil
+
+  // XML doesn't support array of arrays
+  // It only supports array of structs
+  def complexFieldAndType1: Seq[String] =
+    """
+          |<ROW>
+          |  <struct>
+          |    <field1>true</field1>
+          |    <field2>92233720368547758070</field2>
+          |  </struct>
+          |  <structWithArrayFields>
+          |    <field1>4</field1>
+          |    <field1>5</field1>
+          |    <field1>6</field1>
+          |    <field2>str1</field2>
+          |    <field2>str2</field2>
+          |  </structWithArrayFields>
+          |  <arrayOfString>str1</arrayOfString>
+          |  <arrayOfString>str2</arrayOfString>
+          |  <arrayOfInteger>1</arrayOfInteger>
+          |  <arrayOfInteger>2147483647</arrayOfInteger>
+          |  <arrayOfInteger>-2147483648</arrayOfInteger>
+          |  <arrayOfLong>21474836470</arrayOfLong>
+          |  <arrayOfLong>9223372036854775807</arrayOfLong>
+          |  <arrayOfLong>-9223372036854775808</arrayOfLong>
+          |  <arrayOfBigInteger>922337203685477580700</arrayOfBigInteger>
+          |  <arrayOfBigInteger>-922337203685477580800</arrayOfBigInteger>
+          |  <arrayOfDouble>1.2</arrayOfDouble>
+          |  <arrayOfDouble>1.7976931348623157</arrayOfDouble>
+          |  <arrayOfDouble>4.9E-324</arrayOfDouble>
+          |  <arrayOfDouble>2.2250738585072014E-308</arrayOfDouble>
+          |  <arrayOfBoolean>true</arrayOfBoolean>
+          |  <arrayOfBoolean>false</arrayOfBoolean>
+          |  <arrayOfBoolean>true</arrayOfBoolean>
+          |  <arrayOfNull></arrayOfNull>
+          |  <arrayOfNull></arrayOfNull>
+          |  <arrayOfStruct>
+          |    <field1>true</field1>
+          |    <field2>str1</field2>
+          |  </arrayOfStruct>
+          |  <arrayOfStruct>
+          |    <field1>false</field1>
+          |  </arrayOfStruct>
+          |  <arrayOfStruct>
+          |    <field3/>
+          |  </arrayOfStruct>
+          |<arrayOfArray1>
+          |  <item>1</item><item>2</item><item>3</item>
+          |</arrayOfArray1>
+          |<arrayOfArray1>
+          |  <item>str1</item><item>str2</item>
+          |</arrayOfArray1>
+          |<arrayOfArray2>
+          |  <item>1</item><item>2</item><item>3</item>
+          |</arrayOfArray2>
+          |<arrayOfArray2>
+          |  <item>1.1</item><item>2.1</item><item>3.1</item>
+          |</arrayOfArray2>
+          |</ROW>
+          |
+          |""".stripMargin :: Nil
+
+  def complexFieldAndType2: Seq[String] =
+    """
+          |<ROW>
+          |  <arrayOfStruct>
+          |    <item>
+          |      <field1>true</field1>
+          |      <field2>str1</field2>
+          |    </item>
+          |    <item>
+          |      <field1>false</field1>
+          |    </item>
+          |    <item>
+          |      <field3/>
+          |    </item>
+          |  </arrayOfStruct>
+          |  <complexArrayOfStruct>
+          |    <item>
+          |      <field1>
+          |        <item>
+          |          <inner1>str1</inner1>
+          |        </item>
+          |        <item>
+          |          <inner2><item>str2</item><item>str22</item></inner2>
+          |        </item>
+          |      </field1>
+          |      <field2>
+          |        <array><item>1</item><item>2</item></array>
+          |        <array><item>3</item><item>4</item></array>
+          |      </field2>
+          |    </item>
+          |    <item>
+          |      <field1>
+          |        <item>
+          |          <inner2>
+          |            <item>str3</item><item>str33</item>
+          |          </inner2>
+          |        </item>
+          |        <item>
+          |          <inner1>str4</inner1>
+          |        </item>
+          |      </field1>
+          |      <field2>
+          |        <array>
+          |          <item>5</item><item>6</item>
+          |        </array>
+          |        <array>
+          |          <item>7</item><item>8</item>
+          |        </array>
+          |      </field2>
+          |    </item>
+          |  </complexArrayOfStruct>
+          |  <arrayOfArray1>
+          |  <array>
+          |    <item>5</item>
+          |  </array>
+          |</arrayOfArray1>
+          |<arrayOfArray1>
+          |  <array>
+          |    <item>6</item><item>7</item>
+          |  </array>
+          |  <array>
+          |    <item>8</item>
+          |  </array>
+          |</arrayOfArray1>
+          |  <arrayOfArray2>
+          |  <array>
+          |    <item>
+          |      <inner1>str1</inner1>
+          |    </item>
+          |  </array>
+          |</arrayOfArray2>
+          |<arrayOfArray2>
+          |  <array/>
+          |  <array>
+          |    <item>
+          |      <inner2>str3</inner2>
+          |      <inner2>str33</inner2>
+          |    </item>
+          |    <item>
+          |      <inner2>str4</inner2>
+          |      <inner1>str11</inner1>
+          |    </item>
+          |  </array>
+          |</arrayOfArray2>
+          |<arrayOfArray2>
+          |  <array>
+          |    <item>
+          |      <inner3>
+          |        <inner4>2</inner4>
+          |        <inner4>3</inner4>
+          |      </inner3>
+          |      <inner3/>
+          |    </item>
+          |  </array>
+          |</arrayOfArray2>
+          |</ROW>
+          |""".stripMargin :: Nil
+
+  def nullsInArrays: Seq[String] =

Review Comment:
   I plan to add tests in a follow-up PR. I will remove this for clarity in 
this PR.



##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/TestXmlData.scala:
##########
@@ -68,4 +68,444 @@ private[xml] trait TestXmlData {
     f(dir)
     fs.setVerifyChecksum(true)
   }
+
+  def primitiveFieldValueTypeConflict: Seq[String] =
+    """<ROW>
+          |  <num_num_1>11</num_num_1>
+          |  <num_num_2/>
+          |  <num_num_3>1.1</num_num_3>
+          |  <num_bool>true</num_bool>
+          |  <num_str>13.1</num_str>
+          |  <str_bool>str1</str_bool>
+          |</ROW>
+          |""".stripMargin ::
+    """
+           |<ROW>
+           |  <num_num_1/>
+           |  <num_num_2>21474836470.9</num_num_2>
+           |  <num_num_3/>
+           |  <num_bool>12</num_bool>
+           |  <num_str/>
+           |  <str_bool>true</str_bool>
+           |</ROW>""".stripMargin ::
+    """
+            |<ROW>
+            |  <num_num_1>21474836470</num_num_1>
+            |  <num_num_2>92233720368547758070</num_num_2>
+            |  <num_num_3>100</num_num_3>
+            |  <num_bool>false</num_bool>
+            |  <num_str>str1</num_str>
+            |  <str_bool>false</str_bool>
+            |</ROW>""".stripMargin ::
+    """
+            |<ROW>
+            |  <num_num_1>21474836570</num_num_1>
+            |  <num_num_2>1.1</num_num_2>
+            |  <num_num_3>21474836470</num_num_3>
+            |  <num_bool/>
+            |  <num_str>92233720368547758070</num_str>
+            |  <str_bool/>
+            |</ROW>""".stripMargin :: Nil
+
+  def xmlNullStruct: Seq[String] =
+    """<ROW>
+          |  <nullstr></nullstr>
+          |  <ip>27.31.100.29</ip>
+          |  <headers>
+          |    <Host>1.abc.com</Host>
+          |    <Charset>UTF-8</Charset>
+          |  </headers>
+          |</ROW>""".stripMargin ::
+    """<ROW>
+          |  <nullstr></nullstr>
+          |  <ip>27.31.100.29</ip>
+          |  <headers/>
+          |</ROW>""".stripMargin ::
+    """<ROW>
+          |  <nullstr></nullstr>
+          |  <ip>27.31.100.29</ip>
+          |  <headers></headers>
+          |</ROW>""".stripMargin ::
+    """<ROW>
+          |  <nullstr/>
+          |  <ip>27.31.100.29</ip>
+          |  <headers/>
+          |</ROW>""".stripMargin :: Nil
+
+  def complexFieldValueTypeConflict: Seq[String] =
+    """<ROW>
+      <num_struct>11</num_struct>
+      <str_array>1</str_array>
+      <str_array>2</str_array>
+      <str_array>3</str_array>
+      <array></array>
+      <struct_array></struct_array>
+      <struct></struct>
+    </ROW>""" ::
+    """<ROW>
+      <num_struct>
+        <field>false</field>
+      </num_struct>
+      <str_array/>
+      <array/>
+      <struct_array></struct_array>
+      <struct/>
+    </ROW>""" ::
+    """<ROW>
+      <num_struct/>
+      <str_array>str</str_array>
+      <array>4</array>
+      <array>5</array>
+      <array>6</array>
+      <struct_array>7</struct_array>
+      <struct_array>8</struct_array>
+      <struct_array>9</struct_array>
+      <struct>
+        <field/>
+      </struct>
+    </ROW>""" ::
+    """<ROW>
+      <num_struct></num_struct>
+      <str_array>str1</str_array>
+      <str_array>str2</str_array>
+      <str_array>33</str_array>
+      <array>7</array>
+      <struct_array>
+        <field>true</field>
+      </struct_array>
+      <struct>
+        <field>str</field>
+      </struct>
+    </ROW>""" :: Nil
+
+  def arrayElementTypeConflict: Seq[String] =
+    """
+          |<ROW>
+          |   <array1>
+          |      <element>1</element>
+          |      <element>1.1</element>
+          |      <element>true</element>
+          |      <element/>
+          |      <element>
+          |         <array/>
+          |      </element>
+          |      <element>
+          |         <object/>
+          |      </element>
+          |   </array1>
+          |   <array1>
+          |      <element>
+          |         <array>
+          |            <element>2</element>
+          |            <element>3</element>
+          |            <element>4</element>
+          |         </array>
+          |      </element>
+          |      <element>
+          |         <object>
+          |            <field>str</field>
+          |         </object>
+          |      </element>
+          |   </array1>
+          |   <array2>
+          |      <field>214748364700</field>
+          |   </array2>
+          |   <array2>
+          |      <field>1</field>
+          |   </array2>
+          |</ROW>
+          |""".stripMargin ::
+    """
+          |<ROW>
+          |  <array3>
+          |    <field>str</field>
+          |  </array3>
+          |  <array3>
+          |    <field>1</field>
+          |  </array3>
+          |</ROW>
+          |""".stripMargin ::
+    """
+          |<ROW>
+          |  <array3>1</array3>
+          |  <array3>2</array3>
+          |  <array3>3</array3>
+          |</ROW>
+          |""".stripMargin :: Nil
+
+  def missingFields: Seq[String] =
+    """
+        <ROW><a>true</a></ROW>
+        """ ::
+    """
+        <ROW><b>21474836470</b></ROW>
+        """ ::
+    """
+        <ROW><c>33</c><c>44</c></ROW>
+        """ ::
+    """
+        <ROW><d><field>true</field></d></ROW>
+        """ ::
+    """
+        <ROW><e>str</e></ROW>
+        """ :: Nil
+
+  // XML doesn't support array of arrays
+  // It only supports array of structs
+  def complexFieldAndType1: Seq[String] =
+    """
+          |<ROW>
+          |  <struct>
+          |    <field1>true</field1>
+          |    <field2>92233720368547758070</field2>
+          |  </struct>
+          |  <structWithArrayFields>
+          |    <field1>4</field1>
+          |    <field1>5</field1>
+          |    <field1>6</field1>
+          |    <field2>str1</field2>
+          |    <field2>str2</field2>
+          |  </structWithArrayFields>
+          |  <arrayOfString>str1</arrayOfString>
+          |  <arrayOfString>str2</arrayOfString>
+          |  <arrayOfInteger>1</arrayOfInteger>
+          |  <arrayOfInteger>2147483647</arrayOfInteger>
+          |  <arrayOfInteger>-2147483648</arrayOfInteger>
+          |  <arrayOfLong>21474836470</arrayOfLong>
+          |  <arrayOfLong>9223372036854775807</arrayOfLong>
+          |  <arrayOfLong>-9223372036854775808</arrayOfLong>
+          |  <arrayOfBigInteger>922337203685477580700</arrayOfBigInteger>
+          |  <arrayOfBigInteger>-922337203685477580800</arrayOfBigInteger>
+          |  <arrayOfDouble>1.2</arrayOfDouble>
+          |  <arrayOfDouble>1.7976931348623157</arrayOfDouble>
+          |  <arrayOfDouble>4.9E-324</arrayOfDouble>
+          |  <arrayOfDouble>2.2250738585072014E-308</arrayOfDouble>
+          |  <arrayOfBoolean>true</arrayOfBoolean>
+          |  <arrayOfBoolean>false</arrayOfBoolean>
+          |  <arrayOfBoolean>true</arrayOfBoolean>
+          |  <arrayOfNull></arrayOfNull>
+          |  <arrayOfNull></arrayOfNull>
+          |  <arrayOfStruct>
+          |    <field1>true</field1>
+          |    <field2>str1</field2>
+          |  </arrayOfStruct>
+          |  <arrayOfStruct>
+          |    <field1>false</field1>
+          |  </arrayOfStruct>
+          |  <arrayOfStruct>
+          |    <field3/>
+          |  </arrayOfStruct>
+          |<arrayOfArray1>
+          |  <item>1</item><item>2</item><item>3</item>
+          |</arrayOfArray1>
+          |<arrayOfArray1>
+          |  <item>str1</item><item>str2</item>
+          |</arrayOfArray1>
+          |<arrayOfArray2>
+          |  <item>1</item><item>2</item><item>3</item>
+          |</arrayOfArray2>
+          |<arrayOfArray2>
+          |  <item>1.1</item><item>2.1</item><item>3.1</item>
+          |</arrayOfArray2>
+          |</ROW>
+          |
+          |""".stripMargin :: Nil
+
+  def complexFieldAndType2: Seq[String] =
+    """
+          |<ROW>
+          |  <arrayOfStruct>
+          |    <item>
+          |      <field1>true</field1>
+          |      <field2>str1</field2>
+          |    </item>
+          |    <item>
+          |      <field1>false</field1>
+          |    </item>
+          |    <item>
+          |      <field3/>
+          |    </item>
+          |  </arrayOfStruct>
+          |  <complexArrayOfStruct>
+          |    <item>
+          |      <field1>
+          |        <item>
+          |          <inner1>str1</inner1>
+          |        </item>
+          |        <item>
+          |          <inner2><item>str2</item><item>str22</item></inner2>
+          |        </item>
+          |      </field1>
+          |      <field2>
+          |        <array><item>1</item><item>2</item></array>
+          |        <array><item>3</item><item>4</item></array>
+          |      </field2>
+          |    </item>
+          |    <item>
+          |      <field1>
+          |        <item>
+          |          <inner2>
+          |            <item>str3</item><item>str33</item>
+          |          </inner2>
+          |        </item>
+          |        <item>
+          |          <inner1>str4</inner1>
+          |        </item>
+          |      </field1>
+          |      <field2>
+          |        <array>
+          |          <item>5</item><item>6</item>
+          |        </array>
+          |        <array>
+          |          <item>7</item><item>8</item>
+          |        </array>
+          |      </field2>
+          |    </item>
+          |  </complexArrayOfStruct>
+          |  <arrayOfArray1>
+          |  <array>
+          |    <item>5</item>
+          |  </array>
+          |</arrayOfArray1>
+          |<arrayOfArray1>
+          |  <array>
+          |    <item>6</item><item>7</item>
+          |  </array>
+          |  <array>
+          |    <item>8</item>
+          |  </array>
+          |</arrayOfArray1>
+          |  <arrayOfArray2>
+          |  <array>
+          |    <item>
+          |      <inner1>str1</inner1>
+          |    </item>
+          |  </array>
+          |</arrayOfArray2>
+          |<arrayOfArray2>
+          |  <array/>
+          |  <array>
+          |    <item>
+          |      <inner2>str3</inner2>
+          |      <inner2>str33</inner2>
+          |    </item>
+          |    <item>
+          |      <inner2>str4</inner2>
+          |      <inner1>str11</inner1>
+          |    </item>
+          |  </array>
+          |</arrayOfArray2>
+          |<arrayOfArray2>
+          |  <array>
+          |    <item>
+          |      <inner3>
+          |        <inner4>2</inner4>
+          |        <inner4>3</inner4>
+          |      </inner3>
+          |      <inner3/>
+          |    </item>
+          |  </array>
+          |</arrayOfArray2>
+          |</ROW>
+          |""".stripMargin :: Nil
+
+  def nullsInArrays: Seq[String] =
+    """<ROW>
+          <field1>
+            <array>
+              <item/>
+            </array>
+            <array>
+              <array>
+                <array>
+                  <item>Test</item>
+                </array>
+              </array>
+            </array>
+          </field1>
+        </ROW>""" ::
+    """
+        <ROW>
+          <field2>
+            <item/>
+            <array>
+              <item>
+                <Test>1</Test>
+              </item>
+            </array>
+          </field2>
+        </ROW>""" ::
+    """
+        <ROW>
+          <field3>
+            <array>
+              <item/>
+            </array>
+            <array>
+              <item>
+                <Test>2</Test>
+              </item>
+            </array>
+          </field3>
+        </ROW>""" ::
+    """
+        <ROW>
+          <field4>
+            <array>
+              <item/>
+              <array>
+                <item>1</item>
+                <item>2</item>
+                <item>3</item>
+              </array>
+            </array>
+          </field4>
+        </ROW>""" :: Nil
+
+  def corruptRecords: Seq[String] =

Review Comment:
   I plan to add tests in a follow-up PR. I will remove this for clarity in 
this PR.



##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/TestXmlData.scala:
##########
@@ -68,4 +68,444 @@ private[xml] trait TestXmlData {
     f(dir)
     fs.setVerifyChecksum(true)
   }
+
+  def primitiveFieldValueTypeConflict: Seq[String] =
+    """<ROW>
+          |  <num_num_1>11</num_num_1>
+          |  <num_num_2/>
+          |  <num_num_3>1.1</num_num_3>
+          |  <num_bool>true</num_bool>
+          |  <num_str>13.1</num_str>
+          |  <str_bool>str1</str_bool>
+          |</ROW>
+          |""".stripMargin ::
+    """
+           |<ROW>
+           |  <num_num_1/>
+           |  <num_num_2>21474836470.9</num_num_2>
+           |  <num_num_3/>
+           |  <num_bool>12</num_bool>
+           |  <num_str/>
+           |  <str_bool>true</str_bool>
+           |</ROW>""".stripMargin ::
+    """
+            |<ROW>
+            |  <num_num_1>21474836470</num_num_1>
+            |  <num_num_2>92233720368547758070</num_num_2>
+            |  <num_num_3>100</num_num_3>
+            |  <num_bool>false</num_bool>
+            |  <num_str>str1</num_str>
+            |  <str_bool>false</str_bool>
+            |</ROW>""".stripMargin ::
+    """
+            |<ROW>
+            |  <num_num_1>21474836570</num_num_1>
+            |  <num_num_2>1.1</num_num_2>
+            |  <num_num_3>21474836470</num_num_3>
+            |  <num_bool/>
+            |  <num_str>92233720368547758070</num_str>
+            |  <str_bool/>
+            |</ROW>""".stripMargin :: Nil
+
+  def xmlNullStruct: Seq[String] =
+    """<ROW>
+          |  <nullstr></nullstr>
+          |  <ip>27.31.100.29</ip>
+          |  <headers>
+          |    <Host>1.abc.com</Host>
+          |    <Charset>UTF-8</Charset>
+          |  </headers>
+          |</ROW>""".stripMargin ::
+    """<ROW>
+          |  <nullstr></nullstr>
+          |  <ip>27.31.100.29</ip>
+          |  <headers/>
+          |</ROW>""".stripMargin ::
+    """<ROW>
+          |  <nullstr></nullstr>
+          |  <ip>27.31.100.29</ip>
+          |  <headers></headers>
+          |</ROW>""".stripMargin ::
+    """<ROW>
+          |  <nullstr/>
+          |  <ip>27.31.100.29</ip>
+          |  <headers/>
+          |</ROW>""".stripMargin :: Nil
+
+  def complexFieldValueTypeConflict: Seq[String] =
+    """<ROW>
+      <num_struct>11</num_struct>
+      <str_array>1</str_array>
+      <str_array>2</str_array>
+      <str_array>3</str_array>
+      <array></array>
+      <struct_array></struct_array>
+      <struct></struct>
+    </ROW>""" ::
+    """<ROW>
+      <num_struct>
+        <field>false</field>
+      </num_struct>
+      <str_array/>
+      <array/>
+      <struct_array></struct_array>
+      <struct/>
+    </ROW>""" ::
+    """<ROW>
+      <num_struct/>
+      <str_array>str</str_array>
+      <array>4</array>
+      <array>5</array>
+      <array>6</array>
+      <struct_array>7</struct_array>
+      <struct_array>8</struct_array>
+      <struct_array>9</struct_array>
+      <struct>
+        <field/>
+      </struct>
+    </ROW>""" ::
+    """<ROW>
+      <num_struct></num_struct>
+      <str_array>str1</str_array>
+      <str_array>str2</str_array>
+      <str_array>33</str_array>
+      <array>7</array>
+      <struct_array>
+        <field>true</field>
+      </struct_array>
+      <struct>
+        <field>str</field>
+      </struct>
+    </ROW>""" :: Nil
+
+  def arrayElementTypeConflict: Seq[String] =
+    """
+          |<ROW>
+          |   <array1>
+          |      <element>1</element>
+          |      <element>1.1</element>
+          |      <element>true</element>
+          |      <element/>
+          |      <element>
+          |         <array/>
+          |      </element>
+          |      <element>
+          |         <object/>
+          |      </element>
+          |   </array1>
+          |   <array1>
+          |      <element>
+          |         <array>
+          |            <element>2</element>
+          |            <element>3</element>
+          |            <element>4</element>
+          |         </array>
+          |      </element>
+          |      <element>
+          |         <object>
+          |            <field>str</field>
+          |         </object>
+          |      </element>
+          |   </array1>
+          |   <array2>
+          |      <field>214748364700</field>
+          |   </array2>
+          |   <array2>
+          |      <field>1</field>
+          |   </array2>
+          |</ROW>
+          |""".stripMargin ::
+    """
+          |<ROW>
+          |  <array3>
+          |    <field>str</field>
+          |  </array3>
+          |  <array3>
+          |    <field>1</field>
+          |  </array3>
+          |</ROW>
+          |""".stripMargin ::
+    """
+          |<ROW>
+          |  <array3>1</array3>
+          |  <array3>2</array3>
+          |  <array3>3</array3>
+          |</ROW>
+          |""".stripMargin :: Nil
+
+  def missingFields: Seq[String] =
+    """
+        <ROW><a>true</a></ROW>
+        """ ::
+    """
+        <ROW><b>21474836470</b></ROW>
+        """ ::
+    """
+        <ROW><c>33</c><c>44</c></ROW>
+        """ ::
+    """
+        <ROW><d><field>true</field></d></ROW>
+        """ ::
+    """
+        <ROW><e>str</e></ROW>
+        """ :: Nil
+
+  // XML doesn't support array of arrays
+  // It only supports array of structs
+  def complexFieldAndType1: Seq[String] =
+    """
+          |<ROW>
+          |  <struct>
+          |    <field1>true</field1>
+          |    <field2>92233720368547758070</field2>
+          |  </struct>
+          |  <structWithArrayFields>
+          |    <field1>4</field1>
+          |    <field1>5</field1>
+          |    <field1>6</field1>
+          |    <field2>str1</field2>
+          |    <field2>str2</field2>
+          |  </structWithArrayFields>
+          |  <arrayOfString>str1</arrayOfString>
+          |  <arrayOfString>str2</arrayOfString>
+          |  <arrayOfInteger>1</arrayOfInteger>
+          |  <arrayOfInteger>2147483647</arrayOfInteger>
+          |  <arrayOfInteger>-2147483648</arrayOfInteger>
+          |  <arrayOfLong>21474836470</arrayOfLong>
+          |  <arrayOfLong>9223372036854775807</arrayOfLong>
+          |  <arrayOfLong>-9223372036854775808</arrayOfLong>
+          |  <arrayOfBigInteger>922337203685477580700</arrayOfBigInteger>
+          |  <arrayOfBigInteger>-922337203685477580800</arrayOfBigInteger>
+          |  <arrayOfDouble>1.2</arrayOfDouble>
+          |  <arrayOfDouble>1.7976931348623157</arrayOfDouble>
+          |  <arrayOfDouble>4.9E-324</arrayOfDouble>
+          |  <arrayOfDouble>2.2250738585072014E-308</arrayOfDouble>
+          |  <arrayOfBoolean>true</arrayOfBoolean>
+          |  <arrayOfBoolean>false</arrayOfBoolean>
+          |  <arrayOfBoolean>true</arrayOfBoolean>
+          |  <arrayOfNull></arrayOfNull>
+          |  <arrayOfNull></arrayOfNull>
+          |  <arrayOfStruct>
+          |    <field1>true</field1>
+          |    <field2>str1</field2>
+          |  </arrayOfStruct>
+          |  <arrayOfStruct>
+          |    <field1>false</field1>
+          |  </arrayOfStruct>
+          |  <arrayOfStruct>
+          |    <field3/>
+          |  </arrayOfStruct>
+          |<arrayOfArray1>
+          |  <item>1</item><item>2</item><item>3</item>
+          |</arrayOfArray1>
+          |<arrayOfArray1>
+          |  <item>str1</item><item>str2</item>
+          |</arrayOfArray1>
+          |<arrayOfArray2>
+          |  <item>1</item><item>2</item><item>3</item>
+          |</arrayOfArray2>
+          |<arrayOfArray2>
+          |  <item>1.1</item><item>2.1</item><item>3.1</item>
+          |</arrayOfArray2>
+          |</ROW>
+          |
+          |""".stripMargin :: Nil
+
+  def complexFieldAndType2: Seq[String] =
+    """
+          |<ROW>
+          |  <arrayOfStruct>
+          |    <item>
+          |      <field1>true</field1>
+          |      <field2>str1</field2>
+          |    </item>
+          |    <item>
+          |      <field1>false</field1>
+          |    </item>
+          |    <item>
+          |      <field3/>
+          |    </item>
+          |  </arrayOfStruct>
+          |  <complexArrayOfStruct>
+          |    <item>
+          |      <field1>
+          |        <item>
+          |          <inner1>str1</inner1>
+          |        </item>
+          |        <item>
+          |          <inner2><item>str2</item><item>str22</item></inner2>
+          |        </item>
+          |      </field1>
+          |      <field2>
+          |        <array><item>1</item><item>2</item></array>
+          |        <array><item>3</item><item>4</item></array>
+          |      </field2>
+          |    </item>
+          |    <item>
+          |      <field1>
+          |        <item>
+          |          <inner2>
+          |            <item>str3</item><item>str33</item>
+          |          </inner2>
+          |        </item>
+          |        <item>
+          |          <inner1>str4</inner1>
+          |        </item>
+          |      </field1>
+          |      <field2>
+          |        <array>
+          |          <item>5</item><item>6</item>
+          |        </array>
+          |        <array>
+          |          <item>7</item><item>8</item>
+          |        </array>
+          |      </field2>
+          |    </item>
+          |  </complexArrayOfStruct>
+          |  <arrayOfArray1>
+          |  <array>
+          |    <item>5</item>
+          |  </array>
+          |</arrayOfArray1>
+          |<arrayOfArray1>
+          |  <array>
+          |    <item>6</item><item>7</item>
+          |  </array>
+          |  <array>
+          |    <item>8</item>
+          |  </array>
+          |</arrayOfArray1>
+          |  <arrayOfArray2>
+          |  <array>
+          |    <item>
+          |      <inner1>str1</inner1>
+          |    </item>
+          |  </array>
+          |</arrayOfArray2>
+          |<arrayOfArray2>
+          |  <array/>
+          |  <array>
+          |    <item>
+          |      <inner2>str3</inner2>
+          |      <inner2>str33</inner2>
+          |    </item>
+          |    <item>
+          |      <inner2>str4</inner2>
+          |      <inner1>str11</inner1>
+          |    </item>
+          |  </array>
+          |</arrayOfArray2>
+          |<arrayOfArray2>
+          |  <array>
+          |    <item>
+          |      <inner3>
+          |        <inner4>2</inner4>
+          |        <inner4>3</inner4>
+          |      </inner3>
+          |      <inner3/>
+          |    </item>
+          |  </array>
+          |</arrayOfArray2>
+          |</ROW>
+          |""".stripMargin :: Nil
+
+  def nullsInArrays: Seq[String] =
+    """<ROW>
+          <field1>
+            <array>
+              <item/>
+            </array>
+            <array>
+              <array>
+                <array>
+                  <item>Test</item>
+                </array>
+              </array>
+            </array>
+          </field1>
+        </ROW>""" ::
+    """
+        <ROW>
+          <field2>
+            <item/>
+            <array>
+              <item>
+                <Test>1</Test>
+              </item>
+            </array>
+          </field2>
+        </ROW>""" ::
+    """
+        <ROW>
+          <field3>
+            <array>
+              <item/>
+            </array>
+            <array>
+              <item>
+                <Test>2</Test>
+              </item>
+            </array>
+          </field3>
+        </ROW>""" ::
+    """
+        <ROW>
+          <field4>
+            <array>
+              <item/>
+              <array>
+                <item>1</item>
+                <item>2</item>
+                <item>3</item>
+              </array>
+            </array>
+          </field4>
+        </ROW>""" :: Nil
+
+  def corruptRecords: Seq[String] =
+    """<ROW>""" ::
+    """""" ::
+    """<ROW>
+          |  <a>1</a>
+          |  <b>2</b>
+          |</ROW>""".stripMargin ::
+    """
+          |<ROW>
+          |  <a>str_a_4</a>
+          |  <b>str_b_4</b>
+          |  <c>str_c_4</c>
+          |</ROW>
+          |""".stripMargin ::
+    """
+          |</ROW>
+          |""".stripMargin :: Nil
+
+  def emptyRecords: Seq[String] =
+    """<ROW>
+          <a><struct></struct></a>
+        </ROW>""" ::
+    """<ROW>
+          <a>
+            <struct><b><c/></b></struct>
+          </a>
+        </ROW>""" ::
+    """<ROW>
+          <b>
+            <item>
+              <c><struct></struct></c>
+            </item>
+            <item/>
+          </b>
+        </ROW>""" :: Nil
+
+  def arrayAndStructRecords: Seq[String] =

Review Comment:
   I plan to add tests in a follow-up PR. I will remove this for clarity in 
this PR.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org


Reply via email to