This is an automated email from the ASF dual-hosted git repository. dzamo pushed a commit to branch 1.21 in repository https://gitbox.apache.org/repos/asf/drill.git
commit c74d8e6bb60f792231152541119d06d4e14cf938 Author: Charles S. Givre <[email protected]> AuthorDate: Sun Mar 3 01:50:30 2024 -0500 DRILL-8141: Ability to query XML root attributes (#2884) --- .../org/apache/drill/exec/store/xml/XMLReader.java | 8 +++++ .../apache/drill/exec/store/xml/TestXMLReader.java | 38 ++++++++++++++++++++-- .../format-xml/src/test/resources/xml/no_nest.xml | 23 +++++++++++++ 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java index 0f6864b190..b57b3ee9e5 100644 --- a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java +++ b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java @@ -277,6 +277,13 @@ public class XMLReader implements Closeable { if (!rowStarted) { currentTupleWriter = startRow(rootRowWriter); + + Iterator<Attribute> attributes = startElement.getAttributes(); + if (attributes != null && attributes.hasNext()) { + // This would be the root element, so the attribute prefix would simply be the field name. + writeAttributes(fieldName, attributes); + } + } else { if (lastEvent != null && lastEvent.getEventType() == XMLStreamConstants.START_ELEMENT) { @@ -331,6 +338,7 @@ public class XMLReader implements Closeable { // Get the field value fieldValue = currentEvent.asCharacters().getData().trim(); changeState(xmlState.GETTING_DATA); + changeState(xmlState.GETTING_DATA); break; case XMLStreamConstants.END_ELEMENT: diff --git a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java index d546d2e4c9..f820259a12 100644 --- a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java +++ b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java @@ -89,6 +89,34 @@ public class TestXMLReader extends ClusterTest { new RowSetComparison(expected).verifyAndClearAll(results); } + @Test + public void testAttributesOnRoot() throws Exception { + String sql = "SELECT * FROM table(cp.`xml/no_nest.xml` (type => 'xml', dataLevel => 1))"; + RowSet results = client.queryBuilder().sql(sql).rowSet(); + assertEquals(1, results.rowCount()); + TupleMetadata expectedSchema = new SchemaBuilder() + .addMap("attributes") + .addNullable("PPP_Version", MinorType.VARCHAR) + .addNullable("PPP_TimeStamp", MinorType.VARCHAR) + .addNullable("P1_SubVersion", MinorType.VARCHAR) + .addNullable("P1_MID", MinorType.VARCHAR) + .addNullable("P1_PN", MinorType.VARCHAR) + .addNullable("P1_SL", MinorType.VARCHAR) + .addNullable("P2_SubVersion", MinorType.VARCHAR) + .resumeSchema() + .addNullable("P1", MinorType.VARCHAR) + .addMap("P2") + .addNullable("Color", MinorType.VARCHAR) + .resumeSchema() + .buildSchema(); + + RowSet expected = client.rowSetBuilder(expectedSchema) + .addRow(strArray("2023-001", "2023-06-09T21:17:14.416+02:00", "a1", "XX003", "156", "3", "b1"), null, strArray("blue")) + .build(); + + new RowSetComparison(expected).verifyAndClearAll(results); + } + @Test public void testXXE() throws Exception { String sql = "SELECT * FROM cp.`xml/bad.xml`"; @@ -136,9 +164,14 @@ public class TestXMLReader extends ClusterTest { String sql = "SELECT * FROM cp.`xml/weather.xml`"; RowSet results = client.queryBuilder().sql(sql).rowSet(); assertEquals(1, results.rowCount()); - TupleMetadata expectedSchema = new SchemaBuilder() .addMap("attributes") + .addNullable("weather_module_id", MinorType.VARCHAR) + .addNullable("weather_tab_id", MinorType.VARCHAR) + .addNullable("weather_mobile_row", MinorType.VARCHAR) + .addNullable("weather_mobile_zipped", MinorType.VARCHAR) + .addNullable("weather_row", MinorType.VARCHAR) + .addNullable("weather_section", MinorType.VARCHAR) .addNullable("forecast_information_city_data", MinorType.VARCHAR) .addNullable("forecast_information_postal_code_data", MinorType.VARCHAR) .addNullable("forecast_information_latitude_e6_data", MinorType.VARCHAR) @@ -169,7 +202,8 @@ public class TestXMLReader extends ClusterTest { .build(); RowSet expected = client.rowSetBuilder(expectedSchema) - .addRow(strArray("Seattle, WA", "Seattle WA", "", "", "2011-09-29", "2011-09-29 17:53:00 +0000", "US", "Clear", "62", "17", "Humidity: 62%", "/ig/images/weather" + + .addRow(strArray("0", "0", "0", "1", "0", "0","Seattle, WA", "Seattle WA", "", "", "2011-09-29", "2011-09-29 17:53:00 +0000", "US", "Clear", + "62", "17", "Humidity: 62%", "/ig/images/weather" + "/sunny.gif", "Wind: N at 4 mph"), null, null, null, null, null, null, null, null, null, null, null, null, null) .build(); diff --git a/contrib/format-xml/src/test/resources/xml/no_nest.xml b/contrib/format-xml/src/test/resources/xml/no_nest.xml new file mode 100644 index 0000000000..e38dccbee8 --- /dev/null +++ b/contrib/format-xml/src/test/resources/xml/no_nest.xml @@ -0,0 +1,23 @@ +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> +<PPP Version="2023-001" TimeStamp="2023-06-09T21:17:14.416+02:00"> + <P1 SubVersion="a1" MID="XX003" PN="156" SL="3"/> + <P2 SubVersion="b1"><Color>blue</Color></P2> +</PPP>
