This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
The following commit(s) were added to refs/heads/geoapi-4.0 by this push: new bc23f70414 Move X-Path handling to the places where it is needed. With this commit, `UnitFormat` no longer supports the GML way to declare a unit with X-Path (it was an undocumented feature). Instead this is handled by the `org.apache.sis.xml` package, which will allow us to do a better job in the future if needed (e.g. actually read the XML document instead of assuming that the anchor name is the unit symbol). bc23f70414 is described below commit bc23f7041442560fbb064e5cb1ad819c66012e7b Author: Martin Desruisseaux <martin.desruisse...@geomatys.com> AuthorDate: Mon Jul 18 18:39:09 2022 +0200 Move X-Path handling to the places where it is needed. With this commit, `UnitFormat` no longer supports the GML way to declare a unit with X-Path (it was an undocumented feature). Instead this is handled by the `org.apache.sis.xml` package, which will allow us to do a better job in the future if needed (e.g. actually read the XML document instead of assuming that the anchor name is the unit symbol). 
--- .../java/org/apache/sis/filter/PropertyValue.java | 5 +- .../src/main/java/org/apache/sis/filter/XPath.java | 86 ++++++++++ .../java/org/apache/sis/filter/XPathTest.java} | 32 +--- .../apache/sis/test/suite/FeatureTestSuite.java | 1 + .../java/org/apache/sis/xml/ValueConverter.java | 47 +++++- .../main/java/org/apache/sis/xml}/XPointer.java | 52 +++++- .../main/java/org/apache/sis/xml/package-info.java | 2 +- .../apache/sis/test/suite/MetadataTestSuite.java | 3 +- .../org/apache/sis/xml/ValueConverterTest.java | 18 +- .../java/org/apache/sis/xml}/XPointerTest.java | 18 +- .../apache/sis/internal/util/DefinitionURI.java | 78 ++++----- .../java/org/apache/sis/internal/util/XPaths.java | 182 --------------------- .../java/org/apache/sis/measure/UnitFormat.java | 57 +++---- .../main/java/org/apache/sis/measure/Units.java | 16 +- .../org/apache/sis/measure/UnitFormatTest.java | 3 +- .../java/org/apache/sis/measure/UnitsTest.java | 23 +-- .../apache/sis/test/suite/UtilityTestSuite.java | 4 +- 17 files changed, 297 insertions(+), 330 deletions(-) diff --git a/core/sis-feature/src/main/java/org/apache/sis/filter/PropertyValue.java b/core/sis-feature/src/main/java/org/apache/sis/filter/PropertyValue.java index 066e65a94d..ae9d2fc759 100644 --- a/core/sis-feature/src/main/java/org/apache/sis/filter/PropertyValue.java +++ b/core/sis-feature/src/main/java/org/apache/sis/filter/PropertyValue.java @@ -29,7 +29,6 @@ import org.apache.sis.feature.builder.FeatureTypeBuilder; import org.apache.sis.feature.builder.PropertyTypeBuilder; import org.apache.sis.feature.builder.AttributeTypeBuilder; import org.apache.sis.util.resources.Errors; -import org.apache.sis.internal.util.XPaths; // Branch-dependent imports import org.opengis.feature.Feature; @@ -105,7 +104,7 @@ abstract class PropertyValue<V> extends LeafExpression<Feature,V> @SuppressWarnings("unchecked") static <V> ValueReference<Feature,V> create(String xpath, final Class<V> type) { boolean isVirtual = false; - List<String> 
path = XPaths.split(xpath); + List<String> path = XPath.split(xpath); split: if (path != null) { /* * If the XPath is like "/∗/property" where the root "/" is the feature instance, @@ -114,7 +113,7 @@ split: if (path != null) { */ final String head = path.get(0); // List and items in the list are guaranteed non-empty. isVirtual = head.equals("/*"); - if (isVirtual || head.charAt(0) != XPaths.SEPARATOR) { + if (isVirtual || head.charAt(0) != XPath.SEPARATOR) { final int offset = isVirtual ? 1 : 0; // Skip the "/*/" component at index 0. final int last = path.size() - 1; if (last >= offset) { diff --git a/core/sis-feature/src/main/java/org/apache/sis/filter/XPath.java b/core/sis-feature/src/main/java/org/apache/sis/filter/XPath.java new file mode 100644 index 0000000000..cd86706a01 --- /dev/null +++ b/core/sis-feature/src/main/java/org/apache/sis/filter/XPath.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.sis.filter; + +import java.util.List; +import java.util.ArrayList; +import org.apache.sis.util.Static; +import org.apache.sis.util.resources.Errors; + +import static org.apache.sis.util.CharSequences.*; + + +/** + * Basic support of X-Path in {@link PropertyValue} expression. + * This is intended to be only a lightweight support, not a replacement for {@link javax.xml.xpath} implementations. + * + * @author Martin Desruisseaux (Geomatys) + * @version 1.3 + * @since 0.4 + * @module + */ +final class XPath extends Static { + /** + * The separator between path components. + */ + public static final char SEPARATOR = '/'; + + /** + * Do not allow instantiation of this class. + */ + private XPath() { + } + + /** + * Splits the given URL around the {@code '/'} separator, or returns {@code null} if there is no separator. + * By convention if the URL is absolute, then the leading {@code '/'} character is kept in the first element. + * For example {@code "/∗/property"} is splitted as two elements: {@code "/∗"} and {@code "property"}. + * + * <p>This method trims the whitespaces of components except the last one (the tip), + * for consistency with the case where this method returns {@code null}.</p> + * + * @param xpath the URL to split. + * @return the splitted URL with the heading separator kept in the first element, or {@code null} + * if there is no separator. If non-null, the list always contains at least one element. + * @throws IllegalArgumentException if the XPath contains at least one empty component. + */ + static List<String> split(final String xpath) { + int next = xpath.indexOf(SEPARATOR); + if (next < 0) { + return null; + } + final List<String> components = new ArrayList<>(4); + int start = skipLeadingWhitespaces(xpath, 0, next); + if (start < next) { + // No leading '/' (the characters before it are a path element, added below). 
+ components.add(xpath.substring(start, skipTrailingWhitespaces(xpath, start, next))); + start = ++next; + } else { + // Keep the `start` position on the leading '/'. + next++; + } + while ((next = xpath.indexOf(SEPARATOR, next)) >= 0) { + components.add(trimWhitespaces(xpath, start, next).toString()); + start = ++next; + } + components.add(xpath.substring(start)); // No whitespace trimming. + if (components.stream().anyMatch(String::isEmpty)) { + throw new IllegalArgumentException(Errors.format(Errors.Keys.UnsupportedXPath_1, xpath)); + } + return components; + } +} diff --git a/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java b/core/sis-feature/src/test/java/org/apache/sis/filter/XPathTest.java similarity index 52% rename from core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java rename to core/sis-feature/src/test/java/org/apache/sis/filter/XPathTest.java index fe277180ee..3c3ae24b24 100644 --- a/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java +++ b/core/sis-feature/src/test/java/org/apache/sis/filter/XPathTest.java @@ -14,9 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.sis.internal.util; +package org.apache.sis.filter; -import org.apache.sis.util.Characters; import org.apache.sis.test.TestCase; import org.junit.Test; @@ -24,35 +23,22 @@ import static org.junit.Assert.*; /** - * Tests {@link XPaths}. + * Tests {@link XPath}. * * @author Martin Desruisseaux (Geomatys) - * @version 1.2 + * @version 1.3 * @since 0.4 * @module */ -public final strictfp class XPathsTest extends TestCase { +public final strictfp class XPathTest extends TestCase { /** - * Tests the {@link XPaths#endOfURI(CharSequence, int)} method. 
- */ - @Test - public void testEndOfURI() { - assertEquals(26, XPaths.endOfURI("urn:ogc:def:uom:EPSG::9001", 0)); - assertEquals(80, XPaths.endOfURI("http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])", 0)); - assertEquals(97, XPaths.endOfURI("http://schemas.opengis.net/iso/19139/20070417/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])", 0)); - assertEquals(-1, XPaths.endOfURI("m/s", 0)); - assertEquals(-1, XPaths.endOfURI("m.s", 0)); - assertEquals(11, XPaths.endOfURI("EPSG" + Characters.NO_BREAK_SPACE + ": 9001", 0)); - } - - /** - * Tests {@link XPaths#split(String)}. + * Tests {@link XPath#split(String)}. */ @Test public void testSplit() { - assertNull(XPaths.split("property")); - assertArrayEquals(new String[] {"/property"}, XPaths.split("/property").toArray()); - assertArrayEquals(new String[] {"Feature", "property", "child"}, XPaths.split("Feature/property/child").toArray()); - assertArrayEquals(new String[] {"/Feature", "property"}, XPaths.split("/Feature/property").toArray()); + assertNull(XPath.split("property")); + assertArrayEquals(new String[] {"/property"}, XPath.split("/property").toArray()); + assertArrayEquals(new String[] {"Feature", "property", "child"}, XPath.split("Feature/property/child").toArray()); + assertArrayEquals(new String[] {"/Feature", "property"}, XPath.split("/Feature/property").toArray()); } } diff --git a/core/sis-feature/src/test/java/org/apache/sis/test/suite/FeatureTestSuite.java b/core/sis-feature/src/test/java/org/apache/sis/test/suite/FeatureTestSuite.java index fa3794c3f9..db88eca51d 100644 --- a/core/sis-feature/src/test/java/org/apache/sis/test/suite/FeatureTestSuite.java +++ b/core/sis-feature/src/test/java/org/apache/sis/test/suite/FeatureTestSuite.java @@ -50,6 +50,7 @@ import org.junit.runners.Suite; org.apache.sis.feature.FeatureOperationsTest.class, org.apache.sis.feature.FeatureFormatTest.class, org.apache.sis.feature.FeaturesTest.class, + org.apache.sis.filter.XPathTest.class, 
org.apache.sis.filter.CapabilitiesTest.class, org.apache.sis.filter.LeafExpressionTest.class, org.apache.sis.filter.LogicalFilterTest.class, diff --git a/core/sis-metadata/src/main/java/org/apache/sis/xml/ValueConverter.java b/core/sis-metadata/src/main/java/org/apache/sis/xml/ValueConverter.java index 94d925e329..b48f4dc344 100644 --- a/core/sis-metadata/src/main/java/org/apache/sis/xml/ValueConverter.java +++ b/core/sis-metadata/src/main/java/org/apache/sis/xml/ValueConverter.java @@ -66,7 +66,7 @@ import static org.apache.sis.util.CharSequences.trimWhitespaces; * {@code ValueConverter} to a (un)marshaller. * * @author Martin Desruisseaux (Geomatys) - * @version 0.5 + * @version 1.3 * @since 0.3 * @module */ @@ -304,13 +304,16 @@ public class ValueConverter { } /** - * Converts the given string to a unit. The default implementation is as below, omitting - * the check for null value and the call to {@link #exceptionOccured exceptionOccured(…)} - * in case of error: + * Converts the given string to a unit. + * This method shall accept all the following forms (example for the metre unit): * - * {@preformat java - * return Units.valueOf(value); - * } + * <ul> + * <li>{@code m}</li> + * <li>{@code EPSG:9001}</li> + * <li>{@code urn:ogc:def:uom:epsg::9001}</li> + * <li>{@code http://www.opengis.net/def/uom/EPSG/0/9001}</li> + * <li>{@code http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])}</li> + * </ul> * * @param context context (GML version, locale, <i>etc.</i>) of the (un)marshalling process. * @param value the string to convert to a unit, or {@code null}. 
@@ -323,6 +326,36 @@ public class ValueConverter { public Unit<?> toUnit(final MarshalContext context, String value) throws IllegalArgumentException { value = trimWhitespaces(value); if (value != null && !value.isEmpty()) try { + /* + * First, check for X-Paths like below: + * + * http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m']) + * + * Technically the 'm' value in the X-Path is not necessarily a unit symbol. + * It is rather a reference to a definition like below: + * + * <uomItem> + * <gml:BaseUnit gml:id="m"> + * <gml:description> + * The metre is the length of the path travelled by ligth in vaccum during a time interval of 1/299 792 458 of a second + * </gml:description> + * <gml:identifier codeSpace="http://www.bipm.fr/en/si/base_units">metre</gml:identifier> + * <gml:quantityType>length</gml:quantityType> + * <gml:catalogSymbol codeSpace="http://www.bipm.org/en/si/base_units">m</gml:catalogSymbol> + * <gml:unitsSystem xlink:href="http://www.bipm.fr/en/si"/> + * </gml:BaseUnit> + * </uomItem> + * + * But current version of this method parses the anchor as if it was a unit symbol, + * because we do not have a resolution mechanism yet. 
+ */ + final int endOfURI = XPointer.endOfURI(value, 0); + if (endOfURI > 0) { + final String anchor = XPointer.UOM.reference(value.substring(0, endOfURI)); + if (anchor != null) { + value = anchor; + } + } return Units.valueOf(value); } catch (ParserException e) { if (!exceptionOccured(context, value, String.class, Unit.class, e)) { diff --git a/core/sis-utility/src/main/java/org/apache/sis/internal/util/XPointer.java b/core/sis-metadata/src/main/java/org/apache/sis/xml/XPointer.java similarity index 62% rename from core/sis-utility/src/main/java/org/apache/sis/internal/util/XPointer.java rename to core/sis-metadata/src/main/java/org/apache/sis/xml/XPointer.java index 6bacf3b728..284568d569 100644 --- a/core/sis-utility/src/main/java/org/apache/sis/internal/util/XPointer.java +++ b/core/sis-metadata/src/main/java/org/apache/sis/xml/XPointer.java @@ -14,8 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.sis.internal.util; - +package org.apache.sis.xml; import static org.apache.sis.util.CharSequences.*; import static org.apache.sis.internal.util.DefinitionURI.regionMatches; @@ -25,11 +24,11 @@ import static org.apache.sis.internal.util.DefinitionURI.regionMatches; * Parsers of pointers in x-paths, adapted to the syntax found in GML documents. * * @author Martin Desruisseaux (Geomatys) - * @version 1.2 + * @version 1.3 * @since 1.2 * @module */ -public enum XPointer { +enum XPointer { /** * Pointer to units of measurement. 
Example: * {@code "http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])"}) @@ -63,7 +62,7 @@ public enum XPointer { private int startOfFragment(final String url) { final int f = url.indexOf('#'); if (f >= 1) { - final int i = url.lastIndexOf(XPaths.SEPARATOR, f-1) + 1; + final int i = url.lastIndexOf('/', f-1) + 1; for (final String document : documents) { if (regionMatches(document, url, i, f)) { return f + 1; @@ -110,4 +109,47 @@ public enum XPointer { } return null; } + + /** + * If the given character sequences seems to be a URI, returns the presumed end of that URN. + * Otherwise returns -1. + * Examples: + * <ul> + * <li>{@code "urn:ogc:def:uom:EPSG::9001"}</li> + * <li>{@code "http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])"}</li> + * </ul> + * + * @param uri the URI candidate to verify. + * @param offset index of the first character to verify. + * @return index after the last character of the presumed URI, or -1 if this + * method thinks that the given character sequence is not a URI. + */ + public static int endOfURI(final CharSequence uri, int offset) { + boolean isURI = false; + int parenthesis = 0; + final int length = uri.length(); +scan: while (offset < length) { + final int c = Character.codePointAt(uri, offset); + if (!Character.isLetterOrDigit(c)) { + switch (c) { + case '#': // Anchor in URL, presumed followed by xpointer. + case ':': isURI |= (parenthesis == 0); break; // Scheme or URN separator. + case '_': + case '-': // Valid character in URL. + case '%': // Encoded character in URL. + case '.': // Domain name separator in URL. + case '/': break; // Path separator, but could also be division as in "m/s". + case '(': parenthesis++; break; + case ')': parenthesis--; break; + default: { + if (Character.isSpaceChar(c)) break; // Not supposed to be valid, but be lenient. + if (parenthesis != 0) break; + break scan; // Non-valid character outside parenthesis. 
+ } + } + } + offset += Character.charCount(c); + } + return isURI ? offset : -1; + } } diff --git a/core/sis-metadata/src/main/java/org/apache/sis/xml/package-info.java b/core/sis-metadata/src/main/java/org/apache/sis/xml/package-info.java index c12e5299dd..c078aaf61e 100644 --- a/core/sis-metadata/src/main/java/org/apache/sis/xml/package-info.java +++ b/core/sis-metadata/src/main/java/org/apache/sis/xml/package-info.java @@ -59,7 +59,7 @@ * @author Guilhem Legal (Geomatys) * @author Martin Desruisseaux (Geomatys) * @author Cullen Rombach (Image Matters) - * @version 1.2 + * @version 1.3 * @since 0.3 * @module */ diff --git a/core/sis-metadata/src/test/java/org/apache/sis/test/suite/MetadataTestSuite.java b/core/sis-metadata/src/test/java/org/apache/sis/test/suite/MetadataTestSuite.java index 9714e543fa..af4efdc2c4 100644 --- a/core/sis-metadata/src/test/java/org/apache/sis/test/suite/MetadataTestSuite.java +++ b/core/sis-metadata/src/test/java/org/apache/sis/test/suite/MetadataTestSuite.java @@ -25,7 +25,7 @@ import org.junit.BeforeClass; * All tests from the {@code sis-metadata} module, in rough dependency order. 
* * @author Martin Desruisseaux (Geomatys) - * @version 1.1 + * @version 1.3 * @since 0.3 * @module */ @@ -59,6 +59,7 @@ import org.junit.BeforeClass; org.apache.sis.internal.test.DocumentComparatorTest.class, org.apache.sis.xml.NamespacesTest.class, org.apache.sis.xml.XLinkTest.class, + org.apache.sis.xml.XPointerTest.class, org.apache.sis.xml.NilReasonTest.class, org.apache.sis.xml.LegacyCodesTest.class, org.apache.sis.xml.ValueConverterTest.class, diff --git a/core/sis-metadata/src/test/java/org/apache/sis/xml/ValueConverterTest.java b/core/sis-metadata/src/test/java/org/apache/sis/xml/ValueConverterTest.java index 0bbb05cd1c..2d0d6dc0ba 100644 --- a/core/sis-metadata/src/test/java/org/apache/sis/xml/ValueConverterTest.java +++ b/core/sis-metadata/src/test/java/org/apache/sis/xml/ValueConverterTest.java @@ -22,6 +22,9 @@ import org.apache.sis.test.DependsOn; import org.apache.sis.test.TestCase; import org.junit.Test; +import static org.apache.sis.measure.Units.METRE; +import static org.apache.sis.measure.Units.DEGREE; +import static org.apache.sis.measure.Units.RADIAN; import static org.junit.Assert.*; @@ -29,7 +32,7 @@ import static org.junit.Assert.*; * Tests the {@link ValueConverter} class. * * @author Martin Desruisseaux (Geomatys) - * @version 0.5 + * @version 1.3 * @since 0.4 * @module */ @@ -91,4 +94,17 @@ public final strictfp class ValueConverterTest extends TestCase { assertEquals(StandardCharsets.ISO_8859_1, ValueConverter.DEFAULT.toCharset(null, "8859part1")); assertEquals(StandardCharsets.ISO_8859_1, ValueConverter.DEFAULT.toCharset(null, "ISO-8859-1")); } + + /** + * Tests {@link ValueConverter#toUnit(MarshalContext, String)}. 
+ */ + @Test + public void testToUnit() { + assertSame(METRE, ValueConverter.DEFAULT.toUnit(null, "http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])")); + assertSame(DEGREE, ValueConverter.DEFAULT.toUnit(null, "http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='deg'])")); + assertSame(RADIAN, ValueConverter.DEFAULT.toUnit(null, "http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='rad'])")); + assertSame(METRE, ValueConverter.DEFAULT.toUnit(null, "gmxUom.xml#m")); + assertSame(METRE, ValueConverter.DEFAULT.toUnit(null, "EPSG:9001")); + assertSame(DEGREE, ValueConverter.DEFAULT.toUnit(null, "urn:ogc:def:uom:EPSG::9102")); + } } diff --git a/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPointerTest.java b/core/sis-metadata/src/test/java/org/apache/sis/xml/XPointerTest.java similarity index 68% rename from core/sis-utility/src/test/java/org/apache/sis/internal/util/XPointerTest.java rename to core/sis-metadata/src/test/java/org/apache/sis/xml/XPointerTest.java index c8e952c7e6..ba7c8176e3 100644 --- a/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPointerTest.java +++ b/core/sis-metadata/src/test/java/org/apache/sis/xml/XPointerTest.java @@ -14,8 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.sis.internal.util; +package org.apache.sis.xml; +import org.apache.sis.util.Characters; import org.apache.sis.test.TestCase; import org.junit.Test; @@ -26,7 +27,7 @@ import static org.junit.Assert.*; * Tests {@link XPointer}. 
* * @author Martin Desruisseaux (Geomatys) - * @version 1.2 + * @version 1.3 * @since 1.2 * @module */ @@ -41,4 +42,17 @@ public final strictfp class XPointerTest extends TestCase { assertEquals("m", XPointer.UOM.reference("http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_19139_Schemas/resources/uom/ML_gmxUom.xml#xpointer(//*[@gml:id='m'])")); assertEquals("m", XPointer.UOM.reference("../uom/ML_gmxUom.xml#xpointer(//*[@gml:id='m'])")); } + + /** + * Tests the {@link XPointer#endOfURI(CharSequence, int)} method. + */ + @Test + public void testEndOfURI() { + assertEquals(26, XPointer.endOfURI("urn:ogc:def:uom:EPSG::9001", 0)); + assertEquals(80, XPointer.endOfURI("http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])", 0)); + assertEquals(97, XPointer.endOfURI("http://schemas.opengis.net/iso/19139/20070417/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])", 0)); + assertEquals(-1, XPointer.endOfURI("m/s", 0)); + assertEquals(-1, XPointer.endOfURI("m.s", 0)); + assertEquals(11, XPointer.endOfURI("EPSG" + Characters.NO_BREAK_SPACE + ": 9001", 0)); + } } diff --git a/core/sis-utility/src/main/java/org/apache/sis/internal/util/DefinitionURI.java b/core/sis-utility/src/main/java/org/apache/sis/internal/util/DefinitionURI.java index 54037e0455..615fdf8572 100644 --- a/core/sis-utility/src/main/java/org/apache/sis/internal/util/DefinitionURI.java +++ b/core/sis-utility/src/main/java/org/apache/sis/internal/util/DefinitionURI.java @@ -33,7 +33,6 @@ import static org.apache.sis.util.ArgumentChecks.ensureNonNull; * * <p>For example, all the following URIs are for the same object:</p> * <ul> - * <li>{@code "4326"} (codespace inferred by the caller)</li> * <li>{@code "EPSG:4326"} (older format)</li> * <li>{@code "EPSG::4326"} (often seen for similarity with URN below)</li> * <li>{@code "urn:ogc:def:crs:EPSG::4326"} (version number is omitted)</li> @@ -122,7 +121,7 @@ public final class DefinitionURI { public static final String PREFIX = 
"urn:ogc:def"; /** - * The URN separator. + * The path separator in URN. */ public static final char SEPARATOR = ':'; @@ -468,7 +467,7 @@ public final class DefinitionURI { * @param upper index after the last character in {@code urn} to compare, ignoring whitespaces. * @return {@code true} if the given sub-region of {@code urn} match the given part. */ - static boolean regionMatches(final String part, final String urn, int lower, int upper) { + public static boolean regionMatches(final String part, final String urn, int lower, int upper) { lower = skipLeadingWhitespaces (urn, lower, upper); upper = skipTrailingWhitespaces(urn, lower, upper); final int length = upper - lower; @@ -492,37 +491,6 @@ public final class DefinitionURI { return -1; } - /** - * Returns the substring of the given URN, ignoring whitespaces and version number if present. - * The substring is expected to contains at most one {@code ':'} character. If such separator - * character is present, then that character and everything before it are ignored. - * The ignored part should be the version number, but this is not verified. - * - * <p>If the remaining substring is empty or contains more {@code ':'} characters, then this method - * returns {@code null}. The presence of more {@code ':'} characters means that the code has parameters, - * (e.g. {@code "urn:ogc:def:crs:OGC:1.3:AUTO42003:1:-100:45"}) which are not handled by this method.</p> - * - * @param urn the URN from which to get the code. - * @param fromIndex index of the first character in {@code urn} to check. - * @return the code part of the URN, or {@code null} if empty or invalid. - */ - private static String codeIgnoreVersion(final String urn, int fromIndex) { - final int length = urn.length(); - fromIndex = skipLeadingWhitespaces(urn, fromIndex, length); - if (fromIndex >= length) { - return null; // Empty code. 
- } - final int s = urn.indexOf(SEPARATOR, fromIndex); - if (s >= 0) { - // Ignore the version number (actually everything up to the first ':'). - fromIndex = skipLeadingWhitespaces(urn, s+1, length); - if (fromIndex >= length || urn.indexOf(SEPARATOR, fromIndex) >= 0) { - return null; // Empty code, or the code is followed by parameters. - } - } - return urn.substring(fromIndex, skipTrailingWhitespaces(urn, fromIndex, length)); - } - /** * Returns the code part of the given URI, provided that it matches the given object type and authority. * This method is useful when: @@ -544,22 +512,44 @@ public final class DefinitionURI { * </ul> * * @param type the expected object type (e.g. {@code "crs"}) in lower cases. See class javadoc for a list of types. - * @param authority the expected authority, typically {@code "epsg"}. See class javadoc for a list of authorities. + * @param authority the expected authority, typically {@code "EPSG"}. See class javadoc for a list of authorities. * @param uri the URI to parse. * @return the code part of the given URI, or {@code null} if the codespace does not match the given type * and authority, the code is empty, or the code is followed by parameters. */ - public static String codeOf(final String type, final String authority, final String uri) { + public static String codeOf(final String type, final String authority, final CharSequence uri) { ensureNonNull("type", type); ensureNonNull("authority", authority); - int upper = uri.indexOf(SEPARATOR); - if (upper >= 0) { - int lower = skipLeadingWhitespaces(uri, 0, upper); - int length = skipTrailingWhitespaces(uri, lower, upper) - lower; - if (length == authority.length() && uri.regionMatches(true, lower, authority, 0, length)) { - return codeIgnoreVersion(uri, upper+1); + final int length = uri.length(); + int s = indexOf(uri, SEPARATOR, 0, length); + if (s >= 0) { + int from = skipLeadingWhitespaces(uri, 0, s); // Start of authority part. 
+ if (skipTrailingWhitespaces(uri, from, s) - from == authority.length() + && CharSequences.regionMatches(uri, from, authority, true)) + { + from = skipLeadingWhitespaces(uri, s+1, length); // Start of code part. + if (from >= length) { + return null; + } + /* + * The substring is expected to contains zero or one more separator character. + * If present, then the separator character and everything before it are ignored. + * The ignored part should be the version number, but this is not verified. + */ + s = indexOf(uri, SEPARATOR, from, length); + if (s >= 0) { + from = skipLeadingWhitespaces(uri, s+1, length); + if (from >= length || indexOf(uri, SEPARATOR, from, length) >= 0) { + /* + * If the remaining substring contains more ':' characters, then it means that + * the code has parameters, e.g. "urn:ogc:def:crs:OGC:1.3:AUTO42003:1:-100:45". + */ + return null; + } + } + return uri.subSequence(from, skipTrailingWhitespaces(uri, from, length)).toString(); } - final DefinitionURI def = parse(uri); + final DefinitionURI def = parse(uri.toString()); if (def != null && def.parameters == null) { if (type.equalsIgnoreCase(def.type) && authority.equalsIgnoreCase(def.authority)) { String code = def.code; @@ -602,7 +592,7 @@ public final class DefinitionURI { for (final Map.Entry<String,String> entry : paths.entrySet()) { final String path = entry.getValue(); if (url.regionMatches(true, lower, path, 0, path.length())) { - lower = CharSequences.skipLeadingWhitespaces(url, lower + path.length(), url.length()); + lower = skipLeadingWhitespaces(url, lower + path.length(), url.length()); if (authority == null) { authority = url.substring(lower, skipIdentifierPart(url, lower)); } else if (!url.regionMatches(true, lower, authority, 0, authority.length())) { diff --git a/core/sis-utility/src/main/java/org/apache/sis/internal/util/XPaths.java b/core/sis-utility/src/main/java/org/apache/sis/internal/util/XPaths.java deleted file mode 100644 index 01544a82ed..0000000000 --- 
a/core/sis-utility/src/main/java/org/apache/sis/internal/util/XPaths.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.sis.internal.util; - -import java.util.List; -import java.util.ArrayList; -import org.apache.sis.util.Static; -import org.apache.sis.util.resources.Errors; - -import static org.apache.sis.util.CharSequences.*; -import static org.apache.sis.internal.util.DefinitionURI.regionMatches; - - -/** - * Utility methods related to x-paths. This is intended to be only a lightweight support; - * this is not a replacement for {@link javax.xml.xpath} implementations. This is used as - * a place where to centralize XPath handling for possible replacement by a more advanced - * framework in the future. - * - * @author Martin Desruisseaux (Geomatys) - * @version 1.2 - * @since 0.4 - * @module - */ -public final class XPaths extends Static { - /** - * The separator between path components. - */ - public static final char SEPARATOR = '/'; - - /** - * Do not allow instantiation of this class. - */ - private XPaths() { - } - - /** - * If the given character sequences seems to be a URI, returns the presumed end of that URN. - * Otherwise returns -1. 
- * Examples: - * <ul> - * <li>{@code "urn:ogc:def:uom:EPSG::9001"}</li> - * <li>{@code "http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])"}</li> - * </ul> - * - * @param uri the URI candidate to verify. - * @param offset index of the first character to verify. - * @return index after the last character of the presumed URI, or -1 if this - * method thinks that the given character sequence is not a URI. - */ - public static int endOfURI(final CharSequence uri, int offset) { - boolean isURI = false; - int parenthesis = 0; - final int length = uri.length(); -scan: while (offset < length) { - final int c = Character.codePointAt(uri, offset); - if (!Character.isLetterOrDigit(c)) { - switch (c) { - case '#': // Anchor in URL, presumed followed by xpointer. - case ':': isURI |= (parenthesis == 0); break; // Scheme or URN separator. - case '_': - case '-': // Valid character in URL. - case '%': // Encoded character in URL. - case '.': // Domain name separator in URL. - case SEPARATOR: break; // Path separator, but could also be division as in "m/s". - case '(': parenthesis++; break; - case ')': parenthesis--; break; - default: { - if (Character.isSpaceChar(c)) break; // Not supposed to be valid, but be lenient. - if (parenthesis != 0) break; - break scan; // Non-valid character outside parenthesis. - } - } - } - offset += Character.charCount(c); - } - return isURI ? offset : -1; - } - - /** - * Splits the given URL around the {@code '/'} separator, or returns {@code null} if there is no separator. - * By convention if the URL is absolute, then the leading {@code '/'} character is kept in the first element. - * For example {@code "/∗/property"} is splitted as two elements: {@code "/∗"} and {@code "property"}. - * - * <p>This method trims the whitespaces of components except the last one (the tip), - * for consistency with the case where this method returns {@code null}.</p> - * - * @param xpath the URL to split. 
- * @return the splitted URL with the heading separator kept in the first element, or {@code null} - * if there is no separator. If non-null, the list always contains at least one element. - * @throws IllegalArgumentException if the XPath contains at least one empty component. - */ - public static List<String> split(final String xpath) { - int next = xpath.indexOf(SEPARATOR); - if (next < 0) { - return null; - } - final List<String> components = new ArrayList<>(4); - int start = skipLeadingWhitespaces(xpath, 0, next); - if (start < next) { - // No leading '/' (the characters before it are a path element, added below). - components.add(xpath.substring(start, skipTrailingWhitespaces(xpath, start, next))); - start = ++next; - } else { - // Keep the `start` position on the leading '/'. - next++; - } - while ((next = xpath.indexOf(SEPARATOR, next)) >= 0) { - components.add(trimWhitespaces(xpath, start, next).toString()); - start = ++next; - } - components.add(xpath.substring(start)); // No whitespace trimming. - if (components.stream().anyMatch(String::isEmpty)) { - throw new IllegalArgumentException(Errors.format(Errors.Keys.UnsupportedXPath_1, xpath)); - } - return components; - } - - /** - * Parses a URL which contains a pointer to a XML fragment. - * The current implementation recognizes the following types: - * - * <ul> - * <li>{@code uom} for Unit Of Measurement (example: - * {@code "http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])"})</li> - * </ul> - * - * @param type the object type. - * @param url the URL to parse. - * @return the reference, or {@code null} if none. - */ - public static String xpointer(final String type, final String url) { - if (type.equals("uom")) { - final int f = url.indexOf('#'); - if (f >= 1) { - /* - * For now we accept any path as long as it ends with the "gmxUom.xml" file - * because resources may be hosted on different servers, or the path may be - * relative instead of absolute. 
- */ - int i = url.lastIndexOf('/', f-1) + 1; - if (regionMatches("gmxUom.xml", url, i, f) || regionMatches("ML_gmxUom.xml", url, i, f)) { - /* - * The fragment should typically be of the form "xpointer(//*[@gml:id='m'])". - * However sometime we found no "xpointer", but directly the unit instead. - */ - i = url.indexOf('(', f+1); - if (i >= 0 && regionMatches("xpointer", url, f+1, i)) { - i = url.indexOf("@gml:id=", i+1); - if (i >= 0) { - i = skipLeadingWhitespaces(url, i+8, url.length()); // 8 is the length of "@gml:id=" - final int c = url.charAt(i); - if (c == '\'' || c == '"') { - final int s = url.indexOf(c, ++i); - if (s >= 0) { - return trimWhitespaces(url, i, s).toString(); - } - } - } - } else { - return trimWhitespaces(url, f+1, url.length()).toString(); - } - } - } - } - return null; - } -} diff --git a/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java b/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java index a48a31776e..c9a30202ef 100644 --- a/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java +++ b/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java @@ -37,8 +37,6 @@ import org.apache.sis.internal.system.Loggers; import org.apache.sis.internal.util.Constants; import org.apache.sis.internal.util.DefinitionURI; import org.apache.sis.internal.util.FinalFieldSetter; -import org.apache.sis.internal.util.XPointer; -import org.apache.sis.internal.util.XPaths; import org.apache.sis.math.Fraction; import org.apache.sis.util.ArgumentChecks; import org.apache.sis.math.MathFunctions; @@ -60,14 +58,15 @@ import static java.util.logging.Logger.getLogger; * some symbols found in <cite>Well Known Text</cite> (WKT) definitions or in XML files. 
* * <h2>Parsing authority codes</h2> - * As a special case, if a character sequence given to the {@link #parse(CharSequence)} method is of the - * {@code "EPSG:####"} or {@code "urn:ogc:def:uom:EPSG::####"} form (ignoring case and whitespaces), - * then {@code "####"} is parsed as an integer and forwarded to the {@link Units#valueOfEPSG(int)} method. + * If a character sequence given to the {@link #parse(CharSequence)} method is of the form {@code "EPSG:####"}, + * {@code "urn:ogc:def:uom:EPSG::####"} or {@code "http://www.opengis.net/def/uom/EPSG/0/####"} (ignoring case + * and whitespaces around path separators), then {@code "####"} is parsed as an integer and forwarded to the + * {@link Units#valueOfEPSG(int)} method. * - * <h2>NetCDF unit symbols</h2> - * The attributes in netCDF files often merge the axis direction with the angular unit, - * as in {@code "degrees_east"}, {@code "degrees_north"} or {@code "Degrees North"}. - * This class ignores those suffixes and unconditionally returns {@link Units#DEGREE} for all axis directions. + * <h2>Note on netCDF unit symbols</h2> + * In netCDF files, values of "unit" attribute are concatenations of an angular unit with an axis direction, + * as in {@code "degrees_east"} or {@code "degrees_north"}. This class ignores those suffixes and unconditionally + * returns {@link Units#DEGREE} for all axis directions. * * <h2>Multi-threading</h2> * {@code UnitFormat} is generally not thread-safe. If units need to be parsed or formatted in different threads, @@ -87,6 +86,11 @@ public class UnitFormat extends Format implements javax.measure.format.UnitForma */ private static final long serialVersionUID = -3064428584419360693L; + /** + * Whether the parsing of authority codes such as {@code "EPSG:9001"} is allowed. + */ + private static final boolean PARSE_AUTHORITY_CODES = true; + /** * The unit name for degrees (not necessarily angular), to be handled in a special way. * Must contain only ASCII lower case letters ([a … z]). 
@@ -1110,16 +1114,14 @@ appPow: if (unit == null) { */ int end = symbols.length(); int start = CharSequences.skipLeadingWhitespaces(symbols, position.getIndex(), end); - int endOfURI = XPaths.endOfURI(symbols, start); - if (endOfURI >= 0) { - final String uom = symbols.subSequence(start, endOfURI).toString(); - String code = DefinitionURI.codeOf("uom", Constants.EPSG, uom); + if (PARSE_AUTHORITY_CODES) { + final String code = DefinitionURI.codeOf("uom", Constants.EPSG, symbols); if (code != null) { NumberFormatException failure = null; try { final Unit<?> unit = Units.valueOfEPSG(Integer.parseInt(code)); if (unit != null) { - position.setIndex(endOfURI); + position.setIndex(end); finish(position); return unit; } @@ -1127,29 +1129,8 @@ appPow: if (unit == null) { failure = e; } throw (ParserException) new ParserException(Errors.format(Errors.Keys.UnknownUnit_1, - Constants.EPSG + Constants.DEFAULT_SEPARATOR + code), - symbols, start + Math.max(0, uom.lastIndexOf(code))).initCause(failure); - } - /* - * Not an EPSG code. Maybe it is a URI like this example: - * http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m']) - * - * If we find such `uom` value, we could replace `symbols` by that `uom`. But it would cause a wrong - * error index to be reported in case of parsing failure. We will rather try to adjust the indices - * (and replace `symbols` only in last resort). - */ - code = XPointer.UOM.reference(uom); - if (code != null) { - final int base = start; - start = endOfURI - code.length(); - do if (--start < base) { // Should never happen (see above comment), but we are paranoiac. 
- symbols = code; - start = 0; - break; - } while (!CharSequences.regionMatches(symbols, start, code)); - end = start + code.length(); - } else { - endOfURI = -1; + Constants.EPSG + Constants.DEFAULT_SEPARATOR + code), symbols, + start + Math.max(0, symbols.toString().lastIndexOf(code))).initCause(failure); } } /* @@ -1326,7 +1307,7 @@ search: while ((i = CharSequences.skipTrailingWhitespaces(symbols, start, i) finish(position); // For preventing interpretation of "degree minute" as "degree × minute". } unit = operation.apply(unit, component, start); - position.setIndex(endOfURI >= 0 ? endOfURI : i); + position.setIndex(i); return unit; } diff --git a/core/sis-utility/src/main/java/org/apache/sis/measure/Units.java b/core/sis-utility/src/main/java/org/apache/sis/measure/Units.java index d2fa3a646f..090d7a8007 100644 --- a/core/sis-utility/src/main/java/org/apache/sis/measure/Units.java +++ b/core/sis-utility/src/main/java/org/apache/sis/measure/Units.java @@ -1701,14 +1701,14 @@ public final class Units extends Static { * and may change in future SIS versions. * * <h4>Parsing authority codes</h4> - * As a special case, if the given {@code uom} arguments is of the form {@code "EPSG:####"} - * or {@code "urn:ogc:def:uom:EPSG:####"} (ignoring case and whitespaces), then {@code "####"} - * is parsed as an integer and forwarded to the {@link #valueOfEPSG(int)} method. - * - * <h4>NetCDF unit symbols</h4> - * The attributes in netCDF files often merge the axis direction with the angular unit, - * as in {@code "degrees_east"} or {@code "degrees_north"}. This {@code valueOf} method - * ignores those suffixes and unconditionally returns {@link #DEGREE} for all axis directions. 
+ * If the given {@code uom} arguments is of the form {@code "EPSG:####"}, {@code "urn:ogc:def:uom:EPSG:####"} + * or {@code "http://www.opengis.net/def/uom/EPSG/0/####"} (ignoring case and whitespaces around separators), + * then {@code "####"} is parsed as an integer and forwarded to the {@link #valueOfEPSG(int)} method. + * + * <h4>Note on netCDF unit symbols</h4> + * In netCDF files, values of "unit" attribute are concatenations of an angular unit with an axis direction, + * as in {@code "degrees_east"} or {@code "degrees_north"}. This {@code valueOf(…)} method ignores those suffixes + * and unconditionally returns {@link #DEGREE} for all axis directions. * * @param uom the symbol to parse, or {@code null}. * @return the parsed symbol, or {@code null} if {@code uom} was null. diff --git a/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java b/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java index a7374d5857..3e6e0e291f 100644 --- a/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java +++ b/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java @@ -39,7 +39,7 @@ import static org.junit.Assert.*; * * @author Martin Desruisseaux (Geomatys) * @author Alexis Manin (Geomatys) - * @version 1.2 + * @version 1.3 * @since 0.8 * @module */ @@ -431,7 +431,6 @@ public final strictfp class UnitFormatTest extends TestCase { assertSame(Units.METRE, f.parse("EPSG:9001")); assertSame(Units.METRE, f.parse("urn:ogc:def:uom:EPSG::9001")); assertSame(Units.METRES_PER_SECOND, f.parse("urn:ogc:def:uom:EPSG::1026")); - assertSame(Units.METRE, f.parse("http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])")); } /** diff --git a/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java b/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java index 81134e360e..455407959e 100644 --- a/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java 
+++ b/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java @@ -35,15 +35,14 @@ import static org.apache.sis.test.Assert.*; * * @author Martin Desruisseaux (Geomatys) * @author Alexis Manin (Geomatys) - * @version 1.2 + * @version 1.3 * @since 0.3 * @module */ @DependsOn({ UnitFormatTest.class, SexagesimalConverterTest.class, - org.apache.sis.internal.util.DefinitionURITest.class, - org.apache.sis.internal.util.XPointerTest.class + org.apache.sis.internal.util.DefinitionURITest.class }) public final strictfp class UnitsTest extends TestCase { /** @@ -365,19 +364,23 @@ public final strictfp class UnitsTest extends TestCase { } /** - * Tests {@link Units#valueOfEPSG(int)} and {@link Units#valueOf(String)} with a {@code "EPSG:####"} syntax. + * Tests {@link Units#valueOf(String)} with a URN syntax. */ @Test - public void testValueOfEPSG() { - assertSame(METRE, valueOfEPSG(9001)); - assertSame(DEGREE, valueOfEPSG(9102)); // Used in prime meridian and operation parameters. - assertSame(DEGREE, valueOfEPSG(9122)); // Used in coordinate system axes. + public void testValueOfURN() { assertSame(METRE, valueOf("EPSG:9001")); assertSame(DEGREE, valueOf(" epsg : 9102")); assertSame(DEGREE, valueOf("urn:ogc:def:uom:EPSG::9102")); - assertSame(METRE, valueOf("http://www.isotc211.org/2005/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])")); - assertSame(METRE, valueOf("gmxUom.xml#m")); + } + /** + * Tests {@link Units#valueOfEPSG(int)} and {@link Units#valueOf(String)} with a {@code "EPSG:####"} syntax. + */ + @Test + public void testValueOfEPSG() { + assertSame(METRE, valueOfEPSG(9001)); + assertSame(DEGREE, valueOfEPSG(9102)); // Used in prime meridian and operation parameters. + assertSame(DEGREE, valueOfEPSG(9122)); // Used in coordinate system axes. 
assertSame(TROPICAL_YEAR, valueOfEPSG(1029)); assertSame(SECOND, valueOfEPSG(1040)); assertSame(FOOT, valueOfEPSG(9002)); diff --git a/core/sis-utility/src/test/java/org/apache/sis/test/suite/UtilityTestSuite.java b/core/sis-utility/src/test/java/org/apache/sis/test/suite/UtilityTestSuite.java index 4e81d87f86..bb83fbea71 100644 --- a/core/sis-utility/src/test/java/org/apache/sis/test/suite/UtilityTestSuite.java +++ b/core/sis-utility/src/test/java/org/apache/sis/test/suite/UtilityTestSuite.java @@ -25,7 +25,7 @@ import org.junit.BeforeClass; * All tests from the {@code sis-utility} module, in rough dependency order. * * @author Martin Desruisseaux (Geomatys) - * @version 1.2 + * @version 1.3 * @since 0.3 * @module */ @@ -87,8 +87,6 @@ import org.junit.BeforeClass; // GeoAPI most basic types. org.apache.sis.internal.util.DefinitionURITest.class, - org.apache.sis.internal.util.XPathsTest.class, - org.apache.sis.internal.util.XPointerTest.class, org.apache.sis.util.SimpleInternationalStringTest.class, org.apache.sis.util.DefaultInternationalStringTest.class, org.apache.sis.internal.util.LocalizedParseExceptionTest.class,