[
https://issues.apache.org/jira/browse/JENA-1560?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16509526#comment-16509526
]
ASF GitHub Bot commented on JENA-1560:
--------------------------------------
Github user afs commented on a diff in the pull request:
https://github.com/apache/jena/pull/432#discussion_r194708154
--- Diff:
jena-core/src/main/java/org/apache/jena/util/PrefixMappingUtils.java ---
@@ -0,0 +1,362 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.util;
+
+import java.util.* ;
+import java.util.function.Consumer ;
+import java.util.stream.Collectors ;
+
+import org.apache.jena.atlas.lib.SetUtils ;
+import org.apache.jena.atlas.lib.Trie ;
+import org.apache.jena.graph.Graph ;
+import org.apache.jena.graph.Node ;
+import org.apache.jena.graph.Triple ;
+import org.apache.jena.graph.impl.WrappedGraph;
+import org.apache.jena.rdf.model.Model ;
+import org.apache.jena.shared.PrefixMapping ;
+import org.apache.jena.shared.impl.PrefixMappingImpl ;
+
+public class PrefixMappingUtils {
+ /**
+ * Return a read-only graph that has the same data (RDF triples) as
the one given, but has a
+ * prefix mapping that only includes "in use" prefixes.
+ * <p>
+ * The prefix mappings of the two graphs are not connected.
+ * Later changes to the prefix mapping of the original graph are not
reflected in the returned graph.
+ * Modifications to the triples contained in the underlying graph are
reflected.
+ */
+ public static Graph graphInUsePrefixMapping(Graph graph) {
+ final PrefixMapping prefixMapping = calcInUsePrefixMapping(graph) ;
+ prefixMapping.lock() ;
+ Graph graph2 = new WrappedGraph(graph) {
+ @Override
+ public void performAdd(Triple triple)
+ { throw new UnsupportedOperationException() ; }
+
+ @Override
+ public void performDelete(Triple triple)
+ { throw new UnsupportedOperationException() ; }
+
+ @Override
+ public PrefixMapping getPrefixMapping() {
+ return prefixMapping ;
+ }
+ } ;
+ return graph2 ;
+ }
+
+ /**
+ * Analyse the graph to see which prefixes of the graph are in use.
+ * <p>
+ * In the case of overlapping prefixes (where one prefix declaration
has an initial
+ * URI string which matches another prefix declaration), all are
included, though
+ * they may not be used when printing (that depends on the output
process). In effect,
+ * this process has "false positives".
+ * <p>
+ * This function does not calculate new prefixes.
+ *
+ * @see #calcInUsePrefixMappingTTL(Graph)
+ */
+ public static PrefixMapping calcInUsePrefixMapping(Graph graph) {
+ PrefixMapping prefixMapping = graph.getPrefixMapping() ;
+ if ( prefixMapping == null )
+ return null ;
+ return calcInUsePrefixMapping(graph, prefixMapping) ;
+ }
+
+ /**
+ * Analyse the graph to see which prefixes of the given {@link
PrefixMapping} are in
+ * use.
+ * <p>
+ * In the case of overlapping prefixes (where one prefix declaration
has an initial
+ * URI string which matches another prefix declaration), all are
included, though
+ * they may not be used when printing (that depends on the output
process). In effect,
+ * this process has "false positives".
+ * <p>
+ * This function does not calculate new prefixes.
+ *
+ * @see #calcInUsePrefixMappingTTL(Graph, PrefixMapping)
+ */
+ public static PrefixMapping calcInUsePrefixMapping(Graph graph,
PrefixMapping prefixMapping) {
+
+ /* Method:
+ *
+ * For each URI in the data, look it up in the trie
+ * to see if it has a declared prefix.
+ *
+ * Exit early if every prefix is accounted for.
+ */
+
+ // Map prefix to URI.
+ Map<String, String> pmap = prefixMapping.getNsPrefixMap() ;
+
+ // Map URI to prefix, with partial lookup (all uri keys that
partly match the URI)
+ Trie<String> trie = new Trie<>() ;
+ // Change this to "add(uri, uri)" to calculate the uris.
+ pmap.forEach((prefix,uri)-> trie.add(uri, prefix)) ;
+ Iterator<Triple> iter = graph.find(null, null, null) ;
+ // Prefixes in use.
+ // (URIs if "add(uri, uri)")
+ Set<String> inUse = new HashSet<>() ;
+
+ // Process to apply to each node
+ // Accumulate any prefixes into 'inUse' if the data URI
+ // is partially matched by a prefix URI in the trie.
+ Consumer<Node> process = (node)->{
+ if ( ! node.isURI() )
+ return ;
+ String uri = node.getURI() ;
+ // Get all prefixes whose URIs are candidates
+ List<String> hits = trie.partialSearch(uri) ;
+ if ( hits.isEmpty() )
+ return ;
+ inUse.addAll(hits) ;
+ } ;
+
+ while(iter.hasNext()) {
+ Triple triple = iter.next() ;
+ process.accept(triple.getSubject()) ;
+ process.accept(triple.getPredicate()) ;
+ process.accept(triple.getObject()) ;
+ if ( pmap.size() == inUse.size() )
+ break ;
+ }
+
+ if ( pmap.size() == inUse.size() )
+ return prefixMapping ;
+
+ // Build result.
+ PrefixMapping pmap2 = new PrefixMappingImpl() ;
+ inUse.forEach((prefix)-> pmap2.setNsPrefix(prefix,
prefixMapping.getNsPrefixURI(prefix)) ) ;
+ return pmap2 ;
+ }
+
+ /**
+ * Analyse the graph to see which prefixes of the graph are in use.
+ * <p>
+ * This function attempts to process each URI in the graph as if it
were to be printed
+ * in Turtle. Only prefixes that lead to valid output strings are
returned. This is
+ * more expensive than {@link #calcInUsePrefixMapping(Graph)}.
+ * <p>
+ * This function does not calculate new prefixes.
+ *
+ * @see #calcInUsePrefixMapping(Graph)
+ */
+ public static PrefixMapping calcInUsePrefixMappingTTL(Graph graph) {
+ PrefixMapping prefixMapping = graph.getPrefixMapping() ;
+ if ( prefixMapping == null )
+ return null ;
+ return calcInUsePrefixMappingTTL(graph, prefixMapping) ;
+ }
+
+ /**
+ * Analyse the graph to see which prefixes of the given {@link
PrefixMapping} are used
+ * by the graph triples.
+ * <p>
+ * This function attempts to process each URI in the graph as if it
were to be printed
+ * in Turtle. Only prefixes that lead to valid output strings are
returned. This is
+ * more expensive than {@link #calcInUsePrefixMapping(Graph,
PrefixMapping)}.
+ * <p>
+ * This function does not calculate new prefixes.
+ *
+ * @see #calcInUsePrefixMapping(Graph, PrefixMapping)
+ */
+ public static PrefixMapping calcInUsePrefixMappingTTL(Graph graph,
PrefixMapping prefixMapping) {
+
+ /* Method:
+ *
+ * For each URI, split it in the usual place, after "/" or "#" for
http URIs, and
+ * after the last ":" for URNs, then see if that is a declared
prefix.
+ *
+ * Exit early if every prefix is accounted for.
+ */
+ // Map prefix -> URI.
+ Map<String, String> pmap = prefixMapping.getNsPrefixMap() ;
+
+ // All URIs used as prefixes in the prefix mapping.
+ Set<String> prefixURIs = new HashSet<>(pmap.values()) ;
+
+ // Prefixes used.
+ Set<String> inUse = new HashSet<>() ;
+ // Process to be applied to each node in the graph.
+ Consumer<Node> process = (node) -> {
+ if ( ! node.isURI() )
+ return ;
+ String uri = node.getURI() ;
+
+ int idx = SplitIRI.splitpoint(uri) ;
+ if ( idx < 0 )
+ return ;
+ String nsURI = SplitIRI.namespaceTTL(uri) ;
+ String prefix = prefixMapping.getNsURIPrefix(nsURI) ;
+ if ( prefix != null )
+ inUse.add(prefix) ;
+ } ;
+
+ Iterator<Triple> iter = graph.find(null, null, null) ;
+ while(iter.hasNext()) {
+ Triple triple = iter.next() ;
+ process.accept(triple.getSubject()) ;
+ process.accept(triple.getPredicate()) ;
+ process.accept(triple.getObject()) ;
+ if ( inUse.size() == prefixURIs.size() )
+ // Fast exit.
+ break ;
+ }
+
+ if ( pmap.size() == inUse.size() )
+ return prefixMapping ;
+
+ // Build result.
+ PrefixMapping pmap2 = new PrefixMappingImpl() ;
+ inUse.forEach((prefix)-> pmap2.setNsPrefix(prefix,
prefixMapping.getNsPrefixURI(prefix)) ) ;
+ return pmap2 ;
+ }
+
+ /** Check every URI as a possible use of a prefix */
+ private static Set<String> fullMethod(Model m) {
+ /* Method: Covers prefixes not based on "/", "#" or final ":"
splitting.
+ *
+ * Build a trie to use as a partial lookup matcher.
+ * For each URI in the data, look it up as a partial match in the
trie
+ * to get all uris in the prefix map that apply.
+ */
+
+ // Map prefix to URI.
+ Map<String, String> pmap = m.getNsPrefixMap() ;
+ // Map URI to prefix, with partial lookup (all uri keys that
partly match the URI)
+ Trie<String> trie = new Trie<>() ;
+
+ // change to add(uri, prefix) to get prefixes.
+ pmap.forEach((prefix,uri)-> trie.add(uri, uri)) ;
+
+ Iterator<Triple> iter = m.getGraph().find(null, null, null) ;
+ // Prefix URIs in use.
+ Set<String> inUseURIs = new HashSet<>() ;
+ while(iter.hasNext()) {
+ Triple triple = iter.next() ;
+ processFull(trie, inUseURIs, triple.getSubject()) ;
+ processFull(trie, inUseURIs, triple.getPredicate()) ;
+ processFull(trie, inUseURIs, triple.getObject()) ;
+ if ( pmap.size() == inUseURIs.size() )
+ break ;
+ }
+ return inUseURIs ;
+ }
+
+ private static void processFull(Trie<String> trie, Set<String>
prefixesInUse, Node node) {
+ if ( ! node.isURI() )
+ return ;
+ String uri = node.getURI() ;
+ // Shorten to "/" or "#" or ":"
+ String pref = uri ;
+
+ // Get all under the pref
+ List<String> hits = trie.partialSearch(pref) ;
+ if ( hits == null || hits.isEmpty() )
--- End diff --
Just being defensive.
> Prefix utilities to prune prefixes to those needed by the data.
> ---------------------------------------------------------------
>
> Key: JENA-1560
> URL: https://issues.apache.org/jira/browse/JENA-1560
> Project: Apache Jena
> Issue Type: New Feature
> Components: Core
> Affects Versions: Jena 3.7.0
> Reporter: Andy Seaborne
> Assignee: Andy Seaborne
> Priority: Minor
>
> Sometimes, prefixes are no longer needed by the data. They still clutter up
> output.
> In long-lived (persistent) datasets, the effect can build up to the point where
> several 10s of prefixes are present but not used.
> Some utilities to prune the unused prefixes would be useful.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)