[ https://issues.apache.org/jira/browse/JENA-1560?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16508872#comment-16508872 ]
ASF GitHub Bot commented on JENA-1560: -------------------------------------- Github user ajs6f commented on a diff in the pull request: https://github.com/apache/jena/pull/432#discussion_r194566823 --- Diff: jena-core/src/main/java/org/apache/jena/util/PrefixMappingUtils.java --- @@ -0,0 +1,362 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.util; + +import java.util.* ; +import java.util.function.Consumer ; +import java.util.stream.Collectors ; + +import org.apache.jena.atlas.lib.SetUtils ; +import org.apache.jena.atlas.lib.Trie ; +import org.apache.jena.graph.Graph ; +import org.apache.jena.graph.Node ; +import org.apache.jena.graph.Triple ; +import org.apache.jena.graph.impl.WrappedGraph; +import org.apache.jena.rdf.model.Model ; +import org.apache.jena.shared.PrefixMapping ; +import org.apache.jena.shared.impl.PrefixMappingImpl ; + +public class PrefixMappingUtils { + /** + * Return a read-only graph that has the same data (RDF triples) as the one given, but has a + * prefix mapping that only includes "in use " prefixes. + * <p> + * The prefix mappings of the two graphs are not connected. 
+ * Later changes to the prefix mapping of the original graph are not reflected in the returned graph. + * Modifications to the triples contained in the underlying graph are reflected. + */ + public static Graph graphInUsePrefixMapping(Graph graph) { + final PrefixMapping prefixMapping = calcInUsePrefixMapping(graph) ; + prefixMapping.lock() ; + Graph graph2 = new WrappedGraph(graph) { + @Override + public void performAdd(Triple triple) + { throw new UnsupportedOperationException() ; } + + @Override + public void performDelete(Triple triple) + { throw new UnsupportedOperationException() ; } + + @Override + public PrefixMapping getPrefixMapping() { + return prefixMapping ; + } + } ; + return graph2 ; + } + + /** + * Analyse the graph to see which prefixes of the graph are in use. + * <p> + * In the case of overlapping prefixes (where one prefix declaration has an initial + * URI string which matches another prefix declaration), all are included, though + * they may not be used when printing (that depends on the output process). In effect, + * this process has "false positives". + * <p> + * This function does not calculate new prefixes. + * + * @see #calcInUsePrefixMappingTTL(Graph) + */ + public static PrefixMapping calcInUsePrefixMapping(Graph graph) { + PrefixMapping prefixMapping = graph.getPrefixMapping() ; + if ( prefixMapping == null ) + return null ; + return calcInUsePrefixMapping(graph, prefixMapping) ; + } + + /** + * Analyse the graph to see which prefixes of the given {@link PrefixMapping} are in + * use. + * <p> + * In the case of overlapping prefixes (where one prefix declaration has an initial + * URI string which matches another prefix declaration), all are included, though + * they may not be used when printing (that depends on the output process). In effect, + * this process has "false positives". + * <p> + * This function does not calculate new prefixes. 
+ * + * @see #calcInUsePrefixMappingTTL(Graph, PrefixMapping) + */ + public static PrefixMapping calcInUsePrefixMapping(Graph graph, PrefixMapping prefixMapping) { + + /* Method: + * + * For each URI in the data, look it up in the trie + * to see if it has a declared prefix. + * + * Exit early if every prefix is accounted for. + */ + + // Map prefix to URI. + Map<String, String> pmap = prefixMapping.getNsPrefixMap() ; + + // Map URI to prefix, with partial lookup (all uri keys that partly match the URI) + Trie<String> trie = new Trie<>() ; + // Change this to "add(uri, uri)" to calculate the uris. + pmap.forEach((prefix,uri)-> trie.add(uri, prefix)) ; + Iterator<Triple> iter = graph.find(null, null, null) ; + // Prefixes in use. + // (URIs if "add(uri, uri)") + Set<String> inUse = new HashSet<>() ; + + // Process to apply to each node + // Accumulate any prefixes into 'inUse' if the data URI + // is partially matched by a prefix URI in the trie. + Consumer<Node> process = (node)->{ + if ( ! node.isURI() ) + return ; + String uri = node.getURI() ; + // Get all prefixes whose URIs are candidates + List<String> hits = trie.partialSearch(uri) ; + if ( hits.isEmpty() ) + return ; + inUse.addAll(hits) ; + } ; + + while(iter.hasNext()) { + Triple triple = iter.next() ; + process.accept(triple.getSubject()) ; + process.accept(triple.getPredicate()) ; + process.accept(triple.getObject()) ; + if ( pmap.size() == inUse.size() ) + break ; + } + + if ( pmap.size() == inUse.size() ) + return prefixMapping ; + + // Build result. + PrefixMapping pmap2 = new PrefixMappingImpl() ; + inUse.forEach((prefix)-> pmap2.setNsPrefix(prefix, prefixMapping.getNsPrefixURI(prefix)) ) ; + return pmap2 ; + } + + /** + * Analyse the graph to see which prefixes of the graph are in use. + * <p> + * This function attempts to process each URI in the graph as if it were to be printed + * in Turtle. Only prefixes that lead to valid output strings are returned. 
This is + * more expensive than {@link #calcInUsePrefixMapping(Graph)}. + * <p> + * This function does not calculate new prefixes. + * + * @see #calcInUsePrefixMappingTTL(Graph) + */ + public static PrefixMapping calcInUsePrefixMappingTTL(Graph graph) { + PrefixMapping prefixMapping = graph.getPrefixMapping() ; + if ( prefixMapping == null ) + return null ; + return calcInUsePrefixMappingTTL(graph, prefixMapping) ; + } + + /** + * Analyse the graph to see which prefixes of the given {@link PrefixMapping} are used + * by the graph triples. + * <p> + * This function attempts to process each URI in the graph as if it were to be printed + * in Turtle. Only prefixes that lead to valid output strings are returned. This is + * more expensive than {@link #calcInUsePrefixMapping(Graph, PrefixMapping)}. + * <p> + * This function does not calculate new prefixes. + * + * @see #calcInUsePrefixMapping(Graph, PrefixMapping) + */ + public static PrefixMapping calcInUsePrefixMappingTTL(Graph graph, PrefixMapping prefixMapping) { --- End diff -- Maybe leave a TODO to factor out common logic between this and `calcInUsePrefixMapping`? > Prefix utilities to prune prefixes to those needed by the data. > --------------------------------------------------------------- > > Key: JENA-1560 > URL: https://issues.apache.org/jira/browse/JENA-1560 > Project: Apache Jena > Issue Type: New Feature > Components: Core > Affects Versions: Jena 3.7.0 > Reporter: Andy Seaborne > Assignee: Andy Seaborne > Priority: Minor > > Sometimes, prefixes are no longer needed by the data. They still clutter-up > output. > In long lived (persistent), the effect can build up to the point where > several 10s of prefixes are present but not used. > Some utilities to prune the unused prefixes would be useful. -- This message was sent by Atlassian JIRA (v7.6.3#76005)