jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/379838 )
Change subject: Add stored query for category traversal ...................................................................... Add stored query for category traversal Example: SELECT ?out ?depth WHERE { SERVICE mediawiki:categoryTree { bd:serviceParam mediawiki:start <https://en.wikipedia.org/wiki/Category:Ducks> . bd:serviceParam mediawiki:direction "Reverse" . bd:serviceParam mediawiki:depth 5 . } } ORDER BY ASC(?depth) Change-Id: Ice1615d3d75e47beb9943ca6ca9fb39f6fe27588 Bug: T157676 --- M blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseContextListener.java A blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/categories/CategoriesStoredQuery.java M blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/mwapi/MWApiServiceFactory.java A common/src/main/java/org/wikidata/query/rdf/common/uri/Mediawiki.java M common/src/main/java/org/wikidata/query/rdf/common/uri/Ontology.java M dist/src/script/loadCategoryDump.sh M dist/src/script/prefixes.conf 7 files changed, 115 insertions(+), 7 deletions(-) Approvals: Smalyshev: Looks good to me, approved jenkins-bot: Verified Gehel: Looks good to me, but someone else must approve diff --git a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseContextListener.java b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseContextListener.java index 3a6abe7..d17a0d2 100644 --- a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseContextListener.java +++ b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseContextListener.java @@ -14,6 +14,7 @@ import org.openrdf.query.resultio.TupleQueryResultFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.wikidata.query.rdf.blazegraph.categories.CategoriesStoredQuery; import org.wikidata.query.rdf.blazegraph.constraints.CoordinatePartBOp; import org.wikidata.query.rdf.blazegraph.constraints.DecodeUriBOp; import org.wikidata.query.rdf.blazegraph.constraints.WikibaseCornerBOp; @@ -88,6 +89,7 @@ LabelService.register(); GeoService.register(); MWApiServiceFactory.register(); + CategoriesStoredQuery.register(); // Whitelist services we like by default reg.addWhitelistURL(GASService.Options.SERVICE_KEY.toString()); diff --git a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/categories/CategoriesStoredQuery.java b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/categories/CategoriesStoredQuery.java new file mode 100644 index 0000000..5fe4719 --- /dev/null +++ b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/categories/CategoriesStoredQuery.java @@ -0,0 +1,84 @@ +package org.wikidata.query.rdf.blazegraph.categories; + +import org.openrdf.model.URI; +import org.openrdf.model.impl.URIImpl; +import org.wikidata.query.rdf.common.uri.Mediawiki; + +import com.bigdata.rdf.sparql.ast.eval.ServiceParams; +import com.bigdata.rdf.sparql.ast.service.ServiceCallCreateParams; +import com.bigdata.rdf.sparql.ast.service.ServiceRegistry; +import com.bigdata.rdf.sparql.ast.service.storedquery.SimpleStoredQueryService; + +/** + * Stored query for categories: + * SELECT ?out ?depth WHERE { + * SERVICE mediawiki:categoryTree { + * bd:serviceParam mediawiki:start <https://en.wikipedia.org/wiki/Category:Ducks> . + * bd:serviceParam mediawiki:direction "Reverse" . + * bd:serviceParam mediawiki:depth 5 . + * } + * } ORDER BY ASC(?depth) + * + * Directions are: + * - Forward: get parent category tree + * - Reverse: get subcategory tree + * - Undirected: both directions + */ +public class CategoriesStoredQuery extends SimpleStoredQueryService { + /** + * The URI service key. + */ + public static final URI SERVICE_KEY = new URIImpl(Mediawiki.NAMESPACE + "categoryTree"); + /** + * start parameter. + */ + public static final URI START_PARAM = new URIImpl(Mediawiki.NAMESPACE + "start"); + /** + * direction parameter. + */ + public static final URI DIRECTION_PARAM = new URIImpl(Mediawiki.NAMESPACE + "direction"); + /** + * max depth parameter. + */ + public static final URI DEPTH_PARAM = new URIImpl(Mediawiki.NAMESPACE + "depth"); + /** + * Default max depth. + */ + public static final int MAX_DEPTH = 8; + /** + * Register the service so it is recognized by Blazegraph. + */ + public static void register() { + ServiceRegistry reg = ServiceRegistry.getInstance(); + reg.add(SERVICE_KEY, new CategoriesStoredQuery()); + reg.addWhitelistURL(SERVICE_KEY.toString()); + } + + @Override + protected String getQuery(ServiceCallCreateParams createParams, + ServiceParams serviceParams) { + final StringBuilder sb = new StringBuilder(); + + final URI start = serviceParams.getAsURI(START_PARAM); + final String direction = serviceParams.getAsString(DIRECTION_PARAM, "Reverse"); + final int depth = serviceParams.getAsInt(DEPTH_PARAM, MAX_DEPTH); + + // Fixed parts + sb.append("SELECT * WHERE {\n" + + "SERVICE gas:service {\n" + + " gas:program gas:gasClass \"com.bigdata.rdf.graph.analytics.BFS\" .\n" + + " gas:program gas:linkType mediawiki:isInCategory .\n"); + // Variable parts + sb.append(" gas:program gas:traversalDirection \"" + direction + "\" .\n" + + " gas:program gas:in <" + start.stringValue() + "> .\n" + + " gas:program gas:out ?out .\n" + + " gas:program gas:out1 ?depth .\n" + + " gas:program gas:out2 ?predecessor .\n" + + " gas:program gas:maxIterations " + depth + " .\n"); + // Fixed footer + sb.append(" }\n" + + "}"); + return sb.toString(); + } + +} diff --git a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/mwapi/MWApiServiceFactory.java b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/mwapi/MWApiServiceFactory.java index 7fe91d8..12b6fb0 100644 --- a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/mwapi/MWApiServiceFactory.java +++ b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/mwapi/MWApiServiceFactory.java @@ -17,6 +17,7 @@ import org.openrdf.model.impl.URIImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.wikidata.query.rdf.common.uri.Mediawiki; import org.wikidata.query.rdf.common.uri.Ontology; import com.bigdata.bop.IVariable; @@ -78,7 +79,7 @@ /** * Namespace for MWAPI parameters. */ - public static final String MWAPI_NAMESPACE = Ontology.MEDIAWIKI_NAMESPACE + "API/"; + public static final String MWAPI_NAMESPACE = Mediawiki.NAMESPACE + "API/"; /** * Default service config filename. */ diff --git a/common/src/main/java/org/wikidata/query/rdf/common/uri/Mediawiki.java b/common/src/main/java/org/wikidata/query/rdf/common/uri/Mediawiki.java new file mode 100644 index 0000000..0663f7f --- /dev/null +++ b/common/src/main/java/org/wikidata/query/rdf/common/uri/Mediawiki.java @@ -0,0 +1,23 @@ +package org.wikidata.query.rdf.common.uri; + +/** + * Mediawiki ontology. + */ +public final class Mediawiki { + /** + * Common prefix of all ontology parts. + */ + public static final String NAMESPACE = "https://www.mediawiki.org/ontology#"; + /** + * Category class. + */ + public static final String CATEGORY = NAMESPACE + "Category"; + /** + * mediawiki:isInCategory predicate. + */ + public static final String IS_IN_CATEGORY = NAMESPACE + "isInCategory"; + + private Mediawiki() { + // Utility class. + } +} diff --git a/common/src/main/java/org/wikidata/query/rdf/common/uri/Ontology.java b/common/src/main/java/org/wikidata/query/rdf/common/uri/Ontology.java index c5a642e..1501559 100644 --- a/common/src/main/java/org/wikidata/query/rdf/common/uri/Ontology.java +++ b/common/src/main/java/org/wikidata/query/rdf/common/uri/Ontology.java @@ -9,10 +9,6 @@ */ public static final String NAMESPACE = "http://wikiba.se/ontology#"; /** - * Common prefix of Mediawiki ontology parts. - */ - public static final String MEDIAWIKI_NAMESPACE = "https://www.mediawiki.org/ontology#"; - /** * Old ontology prefix, for loading old dumps. * TODO: remove this when ontology stabilizes. */ diff --git a/dist/src/script/loadCategoryDump.sh b/dist/src/script/loadCategoryDump.sh index 21fa663..7c33362 100755 --- a/dist/src/script/loadCategoryDump.sh +++ b/dist/src/script/loadCategoryDump.sh @@ -15,13 +15,13 @@ exit 1 fi -TS=$(curl -s -XGET $SOURCE/lastdump/$WIKI-categories.last | cut -c1-8) +TS=$(curl -s -f -XGET $SOURCE/lastdump/$WIKI-categories.last | cut -c1-8) if [ -z "$TS" ]; then echo "Could not load timestamp" exit 1 fi FILENAME=$WIKI-$TS-categories.ttl.gz -curl -s -XGET $SOURCE/$TS/$FILENAME -o $DATA_DIR/$FILENAME +curl -s -f -XGET $SOURCE/$TS/$FILENAME -o $DATA_DIR/$FILENAME if [ ! -s $DATA_DIR/$FILENAME ]; then echo "Could not download $FILENAME" exit 1 diff --git a/dist/src/script/prefixes.conf b/dist/src/script/prefixes.conf index 973a150..322d267 100644 --- a/dist/src/script/prefixes.conf +++ b/dist/src/script/prefixes.conf @@ -1,4 +1,6 @@ PREFIX psn: <http://www.wikidata.org/prop/statement/value-normalized/> PREFIX pqn: <http://www.wikidata.org/prop/qualifier/value-normalized/> PREFIX prn: <http://www.wikidata.org/prop/reference/value-normalized/> +PREFIX mediawiki: <https://www.mediawiki.org/ontology#> PREFIX mwapi: <https://www.mediawiki.org/ontology#API/> +PREFIX gas: <http://www.bigdata.com/rdf/gas#> -- To view, visit https://gerrit.wikimedia.org/r/379838 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ice1615d3d75e47beb9943ca6ca9fb39f6fe27588 Gerrit-PatchSet: 2 Gerrit-Project: wikidata/query/rdf Gerrit-Branch: master Gerrit-Owner: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: Gehel <guillaume.leder...@wikimedia.org> Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits