jenkins-bot has submitted this change and it was merged. Change subject: Fix entity namespace checking in RecentChangesPoller ......................................................................
Fix entity namespace checking in RecentChangesPoller Change-Id: Id67d78fb591023a796e2dd2bc0eac9e1de22caf2 --- M tools/src/main/java/org/wikidata/query/rdf/tool/Update.java M tools/src/main/java/org/wikidata/query/rdf/tool/change/RecentChangesPoller.java M tools/src/main/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepository.java M tools/src/test/java/org/wikidata/query/rdf/tool/change/RecentChangesPollerUnitTest.java 4 files changed, 66 insertions(+), 14 deletions(-) Approvals: Smalyshev: Looks good to me, approved jenkins-bot: Verified diff --git a/tools/src/main/java/org/wikidata/query/rdf/tool/Update.java b/tools/src/main/java/org/wikidata/query/rdf/tool/Update.java index 9a9bff9..3c50e3a 100644 --- a/tools/src/main/java/org/wikidata/query/rdf/tool/Update.java +++ b/tools/src/main/java/org/wikidata/query/rdf/tool/Update.java @@ -93,7 +93,7 @@ @Option(shortName = "V", longName = "verify", description = "Verify updates (may have performance impact)") boolean verify(); - @Option(defaultToNull = true, description = "If specified must be numbers of Item and Property namespaces" + @Option(defaultToNull = true, description = "If specified must be numerical indexes of Item and Property namespaces" + " that defined in Wikibase repository, comma separated.") String entityNamespaces(); } @@ -103,12 +103,9 @@ */ public static void main(String[] args) { Options options = handleOptions(Options.class, args); - WikibaseRepository wikibaseRepository; - if (options.entityNamespaces() == null) { - wikibaseRepository = new WikibaseRepository(options.wikibaseScheme(), options.wikibaseHost()); - } else { - String[] entityNamespaces = options.entityNamespaces().split(","); - wikibaseRepository = new WikibaseRepository(options.wikibaseScheme(), options.wikibaseHost(), 0, entityNamespaces); + WikibaseRepository wikibaseRepository = buildWikibaseRepository(options); + if (wikibaseRepository == null) { + return; } URI sparqlUri; try { @@ -167,7 +164,7 @@ return 
IdRangeChangeSource.forItems(start, end, options.batchSize()); } if (options.ids() != null) { - List<String> parsedIds = new ArrayList<String>(); + List<String> parsedIds = new ArrayList<String>(); // FIXME use OptionsUtils.splitByComma(options.ids()) for (String idOpt: options.ids()) { if (idOpt.contains(",")) { // Id list @@ -216,6 +213,30 @@ } /** + * Build WikibaseRepository object. + * + * @return null if none can be built - it's OK to just exit - errors have been + * logged to the user + */ + private static WikibaseRepository buildWikibaseRepository(Options options) { + if (options.entityNamespaces() == null) { + return new WikibaseRepository(options.wikibaseScheme(), options.wikibaseHost()); + } + + String[] strEntityNamespaces = options.entityNamespaces().split(","); // FIXME use OptionsUtils.splitByComma(options.entityNamespaces()) + long[] longEntityNamespaces = new long[strEntityNamespaces.length]; + try { + for (int i = 0; i < strEntityNamespaces.length; i++) { + longEntityNamespaces[i] = Long.parseLong(strEntityNamespaces[i]); + } + } catch (NumberFormatException e) { + log.error("Invalid value for --entityNamespaces. Namespace index should be an integer.", e); + return null; + } + return new WikibaseRepository(options.wikibaseScheme(), options.wikibaseHost(), 0, longEntityNamespaces); + } + + /** * Metric registry. 
*/ private final MetricRegistry metrics = new MetricRegistry(); diff --git a/tools/src/main/java/org/wikidata/query/rdf/tool/change/RecentChangesPoller.java b/tools/src/main/java/org/wikidata/query/rdf/tool/change/RecentChangesPoller.java index 64415da..e4bf556 100644 --- a/tools/src/main/java/org/wikidata/query/rdf/tool/change/RecentChangesPoller.java +++ b/tools/src/main/java/org/wikidata/query/rdf/tool/change/RecentChangesPoller.java @@ -107,7 +107,7 @@ for (Object rco : result) { JSONObject rc = (JSONObject) rco; long namespace = (long) rc.get("ns"); - if (namespace != 0 && namespace != 120) { + if (!wikibase.isEntityNamespace(namespace)) { log.debug("Skipping change in irrelevant namespace: {}", rc); continue; } diff --git a/tools/src/main/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepository.java b/tools/src/main/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepository.java index e4287f0..900ff91 100644 --- a/tools/src/main/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepository.java +++ b/tools/src/main/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepository.java @@ -20,7 +20,7 @@ import javax.net.ssl.SSLException; import javax.net.ssl.SSLHandshakeException; -import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.ArrayUtils; import org.apache.http.Consts; import org.apache.http.HttpEntityEnclosingRequest; import org.apache.http.HttpRequest; @@ -97,7 +97,7 @@ uris = new Uris(scheme, host, port); } - public WikibaseRepository(String scheme, String host, int port, String[] entityNamespaces) { + public WikibaseRepository(String scheme, String host, int port, long[] entityNamespaces) { uris = new Uris(scheme, host, port, entityNamespaces); } @@ -351,6 +351,16 @@ } /** + * Check that a namespace is valid wikibase entity namespace. 
+ * + * @param namespace the namespace index + * @return true if the namespace index is one of the configured entity namespaces + */ + public boolean isEntityNamespace(long namespace) { + return ArrayUtils.contains(uris.getEntityNamespaces(), namespace); + } + + /** * URIs used for accessing wikibase. */ public static class Uris { @@ -369,7 +379,7 @@ /** * Item and Property namespaces. */ - private String[] entityNamespaces = {"0", "120"}; + private long[] entityNamespaces = {0, 120}; public Uris(String scheme, String host) { this.scheme = scheme; @@ -383,7 +393,7 @@ this.port = port; } - public Uris(String scheme, String host, int port, String[] entityNamespaces) { + public Uris(String scheme, String host, int port, long[] entityNamespaces) { this.scheme = scheme; this.host = host; this.port = port; @@ -405,7 +415,7 @@ builder.addParameter("list", "recentchanges"); builder.addParameter("rcdir", "newer"); builder.addParameter("rcprop", "title|ids|timestamp"); - builder.addParameter("rcnamespace", StringUtils.join(this.entityNamespaces, "|")); + builder.addParameter("rcnamespace", getEntityNamespacesString("|")); builder.addParameter("rclimit", Integer.toString(batchSize)); if (continueObject == null) { builder.addParameter("continue", ""); @@ -541,6 +551,24 @@ return scheme; } + /** + * The wikibase entity namespace indexes. + */ + private long[] getEntityNamespaces() { + return entityNamespaces; + } + + /** + * The wikibase entity namespace indexes joined with a delimiter. 
+ */ + private String getEntityNamespacesString(String delimiter) { + String rcnamespace = ""; + for (long i : entityNamespaces) { + rcnamespace += i + delimiter; + } + return rcnamespace.substring(0, rcnamespace.length() - delimiter.length()); // Remove delimiter at the end + } + } /** diff --git a/tools/src/test/java/org/wikidata/query/rdf/tool/change/RecentChangesPollerUnitTest.java b/tools/src/test/java/org/wikidata/query/rdf/tool/change/RecentChangesPollerUnitTest.java index 8f7beeb..9ee8ecd 100644 --- a/tools/src/test/java/org/wikidata/query/rdf/tool/change/RecentChangesPollerUnitTest.java +++ b/tools/src/test/java/org/wikidata/query/rdf/tool/change/RecentChangesPollerUnitTest.java @@ -54,6 +54,7 @@ recentChanges.add(rc); } when(repository.fetchRecentChanges(startTime, null, batchSize)).thenReturn(result); + when(repository.isEntityNamespace(0)).thenReturn(true); RecentChangesPoller poller = new RecentChangesPoller(repository, startTime, batchSize); Batch batch = poller.firstBatch(); @@ -109,6 +110,7 @@ when(repository.fetchRecentChanges(startTime, null, batchSize)).thenReturn(result); when(repository.getContinueObject((Change)any())).thenReturn(contJson); + when(repository.isEntityNamespace(0)).thenReturn(true); RecentChangesPoller poller = new RecentChangesPoller(repository, startTime, batchSize); Batch batch = poller.firstBatch(); @@ -156,6 +158,7 @@ recentChanges.add(rc); when(repository.fetchRecentChanges(startTime, null, batchSize)).thenReturn(result); + when(repository.isEntityNamespace(0)).thenReturn(true); RecentChangesPoller poller = new RecentChangesPoller(repository, startTime, batchSize); Batch batch = poller.firstBatch(); -- To view, visit https://gerrit.wikimedia.org/r/298982 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Id67d78fb591023a796e2dd2bc0eac9e1de22caf2 Gerrit-PatchSet: 2 Gerrit-Project: wikidata/query/rdf Gerrit-Branch: master Gerrit-Owner: Pastakhov <pastak...@yandex.ru> 
Gerrit-Reviewer: Pastakhov <pastak...@yandex.ru> Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits