jenkins-bot has submitted this change and it was merged.

Change subject: Handle 404s/deletes
......................................................................


Handle 404s/deletes

When Wikibase returns a 404 for a page, we make sure that the page doesn't
have any entries in the RDF store.  This should cover deletes.

Change-Id: I938d23fab7dd636198f2b0fb3ffde25a266f5815
---
M tools/src/main/java/org/wikidata/query/rdf/tool/Update.java
M tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
M tools/src/main/java/org/wikidata/query/rdf/tool/rdf/RdfRepository.java
M tools/src/main/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepository.java
M tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
M tools/src/test/java/org/wikidata/query/rdf/tool/rdf/RdfRepositoryIntegrationTest.java
6 files changed, 69 insertions(+), 21 deletions(-)

Approvals:
  Manybubbles: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/tools/src/main/java/org/wikidata/query/rdf/tool/Update.java 
b/tools/src/main/java/org/wikidata/query/rdf/tool/Update.java
index 1338146..e249f4e 100644
--- a/tools/src/main/java/org/wikidata/query/rdf/tool/Update.java
+++ b/tools/src/main/java/org/wikidata/query/rdf/tool/Update.java
@@ -278,13 +278,13 @@
      */
     private void handleChange(Change change) throws RetryableException, 
ContainedException {
         log.debug("Received revision information {}", change);
-        // TODO deletes
         if (change.revision() >= 0 && 
rdfRepository.hasRevision(change.entityId(), change.revision())) {
             log.debug("RDF repostiroy already has this revision, skipping.");
             return;
         }
         Munger munger = new Munger(entityDataUris, entityUris);
-        rdfRepository.sync(change.entityId(), 
munger.munge(wikibase.fetchRdfForEntity(change.entityId())));
+        rdfRepository.sync(change.entityId(),
+                munger.munge(change.entityId(), 
wikibase.fetchRdfForEntity(change.entityId())));
         updateMeter.mark();
     }
 
diff --git a/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java 
b/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
index 9f71496..858c7c2 100644
--- a/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
+++ b/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
@@ -38,11 +38,14 @@
      * @param statements statements to munge
      * @return a reference to statements
      */
-    public Collection<Statement> munge(Collection<Statement> statements) {
-        /*
-         * Filters and adds RDF based in a single pass.
-         */
+    public Collection<Statement> munge(String entityId, Collection<Statement> 
statements) {
+        if (statements.isEmpty()) {
+            // Empty collection is a delete.
+            return statements;
+        }
+        // Filters and adds RDF based in a single pass.
         Iterator<Statement> itr = statements.iterator();
+        String entityUri = entityUris.namespace() + entityId;
         Value revisionId = null;
         Value lastModified = null;
         Resource entity = null;
@@ -71,7 +74,14 @@
             }
             if (subject.startsWith(entityUris.namespace())) {
                 entity = s.getSubject();
-                if (predicate.equals(RDF.TYPE) && 
s.getObject().stringValue().equals(Ontology.ITEM)) {
+                if (!subject.equals(entityUri)) {
+                    /*
+                     * Some flavors of rdf dump information about other 
entities
+                     * along side the main entity. We can't handle that 
properly
+                     * and it doesn't make a ton of sense anyway.
+                     */
+                    itr.remove();
+                } else if (predicate.equals(RDF.TYPE) && 
s.getObject().stringValue().equals(Ontology.ITEM)) {
                     // We don't need wd:Q1 a wdo:item
                     itr.remove();
                 } else if (predicate.equals(SchemaDotOrg.NAME)) {
diff --git 
a/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/RdfRepository.java 
b/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/RdfRepository.java
index 47d6384..6709aa0 100644
--- a/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/RdfRepository.java
+++ b/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/RdfRepository.java
@@ -63,26 +63,32 @@
      * @param statements all known statements about the entity
      */
     public void sync(String entityId, Collection<Statement> statements) {
+        StringBuilder command = new StringBuilder();
         UpdateBuilder siteLinksBuilder = updateBuilder();
         siteLinksBuilder.delete("?s", "?p", "?o");
         siteLinksBuilder.where("?s", "schema:about", "entity:" + entityId);
         siteLinksBuilder.where("?s", "?p", "?o");
-        siteLinksBuilder.where().notExists().values(statements, "?s", "?p", 
"?o");
+        if (!statements.isEmpty()) {
+            siteLinksBuilder.where().notExists().values(statements, "?s", 
"?p", "?o");
+        }
+        command.append(siteLinksBuilder).append(";\n");
 
         UpdateBuilder generalBuilder = updateBuilder();
         generalBuilder.delete("entity:" + entityId, "?p", "?o");
         generalBuilder.where("entity:" + entityId, "?p", "?o");
-        generalBuilder.where().notExists().values(statements, "?s", "?p", 
"?o");
+        if (!statements.isEmpty()) {
+            generalBuilder.where().notExists().values(statements, "?s", "?p", 
"?o");
+        }
+        command.append(generalBuilder).append(";\n");
 
-        UpdateBuilder insertBuilder = updateBuilder();
-        for (Statement statement : statements) {
-            insertBuilder.insert(statement.getSubject(), 
statement.getPredicate(), statement.getObject());
+        if (!statements.isEmpty()) {
+            UpdateBuilder insertBuilder = updateBuilder();
+            for (Statement statement : statements) {
+                insertBuilder.insert(statement.getSubject(), 
statement.getPredicate(), statement.getObject());
+            }
+            command.append(insertBuilder).append(";\n");
         }
         long start = System.currentTimeMillis();
-        StringBuilder command = new StringBuilder();
-        command.append(siteLinksBuilder).append(";\n");
-        command.append(generalBuilder).append(";\n");
-        command.append(insertBuilder).append(";\n");
         execute("update", IGNORE_RESPONSE, command.toString());
         log.debug("Updating {} took {} millis", entityId, 
System.currentTimeMillis() - start);
     }
diff --git 
a/tools/src/main/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepository.java
 
b/tools/src/main/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepository.java
index 5a65658..82b8750 100644
--- 
a/tools/src/main/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepository.java
+++ 
b/tools/src/main/java/org/wikidata/query/rdf/tool/wikibase/WikibaseRepository.java
@@ -8,6 +8,7 @@
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Date;
 import java.util.List;
 import java.util.Locale;
@@ -87,6 +88,14 @@
         parser.setRDFHandler(collector);
         try {
             try (CloseableHttpResponse response = client.execute(new 
HttpGet(uri))) {
+                if (response.getStatusLine().getStatusCode() == 404) {
+                    // A delete/nonexistent page
+                    return Collections.emptyList();
+                }
+                if (response.getStatusLine().getStatusCode() >= 300) {
+                    throw new ContainedException("Unexpected status code 
fetching RDF for " + uri + ":  "
+                            + response.getStatusLine().getStatusCode());
+                }
                 parser.parse(new 
InputStreamReader(response.getEntity().getContent(), Charsets.UTF_8), 
uri.toString());
             }
         } catch (IOException e) {
@@ -204,6 +213,7 @@
              */
             builder.setPath(String.format(Locale.ROOT, 
"/wiki/Special:EntityData/%s.ttl", title));
             builder.addParameter("nocache", "");
+            builder.addParameter("flavor", "dump");
             return build(builder);
         }
 
diff --git 
a/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java 
b/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
index 6ba5cf5..a6525ff 100644
--- a/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
+++ b/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
@@ -38,7 +38,7 @@
     public void mungesEntityDataOntoEntity() {
         List<Statement> statements = basicEntity("Q23");
 
-        munger.munge(statements);
+        munger.munge("Q23", statements);
         // This Matcher is so hard to build......
         ImmutableList.Builder<Matcher<? super Statement>> matchers = 
ImmutableList.builder();
         matchers.add(equalTo(statement("Q23", SchemaDotOrg.VERSION, new 
LiteralImpl("a revision number I promise"))));
@@ -50,7 +50,7 @@
     public void extraDataIsntModified() {
         List<Statement> statements = basicEntity("Q23");
         statements.add(statement("Q23", "P509", "Q6"));
-        munger.munge(statements);
+        munger.munge("Q23", statements);
         assertThat(statements, hasItem(equalTo(statement("Q23", "P509", 
"Q6"))));
     }
 
@@ -58,7 +58,7 @@
     public void complainsAboutExtraSubjects() {
         List<Statement> statements = basicEntity("Q23");
         statements.add(statement("http://example.com/bogus";, "Q23", "Q23"));
-        munger.munge(statements);
+        munger.munge("Q23", statements);
     }
 
     @Test
@@ -75,7 +75,7 @@
             statements.add(metaDecl);
             statements.add(articleDecl);
         }
-        munger.munge(statements);
+        munger.munge("Q23", statements);
         assertThat(statements, 
both(hasItem(equalTo(articleDecl))).and(hasItem(equalTo(metaDecl))));
     }
 
@@ -87,12 +87,25 @@
 
         List<Statement> statements = basicEntity("Q23");
         statements.addAll(ImmutableList.of(rdfsDecl, skosDecl, schemaDecl));
-        munger.munge(statements);
+        munger.munge("Q23", statements);
         assertThat(statements, hasItem(equalTo(rdfsDecl)));
         assertThat(statements, not(hasItem(equalTo(skosDecl))));
         assertThat(statements, not(hasItem(equalTo(schemaDecl))));
     }
 
+    @Test
+    public void labelsOnOthersRemoved() {
+        Statement georgeDecl = statement("Q23", RDFS.LABEL, new 
LiteralImpl("george", "en"));
+        Statement marthaDecl = statement("Q191789", RDFS.LABEL, new 
LiteralImpl("martha", "en"));
+
+        List<Statement> statements = basicEntity("Q23");
+        statements.add(georgeDecl);
+        statements.add(marthaDecl);
+        munger.munge("Q23", statements);
+        assertThat(statements, hasItem(equalTo(georgeDecl)));
+        assertThat(statements, not(hasItem(equalTo(marthaDecl))));
+    }
+
     private List<Statement> basicEntity(String entityId) {
         List<Statement> statements = new ArrayList<>();
         String entityData = EntityData.WIKIDATA.namespace() + entityId;
diff --git 
a/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/RdfRepositoryIntegrationTest.java
 
b/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/RdfRepositoryIntegrationTest.java
index 619e1eb..268717e 100644
--- 
a/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/RdfRepositoryIntegrationTest.java
+++ 
b/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/RdfRepositoryIntegrationTest.java
@@ -11,6 +11,7 @@
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
 
@@ -193,6 +194,14 @@
         assertFalse(r.hasNext());
     }
 
+    @Test
+    public void delete() throws QueryEvaluationException {
+        newSiteLink();
+        repository.sync("Q23", Collections.<Statement> emptyList());
+        TupleQueryResult r = repository.query("SELECT * WHERE {?s ?p ?o}");
+        assertFalse(r.hasNext());
+    }
+
     private void syncJustVersion(String entityId, int version) {
         Statement statement = statement(entityId, SchemaDotOrg.VERSION,
                 new IntegerLiteralImpl(new 
BigInteger(Integer.toString(version))));

-- 
To view, visit https://gerrit.wikimedia.org/r/200774
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I938d23fab7dd636198f2b0fb3ffde25a266f5815
Gerrit-PatchSet: 1
Gerrit-Project: wikidata/query/rdf
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <never...@wikimedia.org>
Gerrit-Reviewer: Manybubbles <never...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to