Manybubbles has uploaded a new change for review. https://gerrit.wikimedia.org/r/203154
Change subject: WIP: Simplify statement construction ...................................................................... WIP: Simplify statement construction Change-Id: If2fc2ba2437592322c76a5655d3c4d1f6987fe0e --- A tools/src/test/java/org/wikidata/query/rdf/tool/ExpandedStatementBuilder.java M tools/src/test/java/org/wikidata/query/rdf/tool/MungeIntegrationTest.java M tools/src/test/java/org/wikidata/query/rdf/tool/StatementHelper.java M tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java 4 files changed, 415 insertions(+), 113 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikidata/query/rdf refs/changes/54/203154/1 diff --git a/tools/src/test/java/org/wikidata/query/rdf/tool/ExpandedStatementBuilder.java b/tools/src/test/java/org/wikidata/query/rdf/tool/ExpandedStatementBuilder.java new file mode 100644 index 0000000..3f85887 --- /dev/null +++ b/tools/src/test/java/org/wikidata/query/rdf/tool/ExpandedStatementBuilder.java @@ -0,0 +1,365 @@ +package org.wikidata.query.rdf.tool; + +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.wikidata.query.rdf.tool.StatementHelper.statement; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +import org.hamcrest.Matcher; +import org.openrdf.model.Statement; +import org.openrdf.model.Value; +import org.openrdf.model.impl.LiteralImpl; +import org.wikidata.query.rdf.common.uri.Ontology; +import org.wikidata.query.rdf.common.uri.Provenance; +import org.wikidata.query.rdf.common.uri.RDF; +import org.wikidata.query.rdf.common.uri.SchemaDotOrg; +import org.wikidata.query.rdf.common.uri.WikibaseUris; + +import com.carrotsearch.randomizedtesting.RandomizedTest; + +/** + * Builds expanded statements in the style of Wikibase's dump flavored export or + * post-munged. 
Because this is able to build both styles this amounts to the + best way to test the munger on expanded statements. It's therefore super + important that it faithfully reflects what we expect of the munger. + */ +public class ExpandedStatementBuilder { + private final List<ExtraInfo> extraInfo = new ArrayList<>(); + private final Random random; + private final WikibaseUris uris; + private final String entity; + private final String property; + private final Object value; + + private boolean includeBasicEntity = true; + /** + * Version set if includeBasicEntity is true. + */ + private Value version; + /** + * Date modified set if includeBasicEntity is true. + */ + private Value dateModified; + private ExpandedValueInfo expandedValue; + + private String statementUri; + private String rank = Ontology.NORMAL_RANK; + private boolean bestRank = false; + + /** + * Null until it's built and then can't be changed. + */ + private List<Statement> statements; + + /* + * These are built as part of the build step and used later when + * transforming from wikibase style to munged style. + */ + private Statement entityDataAboutDecl; + private Statement entityDataVersionDecl; + private Statement entityDataDateModifiedDecl; + private Statement statementTypeDecl; + + public ExpandedStatementBuilder(Random random, WikibaseUris uris, String entity, String property, Object value) { + this.random = random; + this.uris = uris; + this.entity = entity; + this.property = property; + this.value = value; + } + + /** + * Get the entity of this expanded statement. + */ + public String entity() { + return entity; + } + + /** + * Get the value of this expanded statement. + */ + public Object value() { + return value; + } + + /** + * Should this statement be of best rank? + */ + public ExpandedStatementBuilder bestRank(boolean bestRank) { + checkCanChange(); + this.bestRank = bestRank; + return this; + } + + /** + * Add a reference to this statement. 
+ */ + public ExpandedStatementBuilder reference(String property, Object value) { + extraInfo.add(new ReferenceInfo(property, value)); + return this; + } + + /** + * Add a qualifier to this statement. + */ + public ExpandedStatementBuilder qualifier(String property, Object value) { + extraInfo.add(new QualifierInfo(property, value)); + return this; + } + + /** + * Add an expanded value statement to this statement. + */ + public ExpandedStatementBuilder expandedValue(String property, Object value) { + if (expandedValue == null) { + expandedValue = new ExpandedValueInfo(this.property, null); + extraInfo.add(expandedValue); + } + expandedValue.entries.add(new ExpandedValueInfoEntry(property, value)); + return this; + } + + /** + * Get the results in wikibase style. + */ + public List<Statement> wikibaseStyle() { + return built(); + } + + /** + * Get the results in wikibase style but shuffled. + */ + public List<Statement> wikibaseStyleShuffled() { + List<Statement> wikibaseStyle = wikibaseStyle(); + Collections.shuffle(wikibaseStyle, random); + return wikibaseStyle; + } + + /** + * Get the results in munged style. + */ + public List<Statement> mungedStyle() { + List<Statement> st = built(); + if (includeBasicEntity) { + st.remove(entityDataAboutDecl); + st.remove(entityDataVersionDecl); + st.remove(entityDataDateModifiedDecl); + statement(st, uris.entity() + entity, SchemaDotOrg.VERSION, version); + statement(st, uris.entity() + entity, SchemaDotOrg.DATE_MODIFIED, dateModified); + } + st.remove(statementTypeDecl); + for (ExtraInfo e : extraInfo) { + e.munge(st); + } + return st; + } + + public List<Matcher<? super Statement>> mungedStyleMatchers() { + List<Matcher<? super Statement>> matchers = new ArrayList<>(); + for (Statement s : mungedStyle()) { + matchers.add(equalTo(s)); + } + return matchers; + } + + public Matcher<Iterable<? 
extends Statement>> mungedStyleMatcher() { + return containsInAnyOrder(mungedStyleMatchers()); + } + + private void checkCanChange() { + if (statements != null) { + throw new IllegalStateException("Result already built"); + } + } + + private List<Statement> built() { + if (statements == null) { + build(); + } + return new ArrayList<>(statements); + } + + private void build() { + statements = new ArrayList<>(); + buildBasicEntityIfNeeded(); + buildStatement(); + buildReferenceIfNeeded(); + } + + private void buildBasicEntityIfNeeded() { + if (!includeBasicEntity) { + return; + } + if (version == null) { + version = new LiteralImpl("a revision number I promise"); + } + if (dateModified == null) { + dateModified = new LiteralImpl("a date I promise"); + } + String entityDataUri = uris.entityData() + entity; + entityDataAboutDecl = statement(statements, entityDataUri, SchemaDotOrg.ABOUT, entity); + entityDataVersionDecl = statement(statements, entityDataUri, SchemaDotOrg.VERSION, version); + entityDataDateModifiedDecl = statement(statements, entityDataUri, SchemaDotOrg.DATE_MODIFIED, dateModified); + } + + private void buildStatement() { + if (statementUri == null) { + statementUri = uris.statement() + entity + "-" + randomId(); + } + + statement(statements, uris.entity() + entity, uris.entity() + property, statementUri); + statementTypeDecl = statement(statements, statementUri, RDF.TYPE, Ontology.STATEMENT); + statement(statements, statementUri, uris.value() + property, value); + statement(statements, statementUri, Ontology.RANK, rank); + if (bestRank) { + statement(statements, statementUri, Ontology.RANK, Ontology.BEST_RANK); + } + } + + private void buildReferenceIfNeeded() { + for (ExtraInfo e : extraInfo) { + e.build(); + } + } + + private String randomId() { + return RandomizedTest.randomAsciiOfLength(10); + } + + private abstract class ExtraInfo { + protected final String property; + protected final Object value; + + public ExtraInfo(String property, Object 
value) { + this.property = property; + this.value = value; + } + + /** + * Build the statements representing this extra. + */ + public abstract void build(); + + /** + * Perform whatever munging we expect the munger to perform. + */ + public abstract void munge(List<Statement> statements); + } + + private abstract class AbstractComplexExtraInfo extends ExtraInfo { + protected String uri; + private Statement typeDecl; + + public AbstractComplexExtraInfo(String property, Object value) { + super(property, value); + } + + @Override + public void build() { + if (uri == null) { + uri = namespace() + entity + "-" + randomId(); + } + statement(statements, statementUri, declarationPredicate(), uri); + typeDecl = statement(statements, uri, RDF.TYPE, type()); + } + + @Override + public void munge(List<Statement> statements) { + statements.remove(typeDecl); + } + + protected abstract String namespace(); + + protected abstract String declarationPredicate(); + + protected abstract String type(); + } + + private class ReferenceInfo extends AbstractComplexExtraInfo { + public ReferenceInfo(String property, Object value) { + super(property, value); + } + + @Override + public void build() { + super.build(); + statement(statements, uri, uris.value() + property, value); + } + + @Override + protected String namespace() { + return uris.reference(); + } + + @Override + protected String declarationPredicate() { + return Provenance.WAS_DERIVED_FROM; + } + + @Override + protected String type() { + return Ontology.REFERENCE; + } + } + + private class QualifierInfo extends ExtraInfo { + public QualifierInfo(String property, Object value) { + super(property, value); + } + + @Override + public void build() { + statement(statements, statementUri, uris.qualifier() + property, value); + } + + @Override + public void munge(List<Statement> statements) { + // Intentionally a noop + } + } + + private class ExpandedValueInfo extends AbstractComplexExtraInfo { + private List<ExpandedValueInfoEntry> 
entries = new ArrayList<>(); + + public ExpandedValueInfo(String property, Object value) { + super(property, value); + } + + @Override + public void build() { + super.build(); + for (ExpandedValueInfoEntry e : entries) { + statement(statements, uri, e.predicate, e.object); + } + } + + @Override + protected String namespace() { + return uris.value(); + } + + @Override + protected String declarationPredicate() { + return uris.value() + property + "-value"; + } + + @Override + protected String type() { + return Ontology.VALUE; + } + } + + private static class ExpandedValueInfoEntry { + private final String predicate; + private final Object object; + + public ExpandedValueInfoEntry(String predicate, Object object) { + this.predicate = predicate; + this.object = object; + } + } +} diff --git a/tools/src/test/java/org/wikidata/query/rdf/tool/MungeIntegrationTest.java b/tools/src/test/java/org/wikidata/query/rdf/tool/MungeIntegrationTest.java index 4f909b6..54d88a3 100644 --- a/tools/src/test/java/org/wikidata/query/rdf/tool/MungeIntegrationTest.java +++ b/tools/src/test/java/org/wikidata/query/rdf/tool/MungeIntegrationTest.java @@ -25,10 +25,10 @@ @Test public void loadTest() throws IOException { String source = Resources.getResource(MungeIntegrationTest.class, "test.ttl").toString(); - Munge.Httpd http = new Munge.Httpd(10999, uris, new Munger(uris).singleLabelMode("en"), source); + Munge.Httpd http = new Munge.Httpd(10998, uris, new Munger(uris).singleLabelMode("en"), source); http.start(); try { - assertEquals(1002, rdfRepository.loadUrl("http://localhost:10999")); + assertEquals(1002, rdfRepository.loadUrl("http://localhost:10998")); } finally { http.stop(); } diff --git a/tools/src/test/java/org/wikidata/query/rdf/tool/StatementHelper.java b/tools/src/test/java/org/wikidata/query/rdf/tool/StatementHelper.java index 855359d..6225d95 100644 --- a/tools/src/test/java/org/wikidata/query/rdf/tool/StatementHelper.java +++ 
b/tools/src/test/java/org/wikidata/query/rdf/tool/StatementHelper.java @@ -1,10 +1,12 @@ package org.wikidata.query.rdf.tool; +import java.math.BigInteger; import java.util.List; import org.openrdf.model.Statement; import org.openrdf.model.URI; import org.openrdf.model.Value; +import org.openrdf.model.impl.IntegerLiteralImpl; import org.openrdf.model.impl.LiteralImpl; import org.openrdf.model.impl.StatementImpl; import org.openrdf.model.impl.URIImpl; @@ -19,35 +21,31 @@ */ public class StatementHelper { /** - * Statement constructor taking just URIs as strings. + * Statement build helper. */ - public static Statement statement(String s, String p, String o) { - return statement(s, p, uri(o)); + public static Statement statement(String s, String p, Object o) { + Value oValue; + if (o instanceof String) { + oValue = uri(o.toString()); + } else if (o instanceof Value) { + oValue = (Value) o; + } else if (o instanceof Integer) { + oValue = new IntegerLiteralImpl(BigInteger.valueOf((int) o)); + } else if (o instanceof Long) { + oValue = new IntegerLiteralImpl(BigInteger.valueOf((long) o)); + } else { + throw new IllegalArgumentException("Illegal object: " + o); + } + return new StatementImpl(uri(s), uri(p), oValue); } /** - * Statement constructor with a value. Use this one for all values. + * Statement build helper. */ - public static Statement statement(String s, String p, Value o) { - return new StatementImpl(uri(s), uri(p), o); - } - - /** - * Statement constructor taking just URIs as strings and appending the - * statement to a list. - */ - public static Statement statement(List<Statement> statements, String s, String p, String o) { - return statement(statements, s, p, uri(o)); - } - - /** - * Statement constructor with a value appending the statement to a list. Use - * this one for all values. 
- */ - public static Statement statement(List<Statement> statements, String s, String p, Value o) { - Statement statement = statement(s, p, o); - statements.add(statement); - return statement; + public static Statement statement(List<Statement> statements, String s, String p, Object o) { + Statement st = statement(s, p, o); + statements.add(st); + return st; } /** diff --git a/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java b/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java index ec83423..88a09ce 100644 --- a/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java +++ b/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java @@ -27,6 +27,7 @@ import org.wikidata.query.rdf.common.uri.SKOS; import org.wikidata.query.rdf.common.uri.SchemaDotOrg; import org.wikidata.query.rdf.common.uri.WikibaseUris; +import org.wikidata.query.rdf.tool.ExpandedStatementBuilder; import org.wikidata.query.rdf.tool.exception.ContainedException; import org.wikidata.query.rdf.tool.rdf.Munger.BadSubjectException; @@ -118,107 +119,38 @@ @Test public void basicExpandedStatement() throws ContainedException { - List<Statement> george = basicEntity("Q23"); - String statementUri = uris.statement() + "Q23-ce976010-412f-637b-c687-9fd2d52dc140"; - Statement statementTypeDecl = statement(george, statementUri, RDF.TYPE, Ontology.STATEMENT); - Statement valueDecl = statement(george, statementUri, uris.value() + "P509", "Q356405"); - Statement rankDecl = statement(george, statementUri, Ontology.RANK, Ontology.NORMAL_RANK); - Statement statementDecl = statement("Q23", "P509", statementUri); - if (randomBoolean()) { - george.add(0, statementDecl); - } else { - george.add(statementDecl); - } - munger.munge("Q23", george); - assertThat(george, hasItem(statementDecl)); - assertThat(george, not(hasItem(statementTypeDecl))); - assertThat(george, hasItem(valueDecl)); - assertThat(george, hasItem(rankDecl)); + ExpandedStatementBuilder 
b = new ExpandedStatementBuilder(getRandom(), uris, "Q23", "P509", "Q356405"); + statementBuilderTestCase(b); // TODO can we rewrite the valueDecl into something without the repeated // property? } @Test public void expandedStatementWithReference() throws ContainedException { - List<Statement> george = basicEntity("Q23"); - String statementUri = uris.statement() + "Q23-9D3713FF-7BCC-489F-9386-C7322C0AC284"; - String referenceUri = uris.reference() + "e36b7373814a0b74caa84a5fc2b1e3297060ab0f"; - Statement statementDecl = statement(george, "Q23", "P19", statementUri); - Statement statementTypeDecl = statement(george, statementUri, RDF.TYPE, Ontology.STATEMENT); - Statement valueDecl = statement(george, statementUri, uris.value() + "P19", "Q494413"); - Statement rankDecl = statement(george, statementUri, Ontology.RANK, Ontology.NORMAL_RANK); - Statement referenceTypeDecl = statement(george, referenceUri, RDF.TYPE, Ontology.REFERENCE); - Statement referenceValueDecl = statement(george, referenceUri, uris.value() + "P854", - "http://www.anb.org/articles/02/02-00332.html"); - Statement referenceDecl = statement(statementUri, Provenance.WAS_DERIVED_FROM, referenceUri); - if (randomBoolean()) { - george.add(0, referenceDecl); - } else { - george.add(referenceDecl); - } - munger.munge("Q23", george); - assertThat(george, hasItem(statementDecl)); - assertThat(george, not(hasItem(statementTypeDecl))); - assertThat(george, hasItem(valueDecl)); - assertThat(george, hasItem(rankDecl)); - assertThat(george, hasItem(referenceDecl)); - assertThat(george, not(hasItem(referenceTypeDecl))); - assertThat(george, hasItem(referenceValueDecl)); + ExpandedStatementBuilder b = new ExpandedStatementBuilder(getRandom(), uris, "Q23", "P509", "Q356405"); + b.reference("P854", "http://www.anb.org/articles/02/02-00332.html"); + statementBuilderTestCase(b); } @Test public void expandedStatementWithQualifier() throws ContainedException { - List<Statement> george = basicEntity("Q23"); - String 
statementUri = uris.statement() + "q23-8A2F4718-6159-4E58-A8F9-6F24F5EFEC42"; - Statement statementDecl = statement(george, "Q23", "P26", statementUri); - Statement statementTypeDecl = statement(george, statementUri, RDF.TYPE, Ontology.STATEMENT); - Statement valueDecl = statement(george, statementUri, uris.value() + "P26", "Q191789"); - Statement rankDecl = statement(george, statementUri, Ontology.RANK, Ontology.NORMAL_RANK); - Statement qualifierDecl = statement(george, statementUri, uris.qualifier() + "P580", - new LiteralImpl("1759-01-06T00:00:00Z", XMLSchema.DATETIME)); - munger.munge("Q23", george); - assertThat(george, hasItem(statementDecl)); - assertThat(george, not(hasItem(statementTypeDecl))); - assertThat(george, hasItem(valueDecl)); - assertThat(george, hasItem(rankDecl)); - assertThat(george, hasItem(qualifierDecl)); + ExpandedStatementBuilder b = new ExpandedStatementBuilder(getRandom(), uris, "Q23", "P26", "Q191789"); + b.qualifier("P580", new LiteralImpl("1759-01-06T00:00:00Z", XMLSchema.DATETIME)); + statementBuilderTestCase(b); } @Test public void basicExpandedValue() { - List<Statement> universe = basicEntity("Q1"); - String statementUri = uris.statement() + "q1-someuuid"; - String valueUri = uris.value() + "someotheruuid"; - Statement statementDecl = statement(universe, "Q1", "P580", statementUri); - Statement statementTypeDecl = statement(universe, statementUri, RDF.TYPE, Ontology.STATEMENT); - Statement valueDecl = statement(universe, statementUri, uris.value() + "P580", - new LiteralImpl("-13798000000-01-01T00:00:00Z", XMLSchema.DATETIME)); - Statement expandedValueDecl = statement(universe, statementUri, uris.value() + "P580" - + "-value", valueUri); - Statement expandedValueTypeDecl = statement(universe, valueUri, RDF.TYPE, Ontology.VALUE); - /* - * Currently wikibase exports the deep time values as strings, not - * dateTime. 
- */ - Statement expandedValueValueDecl = statement(universe, valueUri, Ontology.Time.VALUE, - "-13798000000-01-01T00:00:00Z"); - Statement expandedValuePrecisionDecl = statement(universe, valueUri, Ontology.Time.PRECISION, - new IntegerLiteralImpl(BigInteger.valueOf(3))); - Statement expandedValueTimezoneDecl = statement(universe, valueUri, Ontology.Time.TIMEZONE, - new IntegerLiteralImpl(BigInteger.valueOf(0))); - Statement expandedValueCalendarModelDecl = statement(universe, valueUri, Ontology.Time.CALENDAR_MODEL, - "Q1985727"); - munger.munge("Q1", universe); - assertThat(universe, hasItem(statementDecl)); - assertThat(universe, not(hasItem(statementTypeDecl))); - assertThat(universe, hasItem(valueDecl)); - assertThat(universe, hasItem(expandedValueDecl)); - assertThat(universe, not(hasItem(expandedValueTypeDecl))); - assertThat(universe, hasItem(expandedValueDecl)); - assertThat(universe, hasItem(expandedValueValueDecl)); - assertThat(universe, hasItem(expandedValuePrecisionDecl)); - assertThat(universe, hasItem(expandedValueTimezoneDecl)); - assertThat(universe, hasItem(expandedValueCalendarModelDecl)); + ExpandedStatementBuilder b = new ExpandedStatementBuilder(getRandom(), uris, "Q1", "P580", new LiteralImpl( + "-13798000000-01-01T00:00:00Z", XMLSchema.DATETIME)); + b.expandedValue(Ontology.Time.VALUE, b.value()); + b.expandedValue(Ontology.Time.PRECISION, 3); + b.expandedValue(Ontology.Time.TIMEZONE, 0); + b.expandedValue(Ontology.Time.CALENDAR_MODEL, "Q1985727"); + for (Statement s : b.wikibaseStyle()) { + System.err.println(s); + } + statementBuilderTestCase(b); } @Test @@ -299,6 +231,13 @@ assertThat(universe, hasItem(expandedValueTimezoneDecl)); assertThat(universe, hasItem(expandedValueCalendarModelDecl)); } + + private void statementBuilderTestCase(ExpandedStatementBuilder b) { + List<Statement> statements = b.wikibaseStyleShuffled(); + munger.munge(b.entity(), statements); + assertThat(statements, b.mungedStyleMatcher()); + } + // TODO somevalue and 
novalue // TODO badges -- To view, visit https://gerrit.wikimedia.org/r/203154 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If2fc2ba2437592322c76a5655d3c4d1f6987fe0e Gerrit-PatchSet: 1 Gerrit-Project: wikidata/query/rdf Gerrit-Branch: master Gerrit-Owner: Manybubbles <never...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits