Manybubbles has uploaded a new change for review. https://gerrit.wikimedia.org/r/201242
Change subject: WIP: Handle wikidata's dates ...................................................................... WIP: Handle wikidata's dates Wikidata's dates represent values out of the range that blazegraph properly supports so we implement our own inline value for them. They also have the advantage of doing a bit of sanity munging on the blazegraph side. Change-Id: I083e4feefd752253c141de896e36c38a229639d8 --- M blazegraph/pom.xml D blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/Dummy.java A blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseDateExtension.java A blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseExtensionFactory.java M common/pom.xml A common/src/main/java/org/wikidata/query/rdf/common/WikibaseDate.java D common/src/test/java/org/wikidata/query/rdf/common/DummyUnitTest.java A common/src/test/java/org/wikidata/query/rdf/common/WikibaseDateTest.java M tools/pom.xml M tools/src/test/resources/blazegraph/RWStore.properties 10 files changed, 634 insertions(+), 27 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikidata/query/rdf refs/changes/42/201242/1 diff --git a/blazegraph/pom.xml b/blazegraph/pom.xml index 116a910..cbcb386 100644 --- a/blazegraph/pom.xml +++ b/blazegraph/pom.xml @@ -1,4 +1,5 @@ -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.wikidata.query.rdf</groupId> @@ -23,7 +24,13 @@ <artifactId>bigdata</artifactId> <version>${blazegraph.version}</version> </dependency> + <dependency> + <groupId>org.wikidata.query.rdf</groupId> + <artifactId>common</artifactId> + <version>${project.parent.version}</version> + </dependency> </dependencies> + <build> <plugins> <plugin> diff --git a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/Dummy.java b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/Dummy.java deleted file mode 100644 index ec39444..0000000 --- a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/Dummy.java +++ /dev/null @@ -1,5 +0,0 @@ -package org.wikidata.query.rdf.blazegraph; - -public class Dummy { - // TODO remove me when we have stuff to put in here. -} diff --git a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseDateExtension.java b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseDateExtension.java new file mode 100644 index 0000000..21ee709 --- /dev/null +++ b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseDateExtension.java @@ -0,0 +1,169 @@ +package org.wikidata.query.rdf.blazegraph; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.log4j.Logger; +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.vocabulary.XMLSchema; +import org.wikidata.query.rdf.common.WikibaseDate; + +import com.bigdata.rdf.internal.IDatatypeURIResolver; +import com.bigdata.rdf.internal.IExtension; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.impl.literal.AbstractLiteralIV; +import com.bigdata.rdf.internal.impl.literal.LiteralExtensionIV; +import com.bigdata.rdf.internal.impl.literal.XSDNumericIV; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.util.InnerCause; + +/** + * This implementation of {@link IExtension} implements inlining for literals + * that represent xsd:dateTime literals. Unlike + * {@link com.bigdata.rdf.internal.impl.extensions.DateTimeExtension} on which + * this is based, it stores the literals as time in <strong>seconds</strong> + * since the epoch. The seconds are encoded as an inline long. Also unlike + * DateTimeExtension it only supports UTC as the default time zone because UTC + * is king. + */ +public class WikibaseDateExtension<V extends BigdataValue> implements IExtension<V> { + private static final Logger log = Logger.getLogger(WikibaseDateExtension.class); + + private static final List<URI> SUPPORTED_DATA_TYPES = Collections.unmodifiableList(Arrays.asList( + XMLSchema.DATETIME, XMLSchema.DATE, XMLSchema.TIME, XMLSchema.GDAY, XMLSchema.GMONTH, XMLSchema.GMONTHDAY, + XMLSchema.GYEAR, XMLSchema.GYEARMONTH)); + + @SuppressWarnings("rawtypes") + private final Map<IV, BigdataURI> dataTypes; + private final Set<BigdataURI> dataTypesSet; + + public WikibaseDateExtension(final IDatatypeURIResolver resolver) { + @SuppressWarnings("rawtypes") + Map<IV, BigdataURI> dataTypes = new HashMap<>(); + for (URI uri : SUPPORTED_DATA_TYPES) { + BigdataURI val = resolver.resolve(uri); + dataTypes.put(val.getIV(), val); + } + this.dataTypes = Collections.unmodifiableMap(dataTypes); + dataTypesSet = Collections.unmodifiableSet(new HashSet<>(this.dataTypes.values())); + } + + @Override + public Set<BigdataURI> getDatatypes() { + return dataTypesSet; + } + + /** + * Attempts to convert the supplied value into an epoch representation and + * encodes the long in a delegate {@link XSDLongIV}, and returns an + * {@link LiteralExtensionIV} to wrap the native type. + */ + @Override + @SuppressWarnings({ "rawtypes", "unchecked" }) + public LiteralExtensionIV createIV(Value value) { + if (!(value instanceof Literal)) { + throw new IllegalArgumentException("Expected a literal but got " + value); + } + try { + Literal literal = (Literal) value; + BigdataURI dataType = resolveDataType(literal); + WikibaseDate date = WikibaseDate.fromString(value.stringValue()).cleanWeirdStuff(); + AbstractLiteralIV delegate = new XSDNumericIV(date.secondsSinceEpoch()); + return new LiteralExtensionIV(delegate, dataType.getIV()); + } catch (Exception e) { + /* + * Exception logging in blazegraph isn't great for this so we log + * here as well + */ + log.warn("Couldn't create IV", e); + throw e; + } + } + + private BigdataURI resolveDataType(Literal literal) { + URI dt = literal.getDatatype(); + if (dt == null) { + throw new IllegalArgumentException("Literal doesn't have a data type: " + literal); + } + + // TODO why loop instead of use a hash set or something? + for (BigdataURI val : dataTypes.values()) { + // Note: URI.stringValue() is efficient.... + if (val.stringValue().equals(dt.stringValue())) { + return val; + } + } + throw new IllegalArgumentException("Unrecognized data type: " + dt); + } + + /** + * Use the long value of the {@link XSDLongIV} delegate which represents + * seconds since the epoch to create a WikibaseDate and then represent that + * properly using xsd's string representations. + */ + @Override + @SuppressWarnings({ "rawtypes", "unchecked" }) + public V asValue(final LiteralExtensionIV iv, final BigdataValueFactory vf) { + if (!dataTypes.containsKey(iv.getExtensionIV())) { + throw new IllegalArgumentException("Unrecognized datatype: " + iv.getExtensionIV()); + } + + WikibaseDate date = WikibaseDate.fromSecondsSinceEpoch(iv.getDelegate().longValue()); + try { + BigdataURI dt = dataTypes.get(iv.getExtensionIV()); + + String s = date.toString(); + + // TODO all of these! + if (dt.equals(XMLSchema.DATETIME)) { + // Noop + } else if (dt.equals(XMLSchema.DATE)) { + // YYYY-MM-DD (10 chars) + s = s.substring(0, 10); + throw new UnsupportedOperationException("TODO"); + } else if (dt.equals(XMLSchema.TIME)) { + // everything after the date (from 11 chars in) + s = s.substring(10); + throw new UnsupportedOperationException("TODO"); + } else if (dt.equals(XMLSchema.GDAY)) { + // gDay Defines a part of a date - the day (---DD) + s = "---" + s.substring(8, 10); + throw new UnsupportedOperationException("TODO"); + } else if (dt.equals(XMLSchema.GMONTH)) { + // gMonth Defines a part of a date - the month (--MM) + s = "--" + s.substring(5, 7); + throw new UnsupportedOperationException("TODO"); + } else if (dt.equals(XMLSchema.GMONTHDAY)) { + // gMonthDay Defines a part of a date - the month and day + // (--MM-DD) + s = "--" + s.substring(5, 10); + throw new UnsupportedOperationException("TODO"); + } else if (dt.equals(XMLSchema.GYEAR)) { + // gYear Defines a part of a date - the year (YYYY) + s = s.substring(0, 4); + throw new UnsupportedOperationException("TODO"); + } else if (dt.equals(XMLSchema.GYEARMONTH)) { + // gYearMonth Defines a part of a date - the year and month + // (YYYY-MM) + s = s.substring(0, 7); + throw new UnsupportedOperationException("TODO"); + } + + return (V) vf.createLiteral(s, dt); + } catch (RuntimeException ex) { + if (InnerCause.isInnerCause(ex, InterruptedException.class)) { + throw ex; + } + throw new IllegalArgumentException("bad iv: " + iv, ex); + } + } +} diff --git a/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseExtensionFactory.java b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseExtensionFactory.java new file mode 100644 index 0000000..1b72abc --- /dev/null +++ b/blazegraph/src/main/java/org/wikidata/query/rdf/blazegraph/WikibaseExtensionFactory.java @@ -0,0 +1,29 @@ +package org.wikidata.query.rdf.blazegraph; + +import java.util.Collection; +import java.util.Iterator; + +import com.bigdata.rdf.internal.DefaultExtensionFactory; +import com.bigdata.rdf.internal.IDatatypeURIResolver; +import com.bigdata.rdf.internal.IExtension; +import com.bigdata.rdf.internal.ILexiconConfiguration; +import com.bigdata.rdf.internal.impl.extensions.DateTimeExtension; +import com.bigdata.rdf.model.BigdataLiteral; +import com.bigdata.rdf.model.BigdataValue; + +public class WikibaseExtensionFactory extends DefaultExtensionFactory { + @Override + @SuppressWarnings("rawtypes") + protected void _init(IDatatypeURIResolver resolver, ILexiconConfiguration<BigdataValue> config, + Collection<IExtension> extensions) { + if (config.isInlineDateTimes()) { + Iterator<IExtension> extensionsItr = extensions.iterator(); + while (extensionsItr.hasNext()) { + if (extensionsItr.next() instanceof DateTimeExtension) { + extensionsItr.remove(); + } + } + extensions.add(new WikibaseDateExtension<BigdataLiteral>(resolver)); + } + } +} diff --git a/common/pom.xml b/common/pom.xml index 1406621..23959bb 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -18,6 +18,15 @@ </license> </licenses> + <dependencies> + <dependency> + <groupId>joda-time</groupId> + <artifactId>joda-time</artifactId> + <version>2.7</version> + <scope>test</scope> + </dependency> + </dependencies> + <build> <plugins> <plugin> diff --git a/common/src/main/java/org/wikidata/query/rdf/common/WikibaseDate.java b/common/src/main/java/org/wikidata/query/rdf/common/WikibaseDate.java new file mode 100644 index 0000000..c479c5d --- /dev/null +++ b/common/src/main/java/org/wikidata/query/rdf/common/WikibaseDate.java @@ -0,0 +1,261 @@ +package org.wikidata.query.rdf.common; + +import static java.lang.Integer.parseInt; +import static java.lang.Long.parseLong; +import static java.util.concurrent.TimeUnit.DAYS; +import static java.util.concurrent.TimeUnit.HOURS; +import static java.util.concurrent.TimeUnit.MINUTES; + +import java.util.Arrays; +import java.util.Locale; + +/** + * Handles wikidata dates. Note that this ignores leap seconds. This isn't ok + * but its what joda time does so it where we're starting. + */ +public class WikibaseDate { + /** + * Build a WikibaseDate from the string representation used by Wikibase or + * any other xsd date type. + */ + public static WikibaseDate fromString(String string) { + // TODO handle all xsd:date style formats in addition to these _and_ + // handle timezones in them. + String[] s = string.split("(?<!\\A)[\\-\\:TZ]"); + int i = 0; + return new WikibaseDate(parseLong(s[i++]), parseInt(s[i++]), parseInt(s[i++]), parseInt(s[i++]), + parseInt(s[i++]), parseInt(s[i++])); + } + + /** + * Build a WikibaseDAte from seconds since epoch. + */ + public static WikibaseDate fromSecondsSinceEpoch(long secondsSinceEpoch) { + long year = yearFromSecondsSinceEpoch(secondsSinceEpoch); + int second = (int) (secondsSinceEpoch - calculateFirstDayOfYear(year) * SECONDS_PER_DAY); + int month = 1; + long[] secondsPerMonthCumulative = secondsPerMonthCumulative(year); + while (month < 12 && second >= secondsPerMonthCumulative[month]) { + month++; + } + second -= secondsPerMonthCumulative[month - 1]; + int day = second / SECONDS_PER_DAY + 1; + second %= SECONDS_PER_DAY; + int hour = second / SECONDS_PER_HOUR; + second %= SECONDS_PER_HOUR; + int minute = second / SECONDS_PER_MINUTE; + second %= SECONDS_PER_MINUTE; + return new WikibaseDate(year, month, day, hour, minute, second); + } + + private static final int DAYS_0000_TO_1970 = 719527; + private static final int SECONDS_PER_MINUTE = (int) MINUTES.toSeconds(1); + private static final int SECONDS_PER_HOUR = (int) HOURS.toSeconds(1); + private static final int SECONDS_PER_DAY = (int) DAYS.toSeconds(1); + private static final long AVERAGE_SECONDS_PER_YEAR = (SECONDS_PER_DAY * 365 * 3 + SECONDS_PER_DAY * 366) / 4; + private static final long SECONDS_AT_EPOCH = 1970 * AVERAGE_SECONDS_PER_YEAR; + /** + * Days per month in non-leap-years. + */ + static final int[] DAYS_PER_MONTH = new int[] { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + private static final long[] SECONDS_PER_MONTH = new long[12]; + private static final long[] SECONDS_PER_MONTH_CUMULATIVE = new long[12]; + private static final long[] SECONDS_PER_MONTH_CUMULATIVE_LEAP_YEAR; + static { + long total = 0; + for (int i = 0; i < DAYS_PER_MONTH.length; i++) { + SECONDS_PER_MONTH[i] = DAYS.toSeconds(DAYS_PER_MONTH[i]); + SECONDS_PER_MONTH_CUMULATIVE[i] = total; + total += SECONDS_PER_MONTH[i]; + } + SECONDS_PER_MONTH_CUMULATIVE_LEAP_YEAR = Arrays.copyOf(SECONDS_PER_MONTH_CUMULATIVE, + SECONDS_PER_MONTH_CUMULATIVE.length); + for (int i = 2; i < SECONDS_PER_MONTH_CUMULATIVE_LEAP_YEAR.length; i++) { + SECONDS_PER_MONTH_CUMULATIVE_LEAP_YEAR[i] += SECONDS_PER_DAY; + } + } + + // TODO it'll be faster to keep it in seconds since epoch form + private final long year; + private final int month; + private final int day; + private final int hour; + private final int minute; + private final int second; + + public WikibaseDate(long year, int month, int day, int hour, int minute, int second) { + this.year = year; + this.month = month; + this.day = day; + this.hour = hour; + this.minute = minute; + this.second = second; + } + + /** + * Wikidata contains some odd dates like -13798000000-00-00T00:00:00Z and + * February 30th. We simply guess what they mean here. + * + * @return his if the date is fine, a new date if we modified it + */ + public WikibaseDate cleanWeirdStuff() { + long newYear = year; + int newMonth = month; + int newDay = day; + int newHour = hour; + int newMinute = minute; + int newSecond = second; + if (month == 0) { + newMonth = 1; + } + if (day == 0) { + newDay = 1; + } else { + int maxDaysInMonth = DAYS_PER_MONTH[newMonth - 1]; + if (isLeapYear(newYear) && newMonth == 2) { + maxDaysInMonth++; + } + if (newDay > maxDaysInMonth) { + newMonth++; + newDay = newDay - maxDaysInMonth + 1; + if (newMonth > 12) { + newMonth = newMonth - 12; + newYear++; + } + } + } + if (newYear == year && newMonth == month && newDay == day && newHour == hour && newMinute == minute + && newSecond == second) { + return this; + } + return new WikibaseDate(newYear, newMonth, newDay, newHour, newMinute, newSecond); + } + + public long secondsSinceEpoch() { + long seconds = calculateFirstDayOfYear(year) * SECONDS_PER_DAY; + seconds += SECONDS_PER_MONTH_CUMULATIVE[month - 1]; + seconds += (day - 1) * SECONDS_PER_DAY; + seconds += hour * SECONDS_PER_HOUR; + seconds += minute * SECONDS_PER_MINUTE; + seconds += second; + if (month > 2 && isLeapYear(year)) { + seconds += SECONDS_PER_DAY; + } + return seconds; + } + + @Override + public String toString() { + return String.format(Locale.ROOT, "%+012d-%02d-%02dT%02d:%02d:%02dZ", year, month, day, hour, minute, second); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + day; + result = prime * result + hour; + result = prime * result + minute; + result = prime * result + month; + result = prime * result + second; + result = prime * result + (int) (year ^ (year >>> 32)); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + WikibaseDate other = (WikibaseDate) obj; + if (day != other.day) { + return false; + } + if (hour != other.hour) { + return false; + } + if (minute != other.minute) { + return false; + } + if (month != other.month) { + return false; + } + if (second != other.second) { + return false; + } + if (year != other.year) { + return false; + } + return true; + } + + static boolean isLeapYear(long year) { + // Borrowed from joda-time's GregorianChronology + return ((year & 3) == 0) && ((year % 100) != 0 || (year % 400) == 0); + } + + static long calculateFirstDayOfYear(long year) { + /* + * This is a clever hack for getting the number of leap years that works + * properly for negative years borrowed from JodaTime's + * GregorianChronology. + */ + long leapYears = year / 100; + if (year < 0) { + leapYears = ((year + 3) >> 2) - leapYears + ((leapYears + 3) >> 2) - 1; + } else { + leapYears = (year >> 2) - leapYears + (leapYears >> 2); + if (isLeapYear(year)) { + leapYears--; + } + } + return year * 365L + leapYears - DAYS_0000_TO_1970; + } + + static long yearFromSecondsSinceEpoch(long secondsSinceEpoch) { + /* + * Similar to Joda-Time's way of getting year from date - estimate and + * then fix the estimate. Except our estimates can be really off. + */ + long unitSeconds = AVERAGE_SECONDS_PER_YEAR / 2; + long i2 = secondsSinceEpoch / 2 + SECONDS_AT_EPOCH / 2; + if (i2 < 0) { + i2 = i2 - unitSeconds + 1; + } + long year = i2 / unitSeconds; + while (true) { + // Rerunning calculateFirstDayOfYear isn't going to be efficient + // here. + long yearStart = calculateFirstDayOfYear(year) * SECONDS_PER_DAY; + long diff = secondsSinceEpoch - yearStart; + if (diff < 0) { + year--; + continue; + } + if (diff >= SECONDS_PER_DAY * 365) { + yearStart += SECONDS_PER_DAY * 365; + if (isLeapYear(year)) { + yearStart += SECONDS_PER_DAY; + } + if (yearStart <= secondsSinceEpoch) { + year++; + continue; + } + } + return year; + } + } + + static long[] secondsPerMonthCumulative(long year) { + if (isLeapYear(year)) { + return SECONDS_PER_MONTH_CUMULATIVE_LEAP_YEAR; + } + return SECONDS_PER_MONTH_CUMULATIVE; + } +} diff --git a/common/src/test/java/org/wikidata/query/rdf/common/DummyUnitTest.java b/common/src/test/java/org/wikidata/query/rdf/common/DummyUnitTest.java deleted file mode 100644 index 9ed73d9..0000000 --- a/common/src/test/java/org/wikidata/query/rdf/common/DummyUnitTest.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.wikidata.query.rdf.common; - -import static org.hamcrest.Matchers.lessThan; - -import org.junit.Test; -import org.junit.runner.RunWith; - -import com.carrotsearch.randomizedtesting.RandomizedRunner; -import com.carrotsearch.randomizedtesting.RandomizedTest; - -@RunWith(RandomizedRunner.class) -public class DummyUnitTest extends RandomizedTest { - @Test - public void dummy() { - // TODO remove me when there are real tests here - assertThat(randomIntBetween(0, 10), lessThan(11)); - } -} diff --git a/common/src/test/java/org/wikidata/query/rdf/common/WikibaseDateTest.java b/common/src/test/java/org/wikidata/query/rdf/common/WikibaseDateTest.java new file mode 100644 index 0000000..16e42b3 --- /dev/null +++ b/common/src/test/java/org/wikidata/query/rdf/common/WikibaseDateTest.java @@ -0,0 +1,132 @@ +package org.wikidata.query.rdf.common; + +import static org.wikidata.query.rdf.common.WikibaseDate.DAYS_PER_MONTH; +import static org.wikidata.query.rdf.common.WikibaseDate.fromSecondsSinceEpoch; +import static org.wikidata.query.rdf.common.WikibaseDate.fromString; +import static org.wikidata.query.rdf.common.WikibaseDate.isLeapYear; + +import org.joda.time.chrono.GregorianChronology; +import org.junit.Test; +import org.junit.runner.RunWith; + +import com.carrotsearch.randomizedtesting.RandomizedRunner; +import com.carrotsearch.randomizedtesting.RandomizedTest; +import com.carrotsearch.randomizedtesting.annotations.Repeat; + +@RunWith(RandomizedRunner.class) +public class WikibaseDateTest extends RandomizedTest { + @Test + public void epoch() { + check(1970, 1, 1, 0, 0, 0); + } + + @Test + public void yearOne() { + check(1, 1, 1, 0, 0, 0); + } + + @Test + public void yearMinusOne() { + check(-1, 1, 1, 0, 0, 0); + } + + @Test + public void yearZero() { + check(0, 1, 1, 0, 0, 0); + } + + @Test + public void whenIWroteThis() { + check(2015, 4, 1, 13, 53, 40); + } + + @Test + public void onLeapYear() { + check(2000, 11, 1, 0, 0, 0); + } + + @Test + public void negativeLeapYear() { + check(-4, 11, 1, 0, 0, 0); + } + + @Test + public void onLeapYearBeforeLeapDay() { + check(2000, 2, 28, 13, 53, 40); + } + + @Test + public void onLeapYearOnLeapDay() { + check(2000, 2, 29, 13, 53, 40); + } + + @Test + public void onLeapYearAfterLeapDay() { + check(2000, 3, 1, 13, 53, 40); + } + + @Test + public void offLeapYearBeforeLeapDay() { + check(2001, 2, 28, 13, 53, 40); + } + + @Test + public void offLeapYearAfterLeapDay() { + check(2001, 3, 1, 13, 53, 40); + } + + @Test + public void veryNegativeYear() { + check(-286893830, 1, 1, 0, 0, 0); + } + + @Test + public void bigBang() { + WikibaseDate wbDate = fromString("-13798000000-00-00T00:00:00Z").cleanWeirdStuff(); + assertEquals(wbDate, fromString("-13798000000-01-01T00:00:00Z")); + assertEquals(-435422885863219200L, wbDate.secondsSinceEpoch()); + checkRoundTrip(wbDate); + } + + @Test + @Repeat(iterations = 100) + public void randomDate() { + // Build a valid random date + + // Joda doesn't work outside these years + int year = randomIntBetween(-292275054, 292278993); + int month = randomIntBetween(1, 12); + int day; + if (isLeapYear(year) && month == 2) { + day = randomIntBetween(1, 29); + } else { + day = randomIntBetween(1, DAYS_PER_MONTH[month - 1]); + } + int hour = randomIntBetween(0, 23); + int minute = randomIntBetween(0, 59); + int second = randomIntBetween(0, 59); + check(year, month, day, hour, minute, second); + } + + /** + * Checks that the dates resolve the same way joda-time resolves dates and + * that they round trip. + */ + private void check(int year, int month, int day, int hour, int minute, int second) { + WikibaseDate wbDate = new WikibaseDate(year, month, day, hour, minute, second); + assertEquals(wbDate.toString(), jodaSeconds(year, month, day, hour, minute, second), wbDate.secondsSinceEpoch()); + checkRoundTrip(wbDate); + } + + private void checkRoundTrip(WikibaseDate wbDate) { + long seconds = wbDate.secondsSinceEpoch(); + WikibaseDate roundDate = fromSecondsSinceEpoch(seconds); + assertEquals(wbDate, roundDate); + long roundSeconds = roundDate.secondsSinceEpoch(); + assertEquals(seconds, roundSeconds); + } + + private long jodaSeconds(int year, int month, int day, int hour, int minute, int second) { + return GregorianChronology.getInstanceUTC().getDateTimeMillis(year, month, day, hour, minute, second, 0) / 1000; + } +} diff --git a/tools/pom.xml b/tools/pom.xml index 5e30852..a687f25 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -100,7 +100,7 @@ <version>2.10</version> <executions> <execution> - <id>unpack</id> + <id>unpack-blazegraph</id> <phase>pre-integration-test</phase> <goals> <goal>unpack</goal> @@ -115,6 +115,28 @@ </artifactItem> </artifactItems> <outputDirectory>${project.build.directory}/blazegraph</outputDirectory> + </configuration> + </execution> + <execution> + <id>drop-in-blazegraph-extension</id> + <phase>pre-integration-test</phase> + <goals> + <goal>copy</goal> + </goals> + <configuration> + <artifactItems> + <artifactItem> + <groupId>org.wikidata.query.rdf</groupId> + <artifactId>common</artifactId> + <version>${project.parent.version}</version> + </artifactItem> + <artifactItem> + <groupId>org.wikidata.query.rdf</groupId> + <artifactId>blazegraph</artifactId> + <version>${project.parent.version}</version> + </artifactItem> + </artifactItems> + <outputDirectory>${project.build.directory}/blazegraph/WEB-INF/lib</outputDirectory> </configuration> </execution> </executions> @@ -171,7 +193,7 @@ </configuration> <executions> <execution> - <id>start-blzegraph</id> + <id>start-blazegraph</id> <phase>pre-integration-test</phase> <!-- This complains some because this project _isn't_ a war. But we want to start the Blazegraph war. --> <goals> @@ -226,7 +248,7 @@ <artifactId>jetty-maven-plugin</artifactId> <executions> <execution> - <id>run-blzegraph</id> + <id>run-blazegraph</id> <phase>generate-sources</phase> <goals> <goal>run</goal> diff --git a/tools/src/test/resources/blazegraph/RWStore.properties b/tools/src/test/resources/blazegraph/RWStore.properties index 0bf8332..12def50 100644 --- a/tools/src/test/resources/blazegraph/RWStore.properties +++ b/tools/src/test/resources/blazegraph/RWStore.properties @@ -20,6 +20,7 @@ # Use the default vocabulary for now. com.bigdata.rdf.store.AbstractTripleStore.vocabularyClass=com.bigdata.rdf.vocab.DefaultBigdataVocabulary +com.bigdata.rdf.store.AbstractTripleStore.extensionFactoryClass=org.wikidata.query.rdf.blazegraph.WikibaseExtensionFactory # These seem to be ubiquitous overwrites. Not sure why they aren't the default but it works. com.bigdata.namespace.kb.lex.com.bigdata.btree.BTree.branchingFactor=400 -- To view, visit https://gerrit.wikimedia.org/r/201242 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I083e4feefd752253c141de896e36c38a229639d8 Gerrit-PatchSet: 1 Gerrit-Project: wikidata/query/rdf Gerrit-Branch: master Gerrit-Owner: Manybubbles <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
