http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/sail/src/test/java/mvm/rya/RdfCloudTripleStoreTest.java ---------------------------------------------------------------------- diff --git a/sail/src/test/java/mvm/rya/RdfCloudTripleStoreTest.java b/sail/src/test/java/mvm/rya/RdfCloudTripleStoreTest.java new file mode 100644 index 0000000..eee6bce --- /dev/null +++ b/sail/src/test/java/mvm/rya/RdfCloudTripleStoreTest.java @@ -0,0 +1,699 @@ +package mvm.rya; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + + +import junit.framework.TestCase; +import mvm.rya.accumulo.AccumuloRdfConfiguration; +import mvm.rya.accumulo.AccumuloRyaDAO; +import mvm.rya.api.RdfCloudTripleStoreConstants; +import mvm.rya.rdftriplestore.RdfCloudTripleStore; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.openrdf.model.Namespace; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.StatementImpl; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.query.*; +import org.openrdf.repository.RepositoryException; +import org.openrdf.repository.RepositoryResult; +import org.openrdf.repository.sail.SailRepository; +import org.openrdf.repository.sail.SailRepositoryConnection; + +import javax.xml.datatype.DatatypeConfigurationException; +import javax.xml.datatype.DatatypeFactory; +import java.util.GregorianCalendar; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Class PartitionConnectionTest + * Date: Jul 6, 2011 + * Time: 5:24:07 PM + */ +public class RdfCloudTripleStoreTest extends TestCase { + public static final String NAMESPACE = "http://here/2010/tracked-data-provenance/ns#";//44 len + public static final String RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + public static final String HBNAMESPACE = "http://here/2010/tracked-data-provenance/heartbeat/ns#"; + public static final String HB_TIMESTAMP = HBNAMESPACE + "timestamp"; + + private SailRepository repository; + private SailRepositoryConnection connection; + + ValueFactory vf = ValueFactoryImpl.getInstance(); + + private String objectUuid = "objectuuid1"; + private String ancestor = "ancestor1"; + private String descendant = "descendant1"; + private static final long START = 1309532965000l; + private static final long END = 1310566686000l; + private Connector connector; + + @Override + protected void setUp() throws Exception { + super.setUp(); + connector = new MockInstance().getConnector("", ""); + + RdfCloudTripleStore sail = new RdfCloudTripleStore(); + AccumuloRdfConfiguration conf = new AccumuloRdfConfiguration(); + conf.setTablePrefix("lubm_"); + sail.setConf(conf); + AccumuloRyaDAO crdfdao = new AccumuloRyaDAO(); + crdfdao.setConnector(connector); + crdfdao.setConf(conf); + sail.setRyaDAO(crdfdao); + + repository = new SailRepository(sail); + repository.initialize(); + connection = repository.getConnection(); + + loadData(); + } + + private void loadData() throws RepositoryException, DatatypeConfigurationException { + connection.add(new StatementImpl(vf.createURI(NAMESPACE, objectUuid), vf.createURI(NAMESPACE, "name"), vf.createLiteral("objUuid"))); + //created + String uuid = "uuid1"; + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(RDF_NS, "type"), vf.createURI(NAMESPACE, "Created"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "createdItem"), vf.createURI(NAMESPACE, objectUuid))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedBy"), vf.createURI("urn:system:A"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "stringLit"), vf.createLiteral("stringLit1"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "stringLit"), vf.createLiteral("stringLit2"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "stringLit"), vf.createLiteral("stringLit3"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "stringLit"), vf.createLiteral("stringLit4"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "strLit1"), vf.createLiteral("strLit1"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "strLit1"), vf.createLiteral("strLit2"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "strLit1"), vf.createLiteral("strLit3"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 0, 0, 0, 0)))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "reportedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 1, 0, 0, 0)))); + //clicked + uuid = "uuid2"; + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(RDF_NS, "type"), vf.createURI(NAMESPACE, "Clicked"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "clickedItem"), vf.createURI(NAMESPACE, objectUuid))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedBy"), vf.createURI("urn:system:B"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 2, 0, 0, 0)))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "reportedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 3, 0, 0, 0)))); + //deleted + uuid = "uuid3"; + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(RDF_NS, "type"), vf.createURI(NAMESPACE, "Deleted"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "deletedItem"), vf.createURI(NAMESPACE, objectUuid))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedBy"), vf.createURI("urn:system:C"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 4, 0, 0, 0)))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "reportedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 5, 0, 0, 0)))); + //dropped + uuid = "uuid4"; + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(RDF_NS, "type"), vf.createURI(NAMESPACE, "Dropped"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "droppedItem"), vf.createURI(NAMESPACE, objectUuid))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedBy"), vf.createURI("urn:system:D"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 6, 0, 0, 0)))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "reportedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 7, 0, 0, 0)))); + //received + uuid = "uuid5"; + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(RDF_NS, "type"), vf.createURI(NAMESPACE, "Received"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "receivedItem"), vf.createURI(NAMESPACE, objectUuid))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedBy"), vf.createURI("urn:system:E"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 8, 0, 0, 0)))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "reportedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 9, 0, 0, 0)))); + //sent + uuid = "uuid6"; + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(RDF_NS, "type"), vf.createURI(NAMESPACE, "Sent"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "sentItem"), vf.createURI(NAMESPACE, objectUuid))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedBy"), vf.createURI("urn:system:F"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 10, 0, 0, 0)))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "reportedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 11, 0, 0, 0)))); + //stored + uuid = "uuid7"; + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(RDF_NS, "type"), vf.createURI(NAMESPACE, "Stored"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "storedItem"), vf.createURI(NAMESPACE, objectUuid))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedBy"), vf.createURI("urn:system:G"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "performedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 12, 0, 0, 0)))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, uuid), vf.createURI(NAMESPACE, "reportedAt"), vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(2011, 7, 12, 6, 13, 0, 0, 0)))); + + //derivedFrom + connection.add(new StatementImpl(vf.createURI(NAMESPACE, descendant), vf.createURI(NAMESPACE, "derivedFrom"), vf.createURI(NAMESPACE, ancestor))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, descendant), vf.createURI(NAMESPACE, "name"), vf.createLiteral("descendantOne"))); + connection.add(new StatementImpl(vf.createURI(NAMESPACE, ancestor), vf.createURI(NAMESPACE, "name"), vf.createLiteral("ancestor1"))); + + //heartbeats + String hbuuid = "hbuuid1"; + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(RDF_NS, "type"), vf.createURI(HBNAMESPACE, "HeartbeatMeasurement"))); + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(HB_TIMESTAMP), vf.createLiteral((START + 1) + ""))); + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(HBNAMESPACE, "count"), vf.createLiteral(1 + ""))); + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(HBNAMESPACE, "systemName"), vf.createURI("urn:system:A"))); + connection.add(new StatementImpl(vf.createURI("urn:system:A"), vf.createURI(HBNAMESPACE, "heartbeat"), vf.createURI(HBNAMESPACE, hbuuid))); + + hbuuid = "hbuuid2"; + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(RDF_NS, "type"), vf.createURI(HBNAMESPACE, "HeartbeatMeasurement"))); + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(HB_TIMESTAMP), vf.createLiteral((START + 2) + ""))); + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(HBNAMESPACE, "count"), vf.createLiteral(2 + ""))); + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(HBNAMESPACE, "systemName"), vf.createURI("urn:system:B"))); + connection.add(new StatementImpl(vf.createURI("urn:system:B"), vf.createURI(HBNAMESPACE, "heartbeat"), vf.createURI(HBNAMESPACE, hbuuid))); + + hbuuid = "hbuuid3"; + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(RDF_NS, "type"), vf.createURI(HBNAMESPACE, "HeartbeatMeasurement"))); + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(HB_TIMESTAMP), vf.createLiteral((START + 3) + ""))); + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(HBNAMESPACE, "count"), vf.createLiteral(3 + ""))); + connection.add(new StatementImpl(vf.createURI(HBNAMESPACE, hbuuid), vf.createURI(HBNAMESPACE, "systemName"), vf.createURI("urn:system:C"))); + connection.add(new StatementImpl(vf.createURI("urn:system:C"), vf.createURI(HBNAMESPACE, "heartbeat"), vf.createURI(HBNAMESPACE, hbuuid))); + + connection.add(new StatementImpl(vf.createURI("urn:subj1"), vf.createURI("urn:pred"), vf.createLiteral("obj1"))); + connection.add(new StatementImpl(vf.createURI("urn:subj1"), vf.createURI("urn:pred"), vf.createLiteral("obj2"))); + connection.add(new StatementImpl(vf.createURI("urn:subj1"), vf.createURI("urn:pred"), vf.createLiteral("obj3"))); + connection.add(new StatementImpl(vf.createURI("urn:subj1"), vf.createURI("urn:pred"), vf.createLiteral("obj4"))); + connection.add(new StatementImpl(vf.createURI("urn:subj2"), vf.createURI("urn:pred"), vf.createLiteral("obj1"))); + connection.add(new StatementImpl(vf.createURI("urn:subj2"), vf.createURI("urn:pred"), vf.createLiteral("obj2"))); + connection.add(new StatementImpl(vf.createURI("urn:subj2"), vf.createURI("urn:pred"), vf.createLiteral("obj3"))); + connection.add(new StatementImpl(vf.createURI("urn:subj2"), vf.createURI("urn:pred"), vf.createLiteral("obj4"))); + connection.add(new StatementImpl(vf.createURI("urn:subj3"), vf.createURI("urn:pred"), vf.createLiteral("obj1"))); + connection.add(new StatementImpl(vf.createURI("urn:subj3"), vf.createURI("urn:pred"), vf.createLiteral("obj4"))); + + //Foreign Chars + connection.add(new StatementImpl(vf.createURI("urn:subj1"), vf.createURI("urn:pred"), vf.createLiteral(FAN_CH_SIM))); + connection.add(new StatementImpl(vf.createURI("urn:subj1"), vf.createURI("urn:pred"), vf.createLiteral(FAN_CH_TRAD))); + connection.add(new StatementImpl(vf.createURI("urn:subj1"), vf.createURI("urn:pred"), vf.createLiteral(FAN_TH))); + connection.add(new StatementImpl(vf.createURI("urn:subj1"), vf.createURI("urn:pred"), vf.createLiteral(FAN_RN))); + connection.add(new StatementImpl(vf.createURI("urn:subj2"), vf.createURI("urn:pred"), vf.createLiteral(FAN_CH_SIM))); + connection.add(new StatementImpl(vf.createURI("urn:subj2"), vf.createURI("urn:pred"), vf.createLiteral(FAN_CH_TRAD))); + connection.add(new StatementImpl(vf.createURI("urn:subj2"), vf.createURI("urn:pred"), vf.createLiteral(FAN_TH))); + connection.add(new StatementImpl(vf.createURI("urn:subj2"), vf.createURI("urn:pred"), vf.createLiteral(FAN_RN))); + connection.add(new StatementImpl(vf.createURI("urn:subj3"), vf.createURI("urn:pred"), vf.createLiteral(FAN_CH_SIM))); + connection.add(new StatementImpl(vf.createURI("urn:subj3"), vf.createURI("urn:pred"), vf.createLiteral(FAN_CH_TRAD))); + + connection.commit(); + } + + private static final String FAN_CH_SIM = "é£æ"; + private static final String FAN_CH_TRAD = "風æ"; + private static final String FAN_TH = "à¹à¸à¸"; + private static final String FAN_RN = "венÑилÑÑоÑ"; + + @Override + protected void tearDown() throws Exception { + super.tearDown(); + connection.close(); + repository.shutDown(); + } + + protected String getXmlDate(long ts) throws DatatypeConfigurationException { + GregorianCalendar gregorianCalendar = new GregorianCalendar(); + gregorianCalendar.setTimeInMillis(ts); + //"2011-07-12T05:12:00.000Z"^^xsd:dateTime + return "\"" + vf.createLiteral(DatatypeFactory.newInstance().newXMLGregorianCalendar(gregorianCalendar)).stringValue() + "\"^^xsd:dateTime"; + } + +// public void testScanAll() throws Exception { +// Scanner sc = connector.createScanner("lubm_spo", Constants.NO_AUTHS); +// for (Map.Entry<Key, Value> aSc : sc) System.out.println(aSc.getKey().getRow()); +// } + + public void testNamespace() throws Exception { + String namespace = "urn:testNamespace#"; + String prefix = "pfx"; + connection.setNamespace(prefix, namespace); + + assertEquals(namespace, connection.getNamespace(prefix)); + } + + public void testValues() throws Exception { + String query = "SELECT DISTINCT ?entity WHERE {" + + "VALUES (?entity) { (<http://test/entity>) }" + + "}"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(1, tupleHandler.getCount()); + } + + public void testGetNamespaces() throws Exception { + String namespace = "urn:testNamespace#"; + String prefix = "pfx"; + connection.setNamespace(prefix, namespace); + + namespace = "urn:testNamespace2#"; + prefix = "pfx2"; + connection.setNamespace(prefix, namespace); + + RepositoryResult<Namespace> result = connection.getNamespaces(); + int count = 0; + while (result.hasNext()) { + result.next(); + count++; + } + + assertEquals(2, count); + } + + public void testAddCommitStatement() throws Exception { + StatementImpl stmt = new StatementImpl(vf.createURI("urn:namespace#subj"), vf.createURI("urn:namespace#pred"), vf.createLiteral("object")); + connection.add(stmt); + connection.commit(); + } + + public void testSelectOnlyQuery() throws Exception { + String query = "PREFIX ns:<" + NAMESPACE + ">\n" + + "select * where {\n" + + "ns:uuid1 ns:createdItem ?cr.\n" + + "ns:uuid1 ns:reportedAt ?ra.\n" + + "ns:uuid1 ns:performedAt ?pa.\n" + + "}\n"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(1, tupleHandler.getCount()); + } + + public void testForeignSelectOnlyQuery() throws Exception { + String query; + query = "select * where { ?s <urn:pred> ?o }"; // hits po + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(20, tupleHandler.getCount()); + + query = "select * where { <urn:subj1> <urn:pred> ?o }"; //hits spo + tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(8, tupleHandler.getCount()); + + query = "select * where { ?s ?p '"+FAN_CH_SIM+"' }"; //hits osp + tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(3, tupleHandler.getCount()); +} + + + + //provenance Queries////////////////////////////////////////////////////////////////////// + + public void testEventInfo() throws Exception { + String query = "PREFIX ns:<" + NAMESPACE + ">\n" + + "select * where {\n" + + " ns:uuid1 ?p ?o.\n" + + "}\n"; + + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(12, tupleHandler.getCount()); + } + + public void testAllAncestors() throws Exception { + String query = "PREFIX ns:<" + NAMESPACE + ">\n" + + "select * where {\n" + + "ns:" + descendant + " ns:derivedFrom ?dr.\n" + + "}\n"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); + // tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(1, tupleHandler.getCount()); + } + + public void testAllDescendants() throws Exception { + String query = "PREFIX ns:<" + NAMESPACE + ">\n" + + "select * where {\n" + + "?ds ns:derivedFrom ns:" + ancestor + ".\n" + + "}\n"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(1, tupleHandler.getCount()); + } + + public void testEventsForUri() throws Exception { + String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + + "PREFIX ns:<" + NAMESPACE + ">\n" + + "PREFIX mvmpart: <urn:mvm.mmrts.partition.rdf/08/2011#>\n" + + "PREFIX mvm: <" + RdfCloudTripleStoreConstants.NAMESPACE + ">\n" + + "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n" + + "select * where {\n" + + "{" + + " ?s rdf:type ns:Created.\n" + + " ?s ns:createdItem ns:objectuuid1.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Clicked.\n" + + " ?s ns:clickedItem ns:objectuuid1.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Deleted.\n" + + " ?s ns:deletedItem ns:objectuuid1.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Dropped.\n" + + " ?s ns:droppedItem ns:objectuuid1.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Received.\n" + + " ?s ns:receivedItem ns:objectuuid1.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Stored.\n" + + " ?s ns:storedItem ns:objectuuid1.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Sent.\n" + + " ?s ns:sentItem ns:objectuuid1.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "}\n"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.setBinding(START_BINDING, vf.createLiteral(START)); +// tupleQuery.setBinding(END_BINDING, vf.createLiteral(END)); +// tupleQuery.setBinding(TIME_PREDICATE, vf.createURI(NAMESPACE, "performedAt")); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(7, tupleHandler.getCount()); + } + + public void testAllEvents() throws Exception { + String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + + "PREFIX ns:<" + NAMESPACE + ">\n" + + "PREFIX mvmpart: <urn:mvm.mmrts.partition.rdf/08/2011#>\n" + + "PREFIX mvm: <" + RdfCloudTripleStoreConstants.NAMESPACE + ">\n" + + "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n" + + "select * where {\n" + + "{" + + " ?s rdf:type ns:Created.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Clicked.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Deleted.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Dropped.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Received.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Stored.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "UNION {" + + " ?s rdf:type ns:Sent.\n" + + " ?s ns:performedBy ?pb.\n" + + " ?s ns:performedAt ?pa.\n" + + " FILTER(mvm:range(?pa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n" + + "}\n"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.setBinding(START_BINDING, vf.createLiteral(START)); +// tupleQuery.setBinding(END_BINDING, vf.createLiteral(END)); +// tupleQuery.setBinding(TIME_PREDICATE, vf.createURI(NAMESPACE, "performedAt")); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(7, tupleHandler.getCount()); +// System.out.println(tupleHandler.getCount()); + } + + public void testEventsBtwnSystems() throws Exception { //TODO: How to do XMLDateTime ranges + String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + + "PREFIX ns:<" + NAMESPACE + ">\n" + + "PREFIX mvmpart: <urn:mvm.mmrts.partition.rdf/08/2011#>\n" + + "PREFIX mvm: <" + RdfCloudTripleStoreConstants.NAMESPACE + ">\n" + + "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n" + + "select * where {\n" + + " ?sendEvent rdf:type ns:Sent;\n" + + " ns:sentItem ?objUuid;\n" + + " ns:performedBy <urn:system:F>;\n" + + " ns:performedAt ?spa.\n" + + " ?recEvent rdf:type ns:Received;\n" + + " ns:receivedItem ?objUuid;\n" + + " ns:performedBy <urn:system:E>;\n" + + " ns:performedAt ?rpa.\n" + +// " FILTER(mvm:range(?spa, \"2011-07-12T05:12:00.000Z\"^^xsd:dateTime, \"2011-07-12T07:12:00.000Z\"^^xsd:dateTime))\n" + + " FILTER(mvm:range(?spa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + " FILTER(mvm:range(?rpa, " + getXmlDate(START) + ", " + getXmlDate(END) + "))\n" + + "}\n"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.setBinding(START_BINDING, vf.createLiteral(START)); +// tupleQuery.setBinding(END_BINDING, vf.createLiteral(END)); +// tupleQuery.setBinding(TIME_PREDICATE, vf.createURI(NAMESPACE, "performedAt")); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(1, tupleHandler.getCount()); + } + + public void testHeartbeatCounts() throws Exception { + String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + + "PREFIX hns:<" + HBNAMESPACE + ">\n" + + "PREFIX mvmpart: <urn:mvm.mmrts.partition.rdf/08/2011#>\n" + + "PREFIX mvm: <" + RdfCloudTripleStoreConstants.NAMESPACE + ">\n" + + "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n" + + "select * where {\n" + + " ?hb rdf:type hns:HeartbeatMeasurement;\n" + + " hns:count ?count;\n" + + " hns:timestamp ?ts;\n" + + " hns:systemName ?systemName.\n" + + " FILTER(mvm:range(?ts, \"" + START + "\", \"" + (START + 3) + "\"))\n" + + "}\n"; +// System.out.println(query); + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.setBinding(RdfCloudTripleStoreConfiguration.CONF_QUERYPLAN_FLAG, vf.createLiteral(true)); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(2, tupleHandler.getCount()); + } + + //provenance Queries////////////////////////////////////////////////////////////////////// + + public void testCreatedEvents() throws Exception { + String query = "PREFIX ns:<" + NAMESPACE + ">\n" + + "select * where {\n" + + " ?s ns:createdItem ns:objectuuid1.\n" + + " ?s ns:reportedAt ?ra.\n" + + "}\n"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(1, tupleHandler.getCount()); + } + + public void testSelectAllAfterFilter() throws Exception { + String query = "PREFIX ns:<" + NAMESPACE + ">\n" + + "select * where {\n" + + " ?s ns:createdItem ns:objectuuid1.\n" + + " ?s ?p ?o.\n" + + "}\n"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(12, tupleHandler.getCount()); + } + + public void testFilterQuery() throws Exception { + String query = "PREFIX ns:<" + NAMESPACE + ">\n" + + "select * where {\n" + + "ns:uuid1 ns:createdItem ?cr.\n" + + "ns:uuid1 ns:stringLit ?sl.\n" + + "FILTER regex(?sl, \"stringLit1\")" + + "ns:uuid1 ns:reportedAt ?ra.\n" + + "ns:uuid1 ns:performedAt ?pa.\n" + + "}\n"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); + // tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(1, tupleHandler.getCount()); + } + + public void testMultiplePredicatesMultipleBindingSets() throws Exception { + //MMRTS-121 + String query = "PREFIX ns:<" + NAMESPACE + ">\n" + + "select * where {\n" + + "?id ns:createdItem ns:objectuuid1.\n" + + "?id ns:stringLit ?sl.\n" + + "?id ns:strLit1 ?s2.\n" + + "}\n"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(12, tupleHandler.getCount()); + } + + public void testMultiShardLookupTimeRange() throws Exception { + //MMRTS-113 + String query = "PREFIX hb: <http://here/2010/tracked-data-provenance/heartbeat/ns#>\n" + + "PREFIX mvmpart: <urn:mvm.mmrts.partition.rdf/08/2011#>\n" + + "SELECT * WHERE\n" + + "{\n" + + "?id hb:timestamp ?timestamp.\n" + +// "FILTER(mvmpart:timeRange(?id, hb:timestamp, " + START + " , " + (START + 2) + " , 'TIMESTAMP'))\n" + + "?id hb:count ?count.\n" + + "?system hb:heartbeat ?id.\n" + + "}"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(3, tupleHandler.getCount()); + } + + public void testMultiShardLookupTimeRangeValueConst() throws Exception { + //MMRTS-113 + String query = "PREFIX hb: <http://here/2010/tracked-data-provenance/heartbeat/ns#>\n" + + "PREFIX mvmpart: <urn:mvm.mmrts.partition.rdf/08/2011#>\n" + + "SELECT * WHERE\n" + + "{\n" + + "<http://here/2010/tracked-data-provenance/heartbeat/ns#hbuuid2> hb:timestamp ?timestamp.\n" + +// "FILTER(mvmpart:timeRange(<http://here/2010/tracked-data-provenance/heartbeat/ns#hbuuid2>, hb:timestamp, " + START + " , " + END + " , 'TIMESTAMP'))\n" + + "<http://here/2010/tracked-data-provenance/heartbeat/ns#hbuuid2> hb:count ?count.\n" + + "?system hb:heartbeat <http://here/2010/tracked-data-provenance/heartbeat/ns#hbuuid2>.\n" + + "}"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(1, tupleHandler.getCount()); + } + + public void testLinkQuery() throws Exception { + String query = "PREFIX ns:<" + NAMESPACE + ">\n" + + "SELECT * WHERE {\n" + + " <http://here/2010/tracked-data-provenance/ns#uuid1> ns:createdItem ?o .\n" + + " ?o ns:name ?n .\n" + + "}"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(1, tupleHandler.getCount()); + } + + public void testRangeOverDuplicateItems() throws Exception { + String query = "PREFIX ns:<" + NAMESPACE + ">\n" + + "SELECT * WHERE {\n" + + " ?subj <urn:pred> \"obj2\" .\n" + + "}"; + TupleQuery tupleQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query); +// tupleQuery.evaluate(new PrintTupleHandler()); + CountTupleHandler tupleHandler = new CountTupleHandler(); + tupleQuery.evaluate(tupleHandler); + assertEquals(2, tupleHandler.getCount()); + } + + private static class PrintTupleHandler implements TupleQueryResultHandler { + + @Override + public void startQueryResult(List<String> strings) throws TupleQueryResultHandlerException { + } + + @Override + public void endQueryResult() throws TupleQueryResultHandlerException { + + } + + @Override + public void handleSolution(BindingSet bindingSet) throws TupleQueryResultHandlerException { + System.out.println(bindingSet); + } + + @Override + public void handleBoolean(boolean paramBoolean) throws QueryResultHandlerException { + } + + @Override + public void handleLinks(List<String> paramList) throws QueryResultHandlerException { + } + } + + private static class CountTupleHandler implements TupleQueryResultHandler { + + int count = 0; + + @Override + public void startQueryResult(List<String> strings) throws TupleQueryResultHandlerException { + } + + @Override + public void endQueryResult() throws TupleQueryResultHandlerException { + } + + @Override + public void handleSolution(BindingSet bindingSet) throws TupleQueryResultHandlerException { + count++; + } + + public int getCount() { + return count; + } + + @Override + public void handleBoolean(boolean paramBoolean) throws QueryResultHandlerException { + } + + @Override + public void handleLinks(List<String> paramList) throws QueryResultHandlerException { + } + } + +}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/sail/src/test/java/mvm/rya/RdfCloudTripleStoreUtilsTest.java ---------------------------------------------------------------------- diff --git a/sail/src/test/java/mvm/rya/RdfCloudTripleStoreUtilsTest.java b/sail/src/test/java/mvm/rya/RdfCloudTripleStoreUtilsTest.java new file mode 100644 index 0000000..de49ef2 --- /dev/null +++ b/sail/src/test/java/mvm/rya/RdfCloudTripleStoreUtilsTest.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +//package mvm.rya; + +// +//import java.util.List; +// +//import junit.framework.TestCase; +// +//import org.openrdf.model.BNode; +//import org.openrdf.model.Resource; +//import org.openrdf.model.URI; +//import org.openrdf.model.Value; +//import org.openrdf.model.impl.ValueFactoryImpl; +// +//import com.google.common.io.ByteStreams; +// +//import static mvm.rya.api.RdfCloudTripleStoreUtils.*; +// +//public class RdfCloudTripleStoreUtilsTest extends TestCase { +// +// public void testWriteReadURI() throws Exception { +// final ValueFactoryImpl vf = new ValueFactoryImpl(); +// URI uri = vf.createURI("http://www.example.org/test/rel"); +// byte[] value = writeValue(uri); +// +// Value readValue = readValue(ByteStreams +// .newDataInput(value), vf); +// assertEquals(uri, readValue); +// } +// +// public void testWriteReadBNode() throws Exception { +// final ValueFactoryImpl vf = new ValueFactoryImpl(); +// Value val = vf.createBNode("bnodeid"); +// byte[] value = writeValue(val); +// +// Value readValue = readValue(ByteStreams +// .newDataInput(value), vf); +// assertEquals(val, readValue); +// } +// +// public void testWriteReadLiteral() throws Exception { +// final ValueFactoryImpl vf = new ValueFactoryImpl(); +// Value val = vf.createLiteral("myliteral"); +// byte[] value = writeValue(val); +// +// Value readValue = readValue(ByteStreams +// .newDataInput(value), vf); +// assertEquals(val, readValue); +// } +// +// public void testContexts() throws Exception { +// final ValueFactoryImpl vf = new ValueFactoryImpl(); +// BNode cont1 = vf.createBNode("cont1"); +// BNode cont2 = vf.createBNode("cont2"); +// BNode cont3 = vf.createBNode("cont3"); +// +// byte[] cont_bytes = writeContexts(cont1, cont2, +// cont3); +// final String cont = new String(cont_bytes); +// System.out.println(cont); +// +// List<Resource> contexts = readContexts(cont_bytes, +// vf); +// for (Resource resource : contexts) { +// System.out.println(resource); +// } +// } +//} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/80faf06d/sail/src/test/java/mvm/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java ---------------------------------------------------------------------- diff --git a/sail/src/test/java/mvm/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java b/sail/src/test/java/mvm/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java new file mode 100644 index 0000000..c97c854 --- /dev/null +++ b/sail/src/test/java/mvm/rya/rdftriplestore/evaluation/QueryJoinSelectOptimizerTest.java @@ -0,0 +1,992 @@ +package mvm.rya.rdftriplestore.evaluation; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import mvm.rya.accumulo.AccumuloRdfConfiguration; +import mvm.rya.api.RdfCloudTripleStoreConfiguration; +import mvm.rya.api.layout.TablePrefixLayoutStrategy; +import mvm.rya.api.persist.RdfEvalStatsDAO; +import mvm.rya.joinselect.AccumuloSelectivityEvalDAO; +import mvm.rya.prospector.service.ProspectorServiceEvalStatsDAO; +import mvm.rya.rdftriplestore.evaluation.QueryJoinSelectOptimizer; +import mvm.rya.rdftriplestore.evaluation.RdfCloudTripleStoreSelectivityEvaluationStatistics; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.BatchWriter; +import org.apache.accumulo.core.client.BatchWriterConfig; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.Instance; +import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.openrdf.query.MalformedQueryException; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.evaluation.impl.FilterOptimizer; +import org.openrdf.query.parser.ParsedQuery; +import org.openrdf.query.parser.sparql.SPARQLParser; + +public class QueryJoinSelectOptimizerTest { + + private static final String DELIM = "\u0000"; + private final byte[] EMPTY_BYTE = new byte[0]; + private final Value EMPTY_VAL = new Value(EMPTY_BYTE); + + private String q1 = ""// + + "SELECT ?h " // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + "}";// + + private String Q1 = ""// + + "SELECT ?h " // + + "{" // + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + "}";// + + private String q2 = ""// + + "SELECT ?h ?l ?m" // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " ?m <uri:eats> <uri:chickens>. " // + + " ?m <uri:scratches> <uri:ears>. " // + + "}";// + + private String Q2 = ""// + + "SELECT ?h ?l ?m" // + + "{" // + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?m <uri:eats> <uri:chickens>. " // + + " ?m <uri:scratches> <uri:ears>. " // + + "}";// + + private String q3 = ""// + + "SELECT ?h ?l ?m" // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " {?m <uri:eats> <uri:chickens>} OPTIONAL {?m <uri:scratches> <uri:ears>}. " // + + " {?m <uri:eats> <uri:kibble>. ?m <uri:watches> <uri:television>.} UNION {?m <uri:rollsIn> <uri:mud>}. " // + + " ?l <uri:runsIn> <uri:field> ."// + + " ?l <uri:smells> <uri:butt> ."// + + " ?l <uri:eats> <uri:sticks> ."// + + "}";// + + private String Q4 = ""// + + "SELECT ?h ?l ?m" // + + "{" // + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?m <uri:scratches> <uri:ears>. " // + + " ?m <uri:eats> <uri:chickens>. " // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + "}";// + + private String q5 = ""// + + "SELECT ?h ?l ?m" // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " {?m <uri:eats> <uri:kibble>. ?m <uri:watches> <uri:television>.?m <uri:eats> <uri:chickens>} " + " UNION {?m <uri:rollsIn> <uri:mud>}. " // + + " ?l <uri:runsIn> <uri:field> ."// + + " ?l <uri:smells> <uri:butt> ."// + + " ?l <uri:eats> <uri:sticks> ."// + + "}";// + + + private String q6 = ""// + + "SELECT ?h ?l ?m" // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " FILTER(?l = <uri:grover>) ." // + + " {?m <uri:eats> <uri:kibble>. ?m <uri:watches> <uri:television>.?m <uri:eats> <uri:chickens>} " + " UNION {?m <uri:rollsIn> <uri:mud>}. " // + + " ?l <uri:runsIn> <uri:field> ."// + + " ?l <uri:smells> <uri:butt> ."// + + " ?l <uri:eats> <uri:sticks> ."// + + "}";// + + private Connector conn; + AccumuloRdfConfiguration arc; + BatchWriterConfig config; + RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> res; + Instance mock; + + @Before + public void init() throws AccumuloException, AccumuloSecurityException, TableNotFoundException, TableExistsException { + + mock = new MockInstance("accumulo"); + PasswordToken pToken = new PasswordToken("pass".getBytes()); + conn = mock.getConnector("user", pToken); + + config = new BatchWriterConfig(); + config.setMaxMemory(1000); + config.setMaxLatency(1000, TimeUnit.SECONDS); + config.setMaxWriteThreads(10); + + if (conn.tableOperations().exists("rya_prospects")) { + conn.tableOperations().delete("rya_prospects"); + } + if (conn.tableOperations().exists("rya_selectivity")) { + conn.tableOperations().delete("rya_selectivity"); + } + + arc = new AccumuloRdfConfiguration(); + arc.setTableLayoutStrategy(new TablePrefixLayoutStrategy()); + arc.setMaxRangesForScanner(300); + res = new ProspectorServiceEvalStatsDAO(conn, arc); + + } + + @Test + public void testOptimizeQ1() throws Exception { + + RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> res = new ProspectorServiceEvalStatsDAO(conn, arc); + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(arc); + accc.setConnector(conn); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = conn.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = conn.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "predicateobject", "predicatepredicate", "predicatesubject"); + Mutation m1, m2, m3, m4; + + m1 = new Mutation(s1 + DELIM + "3"); + m1.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m2 = new Mutation(s2 + DELIM + "2"); + m2.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("1".getBytes())); + mList.add(m1); + mList.add(m2); + mList.add(m3); + + bw1.addMutations(mList); + bw1.close(); + + Scanner scan = conn.createScanner("rya_prospects", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getValue().get()))); + } + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m4.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + int i = 2; + int j = 3; + int k = 4; + Long count1; + Long count2; + Long count3; + + for (String s : sList) { + count1 = (long) i; + count2 = (long) j; + count3 = (long) k; + m1.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m2.put(new Text(s), new Text(count2.toString()), EMPTY_VAL); + m3.put(new Text(s), new Text(count3.toString()), EMPTY_VAL); + i = 2 * i; + j = 2 * j; + k = 2 * k; + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m4); + bw2.addMutations(mList2); + bw2.close(); + + scan = conn.createScanner("rya_selectivity", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getKey().getColumnQualifier().toString()))); + + } + + TupleExpr te = getTupleExpr(q1); + + RdfCloudTripleStoreSelectivityEvaluationStatistics ars = new RdfCloudTripleStoreSelectivityEvaluationStatistics(arc, res, accc); + QueryJoinSelectOptimizer qjs = new QueryJoinSelectOptimizer(ars, accc); + System.out.println("Originial query is " + te); + qjs.optimize(te, null, null); + Assert.assertTrue(te.equals(getTupleExpr(Q1))); + + } + + @Test + public void testOptimizeQ2() throws Exception { + + System.out.println("*********************QUERY2********************"); + + RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> res = new ProspectorServiceEvalStatsDAO(conn, arc); + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(arc); + accc.setConnector(conn); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = conn.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = conn.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + "uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "predicateobject", "predicatepredicate", "predicatesubject"); + Mutation m1, m2, m3, m4, m5, m6; + + m1 = new Mutation(s1 + DELIM + "3"); + m1.put(new Text("count"), new Text(""), new Value("4".getBytes())); + m2 = new Mutation(s2 + DELIM + "2"); + m2.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("5".getBytes())); + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + + bw1.addMutations(mList); + bw1.close(); + + Scanner scan = conn.createScanner("rya_prospects", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getValue().get()))); + } + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m6.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + int i = 2; + int j = 3; + int k = 4; + Long count1; + Long count2; + Long count3; + + for (String s : sList) { + count1 = (long) i; + count2 = (long) j; + count3 = (long) k; + m1.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m2.put(new Text(s), new Text(count2.toString()), EMPTY_VAL); + m3.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m4.put(new Text(s), new Text(count3.toString()), EMPTY_VAL); + m5.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + + i = 2 * i; + j = 2 * j; + k = 2 * k; + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m5); + mList2.add(m4); + mList2.add(m6); + bw2.addMutations(mList2); + bw2.close(); + + // scan = conn.createScanner("rya_selectivity" , new Authorizations()); + // scan.setRange(new Range()); + // + // for (Map.Entry<Key, Value> entry : scan) { + // System.out.println("Key row string is " + entry.getKey().getRow().toString()); + // System.out.println("Key is " + entry.getKey()); + // System.out.println("Value is " + (new String(entry.getKey().getColumnQualifier().toString()))); + // + // } + + TupleExpr te = getTupleExpr(q2); + System.out.println("Bindings are " + te.getBindingNames()); + RdfCloudTripleStoreSelectivityEvaluationStatistics ars = new RdfCloudTripleStoreSelectivityEvaluationStatistics(arc, res, accc); + QueryJoinSelectOptimizer qjs = new QueryJoinSelectOptimizer(ars, accc); + System.out.println("Originial query is " + te); + qjs.optimize(te, null, null); + System.out.println("Optimized query is " + te); + // System.out.println("Bindings are " + te.getBindingNames()); + Assert.assertTrue(te.equals(getTupleExpr(Q2))); + + } + + @Test + public void testOptimizeQ3() throws Exception { + + RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> res = new ProspectorServiceEvalStatsDAO(conn, arc); + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(arc); + accc.setConnector(conn); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = conn.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = conn.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + "uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + String s6 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:kibble"; + String s7 = "predicateobject" + DELIM + "uri:rollsIn" + DELIM + "uri:mud"; + String s8 = "predicateobject" + DELIM + "uri:runsIn" + DELIM + "uri:field"; + String s9 = "predicateobject" + DELIM + "uri:smells" + DELIM + "uri:butt"; + String s10 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:sticks"; + + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "predicateobject", "predicatepredicate", "predicatesubject"); + Mutation m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11; + + m1 = new Mutation(s1 + DELIM + "3"); + m1.put(new Text("count"), new Text(""), new Value("5".getBytes())); + m2 = new Mutation(s2 + DELIM + "2"); + m2.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("5".getBytes())); + m6 = new Mutation(s6 + DELIM + "1"); + m6.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m7 = new Mutation(s7 + DELIM + "1"); + m7.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m8 = new Mutation(s8 + DELIM + "1"); + m8.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m9 = new Mutation(s9 + DELIM + "1"); + m9.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m10 = new Mutation(s10 + DELIM + "1"); + m10.put(new Text("count"), new Text(""), new Value("1".getBytes())); + + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + mList.add(m6); + mList.add(m7); + mList.add(m8); + mList.add(m9); + mList.add(m10); + + bw1.addMutations(mList); + bw1.close(); + + Scanner scan = conn.createScanner("rya_prospects", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getValue().get()))); + } + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(s6); + m7 = new Mutation(s7); + m8 = new Mutation(s8); + m9 = new Mutation(s9); + m10 = new Mutation(s10); + m11 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m11.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + int i = 2; + int j = 3; + int k = 4; + int l = 5; + Long count1; + Long count2; + Long count3; + Long count4; + + for (String s : sList) { + count1 = (long) i; + count2 = (long) j; + count3 = (long) k; + count4 = (long) l; + m1.put(new Text(s), new Text(count4.toString()), EMPTY_VAL); + m2.put(new Text(s), new Text(count2.toString()), EMPTY_VAL); + m3.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m4.put(new Text(s), new Text(count3.toString()), EMPTY_VAL); + m5.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m6.put(new Text(s), new Text(count2.toString()), EMPTY_VAL); + m7.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m8.put(new Text(s), new Text(count4.toString()), EMPTY_VAL); + m9.put(new Text(s), new Text(count3.toString()), EMPTY_VAL); + m10.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + + i = 2 * i; + j = 2 * j; + k = 2 * k; + l = 2 * l; + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m5); + mList2.add(m4); + mList2.add(m6); + mList2.add(m7); + mList2.add(m8); + mList2.add(m9); + mList2.add(m10); + mList2.add(m11); + bw2.addMutations(mList2); + bw2.close(); + + scan = conn.createScanner("rya_selectivity", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getKey().getColumnQualifier().toString()))); + + } + + TupleExpr te = getTupleExpr(q3); + RdfCloudTripleStoreSelectivityEvaluationStatistics ars = new RdfCloudTripleStoreSelectivityEvaluationStatistics(arc, res, accc); + QueryJoinSelectOptimizer qjs = new QueryJoinSelectOptimizer(ars, accc); + System.out.println("Originial query is " + te); + qjs.optimize(te, null, null); + + System.out.print("Optimized query is " + te); + + } + + @Test + public void testOptimizeQ4() throws Exception { + + RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> res = new ProspectorServiceEvalStatsDAO(conn, arc); + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(arc); + accc.setConnector(conn); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = conn.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = conn.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + "uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "predicateobject", "predicatepredicate", "predicatesubject"); + Mutation m1, m2, m3, m4, m5, m6; + + m1 = new Mutation(s1 + DELIM + "3"); + m1.put(new Text("count"), new Text(""), new Value("4".getBytes())); + m2 = new Mutation(s2 + DELIM + "2"); + m2.put(new Text("count"), new Text(""), new Value("0".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("8".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("0".getBytes())); + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + + bw1.addMutations(mList); + bw1.close(); + + Scanner scan = conn.createScanner("rya_prospects", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getValue().get()))); + } + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m6.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + int i = 2; + int j = 3; + int k = 4; + Long count1; + Long count2; + Long count3; + + for (String s : sList) { + count1 = (long) i; + count2 = (long) j; + count3 = (long) k; + m1.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m2.put(new Text(s), new Text(count2.toString()), EMPTY_VAL); + m3.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m4.put(new Text(s), new Text(count3.toString()), EMPTY_VAL); + m5.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + + i = 2 * i; + j = 2 * j; + k = 2 * k; + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m5); + mList2.add(m4); + mList2.add(m6); + bw2.addMutations(mList2); + bw2.close(); + + scan = conn.createScanner("rya_selectivity", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getKey().getColumnQualifier().toString()))); + + } + + TupleExpr te = getTupleExpr(q2); + RdfCloudTripleStoreSelectivityEvaluationStatistics ars = new RdfCloudTripleStoreSelectivityEvaluationStatistics(arc, res, accc); + QueryJoinSelectOptimizer qjs = new QueryJoinSelectOptimizer(ars, accc); + System.out.println("Originial query is " + te); + qjs.optimize(te, null, null); + Assert.assertTrue(te.equals(getTupleExpr(Q4))); + + System.out.print("Optimized query is " + te); + + } + + @Test + public void testOptimizeQ5() throws Exception { + + RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> res = new ProspectorServiceEvalStatsDAO(conn, arc); + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(arc); + accc.setConnector(conn); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = conn.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = conn.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + String s5 = "predicateobject" + DELIM + "uri:watches" + DELIM + "uri:television"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + String s6 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:kibble"; + String s7 = "predicateobject" + DELIM + "uri:rollsIn" + DELIM + "uri:mud"; + String s8 = "predicateobject" + DELIM + "uri:runsIn" + DELIM + "uri:field"; + String s9 = "predicateobject" + DELIM + "uri:smells" + DELIM + "uri:butt"; + String s10 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:sticks"; + + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "predicateobject", "predicatepredicate", "predicatesubject"); + Mutation m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11; + + m1 = new Mutation(s1 + DELIM + "3"); + m1.put(new Text("count"), new Text(""), new Value("5".getBytes())); + m2 = new Mutation(s2 + DELIM + "2"); + m2.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("0".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m6 = new Mutation(s6 + DELIM + "1"); + m6.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m7 = new Mutation(s7 + DELIM + "1"); + m7.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m8 = new Mutation(s8 + DELIM + "1"); + m8.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m9 = new Mutation(s9 + DELIM + "1"); + m9.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m10 = new Mutation(s10 + DELIM + "1"); + m10.put(new Text("count"), new Text(""), new Value("1".getBytes())); + + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + mList.add(m6); + mList.add(m7); + mList.add(m8); + mList.add(m9); + mList.add(m10); + + bw1.addMutations(mList); + bw1.close(); + + Scanner scan = conn.createScanner("rya_prospects", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getValue().get()))); + } + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(s6); + m7 = new Mutation(s7); + m8 = new Mutation(s8); + m9 = new Mutation(s9); + m10 = new Mutation(s10); + m11 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m11.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + int i = 2; + int j = 3; + int k = 4; + int l = 5; + Long count1; + Long count2; + Long count3; + Long count4; + + for (String s : sList) { + count1 = (long) i; + count2 = (long) j; + count3 = (long) k; + count4 = (long) l; + m1.put(new Text(s), new Text(count4.toString()), EMPTY_VAL); + m2.put(new Text(s), new Text(count2.toString()), EMPTY_VAL); + m3.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m4.put(new Text(s), new Text(count3.toString()), EMPTY_VAL); + m5.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m6.put(new Text(s), new Text(count2.toString()), EMPTY_VAL); + m7.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m8.put(new Text(s), new Text(count4.toString()), EMPTY_VAL); + m9.put(new Text(s), new Text(count3.toString()), EMPTY_VAL); + m10.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + + i = 2 * i; + j = 2 * j; + k = 2 * k; + l = 2 * l; + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m5); + mList2.add(m4); + mList2.add(m6); + mList2.add(m7); + mList2.add(m8); + mList2.add(m9); + mList2.add(m10); + mList2.add(m11); + bw2.addMutations(mList2); + bw2.close(); + + scan = conn.createScanner("rya_selectivity", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getKey().getColumnQualifier().toString()))); + + } + + TupleExpr te = getTupleExpr(q5); + System.out.println("Bindings are " + te.getBindingNames()); + RdfCloudTripleStoreSelectivityEvaluationStatistics ars = new RdfCloudTripleStoreSelectivityEvaluationStatistics(arc, res, accc); + QueryJoinSelectOptimizer qjs = new QueryJoinSelectOptimizer(ars, accc); + System.out.println("Originial query is " + te); + qjs.optimize(te, null, null); + System.out.println("Bindings are " + te.getBindingNames()); + + System.out.print("Optimized query is " + te); + + } + + + + + + + + + @Test + public void testOptimizeQ6() throws Exception { + + RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> res = new ProspectorServiceEvalStatsDAO(conn, arc); + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(arc); + accc.setConnector(conn); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = conn.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = conn.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + String s5 = "predicateobject" + DELIM + "uri:watches" + DELIM + "uri:television"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + String s6 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:kibble"; + String s7 = "predicateobject" + DELIM + "uri:rollsIn" + DELIM + "uri:mud"; + String s8 = "predicateobject" + DELIM + "uri:runsIn" + DELIM + "uri:field"; + String s9 = "predicateobject" + DELIM + "uri:smells" + DELIM + "uri:butt"; + String s10 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:sticks"; + + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "predicateobject", "predicatepredicate", "predicatesubject"); + Mutation m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11; + + m1 = new Mutation(s1 + DELIM + "3"); + m1.put(new Text("count"), new Text(""), new Value("5".getBytes())); + m2 = new Mutation(s2 + DELIM + "2"); + m2.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("0".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m6 = new Mutation(s6 + DELIM + "1"); + m6.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m7 = new Mutation(s7 + DELIM + "1"); + m7.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m8 = new Mutation(s8 + DELIM + "1"); + m8.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m9 = new Mutation(s9 + DELIM + "1"); + m9.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m10 = new Mutation(s10 + DELIM + "1"); + m10.put(new Text("count"), new Text(""), new Value("1".getBytes())); + + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + mList.add(m6); + mList.add(m7); + mList.add(m8); + mList.add(m9); + mList.add(m10); + + bw1.addMutations(mList); + bw1.close(); + + Scanner scan = conn.createScanner("rya_prospects", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getValue().get()))); + } + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(s6); + m7 = new Mutation(s7); + m8 = new Mutation(s8); + m9 = new Mutation(s9); + m10 = new Mutation(s10); + m11 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m11.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + int i = 2; + int j = 3; + int k = 4; + int l = 5; + Long count1; + Long count2; + Long count3; + Long count4; + + for (String s : sList) { + count1 = (long) i; + count2 = (long) j; + count3 = (long) k; + count4 = (long) l; + m1.put(new Text(s), new Text(count4.toString()), EMPTY_VAL); + m2.put(new Text(s), new Text(count2.toString()), EMPTY_VAL); + m3.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m4.put(new Text(s), new Text(count3.toString()), EMPTY_VAL); + m5.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m6.put(new Text(s), new Text(count2.toString()), EMPTY_VAL); + m7.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + m8.put(new Text(s), new Text(count4.toString()), EMPTY_VAL); + m9.put(new Text(s), new Text(count3.toString()), EMPTY_VAL); + m10.put(new Text(s), new Text(count1.toString()), EMPTY_VAL); + + i = 2 * i; + j = 2 * j; + k = 2 * k; + l = 2 * l; + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m5); + mList2.add(m4); + mList2.add(m6); + mList2.add(m7); + mList2.add(m8); + mList2.add(m9); + mList2.add(m10); + mList2.add(m11); + bw2.addMutations(mList2); + bw2.close(); + + scan = conn.createScanner("rya_selectivity", new Authorizations()); + scan.setRange(new Range()); + + for (Map.Entry<Key,Value> entry : scan) { + System.out.println("Key row string is " + entry.getKey().getRow().toString()); + System.out.println("Key is " + entry.getKey()); + System.out.println("Value is " + (new String(entry.getKey().getColumnQualifier().toString()))); + + } + + TupleExpr te = getTupleExpr(q6); + TupleExpr te2 = (TupleExpr) te.clone(); + System.out.println("Bindings are " + te.getBindingNames()); + RdfCloudTripleStoreSelectivityEvaluationStatistics ars = new RdfCloudTripleStoreSelectivityEvaluationStatistics(arc, res, accc); + QueryJoinSelectOptimizer qjs = new QueryJoinSelectOptimizer(ars, accc); + System.out.println("Originial query is " + te); + qjs.optimize(te, null, null); + + + + FilterOptimizer fo = new FilterOptimizer(); + fo.optimize(te2, null, null); + System.out.print("filter optimized query before js opt is " + te2); + qjs.optimize(te2, null, null); + + System.out.println("join selectivity opt query before filter opt is " + te); + fo.optimize(te, null, null); + + System.out.println("join selectivity opt query is " + te); + System.out.print("filter optimized query is " + te2); + + } + + + + + + + + + + + + + + + private TupleExpr getTupleExpr(String query) throws MalformedQueryException { + + SPARQLParser sp = new SPARQLParser(); + ParsedQuery pq = sp.parseQuery(query, null); + + return pq.getTupleExpr(); + } + +}