Use single-variable hash join keys (for now). Project: http://git-wip-us.apache.org/repos/asf/jena/repo Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/ee103d9b Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/ee103d9b Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/ee103d9b
Branch: refs/heads/master Commit: ee103d9b906e1c9bebf13c612807699a09cf87c8 Parents: 20f17a1 Author: Andy Seaborne <[email protected]> Authored: Wed Sep 9 09:44:58 2015 +0100 Committer: Andy Seaborne <[email protected]> Committed: Wed Sep 9 09:44:58 2015 +0100 ---------------------------------------------------------------------- .../apache/jena/sparql/engine/join/JoinKey.java | 26 ++++++++++++++------ .../sparql/engine/join/QueryIterHashJoin.java | 6 ++++- .../engine/join/AbstractTestInnerJoin.java | 16 +++++++++++- .../sparql/engine/join/AbstractTestJoin.java | 26 ++++++++++++++++++++ .../engine/join/AbstractTestLeftJoin.java | 1 - 5 files changed, 65 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java index a4a067d..0fd47b6 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/JoinKey.java @@ -28,22 +28,35 @@ import org.apache.jena.sparql.core.Var ; /** JoinKey for hash joins */ public final class JoinKey implements Iterable<Var> { + private static final JoinKey emptyKey = new JoinKey(DS.listOfNone()) ; + // Common way to make a JoinKey /** Make a JoinKey from the intersection of two sets **/ - public static JoinKey create(Collection<Var> vars1, Collection<Var> vars2) { // JoinKeys are generally small so short loops are best. // vars2 may be smallest e.g. 
from triple and running accumulator (vars1) List<Var> intersection = DS.list() ; for ( Var v : vars1 ) { if ( vars2.contains(v) ) + intersection.add(v) ; + } + return new JoinKey(intersection) ; + } + + /** Make a JoinKey of single variable from the intersection of two sets **/ + public static JoinKey createVarKey(Collection<Var> vars1, Collection<Var> vars2) { + // JoinKeys are generally small so short loops are best. + // vars2 may be smallest e.g. from triple and running accumulator (vars1) + List<Var> intersection = DS.list() ; + for ( Var v : vars1 ) { + if ( vars2.contains(v) ) // First and single key. return create(v) ; // Compound keys needs validation : what if they are partial // i.e. some rows only have part of the join key? //intersection.add(v) ; } - return new JoinKey(intersection) ; + return emptyKey ; } public static JoinKey create(Var var) { @@ -87,9 +100,11 @@ public final class JoinKey implements Iterable<Var> private JoinKey(List<Var> _keys) { keys = _keys ; } - private JoinKey(Var var) { keys = DS.listOfOne(var) ; } + private JoinKey(Var var) { keys = DS.listOfOne(var) ; } - public boolean isEmpty() { return keys.isEmpty() ; } + public boolean isEmpty() { return keys.isEmpty() ; } + + public int length() { return keys.size() ; } /** Get a single variable for this key. 
* For any one key, it always returns the same var */ @@ -107,6 +122,3 @@ public final class JoinKey implements Iterable<Var> return keys.toString() ; } } - - - http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java index 1b4b29a..98e779d 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/join/QueryIterHashJoin.java @@ -22,6 +22,7 @@ import java.util.Iterator ; import java.util.List ; import org.apache.jena.atlas.iterator.Iter ; +import org.apache.jena.atlas.logging.Log ; import org.apache.jena.sparql.algebra.Algebra ; import org.apache.jena.sparql.core.Var ; import org.apache.jena.sparql.engine.ExecutionContext ; @@ -69,6 +70,9 @@ public class QueryIterHashJoin extends QueryIter2 { right.close() ; return QueryIterNullIterator.create(execCxt) ; } + if ( joinKey != null && joinKey.length() > 1 ) + Log.warn(QueryIterHashJoin.class, "Multivariable join key") ; + return new QueryIterHashJoin(joinKey, left, right, execCxt) ; } @@ -96,7 +100,7 @@ public class QueryIterHashJoin extends QueryIter2 { List<Var> varsLeft = Iter.toList(bLeft.vars()) ; List<Var> varsRight = Iter.toList(bRight.vars()) ; - joinKey = JoinKey.create(varsLeft, varsRight) ; + joinKey = JoinKey.createVarKey(varsLeft, varsRight) ; left = pLeft ; right = pRight ; } http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java 
b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java index 0242d4a..5152e4e 100644 --- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java +++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestInnerJoin.java @@ -19,6 +19,7 @@ package org.apache.jena.sparql.engine.join; import org.apache.jena.sparql.algebra.Table ; +import org.apache.jena.sparql.core.Var ; import org.apache.jena.sparql.expr.ExprList ; import org.junit.Test ; @@ -67,10 +68,23 @@ public abstract class AbstractTestInnerJoin extends AbstractTestJoin { // No key. @Test public void join_14() { testJoin(null, tableD1(), tableD2(), tableD3()) ; } - + @Test public void join_skew_01() { testJoin("x", tableS1(), tableS2(), tableS1J2()) ; } + @Test public void join_skew_02() { testJoin("w", tableS1(), tableS2(), tableS1J2()) ; } + @Test public void join_skew_03() { testJoin(null, tableS1(), tableS2(), tableS1J2()) ; } + //@Test + // Multiple variable join keys on skew data don't work. + public void join_skew_04() { + JoinKey joinKey = new JoinKey.Builder() + .add(Var.alloc("x")) + .add(Var.alloc("w")) + .build() ; + testJoinWithKey(joinKey, tableS1(), tableS2(), tableS1J2()) ; + } + // Disjoint tables. 
@Test public void join_disjoint_01() { testJoin("a", tableD2(), tableD8(), tableD8x2()) ; } @Test public void join_disjoint_02() { testJoin("z", tableD2(), tableD8(), tableD8x2()) ; } + @Test public void join_disjoint_03() { testJoin(null, tableD2(), tableD8(), tableD8x2()) ; } } http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java index 434af09..dd16393 100644 --- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java +++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestJoin.java @@ -216,6 +216,24 @@ public abstract class AbstractTestJoin extends Assert { ")") ; } + // Skew tables for join testing. + // Join keys of ?x ?w and [?x , ?w] + + protected static Table tableS1() { + return parseTableInt("(table" + ," (row (?z <http://example/z1>) (?x <http://example/x>) (?w 'w11-1'))" + ," (row (?z <http://example/z4>) (?x <http://example/x>)))" + ); } + protected static Table tableS2() { + return parseTableInt("(table (row (?x <http://example/x>) (?w <http://example/z1>)))") ; + } + + protected static Table tableS1J2() { + return parseTableInt("(table" + ," (row (?z <http://example/z4>) (?x <http://example/x>) (?w <http://example/z1>) ))" + ); + } + // Code protected static Table parseTableInt(String... 
strings) { @@ -244,6 +262,14 @@ public abstract class AbstractTestJoin extends Assert { executeTest(joinKey, left, right, null, tableOut) ; } + protected void testJoinWithKey(JoinKey joinKey, Table left, Table right, Table tableOut) { + executeTest(joinKey, left, right, null, tableOut) ; + } + + protected void testJoinWithKey(JoinKey joinKey, Table left, Table right, ExprList conditions, Table tableOut) { + executeTest(joinKey, left, right, conditions, tableOut) ; + } + // Any kind of join (choose by abstract join() operation). protected abstract void executeTest(JoinKey joinKey, Table left, Table right, ExprList conditions, Table expectedResults) ; http://git-wip-us.apache.org/repos/asf/jena/blob/ee103d9b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java index 2786bed..afca7db 100644 --- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java +++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/join/AbstractTestLeftJoin.java @@ -66,7 +66,6 @@ public abstract class AbstractTestLeftJoin extends AbstractTestJoin { @Test public void leftjoin_J13() { testJoin("z", tableD2(), tableD1(), tableD3_LJ()) ; } // No key. - @Test public void leftjoin_14() { testJoin(null, tableD1(), tableD2(), tableD3()) ; }
