Changeset: e576bf4d22c0 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e576bf4d22c0
Modified Files:
        gdk/gdk_join.c
Branch: Mar2018
Log Message:

Update some comments: we don't have head and tail columns.


diffs (218 lines):

diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -19,13 +19,11 @@
  * first two arguments.  The join operations differ in the way in
  * which tuples from the two inputs are matched.
  *
- * All inputs BATs must be dense headed, the output BATs will also be
- * dense headed.  The outputs consist of two aligned BATs (i.e. same
- * length and same seqbase in the head column (0@0)) that contain in
- * their tails the OIDs of the input BATs that match.  The candidate
- * lists, if given, contain in their tail the OIDs of the associated
+ * The outputs consist of two aligned BATs (i.e. same length and same
+ * hseqbase (0@0)) that contain the OIDs of the input BATs that match.
+ * The candidate lists, if given, contain the OIDs of the associated
  * input BAT which must be considered for matching.  The input BATs
- * must have the same tail type.
+ * must have the same type.
  *
  * All functions also have a parameter nil_matches which indicates
  * whether NIL must be considered an ordinary value that can match, or
@@ -86,7 +84,7 @@ joinparamcheck(BAT *l, BAT *r1, BAT *r2,
        }
        if ((sl && ATOMtype(sl->ttype) != TYPE_oid) ||
            (sr && ATOMtype(sr->ttype) != TYPE_oid)) {
-               GDKerror("%s: candidate lists must have OID tail.\n", func);
+               GDKerror("%s: candidate lists must have type OID.\n", func);
                return GDK_FAIL;
        }
        if ((sl && !BATtordered(sl)) ||
@@ -313,10 +311,10 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
        const oid *lvals;
        oid o, seq;
 
-       /* r has a dense tail, and if there is a candidate list, it
-        * too is dense.  This means we don't have to do any searches,
-        * we only need to compare ranges to know whether a value from
-        * l has a match in r */
+       /* r is dense, and if there is a candidate list, it too is
+        * dense.  This means we don't have to do any searches, we
+        * only need to compare ranges to know whether a value from l
+        * has a match in r */
        assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
        assert(r->tsorted || r->trevsorted);
        assert(sl == NULL || sl->tsorted);
@@ -339,9 +337,8 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
        }
        /* at this point, the matchable values in r are [lo..hi) */
        if (BATtdense(l)) {
-               /* if l has a dense tail, we can further restrict the
-                * [lo..hi) range to values in l that match with
-                * values in r */
+               /* if l is dense, we can further restrict the [lo..hi)
+                * range to values in l that match with values in r */
                i = hi - lo;    /* remember these for nil_on_miss case below */
                o = lo;
                if (l->tseqbase > lo)
@@ -349,13 +346,13 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                if (l->tseqbase + BATcount(l) < hi)
                        hi = l->tseqbase + BATcount(l);
                if (sl == NULL || BATtdense(sl)) {
-                       /* l has a dense tail, and so does the left
-                        * candidate list (if it exists); this means
-                        * we don't have to actually look at any
-                        * values in l: we can just do some
-                        * arithmetic; it also means that r1 will be
-                        * dense, and if nil_on_miss is not set, or if
-                        * all values in l match, r2 will too */
+                       /* l is dense, and so is the left candidate
+                        * list (if it exists); this means we don't
+                        * have to actually look at any values in l:
+                        * we can just do some arithmetic; it also
+                        * means that r1 will be dense, and if
+                        * nil_on_miss is not set, or if all values in
+                        * l match, r2 will too */
                        seq = l->hseqbase;
                        cnt = BATcount(l);
                        if (sl) {
@@ -384,16 +381,15 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                        /* at this point, the matched values in l and
                         * r (taking candidate lists into account) are
                         * [lo..hi) which we can translate back to the
-                        * respective head values that we can store in
-                        * r1 and r2; note that r1 will have a dense
-                        * tail since all values in l will match
-                        * something (even if nil if nil_on_miss is
-                        * set) */
+                        * respective OID values that we can store in
+                        * r1 and r2; note that r1 will be dense since
+                        * all values in l will match something (even
+                        * if nil if nil_on_miss is set) */
                        if (only_misses) {
                                /* the return values are
                                 * [seq..lo') + [hi'..seq+cnt)
                                 * where lo' and hi' are lo and hi
-                                * translated back to l's head
+                                * translated back to l's OID
                                 * values */
                                lo = lo + l->hseqbase - l->tseqbase; /* lo' */
                                hi = hi + l->hseqbase - l->tseqbase; /* hi' */
@@ -496,14 +492,13 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                        }
                        goto doreturn;
                }
-               /* l has a dense tail, but the candidate list exists
-                * and does not have a dense tail; we can, by
-                * manipulating the range [lo..hi), just look at the
-                * candidate list values */
+               /* l is dense, but the candidate list exists and is
+                * not dense; we can, by manipulating the range
+                * [lo..hi), just look at the candidate list values */
                assert(!BATtdense(sl));
                lvals = (const oid *) Tloc(sl, 0);
-               /* translate lo and hi to l's head values that now
-                * need to match */
+               /* translate lo and hi to l's OID values that now need
+                * to match */
                lo = lo - l->tseqbase + l->hseqbase;
                hi = hi - l->tseqbase + l->hseqbase;
                cnt = BATcount(sl);
@@ -569,8 +564,8 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                }
                goto doreturn;
        }
-       /* l does not have a dense tail, so we need to look at the
-        * values and check whether they are in the range [lo..hi) */
+       /* l is not dense, so we need to look at the values and check
+        * whether they are in the range [lo..hi) */
        lvals = (const oid *) Tloc(l, 0);
        seq = l->hseqbase;
        cnt = BATcount(l);
@@ -637,9 +632,8 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                        }
                        goto doreturn;
                }
-               /* candidate list exists and has a dense tail,
-                * we can try to restrict the values in l that
-                * we need to look at */
+               /* candidate list exists and is dense, we can try to
+                * restrict the values in l that we need to look at */
                if (sl->tseqbase > l->hseqbase) {
                        /* we don't need to start at the
                         * beginning of l */
@@ -2442,7 +2436,7 @@ hashjoin(BAT *r1, BAT *r2, BAT *l, BAT *
        int lwidth;
        const void *nil = ATOMnilptr(l->ttype);
        int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
-       oid lval = oid_nil;     /* hold value if l has dense tail */
+       oid lval = oid_nil;     /* hold value if l is dense */
        const char *v = (const char *) &lval;
        int lskipped = 0;       /* whether we skipped values in l */
        const Hash *restrict hsh;
@@ -2488,8 +2482,7 @@ hashjoin(BAT *r1, BAT *r2, BAT *l, BAT *
                assert(!r->tvarsized || !r->ttype);
                lvars = NULL;
        }
-       /* offset to convert BUN for value in right tail column to OID
-        * in right head column */
+       /* offset to convert BUN to OID for value in right column */
        rseq = r->hseqbase;
 
        /* basic properties will be adjusted if necessary later on,
@@ -3622,10 +3615,9 @@ subleftjoin(BAT **r1p, BAT **r2p, BAT *l
                        nil_on_miss, semi, only_misses, maxsize, t0, 0, "leftjoin");
 }
 
-/* Perform an equi-join over l and r.  Returns two new, aligned,
- * dense-headed bats with in the tail the oids (head column values) of
- * matching tuples.  The result is in the same order as l (i.e. r1 is
- * sorted). */
+/* Perform an equi-join over l and r.  Returns two new, aligned, bats
+ * with the oids of matching tuples.  The result is in the same order
+ * as l (i.e. r1 is sorted). */
 gdk_return
 BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate)
 {
@@ -3635,10 +3627,10 @@ BATleftjoin(BAT **r1p, BAT **r2p, BAT *l
 }
 
 /* Performs a left outer join over l and r.  Returns two new, aligned,
- * dense-headed bats with in the tail the oids (head column values) of
- * matching tuples, or the oid in the first output bat and nil in the
- * second output bat if the value in l does not occur in r.  The
- * result is in the same order as l (i.e. r1 is sorted). */
+ * bats with the oids of matching tuples, or the oid in the first
+ * output bat and nil in the second output bat if the value in l does
+ * not occur in r.  The result is in the same order as l (i.e. r1 is
+ * sorted). */
 gdk_return
 BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate)
 {
@@ -3647,10 +3639,9 @@ BATouterjoin(BAT **r1p, BAT **r2p, BAT *
                           GDKdebug & ALGOMASK ? GDKusec() : 0);
 }
 
-/* Perform a semi-join over l and r.  Returns two new, aligned,
- * dense-headed bats with in the tail the oids (head column values) of
- * matching tuples.  The result is in the same order as l (i.e. r1 is
- * sorted). */
+/* Perform a semi-join over l and r.  Returns two new, aligned, bats
+ * with the oids of matching tuples.  The result is in the same order
+ * as l (i.e. r1 is sorted). */
 gdk_return
 BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate)
 {
@@ -3659,9 +3650,9 @@ BATsemijoin(BAT **r1p, BAT **r2p, BAT *l
                           GDKdebug & ALGOMASK ? GDKusec() : 0);
 }
 
-/* Return the difference of l and r.  The result is a BAT with in the
- * tail the oids of those values in l that do not occur in r.  This is
- * what you might call an anti-semi-join.  The result can be used as a
+/* Return the difference of l and r.  The result is a BAT with the
+ * oids of those values in l that do not occur in r.  This is what you
+ * might call an anti-semi-join.  The result can be used as a
  * candidate list. */
 BAT *
 BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN estimate)
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to