Changeset: 0891a54a264d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0891a54a264d
Added Files:
monetdb5/modules/mal/groups.c
monetdb5/modules/mal/groups.h
monetdb5/modules/mal/joinpath.c
monetdb5/modules/mal/joinpath.h
Modified Files:
monetdb5/modules/mal/Makefile.ag
monetdb5/optimizer/opt_groups.c
monetdb5/optimizer/opt_groups.h
monetdb5/optimizer/opt_joinpath.c
monetdb5/optimizer/opt_joinpath.h
Branch: default
Log Message:
Split optimizers from runtime
Step towards making the optimizer directory focussed on the
optimization process. The support part should go to
modules/mal.
diffs (truncated from 949 to 300 lines):
diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag
--- a/monetdb5/modules/mal/Makefile.ag
+++ b/monetdb5/modules/mal/Makefile.ag
@@ -38,9 +38,11 @@ lib_mal = {
constraints.c constraints.h \
factories.c factories.h \
groupby.c groupby.h \
+ groups.c groups.h \
histogram.h \
inspect.c inspect.h \
iterator.c iterator.h \
+ joinpath.c joinpath.h \
language.c language.h \
mal_io.c mal_io.h \
mal_mapi.c mal_mapi.h \
diff --git a/monetdb5/modules/mal/groups.c b/monetdb5/modules/mal/groups.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/groups.c
@@ -0,0 +1,117 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2012 MonetDB B.V.
+ * All Rights Reserved.
+*/
+#include "monetdb_config.h"
+#include "groups.h"
+#include "group.h"
+
+/*
+ * The groups optimizer takes a sequence and attempts to minimize the
intermediate result.
+ * The choice depends on a good estimate of intermediate results using
properties.
+ * We start by just performing the underlying MAL instructions in sequence as
requested
+ * This will lead to better locality of BAT access.
+ */
+typedef struct{
+ int *arg;
+ BAT *b;
+} Elm;
+
+str
+GRPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+ int *grp = (int*) getArgReference(stk,pci,0);
+ int *ext = (int*) getArgReference(stk,pci,1);
+ int i, j, oldgrp, oldext;
+ str msg = MAL_SUCCEED;
+ lng *sizes = (lng*) GDKzalloc(sizeof(lng) * pci->argc), l;
+ bat *bid = (bat*) GDKzalloc(sizeof(bat) * pci->argc), bi;
+ BUN *cnt = (BUN*) GDKzalloc(sizeof(BUN) * pci->argc), c;
+ BAT *b, *sample, *uniq;
+
+ for( i=2; i< pci->argc; i++){
+ bid[i] = *(int *) getArgReference(stk, pci, i);
+ b = BATdescriptor(bid[i]);
+ if ( b ){
+ cnt[i]= BATcount(b);
+ sample = BATsample(b,1000);
+ if ( sample) {
+ uniq = BATkunique( BATmirror(sample));
+ if ( uniq){
+ sizes[i] = (lng) BATcount(uniq);
+ BBPreleaseref(uniq->batCacheid);
+ }
+ BBPreleaseref(sample->batCacheid);
+ }
+ BBPreleaseref(bid[i]);
+ }
+ }
+
+ /* for (i=2; i<pci->argc; i++)
+ mnstr_printf(cntxt->fdout,"# before[%d] "LLFMT"\n",i,
sizes[i]); */
+ /* sort order may have influences */
+ /* SF100 Q16 showed < ordering is 2 times faster as > ordering */
+ for ( i = 2; i< pci->argc; i++)
+ for ( j = i+1; j<pci->argc; j++)
+ if ( sizes[j] < sizes[i]){
+ l = sizes[j]; sizes[j]= sizes[i]; sizes[i]= l;
+ bi = bid[j]; bid[j]= bid[i]; bid[i]= bi;
+ c = cnt[j]; cnt[j]= cnt[i]; cnt[i]= c;
+ }
+ /* for (i=2; i<pci->argc; i++)
+ mnstr_printf(cntxt->fdout,"# after [%d] "LLFMT"\n",i,
sizes[i]); */
+
+ /* (grp,ext) := group.new(..) */
+ *grp = 0;
+ *ext = 0;
+ msg = GRPgroup(grp, ext, &bid[2]);
+ if ( msg != MAL_SUCCEED){
+ GDKfree(sizes);
+ GDKfree(bid);
+ GDKfree(cnt);
+ return msg;
+ }
+ /* check group count */
+ b = BATdescriptor(*grp);
+ if ( b && BATcount(b) != cnt[2]) {
+ BBPreleaseref(*grp);
+ b = 0;
+ /* (grp,ext) := group.derive(grp,ext,arg) */
+ /* (grp,ext) := group.done(grp,ext,arg) */
+ for ( i=3; i < pci->argc; i++){
+ oldgrp= *grp;
+ oldext= *ext;
+ msg = GRPderive(grp, ext, &oldgrp, &oldext, &bid[i]);
+ if ( msg == MAL_SUCCEED){
+ BBPdecref(oldgrp, TRUE);
+ BBPdecref(oldext, TRUE);
+ } else break;
+ /* check group count */
+ b = BATdescriptor(*grp);
+ if ( b && BATcount(b) == cnt[i])
+ break;
+ }
+ }
+ if (b)
+ BBPreleaseref(*grp);
+ GDKfree(sizes);
+ GDKfree(bid);
+ GDKfree(cnt);
+ (void) cntxt;
+ (void) mb;
+ return msg;
+}
diff --git a/monetdb5/modules/mal/groups.h b/monetdb5/modules/mal/groups.h
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/groups.h
@@ -0,0 +1,36 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2012 MonetDB B.V.
+ * All Rights Reserved.
+*/
+#ifndef _OPT_GROUPS_
+#define _OPT_GROUPS_
+#include "monetdb_config.h"
+#include "mal_interpreter.h"
+
+#ifdef WIN32
+#if !defined(LIBMAL) && !defined(LIBATOMS) && !defined(LIBKERNEL) &&
!defined(LIBMAL) && !defined(LIBOPTIMIZER) && !defined(LIBSCHEDULER) &&
!defined(LIBMONETDB5)
+#define groups_export extern __declspec(dllimport)
+#else
+#define groups_export extern __declspec(dllexport)
+#endif
+#else
+#define groups_export extern
+#endif
+
+groups_export str GRPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr
stk, InstrPtr pci);
+
+#endif
diff --git a/monetdb5/modules/mal/joinpath.c b/monetdb5/modules/mal/joinpath.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/joinpath.c
@@ -0,0 +1,311 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2012 MonetDB B.V.
+ * All Rights Reserved.
+*/
+/*
+ * Post-optimization. After the join path has been constructed
+ * we could search for common subpaths. This heuristic is to
+ * remove any pair which is used more than once.
+ * Inner paths are often foreign key walks.
+ * The heuristics is sufficient for the code produced by SQL frontend.
+ * The alternative is to search for all possible subpaths and materialize them.
+ * For example, using recursion for all common paths.
+ */
+#include "monetdb_config.h"
+#include "joinpath.h"
+//#include "cluster.h"
+
+/*
+ * The join path optimizer takes a join sequence and
+ * attempts to minimize the intermediate result.
+ * The choice depends on a good estimate of intermediate
+ * results using properties.
+ * For the time being, we use a simplistic model, based
+ * on the assumption that most joins are foreign key joins anyway.
+ *
+ * We use a sample based approach for sizeable tables.
+ * The model is derived from the select statement. However, we did not succeed.
+ * The code is now commented for future improvement.
+ *
+ * Final conclusion from this exercise is:
+ * The difference between the join input size and the join output size is not
+ * the correct (or unique) metric which should be used to decide which order
+ * should be used in the joinPath.
+ * A SMALL_OPERAND is preferrable set to those cases where the table
+ * fits in the cache. This depends on the cache size and operand type.
+ * For the time being we limit ourself to a default of 1Kelements
+ */
+/*#define SAMPLE_THRESHOLD_lOG 17*/
+
+#define SMALL_OPERAND 1024
+
+static BUN
+ALGjoinCost(Client cntxt, BAT *l, BAT *r, int flag)
+{
+ BUN lc, rc;
+ BUN cost=0;
+#if 0
+ BUN lsize,rsize;
+ BAT *lsample, *rsample, *j;
+#endif
+
+ (void) flag;
+ lc = BATcount(l);
+ rc = BATcount(r);
+#if 0
+ /* The sampling method */
+ if(flag < 2 && ( lc > 100000 || rc > 100000)){
+ lsize= MIN(lc/100, (1<<SAMPLE_THRESHOLD_lOG)/3);
+ lsample= BATsample(l,lsize);
+ BBPreclaim(lsample);
+ rsize= MIN(rc/100, (1<<SAMPLE_THRESHOLD_lOG)/3);
+ rsample= BATsample(r,rsize);
+ BBPreclaim(rsample);
+ j= BATjoin(l,r, MAX(lsize,rsize));
+ lsize= BATcount(j);
+ BBPreclaim(j);
+ return lsize;
+ }
+#endif
+
+ /* first use logical properties to estimate upper bound of result size
*/
+ if (l->tkey && r->hkey)
+ cost = MIN(lc,rc);
+ else
+ if (l->tkey)
+ cost = rc;
+ else
+ if (r->hkey)
+ cost = lc;
+ else
+ if (lc * rc >= BUN_MAX)
+ cost = BUN_MAX;
+ else
+ cost = lc * rc;
+
+ /* then use physical properties to rank costs */
+ if (BATtdense(l) && BAThdense(r))
+ /* densefetchjoin -> sequential access */
+ cost /= 7;
+ else
+ if (BATtordered(l) && BAThdense(r))
+ /* orderedfetchjoin > sequential access */
+ cost /= 6;
+ else
+ if (BATtdense(l) && BAThordered(r) && flag != 0 /* no leftjoin */)
+ /* (reversed-) orderedfetchjoin -> sequential access */
+ cost /= 6;
+ else
+ if (BAThdense(r) && rc <= SMALL_OPERAND)
+ /* fetchjoin with random access in L1 */
+ cost /= 5;
+ else
+ if (BATtdense(l) && lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list