Improved patch against latest CVS with more comments and new test 
case attached. No regressions with make test.

> This updated patch greatly improves query times against compound 
> SELECT statements (i.e., UNIONs) when the parent SELECT has a WHERE 
> clause.


 
____________________________________________________________________________________
Expecting? Get great news right away with email Auto-Check. 
Try the Yahoo! Mail Beta.
http://advision.webevents.yahoo.com/mailbeta/newmail_tools.html 
Index: src/expr.c
===================================================================
RCS file: /sqlite/sqlite/src/expr.c,v
retrieving revision 1.294
diff -u -3 -p -r1.294 expr.c
--- src/expr.c  15 May 2007 07:00:34 -0000      1.294
+++ src/expr.c  19 May 2007 15:19:50 -0000
@@ -604,6 +604,7 @@ Select *sqlite3SelectDup(Select *p){
   pNew->isAgg = p->isAgg;
   pNew->usesEphm = 0;
   pNew->disallowOrderBy = 0;
+  pNew->fOptPass = 0;
   pNew->pRightmost = 0;
   pNew->addrOpenEphm[0] = -1;
   pNew->addrOpenEphm[1] = -1;
Index: src/select.c
===================================================================
RCS file: /sqlite/sqlite/src/select.c,v
retrieving revision 1.348
diff -u -3 -p -r1.348 select.c
--- src/select.c        14 May 2007 16:50:49 -0000      1.348
+++ src/select.c        19 May 2007 15:19:50 -0000
@@ -73,6 +73,7 @@ Select *sqlite3SelectNew(
   pNew->pOffset = pOffset;
   pNew->iLimit = -1;
   pNew->iOffset = -1;
+  pNew->fOptPass = 0;
   pNew->addrOpenEphm[0] = -1;
   pNew->addrOpenEphm[1] = -1;
   pNew->addrOpenEphm[2] = -1;
@@ -1397,7 +1398,7 @@ static int matchOrderbyToColumn(
   Parse *pParse,          /* A place to leave error messages */
   Select *pSelect,        /* Match to result columns of this SELECT */
   ExprList *pOrderBy,     /* The ORDER BY values to match against columns */
-  int iTable,             /* Insert this value in iTable */
+  int *iTable,            /* Insert this value in iTable */
   int mustComplete        /* If TRUE all ORDER BYs must match */
 ){
   int nErr = 0;
@@ -1423,6 +1424,10 @@ static int matchOrderbyToColumn(
     int iCol = -1;
     char *zLabel;
 
+    if( pE->op==TK_COLUMN ) {
+      *iTable = pE->iTable;
+      continue;
+    }
     if( pOrderBy->a[i].done ) continue;
     if( sqlite3ExprIsInteger(pE, &iCol) ){
       if( iCol<=0 || iCol>pEList->nExpr ){
@@ -1456,7 +1461,7 @@ static int matchOrderbyToColumn(
     if( iCol>=0 ){
       pE->op = TK_COLUMN;
       pE->iColumn = iCol;
-      pE->iTable = iTable;
+      pE->iTable = *iTable;
       pE->iAgg = -1;
       pOrderBy->a[i].done = 1;
     }else if( mustComplete ){
@@ -1731,7 +1736,7 @@ static int multiSelect(
         ** intermediate results.
         */
         unionTab = pParse->nTab++;
-        if( pOrderBy && matchOrderbyToColumn(pParse, p, pOrderBy, unionTab,1) 
){
+        if( pOrderBy && matchOrderbyToColumn(pParse,p,pOrderBy,&unionTab,1) ){
           rc = 1;
           goto multi_select_end;
         }
@@ -1825,7 +1830,7 @@ static int multiSelect(
       */
       tab1 = pParse->nTab++;
       tab2 = pParse->nTab++;
-      if( pOrderBy && matchOrderbyToColumn(pParse,p,pOrderBy,tab1,1) ){
+      if( pOrderBy && matchOrderbyToColumn(pParse,p,pOrderBy,&tab1,1) ){
         rc = 1;
         goto multi_select_end;
       }
@@ -2643,11 +2648,11 @@ int sqlite3SelectResolve(
      sqlite3ExprResolveNames(&sNC, p->pHaving) ){
     return SQLITE_ERROR;
   }
-  if( p->pPrior==0 ){
-    if( processOrderGroupBy(&sNC, p->pOrderBy, "ORDER") ||
-        processOrderGroupBy(&sNC, pGroupBy, "GROUP") ){
-      return SQLITE_ERROR;
-    }
+  if( p->pPrior==0 && processOrderGroupBy(&sNC, p->pOrderBy, "ORDER") ){
+    return SQLITE_ERROR;
+  }
+  if( processOrderGroupBy(&sNC, pGroupBy, "GROUP") ){
+    return SQLITE_ERROR;
   }
 
   /* Make sure the GROUP BY clause does not contain aggregate functions.
@@ -2774,6 +2779,167 @@ static void updateAccumulator(Parse *pPa
   pAggInfo->directMode = 0;
 }
 
+#ifdef SQLITE_OMIT_COMPOUND_SELECT
+#define SQLITE_OMIT_UNION_WHERE_OPTIMIZATION
+#endif
+
+#ifndef SQLITE_OMIT_UNION_WHERE_OPTIMIZATION
+
+static int optSelect(Parse*, Select*);
+static int optExprList(Parse*, ExprList*);
+
+static int optExpr(Parse* pParse, Expr* p) {
+  if (p) {
+    if( optExpr(pParse, p->pLeft)
+    || optExpr(pParse, p->pRight)
+    || optExprList(pParse, p->pList)
+    || optSelect(pParse, p->pSelect) ){
+      return SQLITE_ERROR;
+    }
+  }
+  return SQLITE_OK;
+}
+
+static int optSrcList(Parse* pParse, SrcList* p) {
+  if (p) {
+    int i;
+    for (i = 0; i < p->nSrc; ++i) {
+      if( optSelect(pParse, p->a[i].pSelect) ){
+        return SQLITE_ERROR;
+      }
+    }
+  }
+  return SQLITE_OK;
+}
+
+static int optExprList(Parse* pParse, ExprList* p) {
+  if (p) {
+    int i;
+    for (i = 0; i < p->nExpr; ++i) {
+      if( optExpr(pParse, p->a[i].pExpr) ){
+        return SQLITE_ERROR;
+      }
+    }
+  }
+  return SQLITE_OK;
+}
+
+/* This function will optimize the WHERE clauses of a single compound SELECT
+** statement by merging it with its parent WHERE clause, often resulting
+** in a much faster query due to better index selection, consuming less
+** temp store in the process.
+** This is most commonly seen in the form of a SELECT on a VIEW
+** which contains one or more UNIONs.
+**
+** For example:
+**
+**  SELECT * FROM (
+**    SELECT a, b FROM t1 WHERE b!=7
+**   UNION ALL
+**    SELECT x, sum(y) FROM t2
+**    WHERE x<9 GROUP BY x HAVING sum(y)>7
+**  ) WHERE a>b;
+**
+** will be transformed into:
+**
+**  SELECT * FROM (
+**    SELECT a, b FROM t1 WHERE b!=7 AND a>b
+**   UNION ALL
+**    SELECT x, sum(y) FROM t2
+**    WHERE x<9 GROUP BY x HAVING sum(y)>7 AND x>sum(y)
+**  );
+**
+** This function expects a completely resolved parse tree.
+*/
+static void optimizeSelectOnUnion(Select* p) {
+  if( p && p->pWhere                   /* Parent has a WHERE clause. */
+  && p->pEList && p->pEList->nExpr > 0 /* At least 1 column in result list. */
+  && p->pSrc && p->pSrc->nSrc == 1     /* Parent FROM clause has 1 entry. */
+  && p->pSrc->a[0].pSelect             /* FROM clause entry is a SELECT. */
+  && p->pSrc->a[0].pSelect->pPrior     /* FROM entry is a compound SELECT. */
+  ){
+    int numColumnsInLeftmostSelect, deleteParentWhereClause = 1;
+    Select *leftmostSelect;
+    Select *pSub = p->pSrc->a[0].pSelect;
+    assert( pSub );
+    leftmostSelect = pSub;
+    while( leftmostSelect->pPrior ){
+      leftmostSelect = leftmostSelect->pPrior;
+    }
+    numColumnsInLeftmostSelect = leftmostSelect->pEList->nExpr;
+    for( ; pSub; pSub = pSub->pPrior ){
+      if( pSub->pEList->nExpr != numColumnsInLeftmostSelect ){
+        /* Mismatched number of result columns in a compound subselect.
+        ** This error will be caught later in the code generation phase. */
+        return;
+      }
+      /* Do not add the parent WHERE clause to this compound subselect if:
+      **    (1) the subselect is an aggregate SELECT without a GROUP BY clause
+      ** OR (2) the subselect has a LIMIT clause (may be overly restrictive)
+      */
+      if( (pSub->isAgg && !pSub->pGroupBy)   /* Restriction (1) */
+      || (pSub->pLimit)                      /* Restriction (2) */
+      ){
+        deleteParentWhereClause = 0;
+        continue;
+      } else {
+        Expr *parentWhereCopy = sqlite3ExprDup(p->pWhere);
+        substExpr(parentWhereCopy, p->pSrc->a[0].iCursor, pSub->pEList);
+        if( pSub->pGroupBy ){
+          pSub->pHaving = sqlite3ExprAnd(pSub->pHaving, parentWhereCopy);
+        } else {
+          pSub->pWhere = sqlite3ExprAnd(pSub->pWhere, parentWhereCopy);
+        }
+      }
+    }
+    if( deleteParentWhereClause ){
+      sqlite3ExprDelete(p->pWhere);
+      p->pWhere = 0;
+    }
+  }
+}
+
+#define SQLITE_SELECT_OPT_UNION_WHERE 1
+
+static int optSelect(Parse* pParse, Select* p) {
+  int rc = SQLITE_OK;
+  if (pParse && p && (p->fOptPass & SQLITE_SELECT_OPT_UNION_WHERE)==0) {
+    p->fOptPass |= SQLITE_SELECT_OPT_UNION_WHERE;
+    if( p->pPrior ){
+      if( p->pRightmost==0 ){
+        Select *pLoop;
+        for(pLoop=p; pLoop; pLoop=pLoop->pPrior){
+          pLoop->pRightmost = p;
+        }
+      }
+      if (p->pOrderBy) {
+        /* adjust cursors for ORDER BY expressions */
+        int unionTab = pParse->nTab++;
+        if (matchOrderbyToColumn(pParse, p, p->pOrderBy, &unionTab, 1)) {
+           return SQLITE_ERROR;
+        }
+      }
+    }
+    if (sqlite3SelectResolve(pParse, p, 0)) {
+      return SQLITE_ERROR;
+    }
+    if (optSrcList(pParse, p->pSrc)
+    || optExprList(pParse, p->pEList)
+    || optExpr(pParse, p->pWhere)
+    || optExprList(pParse, p->pGroupBy)
+    || optExpr(pParse, p->pHaving)
+    || optExprList(pParse, p->pOrderBy)
+    || optSelect(pParse, p->pPrior)
+    || optExpr(pParse, p->pLimit)
+    || optExpr(pParse, p->pOffset) ){
+      return SQLITE_ERROR;
+    }
+    optimizeSelectOnUnion(p);
+  }
+  return rc;
+}
+
+#endif /* SQLITE_OMIT_UNION_WHERE_OPTIMIZATION */
 
 /*
 ** Generate code for the given SELECT statement.
@@ -2861,6 +3027,13 @@ int sqlite3Select(
   memset(&sAggInfo, 0, sizeof(sAggInfo));
 
 #ifndef SQLITE_OMIT_COMPOUND_SELECT
+
+#ifndef SQLITE_OMIT_UNION_WHERE_OPTIMIZATION
+  if( optSelect(pParse, p) ){
+    return 1;
+  }
+#endif
+
   /* If there is are a sequence of queries, do the earlier ones first.
   */
   if( p->pPrior ){
Index: src/sqliteInt.h
===================================================================
RCS file: /sqlite/sqlite/src/sqliteInt.h,v
retrieving revision 1.569
diff -u -3 -p -r1.569 sqliteInt.h
--- src/sqliteInt.h     16 May 2007 17:28:43 -0000      1.569
+++ src/sqliteInt.h     19 May 2007 15:19:51 -0000
@@ -1263,6 +1263,7 @@ struct Select {
   u8 isAgg;              /* True if this is an aggregate query */
   u8 usesEphm;           /* True if uses an OpenEphemeral opcode */
   u8 disallowOrderBy;    /* Do not allow an ORDER BY to be attached if TRUE */
+  u8 fOptPass;           /* Set bits for each optimization pass performed */
   char affinity;         /* MakeRecord with this affinity for SRT_Set */
   SrcList *pSrc;         /* The FROM clause */
   Expr *pWhere;          /* The WHERE clause */
Index: test/misc5.test
===================================================================
RCS file: /sqlite/sqlite/test/misc5.test,v
retrieving revision 1.16
diff -u -3 -p -r1.16 misc5.test
--- test/misc5.test     3 Jan 2007 23:37:29 -0000       1.16
+++ test/misc5.test     19 May 2007 15:19:51 -0000
@@ -553,7 +553,7 @@ ifcapable subquery&&compound {
       SELECT * FROM logs 
       LIMIT (SELECT lmt FROM logs_base) ;
     }
-  } {1 {no such column: logs.id}}
+  } {1 {no such table: logs_base}}
 }
 
 # Overflow the lemon parser stack by providing an overly complex
Index: test/select7.test
===================================================================
RCS file: /sqlite/sqlite/test/select7.test,v
retrieving revision 1.9
diff -u -3 -p -r1.9 select7.test
--- test/select7.test   9 May 2007 22:56:39 -0000       1.9
+++ test/select7.test   19 May 2007 15:19:51 -0000
@@ -135,4 +135,87 @@ ifcapable {subquery && compound} {
      {only a single result allowed for a SELECT that is part of an expression}]
 }
 
+# Test cases for WHERE clause optimization on compound SELECT statements
+ifcapable {subquery && compound} {
+  do_test select7-6.1 {
+    execsql {
+      CREATE TABLE T5(a,b);
+      INSERT INTO T5 VALUES(1,2);
+      INSERT INTO T5 VALUES(1,3);
+      INSERT INTO T5 VALUES(1,4);
+      INSERT INTO T5 VALUES(2,4);
+      INSERT INTO T5 VALUES(2,7);
+      INSERT INTO T5 VALUES(3,1);
+
+      CREATE TABLE T4(x,y);
+      INSERT INTO T4 VALUES(2,3);
+      INSERT INTO T4 VALUES(7,9);
+      INSERT INTO T4 VALUES(8,8);
+      INSERT INTO T4 VALUES(1,3);
+
+      CREATE VIEW UV1 AS
+        SELECT x,y FROM T4 UNION ALL SELECT * FROM T5; 
+
+      SELECT * FROM UV1 WHERE x>1 and y<>3;
+    }
+  } {7 9 8 8 2 4 2 7 3 1}
+  do_test select7-6.2 {
+    execsql {
+      CREATE VIEW UV2 AS
+        SELECT x,y FROM T4 WHERE x*y>4 UNION SELECT * FROM T5 WHERE a*2=b;
+      SELECT x, sum(y) FROM UV2 GROUP BY x;
+    }
+  } {1 2 2 7 7 9 8 8}
+  do_test select7-6.3 {
+    execsql {
+      SELECT * FROM (
+        SELECT x,y from UV1 UNION SELECT y,x FROM UV2 WHERE x+y>6
+      ) WHERE y>2*x OR x=y ORDER BY y-x;
+    }
+  } {8 8 1 3 1 4 2 7}
+  do_test select7-6.4 {
+    execsql {
+      SELECT x, sum(y) FROM (
+        SELECT x,y from UV1 UNION SELECT y,x FROM UV2 WHERE x+y>6
+      ) GROUP BY 1 ORDER BY 2,1;
+    }
+  } {3 1 9 7 8 8 1 9 7 9 2 14}
+  do_test select7-6.5 {
+    execsql {
+      SELECT j, k FROM (
+        SELECT a+2 AS j, sum(b) AS k FROM T5 GROUP BY 1 HAVING k>=9
+          UNION ALL SELECT x,y FROM T4 WHERE y-x<>2
+            UNION ALL SELECT 7,3
+      ) WHERE j>3 ORDER BY 2,1;
+    }
+  } {7 3 8 8 4 11}
+  do_test select7-6.6 {
+    execsql {
+      CREATE TABLE n1(a integer primary key);
+      INSERT INTO n1 VALUES(1);
+      INSERT INTO n1 VALUES(2);
+      INSERT INTO n1 VALUES(3);
+      INSERT INTO n1 VALUES(4);
+      INSERT INTO n1 VALUES(5);
+      INSERT INTO n1 VALUES(6);
+      INSERT INTO n1 VALUES(7);
+      INSERT INTO n1 VALUES(8);
+      INSERT INTO n1 VALUES(9);
+      CREATE VIEW vu as select v3.a a, v5.a-v2.a*v7.a b
+                        from n1 v1,n1 v2,n1 v3,n1 v4,n1 v5,n1 v6,n1 v7;
+      CREATE VIEW v2 as select * from vu union all select 7, 8;
+      select count(*), avg(b) from v2 where a<3;
+    }
+  } {1062882 -20.0}
+  do_test select7-6.7 {
+    execsql {
+      SELECT * FROM (
+        SELECT 7 a, max(a) b FROM n1
+        UNION ALL 
+        SELECT a, a*a FROM n1 WHERE a<4
+      ) WHERE b>8;
+    }
+  } {7 9 3 9}
+}
+
 finish_test

-----------------------------------------------------------------------------
To unsubscribe, send email to [EMAIL PROTECTED]
-----------------------------------------------------------------------------

Reply via email to