Repository: incubator-trafodion
Updated Branches:
  refs/heads/master b6478e582 -> 222b6b76d


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelExpr.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelExpr.h b/core/sql/optimizer/RelExpr.h
index d2d6c20..1b9fad9 100644
--- a/core/sql/optimizer/RelExpr.h
+++ b/core/sql/optimizer/RelExpr.h
@@ -601,8 +601,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // A virtual method called on every node from prepareTreeForCSESharing(),
   // use this to do the actual work of removing predicates other than
@@ -629,8 +630,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // --------------------------------------------------------------------
   // Create a query execution plan.
@@ -968,6 +970,8 @@ public:
                                 CollHeap* outHeap = NULL);
   RelExpr * copyTree(CollHeap* heap = NULL);
   RelExpr * copyRelExprTree(CollHeap* outHeap = NULL);
+  const RelExpr * getOriginalExpr(NABoolean transitive = TRUE) const;
+  RelExpr * getOriginalExpr(NABoolean transitive = TRUE);
 
   // --------------------------------------------------------------------
   // Methods used internally by Cascades
@@ -1594,6 +1598,10 @@ private:
   // the partfunc pointed by my spp has a dop reduction.
   NABoolean dopReduced_;
 
+  // if we copy an expression with copyTopNode() then
+  // remember the original here, e.g. to find VEG regions
+  RelExpr *originalExpr_;
+
 public:
 
   // begin: accessors & mutators for relexpr tracking info

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelGrby.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelGrby.h b/core/sql/optimizer/RelGrby.h
index b5f1aee..8f4c21b 100644
--- a/core/sql/optimizer/RelGrby.h
+++ b/core/sql/optimizer/RelGrby.h
@@ -223,8 +223,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // flows compRefOpt constraints up the query tree.
   virtual void processCompRefOptConstraints(NormWA * normWAPtr) ;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelJoin.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelJoin.h b/core/sql/optimizer/RelJoin.h
index 2c66b31..a185ff7 100644
--- a/core/sql/optimizer/RelJoin.h
+++ b/core/sql/optimizer/RelJoin.h
@@ -792,8 +792,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // Detect whether rows coming from the ith child contain multi-column skew 
for
   // a set of join predicates. The output argument vidOfEquiJoinWithSkew is the

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelMisc.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelMisc.h b/core/sql/optimizer/RelMisc.h
index 7812d0b..d892e45 100644
--- a/core/sql/optimizer/RelMisc.h
+++ b/core/sql/optimizer/RelMisc.h
@@ -2557,6 +2557,9 @@ private:
   NAString comment_;
 };
 
+// forward reference
+class CountedCSEInfo;
+
 // Container for the common info about a common subexpression. This is
 // a helper class for class CommonSubExprRef below.
 
@@ -2576,10 +2579,20 @@ public:
   enum CSEAnalysisOutcome
     {
       UNKNOWN_ANALYSIS,    // analysis not yet done
+      ELIMINATED_IN_BINDER,// single-consumer CSE, eliminated in binder
       EXPAND,              // expand the common subexpression
       CREATE_TEMP,         // materialize CSE as temp, then read the temp
       TEMP,                // read the temp created by someone else
       ERROR                // error occurred, diags are set
+      // possible future analysis outcomes:
+      // - decide outcome in the optimizer
+      // - pipeline results from a single producer to multiple consumers
+      //
+      // Note: Right now, all consumers of a CSE have the same
+      //       outcome; that may change in the future,
+      //       e.g. we may have 5 consumers that can use sharing
+      //       very well and one that isn't suitable for sharing
+      //       (maybe because it reads different data).
     };
 
   enum CSETempTableType
@@ -2595,7 +2608,10 @@ public:
        cseId_(-1),
        childCSEs_(mem),
        consumers_(mem),
+       alternativeConsumers_(mem),
+       numLexicalRefs_(0),
        neededColumns_(mem),
+       cseTreeKeyColumns_(mem),
        idOfAnalyzingConsumer_(-1),
        analysisOutcome_(UNKNOWN_ANALYSIS),
        tempTableType_(UNKNOWN_TEMP_TABLE),
@@ -2607,12 +2623,16 @@ public:
 
   const NAString &getName() const                            { return name_; }
   Int32 getCSEId() const                                    { return cseId_; }
-  const LIST(CSEInfo *) &getChildCSEs() const           { return childCSEs_; }
+  const LIST(CountedCSEInfo) &getChildCSEs() const      { return childCSEs_; }
   const CollIndex getNumConsumers() const     { return consumers_.entries(); }
   CommonSubExprRef *getConsumer(CollIndex i) const   { return consumers_[i]; }
+  Int32 getNumLexicalRefs() const                  { return numLexicalRefs_; }
+  Int32 getTotalNumRefs(Int32 restrictToSingleConsumer = -1) const;
 
   Int32 getIdOfAnalyzingConsumer() const    { return idOfAnalyzingConsumer_; }
   CSEAnalysisOutcome getAnalysisOutcome(Int32 id) const;
+  NABoolean isShared(Int32 id) const
+                                   { return analysisOutcome_ == CREATE_TEMP; }
   NABoolean usesATempTable() const         { return insertIntoTemp_ != NULL; }
   CSETempTableType getTempTableType() const         { return tempTableType_; }
   const NABitVector &getNeededColumns() const       { return neededColumns_; }
@@ -2621,14 +2641,20 @@ public:
                                     { return vegRefsWithDifferingConstants_; }
   const ValueIdSet &getVEGRefsWithDifferingInputs() const
                                        { return vegRefsWithDifferingInputs_; }
+  const NABitVector &getCSETreeKeyColumns() const
+                                                { return cseTreeKeyColumns_; }
   const QualifiedName &getTempTableName() const     { return tempTableName_; }
   const NAString &getTempTableDDL() const            { return tempTableDDL_; }
   const NATable *getTempNATable() const               { return tempNATable_; }
   RelExpr *getInsertIntoTemp() const               { return insertIntoTemp_; }
 
   void setCSEId(Int32 id)                                     { cseId_ = id; }
-  void addChildCSE(CSEInfo *child);
+  Int32 addChildCSE(CSEInfo *childInfo, NABoolean addLexicalRef);
   void addCSERef(CommonSubExprRef *cse);
+  void eliminate()               { analysisOutcome_ == ELIMINATED_IN_BINDER; }
+  void registerAnAlternativeConsumer(CommonSubExprRef *c)
+                                          { alternativeConsumers_.insert(c); }
+  void replaceConsumerWithAnAlternative(CommonSubExprRef *c);
   void setIdOfAnalyzingConsumer(Int32 id)     { idOfAnalyzingConsumer_ = id; }
   void setAnalysisOutcome(CSEAnalysisOutcome outcome)
                                                { analysisOutcome_ = outcome; }
@@ -2641,6 +2667,7 @@ public:
                                       { vegRefsWithDifferingConstants_ += s; }
   void addVEGRefsWithDifferingInputs(const ValueIdSet &s)
                                          { vegRefsWithDifferingInputs_ += s; }
+  void addCSEKeyColumn(CollIndex c)               { cseTreeKeyColumns_ += c; }
   void setTempTableName(const QualifiedName &n)        { tempTableName_ = n; }
   void setTempTableDDL(const char *s)                   { tempTableDDL_ = s; }
   void setTempNATable(const NATable *nat)              { tempNATable_ = nat; }
@@ -2654,13 +2681,27 @@ private:
   Int32 cseId_;
 
   // list of other CSEs that are referenced by this one
-  LIST(CSEInfo *) childCSEs_;
+  LIST(CountedCSEInfo) childCSEs_;
 
   // list of nodes referring to the common
   // subexpression, their index numbers match
   // the index in this list
   LIST(CommonSubExprRef *) consumers_;
 
+  // We sometimes make copies of a tree, creating alternative
+  // consumers. Some notes on these alternative copies: The analyzing
+  // consumer (see CommonSubExprRef::analyzeAndPrepareForSharing())
+  // and its ancestors are always in the consumers_ list and do not
+  // change. As we replace a tree with its copy, we may replace other
+  // consumers with their respective copies. The code must be able to
+  // deal with this situation, so be careful when making decisions
+  // based on a particular consumer obtained from this list.
+  LIST(CommonSubExprRef *) alternativeConsumers_;
+
+  // number of lexical refs in the query for this expression
+  // (how many time does it appear in the query text)
+  Int32 numLexicalRefs_;
+
   // a common list of columns and predicate to use used for a
   // materialized CSE
 
@@ -2673,6 +2714,13 @@ private:
   ValueIdSet vegRefsWithDifferingConstants_;
   ValueIdSet vegRefsWithDifferingInputs_;
 
+  // information for heuristics
+
+  // "key" columns in the child tree, this could include Hive
+  // partition columns, "tag" constant columns in a union, and also
+  // trailing key columns that may not be significant
+  NABitVector cseTreeKeyColumns_;
+
   // information for the materialization of the CSE
   Int32 idOfAnalyzingConsumer_;
   CSEAnalysisOutcome analysisOutcome_;
@@ -2683,6 +2731,29 @@ private:
   RelExpr *insertIntoTemp_;
 };
 
+// A CSEInfo and a count (of how many references we have to it)
+class CountedCSEInfo
+{
+public:
+
+  CountedCSEInfo() : info_(NULL), lexicalCount_(-1)                         {}
+  CountedCSEInfo(CSEInfo *info, Int32 cnt = 0) :
+                                            info_(info), lexicalCount_(cnt) {}
+  CountedCSEInfo(const CountedCSEInfo &other) :
+                     info_(other.info_), lexicalCount_(other.lexicalCount_) {}
+  ~CountedCSEInfo()                                                         {}
+
+  CSEInfo *getInfo() const                                   { return info_; }
+  Int32 getLexicalCount() const                      { return lexicalCount_; }
+
+  void incrementLexicalCount()                            { lexicalCount_++; }
+
+private:
+
+  CSEInfo *info_;
+  Int32 lexicalCount_;
+};
+
 // -----------------------------------------------------------------------
 // The CommonSubExprRef class represents a potential common subexpression
 // (CSE) in the query tree. The common subexpression has a name, and
@@ -2705,6 +2776,11 @@ public:
        : RelExpr(REL_COMMON_SUBEXPR_REF,cse,NULL,oHeap),
          internalName_(internalName, oHeap),
          id_(-1),
+         isAnExpansionOf_(NULL),
+         isAnAlternativeOf_(NULL),
+         parentCSEId_(-1),
+         parentRefId_(-1),
+         lexicalRefNumFromParent_(-1),
          hbAccessOptionsFromCTE_(NULL)
   {}
 
@@ -2713,6 +2789,10 @@ public:
   // the name used in the CTE or a generated name
   const NAString &getName() const                    { return internalName_; }
   Int32 getId() const                                          { return id_; }
+  Int32 getParentCSEId() const                        { return parentCSEId_; }
+  Int32 getParentConsumerId() const                   { return parentRefId_; }
+  Int32 getLexicalRefNumFromParent() const {return lexicalRefNumFromParent_; }
+  NABoolean isAChildOfTheMainQuery() const;
 
   virtual Int32 getArity() const;
 
@@ -2721,6 +2801,7 @@ public:
   const ValueIdSet & getNonVEGColumns() const       { return nonVEGColumns_; }
 
   const ValueIdSet &getPushedPredicates() const  { return pushedPredicates_; }
+  const EstLogPropSharedPtr &getEstLogProps() const{ return cseEstLogProps_; }
 
   void setId(Int32 id)                     { CMPASSERT(id_ == -1); id_ = id; }
 
@@ -2729,10 +2810,46 @@ public:
                                             { hbAccessOptionsFromCTE_ = hbo; }
 
   // add this node to the global list of CommonSubExprRefs kept in CmpStatement
-  void addToCmpStatement();
+  void addToCmpStatement(NABoolean lexicalRef);
+
+  // establish a parent/child relationship between two CommonSubExprRefs
+  void addParentRef(CommonSubExprRef *parentRef);
 
   // is this the first reference to the common subexpression?
-  NABoolean isFirstReference();
+  NABoolean isFirstReference() const;
+
+  // CommonSubExprRefs come in different flavors:
+  //
+  // Lexical ref:     This is a node that got created in a parser
+  //                  rule, parsing a reference to a CTE (or, in the
+  //                  future, something equivalent, like a view reference).
+  // Expanded ref:    Since the parser expands CTE references by making a
+  //                  copy on each reference, if that copied tree contains
+  //                  child references, an expanded ref is created.
+  //                  Lexical and expanded refs shouldn't be treated
+  //                  differently, it is somewhat arbitrary which one
+  //                  is the lexical one and which ones are expanded.
+  // Original ref:    This is the original copy of a lexical or expanded
+  //                  ref.
+  // Alternative ref: Sometimes we copy a tree after binding, e.g. to
+  //                  have a fall-back during the SQO phase. Such copies
+  //                  are alternative refs, and only one of them should
+  //                  be used in the output of each compilation phase.
+  //                  Only one of the alternatives is part of the
+  //                  CSEInfo consumer list, the others are stored in
+  //                  the alternative consumer list.
+  //
+  // A given CommonSubExprRef is either a lexical or an expanded ref.
+  // The "alternative" flavor is orthogonal to that, both lexical
+  // and expanded refs can either be an original or an alternative.
+  NABoolean isALexicalRef() const         { return isAnExpansionOf_ == NULL; }
+  NABoolean isAnExpandedRef() const       { return isAnExpansionOf_ != NULL; }
+  NABoolean isAnOriginalRef() const     { return isAnAlternativeOf_ == NULL; }
+  NABoolean isAnAlternativeRef() const  { return isAnAlternativeOf_ != NULL; }
+  CommonSubExprRef *getLexicalRef()
+                        { return isAnExpansionOf_ ? isAnExpansionOf_ : this; }
+  CommonSubExprRef *getOriginalRef()
+                    { return isAnAlternativeOf_ ? isAnAlternativeOf_ : this; }
 
   // a virtual function for performing name binding within the query tree
   virtual RelExpr * bindNode(BindWA *bindWAPtr);
@@ -2749,6 +2866,14 @@ public:
        Lng32 childId = (-MAX_REL_ARITY));
   virtual void rewriteNode(NormWA & normWARef);
   virtual RelExpr * semanticQueryOptimizeNode(NormWA & normWARef);
+  virtual NABoolean prepareMeForCSESharing(
+       const ValueIdSet &outputsToAdd,
+       const ValueIdSet &predicatesToRemove,
+       const ValueIdSet &commonPredicatesToAdd,
+       const ValueIdSet &inputsToRemove,
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // add all the expressions that are local to this
   // node to an existing list of expressions (used by GUI tool)
@@ -2773,6 +2898,10 @@ public:
   // for use by the root node for inlining
   static Union *makeUnion(RelExpr *lc, RelExpr *rc, NABoolean blocked);
 
+  static void makeValueIdListFromBitVector(ValueIdList &tgt,
+                                           const ValueIdList &src,
+                                           const NABitVector &vec);
+
   // for debugging
   void display();
   static void displayAll(const char *optionalId = NULL);
@@ -2817,10 +2946,6 @@ private:
   RelExpr * createTempScan(CSEInfo &info, NormWA & normWARef);
   RelExpr * getTempScan() const                           { return tempScan_; }
 
-  static void makeValueIdListFromBitVector(ValueIdList &tgt,
-                                           const ValueIdList &src,
-                                           const NABitVector &vec);
-
   // data members
   // ------------
 
@@ -2833,6 +2958,40 @@ private:
   // common subexprssion. These references are numbered 0, 1, ...
   Int32 id_;
 
+  // indicate different flavors of CommonSubExprRef nodes and point
+  // back to the original node(s), if any
+  CommonSubExprRef *isAnExpansionOf_;
+  CommonSubExprRef *isAnAlternativeOf_;
+
+  // There are three ids that describe our predecessor in the
+  // RelExpr tree and in the lexical directed multigraph of the
+  // CSEs:
+  // Parent CSE id:
+  //     This is the the parent CSE or main query that directly
+  //     contains the reference. In the RelExpr tree, this is our
+  //     closest ancestor CommonSubExprRef node or the root node
+  //     of the tree. It is stored as the integer CSE id here,
+  //     we could also store the name. A special id is used for
+  //     the main query: CmpStatement::getCSEIdForMainQuery().
+  //
+  // Parent Ref id:
+  //     This is the consumer id of the parent CommonSubExprRef
+  //     (or 0 if the ref originates from the main query).
+  //
+  // Lexical ref num from parent:
+  //     This indicates which reference from the parent CSE we
+  //     are looking at. The main query or a CSE may refer to
+  //     the same child multiple times, and this is the number
+  //     indicating this (0...n-1). The id_ data member counts
+  //     the total number of references to a CSE; the lexical
+  //     ref num from parent counts only the lexical number
+  //     of references and only from a particular parent CSE
+  //     (or the main query).
+  //
+  Int32 parentCSEId_;
+  Int32 parentRefId_;
+  Int32 lexicalRefNumFromParent_;
+
   // The list of columns produced by the common subexpression.
   // We keep the full list here, even when the characteristic
   // outputs get reduced during the normalization phase. This
@@ -2841,8 +3000,9 @@ private:
   // of different consumers is by position in this list.
   ValueIdList columnList_;
 
-  // same columns without making VEGRefs. This is needed
-  // in preCodeGen.
+  // Same columns without making VEGRefs. This is needed
+  // in preCodeGen. It also includes other potential VEG
+  // members if this is the analyzing consumer.
   ValueIdSet nonVEGColumns_;
 
   // The common inputs (typically parameters). Pushing down
@@ -2873,6 +3033,12 @@ private:
   // create a temp table for the resulting CSE.
   HbaseAccessOptions *hbAccessOptionsFromCTE_;
 
+  // the estimated logical properties of this common subexpression,
+  // set in the analyzing consumer only, since they should be the same
+  // for all consumers
+  EstLogPropSharedPtr cseEstLogProps_;
+
+  // the temp scan to replace this node after the SQO phase
   RelExpr *tempScan_;
 
 }; // class CommonSubExprRef

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelScan.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelScan.h b/core/sql/optimizer/RelScan.h
index b2c7add..4f24dc0 100644
--- a/core/sql/optimizer/RelScan.h
+++ b/core/sql/optimizer/RelScan.h
@@ -499,8 +499,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
  // synthesizes compRefOpt constraints.
   virtual void processCompRefOptConstraints(NormWA * normWAPtr) ;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelSet.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelSet.h b/core/sql/optimizer/RelSet.h
index 00eff98..434b91a 100644
--- a/core/sql/optimizer/RelSet.h
+++ b/core/sql/optimizer/RelSet.h
@@ -261,8 +261,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // ---------------------------------------------------------------------
   // Function for testing the eligibility of this

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/VEGTable.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/VEGTable.cpp b/core/sql/optimizer/VEGTable.cpp
index 5e92bcb..7b07531 100644
--- a/core/sql/optimizer/VEGTable.cpp
+++ b/core/sql/optimizer/VEGTable.cpp
@@ -1693,16 +1693,29 @@ VEGRegion * VEGTable::getVEGRegion(const ExprNode * 
const ownerExpr,
                                   Lng32 subtreeId) const
 {
   VEGRegion* candidateRegion = NULL;
+  const ExprNode * ownerOrOrig = ownerExpr;
   
-  // Loop through the regions to find the one for this ExprNode
-  for (RegionId i = FIRST_VEG_REGION;
-       i < (RegionId)arrayEntry_.entries();
-       i++)
+  // Loop over the node and its original expressions
+  while (ownerOrOrig)
     {
-      candidateRegion = arrayEntry_[i];
-      if ( (candidateRegion->getOwnerExpr() == ownerExpr) AND 
-          (candidateRegion->getSubtreeId() == subtreeId))
-       return candidateRegion;
+      // Loop through the regions to find the one for this ExprNode
+      for (RegionId i = FIRST_VEG_REGION;
+           i < (RegionId)arrayEntry_.entries();
+           i++)
+        {
+          candidateRegion = arrayEntry_[i];
+          if ( (candidateRegion->getOwnerExpr() == ownerOrOrig) AND 
+               (candidateRegion->getSubtreeId() == subtreeId))
+            return candidateRegion;
+        }
+
+      // VEGRegion not found, try with one of the original expressions
+      const RelExpr *re = ownerOrOrig->castToRelExpr();
+
+      if (re && re->getOriginalExpr(FALSE) != re)
+        ownerOrOrig = re->getOriginalExpr(FALSE);
+      else
+        ownerOrOrig = NULL;
     }
 
     // Didn't find any

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/ValueDesc.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ValueDesc.cpp b/core/sql/optimizer/ValueDesc.cpp
index b0d77ba..c308854 100644
--- a/core/sql/optimizer/ValueDesc.cpp
+++ b/core/sql/optimizer/ValueDesc.cpp
@@ -3293,7 +3293,7 @@ void ValueIdSet::replaceOperandsOfInstantiateNull
 // For each ValueId in other check whether it is referenced anywhere
 // within an ItemExpr whose ValueId belongs to this set.
 // -----------------------------------------------------------------------
-void ValueIdSet::weedOutUnreferenced(ValueIdSet & other)
+void ValueIdSet::weedOutUnreferenced(ValueIdSet & other) const
 {
   ValueIdSet unrefSet;
   NABoolean notFound;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/ValueDesc.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ValueDesc.h b/core/sql/optimizer/ValueDesc.h
index d423eb6..d69d988 100644
--- a/core/sql/optimizer/ValueDesc.h
+++ b/core/sql/optimizer/ValueDesc.h
@@ -1080,7 +1080,7 @@ public:
   // Delete all values in other that are not referenced by the value
   // expressions that belong to this set.
   // --------------------------------------------------------------------
-  void weedOutUnreferenced(ValueIdSet & other);
+  void weedOutUnreferenced(ValueIdSet & other) const;
 
   // --------------------------------------------------------------------
   // weedOutNonEquiPreds()

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/parser/sqlparser.y
----------------------------------------------------------------------
diff --git a/core/sql/parser/sqlparser.y b/core/sql/parser/sqlparser.y
index b6a6d88..3a2b3aa 100755
--- a/core/sql/parser/sqlparser.y
+++ b/core/sql/parser/sqlparser.y
@@ -6727,7 +6727,7 @@ table_name_and_hint : table_name optimizer_hint 
hbase_access_options
                               if ($3)
                                 cse->setHbaseAccessOptions($3);
 
-                              cse->addToCmpStatement();
+                              cse->addToCmpStatement(TRUE);
                               $$ = cse;
                             }
                           else
@@ -7061,7 +7061,7 @@ table_name_as_clause_and_hint : table_name as_clause 
optimizer_hint hbase_access
                                          if ($4)
                                            cse->setHbaseAccessOptions($4);
 
-                                         cse->addToCmpStatement();
+                                         cse->addToCmpStatement(TRUE);
                                          $$ = cse;
                                        }
                                      else

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/regress/compGeneral/EXPECTED045
----------------------------------------------------------------------
diff --git a/core/sql/regress/compGeneral/EXPECTED045 
b/core/sql/regress/compGeneral/EXPECTED045
index 6489b5d..84692d9 100644
--- a/core/sql/regress/compGeneral/EXPECTED045
+++ b/core/sql/regress/compGeneral/EXPECTED045
@@ -466,6 +466,29 @@
 
 --- SQL operation complete.
 >>
+>>insert into date_dim values (
++>  1, '1', date '2000-01-01', 1, 1, 1, 2000, 1, 1, 1, 1, 2000, 1, 1, 'aday', 
'aq', ' ', ' ', ' ', 1, 31, 0, 0, ' ', ' ', ' ', ' ', ' '
++>);
+
+--- 1 row(s) inserted.
+>>
+>>insert into store_sales values (
++>  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 
0.0, 1.0, 1.0, 0.0
++>);
+
+--- 1 row(s) inserted.
+>>
+>>update statistics for table date_dim on every column;
+
+--- SQL operation complete.
+>>update statistics for table date_dim on (d_qoy, d_year);
+
+*** WARNING[9202] UPDATE STATISTICS has located previously generated 
histograms that are not being regenerated. This may affect the plans that will 
be generated. Missing columns lists are 
(D_CURRENT_YEAR),(D_CURRENT_QUARTER),(D_CURRENT_MONTH),(D_CURRENT_WEEK),(D_CURRENT_DAY),(D_SAME_DAY_LQ),(D_SAME_DAY_LY),(D_LAST_DOM),(D_FIRST_DOM),(D_FOLLOWING_HOLIDAY),(D_WEEKEND),(D_HOLIDAY),(D_QUARTER_NAME),(D_DAY_NAME),(D_FY_WEEK_SEQ),(D_FY_QUARTER_SEQ),(D_FY_YEAR),(D_DOM),(D_MOY),(D_DOW),(D_QUARTER_SEQ),(D_WEEK_SEQ),(D_MONTH_SEQ),(D_DATE),(D_DATE_ID),(D_DATE_SK).
+
+--- SQL operation completed with warnings.
+>>update statistics for table store_sales on every column;
+
+--- SQL operation complete.
 >>
 >>--------------------------------------------------------------------
 >>obey TEST045(queries);
@@ -712,7 +735,7 @@ SCAN TEMP                                          2
 +> where d_week_seq1=d_week_seq2-53
 +> order by d_week_seq1;
 
-*** WARNING[5001] Common subexpression WSWSCS cannot be shared among multiple 
consumers. Reason: Operator map_value_ids not supported.
+*** WARNING[5001] Common subexpression WSCS will not be shared among multiple 
consumers. Reason: expression is only evaluated once because parent is 
materialized.
 
 --- SQL command prepared.
 >>-- use temp for wscs only, not wswscs, due to MapValueIds
@@ -1146,7 +1169,7 @@ SCAN TEMP                                          4
 
 *** WARNING[2997]  (Subquery was not unnested. Reason: No Correlation found)
 
-*** WARNING[2997] RESULTS (Operator map_value_ids not supported)
+*** WARNING[2997]  (Subquery was not unnested. Reason: No Correlation found)
 
 --- SQL command prepared.
 >>execute show_cses;
@@ -1154,9 +1177,9 @@ SCAN TEMP                                          4
 OPERATOR                        HOW_MANY            
 ------------------------------  --------------------
 
-BLOCKED_UNION                                      2
-HIVE_INSERT                                        1
-SCAN TEMP                                         15
+BLOCKED_UNION                                      5
+HIVE_INSERT                                        3
+SCAN TEMP                                         11
 
 --- 3 row(s) selected.
 >>execute s;
@@ -1229,9 +1252,7 @@ CHANNEL  I_BRAND_ID   I_CLASS_ID   I_CATEGORY_ID  
SUM_SALES             NUMBER_S
 
 *** WARNING[2997]  (Subquery was not unnested. Reason: No Correlation found)
 
-*** WARNING[5001] Common subexpression FREQUENT_SS_ITEMS cannot be shared 
among multiple consumers. Reason: Differing inputs in CTE references, try CQD 
QUERY_CACHE '0'.
-
-*** WARNING[5001] Common subexpression BEST_SS_CUSTOMER cannot be shared among 
multiple consumers. Reason: Differing inputs in CTE references, try CQD 
QUERY_CACHE '0'.
+*** WARNING[5001] Common subexpression MAX_STORE_SALES will not be shared 
among multiple consumers. Reason: expression is only evaluated once because 
parent is materialized.
 
 --- SQL command prepared.
 >>execute show_cses;
@@ -1239,9 +1260,9 @@ CHANNEL  I_BRAND_ID   I_CLASS_ID   I_CATEGORY_ID  
SUM_SALES             NUMBER_S
 OPERATOR                        HOW_MANY            
 ------------------------------  --------------------
 
-BLOCKED_UNION                                      2
-HIVE_INSERT                                        1
-SCAN TEMP                                          2
+BLOCKED_UNION                                      3
+HIVE_INSERT                                        2
+SCAN TEMP                                          4
 
 --- 3 row(s) selected.
 >>execute s;
@@ -1320,9 +1341,7 @@ SCAN TEMP                                          2
 
 *** WARNING[2997]  (Subquery was not unnested. Reason: No Correlation found)
 
-*** WARNING[5001] Common subexpression FREQUENT_SS_ITEMS cannot be shared 
among multiple consumers. Reason: Differing inputs in CTE references, try CQD 
QUERY_CACHE '0'.
-
-*** WARNING[5001] Common subexpression BEST_SS_CUSTOMER cannot be shared among 
multiple consumers. Reason: Differing inputs in CTE references, try CQD 
QUERY_CACHE '0'.
+*** WARNING[5001] Common subexpression MAX_STORE_SALES will not be shared 
among multiple consumers. Reason: expression is only evaluated once because 
parent is materialized.
 
 --- SQL command prepared.
 >>execute show_cses;
@@ -1330,9 +1349,9 @@ SCAN TEMP                                          2
 OPERATOR                        HOW_MANY            
 ------------------------------  --------------------
 
-BLOCKED_UNION                                      2
-HIVE_INSERT                                        1
-SCAN TEMP                                          2
+BLOCKED_UNION                                      3
+HIVE_INSERT                                        2
+SCAN TEMP                                          4
 
 --- 3 row(s) selected.
 >>execute s;
@@ -1583,9 +1602,13 @@ SCAN TEMP                                          2
 +>       > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales 
else null end
 +> order by store_q1_q2_increase;
 
-*** WARNING[5001] Common subexpression SS cannot be shared among multiple 
consumers. Reason: Encountered VEGs with different constants in different 
consumers.
+*** WARNING[5001] Common subexpression SS will not be shared among multiple 
consumers. Reason: Encountered VEGs with different constants in different 
consumers.
+
+*** WARNING[5001] Common subexpression SS will not be shared among multiple 
consumers. Reason: Differing inputs in CTE references, try CQD QUERY_CACHE '0'.
+
+*** WARNING[5001] Common subexpression WS will not be shared among multiple 
consumers. Reason: Encountered VEGs with different constants in different 
consumers.
 
-*** WARNING[5001] Common subexpression WS cannot be shared among multiple 
consumers. Reason: Encountered VEGs with different constants in different 
consumers.
+*** WARNING[5001] Common subexpression WS will not be shared among multiple 
consumers. Reason: Differing inputs in CTE references, try CQD QUERY_CACHE '0'.
 
 --- SQL command prepared.
 >>-- Different constants used in different references of WITH clause - not yet 
 >>supported

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/regress/compGeneral/TEST045
----------------------------------------------------------------------
diff --git a/core/sql/regress/compGeneral/TEST045 
b/core/sql/regress/compGeneral/TEST045
index 36c8bf4..6a6d465 100644
--- a/core/sql/regress/compGeneral/TEST045
+++ b/core/sql/regress/compGeneral/TEST045
@@ -486,6 +486,17 @@ create table item
 --  )
 ;
 
+insert into date_dim values (
+  1, '1', date '2000-01-01', 1, 1, 1, 2000, 1, 1, 1, 1, 2000, 1, 1, 'aday', 
'aq', ' ', ' ', ' ', 1, 31, 0, 0, ' ', ' ', ' ', ' ', ' '
+);
+
+insert into store_sales values (
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 
0.0, 1.0, 1.0, 0.0
+);
+
+update statistics for table date_dim on every column;
+update statistics for table date_dim on (d_qoy, d_year);
+update statistics for table store_sales on every column;
 
 --------------------------------------------------------------------
 ?section queries

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/DefaultConstants.h 
b/core/sql/sqlcomp/DefaultConstants.h
index 3dd96e6..af60786 100644
--- a/core/sql/sqlcomp/DefaultConstants.h
+++ b/core/sql/sqlcomp/DefaultConstants.h
@@ -3880,6 +3880,11 @@ enum DefaultConstants
 
   GROUP_BY_PUSH_TO_BOTH_SIDES_OF_JOIN,
 
+  CSE_TEMP_TABLE_MAX_SIZE,
+  CSE_TEMP_TABLE_MAX_MAX_SIZE,
+  CSE_COMMON_KEY_PRED_CONTROL,
+  CSE_PCT_KEY_COL_PRED_CONTROL,
+
   // This enum constant must be the LAST one in the list; it's a count,
   // not an Attribute (it's not IN DefaultDefaults; it's the SIZE of it)!
   __NUM_DEFAULT_ATTRIBUTES

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index b6fa742..dab9e68 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -1152,16 +1152,23 @@ SDDkwd__(CAT_ENABLE_QUERY_INVALIDATION, "ON"),
  DDkwd__(CSE_CACHE_TEMP_QUERIES,               "OFF"),
  // "cleanup obsolete volatile tables" command cleans up Hive temp tables
  DDkwd__(CSE_CLEANUP_HIVE_TABLES,              "OFF"),
+ // don't temp if all consumers have preds on n key columns
+ DDui___(CSE_COMMON_KEY_PRED_CONTROL,          "1"),
  // emit warnings that help diagnose why CSEs are not shared
  DDkwd__(CSE_DEBUG_WARNINGS,                   "OFF"),
  // create a CommonSubExpr node for CTEs defined in WITH clauses (OFF/ON)
  DDkwd__(CSE_FOR_WITH,                         "OFF"),
  // use Hive tables as temp tables
  DDkwd__(CSE_HIVE_TEMP_TABLE,                  "ON"),
+ // don't temp if avg consumer has preds on more than n percent of key cols
+ DDflt0_(CSE_PCT_KEY_COL_PRED_CONTROL,         "49.9"),
  // print debugging info on stdout
  DDkwd__(CSE_PRINT_DEBUG_INFO,                 "OFF"),
+ // limit temp table size (based on max. card and regular card)
+ DDflt0_(CSE_TEMP_TABLE_MAX_MAX_SIZE,          "1E12"),
+ DDflt0_(CSE_TEMP_TABLE_MAX_SIZE,              "1E9"),
  // implement CommonSubExpr as a temp table (OFF/SYSTEM/ON)
- DDkwd__(CSE_USE_TEMP,                         "ON"),
+ DDkwd__(CSE_USE_TEMP,                         "SYSTEM"),
 
 SDDui___(CYCLIC_ESP_PLACEMENT,                  "1"),
 

Reply via email to