This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 6602921d38f83401cbc81ee89ed4d317e2e02a0a
Author: Chris Hajas <[email protected]>
AuthorDate: Sun Mar 5 12:34:12 2023 -0800

    Optimize ConstructRootColMappingPerPart for common case
    
    This method is very expensive when we have many partitions and many
    columns. In a partition table, it will iterate over each partition, and
    for each partition check every column against each root's column. This
    is because the child partition can have dropped columns and columns in a
    different order than the root. Because this is a less-common scenario,
    we should optimize for the common case here. While iterating through the
    root's columns, as long as the child's column also matches, we don't
    need to recheck child columns that have previously been found. If there
    are no dropped/swapped columns, this makes the check go from O(n^2) for
    each partition to O(n).
    
    As an example, for a 1200 partition, 200 column table of type `int` with no
    dropped/swapped columns, a simple `select * from part_table` query went 
from 6
    to 5.3s with an empty relcache, and 560s to 110ms with the table in the
    relcache. With more partitions and more columns the speedup gets even
    larger.
---
 .../gpopt/translate/CTranslatorRelcacheToDXL.cpp   |  1 -
 .../gpopt/operators/CLogicalDynamicGetBase.h       |  5 ++
 .../src/operators/CLogicalDynamicGetBase.cpp       | 56 ++++++++++++----------
 .../libgpos/include/gpos/string/CWStringConst.h    | 10 ++++
 .../gporca/libgpos/src/string/CWStringConst.cpp    | 28 +++++++++++
 5 files changed, 75 insertions(+), 25 deletions(-)

diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp 
b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
index 2a3d0f80b4..57f64baf2e 100644
--- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
@@ -2537,7 +2537,6 @@ 
CTranslatorRelcacheToDXL::RetrievePartKeysAndTypes(CMemoryPool *mp,
 {
        GPOS_ASSERT(nullptr != rel);
 
-       // FIXME: isn't it faster to check rel.rd_partkey?
        if (!rel->rd_partdesc)
        {
                // not a partitioned table
diff --git 
a/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDynamicGetBase.h 
b/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDynamicGetBase.h
index 64f630bb1b..3f097800dc 100644
--- 
a/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDynamicGetBase.h
+++ 
b/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDynamicGetBase.h
@@ -70,6 +70,11 @@ protected:
        static ColRefToUlongMapArray *ConstructRootColMappingPerPart(
                CMemoryPool *mp, CColRefArray *root_cols, IMdIdArray 
*partition_mdids);
 
+       using ColNameToIndexMap =
+               CHashMap<const CWStringConst, ULONG, CWStringConst::HashValue,
+                                CWStringConst::Equals, CleanupNULL<const 
CWStringConst>,
+                                CleanupDelete<ULONG>>;
+
 public:
        // ctors
        explicit CLogicalDynamicGetBase(CMemoryPool *mp);
diff --git 
a/src/backend/gporca/libgpopt/src/operators/CLogicalDynamicGetBase.cpp 
b/src/backend/gporca/libgpopt/src/operators/CLogicalDynamicGetBase.cpp
index 6329e8e5db..499d54d6cb 100644
--- a/src/backend/gporca/libgpopt/src/operators/CLogicalDynamicGetBase.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CLogicalDynamicGetBase.cpp
@@ -220,7 +220,12 @@ CLogicalDynamicGetBase::DerivePartitionInfo(CMemoryPool 
*mp,
 }
 
 // Construct a mapping from each column in root table to an index in each child
-// partition's table descr by matching column names
+// partition's table descr by matching column names For each partition, this
+// iterates over each child partition and compares the column names and creates
+// a mapping. In the common case, the root and child partition's columns have
+// the same colref. However, if they've been dropped/swapped, the mapping will
+// be different. This method is fairly expensive, as it's building multiple 
hashmaps
+// and ends up getting called from a few different places in the codebase.
 ColRefToUlongMapArray *
 CLogicalDynamicGetBase::ConstructRootColMappingPerPart(
        CMemoryPool *mp, CColRefArray *root_cols, IMdIdArray *partition_mdids)
@@ -228,6 +233,15 @@ CLogicalDynamicGetBase::ConstructRootColMappingPerPart(
        CMDAccessor *mda = COptCtxt::PoctxtFromTLS()->Pmda();
 
        ColRefToUlongMapArray *part_maps = GPOS_NEW(mp) 
ColRefToUlongMapArray(mp);
+
+       // Build hashmap of colname to the index
+       ColNameToIndexMap *root_mapping = GPOS_NEW(mp) ColNameToIndexMap(mp);
+       for (ULONG i = 0; i < root_cols->Size(); ++i)
+       {
+               CColRef *root_colref = (*root_cols)[i];
+               root_mapping->Insert(root_colref->Name().Pstr(), GPOS_NEW(mp) 
ULONG(i));
+       }
+
        for (ULONG ul = 0; ul < partition_mdids->Size(); ++ul)
        {
                IMDId *part_mdid = (*partition_mdids)[ul];
@@ -236,35 +250,28 @@ CLogicalDynamicGetBase::ConstructRootColMappingPerPart(
                GPOS_ASSERT(nullptr != partrel);
 
                ColRefToUlongMap *mapping = GPOS_NEW(mp) ColRefToUlongMap(mp);
-
-               for (ULONG i = 0; i < root_cols->Size(); ++i)
+               // The root mapping cannot contain dropped columns, but may be
+               // in a different order than the child cols.Iterate through 
each of the child
+               // cols, and retrieve the corresponding index in the parent 
table
+               for (ULONG j = 0; j < partrel->ColumnCount(); ++j)
                {
-                       CColRef *root_colref = (*root_cols)[i];
+                       const IMDColumn *coldesc = partrel->GetMdCol(j);
+                       const CWStringConst *colname = 
coldesc->Mdname().GetMDName();
 
-                       BOOL found_mapping = false;
-                       for (ULONG j = 0, idx = 0; j < partrel->ColumnCount(); 
++j, ++idx)
+                       if (coldesc->IsDropped())
                        {
-                               const IMDColumn *coldesc = partrel->GetMdCol(j);
-                               const CWStringConst *colname = 
coldesc->Mdname().GetMDName();
-
-                               if (coldesc->IsDropped())
-                               {
-                                       --idx;
-                                       continue;
-                               }
-
-                               if (colname->Equals(root_colref->Name().Pstr()))
-                               {
-                                       // Found the corresponding column in 
the child partition
-                                       // Save the index in the mapping
-                                       mapping->Insert(root_colref, 
GPOS_NEW(mp) ULONG(idx));
-                                       found_mapping = true;
-                                       break;
-                               }
+                               continue;
                        }
 
-                       if (!found_mapping)
+                       ULONG *root_idx = root_mapping->Find(colname);
+                       if (nullptr != root_idx)
+                       {
+                               mapping->Insert((*root_cols)[*root_idx],
+                                                               GPOS_NEW(mp) 
ULONG(*root_idx));
+                       }
+                       else
                        {
+                               root_mapping->Release();
                                GPOS_RAISE(
                                        CException::ExmaInvalid, 
CException::ExmiInvalid,
                                        GPOS_WSZ_LIT(
@@ -273,5 +280,6 @@ CLogicalDynamicGetBase::ConstructRootColMappingPerPart(
                }
                part_maps->Append(mapping);
        }
+       root_mapping->Release();
        return part_maps;
 }
diff --git a/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h 
b/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h
index 4ba814248b..99293f1ee6 100644
--- a/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h
+++ b/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h
@@ -46,6 +46,16 @@ public:
 
        // returns the wide character buffer storing the string
        const WCHAR *GetBuffer() const override;
+
+       // equality
+       static BOOL Equals(const CWStringConst *string1,
+                                          const CWStringConst *string2);
+
+       // hash function
+       static ULONG HashValue(const CWStringConst *string);
+
+       // checks whether the string is byte-wise equal to another string
+       BOOL Equals(const CWStringBase *str) const override;
 };
 }  // namespace gpos
 
diff --git a/src/backend/gporca/libgpos/src/string/CWStringConst.cpp 
b/src/backend/gporca/libgpos/src/string/CWStringConst.cpp
index fee1c0414b..154897f70d 100644
--- a/src/backend/gporca/libgpos/src/string/CWStringConst.cpp
+++ b/src/backend/gporca/libgpos/src/string/CWStringConst.cpp
@@ -13,6 +13,7 @@
 
 #include "gpos/base.h"
 #include "gpos/common/clibwrapper.h"
+#include "gpos/utils.h"
 
 using namespace gpos;
 
@@ -118,4 +119,31 @@ CWStringConst::GetBuffer() const
        return m_w_str_buffer;
 }
 
+// equality
+BOOL
+CWStringConst::Equals(const CWStringConst *string1,
+                                         const CWStringConst *string2)
+{
+       ULONG length = GPOS_WSZ_LENGTH(string1->GetBuffer());
+       return length == GPOS_WSZ_LENGTH(string2->GetBuffer()) &&
+                  0 == clib::Wcsncmp(string1->GetBuffer(), 
string2->GetBuffer(),
+                                                         length);
+}
+
+// hash function
+ULONG
+CWStringConst::HashValue(const CWStringConst *string)
+{
+       return gpos::HashByteArray(
+               (BYTE *) string->GetBuffer(),
+               GPOS_WSZ_LENGTH(string->GetBuffer()) * GPOS_SIZEOF(WCHAR));
+}
+
+// checks whether the string is byte-wise equal to another string
+BOOL
+CWStringConst::Equals(const CWStringBase *str) const
+{
+       GPOS_ASSERT(nullptr != str);
+       return CWStringBase::Equals(str->GetBuffer());
+}
 // EOF


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to