This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 6602921d38f83401cbc81ee89ed4d317e2e02a0a Author: Chris Hajas <[email protected]> AuthorDate: Sun Mar 5 12:34:12 2023 -0800 Optimize ConstructRootColMappingPerPart for common case This method is very expensive when we have many partitions and many columns. In a partition table, it will iterate over each partition, and for each partition check every column against each root's column. This is because the child partition can have dropped columns and columns in a different order than the root. Because this is a less-common scenario, we should optimize for the common case here. While iterating through the root's columns, as long as the child's column also matches, we don't need to recheck child columns that have previously been found. If there are no dropped/swapped columns, this makes the check go from O(n^2) for each partition to O(n). As an example, for a 1200 partition, 200 column table of type `int` with no dropped/swapped columns, a simple `select * from part_table` query went from 6 to 5.3s with an empty relcache, and 560s to 110ms with the table in the relcache. With more partitions and more columns the speedup gets even larger. --- .../gpopt/translate/CTranslatorRelcacheToDXL.cpp | 1 - .../gpopt/operators/CLogicalDynamicGetBase.h | 5 ++ .../src/operators/CLogicalDynamicGetBase.cpp | 56 ++++++++++++---------- .../libgpos/include/gpos/string/CWStringConst.h | 10 ++++ .../gporca/libgpos/src/string/CWStringConst.cpp | 28 +++++++++++ 5 files changed, 75 insertions(+), 25 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp index 2a3d0f80b4..57f64baf2e 100644 --- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp @@ -2537,7 +2537,6 @@ CTranslatorRelcacheToDXL::RetrievePartKeysAndTypes(CMemoryPool *mp, { GPOS_ASSERT(nullptr != rel); - // FIXME: isn't it faster to check rel.rd_partkey? if (!rel->rd_partdesc) { // not a partitioned table diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDynamicGetBase.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDynamicGetBase.h index 64f630bb1b..3f097800dc 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDynamicGetBase.h +++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDynamicGetBase.h @@ -70,6 +70,11 @@ protected: static ColRefToUlongMapArray *ConstructRootColMappingPerPart( CMemoryPool *mp, CColRefArray *root_cols, IMdIdArray *partition_mdids); + using ColNameToIndexMap = + CHashMap<const CWStringConst, ULONG, CWStringConst::HashValue, + CWStringConst::Equals, CleanupNULL<const CWStringConst>, + CleanupDelete<ULONG>>; + public: // ctors explicit CLogicalDynamicGetBase(CMemoryPool *mp); diff --git a/src/backend/gporca/libgpopt/src/operators/CLogicalDynamicGetBase.cpp b/src/backend/gporca/libgpopt/src/operators/CLogicalDynamicGetBase.cpp index 6329e8e5db..499d54d6cb 100644 --- a/src/backend/gporca/libgpopt/src/operators/CLogicalDynamicGetBase.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CLogicalDynamicGetBase.cpp @@ -220,7 +220,12 @@ CLogicalDynamicGetBase::DerivePartitionInfo(CMemoryPool *mp, } // Construct a mapping from each column in root table to an index in each child -// partition's table descr by matching column names +// partition's table descr by matching column names For each partition, this +// iterates over each child partition and compares the column names and creates +// a mapping. In the common case, the root and child partition's columns have +// the same colref. However, if they've been dropped/swapped, the mapping will +// be different. This method is fairly expensive, as it's building multiple hashmaps +// and ends up getting called from a few different places in the codebase. ColRefToUlongMapArray * CLogicalDynamicGetBase::ConstructRootColMappingPerPart( CMemoryPool *mp, CColRefArray *root_cols, IMdIdArray *partition_mdids) @@ -228,6 +233,15 @@ CLogicalDynamicGetBase::ConstructRootColMappingPerPart( CMDAccessor *mda = COptCtxt::PoctxtFromTLS()->Pmda(); ColRefToUlongMapArray *part_maps = GPOS_NEW(mp) ColRefToUlongMapArray(mp); + + // Build hashmap of colname to the index + ColNameToIndexMap *root_mapping = GPOS_NEW(mp) ColNameToIndexMap(mp); + for (ULONG i = 0; i < root_cols->Size(); ++i) + { + CColRef *root_colref = (*root_cols)[i]; + root_mapping->Insert(root_colref->Name().Pstr(), GPOS_NEW(mp) ULONG(i)); + } + for (ULONG ul = 0; ul < partition_mdids->Size(); ++ul) { IMDId *part_mdid = (*partition_mdids)[ul]; @@ -236,35 +250,28 @@ CLogicalDynamicGetBase::ConstructRootColMappingPerPart( GPOS_ASSERT(nullptr != partrel); ColRefToUlongMap *mapping = GPOS_NEW(mp) ColRefToUlongMap(mp); - - for (ULONG i = 0; i < root_cols->Size(); ++i) + // The root mapping cannot contain dropped columns, but may be + // in a different order than the child cols.Iterate through each of the child + // cols, and retrieve the corresponding index in the parent table + for (ULONG j = 0; j < partrel->ColumnCount(); ++j) { - CColRef *root_colref = (*root_cols)[i]; + const IMDColumn *coldesc = partrel->GetMdCol(j); + const CWStringConst *colname = coldesc->Mdname().GetMDName(); - BOOL found_mapping = false; - for (ULONG j = 0, idx = 0; j < partrel->ColumnCount(); ++j, ++idx) + if (coldesc->IsDropped()) { - const IMDColumn *coldesc = partrel->GetMdCol(j); - const CWStringConst *colname = coldesc->Mdname().GetMDName(); - - if (coldesc->IsDropped()) - { - --idx; - continue; - } - - if (colname->Equals(root_colref->Name().Pstr())) - { - // Found the corresponding column in the child partition - // Save the index in the mapping - mapping->Insert(root_colref, GPOS_NEW(mp) ULONG(idx)); - found_mapping = true; - break; - } + continue; } - if (!found_mapping) + ULONG *root_idx = root_mapping->Find(colname); + if (nullptr != root_idx) + { + mapping->Insert((*root_cols)[*root_idx], + GPOS_NEW(mp) ULONG(*root_idx)); + } + else { + root_mapping->Release(); GPOS_RAISE( CException::ExmaInvalid, CException::ExmiInvalid, GPOS_WSZ_LIT( @@ -273,5 +280,6 @@ CLogicalDynamicGetBase::ConstructRootColMappingPerPart( } part_maps->Append(mapping); } + root_mapping->Release(); return part_maps; } diff --git a/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h b/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h index 4ba814248b..99293f1ee6 100644 --- a/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h +++ b/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h @@ -46,6 +46,16 @@ public: // returns the wide character buffer storing the string const WCHAR *GetBuffer() const override; + + // equality + static BOOL Equals(const CWStringConst *string1, + const CWStringConst *string2); + + // hash function + static ULONG HashValue(const CWStringConst *string); + + // checks whether the string is byte-wise equal to another string + BOOL Equals(const CWStringBase *str) const override; }; } // namespace gpos diff --git a/src/backend/gporca/libgpos/src/string/CWStringConst.cpp b/src/backend/gporca/libgpos/src/string/CWStringConst.cpp index fee1c0414b..154897f70d 100644 --- a/src/backend/gporca/libgpos/src/string/CWStringConst.cpp +++ b/src/backend/gporca/libgpos/src/string/CWStringConst.cpp @@ -13,6 +13,7 @@ #include "gpos/base.h" #include "gpos/common/clibwrapper.h" +#include "gpos/utils.h" using namespace gpos; @@ -118,4 +119,31 @@ CWStringConst::GetBuffer() const return m_w_str_buffer; } +// equality +BOOL +CWStringConst::Equals(const CWStringConst *string1, + const CWStringConst *string2) +{ + ULONG length = GPOS_WSZ_LENGTH(string1->GetBuffer()); + return length == GPOS_WSZ_LENGTH(string2->GetBuffer()) && + 0 == clib::Wcsncmp(string1->GetBuffer(), string2->GetBuffer(), + length); +} + +// hash function +ULONG +CWStringConst::HashValue(const CWStringConst *string) +{ + return gpos::HashByteArray( + (BYTE *) string->GetBuffer(), + GPOS_WSZ_LENGTH(string->GetBuffer()) * GPOS_SIZEOF(WCHAR)); +} + +// checks whether the string is byte-wise equal to another string +BOOL +CWStringConst::Equals(const CWStringBase *str) const +{ + GPOS_ASSERT(nullptr != str); + return CWStringBase::Equals(str->GetBuffer()); +} // EOF --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
