This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit fc065034df7287b2e037eaa2f6474d1389724204
Author: DevChattopadhyay <[email protected]>
AuthorDate: Wed Jul 20 15:57:59 2022 +0530

    Queries on Distributed Replicated tables hangs when using optimizer
    
    During the optimisation of CTE’s for distributed replicated tables, 
Sequence operator optimize the first child with any distribution
    Requirement and compute the distribution request on the other children 
based on derived distribution of the first child.
    If distribution of first child is a Singleton, requests singleton on all 
children
    If distribution of first child is a Non-Singleton, requests Non-Singleton 
on all children,
    Here when the first child is a Replicated/TaintedReplicated, still we 
requests Non-Singleton, Hence optimiser adding redistribution motion on
    Top of second child, which is creating a wrong plan hence query is getting 
hung.
    So we are trying to request Non-singleton without enforcers when the first 
child is non-singleton, non-universal and Replicated/TaintedReplicated.
    Which can avoid adding redistribution motion on top of second child.
    
    Old plan:
                                 QUERY PLAN
    
----------------------------------------------------------------------------------------------------------------------------
     Gather Motion 3:1 (slice4; segments: 3) (cost=0.00..1293.00 rows=1 
width=24)
      -> Sequence (cost=0.00..1293.00 rows=1 width=24)
         -> Shared Scan (share slice:id 4:0) (cost=0.00..431.00 rows=1 width=1)
            -> Materialize (cost=0.00..431.00 rows=1 width=1)
               -> WindowAgg (cost=0.00..431.00 rows=1 width=16)
                  Partition By: testtable.name
                  -> Sort (cost=0.00..431.00 rows=1 width=5)
                     Sort Key: testtable.name
                     -> Seq Scan on testtable (cost=0.00..431.00 rows=1 width=5)
         -> Redistribute Motion 1:3 (slice3) (cost=0.00..862.00 rows=1 width=24)
            -> Hash Left Join (cost=0.00..862.00 rows=1 width=24)
               Hash Cond: (“outer”.tblnm = 
pg_catalog.textin(unknownout(“outer”.tblnm), ‘’::void, (-1)))
               -> Result (cost=0.00..431.00 rows=1 width=8)
                  -> Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..431.00 
rows=1 width=1)
                     -> Result (cost=0.00..431.00 rows=1 width=1)
                        -> Shared Scan (share slice:id 1:0) (cost=0.00..431.00 
rows=1 width=1)
               -> Hash (cost=431.00..431.00 rows=1 width=16)
                  -> Result (cost=0.00..431.00 rows=1 width=16)
                     -> Aggregate (cost=0.00..431.00 rows=1 width=8)
                        -> Gather Motion 1:1 (slice2; segments: 1) 
(cost=0.00..431.00 rows=1 width=1)
                           -> Result (cost=0.00..431.00 rows=1 width=1)
                              -> Shared Scan (share slice:id 2:0) 
(cost=0.00..431.00 rows=1 width=1)
     Optimizer: Pivotal Optimizer (GPORCA)
    (23 rows)
    
    New Plan:
                                     QUERY PLAN
    
----------------------------------------------------------------------------------------------------------------------------------------------
     Sequence (cost=0.00..1293.00 rows=1 width=24) (actual time=1.120..1.120 
rows=0 loops=1)
      -> Shared Scan (share slice:id 0:0) (cost=0.00..431.00 rows=1 width=1) 
(actual time=0.708..0.708 rows=0 loops=1)
         -> Materialize (cost=0.00..431.00 rows=1 width=1) (actual 
time=0.706..0.707 rows=0 loops=1)
            -> Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..431.00 
rows=1 width=16) (actual time=0.697..0.697 rows=0 loops=1)
               -> WindowAgg (cost=0.00..431.00 rows=1 width=16) (never executed)
                  Partition By: testtable.name
                  -> Sort (cost=0.00..431.00 rows=1 width=10) (never executed)
                     Sort Key: testtable.name
                     Sort Method: quicksort Memory: 33kB
                     -> Seq Scan on testtable (cost=0.00..431.00 rows=1 
width=10) (never executed)
      -> Hash Left Join (cost=0.00..862.00 rows=1 width=24) (actual 
time=0.410..0.410 rows=0 loops=1)
         Hash Cond: (“outer”.tblnm = 
pg_catalog.textin(unknownout(“outer”.tblnm), ‘’::void, (-1)))
         Extra Text: Hash chain length 1.0 avg, 1 max, using 1 of 65536 buckets.
         -> Result (cost=0.00..431.00 rows=1 width=8) (actual time=0.001..0.001 
rows=0 loops=1)
            -> Shared Scan (share slice:id 0:0) (cost=0.00..431.00 rows=1 
width=1) (actual time=0.001..0.001 rows=0 loops=1)
         -> Hash (cost=431.00..431.00 rows=1 width=16) (actual 
time=0.014..0.014 rows=1 loops=1)
            Buckets: 65536 Batches: 1 Memory Usage: 1kB
            -> Result (cost=0.00..431.00 rows=1 width=16) (actual 
time=0.006..0.006 rows=1 loops=1)
               -> Aggregate (cost=0.00..431.00 rows=1 width=8) (actual 
time=0.004..0.004 rows=1 loops=1)
                  -> Shared Scan (share slice:id 0:0) (cost=0.00..431.00 rows=1 
width=1) (actual time=0.002..0.002 rows=0 loops=1)
     Optimizer: Pivotal Optimizer (GPORCA)
     Execution time: 1.800 ms
    
    Co-authored-by: Hari krishna Maddileti <[email protected]>
---
 .../dxl/minidump/TaintedReplicatedTablesCTE.mdp    | 565 +++++++++++++++++++++
 .../gpopt/base/CDistributionSpecNonSingleton.h     |  13 +-
 .../src/base/CDistributionSpecNonSingleton.cpp     |   4 +-
 .../gporca/libgpopt/src/base/CEnfdDistribution.cpp |   7 +
 .../libgpopt/src/operators/CPhysicalSequence.cpp   |  31 ++
 .../src/operators/CPhysicalSerialUnionAll.cpp      |   6 +-
 src/backend/gporca/server/CMakeLists.txt           |   2 +-
 src/test/regress/expected/rpt.out                  |  82 +++
 src/test/regress/expected/rpt_optimizer.out        |  81 +++
 src/test/regress/sql/rpt.sql                       |  32 ++
 10 files changed, 816 insertions(+), 7 deletions(-)

diff --git 
a/src/backend/gporca/data/dxl/minidump/TaintedReplicatedTablesCTE.mdp 
b/src/backend/gporca/data/dxl/minidump/TaintedReplicatedTablesCTE.mdp
new file mode 100644
index 0000000000..a99628513b
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/TaintedReplicatedTablesCTE.mdp
@@ -0,0 +1,565 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/";>
+  <dxl:Comment><![CDATA[
+     Objective: If the producer is replicated, request a non-singleton spec
+     that is not allowed to be enforced, to avoid potential CTE hang issue.
+     Setup:
+       create table testtable ( i character varying(10)) DISTRIBUTED 
REPLICATED;
+       explain
+       WITH cte1 AS ( SELECT *,ROW_NUMBER() OVER ( PARTITION BY i) AS 
RANK_DESC FROM testtable),
+       cte2 AS ( SELECT 'COL1' TBLNM,COUNT(*) DIFFCNT FROM ( SELECT * FROM 
cte1) X)
+       select * FROM ( SELECT 'COL1' TBLNM FROM cte1) A LEFT JOIN cte2 C ON 
A.TBLNM=C.TBLNM;
+                                                    QUERY PLAN
+       
----------------------------------------------------------------------------------------------------
+        Sequence  (cost=0.00..1293.00 rows=2 width=24)
+          ->  Shared Scan (share slice:id 0:0)  (cost=0.00..431.00 rows=1 
width=1)
+                ->  Gather Motion 1:1  (slice1; segments: 1)  
(cost=0.00..431.00 rows=1 width=16)
+                      ->  WindowAgg  (cost=0.00..431.00 rows=3 width=16)
+                            Partition By: with_test3.i
+                            ->  Sort  (cost=0.00..431.00 rows=3 width=10)
+                                  Sort Key: with_test3.i
+                                  ->  Seq Scan on with_test3  
(cost=0.00..431.00 rows=3 width=10)
+          ->  Hash Left Join  (cost=0.00..862.00 rows=2 width=24)
+                Hash Cond: (('COL1'::text) = ('COL1'::text))
+                ->  Result  (cost=0.00..431.00 rows=1 width=8)
+                      ->  Shared Scan (share slice:id 0:0)  (cost=0.00..431.00 
rows=1 width=1)
+                ->  Hash  (cost=431.00..431.00 rows=1 width=16)
+                      ->  Result  (cost=0.00..431.00 rows=1 width=16)
+                            Filter: (('COL1'::text) = 'COL1'::text)
+                            ->  Aggregate  (cost=0.00..431.00 rows=1 width=8)
+                                  ->  Shared Scan (share slice:id 0:0)  
(cost=0.00..431.00 rows=1 width=1)
+        Optimizer: Pivotal Optimizer (GPORCA)
+       (18 rows)
+   ]]>
+   </dxl:Comment>
+  <dxl:Thread Id="0">
+    <dxl:OptimizerConfig>
+      <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/>
+      <dxl:StatisticsConfig DampingFactorFilter="0.750000" 
DampingFactorJoin="0.000000" DampingFactorGroupBy="0.750000" 
MaxStatsBuckets="100"/>
+      <dxl:CTEConfig CTEInliningCutoff="0"/>
+      <dxl:WindowOids RowNumber="3100" Rank="3101"/>
+      <dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3">
+        <dxl:CostParams>
+          <dxl:CostParam Name="NLJFactor" Value="1024.000000" 
LowerBound="1023.500000" UpperBound="1024.500000"/>
+        </dxl:CostParams>
+      </dxl:CostModelConfig>
+      <dxl:Hint MinNumOfPartsToRequireSortOnInsert="2147483647" 
JoinArityForAssociativityCommutativity="18" ArrayExpansionThreshold="100" 
JoinOrderDynamicProgThreshold="10" BroadcastThreshold="100000" 
EnforceConstraintsOnDML="false" PushGroupByBelowSetopThreshold="10" 
XformBindThreshold="0"/>
+      <dxl:TraceFlags 
Value="101013,102001,102002,102003,102043,102074,102120,102144,103001,103014,103022,103026,103027,103029,103033,103038,103040,104002,104003,104004,104005,106000"/>
+    </dxl:OptimizerConfig>
+    <dxl:Metadata SystemIds="0.GPDB">
+      <dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2222.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7124.1.0"/>
+        <dxl:EqualityOp Mdid="0.91.1.0"/>
+        <dxl:InequalityOp Mdid="0.85.1.0"/>
+        <dxl:LessThanOp Mdid="0.58.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.1694.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.59.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/>
+        <dxl:ComparisonOp Mdid="0.1693.1.0"/>
+        <dxl:ArrayType Mdid="0.1000.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.1043.1.0" Name="varchar" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="true" IsFixedLength="false" Length="-1" PassByValue="false">
+        <dxl:DistrOpfamily Mdid="0.1995.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7105.1.0"/>
+        <dxl:EqualityOp Mdid="0.98.1.0"/>
+        <dxl:InequalityOp Mdid="0.531.1.0"/>
+        <dxl:LessThanOp Mdid="0.664.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.665.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.666.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.667.1.0"/>
+        <dxl:ComparisonOp Mdid="0.360.1.0"/>
+        <dxl:ArrayType Mdid="0.1015.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.20.1.0" Name="Int8" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="8" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:EqualityOp Mdid="0.410.1.0"/>
+        <dxl:InequalityOp Mdid="0.411.1.0"/>
+        <dxl:LessThanOp Mdid="0.412.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.414.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.413.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.415.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1016.1.0"/>
+        <dxl:MinAgg Mdid="0.2131.1.0"/>
+        <dxl:MaxAgg Mdid="0.2115.1.0"/>
+        <dxl:AvgAgg Mdid="0.2100.1.0"/>
+        <dxl:SumAgg Mdid="0.2107.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:EqualityOp Mdid="0.96.1.0"/>
+        <dxl:InequalityOp Mdid="0.518.1.0"/>
+        <dxl:LessThanOp Mdid="0.97.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.523.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.521.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1007.1.0"/>
+        <dxl:MinAgg Mdid="0.2132.1.0"/>
+        <dxl:MaxAgg Mdid="0.2116.1.0"/>
+        <dxl:AvgAgg Mdid="0.2101.1.0"/>
+        <dxl:SumAgg Mdid="0.2108.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:GPDBScalarOp Mdid="0.664.1.0" Name="&lt;" ComparisonType="LT" 
ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.25.1.0"/>
+        <dxl:RightType Mdid="0.25.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.740.1.0"/>
+        <dxl:Commutator Mdid="0.666.1.0"/>
+        <dxl:InverseOp Mdid="0.667.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1994.1.0"/>
+          <dxl:Opfamily Mdid="0.4017.1.0"/>
+          <dxl:Opfamily Mdid="0.4056.1.0"/>
+          <dxl:Opfamily Mdid="0.10018.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:Type Mdid="0.25.1.0" Name="text" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="true" IsFixedLength="false" Length="-1" PassByValue="false">
+        <dxl:DistrOpfamily Mdid="0.1995.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7105.1.0"/>
+        <dxl:EqualityOp Mdid="0.98.1.0"/>
+        <dxl:InequalityOp Mdid="0.531.1.0"/>
+        <dxl:LessThanOp Mdid="0.664.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.665.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.666.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.667.1.0"/>
+        <dxl:ComparisonOp Mdid="0.360.1.0"/>
+        <dxl:ArrayType Mdid="0.1009.1.0"/>
+        <dxl:MinAgg Mdid="0.2145.1.0"/>
+        <dxl:MaxAgg Mdid="0.2129.1.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.26.1.0" Name="oid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1990.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7109.1.0"/>
+        <dxl:EqualityOp Mdid="0.607.1.0"/>
+        <dxl:InequalityOp Mdid="0.608.1.0"/>
+        <dxl:LessThanOp Mdid="0.609.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.611.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.610.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.612.1.0"/>
+        <dxl:ComparisonOp Mdid="0.356.1.0"/>
+        <dxl:ArrayType Mdid="0.1028.1.0"/>
+        <dxl:MinAgg Mdid="0.2118.1.0"/>
+        <dxl:MaxAgg Mdid="0.2134.1.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.27.1.0" Name="tid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="6" PassByValue="false">
+        <dxl:DistrOpfamily Mdid="0.2227.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7110.1.0"/>
+        <dxl:EqualityOp Mdid="0.387.1.0"/>
+        <dxl:InequalityOp Mdid="0.402.1.0"/>
+        <dxl:LessThanOp Mdid="0.2799.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.2801.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.2800.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.2802.1.0"/>
+        <dxl:ComparisonOp Mdid="0.2794.1.0"/>
+        <dxl:ArrayType Mdid="0.1010.1.0"/>
+        <dxl:MinAgg Mdid="0.2798.1.0"/>
+        <dxl:MaxAgg Mdid="0.2797.1.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:GPDBFunc Mdid="0.3100.1.0" Name="row_number" ReturnsSet="false" 
Stability="Immutable" DataAccess="NoSQL" IsStrict="false" 
IsNDVPreserving="false" IsAllowedForPS="false">
+        <dxl:ResultType Mdid="0.20.1.0"/>
+      </dxl:GPDBFunc>
+      <dxl:Type Mdid="0.29.1.0" Name="cid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="false" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2226.1.0"/>
+        <dxl:EqualityOp Mdid="0.385.1.0"/>
+        <dxl:InequalityOp Mdid="0.0.0.0"/>
+        <dxl:LessThanOp Mdid="0.0.0.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:ComparisonOp Mdid="0.0.0.0"/>
+        <dxl:ArrayType Mdid="0.1012.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.28.1.0" Name="xid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="false" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2225.1.0"/>
+        <dxl:EqualityOp Mdid="0.352.1.0"/>
+        <dxl:InequalityOp Mdid="0.3315.1.0"/>
+        <dxl:LessThanOp Mdid="0.0.0.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:ComparisonOp Mdid="0.0.0.0"/>
+        <dxl:ArrayType Mdid="0.1011.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:RelationStatistics Mdid="2.56117.1.0" Name="testtable" 
Rows="0.000000" RelPages="0" RelAllVisible="0" EmptyRelation="true"/>
+      <dxl:Relation Mdid="0.56117.1.0" Name="testtable" IsTemporary="false" 
HasOids="false" StorageType="Heap" DistributionPolicy="Replicated" Keys="7,1" 
NumberLeafPartitions="0">
+        <dxl:Columns>
+          <dxl:Column Name="name" Attno="1" Mdid="0.1043.1.0" 
TypeModifier="14" Nullable="true" ColWidth="10">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" 
ColWidth="6">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="xmin" Attno="-2" Mdid="0.28.1.0" Nullable="false" 
ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="cmin" Attno="-3" Mdid="0.29.1.0" Nullable="false" 
ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="xmax" Attno="-4" Mdid="0.28.1.0" Nullable="false" 
ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="cmax" Attno="-5" Mdid="0.29.1.0" Nullable="false" 
ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="tableoid" Attno="-6" Mdid="0.26.1.0" 
Nullable="false" ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="gp_segment_id" Attno="-7" Mdid="0.23.1.0" 
Nullable="false" ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+        </dxl:Columns>
+        <dxl:IndexInfoList/>
+        <dxl:Triggers/>
+        <dxl:CheckConstraints/>
+      </dxl:Relation>
+      <dxl:MDCast Mdid="3.25.1.0;25.1.0" Name="text" BinaryCoercible="true" 
SourceTypeId="0.25.1.0" DestinationTypeId="0.25.1.0" CastFuncId="0.0.0.0" 
CoercePathType="0"/>
+      <dxl:GPDBScalarOp Mdid="0.98.1.0" Name="=" ComparisonType="Eq" 
ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.25.1.0"/>
+        <dxl:RightType Mdid="0.25.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.67.1.0"/>
+        <dxl:Commutator Mdid="0.98.1.0"/>
+        <dxl:InverseOp Mdid="0.531.1.0"/>
+        <dxl:HashOpfamily Mdid="0.1995.1.0"/>
+        <dxl:LegacyHashOpfamily Mdid="0.7105.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1994.1.0"/>
+          <dxl:Opfamily Mdid="0.1995.1.0"/>
+          <dxl:Opfamily Mdid="0.2095.1.0"/>
+          <dxl:Opfamily Mdid="0.2229.1.0"/>
+          <dxl:Opfamily Mdid="0.4017.1.0"/>
+          <dxl:Opfamily Mdid="0.4056.1.0"/>
+          <dxl:Opfamily Mdid="0.7105.1.0"/>
+          <dxl:Opfamily Mdid="0.10018.1.0"/>
+          <dxl:Opfamily Mdid="0.10022.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBAgg Mdid="0.2803.1.0" Name="count" IsSplittable="true" 
HashAggCapable="true">
+        <dxl:ResultType Mdid="0.20.1.0"/>
+        <dxl:IntermediateResultType Mdid="0.20.1.0"/>
+      </dxl:GPDBAgg>
+    </dxl:Metadata>
+    <dxl:Query>
+      <dxl:OutputColumns>
+        <dxl:Ident ColId="16" ColName="tblnm" TypeMdid="0.25.1.0"/>
+        <dxl:Ident ColId="17" ColName="tblnm" TypeMdid="0.25.1.0"/>
+        <dxl:Ident ColId="18" ColName="diffcnt" TypeMdid="0.20.1.0"/>
+      </dxl:OutputColumns>
+      <dxl:CTEList>
+        <dxl:LogicalCTEProducer CTEId="1" Columns="1,9">
+          <dxl:LogicalWindow>
+            <dxl:WindowSpecList>
+              <dxl:WindowSpec PartitionColumns="1">
+                <dxl:WindowFrame FrameSpec="Range" ExclusionStrategy="Nulls">
+                  <dxl:TrailingEdge TrailingBoundary="UnboundedPreceding"/>
+                  <dxl:LeadingEdge LeadingBoundary="CurrentRow"/>
+                </dxl:WindowFrame>
+              </dxl:WindowSpec>
+            </dxl:WindowSpecList>
+            <dxl:ProjList>
+              <dxl:ProjElem ColId="1" Alias="name">
+                <dxl:Ident ColId="1" ColName="name" TypeMdid="0.1043.1.0" 
TypeModifier="14"/>
+              </dxl:ProjElem>
+              <dxl:ProjElem ColId="9" Alias="rank_desc">
+                <dxl:WindowFunc Mdid="0.3100.1.0" TypeMdid="0.20.1.0" 
Distinct="false" WindowStarArg="false" WindowSimpleAgg="false" 
WindowStrategy="Immediate" WinSpecPos="0"/>
+              </dxl:ProjElem>
+            </dxl:ProjList>
+            <dxl:LogicalGet>
+              <dxl:TableDescriptor Mdid="0.56117.1.0" TableName="testtable" 
LockMode="1">
+                <dxl:Columns>
+                  <dxl:Column ColId="1" Attno="1" ColName="name" 
TypeMdid="0.1043.1.0" TypeModifier="14" ColWidth="10"/>
+                  <dxl:Column ColId="2" Attno="-1" ColName="ctid" 
TypeMdid="0.27.1.0" ColWidth="6"/>
+                  <dxl:Column ColId="3" Attno="-2" ColName="xmin" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                  <dxl:Column ColId="4" Attno="-3" ColName="cmin" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                  <dxl:Column ColId="5" Attno="-4" ColName="xmax" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                  <dxl:Column ColId="6" Attno="-5" ColName="cmax" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                  <dxl:Column ColId="7" Attno="-6" ColName="tableoid" 
TypeMdid="0.26.1.0" ColWidth="4"/>
+                  <dxl:Column ColId="8" Attno="-7" ColName="gp_segment_id" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+                </dxl:Columns>
+              </dxl:TableDescriptor>
+            </dxl:LogicalGet>
+          </dxl:LogicalWindow>
+        </dxl:LogicalCTEProducer>
+        <dxl:LogicalCTEProducer CTEId="2" Columns="13,12">
+          <dxl:LogicalProject>
+            <dxl:ProjList>
+              <dxl:ProjElem ColId="13" Alias="tblnm">
+                <dxl:ConstValue TypeMdid="0.25.1.0" 
Value="AAAAGkxHTEFHUkVFX1JORVdMX1RZUF9SRUY=" LintValue="3353216647"/>
+              </dxl:ProjElem>
+            </dxl:ProjList>
+            <dxl:LogicalGroupBy>
+              <dxl:GroupingColumns/>
+              <dxl:ProjList>
+                <dxl:ProjElem ColId="12" Alias="count">
+                  <dxl:AggFunc AggMdid="0.2803.1.0" AggDistinct="false" 
AggStage="Normal" AggKind="n" AggArgTypes="">
+                    <dxl:ValuesList ParamType="aggargs"/>
+                    <dxl:ValuesList ParamType="aggdirectargs"/>
+                    <dxl:ValuesList ParamType="aggorder"/>
+                    <dxl:ValuesList ParamType="aggdistinct"/>
+                  </dxl:AggFunc>
+                </dxl:ProjElem>
+              </dxl:ProjList>
+              <dxl:LogicalCTEConsumer CTEId="1" Columns="10,11"/>
+            </dxl:LogicalGroupBy>
+          </dxl:LogicalProject>
+        </dxl:LogicalCTEProducer>
+      </dxl:CTEList>
+      <dxl:LogicalCTEAnchor CTEId="1">
+        <dxl:LogicalCTEAnchor CTEId="2">
+          <dxl:LogicalJoin JoinType="Left">
+            <dxl:LogicalProject>
+              <dxl:ProjList>
+                <dxl:ProjElem ColId="16" Alias="tblnm">
+                  <dxl:ConstValue TypeMdid="0.25.1.0" 
Value="AAAAGkxHTEFHUkVFX1JORVdMX1RZUF9SRUY=" LintValue="3353216647"/>
+                </dxl:ProjElem>
+              </dxl:ProjList>
+              <dxl:LogicalCTEConsumer CTEId="1" Columns="14,15"/>
+            </dxl:LogicalProject>
+            <dxl:LogicalCTEConsumer CTEId="2" Columns="17,18"/>
+            <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.98.1.0">
+              <dxl:Ident ColId="16" ColName="tblnm" TypeMdid="0.25.1.0"/>
+              <dxl:Ident ColId="17" ColName="tblnm" TypeMdid="0.25.1.0"/>
+            </dxl:Comparison>
+          </dxl:LogicalJoin>
+        </dxl:LogicalCTEAnchor>
+      </dxl:LogicalCTEAnchor>
+    </dxl:Query>
+    <dxl:Plan Id="0" SpaceSize="10">
+      <dxl:Sequence>
+        <dxl:Properties>
+          <dxl:Cost StartupCost="0" TotalCost="1293.001107" Rows="2.000000" 
Width="24"/>
+        </dxl:Properties>
+        <dxl:ProjList>
+          <dxl:ProjElem ColId="29" Alias="tblnm">
+            <dxl:Ident ColId="29" ColName="tblnm" TypeMdid="0.25.1.0"/>
+          </dxl:ProjElem>
+          <dxl:ProjElem ColId="30" Alias="tblnm">
+            <dxl:Ident ColId="30" ColName="tblnm" TypeMdid="0.25.1.0"/>
+          </dxl:ProjElem>
+          <dxl:ProjElem ColId="31" Alias="diffcnt">
+            <dxl:Ident ColId="31" ColName="count" TypeMdid="0.20.1.0"/>
+          </dxl:ProjElem>
+        </dxl:ProjList>
+        <dxl:CTEProducer CTEId="0" Columns="0,8">
+          <dxl:Properties>
+            <dxl:Cost StartupCost="0" TotalCost="431.000220" Rows="1.000000" 
Width="1"/>
+          </dxl:Properties>
+          <dxl:ProjList>
+            <dxl:ProjElem ColId="0" Alias="name">
+              <dxl:Ident ColId="0" ColName="name" TypeMdid="0.1043.1.0" 
TypeModifier="14"/>
+            </dxl:ProjElem>
+            <dxl:ProjElem ColId="8" Alias="rank_desc">
+              <dxl:Ident ColId="8" ColName="rank_desc" TypeMdid="0.20.1.0"/>
+            </dxl:ProjElem>
+          </dxl:ProjList>
+          <dxl:GatherMotion InputSegments="0" OutputSegments="-1">
+            <dxl:Properties>
+              <dxl:Cost StartupCost="0" TotalCost="431.000219" Rows="1.000000" 
Width="16"/>
+            </dxl:Properties>
+            <dxl:ProjList>
+              <dxl:ProjElem ColId="0" Alias="name">
+                <dxl:Ident ColId="0" ColName="name" TypeMdid="0.1043.1.0" 
TypeModifier="14"/>
+              </dxl:ProjElem>
+              <dxl:ProjElem ColId="8" Alias="rank_desc">
+                <dxl:Ident ColId="8" ColName="rank_desc" TypeMdid="0.20.1.0"/>
+              </dxl:ProjElem>
+            </dxl:ProjList>
+            <dxl:Filter/>
+            <dxl:SortingColumnList/>
+            <dxl:Window PartitionColumns="0">
+              <dxl:Properties>
+                <dxl:Cost StartupCost="0" TotalCost="431.000041" 
Rows="3.000000" Width="16"/>
+              </dxl:Properties>
+              <dxl:ProjList>
+                <dxl:ProjElem ColId="0" Alias="name">
+                  <dxl:Ident ColId="0" ColName="name" TypeMdid="0.1043.1.0" 
TypeModifier="14"/>
+                </dxl:ProjElem>
+                <dxl:ProjElem ColId="8" Alias="rank_desc">
+                  <dxl:WindowFunc Mdid="0.3100.1.0" TypeMdid="0.20.1.0" 
Distinct="false" WindowStarArg="false" WindowSimpleAgg="false" 
WindowStrategy="Immediate" WinSpecPos="0"/>
+                </dxl:ProjElem>
+              </dxl:ProjList>
+              <dxl:Filter/>
+              <dxl:Sort SortDiscardDuplicates="false">
+                <dxl:Properties>
+                  <dxl:Cost StartupCost="0" TotalCost="431.000041" 
Rows="3.000000" Width="10"/>
+                </dxl:Properties>
+                <dxl:ProjList>
+                  <dxl:ProjElem ColId="0" Alias="name">
+                    <dxl:Ident ColId="0" ColName="name" TypeMdid="0.1043.1.0" 
TypeModifier="14"/>
+                  </dxl:ProjElem>
+                </dxl:ProjList>
+                <dxl:Filter/>
+                <dxl:SortingColumnList>
+                  <dxl:SortingColumn ColId="0" SortOperatorMdid="0.664.1.0" 
SortOperatorName="&lt;" SortNullsFirst="false"/>
+                </dxl:SortingColumnList>
+                <dxl:LimitCount/>
+                <dxl:LimitOffset/>
+                <dxl:TableScan>
+                  <dxl:Properties>
+                    <dxl:Cost StartupCost="0" TotalCost="431.000022" 
Rows="3.000000" Width="10"/>
+                  </dxl:Properties>
+                  <dxl:ProjList>
+                    <dxl:ProjElem ColId="0" Alias="name">
+                      <dxl:Ident ColId="0" ColName="name" 
TypeMdid="0.1043.1.0" TypeModifier="14"/>
+                    </dxl:ProjElem>
+                  </dxl:ProjList>
+                  <dxl:Filter/>
+                  <dxl:TableDescriptor Mdid="0.56117.1.0" 
TableName="testtable" LockMode="1">
+                    <dxl:Columns>
+                      <dxl:Column ColId="0" Attno="1" ColName="name" 
TypeMdid="0.1043.1.0" TypeModifier="14" ColWidth="10"/>
+                      <dxl:Column ColId="1" Attno="-1" ColName="ctid" 
TypeMdid="0.27.1.0" ColWidth="6"/>
+                      <dxl:Column ColId="2" Attno="-2" ColName="xmin" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                      <dxl:Column ColId="3" Attno="-3" ColName="cmin" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                      <dxl:Column ColId="4" Attno="-4" ColName="xmax" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                      <dxl:Column ColId="5" Attno="-5" ColName="cmax" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                      <dxl:Column ColId="6" Attno="-6" ColName="tableoid" 
TypeMdid="0.26.1.0" ColWidth="4"/>
+                      <dxl:Column ColId="7" Attno="-7" ColName="gp_segment_id" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+                    </dxl:Columns>
+                  </dxl:TableDescriptor>
+                </dxl:TableScan>
+              </dxl:Sort>
+              <dxl:WindowKeyList>
+                <dxl:WindowKey>
+                  <dxl:SortingColumnList/>
+                  <dxl:WindowFrame FrameSpec="Range" ExclusionStrategy="Nulls">
+                    <dxl:TrailingEdge TrailingBoundary="UnboundedPreceding"/>
+                    <dxl:LeadingEdge LeadingBoundary="CurrentRow"/>
+                  </dxl:WindowFrame>
+                </dxl:WindowKey>
+              </dxl:WindowKeyList>
+            </dxl:Window>
+          </dxl:GatherMotion>
+        </dxl:CTEProducer>
+        <dxl:HashJoin JoinType="Left">
+          <dxl:Properties>
+            <dxl:Cost StartupCost="0" TotalCost="862.000839" Rows="2.000000" 
Width="24"/>
+          </dxl:Properties>
+          <dxl:ProjList>
+            <dxl:ProjElem ColId="29" Alias="tblnm">
+              <dxl:Ident ColId="29" ColName="tblnm" TypeMdid="0.25.1.0"/>
+            </dxl:ProjElem>
+            <dxl:ProjElem ColId="30" Alias="tblnm">
+              <dxl:Ident ColId="30" ColName="tblnm" TypeMdid="0.25.1.0"/>
+            </dxl:ProjElem>
+            <dxl:ProjElem ColId="31" Alias="count">
+              <dxl:Ident ColId="31" ColName="count" TypeMdid="0.20.1.0"/>
+            </dxl:ProjElem>
+          </dxl:ProjList>
+          <dxl:Filter/>
+          <dxl:JoinFilter/>
+          <dxl:HashCondList>
+            <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.98.1.0">
+              <dxl:Ident ColId="29" ColName="tblnm" TypeMdid="0.25.1.0"/>
+              <dxl:Ident ColId="30" ColName="tblnm" TypeMdid="0.25.1.0"/>
+            </dxl:Comparison>
+          </dxl:HashCondList>
+          <dxl:Result>
+            <dxl:Properties>
+              <dxl:Cost StartupCost="0" TotalCost="431.000010" Rows="1.000000" 
Width="8"/>
+            </dxl:Properties>
+            <dxl:ProjList>
+              <dxl:ProjElem ColId="29" Alias="tblnm">
+                <dxl:ConstValue TypeMdid="0.25.1.0" 
Value="AAAAGkxHTEFHUkVFX1JORVdMX1RZUF9SRUY=" LintValue="3353216647"/>
+              </dxl:ProjElem>
+            </dxl:ProjList>
+            <dxl:Filter/>
+            <dxl:OneTimeFilter/>
+            <dxl:CTEConsumer CTEId="0" Columns="20,21">
+              <dxl:Properties>
+                <dxl:Cost StartupCost="0" TotalCost="431.000002" 
Rows="1.000000" Width="1"/>
+              </dxl:Properties>
+              <dxl:ProjList>
+                <dxl:ProjElem ColId="20" Alias="name">
+                  <dxl:Ident ColId="20" ColName="name" TypeMdid="0.1043.1.0" 
TypeModifier="14"/>
+                </dxl:ProjElem>
+                <dxl:ProjElem ColId="21" Alias="rank_desc">
+                  <dxl:Ident ColId="21" ColName="rank_desc" 
TypeMdid="0.20.1.0"/>
+                </dxl:ProjElem>
+              </dxl:ProjList>
+            </dxl:CTEConsumer>
+          </dxl:Result>
+          <dxl:Result>
+            <dxl:Properties>
+              <dxl:Cost StartupCost="0" TotalCost="431.000018" Rows="1.000000" 
Width="16"/>
+            </dxl:Properties>
+            <dxl:ProjList>
+              <dxl:ProjElem ColId="30" Alias="tblnm">
+                <dxl:ConstValue TypeMdid="0.25.1.0" 
Value="AAAAGkxHTEFHUkVFX1JORVdMX1RZUF9SRUY=" LintValue="3353216647"/>
+              </dxl:ProjElem>
+              <dxl:ProjElem ColId="31" Alias="count">
+                <dxl:Ident ColId="31" ColName="count" TypeMdid="0.20.1.0"/>
+              </dxl:ProjElem>
+            </dxl:ProjList>
+            <dxl:Filter/>
+            <dxl:OneTimeFilter/>
+            <dxl:Aggregate AggregationStrategy="Plain" StreamSafe="false">
+              <dxl:Properties>
+                <dxl:Cost StartupCost="0" TotalCost="431.000002" 
Rows="1.000000" Width="8"/>
+              </dxl:Properties>
+              <dxl:GroupingColumns/>
+              <dxl:ProjList>
+                <dxl:ProjElem ColId="31" Alias="count">
+                  <dxl:AggFunc AggMdid="0.2803.1.0" AggDistinct="false" 
AggStage="Normal" AggKind="n" AggArgTypes="">
+                    <dxl:ValuesList ParamType="aggargs"/>
+                    <dxl:ValuesList ParamType="aggdirectargs"/>
+                    <dxl:ValuesList ParamType="aggorder"/>
+                    <dxl:ValuesList ParamType="aggdistinct"/>
+                  </dxl:AggFunc>
+                </dxl:ProjElem>
+              </dxl:ProjList>
+              <dxl:Filter/>
+              <dxl:CTEConsumer CTEId="0" Columns="32,33">
+                <dxl:Properties>
+                  <dxl:Cost StartupCost="0" TotalCost="431.000002" 
Rows="1.000000" Width="1"/>
+                </dxl:Properties>
+                <dxl:ProjList>
+                  <dxl:ProjElem ColId="32" Alias="name">
+                    <dxl:Ident ColId="32" ColName="name" TypeMdid="0.1043.1.0" 
TypeModifier="14"/>
+                  </dxl:ProjElem>
+                  <dxl:ProjElem ColId="33" Alias="rank_desc">
+                    <dxl:Ident ColId="33" ColName="rank_desc" 
TypeMdid="0.20.1.0"/>
+                  </dxl:ProjElem>
+                </dxl:ProjList>
+              </dxl:CTEConsumer>
+            </dxl:Aggregate>
+          </dxl:Result>
+        </dxl:HashJoin>
+      </dxl:Sequence>
+    </dxl:Plan>
+  </dxl:Thread>
+</dxl:DXLMessage>
diff --git 
a/src/backend/gporca/libgpopt/include/gpopt/base/CDistributionSpecNonSingleton.h
 
b/src/backend/gporca/libgpopt/include/gpopt/base/CDistributionSpecNonSingleton.h
index 1b2709d193..da181bae5e 100644
--- 
a/src/backend/gporca/libgpopt/include/gpopt/base/CDistributionSpecNonSingleton.h
+++ 
b/src/backend/gporca/libgpopt/include/gpopt/base/CDistributionSpecNonSingleton.h
@@ -36,6 +36,9 @@ private:
        // should Replicated distribution satisfy current distribution
        BOOL m_fAllowReplicated{true};
 
+       // should allow this non-singleton spec to be enforced?
+       BOOL m_fAllowEnforced{true};
+
 public:
        CDistributionSpecNonSingleton(const CDistributionSpecNonSingleton &) =
                delete;
@@ -44,7 +47,8 @@ public:
        CDistributionSpecNonSingleton();
 
        //ctor
-       explicit CDistributionSpecNonSingleton(BOOL fAllowReplicated);
+       explicit CDistributionSpecNonSingleton(BOOL fAllowReplicated,
+                                                                               
   BOOL fAllowEnforced);
 
        // should Replicated distribution satisfy current distribution
        BOOL
@@ -53,6 +57,13 @@ public:
                return m_fAllowReplicated;
        }
 
+       // should allow this non-singleton spec to be enforced?
+       BOOL
+       FAllowEnforced() const
+       {
+               return m_fAllowEnforced;
+       }
+
        // accessor
        EDistributionType
        Edt() const override
diff --git 
a/src/backend/gporca/libgpopt/src/base/CDistributionSpecNonSingleton.cpp 
b/src/backend/gporca/libgpopt/src/base/CDistributionSpecNonSingleton.cpp
index 47660c543d..7a8aaf171c 100644
--- a/src/backend/gporca/libgpopt/src/base/CDistributionSpecNonSingleton.cpp
+++ b/src/backend/gporca/libgpopt/src/base/CDistributionSpecNonSingleton.cpp
@@ -41,8 +41,8 @@ 
CDistributionSpecNonSingleton::CDistributionSpecNonSingleton() = default;
 //
 //---------------------------------------------------------------------------
 CDistributionSpecNonSingleton::CDistributionSpecNonSingleton(
-       BOOL fAllowReplicated)
-       : m_fAllowReplicated(fAllowReplicated)
+       BOOL fAllowReplicated, BOOL fAllowEnforced)
+       : m_fAllowReplicated(fAllowReplicated), m_fAllowEnforced(fAllowEnforced)
 {
 }
 
diff --git a/src/backend/gporca/libgpopt/src/base/CEnfdDistribution.cpp 
b/src/backend/gporca/libgpopt/src/base/CEnfdDistribution.cpp
index 286fa5904b..d3d73936e3 100644
--- a/src/backend/gporca/libgpopt/src/base/CEnfdDistribution.cpp
+++ b/src/backend/gporca/libgpopt/src/base/CEnfdDistribution.cpp
@@ -15,6 +15,7 @@
 
 #include "gpopt/base/CDistributionSpec.h"
 #include "gpopt/base/CDistributionSpecHashed.h"
+#include "gpopt/base/CDistributionSpecNonSingleton.h"
 #include "gpopt/base/CDistributionSpecSingleton.h"
 #include "gpopt/base/CDrvdPropPlan.h"
 #include "gpopt/base/CReqdPropPlan.h"
@@ -141,6 +142,12 @@ CEnfdDistribution::Epet(CExpressionHandle &exprhdl, 
CPhysical *popPhysical,
                        return EpetUnnecessary;
                }
 
+               if (CDistributionSpec::EdtNonSingleton == m_pds->Edt() &&
+                       
!CDistributionSpecNonSingleton::PdsConvert(m_pds)->FAllowEnforced())
+               {
+                       return EpetProhibited;
+               }
+
                // N.B.: subtlety ahead:
                // We used to do the following check in 
CPhysicalMotion::FValidContext
                // which was correct but more costly. We are able to move the 
outer
diff --git a/src/backend/gporca/libgpopt/src/operators/CPhysicalSequence.cpp 
b/src/backend/gporca/libgpopt/src/operators/CPhysicalSequence.cpp
index 604d15318d..49cf80518f 100644
--- a/src/backend/gporca/libgpopt/src/operators/CPhysicalSequence.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CPhysicalSequence.cpp
@@ -228,6 +228,37 @@ CPhysicalSequence::PdsRequired(CMemoryPool *mp,
                return GPOS_NEW(mp) CDistributionSpecAny(this->Eopid());
        }
 
+       //      When the producer is not a singleton/Universal, we do request a 
non-singleton
+       //      on all the children. But when the producer is replicated still 
we were requesting
+       //      non-singleton which will possibly generate a risky plan and 
could cause a possible
+       //      hang too.For example in the following plan the slice 1 and 
slice 2 are executed on
+       //      a single segment but the producer is skewed on all the 
segments. So in this case the
+       //      producer on the other two segments undergoes starvation which 
causes the query to hang.
+       //
+       //      Gather Motion 3:1 (slice4; segments: 3)
+       //        -> Sequence
+       //          -> Shared Scan (share slice:id 4:0)
+       //                 ...
+       //              -> Random Redistribute Motion 1:3 (slice3)
+       //                -> Hash Join
+       //                  -> Gather Motion 1:1 (slice1; segments: 1)
+       //                        -> Shared Scan (share slice:id 1:0)
+       //                      -> Hash
+       //                        -> Aggregate
+       //                          -> Gather Motion 1:1 (slice2; segments: 1)
+       //                                -> Shared Scan (share slice:id 2:0)
+       //
+       //
+       // So adding a check if the producer is replicated, request a 
non-singleton spec that is not
+       // allowed to be enforced, to avoid potential hang issues.
+
+       if (CDistributionSpec::EdtTaintedReplicated == pds->Edt() ||
+               CDistributionSpec::EdtStrictReplicated == pds->Edt())
+       {
+               return GPOS_NEW(mp) CDistributionSpecNonSingleton(
+                       true /* fAllowReplicated */, false /* fAllowEnforced 
*/);
+       }
+
        // first child is non-singleton, request a non-singleton distribution 
on second child
        return GPOS_NEW(mp) CDistributionSpecNonSingleton();
 }
diff --git 
a/src/backend/gporca/libgpopt/src/operators/CPhysicalSerialUnionAll.cpp 
b/src/backend/gporca/libgpopt/src/operators/CPhysicalSerialUnionAll.cpp
index 8b72a96b19..76cfa8fda0 100644
--- a/src/backend/gporca/libgpopt/src/operators/CPhysicalSerialUnionAll.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CPhysicalSerialUnionAll.cpp
@@ -121,7 +121,7 @@ CPhysicalSerialUnionAll::PdsRequired(
                {
                        // Request 2: NON-SINGLETON from outer child
                        return GPOS_NEW(mp)
-                               CDistributionSpecNonSingleton(false 
/*fAllowReplicated*/);
+                               CDistributionSpecNonSingleton(false 
/*fAllowReplicated*/, true /* fAllowEnforced */);
                }
                else
                {
@@ -163,8 +163,8 @@ CPhysicalSerialUnionAll::PdsRequired(
        // we need to the inner child to be distributed across segments that 
does
        // not generate duplicate results. That is, inner child should not be 
replicated.
 
-       return GPOS_NEW(mp)
-               CDistributionSpecNonSingleton(false /*fAllowReplicated*/);
+       return GPOS_NEW(mp) CDistributionSpecNonSingleton(
+               false /*fAllowReplicated*/, true /*fAllowEnforced*/);
 }
 
 // EOF
diff --git a/src/backend/gporca/server/CMakeLists.txt 
b/src/backend/gporca/server/CMakeLists.txt
index 3c175fa9ce..4eae552f12 100644
--- a/src/backend/gporca/server/CMakeLists.txt
+++ b/src/backend/gporca/server/CMakeLists.txt
@@ -331,7 +331,7 @@ ReplicatedTableInClause ReplicatedTableSequenceInsert;
 
 CTaintedReplicatedTest:
 InsertNonSingleton NonSingleton TaintedReplicatedAgg 
TaintedReplicatedWindowAgg TaintedReplicatedLimit TaintedReplicatedFilter
-InsertReplicatedIntoSerialHashDistributedTable;
+InsertReplicatedIntoSerialHashDistributedTable TaintedReplicatedTablesCTE;
 
 CDqaTest:
 NonSplittableAgg DqaHavingMax DqaMax DqaMin DqaSubqueryMax DqaNoRedistribute;
diff --git a/src/test/regress/expected/rpt.out 
b/src/test/regress/expected/rpt.out
index 47ee448c44..1c1ea7b09e 100644
--- a/src/test/regress/expected/rpt.out
+++ b/src/test/regress/expected/rpt.out
@@ -9,6 +9,88 @@
 --
 create schema rpt;
 set search_path to rpt;
+-- If the producer is replicated, request a non-singleton spec
+-- that is not allowed to be enforced, to avoid potential CTE hang issue
+drop table if exists with_test1 cascade;
+NOTICE:  table "with_test1" does not exist, skipping
+create table with_test1 (i character varying(10)) DISTRIBUTED REPLICATED;
+explain
+WITH cte1 AS ( SELECT *,ROW_NUMBER() OVER ( PARTITION BY i) AS RANK_DESC FROM 
with_test1),
+cte2 AS ( SELECT 'COL1' TBLNM,COUNT(*) DIFFCNT FROM ( SELECT * FROM cte1) X)
+select * FROM ( SELECT 'COL1' TBLNM FROM cte1) A LEFT JOIN cte2 C ON 
A.TBLNM=C.TBLNM;
+                                                       QUERY PLAN              
                                         
+------------------------------------------------------------------------------------------------------------------------
+ Hash Left Join  (cost=10928.13..11529.44 rows=48100 width=72)
+   Hash Cond: (('COL1'::text) = ('COL1'::text))
+   ->  Gather Motion 1:1  (slice1; segments: 1)  (cost=5644.43..5644.43 
rows=48100 width=32)
+         ->  Subquery Scan on cte1  (cost=4321.68..5163.43 rows=48100 width=32)
+               ->  WindowAgg  (cost=4321.68..5163.43 rows=48100 width=46)
+                     Partition By: with_test1.i
+                     ->  Sort  (cost=4321.68..4441.93 rows=48100 width=38)
+                           Sort Key: with_test1.i
+                           ->  Seq Scan on with_test1  (cost=0.00..581.00 
rows=48100 width=38)
+   ->  Hash  (cost=5283.69..5283.69 rows=1 width=40)
+         ->  Aggregate  (cost=5283.68..5283.69 rows=1 width=40)
+               ->  Gather Motion 1:1  (slice2; segments: 1)  
(cost=5163.43..5163.43 rows=48100 width=0)
+                     ->  Subquery Scan on cte1_1  (cost=4321.68..5163.43 
rows=48100 width=0)
+                           ->  WindowAgg  (cost=4321.68..5163.43 rows=48100 
width=46)
+                                 Partition By: with_test1_1.i
+                                 ->  Sort  (cost=4321.68..4441.93 rows=48100 
width=38)
+                                       Sort Key: with_test1_1.i
+                                       ->  Seq Scan on with_test1 with_test1_1 
 (cost=0.00..581.00 rows=48100 width=38)
+ Optimizer: Postgres query optimizer
+(19 rows)
+
+WITH cte1 AS ( SELECT *,ROW_NUMBER() OVER ( PARTITION BY i) AS RANK_DESC FROM 
with_test1),
+     cte2 AS ( SELECT 'COL1' TBLNM,COUNT(*) DIFFCNT FROM ( SELECT * FROM cte1) 
X)
+select * FROM ( SELECT 'COL1' TBLNM FROM cte1) A LEFT JOIN cte2 C ON 
A.TBLNM=C.TBLNM;
+ tblnm | tblnm | diffcnt 
+-------+-------+---------
+(0 rows)
+
+-- This is expected to fall back to planner.
+drop table if exists with_test2 cascade;
+NOTICE:  table "with_test2" does not exist, skipping
+drop table if exists with_test3 cascade;
+NOTICE:  table "with_test3" does not exist, skipping
+create table with_test2 (id bigserial NOT NULL, isc varchar(15) NOT NULL,iscd 
varchar(15) NULL) DISTRIBUTED REPLICATED;
+create table with_test3 (id numeric NULL, rc varchar(255) NULL,ri numeric 
NULL) DISTRIBUTED REPLICATED;
+insert into with_test2 (isc,iscd) values ('CMN_BIN_YES', 'CMN_BIN_YES');
+insert into with_test3 (id,rc,ri) values (113551,'CMN_BIN_YES',101991), 
(113552,'CMN_BIN_NO',101991), (113553,'CMN_BIN_ERR',101991), 
(113554,'CMN_BIN_NULL',101991);
+explain
+WITH
+    t1 AS (SELECT * FROM with_test2),
+    t2 AS (SELECT id, rc FROM with_test3 WHERE ri = 101991)
+SELECT p.*FROM t1 p JOIN t2 r ON p.isc = r.rc JOIN t2 r1 ON p.iscd = r1.rc 
LIMIT 1;
+                                               QUERY PLAN                      
                          
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)  (cost=390.63..390.63 rows=1 
width=104)
+   ->  Limit  (cost=332.63..390.63 rows=1 width=104)
+         ->  Hash Join  (cost=332.63..739.49 rows=7 width=104)
+               Hash Cond: ((with_test2.iscd)::text = (r1.rc)::text)
+               ->  Hash Join  (cost=166.32..572.78 rows=129 width=104)
+                     Hash Cond: ((with_test2.isc)::text = (r.rc)::text)
+                     ->  Seq Scan on with_test2  (cost=0.00..344.00 rows=24400 
width=104)
+                     ->  Hash  (cost=166.25..166.25 rows=5 width=516)
+                           ->  Subquery Scan on r  (cost=0.00..166.25 rows=5 
width=516)
+                                 ->  Seq Scan on with_test3  
(cost=0.00..166.25 rows=5 width=548)
+                                       Filter: (ri = '101991'::numeric)
+               ->  Hash  (cost=166.25..166.25 rows=5 width=516)
+                     ->  Subquery Scan on r1  (cost=0.00..166.25 rows=5 
width=516)
+                           ->  Seq Scan on with_test3 with_test3_1  
(cost=0.00..166.25 rows=5 width=548)
+                                 Filter: (ri = '101991'::numeric)
+ Optimizer: Postgres query optimizer
+(16 rows)
+
+WITH
+    t1 AS (SELECT * FROM with_test2),
+    t2 AS (SELECT id, rc FROM with_test3 WHERE ri = 101991)
+SELECT p.*FROM t1 p JOIN t2 r ON p.isc = r.rc JOIN t2 r1 ON p.iscd = r1.rc 
LIMIT 1;
+ id |     isc     |    iscd     
+----+-------------+-------------
+  1 | CMN_BIN_YES | CMN_BIN_YES
+(1 row)
+
 ---------
 -- INSERT
 ---------
diff --git a/src/test/regress/expected/rpt_optimizer.out 
b/src/test/regress/expected/rpt_optimizer.out
index 1783eed147..d64f039a41 100644
--- a/src/test/regress/expected/rpt_optimizer.out
+++ b/src/test/regress/expected/rpt_optimizer.out
@@ -9,6 +9,87 @@
 --
 create schema rpt;
 set search_path to rpt;
+-- If the producer is replicated, request a non-singleton spec
+-- that is not allowed to be enforced, to avoid potential CTE hang issue
+drop table if exists with_test1 cascade;
+NOTICE:  table "with_test1" does not exist, skipping
+create table with_test1 (i character varying(10)) DISTRIBUTED REPLICATED;
+explain
+WITH cte1 AS ( SELECT *,ROW_NUMBER() OVER ( PARTITION BY i) AS RANK_DESC FROM 
with_test1),
+cte2 AS ( SELECT 'COL1' TBLNM,COUNT(*) DIFFCNT FROM ( SELECT * FROM cte1) X)
+select * FROM ( SELECT 'COL1' TBLNM FROM cte1) A LEFT JOIN cte2 C ON 
A.TBLNM=C.TBLNM;
+                                             QUERY PLAN                        
                     
+----------------------------------------------------------------------------------------------------
+ Sequence  (cost=0.00..1293.00 rows=2 width=24)
+   ->  Shared Scan (share slice:id 0:0)  (cost=0.00..431.00 rows=1 width=1)
+         ->  Gather Motion 1:1  (slice1; segments: 1)  (cost=0.00..431.00 
rows=1 width=16)
+               ->  WindowAgg  (cost=0.00..431.00 rows=3 width=16)
+                     Partition By: with_test1.i
+                     ->  Sort  (cost=0.00..431.00 rows=3 width=10)
+                           Sort Key: with_test1.i
+                           ->  Seq Scan on with_test1  (cost=0.00..431.00 
rows=3 width=10)
+   ->  Hash Left Join  (cost=0.00..862.00 rows=2 width=24)
+         Hash Cond: (('COL1'::text) = ('COL1'::text))
+         ->  Result  (cost=0.00..431.00 rows=1 width=8)
+               ->  Shared Scan (share slice:id 0:0)  (cost=0.00..431.00 rows=1 
width=1)
+         ->  Hash  (cost=431.00..431.00 rows=1 width=16)
+               ->  Result  (cost=0.00..431.00 rows=1 width=16)
+                     Filter: (('COL1'::text) = 'COL1'::text)
+                     ->  Aggregate  (cost=0.00..431.00 rows=1 width=8)
+                           ->  Shared Scan (share slice:id 0:0)  
(cost=0.00..431.00 rows=1 width=1)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(18 rows)
+
+WITH cte1 AS ( SELECT *,ROW_NUMBER() OVER ( PARTITION BY i) AS RANK_DESC FROM 
with_test1),
+     cte2 AS ( SELECT 'COL1' TBLNM,COUNT(*) DIFFCNT FROM ( SELECT * FROM cte1) 
X)
+select * FROM ( SELECT 'COL1' TBLNM FROM cte1) A LEFT JOIN cte2 C ON 
A.TBLNM=C.TBLNM;
+ tblnm | tblnm | diffcnt 
+-------+-------+---------
+(0 rows)
+
+-- This is expected to fall back to planner.
+drop table if exists with_test2 cascade;
+NOTICE:  table "with_test2" does not exist, skipping
+drop table if exists with_test3 cascade;
+NOTICE:  table "with_test3" does not exist, skipping
+create table with_test2 (id bigserial NOT NULL, isc varchar(15) NOT NULL,iscd 
varchar(15) NULL) DISTRIBUTED REPLICATED;
+create table with_test3 (id numeric NULL, rc varchar(255) NULL,ri numeric 
NULL) DISTRIBUTED REPLICATED;
+insert into with_test2 (isc,iscd) values ('CMN_BIN_YES', 'CMN_BIN_YES');
+insert into with_test3 (id,rc,ri) values (113551,'CMN_BIN_YES',101991), 
(113552,'CMN_BIN_NO',101991), (113553,'CMN_BIN_ERR',101991), 
(113554,'CMN_BIN_NULL',101991);
+explain
+WITH
+    t1 AS (SELECT * FROM with_test2),
+    t2 AS (SELECT id, rc FROM with_test3 WHERE ri = 101991)
+SELECT p.*FROM t1 p JOIN t2 r ON p.isc = r.rc JOIN t2 r1 ON p.iscd = r1.rc 
LIMIT 1;
+                                               QUERY PLAN                      
                          
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)  (cost=390.63..390.63 rows=1 
width=104)
+   ->  Limit  (cost=332.63..390.63 rows=1 width=104)
+         ->  Hash Join  (cost=332.63..739.49 rows=7 width=104)
+               Hash Cond: ((with_test2.iscd)::text = (r1.rc)::text)
+               ->  Hash Join  (cost=166.32..572.78 rows=129 width=104)
+                     Hash Cond: ((with_test2.isc)::text = (r.rc)::text)
+                     ->  Seq Scan on with_test2  (cost=0.00..344.00 rows=24400 
width=104)
+                     ->  Hash  (cost=166.25..166.25 rows=5 width=516)
+                           ->  Subquery Scan on r  (cost=0.00..166.25 rows=5 
width=516)
+                                 ->  Seq Scan on with_test3  
(cost=0.00..166.25 rows=5 width=548)
+                                       Filter: (ri = '101991'::numeric)
+               ->  Hash  (cost=166.25..166.25 rows=5 width=516)
+                     ->  Subquery Scan on r1  (cost=0.00..166.25 rows=5 
width=516)
+                           ->  Seq Scan on with_test3 with_test3_1  
(cost=0.00..166.25 rows=5 width=548)
+                                 Filter: (ri = '101991'::numeric)
+ Optimizer: Postgres query optimizer
+(16 rows)
+
+WITH
+    t1 AS (SELECT * FROM with_test2),
+    t2 AS (SELECT id, rc FROM with_test3 WHERE ri = 101991)
+SELECT p.*FROM t1 p JOIN t2 r ON p.isc = r.rc JOIN t2 r1 ON p.iscd = r1.rc 
LIMIT 1;
+ id |     isc     |    iscd     
+----+-------------+-------------
+  1 | CMN_BIN_YES | CMN_BIN_YES
+(1 row)
+
 ---------
 -- INSERT
 ---------
diff --git a/src/test/regress/sql/rpt.sql b/src/test/regress/sql/rpt.sql
index 96f0f28814..21a3bbf31b 100644
--- a/src/test/regress/sql/rpt.sql
+++ b/src/test/regress/sql/rpt.sql
@@ -10,6 +10,38 @@
 create schema rpt;
 set search_path to rpt;
 
+-- If the producer is replicated, request a non-singleton spec
+-- that is not allowed to be enforced, to avoid potential CTE hang issue
+drop table if exists with_test1 cascade;
+create table with_test1 (i character varying(10)) DISTRIBUTED REPLICATED;
+
+explain
+WITH cte1 AS ( SELECT *,ROW_NUMBER() OVER ( PARTITION BY i) AS RANK_DESC FROM 
with_test1),
+cte2 AS ( SELECT 'COL1' TBLNM,COUNT(*) DIFFCNT FROM ( SELECT * FROM cte1) X)
+select * FROM ( SELECT 'COL1' TBLNM FROM cte1) A LEFT JOIN cte2 C ON 
A.TBLNM=C.TBLNM;
+
+WITH cte1 AS ( SELECT *,ROW_NUMBER() OVER ( PARTITION BY i) AS RANK_DESC FROM 
with_test1),
+     cte2 AS ( SELECT 'COL1' TBLNM,COUNT(*) DIFFCNT FROM ( SELECT * FROM cte1) 
X)
+select * FROM ( SELECT 'COL1' TBLNM FROM cte1) A LEFT JOIN cte2 C ON 
A.TBLNM=C.TBLNM;
+
+-- This is expected to fall back to planner.
+drop table if exists with_test2 cascade;
+drop table if exists with_test3 cascade;
+create table with_test2 (id bigserial NOT NULL, isc varchar(15) NOT NULL,iscd 
varchar(15) NULL) DISTRIBUTED REPLICATED;
+create table with_test3 (id numeric NULL, rc varchar(255) NULL,ri numeric 
NULL) DISTRIBUTED REPLICATED;
+insert into with_test2 (isc,iscd) values ('CMN_BIN_YES', 'CMN_BIN_YES');
+insert into with_test3 (id,rc,ri) values (113551,'CMN_BIN_YES',101991), 
(113552,'CMN_BIN_NO',101991), (113553,'CMN_BIN_ERR',101991), 
(113554,'CMN_BIN_NULL',101991);
+explain
+WITH
+    t1 AS (SELECT * FROM with_test2),
+    t2 AS (SELECT id, rc FROM with_test3 WHERE ri = 101991)
+SELECT p.*FROM t1 p JOIN t2 r ON p.isc = r.rc JOIN t2 r1 ON p.iscd = r1.rc 
LIMIT 1;
+
+WITH
+    t1 AS (SELECT * FROM with_test2),
+    t2 AS (SELECT id, rc FROM with_test3 WHERE ri = 101991)
+SELECT p.*FROM t1 p JOIN t2 r ON p.isc = r.rc JOIN t2 r1 ON p.iscd = r1.rc 
LIMIT 1;
+
 ---------
 -- INSERT
 ---------


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to