yjhjstz commented on code in PR #1398:
URL: https://github.com/apache/cloudberry/pull/1398#discussion_r2448611852
##########
src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp:
##########
@@ -2410,6 +2416,120 @@ CCostModelGPDB::CostScan(CMemoryPool *, // mp
}
+//---------------------------------------------------------------------------
+// @function:
+// CCostModelGPDB::CostParallelTableScan
+//
+// @doc:
+// Cost of parallel table scan: serial scan cost divided by (workers * efficiency), plus worker startup cost, multiplied by the number of rebinds.
+// With at most one worker the result is whatever CostScan returns for the same arguments.
+//---------------------------------------------------------------------------
+CCost
+CCostModelGPDB::CostParallelTableScan(CMemoryPool *mp, // mp is only forwarded to CostScan in the single-worker path
+
CExpressionHandle &exprhdl,
+ const
CCostModelGPDB *pcmgpdb,
+ const
SCostingInfo *pci)
+{
+ GPOS_ASSERT(nullptr != pcmgpdb);
+ GPOS_ASSERT(nullptr != pci);
+
+ COperator *pop = exprhdl.Pop();
+ GPOS_ASSERT(COperator::EopPhysicalParallelTableScan == pop->Eopid());
+
+ // Convert to the parallel table scan operator so its worker count can be read
+ CPhysicalParallelTableScan *popParallelScan =
+ CPhysicalParallelTableScan::PopConvert(pop);
+ ULONG ulWorkers = popParallelScan->UlParallelWorkers();
+
+ // With 0 or 1 workers, fall back to the regular scan cost. NOTE(review): confirm CostScan accepts EopPhysicalParallelTableScan; its body is not visible in this hunk.
+ if (ulWorkers <= 1)
+ {
+ return CostScan(mp, exprhdl, pcmgpdb, pci);
+ }
+
+ // Inputs of the serial scan cost: init-scan factor, base-table width, and table-scan cost unit
+ const CDouble dInitScan =
+ pcmgpdb->GetCostModelParams()
+ ->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
+ ->Get();
+ const CDouble dTableWidth =
+ CPhysicalScan::PopConvert(pop)->PstatsBaseTable()->Width();
+ const CDouble dTableScanCostUnit =
+ pcmgpdb->GetCostModelParams()
+ ->PcpLookup(CCostModelParamsGPDB::EcpTableScanCostUnit)
+ ->Get();
+
+ // Base (serial) scan cost = init factor + rows * table width * scan cost unit
+ CDouble dBaseScanCost = dInitScan + pci->Rows() * dTableWidth *
dTableScanCostUnit;
+
+ // Efficiency factor for this worker count, obtained from CalculateParallelEfficiency
+ CDouble dParallelEfficiency = CalculateParallelEfficiency(ulWorkers);
+
+ // Parallel scan cost = base cost / (workers * efficiency); the init factor is part of the base cost, so it is divided as well
+ CDouble dParallelScanCost = dBaseScanCost / (ulWorkers *
dParallelEfficiency);
+
+ // Worker startup cost as reported by GetWorkerStartupCost for this worker count
+ CDouble dWorkerStartupCost = GetWorkerStartupCost(pcmgpdb, ulWorkers);
+
+ // Total cost = number of rebinds * (parallel scan cost + worker startup cost)
+ return CCost(pci->NumRebinds() * (dParallelScanCost +
dWorkerStartupCost));
+}
+
+//---------------------------------------------------------------------------
+// @function:
+// CCostModelGPDB::CalculateParallelEfficiency
+//
+// @doc:
+// Calculate parallel efficiency factor (0-1) based on worker count
+//
+//---------------------------------------------------------------------------
+CDouble
+CCostModelGPDB::CalculateParallelEfficiency(ULONG ulWorkers)
+{
+ if (ulWorkers <= 1)
+ {
+ return 1.0;
+ }
+
+ // Efficiency decreases logarithmically with more workers
+ // Formula: efficiency = 1 / (1 + 0.1 * log2(workers))
+ // This gives: 2 workers = 0.91, 4 workers = 0.83, 8 workers = 0.77
Review Comment:
A practical approximation to Amdahl's Law
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]