This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 6e33101046d81d8c098d5726945a43d34017b44f
Author: Jianghua Yang <[email protected]>
AuthorDate: Fri Jul 18 01:30:07 2025 +0800

    Fix fast analyze for PAX tables and simplify acquisition function selection
    
    This commit addresses several issues with fast analyze:
    
    1. For PAX tables, we now properly estimate the number of blocks by using
     table_relation_estimate_size() rather than RelationGetNumberOfBlocks(),
     since PAX uses a non-fixed block layout. This provides more accurate
     sampling for PAX tables.
    
    2. Simplified the acquisition function selection logic by always using
     gp_acquire_sample_rows_func for regular tables, removing the conditional
     check for rd_tableam->relation_acquire_sample_rows. This makes the code
     more straightforward and consistent.
    
    3. Fixed an issue in datumstream.c by resetting blockRowCount when
     closing a file during analyze operations.
---
 src/backend/access/aocs/aocsam.c             | 15 +++++++----
 src/backend/access/appendonly/appendonlyam.c |  6 +++++
 src/backend/commands/analyze.c               | 37 +++++++++++++++-------------
 src/backend/utils/datumstream/datumstream.c  |  1 +
 4 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c
index d3e5c15802a..d5e3fd45e59 100644
--- a/src/backend/access/aocs/aocsam.c
+++ b/src/backend/access/aocs/aocsam.c
@@ -478,8 +478,8 @@ aocs_blkdirscan_init(AOCSScanDesc scan)
        if (scan->aocsfetch == NULL)
        {
                int natts = RelationGetNumberOfAttributes(scan->rs_base.rs_rd);
-               scan->proj = palloc(natts * sizeof(*scan->proj));
-               MemSet(scan->proj, true, natts * sizeof(*scan->proj));
+               scan->proj = palloc(natts * sizeof(bool));
+               MemSet(scan->proj, true, natts * sizeof(bool));
 
                scan->aocsfetch = aocs_fetch_init(scan->rs_base.rs_rd,
                                                                                
  scan->rs_base.rs_snapshot,
@@ -662,10 +662,9 @@ aocs_beginscan_internal(Relation relation,
                                                           AccessShareLock,
                                                           
appendOnlyMetaDataSnapshot);
 
-               if ((flags & SO_TYPE_ANALYZE) != 0)
+               if ((flags & SO_TYPE_ANALYZE) != 0 && OidIsValid(blkdirrelid))
                {
-                       if (OidIsValid(blkdirrelid))
-                               aocs_blkdirscan_init(scan);
+                       aocs_blkdirscan_init(scan);
                }
        }
 
@@ -752,6 +751,12 @@ aocs_locate_target_segment(AOCSScanDesc scan, int64 
targrow)
                if (rowcount <= 0)
                        continue;
 
+               if (scan->seginfo[i]->state == AOSEG_STATE_AWAITING_DROP)
+               {
+                       /* skip this segment, it is awaiting drop */
+                       continue;
+               }
+
                if (scan->segfirstrow + rowcount - 1 >= targrow)
                {
                        /* found the target segment */
diff --git a/src/backend/access/appendonly/appendonlyam.c 
b/src/backend/access/appendonly/appendonlyam.c
index 508ddcfbaa9..b5f6a17a597 100755
--- a/src/backend/access/appendonly/appendonlyam.c
+++ b/src/backend/access/appendonly/appendonlyam.c
@@ -1132,6 +1132,12 @@ appendonly_locate_target_segment(AppendOnlyScanDesc 
scan, int64 targrow)
                if (rowcount <= 0)
                        continue;
 
+               if (scan->aos_segfile_arr[i]->state == 
AOSEG_STATE_AWAITING_DROP)
+               {
+                       /* skip this segment, it is awaiting drop */
+                       continue;
+               }
+
                if (scan->segfirstrow + rowcount - 1 >= targrow)
                {
                        /* found the target segment */
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 5a066e211b1..e06dbea2870 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -358,16 +358,11 @@ analyze_rel_internal(Oid relid, RangeVar *relation,
                onerel->rd_rel->relkind == RELKIND_MATVIEW ||
                onerel->rd_rel->relkind == RELKIND_DIRECTORY_TABLE)
        {
-               /* Regular table, so we'll use the regular row acquisition 
function */
-               if (onerel->rd_tableam)
-                       acquirefunc = 
onerel->rd_tableam->relation_acquire_sample_rows;
-
                /*
                 * If the TableAmRoutine's gp_acquire_sample_rows_func if NULL, 
we use
                 * gp_acquire_sample_rows_func as default.
                 */
-               if (acquirefunc == NULL)
-                       acquirefunc = gp_acquire_sample_rows_func;
+               acquirefunc = gp_acquire_sample_rows_func;
 
                /* Also get regular table's size */
                relpages = AcquireNumberOfBlocks(onerel);
@@ -1716,8 +1711,24 @@ acquire_sample_rows(Relation onerel, int elevel,
         * the relation should not be an AO/CO table.
         */
        Assert(!RelationIsAppendOptimized(onerel));
+       if (RelationIsPax(onerel))
+       {
+               /* PAX use non-fixed block layout */
+               BlockNumber pages;
+               double          tuples;
+               double          allvisfrac;
+               int32           attr_widths;
 
-       totalblocks = RelationGetNumberOfBlocks(onerel);
+               table_relation_estimate_size(onerel,    &attr_widths, &pages,
+                                                                       
&tuples, &allvisfrac);
+
+               if (tuples > UINT_MAX)
+                       tuples = UINT_MAX;
+
+               totalblocks = (BlockNumber)tuples;
+       }
+       else
+               totalblocks = RelationGetNumberOfBlocks(onerel);
 
        /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
        OldestXmin = GetOldestNonRemovableTransactionId(onerel);
@@ -2055,16 +2066,8 @@ acquire_inherited_sample_rows(Relation onerel, int 
elevel,
                        childrel->rd_rel->relkind == RELKIND_MATVIEW ||
                        childrel->rd_rel->relkind == RELKIND_DIRECTORY_TABLE)
                {
-                       /* Regular table, so use the regular row acquisition 
function */
-                       if (childrel->rd_tableam)
-                               acquirefunc = 
childrel->rd_tableam->relation_acquire_sample_rows;
-
-                       /*
-                        * If the TableAmRoutine's relation_acquire_sample_rows 
if NULL, we use
-                        * relation_acquire_sample_rows as default.
-                        */
-                       if (acquirefunc == NULL)
-                               acquirefunc = gp_acquire_sample_rows_func;
+                       /* use relation_acquire_sample_rows as default. */
+                       acquirefunc = gp_acquire_sample_rows_func;
 
                        relpages = AcquireNumberOfBlocks(childrel);
                }
diff --git a/src/backend/utils/datumstream/datumstream.c 
b/src/backend/utils/datumstream/datumstream.c
index 82c2b5a7cb3..53addc99a95 100644
--- a/src/backend/utils/datumstream/datumstream.c
+++ b/src/backend/utils/datumstream/datumstream.c
@@ -876,6 +876,7 @@ datumstreamread_close_file(DatumStreamRead * ds)
 {
        AppendOnlyStorageRead_CloseFile(&ds->ao_read);
 
+       ds->blockRowCount = 0;
        ds->need_close_file = false;
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to