This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 6e33101046d81d8c098d5726945a43d34017b44f Author: Jianghua Yang <[email protected]> AuthorDate: Fri Jul 18 01:30:07 2025 +0800 Fix fast analyze for PAX tables and simplify acquisition function selection This commit addresses several issues with fast analyze: 1. For PAX tables, we now properly estimate the number of blocks by using table_relation_estimate_size() rather than RelationGetNumberOfBlocks(), since PAX uses non-fixed block layout. This provides more accurate sampling for PAX tables. 2. Simplified the acquisition function selection logic by always using gp_acquire_sample_rows_func for regular tables, removing the conditional check for rd_tableam->relation_acquire_sample_rows. This makes the code more straightforward and consistent. 3. Fixed an issue in datumstream.c by resetting blockRowCount when closing a file during analyze operations. --- src/backend/access/aocs/aocsam.c | 15 +++++++---- src/backend/access/appendonly/appendonlyam.c | 6 +++++ src/backend/commands/analyze.c | 37 +++++++++++++++------------- src/backend/utils/datumstream/datumstream.c | 1 + 4 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c index d3e5c15802a..d5e3fd45e59 100644 --- a/src/backend/access/aocs/aocsam.c +++ b/src/backend/access/aocs/aocsam.c @@ -478,8 +478,8 @@ aocs_blkdirscan_init(AOCSScanDesc scan) if (scan->aocsfetch == NULL) { int natts = RelationGetNumberOfAttributes(scan->rs_base.rs_rd); - scan->proj = palloc(natts * sizeof(*scan->proj)); - MemSet(scan->proj, true, natts * sizeof(*scan->proj)); + scan->proj = palloc(natts * sizeof(bool)); + MemSet(scan->proj, true, natts * sizeof(bool)); scan->aocsfetch = aocs_fetch_init(scan->rs_base.rs_rd, scan->rs_base.rs_snapshot, @@ -662,10 +662,9 @@ aocs_beginscan_internal(Relation relation, AccessShareLock, appendOnlyMetaDataSnapshot); - if ((flags & SO_TYPE_ANALYZE) != 0) + if ((flags & SO_TYPE_ANALYZE) != 0 && OidIsValid(blkdirrelid)) { - if (OidIsValid(blkdirrelid)) - aocs_blkdirscan_init(scan); + aocs_blkdirscan_init(scan); } } @@ -752,6 +751,12 @@ aocs_locate_target_segment(AOCSScanDesc scan, int64 targrow) if (rowcount <= 0) continue; + if (scan->seginfo[i]->state == AOSEG_STATE_AWAITING_DROP) + { + /* skip this segment, it is awaiting drop */ + continue; + } + if (scan->segfirstrow + rowcount - 1 >= targrow) { /* found the target segment */ diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index 508ddcfbaa9..b5f6a17a597 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -1132,6 +1132,12 @@ appendonly_locate_target_segment(AppendOnlyScanDesc scan, int64 targrow) if (rowcount <= 0) continue; + if (scan->aos_segfile_arr[i]->state == AOSEG_STATE_AWAITING_DROP) + { + /* skip this segment, it is awaiting drop */ + continue; + } + if (scan->segfirstrow + rowcount - 1 >= targrow) { /* found the target segment */ diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 5a066e211b1..e06dbea2870 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -358,16 +358,11 @@ analyze_rel_internal(Oid relid, RangeVar *relation, onerel->rd_rel->relkind == RELKIND_MATVIEW || onerel->rd_rel->relkind == RELKIND_DIRECTORY_TABLE) { - /* Regular table, so we'll use the regular row acquisition function */ - if (onerel->rd_tableam) - acquirefunc = onerel->rd_tableam->relation_acquire_sample_rows; - /* * If the TableAmRoutine's gp_acquire_sample_rows_func if NULL, we use * gp_acquire_sample_rows_func as default. */ - if (acquirefunc == NULL) - acquirefunc = gp_acquire_sample_rows_func; + acquirefunc = gp_acquire_sample_rows_func; /* Also get regular table's size */ relpages = AcquireNumberOfBlocks(onerel); @@ -1716,8 +1711,24 @@ acquire_sample_rows(Relation onerel, int elevel, * the relation should not be an AO/CO table. */ Assert(!RelationIsAppendOptimized(onerel)); + if (RelationIsPax(onerel)) + { + /* PAX use non-fixed block layout */ + BlockNumber pages; + double tuples; + double allvisfrac; + int32 attr_widths; - totalblocks = RelationGetNumberOfBlocks(onerel); + table_relation_estimate_size(onerel, &attr_widths, &pages, + &tuples, &allvisfrac); + + if (tuples > UINT_MAX) + tuples = UINT_MAX; + + totalblocks = (BlockNumber)tuples; + } + else + totalblocks = RelationGetNumberOfBlocks(onerel); /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */ OldestXmin = GetOldestNonRemovableTransactionId(onerel); @@ -2055,16 +2066,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, childrel->rd_rel->relkind == RELKIND_MATVIEW || childrel->rd_rel->relkind == RELKIND_DIRECTORY_TABLE) { - /* Regular table, so use the regular row acquisition function */ - if (childrel->rd_tableam) - acquirefunc = childrel->rd_tableam->relation_acquire_sample_rows; - - /* - * If the TableAmRoutine's relation_acquire_sample_rows if NULL, we use - * relation_acquire_sample_rows as default. - */ - if (acquirefunc == NULL) - acquirefunc = gp_acquire_sample_rows_func; + /* use relation_acquire_sample_rows as default. */ + acquirefunc = gp_acquire_sample_rows_func; relpages = AcquireNumberOfBlocks(childrel); } diff --git a/src/backend/utils/datumstream/datumstream.c b/src/backend/utils/datumstream/datumstream.c index 82c2b5a7cb3..53addc99a95 100644 --- a/src/backend/utils/datumstream/datumstream.c +++ b/src/backend/utils/datumstream/datumstream.c @@ -876,6 +876,7 @@ datumstreamread_close_file(DatumStreamRead * ds) { AppendOnlyStorageRead_CloseFile(&ds->ao_read); + ds->blockRowCount = 0; ds->need_close_file = false; } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
