Skip to content

Commit af8d3dd

Browse files
committed
Fix AOCS sampling with large values (#353)
The gp_acquire_sample_rows function is called when an ANALYZE query is executed. An error occurred when gp_acquire_sample_rows was called for an AOCS table containing values larger than the block size while gp_use_fastanalyze was enabled. The error manifested itself in the datumstreamread_nth function, because largeObjectState was DatumStreamLargeObjectState_HaveAoContent. The fast ANALYZE implementation for AO tables was taken from Cloudberry (see 23aef441a827a77f3338e81950a0e66a26f6eaf7). Cloudberry calls aocs_gettuple for the first time when no data block has been read yet, because the required tuple may not be in the first block. Cloudberry skips reading the block in open_all_datumstreamread_segfiles when the SO_TYPE_ANALYZE flag is set (it is set when the AOCSScanDesc is created in aoco_acquire_sample_rows). This skipping was not backported to OpenGPDB. The error is fixed by backporting the skipping. The targrow field is used instead of a flag so as not to break the ABI. In addition, AOCSScanDesc is now passed to open_all_datumstreamread_segfiles instead of the individual fields of this structure.
1 parent 54d7cbe commit af8d3dd

4 files changed

Lines changed: 41 additions & 17 deletions

File tree

src/backend/access/aocs/aocsam.c

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -99,24 +99,25 @@ open_datumstreamread_segfile(
9999
* the block directory.
100100
*/
101101
static void
102-
open_all_datumstreamread_segfiles(Relation rel,
103-
AOCSFileSegInfo *segInfo,
104-
DatumStreamRead **ds,
105-
int *proj_atts,
106-
int num_proj_atts,
107-
AppendOnlyBlockDirectory *blockDirectory)
102+
open_all_datumstreamread_segfiles(AOCSScanDesc scan, AOCSFileSegInfo *segInfo)
108103
{
104+
Relation rel = scan->aos_rel;
109105
char *basepath = relpathbackend(rel->rd_node, rel->rd_backend, MAIN_FORKNUM);
110106
int i;
111107

112-
Assert(proj_atts);
108+
Assert(scan->proj_atts);
113109

114-
for (i = 0; i < num_proj_atts; i++)
110+
for (i = 0; i < scan->num_proj_atts; i++)
115111
{
116-
int attno = proj_atts[i];
112+
int attno = scan->proj_atts[i];
113+
114+
open_datumstreamread_segfile(basepath, rel->rd_node, segInfo, scan->ds[attno], attno);
115+
116+
/* skip reading block for ANALYZE */
117+
if (scan->targrow >= 0)
118+
continue;
117119

118-
open_datumstreamread_segfile(basepath, rel->rd_node, segInfo, ds[attno], attno);
119-
datumstreamread_block(ds[attno], blockDirectory, attno);
120+
datumstreamread_block(scan->ds[attno], scan->blockDirectory, attno);
120121
}
121122

122123
pfree(basepath);
@@ -398,12 +399,7 @@ open_next_scan_seg(AOCSScanDesc scan)
398399
firstSequence);
399400
}
400401

401-
open_all_datumstreamread_segfiles(scan->aos_rel,
402-
curSegInfo,
403-
scan->ds,
404-
scan->proj_atts,
405-
scan->num_proj_atts,
406-
scan->blockDirectory);
402+
open_all_datumstreamread_segfiles(scan, curSegInfo);
407403

408404
return scan->cur_seg;
409405
}
@@ -550,6 +546,7 @@ aocs_beginscan_internal(Relation relation,
550546
aocs_initscan(scan);
551547

552548
scan->blockDirectory = NULL;
549+
scan->targrow = -1;
553550

554551
AppendOnlyVisimap_Init(&scan->visibilityMap,
555552
relation->rd_appendonly->visimaprelid,

src/include/cdb/cdbaocsam.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ typedef struct AOCSScanDescData
164164
* which starts from 0.
165165
* In other words, if we have seg0 rownums: [1, 100], seg1 rownums: [1, 200]
166166
* If targrow = 150, then we are referring to seg1's rownum=51.
167+
* -1 means that targrow is not set.
167168
*/
168169
int64 targrow;
169170

src/test/regress/expected/analyze.out

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,3 +1016,17 @@ select relhassubclass from gp_dist_random('pg_class') where relname = 'test_tb_1
10161016
f
10171017
(3 rows)
10181018

1019+
-- Check than gp_acquire_sample_rows works without errors when
1020+
-- gp_use_fastanalyze is enabled and AOCS table contains large values
1021+
set gp_use_fastanalyze = on;
1022+
create table t1
1023+
with (appendoptimized = true, orientation = column)
1024+
as select i, repeat('A', 40000) from generate_series(1, 500) i
1025+
distributed by (i);
1026+
select count(*)
1027+
from (select pg_catalog.gp_acquire_sample_rows('t1'::regclass, 10, 'f')) a;
1028+
count
1029+
-------
1030+
33
1031+
(1 row)
1032+

src/test/regress/sql/analyze.sql

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,3 +506,15 @@ select relhassubclass from gp_dist_random('pg_class') where relname = 'test_tb_1
506506
ANALYZE;
507507
select relhassubclass from pg_class where relname = 'test_tb_14644';
508508
select relhassubclass from gp_dist_random('pg_class') where relname = 'test_tb_14644';
509+
510+
-- Check than gp_acquire_sample_rows works without errors when
511+
-- gp_use_fastanalyze is enabled and AOCS table contains large values
512+
set gp_use_fastanalyze = on;
513+
514+
create table t1
515+
with (appendoptimized = true, orientation = column)
516+
as select i, repeat('A', 40000) from generate_series(1, 500) i
517+
distributed by (i);
518+
519+
select count(*)
520+
from (select pg_catalog.gp_acquire_sample_rows('t1'::regclass, 10, 'f')) a;

0 commit comments

Comments
 (0)