19 changes: 16 additions & 3 deletions src/backend/cdb/cdbllize.c
@@ -112,6 +112,7 @@ typedef struct decorate_subplans_with_motions_context
/* Current position in the tree. */
int sliceDepth;
Flow *currentPlanFlow;
bool shouldOmitMaterial;
} decorate_subplans_with_motions_context;

/* State for the recursive build_slice_table() function. */
@@ -725,6 +726,7 @@ cdbllize_decorate_subplans_with_motions(PlannerInfo *root, Plan *plan)
planner_init_plan_tree_base(&context.base, root);
context.sliceDepth = 0;
context.subplan_workingQueue = NIL;
context.shouldOmitMaterial = false;

nsubplans = list_length(root->glob->subplans);
context.subplans = (decorate_subplan_info *)
@@ -989,10 +991,10 @@ fix_outer_query_motions_mutator(Node *node, decorate_subplans_with_motions_conte
* For non-top slice, if this motion is QE singleton and subplan's locus
* is CdbLocusType_SegmentGeneral, omit this motion.
*/
-	shouldOmit |= context->sliceDepth > 0 &&
-		context->currentPlanFlow->flotype == FLOW_SINGLETON &&
+	shouldOmit |= context->currentPlanFlow->flotype == FLOW_SINGLETON &&
 		context->currentPlanFlow->segindex == 0 &&
-		motion->plan.lefttree->flow->locustype == CdbLocusType_SegmentGeneral;
+		(motion->plan.lefttree->flow->locustype == CdbLocusType_SegmentGeneral ||
+		 motion->plan.lefttree->flow->locustype == CdbLocusType_SingleQE);
Comment on lines 991 to +997

Member: The comment says "non-top slice". Is there a case where we could mistakenly omit the motion when context->sliceDepth > 0 && motion->plan.lefttree->flow->locustype == CdbLocusType_SingleQE?

Member: Yep, the condition is weakened. But is there a way to strictly identify our specific case with motion?

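To make the thread above concrete, the pre- and post-patch omission predicates can be sketched as plain boolean logic. This is a hypothetical simplification in Python, not the GPDB code: the flow and locus types are stand-in strings rather than the actual `FLOW_*`/`CdbLocusType_*` enums.

```python
# Hypothetical simplification of the motion-omission predicate from
# fix_outer_query_motions_mutator(); names loosely mirror the C fields.

def should_omit_old(slice_depth, flotype, segindex, child_locus):
    # Pre-patch: only for non-top slices, and only for SegmentGeneral children.
    return (slice_depth > 0
            and flotype == "SINGLETON"
            and segindex == 0
            and child_locus == "SegmentGeneral")

def should_omit_new(slice_depth, flotype, segindex, child_locus):
    # Post-patch: the slice_depth > 0 guard is dropped and SingleQE is also
    # accepted -- this is the "weakened" condition the reviewer flags.
    return (flotype == "SINGLETON"
            and segindex == 0
            and child_locus in ("SegmentGeneral", "SingleQE"))
```

The sketch makes the reviewer's point visible: every input accepted by the old predicate is accepted by the new one, but not vice versa.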
if (shouldOmit)
{
@@ -1006,6 +1008,7 @@
child->initPlan = list_concat(child->initPlan, motion->plan.initPlan);

newnode = (Node *) child;
context->shouldOmitMaterial = true;
}
else
{
@@ -1023,6 +1026,16 @@
if (plan->flow != NULL && plan->flow->locustype != CdbLocusType_OuterQuery)
context->currentPlanFlow = plan->flow;
newnode = plan_tree_mutator(node, fix_outer_query_motions_mutator, context, false);

/* If the underlying node was a Motion, then omit the Materialize */
if (IsA(newnode, Material) && context->shouldOmitMaterial)
{
Plan *materialPlan = (Plan *) newnode;
materialPlan->initPlan = list_concat(materialPlan->initPlan, materialPlan->lefttree->initPlan);
materialPlan = materialPlan->lefttree;
newnode = (Node *) materialPlan;
}
context->shouldOmitMaterial = false;
context->currentPlanFlow = saveCurrentPlanFlow;
}

9 changes: 9 additions & 0 deletions src/backend/cdb/cdbpath.c
@@ -2362,6 +2362,15 @@ try_redistribute(PlannerInfo *root, CdbpathMfjRel *g, CdbpathMfjRel *o,
return false;
}

Path *
cdbpath_create_motion_to_outer_query(PlannerInfo *root,
Path *subpath)
{
CdbPathLocus outerQueryLocus;
CdbPathLocus_MakeOuterQuery(&outerQueryLocus);
return (Path *) make_motion_path(root, subpath, outerQueryLocus, false, NULL);
}

/*
* Add a suitable Motion Path so that the input tuples from 'subpath' are
* distributed correctly for insertion into target table.
16 changes: 14 additions & 2 deletions src/backend/optimizer/path/allpaths.c
@@ -522,9 +522,21 @@ bring_to_outer_query(PlannerInfo *root, RelOptInfo *rel, List *outer_quals)
Path *path;
CdbPathLocus outerquery_locus;

if (CdbPathLocus_IsGeneral(origpath->locus) ||
CdbPathLocus_IsOuterQuery(origpath->locus))
/*
* We can change the locus and add Motion here if we need OuterQuery.
* However, if there is a volatile function in TL, we should do this
* later. The reason for this is that the volatile function in this
* case can be in the Result node (for each segment). We want the
* volatile function to be executed once if possible. So, the locus
* change and Motion addition occurs later after the scan/join path
* is generated (see cdbpath_create_motion_to_outer_query()).
*/
if (CdbPathLocus_IsGeneral(origpath->locus) || CdbPathLocus_IsOuterQuery(origpath->locus) ||
((CdbPathLocus_IsSegmentGeneral(origpath->locus) || CdbPathLocus_IsSingleQE(origpath->locus))

Member:

  1. What does the CdbPathLocus_IsSingleQE(origpath->locus) condition stand for?

  2. Should the difference for join plans like

explain (costs off, verbose)  SELECT (SELECT f(t_repl.i) from  t_repl join t_strewn using(i) where  t_repl.j < few.id) FROM few;

be taken into account (I mean, plans with and without the patch differ)? I suggest just testing join plans to find some side effects of this condition. At first glance nothing drastic happens.

Author:

  1. We postpone changing the locus and adding Motion if we initially have a General or SingleQE locus and we also have a volatile function. Otherwise we may capture unnecessary cases. For example this one.
  2. It seems that the principle of adding Motion on top of Result does not always work. Although the query works without the patch, it doesn't fit into the "single data set" principle, so there should probably be a different plan here:

without patch:

postgres=# explain (costs off, verbose)  SELECT (SELECT f(t_repl.i) from  t_repl join t_strewn using(i) where  t_repl.j < few.id) FROM few;
                                 QUERY PLAN                                  
-----------------------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)
   Output: ((SubPlan 1))
   ->  Seq Scan on public.few
         Output: (SubPlan 1)
         SubPlan 1
           ->  Hash Join
                 Output: f(t_repl.i)
                 Hash Cond: (t_repl.i = t_strewn.i)
                 ->  Result
                       Output: t_repl.i, t_repl.j
                       Filter: (t_repl.j < few.id)
                       ->  Materialize
                             Output: t_repl.i, t_repl.j
                             ->  Broadcast Motion 1:3  (slice2; segments: 1)
                                   Output: t_repl.i, t_repl.j
                                   ->  Seq Scan on public.t_repl
                                         Output: t_repl.i, t_repl.j
                 ->  Hash
                       Output: t_strewn.i
                       ->  Materialize
                             Output: t_strewn.i
                             ->  Broadcast Motion 3:3  (slice3; segments: 3)
                                   Output: t_strewn.i
                                   ->  Seq Scan on public.t_strewn
                                         Output: t_strewn.i

with patch (checkMotionWithParam is off):

 Gather Motion 3:1  (slice1; segments: 3)
   Output: ((SubPlan 1))
   ->  Seq Scan on public.few
         Output: (SubPlan 1)
         SubPlan 1
           ->  Result
                 Output: f(t_repl.i)
                 ->  Materialize
                       Output: t_repl.i
                       ->  Broadcast Motion 3:3  (slice2; segments: 3)
                             Output: t_repl.i
                             ->  Hash Join
                                   Output: t_repl.i
                                   Hash Cond: (t_repl.i = t_strewn.i)
                                   ->  Result
                                         Output: t_repl.i, t_repl.j
                                         Filter: (t_repl.j < few.id)
                                         ->  Seq Scan on public.t_repl
                                               Output: t_repl.i, t_repl.j
                                   ->  Hash
                                         Output: t_strewn.i
                                         ->  Seq Scan on public.t_strewn
                                               Output: t_strewn.i

&& contain_volatile_functions((Node *) root->processed_tlist)))
{
path = origpath;
}
else
{
/*
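The gist of the bring_to_outer_query() change in allpaths.c above — keep the path as-is (deferring the Motion) when the locus is already usable from the outer query, or when a volatile target list makes an early Motion undesirable — can be sketched as a hypothetical predicate. This is a Python illustration with stand-in locus strings, not the real CdbPathLocus API:

```python
# Hypothetical sketch of the patched bring_to_outer_query() decision:
# return True when the original path is kept (Motion added later, if at all).

def keep_original_path(locus, tlist_has_volatile):
    # Pre-patch, only General and OuterQuery loci kept the path as-is.
    if locus in ("General", "OuterQuery"):
        return True
    # Post-patch: SegmentGeneral / SingleQE paths with a volatile target
    # list are left alone here; the motion to the outer query is added
    # later (see cdbpath_create_motion_to_outer_query()), so the volatile
    # function runs once over a single data set.
    return locus in ("SegmentGeneral", "SingleQE") and tlist_has_volatile
```

Without a volatile target list, SegmentGeneral and SingleQE paths still take the original code path that adds the Motion immediately.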
36 changes: 36 additions & 0 deletions src/backend/optimizer/plan/planner.c
@@ -2600,6 +2600,42 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
scanjoin_target_parallel_safe,
scanjoin_target_same_exprs);

/*
* If the TL of the subquery contains a volatile function and the data is available
* on all segments, we should change the path locus to SingleQE in order to get a
* single dataset on all segments. We do not take this into account if the final
* locus is Replicated (this case is processed later).
*/
if (contain_volatile_functions((Node *) scanjoin_target->exprs) && !CdbPathLocus_IsReplicated(root->final_locus))
{
foreach(lc, current_rel->pathlist)
{
Path *path = (Path *) lfirst(lc);
if (CdbPathLocus_IsGeneral(path->locus) || CdbPathLocus_IsSegmentGeneral(path->locus))
{
CdbPathLocus_MakeSingleQE(&(path->locus), getgpsegmentCount());
}
}
}
/*
* If the subquery contains parameterized operators (correlated), the locus should be
* changed to OuterQuery. We do it here, instead of bring_to_outer_query().
*/
if (root->is_correlated_subplan && !CdbPathLocus_IsReplicated(root->final_locus))
{
foreach(lc, current_rel->pathlist)
{
Path *path = (Path *) lfirst(lc);

if (CdbPathLocus_IsSingleQE(path->locus))
{
Path *motion_path = cdbpath_create_motion_to_outer_query(root, path);
Path *material_path = (Path *) create_material_path(root, motion_path->parent, motion_path);
lfirst(lc) = material_path;
}
}
set_cheapest(current_rel);
}
/*
* Save the various upper-rel PathTargets we just computed into
* root->upper_targets[]. The core code doesn't use this, but it
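The two new loops in grouping_planner() above can be condensed into a single sketch. This is a hypothetical Python condensation: paths are modeled as (locus, node) pairs with stand-in strings, and the Motion/Material path constructors are stubbed as tuple wrappers rather than the real planner calls.

```python
# Hypothetical condensation of the two grouping_planner() loops above.

def adjust_paths(paths, tlist_has_volatile, is_correlated, final_is_replicated):
    if final_is_replicated:
        return paths  # the Replicated case is handled elsewhere
    out = []
    for locus, node in paths:
        # Loop 1: volatile target list => force a single data set by
        # switching General/SegmentGeneral loci to SingleQE.
        if tlist_has_volatile and locus in ("General", "SegmentGeneral"):
            locus = "SingleQE"
        # Loop 2: correlated subplan => add a motion to the outer query
        # and materialize its result, instead of bring_to_outer_query().
        if is_correlated and locus == "SingleQE":
            node = ("Material", ("MotionToOuterQuery", node))
            locus = "OuterQuery"
        out.append((locus, node))
    return out
```

Note how the loops compose: a General path with a volatile target list first becomes SingleQE, and, if the subplan is correlated, is then wrapped in the motion and materialization.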
3 changes: 3 additions & 0 deletions src/include/cdb/cdbpath.h
@@ -40,6 +40,9 @@ extern Path *cdbpath_create_redistribute_motion_path_for_exprs(PlannerInfo *root
List *hashExprs,
List *hashFamilies);

extern Path *cdbpath_create_motion_to_outer_query(PlannerInfo *root,
Path *subpath);

extern Path *create_motion_path_for_ctas(PlannerInfo *root, GpPolicy *policy, Path *subpath);
extern Path *create_motion_path_for_insert(PlannerInfo *root, GpPolicy *targetPolicy, Path *subpath);
extern Path *create_motion_path_for_upddel(PlannerInfo *root, Index rti, GpPolicy *targetPolicy, Path *subpath);
36 changes: 36 additions & 0 deletions src/test/regress/expected/limit.out
@@ -369,3 +369,39 @@ select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2
45020 | 45020
(3 rows)

-- Check the operation of the parameterized Limit in a subquery with a volatile function
create table limit_tbl(i int) distributed by (i);
insert into limit_tbl select * from generate_series(1, 3) i;
create function f(i int) returns int language plpgsql as $$ begin return i; end; $$;
explain (verbose, costs off)
select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl;
                                  QUERY PLAN
-------------------------------------------------------------------------------
 Gather Motion 3:1  (slice1; segments: 3)
   Output: ((SubPlan 1))
   ->  Seq Scan on public.limit_tbl
         Output: (SubPlan 1)
         SubPlan 1
           ->  Limit
                 Output: (f(a.a))
                 ->  Materialize
                       Output: (f(a.a))
                       ->  Broadcast Motion 1:3  (slice2; segments: 1)
                             Output: (f(a.a))
                             ->  Function Scan on pg_catalog.generate_series a
                                   Output: f(a.a)
                                   Function Call: generate_series(1, 4)
 Optimizer: Postgres-based planner
 Settings: optimizer = 'off'
(16 rows)

select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl;
r
---
3
4
2
(3 rows)

drop function f(int);
drop table limit_tbl;
35 changes: 35 additions & 0 deletions src/test/regress/expected/limit_optimizer.out
@@ -393,3 +393,38 @@ select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2
45020 | 45020
(3 rows)

-- Check the operation of the parameterized Limit in a subquery with a volatile function
create table limit_tbl(i int) distributed by (i);
insert into limit_tbl select * from generate_series(1, 3) i;
create function f(i int) returns int language plpgsql as $$ begin return i; end; $$;
explain (verbose, costs off)
select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl;
                            QUERY PLAN
-----------------------------------------------------------------
 Result
   Output: (SubPlan 1)
   ->  Gather Motion 3:1  (slice1; segments: 3)
         Output: limit_tbl.i
         ->  Seq Scan on public.limit_tbl
               Output: limit_tbl.i
   SubPlan 1
     ->  Result
           Output: f(generate_series.generate_series)
           ->  Limit
                 Output: generate_series.generate_series
                 ->  Function Scan on pg_catalog.generate_series
                       Output: generate_series.generate_series
                       Function Call: generate_series(1, 4)
 Optimizer: GPORCA
(15 rows)

select (select f(a) from generate_series(1, 4) a limit 1 offset limit_tbl.i) as r from limit_tbl;
r
---
2
3
4
(3 rows)

drop function f(int);
drop table limit_tbl;
85 changes: 34 additions & 51 deletions src/test/regress/expected/rpt.out
@@ -781,12 +781,10 @@ explain (costs off) select * from t_hashdist where a > All (select random() from
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on t_hashdist
-> Materialize
-> Result
-> Gather Motion 1:1 (slice2; segments: 1)
-> Subquery Scan on "NotIn_SUBQUERY"
-> Seq Scan on t_replicate_volatile
-> Subquery Scan on "NotIn_SUBQUERY"
-> Seq Scan on t_replicate_volatile
Optimizer: Postgres query optimizer
(10 rows)
(8 rows)

explain (costs off) select * from t_hashdist where a in (select random()::int from t_replicate_volatile);
QUERY PLAN
@@ -799,11 +797,10 @@
-> Redistribute Motion 1:3 (slice2; segments: 1)
Hash Key: ((random())::integer)
-> HashAggregate
Group Key: ((random())::integer)
-> Result
-> Seq Scan on t_replicate_volatile
Group Key: (random())::integer
-> Seq Scan on t_replicate_volatile
Optimizer: Postgres query optimizer
(12 rows)
(11 rows)

-- subplan
explain (costs off, verbose) select * from t_hashdist left join t_replicate_volatile on t_hashdist.a > any (select random() from t_replicate_volatile);
@@ -819,7 +816,7 @@
SubPlan 1
-> Materialize
Output: (random())
-> Broadcast Motion 1:3 (slice2; segments: 1)
-> Broadcast Motion 1:3 (slice2)
Output: (random())
-> Seq Scan on rpt.t_replicate_volatile t_replicate_volatile_1
Output: random()
@@ -839,11 +836,9 @@
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on t_hashdist
-> Materialize
-> Result
-> Gather Motion 1:1 (slice2; segments: 1)
-> Seq Scan on t_replicate_volatile
Optimizer: Postgres query optimizer
(8 rows)
-> Seq Scan on t_replicate_volatile
Optimizer: Postgres-based planner
(6 rows)

explain (costs off) select * from t_hashdist cross join (select a, sum(random()) from t_replicate_volatile group by a) x;
QUERY PLAN
@@ -869,15 +864,13 @@
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on t_hashdist
-> Materialize
-> Result
-> Gather Motion 1:1 (slice2; segments: 1)
-> GroupAggregate
Group Key: (random())
-> Sort
Sort Key: (random())
-> Seq Scan on t_replicate_volatile
-> GroupAggregate
Group Key: (random())
-> Sort
Sort Key: (random())
-> Seq Scan on t_replicate_volatile
Optimizer: Postgres query optimizer
(12 rows)
(10 rows)

explain (costs off) select * from t_hashdist cross join (select a, sum(b) as s from t_replicate_volatile group by a having sum(b) > random() order by a) x ;
QUERY PLAN
@@ -903,7 +896,7 @@
---------------------------------------------------------------------------
Insert on t_replicate_volatile
-> Broadcast Motion 1:3 (slice1; segments: 1)
-> Result
-> Subquery Scan on "*SELECT*"
-> Seq Scan on t_replicate_volatile t_replicate_volatile_1
Optimizer: Postgres query optimizer
(5 rows)
@@ -1297,24 +1290,20 @@ select * from cte join (select * from t1 join cte using(a)) b using(a);
-> Redistribute Motion 1:3 (slice3; segments: 1)
Output: share0_ref2.a
Hash Key: share0_ref2.a
-> Result
-> Shared Scan (share slice:id 3:0)
Output: share0_ref2.a
-> Shared Scan (share slice:id 3:0)
Output: share0_ref2.a
-> Hash
Output: share0_ref1.a
-> Redistribute Motion 1:3 (slice4; segments: 1)
Output: share0_ref1.a
Hash Key: share0_ref1.a
-> Result
-> Shared Scan (share slice:id 4:0)
Output: share0_ref1.a
-> Shared Scan (share slice:id 4:0)
Output: share0_ref1.a
-> Seq Scan on rpt.t2
Output: ((t2.a)::double precision * random())
-> Seq Scan on rpt.t2
Output: ((t2.a)::double precision * random())
Optimizer: Postgres-based planner
Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', gp_cte_sharing = 'on', optimizer = 'off'
(35 rows)
(31 rows)

explain (costs off, verbose) with cte as (
select a, count(*) from t2 group by a having count(*) > random()
@@ -1384,13 +1373,11 @@ select * from cte join t1 using(a);
-> Redistribute Motion 1:3 (slice2; segments: 1)
Output: t2.a, (((t2.a)::double precision * random()))
Hash Key: t2.a
-> Result
Output: t2.a, (((t2.a)::double precision * random()))
-> Seq Scan on rpt.t2
Output: t2.a, ((t2.a)::double precision * random())
-> Seq Scan on rpt.t2
Output: t2.a, ((t2.a)::double precision * random())
Optimizer: Postgres-based planner
Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', optimizer = 'off'
(18 rows)
(16 rows)

explain (costs off, verbose) with cte as (
select a, count(*) from t2 group by a having count(*) > random()
@@ -1448,10 +1435,10 @@ explain (costs off, verbose) select * from (
Hash Key: (count(*))
-> Aggregate
Output: count(*)
-> Result
-> Table Function Scan on pg_catalog.anytable_out
-> Seq Scan on rpt.t2
Output: (random())::integer
-> Table Function Scan on pg_catalog.anytable_out
Output: anytable_out
-> Seq Scan on rpt.t2
Output: (random())::integer
Optimizer: Postgres-based planner
Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', optimizer = 'off'
(21 rows)
@@ -1522,17 +1509,13 @@ a join t_hashdist on a.a = t_hashdist.a;
Output: (random())
Group Key: (random())
-> Append
-> Result
Output: (random())
-> Seq Scan on rpt.t2
Output: random()
-> Result
Output: (random())
-> Seq Scan on rpt.t2 t2_1
Output: random()
-> Seq Scan on rpt.t2
Output: random()
-> Seq Scan on rpt.t2 t2_1
Output: random()
Optimizer: Postgres-based planner
Settings: enable_bitmapscan = 'off', enable_seqscan = 'off', optimizer = 'off'
(30 rows)
(26 rows)

explain (costs off, verbose) select * from (
select a, count(*) from t2 group by a having count(*) > random()