bart.dias/bdias_pattern_codegen.py at main · ratem/bart.dias · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
"""
BDiasPatternCodegen: Pattern-Based Code Generation Module for Bart.dIAs

This module implements pattern-specific code generators that transform identified
parallel patterns into optimized parallel implementations. It uses AST transformation
combined with Jinja2 templates to generate readable, efficient parallel code.

Features:
- Pattern-specific AST transformers for each parallel pattern
- Integration with the pattern analyzer to get pattern characteristics
- Template-based code generation for readability and maintainability
- Support for different partitioning strategies
- Hardware-aware code generation that adapts to available system resources

Currently implemented patterns:
- Map-Reduce: Transforms independent operations followed by associative reduction
  into parallel implementations using Python's multiprocessing module

Each pattern transformer:
1. Analyzes the AST to extract key components of the pattern
2. Selects appropriate templates based on the pattern and partitioning strategy
3. Generates parallelized code that adapts to the available hardware
4. Provides hardware-specific recommendations for optimal performance

Classes:
- BDiasPatternTransformer: Base class for pattern-specific AST transformers
- MapReducePatternTransformer: Transforms Map-Reduce patterns into parallel implementations

Dependencies:
- ast: For AST manipulation
- jinja2: For template-based code generation
"""

import ast
import jinja2
from typing import Dict, List, Any, Optional, Tuple
import copy

class BDiasPatternTransformer(ast.NodeTransformer):
    """Base class for pattern-specific AST transformers."""

    def __init__(self, bottleneck: Dict[str, Any], partitioning_strategy: List[str]):
        """
        Initialize the pattern transformer.

        Args:
            bottleneck: Dictionary containing bottleneck information
            partitioning_strategy: List of recommended partitioning strategies
        """
        self.bottleneck = bottleneck
        self.partitioning_strategy = partitioning_strategy
        self.imports_to_add = []
        self.context = {}  # For template rendering

        # Initialize Jinja2 environment
        import os
        template_dir = os.path.join(os.path.dirname(__file__), 'templates')
        self.env = jinja2.Environment(
            loader=jinja2.FileSystemLoader(template_dir),
            trim_blocks=True,
            lstrip_blocks=True
        )

    def add_import(self, module: str, name: Optional[str] = None, alias: Optional[str] = None):
        """
        Add an import statement to be inserted at the top of the module.

        Args:
            module: Module to import
            name: Specific name to import from module (for from ... import ...)
            alias: Alias for the import
        """
        self.imports_to_add.append((module, name, alias))

    def finalize(self, tree: ast.AST) -> ast.AST:
        """
        Add necessary imports and finalize the transformed AST.

        Args:
            tree: The AST to finalize

        Returns:
            The finalized AST
        """
        # Add imports at the top of the file
        for module, name, alias in self.imports_to_add:
            import_node = self._create_import_node(module, name, alias)
            if isinstance(tree, ast.Module):
                tree.body.insert(0, import_node)

        return tree

    def _create_import_node(self, module: str, name: Optional[str] = None, alias: Optional[str] = None) -> ast.stmt:
        """
        Create an import AST node.

        Args:
            module: Module to import
            name: Specific name to import from module (for from ... import ...)
            alias: Alias for the import

        Returns:
            Import AST node
        """
        if name:
            # from module import name as alias
            return ast.ImportFrom(
                module=module,
                names=[ast.alias(name=name, asname=alias)],
                level=0
            )
        else:
            # import module as alias
            return ast.Import(
                names=[ast.alias(name=module, asname=alias)]
            )

    def get_template_context(self) -> Dict[str, Any]:
        """
        Return the context for template rendering.

        Returns:
            Dictionary containing context for template rendering
        """
        return self.context

    def generate_code(self) -> str:
        """
        Generate code using the template and context.

        Returns:
            Generated code as a string
        """
        raise NotImplementedError("Subclasses must implement generate_code()")


class MapReducePatternTransformer(BDiasPatternTransformer):
    """
    Transforms Map-Reduce patterns into parallel implementations.

    This class analyzes code structures that exhibit the Map-Reduce pattern
    and transforms them into parallel implementations using Python's
    multiprocessing module. It handles both function definitions and for loops,
    and supports different partitioning strategies.

    The Map-Reduce pattern involves:
    1. Map phase: Applying the same operation independently to each element in a dataset
    2. Reduce phase: Combining the results using an associative operation

    The transformation process:
    1. Identifies the map and reduce components in the original code
    2. Creates parallel implementations for both phases
    3. Adapts the implementation to the available hardware
    4. Generates code with appropriate error handling and synchronization

    Supported partitioning strategies:
    - SDP (Spatial Domain Partitioning): Divides data into chunks processed by separate workers
    - SIP (Spatial Instruction Partitioning): Applies the same operation to different data elements

    The generated code automatically adapts to the number of available processors
    and includes hardware-specific optimizations.
    """


    def visit_FunctionDef(self, node):
        """Transform function definitions into parallel map-reduce implementations."""
        # Skip if this isn't the target bottleneck
        if not self._is_target_node(node):
            return self.generic_visit(node)

        # Extract function components
        func_name = node.name
        func_args = [arg.arg for arg in node.args.args]
        func_body = ast.unparse(node.body)

        # Store context for template rendering
        self.context.update({
            'func_name': func_name,
            'func_args': func_args,
            'func_body': func_body,
            'body': func_body,  # For compatibility
            'partitioning_strategy': self.partitioning_strategy,
            'is_class_method': func_args and func_args[0] == 'self'
        })

        # Add necessary imports
        self.add_import('multiprocessing', 'Pool')
        self.add_import('functools', 'reduce')

        # Return the original node (transformation happens in template)
        return node

    def visit_For(self, node: ast.For) -> ast.AST:
        """Transform for loops into parallel map-reduce implementations."""
        # Skip if this isn't the target bottleneck
        if not self._is_target_node(node):
            return self.generic_visit(node)

        # Extract loop components
        loop_var = ast.unparse(node.target)
        iter_expr = ast.unparse(node.iter)
        # Transform body to define 'result'
        # We need to find the append call and replace it with assignment to 'result'
        body = self._transform_body_for_map(node.body)

        # Store context for template rendering
        self.context.update({
            'loop_var': loop_var,
            'iter_expr': iter_expr,
            'body': body,
            'partitioning_strategy': self.partitioning_strategy
        })

        # Add necessary imports
        self.add_import('multiprocessing', 'Pool')
        self.add_import('functools', 'reduce')

        # Return the original node (transformation happens in template)
        return node

    def _transform_body_for_map(self, body_nodes: List[ast.stmt]) -> str:
        """
        Transform loop body to return result instead of appending to list.
        Finds list.append(x) and replaces with result = x.
        """
        new_body = []
        found_append = False

        for node in body_nodes:
            # Check for Expr(Call(func=Attribute(attr='append')))
            if isinstance(node, ast.Expr) and isinstance(node.value, ast.Call):
                call = node.value
                if isinstance(call.func, ast.Attribute) and call.func.attr == 'append':
                    # Found append! Replace with result = arg
                    if call.args:
                        assign = ast.Assign(
                            targets=[ast.Name(id='result', ctx=ast.Store())],
                            value=call.args[0]
                        )
                        # Copy location from original node to avoid 'lineno' errors
                        ast.copy_location(assign, node)
                        ast.fix_missing_locations(assign)
                        new_body.append(assign)
                        found_append = True
                        continue

            new_body.append(node)

        if not found_append:
            # If no append found, add result = None to avoid NameError
            assign = ast.Assign(
                targets=[ast.Name(id='result', ctx=ast.Store())],
                value=ast.Constant(value=None)
            )
            if body_nodes:
                ast.copy_location(assign, body_nodes[-1])
            else:
                assign.lineno = 1
                assign.col_offset = 0
            new_body.append(assign)

        # Wrap in a Module to unparse a list of statements
        wrapper = ast.Module(body=new_body, type_ignores=[])
        return ast.unparse(wrapper)

    def _is_target_node(self, node: ast.AST) -> bool:
        """
        Check if this node is the target bottleneck.

        Args:
            node: AST node to check

        Returns:
            True if this is the target bottleneck, False otherwise
        """
        # First check if the node has a line number
        if not hasattr(node, 'lineno'):
            return False

        # Get the bottleneck line number
        bottleneck_lineno = self.bottleneck.get('lineno', -1)

        # For function definitions, also check the function name
        if isinstance(node, ast.FunctionDef):
            # If the bottleneck is a function, check both line number and name
            if self.bottleneck.get('type') == 'function':
                # Extract function name from source if available
                source = self.bottleneck.get('source', '')
                if source.startswith('def '):
                    func_name = source.split('(')[0].replace('def ', '').strip()
                    if node.name == func_name:
                        return True

            # Allow for small differences in line numbers for functions
            return abs(node.lineno - bottleneck_lineno) <= 2

        # For loop nodes, allow for small differences in line numbers
        if isinstance(node, ast.For):
            # If the bottleneck is a for loop, check both line number and iteration expression
            if self.bottleneck.get('type') == 'for_loop':
                # Extract iteration expression from source if available
                source = self.bottleneck.get('source', '')
                if source.startswith('for '):
                    # Try to match the iteration expression
                    iter_expr = ast.unparse(node.iter)
                    if iter_expr in source:
                        return True

            # Allow for small differences in line numbers for loops
            return abs(node.lineno - bottleneck_lineno) <= 2

        # For other node types, use exact line number matching with a small tolerance
        return abs(node.lineno - bottleneck_lineno) <= 1

    def generate_code(self) -> str:
        """
        Generate parallel code for the Map-Reduce pattern.

        Returns:
            Generated code as a string
        """
        # Add precomputed values to the context - hardware specific values
        import multiprocessing
        processor_count = multiprocessing.cpu_count()
        data_size = 10  # Default value

        # Estimate data size from context
        if 'func_body' in self.context:
            func_body = self.context['func_body']
            # Look for array/list declarations with size information
            import re
            size_patterns = [
                r'shape=\((\d+),\)',  # numpy array shape
                r'range\((\d+)\)',  # range function
                r'len\((\w+)\)',  # length of something
            ]

            for pattern in size_patterns:
                matches = re.findall(pattern, func_body)
                if matches:
                    try:
                        data_size = max(data_size, int(matches[0]))
                    except (ValueError, IndexError):
                        pass
        elif 'iter_expr' in self.context:
            iter_expr = self.context['iter_expr']
            if 'range' in iter_expr:
                # Try to extract the range parameters
                try:
                    range_params = iter_expr.replace('range(', '').replace(')', '').split(',')
                    if len(range_params) == 1:
                        data_size = int(range_params[0])
                    elif len(range_params) == 2:
                        data_size = int(range_params[1]) - int(range_params[0])
                    elif len(range_params) == 3:
                        data_size = (int(range_params[1]) - int(range_params[0])) // int(range_params[2])
                except (ValueError, IndexError):
                    # Keep default if parsing fails
                    pass
            elif 'len(' in iter_expr:
                # This is a common pattern in the for loop at line 197
                # For example: for t in range(1, len(self.accelerations_y)):
                data_size = 100  # Use a reasonable default for array lengths

        elements_per_processor = max(1, data_size // processor_count)

        # Precompute processor ranges
        processor_ranges = []
        for p in range(processor_count):
            start = p * elements_per_processor
            end = min((p + 1) * elements_per_processor, data_size)
            processor_ranges.append((p, start, end))

        # Add to context
        self.context['processor_ranges'] = processor_ranges
        self.context['processor_count'] = processor_count
        self.context['data_size'] = data_size
        self.context['elements_per_processor'] = elements_per_processor

        # Determine if we're dealing with a function or a loop
        is_function = 'func_name' in self.context

        # Choose template based on node type and partitioning strategy
        if is_function:
            # Function templates
            if "SDP" in self.partitioning_strategy:
                template_name = "map_reduce/function_sdp_multiprocessing.j2"
            elif "SIP" in self.partitioning_strategy:
                template_name = "map_reduce/function_sip_multiprocessing.j2"
            else:
                template_name = "map_reduce/function_default_multiprocessing.j2"
        else:
            # Loop templates
            if "SDP" in self.partitioning_strategy:
                template_name = "map_reduce/sdp_multiprocessing.j2"
            elif "SIP" in self.partitioning_strategy:
                template_name = "map_reduce/sip_multiprocessing.j2"
            else:
                template_name = "map_reduce/default_multiprocessing.j2"

        # Load and render the template
        try:
            template = self.env.get_template(template_name)
            return template.render(**self.context)
        except jinja2.exceptions.TemplateNotFound:
            # Fallback to default template if specific template not found
            fallback_template_name = "map_reduce/default_multiprocessing.j2" if not is_function else "map_reduce/function_default_multiprocessing.j2"
            template = self.env.get_template(fallback_template_name)
            return template.render(**self.context)


# ==== Helpers para extrair expressões canônicas de pipeline ====

def _canonicalize_expr(node: ast.AST, loop_var: str) -> ast.AST:
    """Renomeia a variável do loop para 'x' para padronizar a expressão do estágio."""
    node = copy.deepcopy(node)
    if loop_var:
        node = _RenameVar(loop_var, 'x').visit(node)
    ast.fix_missing_locations(node)
    return node

def _extract_stage_exprs_from_function(func_node: ast.FunctionDef) -> list[str]:
    """
    Retorna uma lista de expressões str, uma por estágio do pipeline, *em função de x*.
    - Estágio 1..k dentro do primeiro for: inlining de temporários até o append.
    - Estágio final: loops seguintes que fazem .append(...) no 'results' (ou similar).
    Cobre o caso nested_pipeline.
    """
    stage_exprs: list[str] = []

    # 1) Primeiro 'for' da função: calcula o estágio que preenche o primeiro buffer
    first_for: ast.For | None = None
    for stmt in func_node.body:
        if isinstance(stmt, ast.For):
            first_for = stmt
            break

    if first_for is not None:
        loop_var = getattr(first_for.target, 'id', None)
        env: dict[str, ast.AST] = {}  # variáveis temporárias dentro do for

        for s in first_for.body:
            # temp = <expr>
            if isinstance(s, ast.Assign) and len(s.targets) == 1 and isinstance(s.targets[0], ast.Name):
                env[s.targets[0].id] = s.value

            # temp += <expr>
            elif isinstance(s, ast.AugAssign) and isinstance(s.target, ast.Name):
                t = s.target.id
                base = env.get(t, ast.Name(id=t, ctx=ast.Load()))
                env[t] = ast.BinOp(left=base, op=s.op, right=s.value)

            # buffer.append(<expr>)
            elif isinstance(s, ast.Expr) and isinstance(s.value, ast.Call):
                call = s.value
                if getattr(call.func, 'attr', None) == 'append' and call.args:
                    arg = call.args[0]
                    # inlining: se for um Name temporário, expande usando env
                    if isinstance(arg, ast.Name) and arg.id in env:
                        arg = env[arg.id]
                    arg = _canonicalize_expr(arg, loop_var)
                    stage_exprs.append(_src(arg))
                    # Só um 'append' por estágio nesse for; se houver mais, pode-se coletar todos.

    # 2) Demais 'for' da função: estágios subsequentes que fazem .append(...)
    # (ex.: for val in intermediate: results.append(val**2))
    for stmt in func_node.body:
        if isinstance(stmt, ast.For) and stmt is not first_for:
            loop_var = getattr(stmt.target, 'id', None)
            for s in stmt.body:
                if isinstance(s, ast.Expr) and isinstance(s.value, ast.Call):
                    call = s.value
                    if getattr(call.func, 'attr', None) == 'append' and call.args:
                        arg = call.args[0]
                        arg = _canonicalize_expr(arg, loop_var)
                        stage_exprs.append(_src(arg))

    return stage_exprs

def _extract_stage_descs_from_function(func_node: ast.FunctionDef):
    """Retorna lista de estágios, cada um: {'expr': <str>, 'pred': <str|None>}."""
    stages = []

    # Primeiro for (estágio 1…k internos)
    first_for = next((st for st in func_node.body if isinstance(st, ast.For)), None)
    if first_for is not None:
        loop_var = getattr(first_for.target, 'id', None)
        pairs = _collect_appends_with_pred(first_for.body, loop_var)
        for expr_node, pred_node in pairs:
            stages.append({
                'expr': _src(expr_node),
                'pred': _src(pred_node) if pred_node is not None else None,
            })

    # Demais for (estágios subsequentes)
    for st in func_node.body:
        if isinstance(st, ast.For) and st is not first_for:
            loop_var = getattr(st.target, 'id', None)
            pairs = _collect_appends_with_pred(st.body, loop_var)
            for expr_node, pred_node in pairs:
                stages.append({
                    'expr': _src(expr_node),
                    'pred': _src(pred_node) if pred_node is not None else None,
                })

    return stages


class _RenameVar(ast.NodeTransformer):
    def __init__(self, old, new): self.old, self.new = old, new
    def visit_Name(self, node):
        if isinstance(node, ast.Name) and node.id == self.old:
            return ast.copy_location(ast.Name(id=self.new, ctx=node.ctx), node)
        return node

def _canon(node, loop_var):
    n = copy.deepcopy(node)
    if loop_var:
        n = _RenameVar(loop_var, 'x').visit(n)
    ast.fix_missing_locations(n)
    return n

def _src(node): return ast.unparse(node)

def _and(a, b):
    import ast as _ast
    if a and b:
        return _ast.BoolOp(op=_ast.And(), values=[a, b])
    return a or b

def _not(a):
    import ast as _ast
    return _ast.UnaryOp(op=_ast.Not(), operand=a)

def _collect_appends_with_pred(stmts, loop_var, env=None, pred_ctx=None):
    """for+append → [(expr_node, pred_node|None), ...]"""
    if env is None: env = {}
    out = []
    for s in stmts:
        if isinstance(s, ast.Assign) and len(s.targets)==1 and isinstance(s.targets[0], ast.Name):
            env[s.targets[0].id] = s.value
            continue
        if isinstance(s, ast.AugAssign) and isinstance(s.target, ast.Name):
            t = s.target.id
            base = env.get(t, ast.Name(id=t, ctx=ast.Load()))
            env[t] = ast.BinOp(left=base, op=s.op, right=s.value)
            continue
        if isinstance(s, ast.If):
            out += _collect_appends_with_pred(s.body, loop_var, env.copy(), _and(pred_ctx, s.test))
            if s.orelse:
                out += _collect_appends_with_pred(s.orelse, loop_var, env.copy(), _and(pred_ctx, _not(s.test)))
            continue
        if isinstance(s, ast.Expr) and isinstance(getattr(s, 'value', None), ast.Call):
            call = s.value
            if getattr(call.func, 'attr', None) == 'append' and call.args:
                arg = call.args[0]
                if isinstance(arg, ast.Name) and arg.id in env:
                    arg = env[arg.id]
                arg = _canon(arg, loop_var)
                pred = _canon(pred_ctx, loop_var) if pred_ctx is not None else None
                out.append((arg, pred))
            continue
        # recursão leve para blocos compostos
        body = getattr(s, 'body', None)
        if isinstance(body, list):
            out += _collect_appends_with_pred(body, loop_var, env.copy(), pred_ctx)
    return out

def _extract_pipeline_stages_from_for_append(func_node: ast.FunctionDef):
    """Funções com loops e appends → [{'expr': str, 'pred': str|None}, ...]"""
    stages = []
    # primeiro for (pode ter subestágios dentro)
    first_for = next((st for st in func_node.body if isinstance(st, ast.For)), None)
    if first_for is not None:
        loop_var = getattr(first_for.target, 'id', None)
        for expr_node, pred_node in _collect_appends_with_pred(first_for.body, loop_var):
            stages.append({'expr': _src(expr_node), 'pred': _src(pred_node) if pred_node else None})
    # for(s) seguintes (estágios subsequentes)
    for st in func_node.body:
        if isinstance(st, ast.For) and st is not first_for:
            loop_var = getattr(st.target, 'id', None)
            for expr_node, pred_node in _collect_appends_with_pred(st.body, loop_var):
                stages.append({'expr': _src(expr_node), 'pred': _src(pred_node) if pred_node else None})
    return stages

def _extract_pipeline_stages_from_listcomps(func_node: ast.FunctionDef, func_args: list[str]):
    """Cadeia dependente de ListComp → [{'expr': str, 'pred': str|None}, ...]"""
    stages = []
    last_target = None
    for stmt in func_node.body:
        # Assign target = ListComp
        if isinstance(stmt, ast.Assign) and len(stmt.targets)==1 and isinstance(stmt.targets[0], ast.Name):
            tgt = stmt.targets[0].id
            lc  = stmt.value
            if not (isinstance(lc, ast.ListComp) and len(lc.generators)==1):
                continue
            gen = lc.generators[0]
            # DEP: 1º estágio consome arg; próximos consomem o target anterior
            if last_target is None:
                ok = isinstance(gen.iter, ast.Name) and gen.iter.id in func_args
            else:
                ok = isinstance(gen.iter, ast.Name) and gen.iter.id == last_target
            if not ok:
                stages.clear(); last_target=None; continue
            expr = _src(_canon(lc.elt, getattr(gen.target, 'id', None)))
            pred = None
            if gen.ifs:
                cond = gen.ifs[0]
                for e in gen.ifs[1:]:
                    cond = ast.BoolOp(op=ast.And(), values=[cond, e])
                pred = _src(_canon(cond, getattr(gen.target, 'id', None)))
            stages.append({'expr': expr, 'pred': pred})
            last_target = tgt
            continue
        # Return ListComp dependente do último target
        if isinstance(stmt, ast.Return) and isinstance(stmt.value, ast.ListComp) and last_target:
            lc = stmt.value
            if lc.generators:
                gen = lc.generators[0]
                if isinstance(gen.iter, ast.Name) and gen.iter.id == last_target:
                    expr = _src(_canon(lc.elt, getattr(gen.target, 'id', None)))
                    pred = None
                    if gen.ifs:
                        cond = gen.ifs[0]
                        for e in gen.ifs[1:]:
                            cond = ast.BoolOp(op=ast.And(), values=[cond, e])
                        pred = _src(_canon(cond, getattr(gen.target, 'id', None)))
                    stages.append({'expr': expr, 'pred': pred})
            break
    return stages
# ==== Fim dos helpers ====

class PipelinePatternTransformer(BDiasPatternTransformer):
    """
    Transformer for the Pipeline pattern.  It picks out the top-level
    function whose name matches the bottleneck, extracts the append-expressions
    from each stage, and populates context with:
      - func_name, func_args, input_data
      - stage_count, stage_exprs
    """

    def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.AST:
        # Garantir que é a função-alvo (pelo nome)
        if (self.bottleneck.get('type') != 'function'
            or node.name != self.bottleneck.get('name')):
            return node

        func_args = [arg.arg for arg in node.args.args]
        input_data = func_args[0] if func_args else 'data'

        # 1) Tenta cadeia de ListComps com dependência; 2) fallback para for+append
        stages = _extract_pipeline_stages_from_listcomps(node, func_args)
        if not stages:
            stages = _extract_pipeline_stages_from_for_append(node)

        # Proteção: não gere pipeline vazia
        if not stages:
            raise ValueError(f"Pipeline reconhecida em '{node.name}', mas nenhum estágio extraído.")

        # Contexto para o template
        self.context.update({
            'func_name':   node.name,
            'func_args':   func_args,
            'input_data':  input_data,
            'stage_exprs': stages,          # lista de dicts {'expr','pred'}
            'stage_count': len(stages),
            'partitioning_strategy': self.partitioning_strategy,
            'is_class_method': func_args and func_args[0] == 'self',
        })

        # Imports (usa mp.Queue, não precisa 'queue.Queue')
        self.add_import('multiprocessing', None, None)
        return node

def generate_hardware_recommendations(context: Dict[str, Any]) -> str:
    """
    Generate hardware-specific recommendations based on the context.

    Args:
        context: Dictionary containing context information

    Returns:
        Hardware recommendations as a string
    """
    recommendations = []

    processor_count = context.get('processor_count', 0)
    data_size = context.get('data_size', 0)
    elements_per_processor = context.get('elements_per_processor', 0)
    is_memory_bound = context.get('is_memory_bound', False)

    if processor_count > 0:
        recommendations.append(f"This code will utilize {processor_count} processors.")

    if elements_per_processor > 0:
        recommendations.append(f"Each processor will handle approximately {elements_per_processor} elements.")

    if is_memory_bound:
        recommendations.append(
            "This operation is memory-bound. Consider reducing the data size or increasing available memory.")
    else:
        recommendations.append("This operation is CPU-bound and should scale well with additional processors.")

    if data_size < processor_count:
        recommendations.append(
            "The data size is smaller than the number of processors. Consider using fewer processors to avoid overhead.")

    return "\n".join(recommendations)


def generate_parallel_code(
    bottleneck: Dict[str, Any],
    pattern: str,
    partitioning_strategy: List[str],
    target_runtime: str = "multiprocessing"
) -> Tuple[str, str, Dict[str, Any]]:
    """
    Dispatch to the correct transformer, run the AST pass, finalize imports,
    then render the appropriate Jinja2 template with a fully-populated context.

    Args:
        bottleneck: Dictionary containing bottleneck information
        pattern: Pattern type ('map_reduce', 'pipeline', 'pool_workers', etc.)
        partitioning_strategy: List of partitioning strategies
        target_runtime: Target parallel runtime - 'multiprocessing' or 'ray' (default: 'multiprocessing')

    Returns:
        Tuple of (source_code, transformed_code, context)
    """
    source_code = bottleneck['source']
    tree = ast.parse(source_code)

    # 1. Instantiate the right transformer
    if pattern == 'pipeline':
        transformer = PipelinePatternTransformer(bottleneck, partitioning_strategy)
    elif pattern == 'map_reduce':
        transformer = MapReducePatternTransformer(bottleneck, partitioning_strategy)
    elif pattern in ('pool_worker', 'pool_workers', 'worker_pool', 'pool'):
        pattern = "pool_workers"
        transformer = MasterWorkerPatternTransformer(bottleneck, partitioning_strategy)
    elif pattern in ('master_slave', 'master-worker', 'master_worker'):
        pattern = "master_slave"
        transformer = MasterSlavePatternTransformer(bottleneck, partitioning_strategy)
    else:
        raise NotImplementedError(f"Pattern '{pattern}' not supported for code generation.")

    # 2. Run the AST transformer to build transformer.context
    transformer.visit(tree)
    # sanity: não gerar pipeline vazia
    if pattern == 'pipeline' and transformer.context.get('stage_count', 0) == 0:
        raise ValueError("Pipeline reconhecida, mas nenhum estágio extraído para codegen.")
    transformer.finalize(tree)

    # Extract preamble (code before the bottleneck) to preserve context (imports, variables)
    bottleneck_lineno = bottleneck.get('lineno', 1)
    lines = source_code.split('\n')
    # Keep lines before the bottleneck.
    # Note: This is a simple heuristic. For complex nested structures, AST replacement would be better.
    if bottleneck_lineno > 1:
        preamble = '\n'.join(lines[:bottleneck_lineno - 1])
        transformer.context['preamble'] = preamble
    else:
        transformer.context['preamble'] = ""

    # 3. Select the template based on pattern, function status, strategy, and target runtime
    # Unified logic for all patterns
    is_function = 'func_name' in transformer.context
    strat = partitioning_strategy[0].lower()

    # Normalize target_runtime
    if target_runtime not in ('multiprocessing', 'ray'):
        raise ValueError(f"Invalid target_runtime: '{target_runtime}'. Must be 'multiprocessing' or 'ray'.")

    # Store runtime in context for templates that might need it
    transformer.context['target_runtime'] = target_runtime

    # Seleção especial para master_slave "worker-like"
    if pattern == 'master_slave' and transformer.context.get('wrap_existing_worker'):
        template_name = f"{pattern}/function_wrap_worker_threads.j2"
    else:
        # Template selection based on runtime
        if target_runtime == 'ray':
            # Ray templates: always use default for now (can be extended with strategies later)
            if is_function:
                template_name = f"{pattern}/function_default_ray.j2"
            else:
                template_name = f"{pattern}/default_ray.j2"
        else:
            # Multiprocessing templates (original behavior)
            if is_function:
                template_name = f"{pattern}/function_{strat}_multiprocessing.j2"
            else:
                template_name = f"{pattern}/{strat}_multiprocessing.j2"

    try:
        tpl = transformer.env.get_template(template_name)
    except jinja2.exceptions.TemplateNotFound:
        # Fallback logic
        if pattern == 'master_slave' and transformer.context.get('wrap_existing_worker'):
            tpl = transformer.env.get_template(f"{pattern}/function_wrap_worker_threads.j2")
        else:
            if target_runtime == 'ray':
                # Fallback to default Ray template
                fallback_name = f"{pattern}/function_default_ray.j2" if is_function else f"{pattern}/default_ray.j2"
                try:
                    tpl = transformer.env.get_template(fallback_name)
                except jinja2.exceptions.TemplateNotFound:
                    raise ValueError(
                        f"Ray template not found: '{template_name}' or fallback '{fallback_name}'. "
                        f"Please ensure Ray templates are created in templates/{pattern}/"
                    )
            else:
                # Fallback to default multiprocessing template
                if is_function:
                    tpl = transformer.env.get_template(f"{pattern}/function_default_multiprocessing.j2")
                else:
                    tpl = transformer.env.get_template(f"{pattern}/default_multiprocessing.j2")


    # 4. Render the template with the populated context
    transformed_code = tpl.render(**transformer.context)

    # 5. Optionally add hardware recommendations into context
    # (if your templates or presenter use them)
    if hasattr(transformer, 'context'):
        from multiprocessing import cpu_count
        transformer.context.setdefault('processor_count', cpu_count())

    return source_code, transformed_code, transformer.context


# ======== Pool of workers

class MasterWorkerPatternTransformer(BDiasPatternTransformer):
    """
    Gera uma versão paralela (pool of workers) para funções com padrão
    pool-workers: um loop que consome itens de uma coleção e produz
    um resultado por item (append em um buffer/resultado).
    Extrai a expressão do "append(...)" e, se existir, um predicado "if".
    """

    def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.AST:
        # só a função-alvo
        if (self.bottleneck.get('type') != 'function'
            or node.name != self.bottleneck.get('name')):
            return node

        func_args = [a.arg for a in node.args.args]
        input_data = func_args[0] if func_args else 'data'

        # 1) pegue o primeiro for top-level
        first_for = next((st for st in node.body if isinstance(st, ast.For)), None)
        if first_for is None:
            raise ValueError(f"pool-workers: não encontrei loop top-level em '{node.name}'.")

        loop_var = getattr(first_for.target, 'id', None)

        # 2) colete appends (expr, pred) dentro do for (usa seus helpers)
        pairs = _collect_appends_with_pred(first_for.body, loop_var)
        if not pairs:
            raise ValueError("pool-workers: loop não contém append(...) para extrair tarefa.")

        # Heurística simples: use o primeiro append do loop
        expr_node, pred_node = pairs[0]
        task_expr = _src(expr_node)
        pred_expr = _src(pred_node) if pred_node is not None else None

        # Contexto para template
        self.context.update({
            'pattern': 'pool_worker',
            'func_name': node.name,
            'func_args': func_args,
            'input_data': input_data,
            'loop_var': loop_var or 'x',
            'task_expr': task_expr,     # ex.: "(x*2 + 10)"
            'pred_expr': pred_expr,     # ex.: "x % 2 == 0" (ou None)
            'is_class_method': func_args and func_args[0] == 'self',
            'partitioning_strategy': self.partitioning_strategy,
        })

        # Imports (multiprocessing)
        self.add_import('multiprocessing', None, None)
        return node


# ======== Master–Slave

def _is_worker_like_function(node: ast.FunctionDef) -> bool:
    """
    Heurística: função que roda em laço consumindo de 'q.get()' ou 'conn.recv()'
    e produz com 'out.put()' ou 'conn.send()'. Não confundir com Pool/Executor.
    """
    has_loop_getrecv = False
    has_put_or_send = False

    # loop com get()/recv()
    for n in ast.walk(node):
        if isinstance(n, (ast.While, ast.For)):
            for call in ast.walk(n):
                if isinstance(call, ast.Call) and isinstance(call.func, ast.Attribute):
                    if call.func.attr in ("get", "recv"):
                        has_loop_getrecv = True
                        break

    # put()/send() em algum ponto
    for n in ast.walk(node):
        if isinstance(n, ast.Call) and isinstance(n.func, ast.Attribute):
            if n.func.attr in ("put", "send"):
                has_put_or_send = True
                break

    return has_loop_getrecv and has_put_or_send

class MasterSlavePatternTransformer(BDiasPatternTransformer):
    """
    Gera versão master-slave (coordenador + N escravos) para funções com
    um loop que produz 1 saída por item (append em buffer/resultado).
    """

    def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.AST:
        # só a função-alvo
        if (self.bottleneck.get('type') != 'function'
            or node.name != self.bottleneck.get('name')):
            return node

        func_args = [a.arg for a in node.args.args]
        input_data = func_args[0] if func_args else 'data'

        # helper local
        def _worker_calls_task_done(fn: ast.FunctionDef) -> bool:
            for n in ast.walk(fn):
                if isinstance(n, ast.Call) and isinstance(n.func, ast.Attribute) and n.func.attr == "task_done":
                    return True
            return False

        # --- Caminho B: função já é "worker-like" → gerar WRAPPER com threads ---
        if _is_worker_like_function(node):
            # Contexto para template de WRAPPER (threads + queue)
            self.context.update({
                "pattern": "master_slave",
                "wrap_existing_worker": True,
                "worker_name": node.name,
                # ⚠️ injete APENAS a definição do worker, não o módulo inteiro:
                "worker_def_src": ast.unparse(node),
                "worker_calls_task_done": _worker_calls_task_done(node),
                "func_name": node.name,
                "func_args": [a.arg for a in node.args.args],
                "partitioning_strategy": self.partitioning_strategy,
            })
            return node

        # --- Caminho A: função SEQUENCIAL (for+append) → gerar MS por processos ---
        # 1) pegar primeiro for top-level
        first_for = next((st for st in node.body if isinstance(st, ast.For)), None)
        if first_for is None:
            # Se não é worker-like e não há for, não dá para extrair o "map" → aborta limpo
            raise ValueError(f"master-slave: função '{node.name}' não é worker-like nem possui loop for+append.")

        loop_var = getattr(first_for.target, 'id', None)

        # 2) coleta appends (expr, pred)
        pairs = _collect_appends_with_pred(first_for.body, loop_var)
        if not pairs:
            raise ValueError("master-slave: loop não contém append(...) para extrair tarefa.")

        expr_node, pred_node = pairs[0]
        task_expr = _src(expr_node)
        pred_expr = _src(pred_node) if pred_node is not None else None

        self.context.update({
            'pattern': 'master_slave',
            'func_name': node.name,
            'func_args': func_args,
            'input_data': input_data,
            'loop_var': loop_var or 'x',
            'task_expr': task_expr,
            'pred_expr': pred_expr,
            'is_class_method': func_args and func_args[0] == 'self',
            'partitioning_strategy': self.partitioning_strategy,
        })
        return node