From bc50003dc7fdf3bd5d97a3d99d008ef5eda45e97 Mon Sep 17 00:00:00 2001
From: huseyincavusbi <huseyincavus@proton.me>
Date: Wed, 24 Jun 2026 18:31:42 +0300
Subject: [PATCH 1/6] =?UTF-8?q?fix:=20remove=20misleading=20'encoder-decod?=
 =?UTF-8?q?er'=20label=20from=20AutoModel=20log=20=E2=80=94=20can=20also?=
 =?UTF-8?q?=20be=20multimodal=20(AutoModelForImageTextToText)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 transformer_lens/benchmarks/main_benchmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/transformer_lens/benchmarks/main_benchmark.py b/transformer_lens/benchmarks/main_benchmark.py
index 05735095f..2fe3b0d40 100644
--- a/transformer_lens/benchmarks/main_benchmark.py
+++ b/transformer_lens/benchmarks/main_benchmark.py
@@ -935,7 +935,7 @@ def cleanup_model(model, model_name_str: str):
             # Use appropriate AutoModel class (e.g., AutoModelForSeq2SeqLM for T5)
             auto_model_class = get_auto_model_class(model_name, trust_remote_code=trust_remote_code)
             if verbose and auto_model_class != AutoModelForCausalLM:
-                print(f"Using {auto_model_class.__name__} for encoder-decoder model")
+                print(f"Using {auto_model_class.__name__}")
             # Ensure pad_token_id exists (some models crash without it during init).
             hf_config = AutoConfig.from_pretrained(
                 model_name, trust_remote_code=trust_remote_code, token=_hf_token()

From 1b25029d850bd02c8be620d96f5e8bdc1b82ad1e Mon Sep 17 00:00:00 2001
From: huseyincavusbi <huseyincavus@proton.me>
Date: Wed, 24 Jun 2026 18:32:14 +0300
Subject: [PATCH 2/6] =?UTF-8?q?fix:=20skip=20Phase=202=20header=20when=20p?=
 =?UTF-8?q?hase=20not=20selected=20=E2=80=94=20empty=20header=20printed=20?=
 =?UTF-8?q?in=20every=20run?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 transformer_lens/benchmarks/main_benchmark.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/transformer_lens/benchmarks/main_benchmark.py b/transformer_lens/benchmarks/main_benchmark.py
index 2fe3b0d40..d2ccbfa70 100644
--- a/transformer_lens/benchmarks/main_benchmark.py
+++ b/transformer_lens/benchmarks/main_benchmark.py
@@ -1209,14 +1209,14 @@ def cleanup_model(model, model_name_str: str):
     # PHASE 2: Bridge (unprocessed) + HookedTransformer (unprocessed)
     # ========================================================================
     current_phase[0] = 2
-    if verbose:
-        print(f"\n{'='*80}")
-        print("PHASE 2: TransformerBridge (unprocessed) + HookedTransformer (unprocessed)")
-        print(f"{'='*80}\n")
 
     # OPTIMIZATION: Run generation benchmarks first (only bridge in memory)
     # Then cleanup bridge before loading HT to reduce peak memory
     if should_run_phase(2) and bridge_unprocessed:
+        if verbose:
+            print(f"\n{'='*80}")
+            print("PHASE 2: TransformerBridge (unprocessed) + HookedTransformer (unprocessed)")
+            print(f"{'='*80}\n")
         if verbose:
             print("Running Phase 2 benchmarks...\n")
 

From f41eaed4f7d0c47048dc4d1518cead2039fa3054 Mon Sep 17 00:00:00 2001
From: huseyincavusbi <huseyincavus@proton.me>
Date: Wed, 24 Jun 2026 18:34:54 +0300
Subject: [PATCH 3/6] =?UTF-8?q?fix:=20skip=20component=20benchmarking=20fo?=
 =?UTF-8?q?r=20DelegatedAttentionBlockBridge=20=E2=80=94=20attn/PLE/rotary?=
 =?UTF-8?q?=5Femb=20can't=20be=20tested=20in=20isolation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../benchmarks/component_outputs.py           | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/transformer_lens/benchmarks/component_outputs.py b/transformer_lens/benchmarks/component_outputs.py
index 1efe59f82..9aa260d93 100644
--- a/transformer_lens/benchmarks/component_outputs.py
+++ b/transformer_lens/benchmarks/component_outputs.py
@@ -419,6 +419,31 @@ def _test_component_recursive(
         ):
             return
 
+        # Skip attention and PLE submodules when using DelegatedAttentionBlockBridge.
+        # These architectures delegate all math to HF; the benchmark can't call the HF
+        # attention in isolation (missing position_embeddings, attention_mask, etc.) and
+        # PLE submodules receive per-layer inputs at a different dimension than hidden_states.
+        _is_delegated = (
+            hasattr(self.bridge_model, "blocks")
+            and "hook_q_input"
+            not in getattr(
+                self.bridge_model.blocks, "hook_aliases", {"hook_q_input": True}
+            )
+        )
+        if _is_delegated and "attn" in component_path:
+            return
+        if _is_delegated and component_path == "rotary_emb":
+            return
+        if _is_delegated and any(
+            name in component_path
+            for name in (
+                "per_layer_input_gate",
+                "per_layer_projection",
+                "post_per_layer_input_norm",
+            )
+        ):
+            return
+
         # Skip models whose MLP/attn forward signatures require extra context from the block:
         # - BLOOM: MLP requires residual and alibi bias
         # - T5: requires cache_position for relative position embeddings

From 50dc4a206d94662e7d008bef850cd233ce142d2b Mon Sep 17 00:00:00 2001
From: huseyincavusbi <huseyincavus@proton.me>
Date: Wed, 24 Jun 2026 18:49:34 +0300
Subject: [PATCH 4/6] fix: detect DelegatedAttentionBlockBridge via
 adapter.component_mapping instead of bridge_model.blocks

---
 transformer_lens/benchmarks/component_outputs.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/transformer_lens/benchmarks/component_outputs.py b/transformer_lens/benchmarks/component_outputs.py
index 9aa260d93..bc2add46a 100644
--- a/transformer_lens/benchmarks/component_outputs.py
+++ b/transformer_lens/benchmarks/component_outputs.py
@@ -423,12 +423,9 @@ def _test_component_recursive(
         # These architectures delegate all math to HF; the benchmark can't call the HF
         # attention in isolation (missing position_embeddings, attention_mask, etc.) and
         # PLE submodules receive per-layer inputs at a different dimension than hidden_states.
-        _is_delegated = (
-            hasattr(self.bridge_model, "blocks")
-            and "hook_q_input"
-            not in getattr(
-                self.bridge_model.blocks, "hook_aliases", {"hook_q_input": True}
-            )
+        _blocks_component = getattr(self.adapter, "component_mapping", {}).get("blocks") if self.adapter is not None else None
+        _is_delegated = _blocks_component is not None and (
+            "hook_q_input" not in getattr(_blocks_component, "hook_aliases", {"hook_q_input": True})
         )
         if _is_delegated and "attn" in component_path:
             return

From 8ebb72962bfbbc609754d30b4467f1322cc90379 Mon Sep 17 00:00:00 2001
From: huseyincavusbi <huseyincavus@proton.me>
Date: Wed, 24 Jun 2026 18:56:29 +0300
Subject: [PATCH 5/6] fix: add rotary_emb skip in _test_component for delegated
 attention blocks

---
 .../benchmarks/component_outputs.py           | 22 +++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/transformer_lens/benchmarks/component_outputs.py b/transformer_lens/benchmarks/component_outputs.py
index bc2add46a..09688b611 100644
--- a/transformer_lens/benchmarks/component_outputs.py
+++ b/transformer_lens/benchmarks/component_outputs.py
@@ -199,6 +199,17 @@ def print_detailed_analysis(self) -> None:
 class ComponentBenchmarker:
     """Benchmarking utility for testing TransformerBridge components against HuggingFace."""
 
+    def _is_delegated_block(self) -> bool:
+        """Detect DelegatedAttentionBlockBridge: True when blocks has no hook_q_input alias."""
+        blocks = (
+            getattr(self.adapter, "component_mapping", {}).get("blocks")
+            if self.adapter is not None
+            else None
+        )
+        return blocks is not None and (
+            "hook_q_input" not in getattr(blocks, "hook_aliases", {"hook_q_input": True})
+        )
+
     def __init__(
         self,
         bridge_model: nn.Module,
@@ -423,10 +434,7 @@ def _test_component_recursive(
         # These architectures delegate all math to HF; the benchmark can't call the HF
         # attention in isolation (missing position_embeddings, attention_mask, etc.) and
         # PLE submodules receive per-layer inputs at a different dimension than hidden_states.
-        _blocks_component = getattr(self.adapter, "component_mapping", {}).get("blocks") if self.adapter is not None else None
-        _is_delegated = _blocks_component is not None and (
-            "hook_q_input" not in getattr(_blocks_component, "hook_aliases", {"hook_q_input": True})
-        )
+        _is_delegated = self._is_delegated_block()
         if _is_delegated and "attn" in component_path:
             return
         if _is_delegated and component_path == "rotary_emb":
@@ -548,6 +556,12 @@ def _test_component(
             ComponentTestResult or None if the component cannot be tested
         """
         try:
+            # Skip rotary_emb for DelegatedAttentionBlockBridge architectures.
+            # Gemma4's RotaryEmbeddingBridge wraps a rotary that returns a set-like
+            # structure which the benchmark comparison can't subscript.
+            if self._is_delegated_block() and component_path == "rotary_emb":
+                return None
+
             # Get bridge component
             # The adapter returns nn.Module, but for bridge models it's actually GeneralizedComponent
             bridge_component = cast(

From 4d64b5d0c86bb3484f08a7b34080448a55cce59b Mon Sep 17 00:00:00 2001
From: huseyincavusbi <huseyincavus@proton.me>
Date: Wed, 24 Jun 2026 21:09:28 +0300
Subject: [PATCH 6/6] fix: remove dead rotary_emb skip from
 _test_component_recursive (rotary is top-level, only _test_component handles
 it)

---
 transformer_lens/benchmarks/component_outputs.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/transformer_lens/benchmarks/component_outputs.py b/transformer_lens/benchmarks/component_outputs.py
index 09688b611..adbb76924 100644
--- a/transformer_lens/benchmarks/component_outputs.py
+++ b/transformer_lens/benchmarks/component_outputs.py
@@ -437,8 +437,6 @@ def _test_component_recursive(
         _is_delegated = self._is_delegated_block()
         if _is_delegated and "attn" in component_path:
             return
-        if _is_delegated and component_path == "rotary_emb":
-            return
         if _is_delegated and any(
             name in component_path
             for name in (