NVIDIA · yufeiwu-nv · Apr 10, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
@@ -93,7 +93,6 @@ def get_model_yaml_config(model_label: str,
                     'enable_padding': False
                 },
                 'moe_config': {
-                    'backend': 'TRTLLM',
                     'max_num_tokens': 32768
                 },
                 'speculative_config': {
@@ -228,18 +227,6 @@ def get_model_yaml_config(model_label: str,
                 'enable_attention_dp': True,
             }
         },
-        # Qwen3 models with fp4 quantization on B200 with moe backend equal to TRTLLM
-        {
-            'patterns': [
-                'qwen3_235b_a22b_fp4-bench-pytorch-float4-maxbs:512-maxnt:2048-input_output_len:1000,2000-con:8-ep:8-gpus:8',
-            ],
-            'config': {
-                'enable_attention_dp': False,
-                'moe_config': {
-                    'backend': 'TRTLLM'
-                }
-            }
-        },
         {
             'patterns': [
                 'qwen3_4b-bench-pytorch-streaming-bfloat16-maxbs:4-kv_frac:0.6-input_output_len:500,100-reqs:200-con:4',
@@ -317,9 +304,6 @@ def get_model_yaml_config(model_label: str,
                     'enable_padding': True,
                     'max_batch_size': 720,
                 },
-                'moe_config': {
-                    'backend': 'TRTLLM'
-                },
                 'stream_interval': 10,
                 'num_postprocess_workers': 4
             }
@@ -336,9 +320,6 @@ def get_model_yaml_config(model_label: str,
                     'enable_padding': True,
                     'max_batch_size': 720,
                 },
-                'moe_config': {
-                    'backend': 'TRTLLM'
-                },
                 'stream_interval': 10,
                 'num_postprocess_workers': 4
             }

diff --git a/tests/integration/test_lists/dev/.gitignore b/tests/integration/test_lists/dev/.gitignore