From c2f63982dd10ce6a0feb9d4a6f18b3fe19389137 Mon Sep 17 00:00:00 2001 From: Savitha Srinivasan Date: Fri, 20 Feb 2026 23:11:12 +0000 Subject: [PATCH 1/2] fix default buffer size to 5k Signed-off-by: Savitha Srinivasan --- .../recipes/llama3_native_te/hydra_config/defaults.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml b/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml index d6c181598f..f738914a7c 100644 --- a/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml +++ b/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml @@ -20,7 +20,7 @@ dataset: num_workers: 1 max_seq_length: 8192 # Window size for genomic sequences stride: 200 # Overlap for windowing - buffer_size: 500_000 # Shuffle buffer size + buffer_size: 5000 # Shuffle buffer size use_stateful_dataloader: false # Until https://github.com/pytorch/pytorch/pull/163102 is resolved with torchdata. pad_sequences_to_be_divisible_by: null load_dataset_kwargs: From 0493076ad95f5df7ee17ec15e262d4449dbe447f Mon Sep 17 00:00:00 2001 From: Savitha Srinivasan Date: Fri, 20 Feb 2026 23:11:46 +0000 Subject: [PATCH 2/2] fix formatting Signed-off-by: Savitha Srinivasan --- .../recipes/llama3_native_te/hydra_config/defaults.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml b/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml index f738914a7c..dbbbd655d0 100644 --- a/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml +++ b/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml @@ -20,7 +20,7 @@ dataset: num_workers: 1 max_seq_length: 8192 # Window size for genomic sequences stride: 200 # Overlap for windowing - buffer_size: 5000 # Shuffle buffer size + buffer_size: 5_000 # Shuffle buffer size use_stateful_dataloader: false # Until https://github.com/pytorch/pytorch/pull/163102 is resolved with torchdata. pad_sequences_to_be_divisible_by: null load_dataset_kwargs: