# NOTE(review): line breaks and the two-space YAML indent below were reconstructed from a single-line dump of this patch.
# Spacing inside context lines (e.g. before inline comments) may differ from the target file - confirm, or apply with `git apply --ignore-whitespace`.
# Change: shrink dataset.buffer_size (the shuffle buffer size) from 500_000 to 5_000.
diff --git a/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml b/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml
index d6c181598f..dbbbd655d0 100644
--- a/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml
+++ b/bionemo-recipes/recipes/llama3_native_te/hydra_config/defaults.yaml
@@ -20,7 +20,7 @@ dataset:
   num_workers: 1
   max_seq_length: 8192 # Window size for genomic sequences
   stride: 200 # Overlap for windowing
-  buffer_size: 500_000 # Shuffle buffer size
+  buffer_size: 5_000 # Shuffle buffer size
   use_stateful_dataloader: false # Until https://github.com/pytorch/pytorch/pull/163102 is resolved with torchdata.
   pad_sequences_to_be_divisible_by: null
   load_dataset_kwargs: