From 10c1b9f6747da5e1211bef124dc056a80308de46 Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Tue, 19 May 2026 14:49:05 -0400 Subject: [PATCH 1/4] Update versions --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index ff8420800..ea6480dd5 100644 --- a/setup.py +++ b/setup.py @@ -46,13 +46,13 @@ 'Brotli>=1.0.9', 'google-cloud-storage>=2.9.0,<3.3.0', 'matplotlib>=3.5.2,<4', - 'numpy>=1.21.5,<2.2.0', - 'paramiko>=2.11.0,<5', + 'numpy>=1.21.5,<2.5.0', + 'paramiko>=2.11.0,<6', 'python-snappy>=0.6.1,<1', 'torch>=1.10,<3', 'torchvision>=0.10', 'tqdm>=4.64.0,<5', - 'transformers>=4.21.3,<5', + 'transformers>=4.21.3,<6', 'xxhash>=3.0.0,<4', 'zstd>=1.5.2.5,<2', 'oci>=2.88,<3', @@ -126,7 +126,7 @@ ] extra_deps['hf'] = [ - 'huggingface_hub>=0.23.4,<1.4', + 'huggingface_hub>=0.23.4,<1.15', ] extra_deps['testing'] = [ From 65f987970693d758eaf1cf34ee9317f6c4db2085 Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Tue, 19 May 2026 14:49:43 -0400 Subject: [PATCH 2/4] Update dataloader.py --- streaming/base/dataloader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/streaming/base/dataloader.py b/streaming/base/dataloader.py index 9487c5e2b..2465c1e5d 100644 --- a/streaming/base/dataloader.py +++ b/streaming/base/dataloader.py @@ -7,8 +7,7 @@ from torch import Tensor from torch.utils.data import DataLoader -from transformers.feature_extraction_utils import BatchFeature -from transformers.tokenization_utils_base import BatchEncoding +from transformers import BatchEncoding, BatchFeature from streaming.base.dataset import StreamingDataset from streaming.base.world import World From ec20d02c1f865302b5cc87a3f9c06ef45de72fcf Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Tue, 19 May 2026 14:50:18 -0400 Subject: [PATCH 3/4] Update c4.py --- streaming/text/c4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming/text/c4.py b/streaming/text/c4.py index bfde0c787..5ca94e8ae 100644 --- a/streaming/text/c4.py +++ b/streaming/text/c4.py @@ -9,7 +9,7 @@ from typing import Any, Optional -from transformers.models.auto.tokenization_auto import AutoTokenizer +from transformers import AutoTokenizer from streaming.base import StreamingDataset From e2307abbd6fdb3bb363e4590bdea64fe656e77a8 Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Tue, 19 May 2026 14:50:48 -0400 Subject: [PATCH 4/4] Update pile.py --- streaming/text/pile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming/text/pile.py b/streaming/text/pile.py index 59379b602..1d27c520b 100644 --- a/streaming/text/pile.py +++ b/streaming/text/pile.py @@ -9,7 +9,7 @@ from typing import Any, Optional -from transformers.models.auto.tokenization_auto import AutoTokenizer +from transformers import AutoTokenizer from streaming.base import StreamingDataset