diff --git a/setup.py b/setup.py index ff8420800..ea6480dd5 100644 --- a/setup.py +++ b/setup.py @@ -46,13 +46,13 @@ 'Brotli>=1.0.9', 'google-cloud-storage>=2.9.0,<3.3.0', 'matplotlib>=3.5.2,<4', - 'numpy>=1.21.5,<2.2.0', - 'paramiko>=2.11.0,<5', + 'numpy>=1.21.5,<2.5.0', + 'paramiko>=2.11.0,<6', 'python-snappy>=0.6.1,<1', 'torch>=1.10,<3', 'torchvision>=0.10', 'tqdm>=4.64.0,<5', - 'transformers>=4.21.3,<5', + 'transformers>=4.21.3,<6', 'xxhash>=3.0.0,<4', 'zstd>=1.5.2.5,<2', 'oci>=2.88,<3', @@ -126,7 +126,7 @@ ] extra_deps['hf'] = [ - 'huggingface_hub>=0.23.4,<1.4', + 'huggingface_hub>=0.23.4,<1.15', ] extra_deps['testing'] = [ diff --git a/streaming/base/dataloader.py b/streaming/base/dataloader.py index 9487c5e2b..2465c1e5d 100644 --- a/streaming/base/dataloader.py +++ b/streaming/base/dataloader.py @@ -7,8 +7,7 @@ from torch import Tensor from torch.utils.data import DataLoader -from transformers.feature_extraction_utils import BatchFeature -from transformers.tokenization_utils_base import BatchEncoding +from transformers import BatchEncoding, BatchFeature from streaming.base.dataset import StreamingDataset from streaming.base.world import World diff --git a/streaming/text/c4.py b/streaming/text/c4.py index bfde0c787..5ca94e8ae 100644 --- a/streaming/text/c4.py +++ b/streaming/text/c4.py @@ -9,7 +9,7 @@ from typing import Any, Optional -from transformers.models.auto.tokenization_auto import AutoTokenizer +from transformers import AutoTokenizer from streaming.base import StreamingDataset diff --git a/streaming/text/pile.py b/streaming/text/pile.py index 59379b602..1d27c520b 100644 --- a/streaming/text/pile.py +++ b/streaming/text/pile.py @@ -9,7 +9,7 @@ from typing import Any, Optional -from transformers.models.auto.tokenization_auto import AutoTokenizer +from transformers import AutoTokenizer from streaming.base import StreamingDataset