Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@
'Brotli>=1.0.9',
'google-cloud-storage>=2.9.0,<3.3.0',
'matplotlib>=3.5.2,<4',
- 'numpy>=1.21.5,<2.2.0',
- 'paramiko>=2.11.0,<5',
+ 'numpy>=1.21.5,<2.5.0',
+ 'paramiko>=2.11.0,<6',
'python-snappy>=0.6.1,<1',
'torch>=1.10,<3',
'torchvision>=0.10',
'tqdm>=4.64.0,<5',
- 'transformers>=4.21.3,<5',
+ 'transformers>=4.21.3,<6',
'xxhash>=3.0.0,<4',
'zstd>=1.5.2.5,<2',
'oci>=2.88,<3',
Expand Down Expand Up @@ -126,7 +126,7 @@
]

extra_deps['hf'] = [
- 'huggingface_hub>=0.23.4,<1.4',
+ 'huggingface_hub>=0.23.4,<1.15',
]

extra_deps['testing'] = [
Expand Down
3 changes: 1 addition & 2 deletions streaming/base/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@

from torch import Tensor
from torch.utils.data import DataLoader
- from transformers.feature_extraction_utils import BatchFeature
- from transformers.tokenization_utils_base import BatchEncoding
+ from transformers import BatchEncoding, BatchFeature

from streaming.base.dataset import StreamingDataset
from streaming.base.world import World
Expand Down
2 changes: 1 addition & 1 deletion streaming/text/c4.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from typing import Any, Optional

- from transformers.models.auto.tokenization_auto import AutoTokenizer
+ from transformers import AutoTokenizer

from streaming.base import StreamingDataset

Expand Down
2 changes: 1 addition & 1 deletion streaming/text/pile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from typing import Any, Optional

- from transformers.models.auto.tokenization_auto import AutoTokenizer
+ from transformers import AutoTokenizer

from streaming.base import StreamingDataset

Expand Down