@@ -34,9 +34,16 @@ class Builder(tfds.core.GeneratorBasedBuilder):
3434 VERSION = tfds .core .Version ("2.1.0" )
3535 RELEASE_NOTES = {
3636 "1.0.0" : "Initial release." ,
37- "2.0.0" : "Update the dataset with valid URLs." ,
38- "2.1.0" : "Update the dataset with cleaned URLs." ,
37+ "2.0.0" : "[Do not use] Update the dataset with valid URLs." ,
38+ "2.1.0" : (
39+ "Update the dataset with the correct URLs. The URLs in this version"
40+ " come from HuggingFace's dataset repo, which is curated by the same"
41+ " author: https://huggingface.co/datasets/alexfabbri/multi_news."
42+ ),
3943 }
44+ BLOCKED_VERSIONS = tfds .core .utils .BlockedVersions (
45+ versions = {"2.0.0" : "The URLs of this version are invalid." }
46+ )
4047
4148 def _info (self ) -> tfds .core .DatasetInfo :
4249 """Returns the dataset metadata."""
@@ -77,9 +84,10 @@ def _generate_examples(self, src_file, tgt_file):
7784 ).open () as tgt_f :
7885 for i , (src_line , tgt_line ) in enumerate (zip (src_f , tgt_f )):
7986 yield i , {
80- # In original file, each line has one example and natural newline
81- # tokens "\n" are being replaced with "NEWLINE_CHAR". Here restore
82- # the natural newline token to avoid special vocab "NEWLINE_CHAR".
87+ # In the original file, each line has one example and natural
88+ # newline tokens "\n" are being replaced with "NEWLINE_CHAR".
89+ # Here, we restore the natural newline token to avoid the special
90+ # vocab token "NEWLINE_CHAR".
8391 _DOCUMENT : src_line .strip ().replace ("NEWLINE_CHAR" , "\n " ),
8492 _SUMMARY : tgt_line .strip ().lstrip (),
8593 }
0 commit comments