|
18 | 18 | "title": "mahmoud2019/ReceiptQA", |
19 | 19 | "sourceType": "huggingface", |
20 | 20 | "language": "ara", |
21 | | - "languages": [ |
22 | | - "ara" |
23 | | - ], |
| 21 | + "languages": ["ara"], |
24 | 22 | "license": "MIT", |
25 | 23 | "evidencePath": "licenses/ara/mahmoud2019-receiptqa.json", |
26 | 24 | "totalAvailable": 6, |
|
754 | 752 | "title": "Lakshmiperumal/scanned_receipts", |
755 | 753 | "sourceType": "huggingface", |
756 | 754 | "language": "eng", |
757 | | - "languages": [ |
758 | | - "eng" |
759 | | - ], |
| 755 | + "languages": ["eng"], |
760 | 756 | "license": "CC-BY-4.0", |
761 | 757 | "evidencePath": "licenses/eng/lakshmiperumal-scanned-receipts.json", |
762 | 758 | "totalAvailable": 713, |
|
1562 | 1558 | "mediaType": "application/octet-stream" |
1563 | 1559 | } |
1564 | 1560 | ], |
1565 | | - "skipped": [ |
1566 | | - "Limit reached (80 of 713 files)." |
1567 | | - ], |
| 1561 | + "skipped": ["Limit reached (80 of 713 files)."], |
1568 | 1562 | "notes": "Scanned receipt images from the FiftyOne mirror." |
1569 | 1563 | }, |
1570 | 1564 | { |
1571 | 1565 | "sourceId": "Saran-R12/Receipts", |
1572 | 1566 | "title": "Saran-R12/Receipts", |
1573 | 1567 | "sourceType": "huggingface", |
1574 | 1568 | "language": "eng", |
1575 | | - "languages": [ |
1576 | | - "eng" |
1577 | | - ], |
| 1569 | + "languages": ["eng"], |
1578 | 1570 | "license": "Apache-2.0", |
1579 | 1571 | "evidencePath": "licenses/eng/saran-r12-receipts.json", |
1580 | 1572 | "totalAvailable": 1, |
|
1948 | 1940 | "title": "Voxel51/scanned_receipts", |
1949 | 1941 | "sourceType": "huggingface", |
1950 | 1942 | "language": "eng", |
1951 | | - "languages": [ |
1952 | | - "eng" |
1953 | | - ], |
| 1943 | + "languages": ["eng"], |
1954 | 1944 | "license": "CC-BY-4.0", |
1955 | 1945 | "evidencePath": "licenses/eng/voxel51-scanned-receipts.json", |
1956 | 1946 | "totalAvailable": 713, |
|
2756 | 2746 | "mediaType": "application/octet-stream" |
2757 | 2747 | } |
2758 | 2748 | ], |
2759 | | - "skipped": [ |
2760 | | - "Limit reached (80 of 713 files)." |
2761 | | - ], |
| 2749 | + "skipped": ["Limit reached (80 of 713 files)."], |
2762 | 2750 | "notes": "Primarily English scanned receipt images from the SROIE mirror." |
2763 | 2751 | }, |
2764 | 2752 | { |
2765 | 2753 | "sourceId": "https://zenodo.org/records/13688441/files/Dataset.zip?download=1", |
2766 | 2754 | "title": "Zenodo hand-captured restaurant receipts", |
2767 | 2755 | "sourceType": "direct", |
2768 | 2756 | "language": "eng", |
2769 | | - "languages": [ |
2770 | | - "eng" |
2771 | | - ], |
| 2757 | + "languages": ["eng"], |
2772 | 2758 | "license": "CC-BY-4.0", |
2773 | 2759 | "evidencePath": "licenses/eng/zenodo-hand-captured-restaurant-receipts.json", |
2774 | 2760 | "totalAvailable": 1, |
|
3502 | 3488 | "title": "Voxel51/consolidated_receipt_dataset", |
3503 | 3489 | "sourceType": "huggingface", |
3504 | 3490 | "language": "ind", |
3505 | | - "languages": [ |
3506 | | - "ind" |
3507 | | - ], |
| 3491 | + "languages": ["ind"], |
3508 | 3492 | "license": "CC-BY-4.0", |
3509 | 3493 | "evidencePath": "licenses/ind/voxel51-consolidated-receipt-dataset.json", |
3510 | 3494 | "totalAvailable": 801, |
|
4310 | 4294 | "mediaType": "application/octet-stream" |
4311 | 4295 | } |
4312 | 4296 | ], |
4313 | | - "skipped": [ |
4314 | | - "Limit reached (80 of 801 files)." |
4315 | | - ], |
| 4297 | + "skipped": ["Limit reached (80 of 801 files)."], |
4316 | 4298 | "notes": "Indonesian receipt dataset mirrored through FiftyOne." |
4317 | 4299 | }, |
4318 | 4300 | { |
4319 | 4301 | "sourceId": "HumynLabs/Korean_Receipts_Dataset", |
4320 | 4302 | "title": "HumynLabs/Korean_Receipts_Dataset", |
4321 | 4303 | "sourceType": "huggingface", |
4322 | 4304 | "language": "kor", |
4323 | | - "languages": [ |
4324 | | - "kor" |
4325 | | - ], |
| 4305 | + "languages": ["kor"], |
4326 | 4306 | "license": "CC-BY-4.0", |
4327 | 4307 | "evidencePath": "licenses/kor/humynlabs-korean-receipts-dataset.json", |
4328 | 4308 | "totalAvailable": 20, |
|
4536 | 4516 | "title": "cdek-ocr/receipt-ocr-ru", |
4537 | 4517 | "sourceType": "huggingface", |
4538 | 4518 | "language": "rus", |
4539 | | - "languages": [ |
4540 | | - "rus" |
4541 | | - ], |
| 4519 | + "languages": ["rus"], |
4542 | 4520 | "license": "MIT", |
4543 | 4521 | "evidencePath": "licenses/rus/cdek-ocr-receipt-ocr-ru.json", |
4544 | 4522 | "totalAvailable": 988, |
|
5344 | 5322 | "mediaType": "application/octet-stream" |
5345 | 5323 | } |
5346 | 5324 | ], |
5347 | | - "skipped": [ |
5348 | | - "Limit reached (80 of 988 files)." |
5349 | | - ], |
| 5325 | + "skipped": ["Limit reached (80 of 988 files)."], |
5350 | 5326 | "notes": "Russian receipt OCR image dataset." |
5351 | 5327 | }, |
5352 | 5328 | { |
5353 | 5329 | "sourceId": "CC1984/mall_receipt_extraction_dataset", |
5354 | 5330 | "title": "CC1984/mall_receipt_extraction_dataset", |
5355 | 5331 | "sourceType": "huggingface", |
5356 | 5332 | "language": "zho", |
5357 | | - "languages": [ |
5358 | | - "zho" |
5359 | | - ], |
| 5333 | + "languages": ["zho"], |
5360 | 5334 | "license": "MIT", |
5361 | 5335 | "evidencePath": "licenses/zho/cc1984-mall-receipt-extraction-dataset.json", |
5362 | 5336 | "totalAvailable": 994, |
|
6162 | 6136 | "mediaType": "application/octet-stream" |
6163 | 6137 | } |
6164 | 6138 | ], |
6165 | | - "skipped": [ |
6166 | | - "Limit reached (80 of 994 files)." |
6167 | | - ], |
| 6139 | + "skipped": ["Limit reached (80 of 994 files)."], |
6168 | 6140 | "notes": "Chinese mall receipt image dataset." |
6169 | 6141 | } |
6170 | 6142 | ] |
|
0 commit comments