-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathminimal_spec.json
More file actions
57 lines (57 loc) · 1.46 KB
/
minimal_spec.json
File metadata and controls
57 lines (57 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
{
"_comment": "Copy-paste this object into the `datasets` array in sources.json and edit the placeholder values. Validates against sources.schema.json. Use the Python load-edit-dump pattern from AGENTS.md to insert it — never `sed`.",
"slug": "my-dataset",
"short_name": "My Dataset",
"full_name": "My Dataset (Publisher Attribution, Year)",
"description": "One-line summary describing what this dataset contains.",
"family": "direct",
"license": {
"spdx": "CC0-1.0",
"source_url": "https://example.com/LICENSE",
"redistribution_permitted": true,
"attribution_required": false,
"notes": null,
"scrape_advisory": null
},
"fetch": {
"type": "http",
"urls": ["https://example.com/data.csv"],
"auth": null,
"expected_bytes": null,
"expected_sha256": null
},
"extract": {
"type": "passthrough",
"include": null,
"exclude": null,
"post": null
},
"parse": {
"reader": "csv",
"options": {
"delimiter": ",",
"has_header": true,
"encoding": "utf-8"
}
},
"transform": {
"handler": "tighten_types",
"params": {}
},
"write": {
"output": "my-dataset.parquet",
"compression": "zstd",
"row_group_size_rows": 1048576,
"statistics": true,
"page_index": false
},
"expect": {
"rows": null,
"schema_hash": null,
"notes": "Populate `rows` after the first successful build."
},
"convert": {
"vortex": true,
"vortex_skip_reason": null
}
}