Skip to content

Commit 72e3d2f

Browse files
committed
Merge branch 'main' into feat/fk-musts
2 parents d328ffe + 5f2bc06 commit 72e3d2f

File tree

8 files changed

+147
-21
lines changed

8 files changed

+147
-21
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ often, sometimes several in a day. It also means any individual release
99
will not have many changes within it. Below is a list of releases along
1010
with what was changed within each.
1111

12+
## 0.24.0 (2025-12-08)
13+
14+
### Feat
15+
16+
- :sparkles: check primary key exists (#218)
17+
1218
## 0.23.1 (2025-11-27)
1319

1420
### Fix

CITATION.cff

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
title: "Check Data Package: Ensure the correctness and compliance of your Data Package"
2-
# TODO: Add abstract of Python package.
3-
abstract: ""
2+
abstract: |
3+
Check your Data Package's metadata against the Data Package standard to ensure that
4+
it's compliant with the standard. See https://check-datapackage.seedcase-project.org
5+
for more information.
46
authors:
57
- family-names: Johnston
68
given-names: Luke William
@@ -18,9 +20,12 @@ authors:
1820
given-names: Marton
1921
affiliation: "Steno Diabetes Center Aarhus"
2022
orcid: "https://orcid.org/0009-0007-4628-655X"
23+
- family-names: Ostblom
24+
given-names: Joel
25+
orcid: "https://orcid.org/0000-0003-0051-3239"
2126
cff-version: 1.2.0
22-
# doi: ""
23-
# date-released: ""
27+
doi: "10.5281/zenodo.17733655"
28+
date-released: "2025-11-27"
2429
keywords:
2530
- "data package"
2631
- "data package checks"
@@ -31,9 +36,7 @@ keywords:
3136
- "metadata"
3237
- "metadata checks"
3338
- "metadata verification"
34-
- "verification"
3539
- "data engineering"
36-
- "metadata checks"
3740
license: MIT
3841
message: "If you use this Python package, please cite it using these metadata."
3942
repository-code: "https://github.com/seedcase-project/check-datapackage"

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,26 @@ This project is licensed under the [MIT License](LICENSE.md).
140140
## Changelog
141141

142142
For a list of changes, see our [changelog](CHANGELOG.md) page.
143+
144+
## Citing
145+
146+
This project is part of the [Seedcase
147+
Project](https://seedcase-project.org), which is a collaborative effort
148+
to create a framework for data management and analysis in research. If
149+
you use this project in your work, please cite it as follows:
150+
151+
Johnston L.W., Brødbæk S.K., Beicher K., Vago M., Ostblom J. (2025).
152+
Check Data Package: Ensure the correctness and compliance of your Data
153+
Package. DOI: 10.5281/zenodo.17733655. URL:
154+
https://check-datapackage.seedcase-project.org
155+
156+
Or as a BibTeX entry:
157+
158+
@misc{YourReferenceHere,
159+
author = {Johnston, Luke William and Brødbæk, Signe Kirk and Beicher, Kristiane and Vago, Marton and Ostblom, Joel},
160+
doi = {10.5281/zenodo.17733655},
161+
month = {11},
162+
title = {Check Data Package: Ensure the correctness and compliance of your Data Package},
163+
url = {https://check-datapackage.seedcase-project.org},
164+
year = {2025}
165+
}

README.qmd

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
format: gfm
33
metadata-files:
44
- _metadata.yml
5+
execute:
6+
echo: false
7+
jupyter: python3
58
---
69

710
<!-- NOTE: This README.md is auto-generated from README.qmd. Edit that file. -->
@@ -119,3 +122,21 @@ This project is licensed under the [MIT License](LICENSE.md).
119122
## Changelog
120123

121124
For a list of changes, see our [changelog](CHANGELOG.md) page.
125+
126+
## Citing
127+
128+
This project is part of the [Seedcase
129+
Project](https://seedcase-project.org), which is a collaborative effort
130+
to create a framework for data management and analysis in research. If
131+
you use this project in your work, please cite it as follows:
132+
133+
```{python}
134+
#| output: asis
135+
!uvx --quiet cffconvert --format apalike
136+
```
137+
138+
Or as a BibTeX entry:
139+
140+
```{python}
141+
!uvx --quiet cffconvert --format bibtex
142+
```

pyproject.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
[project]
22
name = "check-datapackage"
3-
version = "0.23.1"
4-
# TODO: Add a description of the package.
5-
description = ""
3+
version = "0.24.0"
4+
description = "Check your Data Package's metadata against the Data Package standard to ensure that it's compliant with the standard"
65
authors = [
76
{name = "Luke W. Johnston", email = "lwjohnst@gmail.com" },
87
{name = "Kristiane Beicher", email = "kris.beicher@clin.au.dk" },
98
{name = "Signe Kirk Brødbæk", email = "signekb@clin.au.dk" },
109
{name = "Marton Vago", email = "marton.vago95@gmail.com" },
10+
{name = "Joel Ostblom", email = "joel@joelostblom.com" },
1111
]
1212
maintainers = [
1313
{name = "Luke W. Johnston", email = "lwjohnst@gmail.com" },
1414
{name = "Kristiane Beicher", email = "kris.beicher@clin.au.dk" },
1515
{name = "Signe Kirk Brødbæk", email = "signekb@clin.au.dk" },
1616
{name = "Marton Vago", email = "marton.vago95@gmail.com" },
17+
{name = "Joel Ostblom", email = "joel@joelostblom.com" },
1718
]
1819
readme = "README.md"
1920
license = "MIT"

src/check_datapackage/check.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,14 @@ class for more details, especially about the default values.
143143
def _check_keys(properties: dict[str, Any], issues: list[Issue]) -> list[Issue]:
144144
"""Check that primary and foreign keys exist."""
145145
# Primary keys
146-
key_issues = []
146+
resources_with_pk = _get_fields_at_jsonpath(
147+
"$.resources[?(length(@.schema.primaryKey) > 0 || @.schema.primaryKey == '')]",
148+
properties,
149+
)
150+
resources_with_pk = _keep_resources_with_no_issue_at_property(
151+
resources_with_pk, issues, "schema.primaryKey"
152+
)
153+
key_issues = _flat_map(resources_with_pk, _check_primary_key)
147154

148155
# Foreign keys
149156
resources_with_fk = _get_fields_at_jsonpath(
@@ -179,6 +186,27 @@ def _keep_resources_with_no_issue_at_property(
179186
)
180187

181188

189+
def _check_primary_key(resource: PropertyField) -> list[Issue]:
190+
"""Check that primary key fields exist in the resource."""
191+
pk_fields = resolve("/schema/primaryKey", resource.value)
192+
pk_fields_list = _key_fields_as_str_list(pk_fields)
193+
unknown_fields = _get_unknown_key_fields(pk_fields_list, resource.value)
194+
195+
if not unknown_fields:
196+
return []
197+
198+
return [
199+
Issue(
200+
jsonpath=f"{resource.jsonpath}.schema.primaryKey",
201+
type="primary-key",
202+
message=(
203+
f"No fields found in resource for primary key fields: {unknown_fields}."
204+
),
205+
instance=pk_fields,
206+
)
207+
]
208+
209+
182210
def _check_foreign_keys(
183211
resource: PropertyField, properties: dict[str, Any]
184212
) -> list[Issue]:

tests/test_check.py

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,60 @@ def test_fails_properties_with_pattern_mismatch():
9191
assert issues[0].jsonpath == "$.contributors[0].path"
9292

9393

94+
@mark.parametrize("primary_key", ["id", ["id", "name"]])
95+
def test_pass_good_primary_key(primary_key):
96+
properties = example_package_properties()
97+
properties["resources"][0]["schema"]["primaryKey"] = primary_key
98+
properties["resources"][0]["schema"]["fields"].extend(
99+
[
100+
{"name": "id", "type": "integer"},
101+
{"name": "name", "type": "string"},
102+
]
103+
)
104+
105+
issues = check(properties)
106+
107+
assert issues == []
108+
109+
110+
@mark.parametrize("primary_key", ["", "last_name", ["first_name", "last_name"]])
111+
def test_fail_primary_key_with_unknown_fields(primary_key):
112+
properties = example_package_properties()
113+
properties["resources"][0]["schema"]["primaryKey"] = primary_key
114+
115+
issues = check(properties)
116+
117+
assert len(issues) == 1
118+
assert issues[0].jsonpath == "$.resources[0].schema.primaryKey"
119+
assert issues[0].type == "primary-key"
120+
assert issues[0].instance == primary_key
121+
122+
123+
@mark.parametrize("primary_key", [None, 123, [], [123, "a_field"]])
124+
def test_do_not_check_bad_primary_key_against_fields(primary_key):
125+
properties = example_package_properties()
126+
properties["resources"][0]["schema"]["primaryKey"] = primary_key
127+
128+
issues = check(properties)
129+
130+
assert len(issues) == 1
131+
assert issues[0].type != "primary-key"
132+
133+
134+
def test_do_not_check_primary_key_against_bad_field():
135+
properties = example_package_properties()
136+
properties["resources"][0]["schema"]["primaryKey"] = "eye-colour"
137+
properties["resources"][0]["schema"]["fields"].append(
138+
# Bad name
139+
{"name": 123, "type": "integer"},
140+
)
141+
142+
issues = check(properties)
143+
144+
assert len(issues) == 1
145+
assert issues[0].type != "primary-key"
146+
147+
94148
def test_pass_good_foreign_keys_same_resource():
95149
properties = example_package_properties()
96150
properties["resources"][0]["schema"]["fields"].extend(
@@ -832,16 +886,6 @@ def test_fail_foreign_keys_with_bad_array_item():
832886
)
833887

834888

835-
@mark.parametrize("primary_key", ["id", ["name", "address"]])
836-
def test_pass_good_primary_key(primary_key):
837-
properties = example_package_properties()
838-
properties["resources"][0]["schema"]["primaryKey"] = primary_key
839-
840-
issues = check(properties)
841-
842-
assert issues == []
843-
844-
845889
def test_fail_primary_key_of_bad_type():
846890
properties = example_package_properties()
847891
properties["resources"][0]["schema"]["primaryKey"] = 123

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)