def get_quantity_kinds(
    lang: str | None = None,
    remove_namespaces: bool = True,
) -> dict:
    """Get all QUDT quantity kinds together with selected metadata.

    Queries the SPARQL endpoint at ``settings.SPARQL_URL`` for every triple
    whose subject IRI contains the quantity-kind path and whose predicate is
    one of ``skos:prefLabel``, ``skos:altLabel``, ``skos:exactMatch``,
    ``skos:related``, ``skos:definition`` or ``qudt:informativeReference``.

    Args:
        lang: Optional language code. It is lower-cased and handed to
            ``language_filter``, which decides per object whether the triple
            is kept. ``None`` is passed through unchanged.
        remove_namespaces: Forwarded to ``reformat_predicate_object`` to
            control whether namespace prefixes are stripped.

    Returns:
        A dict keyed by quantity kind IRI. Each value is a dict keyed by
        predicate; the entry is the single object when the predicate occurs
        once for that quantity kind, otherwise a list of all its objects.

    Raises:
        httpx.HTTPStatusError: If the endpoint answers with an error status.
    """
    logger.debug("Using sparql endpoint url %s", settings.SPARQL_URL)

    # NOTE(review): the subject filter hard-codes the vocab.sentier.dev host
    # while the FROM clause uses settings.VOCAB_PREFIX -- confirm whether the
    # two are meant to stay in sync.
    query = f"""
PREFIX qudt: <http://qudt.org/schema/qudt/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?s ?p ?o
FROM <{settings.VOCAB_PREFIX}qudt/>
where {{
    ?s ?p ?o
    FILTER (
        contains(STR(?s), "https://vocab.sentier.dev/qudt/quantity-kind/")
    )
    FILTER (
        ?p IN (skos:prefLabel, skos:altLabel, skos:exactMatch, skos:related, skos:definition, qudt:informativeReference)
    )
}}
    """

    logger.debug("Executing query %s", query)
    response = httpx.post(settings.SPARQL_URL, data={"query": query})
    response.raise_for_status()

    lang_checker = partial(language_filter, lang=lang.lower() if lang else None)
    data = [
        (
            obj["s"]["value"],
            reformat_predicate_object(obj, remove_namespaces=remove_namespaces),
        )
        for obj in response.json()["results"]["bindings"]
        if lang_checker(obj["o"])
    ]

    # Group objects by quantity kind, then by predicate. A plain nested
    # defaultdict avoids the pre-sorting that itertools.groupby would need.
    grouped = defaultdict(lambda: defaultdict(list))
    for qk, elem in data:
        # A two-item element is (predicate, object); longer ones carry extra
        # fields after the predicate, which are kept together as one value.
        value = elem[1] if len(elem) == 2 else elem[1:]
        grouped[qk][elem[0]].append(value)

    logger.info(
        "Retrieved %d triples for %d quantity kinds", len(data), len(grouped)
    )

    # Collapse single-item lists so a predicate used once maps to a bare value.
    return {
        qk: {
            predicate: (values[0] if len(values) == 1 else values)
            for predicate, values in predicates.items()
        }
        for qk, predicates in grouped.items()
    }
@router.get("/quantity_kinds")
@version(0, 1)
async def get_quantity_kinds_data(
    remove_namespaces: bool = True,
    lang: str | None = None,
) -> JSONResponse:
    """
    Returns all quantity kind concepts. Data is formatted as a JSON `Map`, with keys of quantity
    kind IRIs, and values of maps of `{key: value}`. Because the same key can be used more than
    once (e.g. a `prefLabel` can have values in different languages), the `value` can be either a
    single object or an array of objects.

    Pass `remove_namespaces` to control the verbosity of the response. By default, some common
    namespace prefixes of the predicates and objects are removed:

    * http://qudt.org/schema/qudt/
    * http://www.w3.org/1999/02/22-rdf-syntax-ns#
    * http://www.w3.org/2004/02/skos/core#

    Use `lang` to control what language codes are available in the response. Response data can
    include RDF literals with many languages, and the default is not to do any filtering. If you
    pass `lang`, RDF literals with an explicit language code are only returned when that code
    starts with `lang`. For example, `lang='en'` will return object literals without a language
    code, with a `en` language code, with a `en_GB` language code, but not a `jp` code.

    """
    # The handler returns a ready-made JSONResponse, so the return annotation
    # names that type rather than `str`.
    result = get_quantity_kinds(lang=lang, remove_namespaces=remove_namespaces)
    return JSONResponse(content=result)