From 36ea66adbb18ab456c7c694cbea08261178fffeb Mon Sep 17 00:00:00 2001
From: amercader
Date: Wed, 30 Oct 2013 14:27:14 +0000
Subject: [PATCH 1/2] Use search to get datasets, as it is significantly faster

Getting stuff from the search index is much faster than querying the DB
---
# Review notes (free-text area after the "---" separator; not part of the commit):
#
# * make_json() now maps make_datajson_entry over _get_ckan_datasets()
#   instead of over the result of current_package_list_with_resources.
#   The Python-side filter pkg["type"] == "dataset" is replaced by the
#   search filter 'fq': 'dataset_type:dataset'.
# * _get_ckan_datasets() calls the package_search action repeatedly with
#   rows=500 and start=500 * (page - 1), extends one list with every
#   non-empty page and returns it. The loop only exits on an empty page,
#   so the final request always returns no results.
# * NOTE(review): pages are sorted by 'metadata_modified desc'; presumably a
#   dataset modified while the loop runs can move between pages and be
#   duplicated or skipped - confirm whether that matters for data.json.
# * NOTE(review): the action context changed from None to {} - confirm the
#   set of datasets visible to the search matches the old listing.
# * NOTE(review): line breaks, blank context lines and indentation in this
#   file were reconstructed from a whitespace-collapsed copy (4 spaces
#   assumed); verify against the upstream commit before applying.
#
 ckanext/datajson/plugin.py | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/ckanext/datajson/plugin.py b/ckanext/datajson/plugin.py
index 6e38c8dc..76eefc96 100644
--- a/ckanext/datajson/plugin.py
+++ b/ckanext/datajson/plugin.py
@@ -128,7 +128,27 @@ def validator(self):
 
 def make_json():
     # Build the data.json file.
-    packages = p.toolkit.get_action("current_package_list_with_resources")(None, {})
-    return [make_datajson_entry(pkg) for pkg in packages if pkg["type"] == "dataset"]
-
+    return [make_datajson_entry(dataset) for dataset in _get_ckan_datasets()]
+
+def _get_ckan_datasets():
+
+    n = 500
+    page = 1
+    datasets = []
+
+    while True:
+        search_data_dict = {
+            'q': '*:*',
+            'fq': 'dataset_type:dataset',
+            'sort': 'metadata_modified desc',
+            'rows': n,
+            'start': n * (page - 1),
+        }
+        query = p.toolkit.get_action('package_search')({}, search_data_dict)
+        if len(query['results']):
+            datasets.extend(query['results'])
+            page = page + 1
+        else:
+            break
+    return datasets
 
From ac1a480e8b659639defaad7f2326c688f1a69b97 Mon Sep 17 00:00:00 2001
From: amercader
Date: Wed, 30 Oct 2013 14:48:10 +0000
Subject: [PATCH 2/2] No need to split keywords in json-ld, as they are already a list

---
# Review notes (free-text area after the "---" separator; not part of the commit):
#
# * Removes the branch in apply_jsonld_metadata_mapping() that ran
#   v.split(",") when k == "keyword"; the value is now passed on unchanged.
# * NOTE(review): the subject line says keyword values are already lists;
#   confirm no remaining caller still supplies a comma-separated string.
# * NOTE(review): indentation of the context lines below was reconstructed
#   from a whitespace-collapsed copy (4 spaces assumed); verify against the
#   upstream commit before applying.
#
 ckanext/datajson/build_datajsonld.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/ckanext/datajson/build_datajsonld.py b/ckanext/datajson/build_datajsonld.py
index bc2d9974..3627c711 100644
--- a/ckanext/datajson/build_datajsonld.py
+++ b/ckanext/datajson/build_datajsonld.py
@@ -70,10 +70,6 @@ def apply_jsonld_metadata_mapping(data, newdict):
         # skip fields with no mapping to RDF
         if k not in jsonld_metadata_mapping:
             continue
-
-        # specially handle 'keyword' which in JSON is packed in a comma-separated field
-        if k == "keyword":
-            v = v.split(",")
 
         # specially handle literal fields with datatypes
         if k in jsonld_metadata_datatypes: