From 98003c6731a850bc84cd4dc075ab9fd6555d756e Mon Sep 17 00:00:00 2001 From: Noor <210715619+nalduu@users.noreply.github.com> Date: Thu, 26 Feb 2026 18:46:07 +0000 Subject: [PATCH] updated validator to query datasets instead of discovery metadata, now that levels has been removed from DM; updated elasticsearch to query DM based on the new DM id --- woudc_api/plugins/validate.py | 15 +++++---------- woudc_api/provider/elasticsearch.py | 8 +++----- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/woudc_api/plugins/validate.py b/woudc_api/plugins/validate.py index 3670e80..b5d4eff 100644 --- a/woudc_api/plugins/validate.py +++ b/woudc_api/plugins/validate.py @@ -269,20 +269,15 @@ def check_content(self): if not self.ecsv._add_to_report(217, valueline, value=1.0): success = False self.level = 1.0 - self.dataset += '_' + str(self.level) if not success: return success - index = 'discovery_metadata' - field = '_id' + index = 'dataset' + field = 'dataset_name' content_body = self.query_by_field(index, field, self.dataset) - _levels = content_body[0]['_source']['properties']['levels'] - levels = [] - for level in _levels: - label = level['label_en'] - levels.append(label[len(label)-3:]) - + levels = [content_body[i]['_source']['properties']['dataset_level'] + for i in range(0, len(content_body))] if str(self.level) not in levels: if not self.ecsv._add_to_report(309, valueline, dataset=self.dataset): @@ -716,7 +711,7 @@ def query_by_field(self, index, field, value): query = { "query": { "term": { - "_id": value + "_en", + "_id": value, } } } diff --git a/woudc_api/provider/elasticsearch.py b/woudc_api/provider/elasticsearch.py index 5b79683..090945f 100644 --- a/woudc_api/provider/elasticsearch.py +++ b/woudc_api/provider/elasticsearch.py @@ -162,8 +162,7 @@ def query(self, offset=0, limit=10, resulttype='results', if self.index_name.endswith('discovery_metadata'): LOGGER.debug('Intercepting default ES response') for feature in records['features']: - if 
feature['id'].endswith(language): - feature['id'] = feature['id'].rsplit(f'_{language}')[0] + if feature['properties']['language']['code'] == language: new_features.append(feature) records['features'] = new_features records['numberMatched'] = len(records['features']) + offset @@ -191,12 +190,11 @@ def get(self, identifier, **kwargs): identifier, language) if self.index_name.endswith('discovery_metadata'): - identifier2 = f'{identifier}_{language}' + new_id = identifier.split(':')[-1] + identifier2 = f'{new_id}_{language}' else: identifier2 = identifier dataset = super().get(identifier2, **kwargs) - dataset['id'] = dataset['id'].rsplit(f'_{language}')[0] - return dataset