From 98003c6731a850bc84cd4dc075ab9fd6555d756e Mon Sep 17 00:00:00 2001
From: Noor <210715619+nalduu@users.noreply.github.com>
Date: Thu, 26 Feb 2026 18:46:07 +0000
Subject: [PATCH] updated validator to query datasets instead of discovery
 metadata, now that levels has been removed from DM

Updated the Elasticsearch provider to query discovery metadata (DM) using the new DM id.
---
 woudc_api/plugins/validate.py       | 15 +++++----------
 woudc_api/provider/elasticsearch.py |  8 +++-----
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/woudc_api/plugins/validate.py b/woudc_api/plugins/validate.py
index 3670e80..b5d4eff 100644
--- a/woudc_api/plugins/validate.py
+++ b/woudc_api/plugins/validate.py
@@ -269,20 +269,15 @@ def check_content(self):
                 if not self.ecsv._add_to_report(217, valueline, value=1.0):
                     success = False
                 self.level = 1.0
-            self.dataset += '_' + str(self.level)
 
         if not success:
             return success
 
-        index = 'discovery_metadata'
-        field = '_id'
+        index = 'dataset'
+        field = 'dataset_name'
         content_body = self.query_by_field(index, field, self.dataset)
-        _levels = content_body[0]['_source']['properties']['levels']
-        levels = []
-        for level in _levels:
-            label = level['label_en']
-            levels.append(label[len(label)-3:])
-
+        levels = [content_body[i]['_source']['properties']['dataset_level']
+                  for i in range(0, len(content_body))]
         if str(self.level) not in levels:
             if not self.ecsv._add_to_report(309, valueline,
                                             dataset=self.dataset):
@@ -716,7 +711,7 @@ def query_by_field(self, index, field, value):
             query = {
                 "query": {
                     "term": {
-                        "_id": value + "_en",
+                        "_id": value,
                     }
                 }
             }
diff --git a/woudc_api/provider/elasticsearch.py b/woudc_api/provider/elasticsearch.py
index 5b79683..090945f 100644
--- a/woudc_api/provider/elasticsearch.py
+++ b/woudc_api/provider/elasticsearch.py
@@ -162,8 +162,7 @@ def query(self, offset=0, limit=10, resulttype='results',
         if self.index_name.endswith('discovery_metadata'):
             LOGGER.debug('Intercepting default ES response')
             for feature in records['features']:
-                if feature['id'].endswith(language):
-                    feature['id'] = feature['id'].rsplit(f'_{language}')[0]
+                if feature['properties']['language']['code'] == language:
                     new_features.append(feature)
             records['features'] = new_features
             records['numberMatched'] = len(records['features']) + offset
@@ -191,12 +190,11 @@ def get(self, identifier, **kwargs):
                     identifier, language)
 
         if self.index_name.endswith('discovery_metadata'):
-            identifier2 = f'{identifier}_{language}'
+            new_id = identifier.split(':')[-1]
+            identifier2 = f'{new_id}_{language}'
         else:
             identifier2 = identifier
 
         dataset = super().get(identifier2, **kwargs)
 
-        dataset['id'] = dataset['id'].rsplit(f'_{language}')[0]
-
         return dataset