66from jsonschema import Draft7Validator , RefResolver
77from lxml import etree
88from urllib .request import pathname2url
9+ from .utils import read_json_schema_from_url
10+ from .constants import ECHO10_C , SCHEMA_PATHS , UMM_C , UMM_G
911
10- from .constants import ECHO10_C , SCHEMA_PATHS , UMM_C
1112
# Base URL under which the versioned UMM JSON schemas are fetched.
SCHEMA_CDN_BASE = "https://cdn.earthdata.nasa.gov/umm"

# UMM-C schema versions SchemaValidator accepts, plus the version used when
# the caller does not choose one.
DEFAULT_UMM_C_VERSION = "v1.18.4"
SUPPORTED_UMM_C_VERSIONS = [
    "v1.18.4",
    "v1.18.3",
    "v1.18.2",
]

# Same pair of settings for UMM-G.
DEFAULT_UMM_G_VERSION = "v1.6.6"
SUPPORTED_UMM_G_VERSIONS = [
    "v1.6.6",
]

# Repository "browse" pages of the ECHO 10 XSD files, looked up by metadata
# format. read_xml_schema rewrites "/browse/" to "/raw/" before downloading.
# NOTE(review): the lookup key is SchemaValidator.metadata_format -- confirm
# these keys match the format constants in .constants (e.g. ECHO10_C);
# if they do not, the remote schema is never used.
REMOTE_XML_SCHEMAS = {
    "echo10_collection": "https://git.earthdata.nasa.gov/projects/EMFD/repos/echo-schemas/browse/schemas/10.0/Collection.xsd",
    "echo10_granule": "https://git.earthdata.nasa.gov/projects/EMFD/repos/echo-schemas/browse/schemas/10.0/Granule.xsd",
}
1226
1327class SchemaValidator :
1428 """
@@ -21,6 +35,10 @@ def __init__(
2135 self ,
2236 check_messages ,
2337 metadata_format = ECHO10_C ,
38+ # Add a new parameter for UMM-C version
39+ umm_c_version = DEFAULT_UMM_C_VERSION ,
40+ # Add a new parameter for UMM-G version (if you want to make it flexible too)
41+ umm_g_version = DEFAULT_UMM_G_VERSION
2442 ):
2543 """
2644 Args:
@@ -29,41 +47,95 @@ def __init__(
2947 validation_paths (list of str): The path of the fields in the
3048 metadata that need to be validated. In the form
3149 ['Collection/StartDate', ...].
50+ umm_c_version (str): The specific UMM-C version to use for validation (e.g., "v1.18.4").
51+ umm_g_version (str): The specific UMM-G version to use for validation (e.g., "v1.6.6").
52+ check_messages (dict): A dictionary of check messages for errors.
3253 """
3354 self .metadata_format = metadata_format
55+ # Validate and store the UMM-C version
56+ if umm_c_version not in SUPPORTED_UMM_C_VERSIONS :
57+ raise ValueError (
58+ f"Unsupported UMM-C version: { umm_c_version } . "
59+ f"Supported versions are: { ', ' .join (SUPPORTED_UMM_C_VERSIONS )} "
60+ )
61+ self .umm_c_version = umm_c_version
62+
63+ # Validate and store the UMM-G version
64+ if umm_g_version not in SUPPORTED_UMM_G_VERSIONS :
65+ raise ValueError (
66+ f"Unsupported UMM-G version: { umm_g_version } . "
67+ f"Supported versions are: { ', ' .join (SUPPORTED_UMM_G_VERSIONS )} "
68+ )
69+ self .umm_g_version = umm_g_version
70+
3471 if metadata_format .startswith ("umm-" ):
3572 self .validator_func = self .run_json_validator
3673 else :
3774 self .validator_func = self .run_xml_validator
3875 self .check_messages = check_messages
3976
77+
78+
def read_xml_schema(self):
    """
    Reads and compiles the XML schema for ``self.metadata_format``.

    If ``REMOTE_XML_SCHEMAS`` has an entry for the metadata format, the
    schema is downloaded from that URL first. When the download, or the
    parsing of the downloaded document, fails, the local file listed in
    ``SCHEMA_PATHS`` is used instead. Formats without a remote entry always
    use the local file.

    Returns:
        lxml.etree.XMLSchema: The compiled schema.

    Raises:
        KeyError: If ``SCHEMA_PATHS`` has no entry for the local schema.
        OSError: If the local schema file cannot be read.
        lxml.etree.LxmlError: If the local schema is not a valid XSD.
    """
    from http.client import HTTPException
    from urllib.request import urlopen

    # Seconds to wait for the remote server before using the local copy.
    fetch_timeout = 30

    # The XML schema file imports another schema file (MetadataCommon.xsd).
    # The import cannot be resolved when the files live somewhere other than
    # the calling script, so an XML catalog tells the parser where to look.
    # The catalog path must be a URL. A value already present in the
    # environment is left untouched.
    catalog_path = f"file:{pathname2url(str(SCHEMA_PATHS['catalog']))}"
    os.environ.setdefault("XML_CATALOG_FILES", catalog_path)

    def compile_schema(raw_bytes):
        """Parse XSD bytes and build the validator object."""
        return etree.XMLSchema(etree.parse(BytesIO(raw_bytes)))

    schema_url = REMOTE_XML_SCHEMAS.get(self.metadata_format)
    if schema_url:
        # A "/browse/" URL returns an HTML page; "/raw/" returns the file.
        if "/browse/" in schema_url:
            schema_url = (
                schema_url.replace("/browse/", "/raw/")
                + "?at=refs%2Fheads%2Fmaster"
            )
        print(f"Fetching schema remotely from: {schema_url}")
        try:
            # urlopen verifies the server certificate by default; the schema
            # decides what counts as valid metadata, so it must not be
            # fetched over an unverified connection.
            with urlopen(schema_url, timeout=fetch_timeout) as response:
                return compile_schema(response.read())
        except (OSError, ValueError, HTTPException, etree.LxmlError) as e:
            # Best effort only: network, TLS, URL and schema-parsing problems
            # all lead to the bundled local schema.
            print(
                f"⚠️ Remote fetch failed or unavailable for "
                f"{self.metadata_format}: {e}"
            )
            print("Falling back to local schema file...")

    # Read bytes so the parser honours the encoding declared in the file.
    with open(SCHEMA_PATHS[f"{self.metadata_format}_schema"], "rb") as schema_file:
        return compile_schema(schema_file.read())
124+
def read_json_schema(self):
    """
    Loads the JSON schema for ``self.metadata_format``.

    UMM-C and UMM-G schemas are fetched from ``SCHEMA_CDN_BASE`` at the
    version stored on the instance; every other format is read from the
    local file listed in ``SCHEMA_PATHS``.

    Returns:
        The value returned by ``read_json_schema_from_url`` for UMM-C and
        UMM-G, otherwise the JSON document parsed from the local file.
    """
    fmt = self.metadata_format

    # Anything that is not a UMM-C / UMM-G format comes from disk.
    if fmt != UMM_C and fmt != UMM_G:
        with open(SCHEMA_PATHS[f"{fmt}-json-schema"]) as local_file:
            return json.load(local_file)

    if fmt == UMM_C:
        remote_location = (
            f"{SCHEMA_CDN_BASE}/collection/{self.umm_c_version}/umm-c-json-schema.json"
        )
    else:
        remote_location = (
            f"{SCHEMA_CDN_BASE}/granule/{self.umm_g_version}/umm-g-json-schema.json"
        )
    return read_json_schema_from_url(remote_location)
67139
68140 def run_json_validator (self , content_to_validate ):
69141 """
@@ -77,19 +149,28 @@ def run_json_validator(self, content_to_validate):
77149 schema_store = {}
78150
79151 if self .metadata_format == UMM_C :
80- with open (SCHEMA_PATHS ["umm-cmn-json-schema" ]) as schema_file :
81- schema_base = json .load (schema_file )
82152
83- # workaround to read local referenced schema file (only supports uri)
84- schema_store = {
85- schema_base .get ("$id" , "/umm-cmn-json-schema.json" ): schema_base ,
86- schema_base .get ("$id" , "umm-cmn-json-schema.json" ): schema_base ,
87- }
88153
89- errors = {}
154+ #umm_cmn_schema_url = f"{SCHEMA_CDN_BASE}/collection/{self.umm_c_version}/umm-c-json-schema.json"
155+ # If it's *not* versioned and always the latest or a specific fixed version, adjust this URL
156+ # e.g., f"{SCHEMA_CDN_BASE}/common/umm-cmn-json-schema.json" or from SCHEMA_PATHS
90157
91- resolver = RefResolver .from_schema (schema , store = schema_store )
158+ try :
159+ with open (SCHEMA_PATHS ["umm-cmn-json-schema" ]) as common_schema_file :
160+ schema_base = json .load (common_schema_file )
161+ # 1. Add the schema using its $id (most common canonical reference)
162+ if "$id" in schema_base :
163+ schema_store [schema_base ["$id" ]] = schema_base
164+
165+ # 2. Add the schema using the full URL you fetched it from (if different from $id or for robustness)
166+ schema_store ["/umm-cmn-json-schema.json" ] = schema_base
167+ schema_store ["umm-cmn-json-schema.json" ] = schema_base
168+ except Exception as e :
169+ print (f"Error loading UMM Common schema from { SCHEMA_PATHS ['umm-cmn-json-schema' ]} : { e } " )
170+ print ("Schema validation for UMM-C might proceed without common schema, leading to incomplete validation." )
92171
172+ errors = {}
173+ resolver = RefResolver .from_schema (schema , store = schema_store )
93174 validator = Draft7Validator (
94175 schema , format_checker = Draft7Validator .FORMAT_CHECKER , resolver = resolver
95176 )
0 commit comments