88import os
99import traceback
1010from collections import deque
11- from copy import deepcopy
1211from datetime import datetime
1312from threading import Condition , Lock
14- from typing import Any , Callable , Deque , Dict , Optional , Tuple
13+ from typing import Any , Callable , Deque , Dict , List , Optional , TYPE_CHECKING , Tuple
1514from urllib .parse import quote as urlescape
1615
1716import requests
1817from requests .adapters import Response
18+ from pydantic import BaseModel , Field
1919
2020from .constants import config
2121from .exceptions import ResourceException
2222from .fetch import make_fetcher
2323from .logs import get_log
2424from .utils import Watchable , hex_digest , img_to_data , non_blocking_lock , resource_string , safe_write , url_get , utc_now
2525
26+ if TYPE_CHECKING :
27+ from .parse import PyffParser
28+ from .pipes import PipelineCallback
29+
# Runs at import time: turn off the warnings urllib3 would otherwise emit.
2630requests .packages .urllib3 .disable_warnings ()
2731
# Module-level logger from the project's get_log helper, named after this module.
2832log = get_log (__name__ )
@@ -131,11 +135,32 @@ def i_handle(self, t: Resource, url=None, response=None, exception=None, last_fe
131135 t .info ['Exception' ] = ex
132136
133137
class ResourceOpts(BaseModel):
    """Typed container for the options attached to a resource.

    Two fields are declared with an alias: ``alias`` is exposed as ``'as'``
    and ``validate_schema`` as ``'validate'``. ``to_dict()`` returns the
    options keyed by those external names.
    """

    alias: Optional[str] = Field(None, alias='as')  # TODO: Rename to 'name'?
    # Certificate (file) or SHA1 fingerprint used for signature verification.
    verify: Optional[str] = None
    # List of PipelineCallback.
    via: List[Any] = Field([])
    # List of PipelineCallback that can be used to pre-process parsed metadata
    # before validation. Use as a clue-bat.
    cleanup: List[Any] = Field([])
    fail_on_error: bool = False
    # Remove invalid EntityDescriptor elements rather than raise an error.
    filter_invalid: bool = True
    # Set to False to turn off all XML schema validation.
    validate_schema: bool = Field(True, alias='validate')

    def to_dict(self) -> Dict[str, Any]:
        """Return the options as a dict that uses the aliased key names.

        ``dict()`` reports the Python field names, so the two aliased fields
        are moved to the keys ``'as'`` and ``'validate'``.
        """
        exported = self.dict()
        # Compensate for the 'alias' field options.
        for field_name, external_key in (('alias', 'as'), ('validate_schema', 'validate')):
            exported[external_key] = exported.pop(field_name)
        return exported
157+
158+
134159class Resource (Watchable ):
135- def __init__ (self , url = None , ** kwargs ):
160+ def __init__ (self , url : Optional [ str ], opts : ResourceOpts ):
136161 super ().__init__ ()
137162 self .url : str = url
138- self .opts = kwargs
163+ self .opts = opts
139164 self .t = None
140165 self .type = "text/plain"
141166 self .etag = None
@@ -148,12 +173,6 @@ def __init__(self, url=None, **kwargs):
148173 self ._setup ()
149174
150175 def _setup (self ):
151- self .opts .setdefault ('cleanup' , [])
152- self .opts .setdefault ('via' , [])
153- self .opts .setdefault ('fail_on_error' , False )
154- self .opts .setdefault ('verify' , None )
155- self .opts .setdefault ('filter_invalid' , True )
156- self .opts .setdefault ('validate' , True )
157176 if self .url is not None :
158177 if "://" not in self .url :
159178 pth = os .path .abspath (self .url )
@@ -178,20 +197,20 @@ def local_copy_fn(self):
178197 return os .path .join (config .local_copy_dir , urlescape (self .url ))
179198
@property
def post(self) -> List['PipelineCallback']:
    """Return the callbacks stored in the ``via`` option.

    The list object held by the options is returned as-is, not a copy.
    """
    via_callbacks = self.opts.via
    return via_callbacks
183202
def add_via(self, callback: 'PipelineCallback') -> None:
    """Append ``callback`` to the ``via`` callback list of this resource's options."""
    via_callbacks = self.opts.via
    via_callbacks.append(callback)
186205
@property
def cleanup(self) -> List['PipelineCallback']:
    """Return the callbacks stored in the ``cleanup`` option.

    The list object held by the options is returned as-is, not a copy.
    """
    cleanup_callbacks = self.opts.cleanup
    return cleanup_callbacks
190209
def __str__(self):
    """Describe the resource: URL (or "(root)"), expiry time and its options.

    Options are rendered as comma-separated ``key=value`` pairs, sorted by
    option name.
    """
    shown_url = "(root)" if self.url is None else self.url
    prefix = "Resource {} expires at {} using ".format(shown_url, self.expire_time)
    rendered = []
    for key, value in sorted(self.opts.dict().items()):
        rendered.append("{}={}".format(key, value))
    return prefix + ",".join(rendered)
195214
196215 def reload (self , fail_on_error = False ):
197216 with non_blocking_lock (self .lock ):
@@ -252,25 +271,24 @@ def _replace(self, r: Resource) -> None:
252271 return
253272 raise ValueError ("Resource {} not present - use add_child" .format (r .url ))
254273
def add_child(self, url: str, opts: ResourceOpts) -> Resource:
    """Create a child resource for ``url`` and register it under this one.

    A new ``Resource`` is built from ``url`` and ``opts``. If an equal
    resource is already among ``self.children`` it is swapped in through
    ``_replace``; otherwise the new resource is appended.

    :param url: location of the child resource
    :param opts: options for the child resource
    :return: the newly created resource
    """
    r = Resource(url, opts)
    if r not in self.children:
        log.debug(f'\n \n { self } :\n URL { url } \n Adding child { r } ')
        if not r.opts.via:
            log.debug('Empty Via')
        self.children.append(r)
        return r

    # An equal child already exists, so swap it for the new instance.
    log.debug(f'\n \n { self } :\n URL { url } \n Replacing child { r } ')
    self._replace(r)
    return r
267286
@property
def name(self) -> Optional[str]:
    """Return the configured alias when one is set, otherwise the URL.

    An empty or missing alias falls back to ``self.url``, which may itself
    be ``None``.
    """
    return self.opts.alias or self.url
274292
275293 @property
276294 def info (self ):
@@ -359,7 +377,7 @@ def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str
359377
360378 return data , status , info
361379
362- def parse (self , getter ) :
380+ def parse (self , getter : Callable [[ str ], Response ]) -> Deque [ Resource ] :
363381 data , status , info = self .load_resource (getter )
364382 info ['State' ] = 'Parsing'
365383 # local import to avoid circular import
@@ -375,10 +393,10 @@ def parse(self, getter):
375393
376394 info ['State' ] = 'Parsed'
377395 if self .t is not None :
378- if self .post and isinstance ( self . post , list ) :
396+ if self .post :
379397 for cb in self .post :
380398 if self .t is not None :
381- self .t = cb (self .t , ** self .opts )
399+ self .t = cb (self .t , self .opts . dict () )
382400
383401 if self .is_expired ():
384402 info ['Expired' ] = True
0 commit comments