99import traceback
1010from collections import deque
1111from datetime import datetime
12+ from enum import Enum
1213from threading import Condition , Lock
13- from typing import TYPE_CHECKING , Any , Callable , Deque , Dict , Iterable , List , Optional , Tuple
14+ from typing import Any , Callable , Deque , Dict , Iterable , List , Mapping , Optional , TYPE_CHECKING , Tuple
1415from urllib .parse import quote as urlescape
1516
1617import requests
3435)
3536
3637if TYPE_CHECKING :
37- from pyff .parse import PyffParser
38+ from pyff .parse import ParserInfo , PyffParser
3839 from pyff .pipes import PipelineCallback
3940 from pyff .utils import Lambda
4041
@@ -141,14 +142,14 @@ def thing_to_url(self, t: Resource) -> Optional[str]:
def i_handle(self, t: Resource, url=None, response=None, exception=None, last_fetched=None):
    """Handle the outcome of fetching resource *t*.

    When *exception* is given it is recorded on ``t.info.exception``.
    Otherwise *t* is parsed using a getter that always returns *response*,
    and the children returned by the parse are passed to ``i_schedule``.

    Anything raised while doing so (``BaseException`` included) is logged
    and stored on ``t.info.exception``; nothing is re-raised from here.
    The *url* and *last_fetched* arguments are accepted but not used in
    this body.
    """
    try:
        if exception is None:
            # Successful fetch: parse the response and queue whatever
            # child resources the parse produced.
            self.i_schedule(t.parse(lambda u: response))
        else:
            t.info.exception = exception
    except BaseException as ex:
        # Full traceback only at debug level; the error log gets the summary.
        log.debug(traceback.format_exc())
        log.error(f'Failed handling resource: { ex } ')
        t.info.exception = ex
152153
153154
154155class ResourceOpts (BaseModel ):
@@ -177,6 +178,50 @@ def to_dict(self) -> Dict[str, Any]:
177178 return res
178179
179180
class ResourceLoadState(str, Enum):
    """States recorded in ``ResourceInfo.state`` while a resource is loaded.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    Fetched = 'Fetched'  # set at the end of load_resource()
    Parsing = 'Parsing'  # set in parse() before the parser is invoked
    Parsed = 'Parsed'  # set in parse() after the parser has returned
    Ready = 'Ready'  # set as the last step of parse()
186+
187+
class ResourceInfo(BaseModel):
    """Typed record describing one attempt at loading a resource.

    Replaces the free-form info dict; ``to_dict()`` renders the record
    back into the dict layout with human-readable keys.
    """

    resource: str  # URL
    state: Optional[ResourceLoadState] = None
    http_headers: Dict[str, Any] = Field({})
    reason: Optional[str] = None
    # Explicit default: an Optional field without one is only implicitly
    # defaulted to None by pydantic v1, and is a required field in v2.
    status_code: Optional[str] = None  # HTTP status code as string. TODO: change to int
    # NOTE(review): ParserInfo is imported under TYPE_CHECKING in this file;
    # confirm the name is resolvable when pydantic builds this model.
    parser_info: Optional[ParserInfo] = None
    expired: Optional[bool] = None
    exception: Optional[BaseException] = None

    class Config:
        # Needed for the BaseException-typed field, which pydantic has no
        # built-in validator for.
        arbitrary_types_allowed = True

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a dict using the legacy display keys.

        Field names are converted to title case with underscores replaced
        by spaces (``status_code`` -> ``'Status Code'``), except
        ``http_headers`` which becomes ``'HTTP Response Headers'``.
        The contents of ``parser_info.to_dict()`` are merged into the top
        level and the ``'Parser Info'`` entry itself is dropped.
        ``'Description'`` is omitted when it equals ``'SAML Metadata'`` and
        ``'Exception'`` is omitted when it is ``None``.
        """
        special = {'http_headers': 'HTTP Response Headers'}

        def _format_key(k: str) -> str:
            # Turn validation_errors into 'Validation Errors'
            return special.get(k, k.replace('_', ' ').title())

        res = {_format_key(k): v for k, v in self.dict().items()}

        if self.parser_info:
            # Move contents from sub-dict to top of dict, for backwards compatibility
            res.update(self.parser_info.to_dict())
        # The legacy dict never had a 'Parser Info' entry, so drop it even
        # when it is just a None placeholder.
        res.pop('Parser Info', None)

        # backwards compat
        # 'Description' only exists if the parser info supplied it; use
        # .get() so a record without parser info does not raise KeyError.
        if res.get('Description') == 'SAML Metadata':
            del res['Description']
        if res.get('Exception') is None:
            res.pop('Exception', None)

        return res
223+
224+
180225class Resource (Watchable ):
181226 def __init__ (self , url : Optional [str ], opts : ResourceOpts ):
182227 super ().__init__ ()
@@ -189,7 +234,7 @@ def __init__(self, url: Optional[str], opts: ResourceOpts):
189234 self .never_expires : bool = False
190235 self .last_seen : Optional [datetime ] = None
191236 self .last_parser : Optional ['PyffParser' ] = None # importing PyffParser in this module causes a loop
192- self ._infos : Deque [Dict ] = deque (maxlen = config .info_buffer_size )
237+ self ._infos : Deque [ResourceInfo ] = deque (maxlen = config .info_buffer_size )
193238 self .children : Deque [Resource ] = deque ()
194239 self ._setup ()
195240
@@ -281,10 +326,8 @@ def is_expired(self) -> bool:
def is_valid(self) -> bool:
    """Return True if the resource is not expired and has been seen and parsed.

    "Seen" means ``last_seen`` is set and "parsed" means ``last_parser``
    is set.
    """
    if self.is_expired():
        return False
    return self.last_seen is not None and self.last_parser is not None
283328
def add_info(self) -> ResourceInfo:
    """Create a new ResourceInfo for this resource's URL and record it.

    The new record is appended to ``self._infos`` and returned so the
    caller can fill it in.
    """
    entry = ResourceInfo(resource=self.url)
    self._infos.append(entry)
    return entry
290333
@@ -315,18 +358,19 @@ def name(self) -> Optional[str]:
315358 return self .url
316359
@property
def info(self) -> ResourceInfo:
    """Return the most recently added ResourceInfo.

    When no record exists yet, a fresh ResourceInfo for this URL is
    returned instead. That fallback object is NOT stored in
    ``self._infos``, so changes made to it are not visible on later
    accesses.
    """
    if self._infos:
        return self._infos[-1]
    return ResourceInfo(resource=self.url)
323366
@property
def errors(self) -> Mapping[str, Any]:
    """Return the validation errors of the latest parse, or an empty dict.

    Shortcut for ``self.info.parser_info.validation_errors``; an empty
    dict is returned when the current info has no parser info.
    """
    # TODO: Maybe caller should get the ResourceInfo instance instead? Why this shortcut?
    latest = self.info.parser_info
    return latest.validation_errors if latest else {}
330374
331375 def load_backup (self ) -> Optional [str ]:
332376 if config .local_copy_dir is None :
@@ -352,7 +396,7 @@ def save_backup(self, data: Optional[str]) -> None:
352396 except IOError as ex :
353397 log .warning ("unable to save backup copy of {}: {}" .format (self .url , ex ))
354398
355- def load_resource (self , getter : Callable [[str ], Response ]) -> Tuple [Optional [str ], int , Dict [ str , Any ] ]:
399+ def load_resource (self , getter : Callable [[str ], Response ]) -> Tuple [Optional [str ], int , ResourceInfo ]:
356400 data : Optional [str ] = None
357401 status : int = 500
358402 info = self .add_info ()
@@ -366,14 +410,14 @@ def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str
366410 try :
367411 r = getter (self .url )
368412
369- info [ 'HTTP Response Headers' ] = r .headers
413+ info . http_headers = dict ( r .headers )
370414 log .debug (
371415 "got status_code={:d}, encoding={} from_cache={} from {}" .format (
372416 r .status_code , r .encoding , getattr (r , "from_cache" , False ), self .url
373417 )
374418 )
375419 status = r .status_code
376- info [ 'Reason' ] = r .reason
420+ info . reason = r .reason
377421
378422 if r .ok :
379423 data = r .text
@@ -389,25 +433,25 @@ def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str
389433 )
390434 data = self .load_backup ()
391435 if data is not None and len (data ) > 0 :
392- info [ 'Reason' ] = "Retrieved from local cache because status: {} != 200" .format (status )
436+ info . reason = "Retrieved from local cache because status: {} != 200" .format (status )
393437 status = 218
394438
395- info [ 'Status Code' ] = str (status )
439+ info . status_code = str (status )
396440
397441 except IOError as ex :
398442 if self .local_copy_fn is not None :
399443 log .warning ("caught exception from {} - trying local backup: {}" .format (self .url , ex ))
400444 data = self .load_backup ()
401445 if data is not None and len (data ) > 0 :
402- info [ 'Reason' ] = "Retrieved from local cache because exception: {}" .format (ex )
446+ info . reason = "Retrieved from local cache because exception: {}" .format (ex )
403447 status = 218
404448 if data is None or not len (data ) > 0 :
405449 raise ex # propagate exception if we can't find a backup
406450
407451 if data is None or not len (data ) > 0 :
408452 raise ResourceException ("failed to fetch {} (status: {:d})" .format (self .url , status ))
409453
410- info [ 'State' ] = ' Fetched'
454+ info . state = ResourceLoadState . Fetched
411455
412456 return data , status , info
413457
@@ -417,34 +461,33 @@ def parse(self, getter: Callable[[str], Response]) -> Deque[Resource]:
417461 if not data :
418462 raise ResourceException (f'Nothing to parse when loading resource { self } ' )
419463
420- info [ 'State' ] = ' Parsing'
464+ info . state = ResourceLoadState . Parsing
421465 # local import to avoid circular import
422466 from pyff .parse import parse_resource
423467
424- parse_info = parse_resource (self , data )
425- if parse_info is not None :
426- info .update (parse_info )
468+ info .parser_info = parse_resource (self , data )
427469
428470 if status != 218 : # write backup unless we just loaded from backup
429471 self .last_seen = utc_now ().replace (microsecond = 0 )
430472 self .save_backup (data )
431473
432- info [ 'State' ] = ' Parsed'
474+ info . state = ResourceLoadState . Parsed
433475 if self .t is not None :
434476 if self .post :
435477 for cb in self .post :
436478 if self .t is not None :
437479 self .t = cb (self .t , self .opts .dict ())
438480
439481 if self .is_expired ():
440- info [ 'Expired' ] = True
482+ info . expired = True
441483 raise ResourceException ("Resource at {} expired on {}" .format (self .url , self .expire_time ))
442484 else :
443- info [ 'Expired' ] = False
485+ info . expired = False
444486
445- for (eid , error ) in list (info ['Validation Errors' ].items ()):
446- log .error (error )
487+ if info .parser_info :
488+ for (eid , error ) in list (info .parser_info .validation_errors .items ()):
489+ log .error (error )
447490
448- info [ 'State' ] = ' Ready'
491+ info . state = ResourceLoadState . Ready
449492
450493 return self .children
0 commit comments