@@ -33,6 +33,8 @@ def request(url: str,
3333class SourceItem :
3434 patch : Optional [List [Statement ]] = None
3535 proposed_label : Optional [str ] = None
36+ prior_ident : Optional [Statement ] = None
37+ new_ident : Optional [Statement ] = None
3638 _lookup_cache : ClassVar [Dict [str , Dict [Any , Optional [str ]]]] = {}
3739 _registry : ClassVar [List [type ]] = []
3840 _connectors_loaded : ClassVar [bool ] = False
@@ -145,37 +147,50 @@ def register_new_item(statements: List[Statement]) -> None:
145147 property_id , value , qid , SourceItem ._lookup_cache [property_id ][value ])
146148 SourceItem ._lookup_cache [property_id ][value ] = qid
147149
148- def parse (self , text : str ) -> bool :
150+ def parse (self , text : str , ident : Statement ) -> None :
149151 raise NotImplementedError ('Subclasses must implement parse' )
150152
151153 @classmethod
152- def extract (cls , ident : Statement ) -> Optional [SourceItem ]:
153- """Returns None on retrieval failure; instance with empty patch if datasource
154- confirms the record does not exist; instance with non-empty patch where
155- patch[0] is the requested id (possibly updated from a redirect)."""
154+ def extract (cls , ident : Statement ) -> SourceItem :
155+ """Fetch and parse the record for `ident`. Always returns a SourceItem.
156+
157+ Empty (patch/prior_ident/new_ident all None): no info — network error,
158+ unrecognised property, or parse failure.
159+
160+ prior_ident set, new_ident None: ident confirmed gone; caller should
161+ deprecate that statement.
162+
163+ prior_ident and new_ident set: ident redirected; caller should replace
164+ old value with new_ident. patch carries any additional claims.
165+
166+ Only patch set: ident unchanged; patch carries new claims to merge.
167+ """
156168 if not (req := cls .make_request (ident )):
157- return None
169+ return cls ()
158170 url = req .full_url
159171 try :
160172 resp = build_opener ().open (req , timeout = 30 )
161173 except urllib .error .HTTPError as e :
162174 resp = e
163175 except Exception as e :
164176 logging .error ('Request failed for %s: %s' , url , e )
165- return None
177+ return cls ()
166178 handled = cls ._config .get ('extract' , [])
167179 with resp :
168180 if ((code := resp .getcode ()) == 404 ) and (404 in handled ):
169- return cls (patch = [])
181+ first_prop = next (iter (cls ._config .get ('properties' , {})), None )
182+ if ident .mainsnak .property == first_prop :
183+ return cls (prior_ident = ident )
184+ return cls ()
170185 destination = resp .geturl ()
171186 text = resp .read ().decode ('utf-8' )
172187 if code != 200 :
173188 logging .error (f'Returned code { code } for { url } , message: { text } ' )
174189 destination_url = destination or url
175190 redirected = 301 in handled and destination_url .lower () != url .lower ()
191+ item = cls ()
176192 if redirected :
177- patch_ident = cls .update_ident (ident , destination_url )
178- else :
179- patch_ident = ident
180- item = cls (patch = [patch_ident ])
181- return item if item .parse (text ) else None
193+ item .prior_ident = ident
194+ item .new_ident = cls .update_ident (ident , destination_url )
195+ item .parse (text , ident )
196+ return item
0 commit comments