diff --git a/core/utils/harvesters.py b/core/utils/harvesters.py
index ebd0e3fa..46337beb 100644
--- a/core/utils/harvesters.py
+++ b/core/utils/harvesters.py
@@ -110,7 +110,8 @@ def format_raw(self, pid_v3, item):
             "item": item,
-            # o nome do é status (o ideal é que fosse is_public) mas o OPAC retorna "false" ou "true" na chave status
-            # e o valor deve ser True se explicitamente é o contrário de "false"
-            "is_public": item.get("status") != "false",
+            # The OPAC exposes this flag under the key "status" (ideally it would
+            # be named is_public) and returns it as the string "false" or "true".
+            # Anything other than an explicit "false" is treated as public.
+            "is_public": str(item.get("status")).lower() != "false",
         }
 
     def format_normalized(self, pid_v3, item):
diff --git a/pid_provider/base_pid_provider.py b/pid_provider/base_pid_provider.py
index baf8a640..362d6051 100644
--- a/pid_provider/base_pid_provider.py
+++ b/pid_provider/base_pid_provider.py
@@ -172,7 +172,12 @@ def provide_pid_for_xml_uri(
         """
         # a) Try to obtain XML from URI
+        # Pre-bind the names read by the error handler further down.
+        xml_url_record = None
+        xml_with_pre = None
+        resp = None
         try:
             xml_with_pre = list(XMLWithPre.create(uri=xml_uri))[0]
+            xml_url_record = XMLURL.record(user, xml_uri, None, document_item, xml_with_pre=xml_with_pre)
         except Exception as e:
             return XMLURL.record(user, xml_uri, "xml_fetch_failed", document_item, exception=e)
 
@@ -197,11 +202,19 @@ def provide_pid_for_xml_uri(
                 pass  # If xml_with_pre is not present or cannot be removed, ignore and log rest of response
 
             if response.get("error_type") or response.get("error_msg") or response.get("error_message"):
-                XMLURL.record(user, xml_uri, "pid_provider_xml_failed", document_item, response=resp, xml_with_pre=xml_with_pre, name=name)
+                status = "pid_provider_xml_failed"
             else:
-                XMLURL.record(user, xml_uri, "success", document_item, response=resp, xml_with_pre=xml_with_pre, name=name)
+                status = "success"
+
+            xml_url_record.update_record(user, status, exception=None, response=resp, xml_with_pre=xml_with_pre)
             return response
         except Exception as e:
+            # The fetch step already created a tracking record: attach the failure to it.
+            if xml_url_record is not None:
+                xml_url_record.update_record(user, status=None, exception=e, response=resp, xml_with_pre=xml_with_pre)
+                # NOTE(review): resp may still be None here; confirm callers accept that.
+                return resp
+
             return self._handle_unexpected_error(e, xml_uri, name, user, origin_date, force_update, is_published, document_item)
 
     def _handle_unexpected_error(self, exception, xml_uri, name, user, origin_date, force_update, is_published, document_item):
diff --git a/pid_provider/models.py b/pid_provider/models.py
index c46016e6..c5bb371a 100644
--- a/pid_provider/models.py
+++ b/pid_provider/models.py
@@ -1764,22 +1764,78 @@ def save_file(self, xml_content, filename=None):
 
     @classmethod
     def record(cls, user, url, status, document_item, *, exception=None, response=None, xml_with_pre=None, name=None):
+        """Create or update the tracking record for ``url`` and, when given, store its XML.
+
+        The PID is taken, in increasing order of precedence, from
+        ``document_item["pid_v3"]``, ``xml_with_pre.v3`` and ``response["v3"]``.
+        ``is_public`` stays ``None`` when ``document_item`` has no ``status``;
+        otherwise it is ``False`` only for an explicit "false" value.
+
+        Returns the object produced by ``cls.create_or_update``.
+        """
+        pid = None
+        is_public = None
         detail = {"document_item": document_item}
+
+        if document_item:
+            pid = document_item.get("pid_v3")
+            doc_status = document_item.get("status")
+            if doc_status is not None:
+                # Keep is_public a real boolean: the raw "false" string is truthy.
+                is_public = str(doc_status).lower() != "false"
+        if xml_with_pre is not None:
+            pid = xml_with_pre.v3 or pid
+
         if exception is not None:
             detail["exceptions"] = traceback.format_exc()
         if response is not None:
             detail["response"] = response
-
-        pid = response.get("v3") if response else None
-
-        is_public = None
-        if document_item:
-            doc_status = document_item.get("status")
-            if doc_status is not None:
-                is_public = doc_status != "false"
+            pid = response.get("v3") or pid
 
         xmlurl_obj = cls.create_or_update(user=user, url=url, status=status, pid=pid, detail=detail, is_public=is_public)
         if xml_with_pre is not None:
-            filename = name or pid or "content.xml"
+            # Prefer the package name; fall back to the caller's name, then the PID.
+            basename = xml_with_pre.sps_pkg_name or name or pid or "content"
+            filename = basename if basename.endswith(".xml") else f"{basename}.xml"
             xmlurl_obj.save_file(xml_with_pre.tostring(), filename=filename)
         return xmlurl_obj
+
+    def update_record(self, user, status, exception=None, response=None, xml_with_pre=None):
+        """Update this record with the outcome of a processing attempt.
+
+        Stores ``status``, merges ``exception``/``response`` information into
+        ``detail``, refreshes ``pid`` and the stored XML file, then saves.
+
+        Returns ``self``.
+        """
+        # Work on a copy so a null ``detail`` cannot break the item assignments.
+        detail = dict(self.detail or {})
+
+        if exception is not None:
+            # Format the exception that was passed in rather than relying on
+            # being called from inside an ``except`` block.
+            detail["exceptions"] = "".join(
+                traceback.format_exception(type(exception), exception, exception.__traceback__)
+            )
+        if response is not None:
+            detail["response"] = response
+
+        self.status = status
+        self.detail = detail
+        self.updated_by = user
+
+        # Same precedence as ``record``: the provider response wins over the XML,
+        # and an existing PID is never replaced by an empty value.
+        pid = response.get("v3") if response else None
+        if xml_with_pre is not None:
+            pid = pid or xml_with_pre.v3
+        if pid:
+            self.pid = pid
+
+        if xml_with_pre is not None:
+            basename = xml_with_pre.sps_pkg_name or self.pid or "content"
+            filename = basename if basename.endswith(".xml") else f"{basename}.xml"
+            self.save_file(xml_with_pre.tostring(), filename=filename)
+
+        self.save()
+        return self
diff --git a/pid_provider/tasks.py b/pid_provider/tasks.py
index 3cd5bdd7..9529e073 100644
--- a/pid_provider/tasks.py
+++ b/pid_provider/tasks.py
@@ -142,9 +142,6 @@ def task_load_records_from_counter_dict(
             url = document.get("url")
             origin_date = document.get("origin_date")
 
-            if not document.get("is_public"):
-                continue
-
             document_item = document.get("item") or {}
 
             task_load_record_from_xml_url.delay(