From 7ee5918b1f1381b4454820b633a7861ff63795a6 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Tue, 23 Jun 2026 13:23:55 -0300 Subject: [PATCH 1/3] =?UTF-8?q?fix(harvester):=20corrige=20infer=C3=AAncia?= =?UTF-8?q?=20de=20is=5Fpublic=20a=20partir=20do=20status=20do=20OPAC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ajusta o mapeamento para aceitar o valor booleano nativo retornado pela chave 'status' do item, impedindo que 'status: false' seja avaliado erroneamente como True devido à comparação de string antiga. --- core/utils/harvesters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/utils/harvesters.py b/core/utils/harvesters.py index ebd0e3fa..46337beb 100644 --- a/core/utils/harvesters.py +++ b/core/utils/harvesters.py @@ -110,7 +110,7 @@ def format_raw(self, pid_v3, item): "item": item, # o nome do é status (o ideal é que fosse is_public) mas o OPAC retorna "false" ou "true" na chave status # e o valor deve ser True se explicitamente é o contrário de "false" - "is_public": item.get("status") != "false", + "is_public": item.get("status"), } def format_normalized(self, pid_v3, item): From e339687ccbe167e11b10a675a28b39d1e154ee82 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Tue, 23 Jun 2026 13:23:55 -0300 Subject: [PATCH 2/3] =?UTF-8?q?fix(pid=5Fprovider):=20corrige=20grava?= =?UTF-8?q?=C3=A7=C3=A3o=20de=20is=5Fpublic=20e=20remove=20trava=20na=20ta?= =?UTF-8?q?sk?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Atualiza o método 'XMLURL.record' para avaliar 'is_public' corretamente através do booleano de 'document_item'. - Remove o 'continue' precoce na task de carga, permitindo que o fluxo siga para o provedor tratar o status adequadamente. --- pid_provider/models.py | 42 +++++++++++++++++++++++++++++++++--------- pid_provider/tasks.py | 3 --- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/pid_provider/models.py b/pid_provider/models.py index c46016e6..c5bb371a 100644 --- a/pid_provider/models.py +++ b/pid_provider/models.py @@ -1764,24 +1764,48 @@ def save_file(self, xml_content, filename=None): @classmethod def record(cls, user, url, status, document_item, *, exception=None, response=None, xml_with_pre=None, name=None): + pid = None + is_public = None detail = {"document_item": document_item} + + if document_item: + pid = pid or document_item.get("pid_v3") + is_public = document_item.get("status") + if xml_with_pre: + pid = xml_with_pre.v3 or pid + if exception is not None: detail["exceptions"] = traceback.format_exc() if response is not None: detail["response"] = response - - pid = response.get("v3") if response else None - - is_public = None - if document_item: - doc_status = document_item.get("status") - if doc_status is not None: - is_public = doc_status != "false" + pid = response.get("v3") or pid xmlurl_obj = cls.create_or_update(user=user, url=url, status=status, pid=pid, detail=detail, is_public=is_public) if xml_with_pre is not None: - filename = name or pid or "content.xml" + name = xml_with_pre.sps_pkg_name + filename = f"{name}.xml" xmlurl_obj.save_file(xml_with_pre.tostring(), filename=filename) return xmlurl_obj + + def update_record(self, user, status, exception=None, response=None, xml_with_pre=None): + detail = self.detail + + if exception is not None: + detail["exceptions"] = traceback.format_exc() + if response is not None: + detail["response"] = response + + self.status = status + self.detail = detail + self.updated_by = user + + if xml_with_pre is not None: + self.pid = xml_with_pre.v3 + name = xml_with_pre.sps_pkg_name + filename = f"{name}.xml" + self.save_file(xml_with_pre.tostring(), filename=filename) + + self.save() + return self \ No newline at end of file diff --git a/pid_provider/tasks.py b/pid_provider/tasks.py index 3cd5bdd7..9529e073 100644 --- a/pid_provider/tasks.py +++ b/pid_provider/tasks.py @@ -142,9 +142,6 @@ def task_load_records_from_counter_dict( url = document.get("url") origin_date = document.get("origin_date") - if not document.get("is_public"): - continue - document_item = document.get("item") or {} task_load_record_from_xml_url.delay( From a099cc5a62056dde65fc0c765defc93c326f6afe Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Tue, 23 Jun 2026 13:23:55 -0300 Subject: [PATCH 3/3] refactor(pid_provider): cria log antecipadamente e adiciona update_record MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modifica o fluxo do 'BasePidProvider' para instanciar o registro 'XMLURL' antes de realizar operações externas. Introduz o uso do 'update_record' para persistir o resultado final (sucesso ou falha interna), garantindo a rastreabilidade correta demandada na issue. --- pid_provider/base_pid_provider.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pid_provider/base_pid_provider.py b/pid_provider/base_pid_provider.py index baf8a640..362d6051 100644 --- a/pid_provider/base_pid_provider.py +++ b/pid_provider/base_pid_provider.py @@ -172,7 +172,11 @@ def provide_pid_for_xml_uri( """ # a) Try to obtain XML from URI try: + xml_url_record = None + xml_with_pre = None + resp = None xml_with_pre = list(XMLWithPre.create(uri=xml_uri))[0] + xml_url_record = XMLURL.record(user, xml_uri, None, document_item, xml_with_pre=xml_with_pre) except Exception as e: return XMLURL.record(user, xml_uri, "xml_fetch_failed", document_item, exception=e) @@ -197,11 +201,17 @@ def provide_pid_for_xml_uri( pass # If xml_with_pre is not present or cannot be removed, ignore and log rest of response if response.get("error_type") or response.get("error_msg") or response.get("error_message"): - XMLURL.record(user, xml_uri, "pid_provider_xml_failed", document_item, response=resp, xml_with_pre=xml_with_pre, name=name) + status = "pid_provider_xml_failed" else: - XMLURL.record(user, xml_uri, "success", document_item, response=resp, xml_with_pre=xml_with_pre, name=name) + status = "success" + + xml_url_record.update_record(user, status, exception=None, response=resp, xml_with_pre=xml_with_pre) return response except Exception as e: + if xml_url_record: + xml_url_record.update_record(user, status=None, exception=e, response=resp, xml_with_pre=xml_with_pre) + return resp + return self._handle_unexpected_error(e, xml_uri, name, user, origin_date, force_update, is_published, document_item) def _handle_unexpected_error(self, exception, xml_uri, name, user, origin_date, force_update, is_published, document_item):