From ee0d80454222248f8c5929849b35df0280c52e18 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:30:32 -0300
Subject: [PATCH 01/31] chore: remover models.py do app article

---
 article/models.py | 143 ----------------------------------------------
 1 file changed, 143 deletions(-)
 delete mode 100644 article/models.py

diff --git a/article/models.py b/article/models.py
deleted file mode 100644
index 80d2a97..0000000
--- a/article/models.py
+++ /dev/null
@@ -1,143 +0,0 @@
-from django.db import models
-from django.utils.translation import gettext_lazy as _
-
-from core.models import CommonControlField
-from collection.models import Collection
-
-
-class Article(CommonControlField):
-    collection = models.ForeignKey(
-        Collection,
-        verbose_name=_('Collection'),
-        on_delete=models.CASCADE,
-        blank=False,
-        null=False,
-        db_index=True,
-    )
-
-    scielo_issn = models.CharField(
-        verbose_name=_('SciELO ISSN'),
-        max_length=9,
-        blank=False,
-        null=False,
-        db_index=True,
-    )
-
-    pid_v2 = models.CharField(
-        verbose_name=_('PID V2'),
-        max_length=23,
-        blank=False,
-        null=False,
-        db_index=True,
-    )
-
-    pid_v3 = models.CharField(
-        verbose_name=_('PID V3'),
-        max_length=23,
-        blank=True,
-        null=True,
-        db_index=True,
-    )
-
-    pid_generic = models.CharField(
-        verbose_name=_('PID Generic'),
-        max_length=50,
-        blank=True,
-        null=True,
-        db_index=True,
-    )
-
-    files = models.JSONField(
-        verbose_name=_('Files'),
-        null=True,
-        blank=True,
-        default=dict,
-    )
-
-    default_lang = models.CharField(
-        verbose_name=_('Default Language'),
-        max_length=2,
-        blank=False,
-        null=False,
-    )
-
-    text_langs = models.JSONField(
-        verbose_name=_('Text Languages'),
-        null=True,
-        blank=True,
-        default=dict,
-    )
-
-    processing_date = models.CharField(
-        verbose_name=_('Processing Date'), 
-        max_length=32,
-        null=False,
-        blank=False,
-    )
-
-    publication_date = models.CharField(
-        verbose_name=_('Publication Date'), 
-        max_length=32,
-        null=False,
-        blank=False,
-    )
-
-    publication_year = models.CharField(
-        verbose_name=_('Publication Year'), 
-        max_length=4,
-        null=False,
-        blank=False,
-        db_index=True,
-    )
-
-    def __str__(self):
-        return f'{self.collection.acron3} - {self.scielo_issn} - {self.pid_v2 or self.pid_v3 or self.pid_generic}'
-
-    @classmethod
-    def metadata(cls, collection=None):
-        qs = cls.objects.select_related('collection').only(
-            'collection__acron3',
-            'default_lang',
-            'files',
-            'pid_v2',
-            'pid_v3',
-            'pid_generic',
-            'processing_date',
-            'publication_date',
-            'publication_year',
-            'scielo_issn',
-            'text_langs',
-        )
-
-        if collection:
-            qs = qs.filter(collection=collection)
-
-        for a in qs.iterator():
-            yield {
-                'collection': a.collection.acron3,
-                'default_lang': a.default_lang,
-                'files': a.files,
-                'pid_v2': a.pid_v2,
-                'pid_v3': a.pid_v3,
-                'pid_generic': a.pid_generic,
-                'processing_date': a.processing_date,
-                'publication_date': a.publication_date,
-                'publication_year': a.publication_year,
-                'scielo_issn': a.scielo_issn,
-                'text_langs': a.text_langs,
-            }
-
-    class Meta:
-        verbose_name = _('Article')
-        verbose_name_plural = _('Articles')
-        unique_together = (
-            'collection',
-            'scielo_issn', 
-            'pid_v2',
-            'pid_v3',
-            'pid_generic',
-        )
-        indexes = [
-            models.Index(fields=['collection', 'scielo_issn'], name='collection_scielo_issn_idx'),
-        ]
-

From a342af06fb15fdd3d5346bc6448f90e6f5f06165 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:30:41 -0300
Subject: [PATCH 02/31] chore: remover migrations, tasks, utils, views, admin e
 hooks do app article

---
 article/__init__.py                           |   0
 article/admin.py                              |   3 -
 article/apps.py                               |   6 -
 article/management/__init__.py                |   0
 article/management/commands/__init__.py       |   0
 .../commands/load_articles_by_year.py         |  80 ------
 article/migrations/0001_initial.py            | 137 ---------
 ..._unique_together_article_files_and_more.py |  42 ---
 ...0003_article_collection_scielo_issn_idx.py |  21 --
 article/migrations/__init__.py                |   0
 article/tasks.py                              | 259 ------------------
 article/tests.py                              |   3 -
 article/utils.py                              | 204 --------------
 article/views.py                              |   3 -
 article/wagtail_hooks.py                      |  39 ---
 15 files changed, 797 deletions(-)
 delete mode 100644 article/__init__.py
 delete mode 100644 article/admin.py
 delete mode 100644 article/apps.py
 delete mode 100644 article/management/__init__.py
 delete mode 100644 article/management/commands/__init__.py
 delete mode 100644 article/management/commands/load_articles_by_year.py
 delete mode 100644 article/migrations/0001_initial.py
 delete mode 100644 article/migrations/0002_alter_article_unique_together_article_files_and_more.py
 delete mode 100644 article/migrations/0003_article_collection_scielo_issn_idx.py
 delete mode 100644 article/migrations/__init__.py
 delete mode 100644 article/tasks.py
 delete mode 100644 article/tests.py
 delete mode 100644 article/utils.py
 delete mode 100644 article/views.py
 delete mode 100644 article/wagtail_hooks.py

diff --git a/article/__init__.py b/article/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/article/admin.py b/article/admin.py
deleted file mode 100644
index 8c38f3f..0000000
--- a/article/admin.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.contrib import admin
-
-# Register your models here.
diff --git a/article/apps.py b/article/apps.py
deleted file mode 100644
index 8c0e2c9..0000000
--- a/article/apps.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from django.apps import AppConfig
-
-
-class ArticleConfig(AppConfig):
-    default_auto_field = "django.db.models.BigAutoField"
-    name = "article"
diff --git a/article/management/__init__.py b/article/management/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/article/management/commands/__init__.py b/article/management/commands/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/article/management/commands/load_articles_by_year.py b/article/management/commands/load_articles_by_year.py
deleted file mode 100644
index 335598e..0000000
--- a/article/management/commands/load_articles_by_year.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from django.core.management.base import BaseCommand
-
-from article.tasks import task_load_article_from_opac, task_load_article_from_article_meta
-
-
-class Command(BaseCommand):
-    help = 'Generate task requests for loading article data from Article Meta for each year from 1900 to 2025'
-
-    def add_arguments(self, parser):
-        parser.add_argument(
-            '--start-year',
-            type=int,
-            default=1990,
-            help='Start year (default: 1990)'
-        )
-        parser.add_argument(
-            '--end-year',
-            type=int,
-            default=2025,
-            help='End year (default: 2025)'
-        )
-        parser.add_argument(
-            '--collection',
-            type=str,
-            default='scl',
-            help='Collection code (default: scl)'
-        )
-        parser.add_argument(
-            '--task',
-            choices=['load_article_from_opac', 'load_article_from_article_meta'],
-            default='load_article_from_opac',
-            help='Task to execute (default: load_article_from_opac)',
-        )
-
-    def handle(self, *args, **options):
-        start_year = options['start_year']
-        end_year = options['end_year']
-        collection = options['collection']
-        
-        self.stdout.write(
-            self.style.SUCCESS(
-                f'Generating task requests from {start_year} to {end_year} for collection: {collection}'
-            )
-        )
-        
-        total_tasks = 0
-        
-        for year in range(start_year, end_year + 1):
-            from_date = f'{year}-01-01'
-            until_date = f'{year}-12-31'
-            
-            self.stdout.write(f'Queuing task for year {year}...')
-            
-            # Queue the task for each year
-            if options['task'] == 'load_article_from_article_meta':
-                task_result = task_load_article_from_article_meta.delay(
-                    from_date=from_date,
-                    until_date=until_date,
-                    collection=collection
-                )
-            else:
-                task_result = task_load_article_from_opac.delay(
-                    from_date=from_date,
-                    until_date=until_date,
-                    collection=collection
-                )
-            
-            total_tasks += 1
-            
-            self.stdout.write(
-                self.style.SUCCESS(
-                    f'✓ Task queued for year {year}: {from_date} to {until_date} (Task ID: {task_result.id})'
-                )
-            )
-        
-        self.stdout.write(
-            self.style.SUCCESS(
-                f'\nCompleted! {total_tasks} tasks have been queued successfully.'
-            )
-        )
diff --git a/article/migrations/0001_initial.py b/article/migrations/0001_initial.py
deleted file mode 100644
index 816d61e..0000000
--- a/article/migrations/0001_initial.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Generated by Django 5.0.7 on 2025-02-07 17:50
-
-import django.db.models.deletion
-from django.conf import settings
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    initial = True
-
-    dependencies = [
-        ("collection", "0001_initial"),
-        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name="Article",
-            fields=[
-                (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
-                ),
-                (
-                    "created",
-                    models.DateTimeField(
-                        auto_now_add=True, verbose_name="Creation date"
-                    ),
-                ),
-                (
-                    "updated",
-                    models.DateTimeField(
-                        auto_now=True, verbose_name="Last update date"
-                    ),
-                ),
-                (
-                    "scielo_issn",
-                    models.CharField(
-                        db_index=True, max_length=9, verbose_name="SciELO ISSN"
-                    ),
-                ),
-                (
-                    "pid_v2",
-                    models.CharField(
-                        db_index=True, max_length=23, verbose_name="PID V2"
-                    ),
-                ),
-                (
-                    "pid_v3",
-                    models.CharField(
-                        blank=True,
-                        db_index=True,
-                        max_length=23,
-                        null=True,
-                        verbose_name="PID V3",
-                    ),
-                ),
-                (
-                    "pdfs",
-                    models.JSONField(
-                        blank=True,
-                        default=dict,
-                        null=True,
-                        verbose_name="Format with Language",
-                    ),
-                ),
-                (
-                    "default_lang",
-                    models.CharField(max_length=2, verbose_name="Default Language"),
-                ),
-                (
-                    "text_langs",
-                    models.JSONField(
-                        blank=True,
-                        default=dict,
-                        null=True,
-                        verbose_name="Text Languages",
-                    ),
-                ),
-                (
-                    "processing_date",
-                    models.CharField(max_length=32, verbose_name="Processing Date"),
-                ),
-                (
-                    "publication_date",
-                    models.CharField(max_length=32, verbose_name="Publication Date"),
-                ),
-                (
-                    "publication_year",
-                    models.CharField(
-                        db_index=True, max_length=4, verbose_name="Publication Year"
-                    ),
-                ),
-                (
-                    "collection",
-                    models.ForeignKey(
-                        on_delete=django.db.models.deletion.CASCADE,
-                        to="collection.collection",
-                        verbose_name="Collection",
-                    ),
-                ),
-                (
-                    "creator",
-                    models.ForeignKey(
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_creator",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Creator",
-                    ),
-                ),
-                (
-                    "updated_by",
-                    models.ForeignKey(
-                        blank=True,
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_last_mod_user",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Updater",
-                    ),
-                ),
-            ],
-            options={
-                "verbose_name": "Article",
-                "verbose_name_plural": "Articles",
-                "unique_together": {("collection", "scielo_issn", "pid_v2", "pid_v3")},
-            },
-        ),
-    ]
diff --git a/article/migrations/0002_alter_article_unique_together_article_files_and_more.py b/article/migrations/0002_alter_article_unique_together_article_files_and_more.py
deleted file mode 100644
index cee055c..0000000
--- a/article/migrations/0002_alter_article_unique_together_article_files_and_more.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Generated by Django 5.0.7 on 2025-04-01 01:09
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("article", "0001_initial"),
-        ("collection", "0001_initial"),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name="article",
-            name="files",
-            field=models.JSONField(
-                blank=True, default=dict, null=True, verbose_name="Files"
-            ),
-        ),
-        migrations.AddField(
-            model_name="article",
-            name="pid_generic",
-            field=models.CharField(
-                blank=True,
-                db_index=True,
-                max_length=50,
-                null=True,
-                verbose_name="PID Generic",
-            ),
-        ),
-        migrations.RemoveField(
-            model_name="article",
-            name="pdfs",
-        ),
-        migrations.AlterUniqueTogether(
-            name="article",
-            unique_together={
-                ("collection", "scielo_issn", "pid_v2", "pid_v3", "pid_generic")
-            },
-        ),
- 
-    ]
diff --git a/article/migrations/0003_article_collection_scielo_issn_idx.py b/article/migrations/0003_article_collection_scielo_issn_idx.py
deleted file mode 100644
index 753ac98..0000000
--- a/article/migrations/0003_article_collection_scielo_issn_idx.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Generated by Django 5.0.7 on 2025-06-12 17:16
-
-from django.conf import settings
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("article", "0002_alter_article_unique_together_article_files_and_more"),
-        ("collection", "0001_initial"),
-        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
-    ]
-
-    operations = [
-        migrations.AddIndex(
-            model_name="article",
-            index=models.Index(
-                fields=["collection", "scielo_issn"], name="collection_scielo_issn_idx"
-            ),
-        ),
-    ]
diff --git a/article/migrations/__init__.py b/article/migrations/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/article/tasks.py b/article/tasks.py
deleted file mode 100644
index 3514fca..0000000
--- a/article/tasks.py
+++ /dev/null
@@ -1,259 +0,0 @@
-import logging
-
-from django.contrib.auth import get_user_model
-from django.db.models import Q
-from django.db import DataError
-from django.utils.translation import gettext as _
-
-from collection.models import Collection
-from config import celery_app
-from core.utils import date_utils
-from core.utils.utils import _get_user
-
-from journal.models import Journal
-
-from tracker.models import ArticleEvent
-from tracker.choices import ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED, ARTICLE_EVENT_TYPE_DATA_ERROR
-
-from . import models, utils
-
-
-User = get_user_model()
-
-@celery_app.task(bind=True, name=_('Load article data from Article Meta'), timelimit=-1, queue='load')
-def task_load_article_from_article_meta(self, from_date=None, until_date=None, days_to_go_back=None, collection=None, issn=None, force_update=True, user_id=None, username=None):
-    user = _get_user(self.request, username=username, user_id=user_id)
-
-    from_date, until_date = date_utils.get_date_range_str(from_date, until_date, days_to_go_back)
-    logging.info(f'Loading articles from Article Meta. From: {from_date}, Until: {until_date}, Collection: {collection}, ISSN: {issn}.')
-
-    offset = 0
-    limit = 1000
-    while True:
-        logging.info(f'{from_date}, {until_date}, {offset}, {limit}, {collection}, {issn}')
-        response = utils.fetch_article_meta_dict(from_date, until_date, offset=offset, limit=limit, collection=collection, issn=issn)
-        objects = response.get('objects')
-        if not objects:
-            break
-
-        for obj in objects:
-            codes = obj.get('code_title')
-
-            for issn_code in codes:
-                jou = Journal.objects.filter(
-                    Q(issns__electronic_issn=issn_code) | 
-                    Q(issns__scielo_issn=issn_code) | 
-                    Q(issns__print_issn=issn_code)
-                ).first()
-                if not jou:
-                    continue
-
-            if not jou:
-                logging.info(f'Journal not found for ISSNs: {codes}')
-                continue
-
-            col_obj = Collection.objects.get(acron3=obj.get('collection'))
-            if not col_obj:
-                logging.info(f'Collection not found: {obj.get("collection")}')
-                continue
-
-            try:
-                article, created = models.Article.objects.get_or_create(collection=col_obj, scielo_issn=jou.scielo_issn, pid_v2=obj.get('code'))
-                if created or force_update:
-                    article.files = obj.get('pdfs') or {}
-                    article.processing_date = obj.get('processing_date') or ''
-                    article.publication_date = obj.get('publication_date') or ''
-                    article.publication_year = obj.get('publication_year') or ''
-                    article.default_lang = obj.get('default_language') or ''
-                    article.text_langs = obj.get('text_langs') or ''
-
-                article.save()
-                logging.info(f'Article {"created" if created else "updated"}: {article}')
-            except models.Article.MultipleObjectsReturned as e:
-                logging.error(f'Error getting Article: {e}. Collection: {col_obj}, ISSN: {jou.scielo_issn}, PIDv2: {obj.get("code")}')
-                ArticleEvent.create(
-                    event_type=ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED,
-                    message=f'Error getting Article: {e}. Collection: {col_obj}, ISSN: {jou.scielo_issn}, PIDv2: {obj.get("code")}',
-                    data=obj
-                )
-                continue
-            except DataError as e:
-                logging.error(f'Error saving Article: {e}. Collection: {col_obj}, ISSN: {jou.scielo_issn}, PIDv2: {obj.get("code")}')
-                ArticleEvent.create(
-                    event_type=ARTICLE_EVENT_TYPE_DATA_ERROR,
-                    message=f'Error saving Article: {e}. Collection: {col_obj}, ISSN: {jou.scielo_issn}, PIDv2: {obj.get("code")}',
-                    data=obj
-                )
-                continue
-
-        offset += limit
-
-    return True
-
-
-@celery_app.task(bind=True, name=_('Load article data from OPAC'), timelimit=-1, queue='load')
-def task_load_article_from_opac(self, collection='scl', from_date=None, until_date=None, days_to_go_back=None, page=1, force_update=True, user_id=None, username=None):
-    user = _get_user(self.request, username=username, user_id=user_id)
-
-    from_date, until_date = date_utils.get_date_range_str(from_date, until_date, days_to_go_back)
-    logging.info(f'Loading articles from OPAC. From: {from_date}, Until: {until_date}')
-
-    while True:
-        response = utils.fetch_opac_dict(from_date, until_date, page=page)
-
-        documents = response.get('documents')
-
-        for doc_id, doc in documents.items():
-            col_obj = Collection.objects.get(acron3=collection)
-            if not col_obj:
-                logging.error(f'Collection not found: {collection}')
-                continue
-
-            journal = Journal.objects.get(collection=col_obj, acronym=doc.get('journal_acronym'))
-            if not journal:
-                logging.error(f'Journal not found: {doc.get("journal_acronym")}')
-                continue
-
-            try:
-                article, created = models.Article.objects.get_or_create(collection=col_obj, scielo_issn=journal.scielo_issn, pid_v2=doc.get('pid_v2'))
-
-                if created or force_update:
-                    article.pid_v3 = doc.get('pid_v3') or ''
-                    if not created:
-                        article.pid_v2 = doc.get('pid_v2') or ''
-                        article.publication_date = doc.get('publication_date') or article.publication_date or ''
-                        article.default_lang = doc.get('default_language') or article.default_lang or ''
-            
-                        try:
-                            article.publication_year = article.publication_date[:4]
-                        except IndexError:
-                            article.publication_year = ''
-
-                article.save()
-                logging.info(f'Article {"created" if created else "updated"}: {article}')
-
-            except models.Article.MultipleObjectsReturned as e:
-                logging.error(f'Error getting Article: {e}. Collection: {col_obj}, Journal: {journal.scielo_issn}, PIDv2: {doc.get("pid_v2")}')
-                ArticleEvent.create(
-                    event_type=ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED,
-                    message=f'Error creating Article: {e}. Collection: {col_obj}, Journal: {journal.scielo_issn}, PIDv2: {doc.get("pid_v2")}',
-                    data=doc
-                )
-                continue
-            except DataError as e:
-                logging.error(f'Error saving Article: {e}. Collection: {col_obj}, Journal: {journal.scielo_issn}, PIDv2: {doc.get("pid_v2")}')
-                ArticleEvent.create(
-                    event_type=ARTICLE_EVENT_TYPE_DATA_ERROR,
-                    message=f'Error saving Article: {e}. Collection: {col_obj}, Journal: {journal.scielo_issn}, PIDv2: {doc.get("pid_v2")}',
-                    data=doc
-                )
-                continue
-
-        page += 1
-        if page > int(response.get('pages', 0)):
-            break
-
-    return True
-
-
-@celery_app.task(bind=True, name=_('Load preprint data from SciELO Preprints'), timelimit=-1, queue='load')
-def task_load_preprints_from_preprints_api(self, from_date=None, until_date=None, days_to_go_back=None, force_update=True, user_id=None, username=None):
-    user = _get_user(self.request, username=username, user_id=user_id)
-
-    from_date, until_date = date_utils.get_date_range_str(from_date, until_date, days_to_go_back)
-    logging.info(f'Loading preprints from SciELO Preprints. From: {from_date}, Until: {until_date}')
-
-    col_obj = Collection.objects.get(acron3='preprints')
-    if not col_obj:
-        logging.error(f'Collection not found: preprints')
-        return False
-
-    for record in utils.fetch_preprint_oai_pmh(from_date, until_date):
-        data = utils.extract_preprint_data(record)
-
-        if not data.get('pid_generic'):
-            logging.error(f'Preprint ID not found in record: {record}')
-            continue
-
-        try:
-            article, created = models.Article.objects.get_or_create(collection=col_obj, pid_generic=data['pid_generic'])
-            if created or force_update:
-                article.text_langs = data.get('text_langs')
-                article.default_lang = data.get('default_language')
-                article.publication_date = data.get('publication_date')
-                article.publication_year = data.get('publication_year')
-                
-                # Preprints do not have a scielo_issn yet
-                article.scielo_issn = '0000-0000'
-
-                article.save()
-                logging.debug(f'Article {"created" if created else "updated"}: {article}')
-        except models.Article.MultipleObjectsReturned as e:
-            logging.error(f'Error creating Article: {e}. Collection: {col_obj}, PID: {data["pid_generic"]}')
-            ArticleEvent.create(
-                event_type=ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED,
-                message=f'Error creating Article: {e}. Collection: {col_obj}, PID: {data["pid_generic"]}',
-                data=data
-            )
-            continue
-        except DataError as e:
-            logging.error(f'Error saving Article: {e}. Collection: {col_obj}, PID: {data["pid_generic"]}')
-            ArticleEvent.create(
-                event_type=ARTICLE_EVENT_TYPE_DATA_ERROR,
-                message=f'Error saving Article: {e}. Collection: {col_obj}, PID: {data["pid_generic"]}',
-                data=data
-            )
-            continue
-
-
-@celery_app.task(bind=True, name=_('Load dataset metadata from Dataverse'), timelimit=-1, queue='load')
-def task_load_dataset_metadata_from_dataverse(self, from_date=None, until_date=None, days_to_go_back=None, force_update=True, user_id=None, username=None):
-    user = _get_user(self.request, username=username, user_id=user_id)
-
-    from_date, until_date = date_utils.get_date_range_str(from_date, until_date, days_to_go_back)
-    logging.info(f'Loading dataset metadata from SciELO Data. From: {from_date}, Until: {until_date}')
-
-    col_obj = Collection.objects.get(acron3='data')
-    if not col_obj:
-        logging.error(f'Collection not found: data')
-        return False
-
-    for record in utils.fetch_dataverse_metadata(from_date, until_date):
-        dataset_doi = record.get('dataset_doi')
-        if not dataset_doi:
-            logging.error(f'Dataset DOI not found in record: {record}')
-            continue
-
-        try:
-            dataset, created = models.Article.objects.get_or_create(collection=col_obj, pid_generic=dataset_doi)
-            if created or force_update:
-                dataset.publication_date = record.get('dataset_published')
-
-                file_persistent_id = record.get('file_persistent_id')
-                file_id = record.get('file_id')
-                file_name = record.get('file_name')
-                file_url = record.get('file_url')
-
-                if file_id:
-                    dataset.files[file_id] = {'name': file_name, 'url': file_url, 'file_persisent_id': file_persistent_id}
-
-                dataset.save()
-                logging.debug(f'Dataset {"created" if created else "updated"}: {dataset}')
-        except models.Article.MultipleObjectsReturned as e:
-            logging.error(f'Error creating Dataset: {e}. Collection: {col_obj}, PID: {dataset_doi}')
-            ArticleEvent.create(
-                event_type=ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED,
-                message=f'Error creating Dataset: {e}. Collection: {col_obj}, PID: {dataset_doi}',
-                data=record
-            )
-            continue
-        except DataError as e:
-            logging.error(f'Error saving Dataset: {e}. Collection: {col_obj}, PID: {dataset_doi}')
-            ArticleEvent.create(
-                event_type=ARTICLE_EVENT_TYPE_DATA_ERROR,
-                message=f'Error saving Dataset: {e}. Collection: {col_obj}, PID: {dataset_doi}',
-                data=record
-            )
-            continue
-
-    return True
diff --git a/article/tests.py b/article/tests.py
deleted file mode 100644
index 7ce503c..0000000
--- a/article/tests.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.test import TestCase
-
-# Create your tests here.
diff --git a/article/utils.py b/article/utils.py
deleted file mode 100644
index b9a094e..0000000
--- a/article/utils.py
+++ /dev/null
@@ -1,204 +0,0 @@
-import logging
-import requests
-import os
-
-from sickle import Sickle
-from time import sleep
-
-from core.utils import standardizer
-
-
-ARTICLEMETA_ENDPOINT = os.environ.get('ARTICLEMETA_COLLECT_URL', 'http://articlemeta.scielo.org/api/v1/article/counter_dict')
-ARTICLEMETA_MAX_RETRIES = int(os.environ.get('ARTICLEMETA_MAX_RETRIES', 5))
-ARTICLEMETA_SLEEP_TIME = int(os.environ.get('ARTICLEMETA_SLEEP_TIME', 30))
-
-OPAC_ENDPOINT = os.environ.get('OPAC_ENDPOINT', 'https://www.scielo.br/api/v1/counter_dict')
-OPAC_MAX_RETRIES = int(os.environ.get('OPAC_MAX_RETRIES', 5))
-OPAC_SLEEP_TIME = int(os.environ.get('OPAC_SLEEP_TIME', 30))
-
-OAI_PMH_PREPRINT_ENDPOINT = os.environ.get('OAI_PMH_PREPRINT_ENDPOINT', 'https://preprints.scielo.org/index.php/scielo/oai')
-OAI_METADATA_PREFIX = os.environ.get('OAI_METADATA_PREFIX', 'oai_dc')
-OAI_PMH_MAX_RETRIES = int(os.environ.get('OAI_PMH_MAX_RETRIES', 5))
-
-DATAVERSE_ENDPOINT = os.environ.get('DATAVERSE_ENDPOINT', 'https://data.scielo.org/api')
-DATAVERSE_ROOT_COLLECTION = os.environ.get('DATAVERSE_ROOT_COLLECTION', 'scielodata')
-DATAVERSE_MAX_RETRIES = int(os.environ.get('DATAVERSE_MAX_RETRIES', 5))
-DATAVERSE_SLEEP_TIME = int(os.environ.get('DATAVERSE_SLEEP_TIME', 30))
-
-
-def fetch_article_meta_dict(from_date, until_date, offset=0, limit=1000, collection=None, issn=None):
-    for t in range(1, ARTICLEMETA_MAX_RETRIES + 1):
-        params = {
-            'from': from_date,
-            'until': until_date,
-            'offset': offset,
-            'limit': limit
-        }
-
-        if collection:
-            params['collection'] = collection
-
-        if issn:
-            params['issn'] = issn
-
-        response = requests.get(ARTICLEMETA_ENDPOINT, params=params)
-
-        try:
-            response.raise_for_status()
-            logging.info(response.url)
-
-        except requests.exceptions.HTTPError:
-            logging.warning(
-                'Failed to collect data from %s. Waiting %d seconds before retry %d of %d' % (
-                    response.url, 
-                    ARTICLEMETA_SLEEP_TIME, 
-                    t, 
-                    ARTICLEMETA_MAX_RETRIES
-                )
-            )
-            sleep(ARTICLEMETA_SLEEP_TIME)
-
-        else:
-            return response.json()
-
-
-def fetch_opac_dict(from_date, until_date, page=1):
-    for t in range(1, OPAC_MAX_RETRIES + 1):
-        params = {
-            'begin_date': from_date, 
-            'end_date': until_date, 
-            'page': page
-        }
-
-        response = requests.get(url=OPAC_ENDPOINT, params=params, verify=False)
-
-        try:
-            response.raise_for_status()
-            logging.info(response.url)
-
-        except requests.exceptions.HTTPError:
-            logging.warning('Não foi possível coletar dados de %s. Aguardando %d segundos para tentativa %d de %d' % (response.url, OPAC_SLEEP_TIME, t, OPAC_MAX_RETRIES))
-            sleep(OPAC_SLEEP_TIME)
-
-        else:
-            return response.json()
-
-
-def fetch_preprint_oai_pmh(from_date, until_date):
-    oai_client = Sickle(endpoint=OAI_PMH_PREPRINT_ENDPOINT, max_retries=OAI_PMH_MAX_RETRIES, verify=False)
-    records = oai_client.ListRecords(**{
-        'metadataPrefix': OAI_METADATA_PREFIX,
-        'from': from_date,
-        'until': until_date,
-    })
-
-    for r in records:
-        yield r
-
-
-def extract_preprint_data(record):
-    pid_generic = _extract_preprint_compatible_identifer(record.header.identifier)
-    text_langs = [standardizer.standardize_language_code(l) for l in record.metadata.get('language', [])]
-    publication_date = record.metadata.get('date', [''])[0]
-    default_language = text_langs[0] if text_langs else ''
-    publication_year = _extract_preprint_publication_year_from_date(publication_date)
-
-    data = {
-        'pid_generic': pid_generic,
-        'text_langs': text_langs,
-        'publication_date': publication_date,
-        'default_language': default_language,
-        'publication_year': publication_year
-    }
-
-    return data
-
-
-def _extract_preprint_compatible_identifer(pid_v2):
-    try:
-        # piv_v2 should be something like oai:ops.preprints.scielo.org:preprint/1195
-        # we are using the last part of the string as the identifier
-        return pid_v2.split(':')[-1].split('/')[1]
-    except IndexError:
-        return ''
-
-
-def _extract_preprint_publication_year_from_date(date_str):
-    try:
-        return date_str[:4]
-    except IndexError:
-        return ''
-
-
-def fetch_dataverse_metadata(from_date=None, until_date=None):
-    def get_subdataverses():
-        url = f"{DATAVERSE_ENDPOINT}/dataverses/{DATAVERSE_ROOT_COLLECTION}/contents"
-        try:
-            response = requests.get(url, timeout=DATAVERSE_SLEEP_TIME)
-            response.raise_for_status()
-            return response.json().get("data", [])
-        except requests.exceptions.RequestException as e:
-            logging.error(f"Error fetching subdataverses: {e}")
-            return []
-
-    def get_datasets(subdataverse_id):
-        url = f"{DATAVERSE_ENDPOINT}/dataverses/{subdataverse_id}/contents"
-        try:
-            response = requests.get(url, timeout=DATAVERSE_SLEEP_TIME)
-            response.raise_for_status()
-            return response.json().get("data", [])
-        except requests.exceptions.RequestException as e:
-            logging.error(f"Error fetching datasets for subdataverse {subdataverse_id}: {e}")
-            return []
-
-    def get_files(dataset_id):
-        url = f"{DATAVERSE_ENDPOINT}/datasets/{dataset_id}/versions/:latest/files"
-        try:
-            response = requests.get(url, timeout=DATAVERSE_SLEEP_TIME)
-            response.raise_for_status()
-            return response.json().get("data", [])
-        except requests.exceptions.RequestException as e:
-            logging.error(f"Error fetching files for dataset {dataset_id}: {e}")
-            return []
-
-    subdataverses = get_subdataverses()
-
-    for subdataverse in subdataverses:
-        if subdataverse["type"] != "dataverse":
-            continue
-
-        subdataverse_id = subdataverse["id"]
-        subdataverse_title = subdataverse["title"]
-        datasets = get_datasets(subdataverse_id)
-
-        for dataset in datasets:
-            if dataset["type"] != "dataset":
-                continue
-
-            dataset_id = dataset["id"]
-            doi = standardizer.standardize_doi(dataset.get("persistentUrl"))
-            if not doi:
-                logging.warning(f"Dataset {dataset_id} does not have a DOI.")
-                continue
-
-            publication_date = dataset.get("publicationDate", None)
-
-            if publication_date:
-                if (from_date and publication_date < from_date) or (until_date and publication_date > until_date):
-                    continue
-
-            files = get_files(dataset_id)
-
-            for file in files:
-                file_persistent_id = file["dataFile"].get("persistentId", None)
-                file_persistent_id_stz = standardizer.standardize_pid_generic(file_persistent_id) if file_persistent_id else None
-
-                yield {
-                    "title": subdataverse_title,
-                    "dataset_doi": doi,
-                    "dataset_published": publication_date,
-                    "file_id": file["dataFile"]["id"],
-                    "file_name": file["label"],
-                    "file_url": f"{DATAVERSE_ENDPOINT}/access/datafile/{file['dataFile']['id']}",
-                    "file_persistent_id": file_persistent_id_stz,
-                }
diff --git a/article/views.py b/article/views.py
deleted file mode 100644
index 91ea44a..0000000
--- a/article/views.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.shortcuts import render
-
-# Create your views here.
diff --git a/article/wagtail_hooks.py b/article/wagtail_hooks.py
deleted file mode 100644
index 4cf55bd..0000000
--- a/article/wagtail_hooks.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from django.utils.translation import gettext_lazy as _
-from wagtail.snippets.views.snippets import SnippetViewSet
-from wagtail.snippets.models import register_snippet
-
-from config.menu import get_menu_order
-
-from .models import Article
-
-
-class ArticleSnippetViewSet(SnippetViewSet):
-    model = Article
-    icon = "folder-open-inverse"
-    menu_name = "article"
-    menu_label = _("Article")
-    menu_order = get_menu_order("article")
-    add_to_admin_menu = True
-
-    list_display = (
-        "collection",
-        "scielo_issn",
-        "pid_v2",
-        "pid_v3",
-        "pid_generic",
-        "files",
-        "publication_year",
-    )
-    list_filter = (
-        "collection",
-        "scielo_issn",
-        "publication_year",
-    )
-    search_fields = (
-        "scielo_issn",
-        "pid_v2",
-        "pid_v3",
-        "pid_generic",
-    )
-
-register_snippet(ArticleSnippetViewSet)

From f67d6dcf0efbf7a9b2647f3ff90ee5ee15b3e546 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:30:51 -0300
Subject: [PATCH 03/31] chore: remover models.py do app journal

---
 journal/models.py | 100 ----------------------------------------------
 1 file changed, 100 deletions(-)
 delete mode 100644 journal/models.py

diff --git a/journal/models.py b/journal/models.py
deleted file mode 100644
index 0d830e9..0000000
--- a/journal/models.py
+++ /dev/null
@@ -1,100 +0,0 @@
-from django.db import models
-from django.utils.translation import gettext_lazy as _
-
-from core.models import CommonControlField
-from collection.models import Collection
-
-
-class Journal(CommonControlField):
-    collection = models.ForeignKey(
-        Collection,
-        verbose_name=_('Collection'),
-        on_delete=models.CASCADE,
-        blank=False,
-        null=False,
-        db_index=True,
-    )
-
-    scielo_issn = models.CharField(
-        verbose_name=_('SciELO ISSN'),
-        max_length=9,
-        blank=False,
-        null=False,
-        db_index=True,
-    )
-
-    issns = models.JSONField(
-        verbose_name=_('ISSNs'),
-        null=True,
-        blank=True,
-        default=dict,
-    )
-    
-    acronym = models.CharField(
-        verbose_name=_('Journal Acronym'),
-        max_length=32,
-        blank=True,
-        null=True,
-        default='',
-    )
-
-    title = models.CharField(
-        verbose_name=_('Journal Title'),
-        max_length=255,
-        blank=False,
-        null=False,
-    )
-
-    publisher_name = models.JSONField(
-        verbose_name=_('Publisher Name'),
-        blank=True,
-        null=True,
-        default=list,
-    )
-
-    subject_areas = models.JSONField(
-        verbose_name=_('Subject Areas (CAPES)'),
-        null=False,
-        blank=False,
-        default=list,
-    )
-
-    wos_subject_areas = models.JSONField(
-        verbose_name=_('Subject Areas (WoS)'),
-        null=False,
-        blank=False,
-        default=list,
-    )
-
-    def __str__(self):
-        return f'{self.collection.acron2} - {self.scielo_issn} - {self.acronym}'
-
-    @classmethod
-    def metadata(cls, collection=None):
-        queryset = cls.objects.all()
-        if collection:
-            queryset = queryset.filter(collection=collection)
-
-        for journal in queryset.only(
-            'acronym', 'collection__acron3', 'issns', 'publisher_name',
-            'scielo_issn', 'subject_areas', 'title', 'wos_subject_areas'
-        ):
-            yield {
-                'acronym': journal.acronym,
-                'collection': journal.collection.acron3,
-                'issns': set([v for v in journal.issns.values() if v]),
-                'publisher_name': journal.publisher_name,
-                'scielo_issn': journal.scielo_issn,
-                'subject_areas': journal.subject_areas,
-                'title': journal.title,
-                'wos_subject_areas': journal.wos_subject_areas,
-            }
-
-    class Meta:
-        verbose_name = _('Journal')
-        verbose_name_plural = _('Journals')
-        unique_together = (
-            'collection',
-            'scielo_issn', 
-            'acronym',
-        )

From b4b16ac6a17c4523b2c2019836ae400d89507b6c Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:30:51 -0300
Subject: [PATCH 04/31] chore: remover migrations, tasks, utils, views, admin e
 hooks do app journal

---
 journal/__init__.py                           |   0
 journal/admin.py                              |   3 -
 journal/apps.py                               |   6 -
 journal/migrations/0001_initial.py            | 122 ------------------
 .../0002_alter_journal_scielo_issn.py         |  19 ---
 journal/migrations/__init__.py                |   0
 journal/tasks.py                              |  56 --------
 journal/tests.py                              |   3 -
 journal/utils.py                              |  19 ---
 journal/views.py                              |   3 -
 journal/wagtail_hooks.py                      |  40 ------
 11 files changed, 271 deletions(-)
 delete mode 100644 journal/__init__.py
 delete mode 100644 journal/admin.py
 delete mode 100644 journal/apps.py
 delete mode 100644 journal/migrations/0001_initial.py
 delete mode 100644 journal/migrations/0002_alter_journal_scielo_issn.py
 delete mode 100644 journal/migrations/__init__.py
 delete mode 100644 journal/tasks.py
 delete mode 100644 journal/tests.py
 delete mode 100644 journal/utils.py
 delete mode 100644 journal/views.py
 delete mode 100644 journal/wagtail_hooks.py

diff --git a/journal/__init__.py b/journal/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/journal/admin.py b/journal/admin.py
deleted file mode 100644
index 8c38f3f..0000000
--- a/journal/admin.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.contrib import admin
-
-# Register your models here.
diff --git a/journal/apps.py b/journal/apps.py
deleted file mode 100644
index e10a171..0000000
--- a/journal/apps.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from django.apps import AppConfig
-
-
-class JournalConfig(AppConfig):
-    default_auto_field = "django.db.models.BigAutoField"
-    name = "journal"
diff --git a/journal/migrations/0001_initial.py b/journal/migrations/0001_initial.py
deleted file mode 100644
index 7164bbc..0000000
--- a/journal/migrations/0001_initial.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Generated by Django 5.0.7 on 2025-02-07 17:50
-
-import django.db.models.deletion
-from django.conf import settings
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    initial = True
-
-    dependencies = [
-        ("collection", "0001_initial"),
-        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name="Journal",
-            fields=[
-                (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
-                ),
-                (
-                    "created",
-                    models.DateTimeField(
-                        auto_now_add=True, verbose_name="Creation date"
-                    ),
-                ),
-                (
-                    "updated",
-                    models.DateTimeField(
-                        auto_now=True, verbose_name="Last update date"
-                    ),
-                ),
-                (
-                    "scielo_issn",
-                    models.CharField(max_length=9, verbose_name="SciELO ISSN"),
-                ),
-                (
-                    "issns",
-                    models.JSONField(
-                        blank=True, default=dict, null=True, verbose_name="ISSNs"
-                    ),
-                ),
-                (
-                    "acronym",
-                    models.CharField(
-                        blank=True,
-                        default="",
-                        max_length=32,
-                        null=True,
-                        verbose_name="Journal Acronym",
-                    ),
-                ),
-                (
-                    "title",
-                    models.CharField(max_length=255, verbose_name="Journal Title"),
-                ),
-                (
-                    "publisher_name",
-                    models.JSONField(
-                        blank=True,
-                        default=list,
-                        null=True,
-                        verbose_name="Publisher Name",
-                    ),
-                ),
-                (
-                    "subject_areas",
-                    models.JSONField(
-                        default=list, verbose_name="Subject Areas (CAPES)"
-                    ),
-                ),
-                (
-                    "wos_subject_areas",
-                    models.JSONField(default=list, verbose_name="Subject Areas (WoS)"),
-                ),
-                (
-                    "collection",
-                    models.ForeignKey(
-                        on_delete=django.db.models.deletion.CASCADE,
-                        to="collection.collection",
-                        verbose_name="Collection",
-                    ),
-                ),
-                (
-                    "creator",
-                    models.ForeignKey(
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_creator",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Creator",
-                    ),
-                ),
-                (
-                    "updated_by",
-                    models.ForeignKey(
-                        blank=True,
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_last_mod_user",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Updater",
-                    ),
-                ),
-            ],
-            options={
-                "verbose_name": "Journal",
-                "verbose_name_plural": "Journals",
-                "unique_together": {("collection", "scielo_issn", "acronym")},
-            },
-        ),
-    ]
diff --git a/journal/migrations/0002_alter_journal_scielo_issn.py b/journal/migrations/0002_alter_journal_scielo_issn.py
deleted file mode 100644
index 07cf94f..0000000
--- a/journal/migrations/0002_alter_journal_scielo_issn.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Generated by Django 5.0.7 on 2025-06-12 17:16
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("journal", "0001_initial"),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name="journal",
-            name="scielo_issn",
-            field=models.CharField(
-                db_index=True, max_length=9, verbose_name="SciELO ISSN"
-            ),
-        ),
-    ]
diff --git a/journal/migrations/__init__.py b/journal/migrations/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/journal/tasks.py b/journal/tasks.py
deleted file mode 100644
index 71681cb..0000000
--- a/journal/tasks.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import logging
-
-from django.contrib.auth import get_user_model
-from django.db import IntegrityError
-from django.utils import timezone
-from django.utils.translation import gettext as _
-
-from collection.models import Collection
-from config import celery_app
-from core.utils.utils import _get_user
-
-from . import models, utils
-
-
-User = get_user_model()
-
-
-@celery_app.task(bind=True, name=_('Load journal data from Article Meta'), queue='load')
-def task_load_journal_data_from_article_meta(self, collections=[], force_update=True, user_id=None, username=None, mode='thrift'):
-    user = _get_user(user_id, username)
-
-    for col in collections or Collection.acron3_list():
-        for j in utils.fetch_article_meta_journals(collection=col, mode=mode):
-            collection = Collection.objects.get(acron3=j.collection_acronym)
-            if not collection:
-                logging.error(f'Collection {j.collection_acronym} does not exist')
-                continue
-
-            try:
-                journal, created = models.Journal.objects.get_or_create(collection=collection, scielo_issn=j.scielo_issn)
-            except IntegrityError as e:
-                logging.error(f'Journal {j} has not been created due to error: {e}')
-                continue
-
-            if created:
-                journal.creator = user
-                journal.created = timezone.now()
-            
-            if created or force_update:
-                journal.updated_by = user
-                journal.updated = timezone.now()
-                journal.issns = {
-                    'electronic_issn': j.electronic_issn or '', 
-                    'print_issn': j.print_issn or '',
-                    'scielo_issn': j.scielo_issn
-                }
-                journal.acronym = j.acronym
-                journal.title = j.title
-                journal.publisher_name = j.publisher_name or ''
-                journal.subject_areas = j.subject_areas or []
-                journal.wos_subject_areas = j.wos_subject_areas or []
-                logging.info(f'Journal {"created" if created else "updated"}: {journal}')
-
-            journal.save()
-
-    return True
diff --git a/journal/tests.py b/journal/tests.py
deleted file mode 100644
index 7ce503c..0000000
--- a/journal/tests.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.test import TestCase
-
-# Create your tests here.
diff --git a/journal/utils.py b/journal/utils.py
deleted file mode 100644
index 8a80521..0000000
--- a/journal/utils.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from articlemeta.client import ThriftClient, RestfulClient
-
-
-def fetch_article_meta_journals(collection='scl', mode='rest'):
-    """
-    Fetches article metadata from journals.
-
-    Returns
-    -------
-    list
-        A list of article metadata.
-    """
-    if mode == 'rest':
-        am = RestfulClient()
-    elif mode == 'thrift':
-        am = ThriftClient()
-    
-    for j in am.journals(collection=collection):
-        yield j
diff --git a/journal/views.py b/journal/views.py
deleted file mode 100644
index 91ea44a..0000000
--- a/journal/views.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.shortcuts import render
-
-# Create your views here.
diff --git a/journal/wagtail_hooks.py b/journal/wagtail_hooks.py
deleted file mode 100644
index 725b370..0000000
--- a/journal/wagtail_hooks.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from django.utils.translation import gettext_lazy as _
-from wagtail.snippets.views.snippets import SnippetViewSet
-from wagtail.snippets.models import register_snippet
-
-from config.menu import get_menu_order
-
-from .models import Journal
-
-
-class JournalSnippetViewSet(SnippetViewSet):
-    model = Journal
-    icon = "folder-open-inverse"
-    menu_name = "journal"
-    menu_label = _("Journal")
-    menu_order = get_menu_order('journal')
-    add_to_admin_menu = True
-
-    list_display = (
-        "collection",
-        "scielo_issn",
-        "acronym",
-        "title",
-        "issns",
-        "publisher_name",
-        "subject_areas",
-        "wos_subject_areas",
-    )
-    list_filter = (
-        "collection",
-    )
-    search_fields = (
-        "issns",
-        "acronym",
-        "publisher_name",
-        "subject_areas",
-        "wos_subject_areas",
-    )
-
-
-register_snippet(JournalSnippetViewSet)

From 2ada2b0a12ef9daa4ce3d41d3693de9df81be81e Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:31:03 -0300
Subject: [PATCH 05/31] =?UTF-8?q?chore:=20remover=20documenta=C3=A7=C3=A3o?=
 =?UTF-8?q?=20Sphinx=20n=C3=A3o=20utilizada=20(docs/)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .readthedocs.yml | 12 ---------
 docs/Makefile    | 29 ----------------------
 docs/__init__.py |  1 -
 docs/conf.py     | 64 ------------------------------------------------
 docs/howto.rst   | 38 ----------------------------
 docs/index.rst   | 23 -----------------
 docs/make.bat    | 46 ----------------------------------
 docs/users.rst   | 15 ------------
 8 files changed, 228 deletions(-)
 delete mode 100644 .readthedocs.yml
 delete mode 100644 docs/Makefile
 delete mode 100644 docs/__init__.py
 delete mode 100644 docs/conf.py
 delete mode 100644 docs/howto.rst
 delete mode 100644 docs/index.rst
 delete mode 100644 docs/make.bat
 delete mode 100644 docs/users.rst

diff --git a/.readthedocs.yml b/.readthedocs.yml
deleted file mode 100644
index b4cf0c0..0000000
--- a/.readthedocs.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-version: 2
-
-sphinx:
-  configuration: docs/conf.py
-
-build:
-  image: testing
-
-python:
-  version: 3.9
-  install:
-    - requirements: requirements/local.txt
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index 6957700..0000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,29 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line, and also
-# from the environment for the first two.
-SPHINXOPTS    ?=
-SPHINXBUILD   ?= sphinx-build
-SOURCEDIR     = .
-BUILDDIR      = ./_build
-APP = /app
-
-.PHONY: help livehtml apidocs Makefile
-
-# Put it first so that "make" without argument is like "make help".
-help:
-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -c .
-
-# Build, watch and serve docs with live reload
-livehtml:
-	sphinx-autobuild -b html --host 0.0.0.0 --port 9000 --watch $(APP) -c . $(SOURCEDIR) $(BUILDDIR)/html
-
-# Outputs rst files from django application code
-apidocs:
-	sphinx-apidoc -o $(SOURCEDIR)/api $(APP)
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -c .
diff --git a/docs/__init__.py b/docs/__init__.py
deleted file mode 100644
index 8772c82..0000000
--- a/docs/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Included so that Django's startproject comment runs against the docs directory
diff --git a/docs/conf.py b/docs/conf.py
deleted file mode 100644
index 51cd921..0000000
--- a/docs/conf.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Configuration file for the Sphinx documentation builder.
-#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# https://www.sphinx-doc.org/en/master/usage/configuration.html
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-
-import os
-import sys
-
-import django
-
-if os.getenv("READTHEDOCS", default=False) == "True":
-    sys.path.insert(0, os.path.abspath(".."))
-    os.environ["DJANGO_READ_DOT_ENV_FILE"] = "True"
-    os.environ["USE_DOCKER"] = "no"
-else:
-    sys.path.insert(0, os.path.abspath("/app"))
-os.environ["DATABASE_URL"] = "sqlite:///readthedocs.db"
-os.environ["CELERY_BROKER_URL"] = os.getenv("REDIS_URL", "redis://redis:6379")
-os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local")
-django.setup()
-
-# -- Project information -----------------------------------------------------
-
-project = "SciELO Core"
-copyright = """2022, SciELO"""
-author = "SciELO"
-
-
-# -- General configuration ---------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
-    "sphinx.ext.autodoc",
-    "sphinx.ext.napoleon",
-]
-
-# Add any paths that contain templates here, relative to this directory.
-# templates_path = ["_templates"]
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
-
-# -- Options for HTML output -------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-#
-html_theme = "alabaster"
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-# html_static_path = ["_static"]
diff --git a/docs/howto.rst b/docs/howto.rst
deleted file mode 100644
index 9fae300..0000000
--- a/docs/howto.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-How To - Project Documentation
-======================================================================
-
-Get Started
-----------------------------------------------------------------------
-
-Documentation can be written as rst files in `core/docs`.
-
-
-To build and serve docs, use the commands::
-    
-    docker compose -f local.yml up docs
-
-
-
-Changes to files in `docs/_source` will be picked up and reloaded automatically.
-
-`Sphinx <https://www.sphinx-doc.org/>`_ is the tool used to build documentation.
-
-Docstrings to Documentation
-----------------------------------------------------------------------
-
-The sphinx extension `apidoc <https://www.sphinx-doc.org/en/master/man/sphinx-apidoc.html/>`_ is used to automatically document code using signatures and docstrings.
-
-Numpy or Google style docstrings will be picked up from project files and availble for documentation. See the `Napoleon <https://sphinxcontrib-napoleon.readthedocs.io/en/latest/>`_ extension for details.
-
-For an in-use example, see the `page source <_sources/users.rst.txt>`_ for :ref:`users`.
-
-To compile all docstrings automatically into documentation source files, use the command:
-    ::
-    
-        make apidocs
-
-
-This can be done in the docker container:
-    :: 
-        
-        docker run --rm docs make apidocs
diff --git a/docs/index.rst b/docs/index.rst
deleted file mode 100644
index b6c6ded..0000000
--- a/docs/index.rst
+++ /dev/null
@@ -1,23 +0,0 @@
-.. SciELO Content Manager  documentation master file, created by
-   sphinx-quickstart.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
-Welcome to SciELO Core's documentation!
-======================================================================
-
-.. toctree::
-   :maxdepth: 2
-   :caption: Contents:
-
-   howto
-   users
-
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/docs/make.bat b/docs/make.bat
deleted file mode 100644
index 4f70eed..0000000
--- a/docs/make.bat
+++ /dev/null
@@ -1,46 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=sphinx-build -c .
-)
-set SOURCEDIR=_source
-set BUILDDIR=_build
-set APP=..\core
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-	echo.
-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-	echo.installed, then set the SPHINXBUILD environment variable to point
-	echo.to the full path of the 'sphinx-build' executable. Alternatively you
-	echo.may add the Sphinx directory to PATH.
-	echo.
-	echo.Install sphinx-autobuild for live serving.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
-	exit /b 1
-)
-
-%SPHINXBUILD% -b %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
-
-:livehtml
-sphinx-autobuild -b html --open-browser -p 9000 --watch %APP% -c . %SOURCEDIR% %BUILDDIR%/html
-GOTO :EOF
-
-:apidocs
-sphinx-apidoc -o %SOURCEDIR%/api %APP%
-GOTO :EOF
-
-:help
-%SPHINXBUILD% -b help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-
-:end
-popd
diff --git a/docs/users.rst b/docs/users.rst
deleted file mode 100644
index 21e08aa..0000000
--- a/docs/users.rst
+++ /dev/null
@@ -1,15 +0,0 @@
- .. _users:
-
-Users
-======================================================================
-
-When starting a new project, it’s highly recommended to set up a custom user model,
-even if the default User model is sufficient for you. 
-
-This model behaves identically to the default user model, 
-but you’ll be able to customize it in the future if the need arises.
-
-.. automodule:: core.users.models
-   :members:
-   :noindex:
-

From a9fbd227ad6f75b0a40cba9db3082166246e159d Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:31:03 -0300
Subject: [PATCH 06/31] =?UTF-8?q?chore:=20remover=20arquivos=20de=20config?=
 =?UTF-8?q?ura=C3=A7=C3=A3o=20mortos=20(.pylintrc,=20COPYING,=20merge=5Fpr?=
 =?UTF-8?q?oduction=5Fdotenvs)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .pylintrc                             |  14 -
 COPYING                               | 674 --------------------------
 merge_production_dotenvs_in_dotenv.py |  67 ---
 3 files changed, 755 deletions(-)
 delete mode 100644 .pylintrc
 delete mode 100644 COPYING
 delete mode 100644 merge_production_dotenvs_in_dotenv.py

diff --git a/.pylintrc b/.pylintrc
deleted file mode 100644
index 55509fe..0000000
--- a/.pylintrc
+++ /dev/null
@@ -1,14 +0,0 @@
-[MASTER]
-load-plugins=pylint_django, pylint_celery
-django-settings-module=config.settings.base
-[FORMAT]
-max-line-length=120
-
-[MESSAGES CONTROL]
-disable=missing-docstring,invalid-name
-
-[DESIGN]
-max-parents=13
-
-[TYPECHECK]
-generated-members=REQUEST,acl_users,aq_parent,"[a-zA-Z]+_set{1,2}",save,delete
diff --git a/COPYING b/COPYING
deleted file mode 100644
index 94a9ed0..0000000
--- a/COPYING
+++ /dev/null
@@ -1,674 +0,0 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
-  The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works.  By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.  We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors.  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-  To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights.  Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received.  You must make sure that they, too, receive
-or can get the source code.  And you must show them these terms so they
-know their rights.
-
-  Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
-  For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software.  For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
-  Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so.  This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software.  The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable.  Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products.  If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
-  Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary.  To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                       TERMS AND CONDITIONS
-
-  0. Definitions.
-
-  "This License" refers to version 3 of the GNU General Public License.
-
-  "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
-  "The Program" refers to any copyrightable work licensed under this
-License.  Each licensee is addressed as "you".  "Licensees" and
-"recipients" may be individuals or organizations.
-
-  To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy.  The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
-  A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-  To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy.  Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-  To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies.  Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
-  An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License.  If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-  1. Source Code.
-
-  The "source code" for a work means the preferred form of the work
-for making modifications to it.  "Object code" means any non-source
-form of a work.
-
-  A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-  The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form.  A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-  The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities.  However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work.  For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-  The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
-  The Corresponding Source for a work in source code form is that
-same work.
-
-  2. Basic Permissions.
-
-  All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met.  This License explicitly affirms your unlimited
-permission to run the unmodified Program.  The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work.  This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-  You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force.  You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright.  Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
-  Conveying under any other circumstances is permitted solely under
-the conditions stated below.  Sublicensing is not allowed; section 10
-makes it unnecessary.
-
-  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-  No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-  When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
-  4. Conveying Verbatim Copies.
-
-  You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-  You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-  5. Conveying Modified Source Versions.
-
-  You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
-    a) The work must carry prominent notices stating that you modified
-    it, and giving a relevant date.
-
-    b) The work must carry prominent notices stating that it is
-    released under this License and any conditions added under section
-    7.  This requirement modifies the requirement in section 4 to
-    "keep intact all notices".
-
-    c) You must license the entire work, as a whole, under this
-    License to anyone who comes into possession of a copy.  This
-    License will therefore apply, along with any applicable section 7
-    additional terms, to the whole of the work, and all its parts,
-    regardless of how they are packaged.  This License gives no
-    permission to license the work in any other way, but it does not
-    invalidate such permission if you have separately received it.
-
-    d) If the work has interactive user interfaces, each must display
-    Appropriate Legal Notices; however, if the Program has interactive
-    interfaces that do not display Appropriate Legal Notices, your
-    work need not make them do so.
-
-  A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit.  Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-  6. Conveying Non-Source Forms.
-
-  You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
-    a) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by the
-    Corresponding Source fixed on a durable physical medium
-    customarily used for software interchange.
-
-    b) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by a
-    written offer, valid for at least three years and valid for as
-    long as you offer spare parts or customer support for that product
-    model, to give anyone who possesses the object code either (1) a
-    copy of the Corresponding Source for all the software in the
-    product that is covered by this License, on a durable physical
-    medium customarily used for software interchange, for a price no
-    more than your reasonable cost of physically performing this
-    conveying of source, or (2) access to copy the
-    Corresponding Source from a network server at no charge.
-
-    c) Convey individual copies of the object code with a copy of the
-    written offer to provide the Corresponding Source.  This
-    alternative is allowed only occasionally and noncommercially, and
-    only if you received the object code with such an offer, in accord
-    with subsection 6b.
-
-    d) Convey the object code by offering access from a designated
-    place (gratis or for a charge), and offer equivalent access to the
-    Corresponding Source in the same way through the same place at no
-    further charge.  You need not require recipients to copy the
-    Corresponding Source along with the object code.  If the place to
-    copy the object code is a network server, the Corresponding Source
-    may be on a different server (operated by you or a third party)
-    that supports equivalent copying facilities, provided you maintain
-    clear directions next to the object code saying where to find the
-    Corresponding Source.  Regardless of what server hosts the
-    Corresponding Source, you remain obligated to ensure that it is
-    available for as long as needed to satisfy these requirements.
-
-    e) Convey the object code using peer-to-peer transmission, provided
-    you inform other peers where the object code and Corresponding
-    Source of the work are being offered to the general public at no
-    charge under subsection 6d.
-
-  A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-  A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling.  In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage.  For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product.  A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
-  "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source.  The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
-  If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information.  But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-  The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed.  Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
-  Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-  7. Additional Terms.
-
-  "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law.  If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-  When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it.  (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.)  You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-  Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
-    a) Disclaiming warranty or limiting liability differently from the
-    terms of sections 15 and 16 of this License; or
-
-    b) Requiring preservation of specified reasonable legal notices or
-    author attributions in that material or in the Appropriate Legal
-    Notices displayed by works containing it; or
-
-    c) Prohibiting misrepresentation of the origin of that material, or
-    requiring that modified versions of such material be marked in
-    reasonable ways as different from the original version; or
-
-    d) Limiting the use for publicity purposes of names of licensors or
-    authors of the material; or
-
-    e) Declining to grant rights under trademark law for use of some
-    trade names, trademarks, or service marks; or
-
-    f) Requiring indemnification of licensors and authors of that
-    material by anyone who conveys the material (or modified versions of
-    it) with contractual assumptions of liability to the recipient, for
-    any liability that these contractual assumptions directly impose on
-    those licensors and authors.
-
-  All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10.  If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term.  If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-  If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-  Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
-  8. Termination.
-
-  You may not propagate or modify a covered work except as expressly
-provided under this License.  Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-  However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-  Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-  Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License.  If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-  9. Acceptance Not Required for Having Copies.
-
-  You are not required to accept this License in order to receive or
-run a copy of the Program.  Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance.  However,
-nothing other than this License grants you permission to propagate or
-modify any covered work.  These actions infringe copyright if you do
-not accept this License.  Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-  10. Automatic Licensing of Downstream Recipients.
-
-  Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License.  You are not responsible
-for enforcing compliance by third parties with this License.
-
-  An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations.  If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-  You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License.  For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-  11. Patents.
-
-  A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based.  The
-work thus licensed is called the contributor's "contributor version".
-
-  A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version.  For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-  Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-  In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement).  To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-  If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients.  "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-  If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-  A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License.  You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-  Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-  12. No Surrender of Others' Freedom.
-
-  If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all.  For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-  13. Use with the GNU Affero General Public License.
-
-  Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work.  The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
-  14. Revised Versions of this License.
-
-  The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation.  If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-  If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-  Later license versions may give you additional or different
-permissions.  However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-  15. Disclaimer of Warranty.
-
-  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    <program>  Copyright (C) <year>  <name of author>
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs.  If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.  But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/merge_production_dotenvs_in_dotenv.py b/merge_production_dotenvs_in_dotenv.py
deleted file mode 100644
index d1170ef..0000000
--- a/merge_production_dotenvs_in_dotenv.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import os
-from pathlib import Path
-from typing import Sequence
-
-import pytest
-
-ROOT_DIR_PATH = Path(__file__).parent.resolve()
-PRODUCTION_DOTENVS_DIR_PATH = ROOT_DIR_PATH / ".envs" / ".production"
-PRODUCTION_DOTENV_FILE_PATHS = [
-    PRODUCTION_DOTENVS_DIR_PATH / ".django",
-    PRODUCTION_DOTENVS_DIR_PATH / ".postgres",
-]
-DOTENV_FILE_PATH = ROOT_DIR_PATH / ".env"
-
-
-def merge(
-    output_file_path: str, merged_file_paths: Sequence[str], append_linesep: bool = True
-) -> None:
-    with open(output_file_path, "w") as output_file:
-        for merged_file_path in merged_file_paths:
-            with open(merged_file_path, "r") as merged_file:
-                merged_file_content = merged_file.read()
-                output_file.write(merged_file_content)
-                if append_linesep:
-                    output_file.write(os.linesep)
-
-
-def main():
-    merge(DOTENV_FILE_PATH, PRODUCTION_DOTENV_FILE_PATHS)
-
-
-@pytest.mark.parametrize("merged_file_count", range(3))
-@pytest.mark.parametrize("append_linesep", [True, False])
-def test_merge(tmpdir_factory, merged_file_count: int, append_linesep: bool):
-    tmp_dir_path = Path(str(tmpdir_factory.getbasetemp()))
-
-    output_file_path = tmp_dir_path / ".env"
-
-    expected_output_file_content = ""
-    merged_file_paths = []
-    for i in range(merged_file_count):
-        merged_file_ord = i + 1
-
-        merged_filename = ".service{}".format(merged_file_ord)
-        merged_file_path = tmp_dir_path / merged_filename
-
-        merged_file_content = merged_filename * merged_file_ord
-
-        with open(merged_file_path, "w+") as file:
-            file.write(merged_file_content)
-
-        expected_output_file_content += merged_file_content
-        if append_linesep:
-            expected_output_file_content += os.linesep
-
-        merged_file_paths.append(merged_file_path)
-
-    merge(output_file_path, merged_file_paths, append_linesep)
-
-    with open(output_file_path, "r") as output_file:
-        actual_output_file_content = output_file.read()
-
-    assert actual_output_file_content == expected_output_file_content
-
-
-if __name__ == "__main__":
-    main()

From 50135405d71b4632b5c961c504fc066eca844302 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:31:03 -0300
Subject: [PATCH 07/31] chore: remover start-dev.sh (credenciais hardcoded,
 usar Docker)

---
 start-dev.sh | 25 -------------------------
 1 file changed, 25 deletions(-)
 delete mode 100644 start-dev.sh

diff --git a/start-dev.sh b/start-dev.sh
deleted file mode 100644
index 92d064a..0000000
--- a/start-dev.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-
-# Change this value to the local ethernet.
-ethernet=wlp0s20f3
-
-# Linux IP.
-export IP=$(/sbin/ip -o -4 addr list $ethernet | awk '{print $4}' | cut -d/ -f1)
-
-# Mac OS IP.
-#export IP=$(ifconfig $ethernet | grep inet | grep -v inet6 | awk '{print $2}')
-
-export DATABASE_URL=postgres://GVRFlLmcCNfGLhsFvSnCioYOPJPYpyfj:BQ4hSUL4rdj5WZLdR8ilDLRQMvCtzo0caMaXDO0olGsmycQjlcZlTVK9DepZR8kk@$IP:5432/scielo_core
-export CELERY_BROKER_URL=redis://$IP:6379/0
-export USE_DOCKER=no
-export IPYTHONDIR=/app/.ipython
-export REDIS_URL=redis://$IP:6379/0
-export CELERY_FLOWER_USER=PhFRdLexbrsBvrrbSXxjcMMOcVOavCrZ
-export CELERY_FLOWER_PASSWORD=QgScyefPrYhHgO6onW61u0nazc5xdBuP4sM7jMRrBBFuA2RjsFhZLp7xbVYZbrwR
-export EMAIL_HOST=$IP
-export SOLR_URL=http://$IP:8983/solr/
-
-
-docker stop scielo_core_local_django
-# workon scms
-python manage.py runserver_plus 0.0.0.0:8000

From 833ecfc76b6939ccc8e54aa39dddd0871546285a Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:31:03 -0300
Subject: [PATCH 08/31] =?UTF-8?q?refactor:=20remover=20refer=C3=AAncias=20?=
 =?UTF-8?q?a=20article=20e=20journal=20em=20collection=20e=20core?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                  |  9 ++++++++-
 collection/tasks.py         |  4 ++--
 collection/wagtail_hooks.py | 10 +---------
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6342047..dd2c92d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -169,4 +169,11 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
+
+# Local agent/editor state
+.continue/
+temp/
+.envs/.local/.django
+start-dev.sh
+opencode.json
diff --git a/collection/tasks.py b/collection/tasks.py
index 02fd0e7..19372de 100644
--- a/collection/tasks.py
+++ b/collection/tasks.py
@@ -1,14 +1,14 @@
 from django.contrib.auth import get_user_model
 from django.utils.translation import gettext as _
 
-from core.utils.utils import _get_user
+from core.utils.request_utils import _get_user
 from collection.models import Collection
 from config import celery_app
 
 User = get_user_model()
 
 
-@celery_app.task(bind=True, name=_('Load collection data'))
+@celery_app.task(bind=True, name=_('[Collection] Load Collection Data'))
 def task_load_collections(self, user_id=None, username=None):
     user = _get_user(self.request, username=username, user_id=user_id)
     Collection.load(user)
diff --git a/collection/wagtail_hooks.py b/collection/wagtail_hooks.py
index e7b7e97..52b31a8 100644
--- a/collection/wagtail_hooks.py
+++ b/collection/wagtail_hooks.py
@@ -1,8 +1,5 @@
 from django.utils.translation import gettext as _
 from wagtail.snippets.views.snippets import SnippetViewSet
-from wagtail.snippets.models import register_snippet
-
-from config.menu import get_menu_order
 
 from .models import Collection
 
@@ -10,10 +7,8 @@
 class CollectionSnippetViewSet(SnippetViewSet):
     model = Collection
     icon = "folder-open-inverse"
-    menu_name = 'collection'
     menu_label = _("Collection")
-    menu_order = get_menu_order("collection")
-    add_to_admin_menu = True
+    menu_order = 100
 
     list_display = (
         "main_name",
@@ -57,6 +52,3 @@ class CollectionSnippetViewSet(SnippetViewSet):
         "updated_by",
     )
     export_filename = "collections"
-
-
-register_snippet(CollectionSnippetViewSet)

From 4045b12350c78501a1e6ec79606a5e963b0acd68 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:51:02 -0300
Subject: [PATCH 09/31] fix: remover article e journal do INSTALLED_APPS

---
 config/settings/base.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/config/settings/base.py b/config/settings/base.py
index 4e96ed4..9638274 100644
--- a/config/settings/base.py
+++ b/config/settings/base.py
@@ -114,10 +114,8 @@
     "core.users",
     "core_settings",
     # Your stuff: custom apps go here
-    "article",
     "collection",
     "core",
-    "journal",
     "log_manager",
     "log_manager_config",
     "metrics",

From 509408d728182854243ff8769b433fa0612a166f Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:32:50 -0300
Subject: [PATCH 10/31] =?UTF-8?q?feat:=20adicionar=20modelo=20Document=20c?=
 =?UTF-8?q?om=20metadados=20unificados=20e=20migra=C3=A7=C3=A3o?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 document/migrations/0001_initial.py | 279 ++++++++++++++++++++++++++++
 document/migrations/__init__.py     |   1 +
 document/models.py                  | 258 +++++++++++++++++++++++++
 3 files changed, 538 insertions(+)
 create mode 100644 document/migrations/0001_initial.py
 create mode 100644 document/migrations/__init__.py
 create mode 100644 document/models.py

diff --git a/document/migrations/0001_initial.py b/document/migrations/0001_initial.py
new file mode 100644
index 0000000..bff11be
--- /dev/null
+++ b/document/migrations/0001_initial.py
@@ -0,0 +1,279 @@
+# Generated by Django 5.0.7 on 2026-03-15 00:00
+
+import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    initial = True
+
+    dependencies = [
+        ("collection", "0001_initial"),
+        ("source", "0001_initial"),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="Document",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "created",
+                    models.DateTimeField(
+                        auto_now_add=True,
+                        verbose_name="Creation date",
+                    ),
+                ),
+                (
+                    "updated",
+                    models.DateTimeField(
+                        auto_now=True,
+                        verbose_name="Last update date",
+                    ),
+                ),
+                (
+                    "document_type",
+                    models.CharField(
+                        choices=[
+                            ("article", "Article"),
+                            ("preprint", "Preprint"),
+                            ("dataset", "Dataset"),
+                            ("book", "Book"),
+                            ("chapter", "Chapter"),
+                            ("other", "Other"),
+                        ],
+                        db_index=True,
+                        max_length=32,
+                        verbose_name="Document Type",
+                    ),
+                ),
+                (
+                    "document_id",
+                    models.CharField(
+                        db_index=True,
+                        max_length=255,
+                        verbose_name="Document ID",
+                    ),
+                ),
+                (
+                    "scielo_issn",
+                    models.CharField(
+                        blank=True,
+                        db_index=True,
+                        max_length=9,
+                        null=True,
+                        verbose_name="SciELO ISSN",
+                    ),
+                ),
+                (
+                    "pid_v2",
+                    models.CharField(
+                        blank=True,
+                        db_index=True,
+                        max_length=23,
+                        null=True,
+                        verbose_name="PID V2",
+                    ),
+                ),
+                (
+                    "pid_v3",
+                    models.CharField(
+                        blank=True,
+                        db_index=True,
+                        max_length=23,
+                        null=True,
+                        verbose_name="PID V3",
+                    ),
+                ),
+                (
+                    "pid_generic",
+                    models.CharField(
+                        blank=True,
+                        db_index=True,
+                        max_length=255,
+                        null=True,
+                        verbose_name="PID Generic",
+                    ),
+                ),
+                (
+                    "title",
+                    models.CharField(
+                        blank=True,
+                        max_length=500,
+                        null=True,
+                        verbose_name="Document Title",
+                    ),
+                ),
+                (
+                    "identifiers",
+                    models.JSONField(
+                        blank=True,
+                        default=dict,
+                        null=True,
+                        verbose_name="Identifiers",
+                    ),
+                ),
+                (
+                    "files",
+                    models.JSONField(
+                        blank=True,
+                        default=dict,
+                        null=True,
+                        verbose_name="Files",
+                    ),
+                ),
+                (
+                    "default_lang",
+                    models.CharField(
+                        blank=True,
+                        max_length=8,
+                        null=True,
+                        verbose_name="Default Language",
+                    ),
+                ),
+                (
+                    "text_langs",
+                    models.JSONField(
+                        blank=True,
+                        default=list,
+                        null=True,
+                        verbose_name="Text Languages",
+                    ),
+                ),
+                (
+                    "default_media_format",
+                    models.CharField(
+                        blank=True,
+                        max_length=32,
+                        null=True,
+                        verbose_name="Default Media Format",
+                    ),
+                ),
+                (
+                    "processing_date",
+                    models.CharField(
+                        blank=True,
+                        max_length=32,
+                        null=True,
+                        verbose_name="Processing Date",
+                    ),
+                ),
+                (
+                    "publication_date",
+                    models.CharField(
+                        blank=True,
+                        max_length=32,
+                        null=True,
+                        verbose_name="Publication Date",
+                    ),
+                ),
+                (
+                    "publication_year",
+                    models.CharField(
+                        blank=True,
+                        db_index=True,
+                        max_length=4,
+                        null=True,
+                        verbose_name="Publication Year",
+                    ),
+                ),
+                (
+                    "extra_data",
+                    models.JSONField(
+                        blank=True,
+                        default=dict,
+                        null=True,
+                        verbose_name="Extra Data",
+                    ),
+                ),
+                (
+                    "collection",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="collection.collection",
+                        verbose_name="Collection",
+                    ),
+                ),
+                (
+                    "creator",
+                    models.ForeignKey(
+                        editable=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="%(class)s_creator",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Creator",
+                    ),
+                ),
+                (
+                    "parent_document",
+                    models.ForeignKey(
+                        blank=True,
+                        db_index=True,
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="child_documents",
+                        to="document.document",
+                        verbose_name="Parent Document",
+                    ),
+                ),
+                (
+                    "source",
+                    models.ForeignKey(
+                        blank=True,
+                        db_index=True,
+                        null=True,
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="documents",
+                        to="source.source",
+                        verbose_name="Source",
+                    ),
+                ),
+                (
+                    "updated_by",
+                    models.ForeignKey(
+                        blank=True,
+                        editable=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="%(class)s_last_mod_user",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Updater",
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "Document",
+                "verbose_name_plural": "Documents",
+                "unique_together": {("collection", "document_type", "document_id")},
+                "indexes": [
+                    models.Index(
+                        fields=["collection", "document_type"],
+                        name="document_collection_type_idx",
+                    ),
+                    models.Index(
+                        fields=["collection", "scielo_issn"],
+                        name="document_collection_issn_idx",
+                    ),
+                    models.Index(
+                        fields=["collection", "pid_v2"],
+                        name="document_collection_pidv2_idx",
+                    ),
+                    models.Index(
+                        fields=["collection", "pid_generic"],
+                        name="doc_coll_pidgen_idx",
+                    ),
+                ],
+            },
+        ),
+    ]
diff --git a/document/migrations/__init__.py b/document/migrations/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/document/migrations/__init__.py
@@ -0,0 +1 @@
+
diff --git a/document/models.py b/document/models.py
new file mode 100644
index 0000000..5197692
--- /dev/null
+++ b/document/models.py
@@ -0,0 +1,258 @@
+from django.db import models
+from django.utils.translation import gettext_lazy as _
+
+from collection.models import Collection
+from core.models import CommonControlField
+from source.models import Source
+
+
+class Document(CommonControlField):
+    DOCUMENT_TYPE_ARTICLE = "article"
+    DOCUMENT_TYPE_PREPRINT = "preprint"
+    DOCUMENT_TYPE_DATASET = "dataset"
+    DOCUMENT_TYPE_BOOK = "book"
+    DOCUMENT_TYPE_CHAPTER = "chapter"
+    DOCUMENT_TYPE_OTHER = "other"
+    DOCUMENT_TYPE_CHOICES = (
+        (DOCUMENT_TYPE_ARTICLE, _("Article")),
+        (DOCUMENT_TYPE_PREPRINT, _("Preprint")),
+        (DOCUMENT_TYPE_DATASET, _("Dataset")),
+        (DOCUMENT_TYPE_BOOK, _("Book")),
+        (DOCUMENT_TYPE_CHAPTER, _("Chapter")),
+        (DOCUMENT_TYPE_OTHER, _("Other")),
+    )
+
+    collection = models.ForeignKey(
+        Collection,
+        verbose_name=_("Collection"),
+        on_delete=models.CASCADE,
+        blank=False,
+        null=False,
+        db_index=True,
+    )
+
+    source = models.ForeignKey(
+        Source,
+        verbose_name=_("Source"),
+        on_delete=models.CASCADE,
+        related_name="documents",
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    parent_document = models.ForeignKey(
+        "self",
+        verbose_name=_("Parent Document"),
+        on_delete=models.SET_NULL,
+        related_name="child_documents",
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    document_type = models.CharField(
+        verbose_name=_("Document Type"),
+        max_length=32,
+        choices=DOCUMENT_TYPE_CHOICES,
+        blank=False,
+        null=False,
+        db_index=True,
+    )
+
+    document_id = models.CharField(
+        verbose_name=_("Document ID"),
+        max_length=255,
+        blank=False,
+        null=False,
+        db_index=True,
+    )
+
+    scielo_issn = models.CharField(
+        verbose_name=_("SciELO ISSN"),
+        max_length=9,
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    pid_v2 = models.CharField(
+        verbose_name=_("PID V2"),
+        max_length=23,
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    pid_v3 = models.CharField(
+        verbose_name=_("PID V3"),
+        max_length=23,
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    pid_generic = models.CharField(
+        verbose_name=_("PID Generic"),
+        max_length=255,
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    title = models.CharField(
+        verbose_name=_("Document Title"),
+        max_length=500,
+        blank=True,
+        null=True,
+    )
+
+    identifiers = models.JSONField(
+        verbose_name=_("Identifiers"),
+        null=True,
+        blank=True,
+        default=dict,
+    )
+
+    files = models.JSONField(
+        verbose_name=_("Files"),
+        null=True,
+        blank=True,
+        default=dict,
+    )
+
+    default_lang = models.CharField(
+        verbose_name=_("Default Language"),
+        max_length=8,
+        blank=True,
+        null=True,
+    )
+
+    text_langs = models.JSONField(
+        verbose_name=_("Text Languages"),
+        null=True,
+        blank=True,
+        default=list,
+    )
+
+    default_media_format = models.CharField(
+        verbose_name=_("Default Media Format"),
+        max_length=32,
+        blank=True,
+        null=True,
+    )
+
+    processing_date = models.CharField(
+        verbose_name=_("Processing Date"),
+        max_length=32,
+        blank=True,
+        null=True,
+    )
+
+    publication_date = models.CharField(
+        verbose_name=_("Publication Date"),
+        max_length=32,
+        blank=True,
+        null=True,
+    )
+
+    publication_year = models.CharField(
+        verbose_name=_("Publication Year"),
+        max_length=4,
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    extra_data = models.JSONField(
+        verbose_name=_("Extra Data"),
+        null=True,
+        blank=True,
+        default=dict,
+    )
+
+    def __str__(self):
+        return f"{self.collection.acron3} - {self.document_type} - {self.document_id}"
+
+    @classmethod
+    def metadata(cls, collection=None):
+        """Yield one metadata dict per document, optionally limited to ``collection``."""
+        # parent_document is joined too; otherwise each row's parent lookup is an extra query.
+        queryset = cls.objects.select_related("collection", "source", "parent_document").only(
+            "collection__acron3",
+            "default_lang",
+            "default_media_format",
+            "document_id",
+            "document_type",
+            "extra_data",
+            "files",
+            "identifiers",
+            "parent_document__document_id",
+            "pid_generic",
+            "pid_v2",
+            "pid_v3",
+            "processing_date",
+            "publication_date",
+            "publication_year",
+            "scielo_issn",
+            "source__scielo_issn",
+            "source__source_id",
+            "source__source_type",
+            "text_langs",
+            "title",
+        )
+        if collection:
+            queryset = queryset.filter(collection=collection)
+
+        for document in queryset.iterator():
+            source = document.source
+            parent = document.parent_document
+            yield {
+                "collection": document.collection.acron3,
+                "default_lang": document.default_lang,
+                "default_media_format": document.default_media_format,
+                "document_id": document.document_id,
+                "document_type": document.document_type,
+                "extra_data": document.extra_data or {},
+                "files": document.files or {},
+                "identifiers": document.identifiers or {},
+                "parent_document_id": parent.document_id if parent is not None else None,
+                "pid_generic": document.pid_generic,
+                "pid_v2": document.pid_v2,
+                "pid_v3": document.pid_v3,
+                "processing_date": document.processing_date,
+                "publication_date": document.publication_date,
+                "publication_year": document.publication_year,
+                "scielo_issn": document.scielo_issn or (source.scielo_issn if source else None),
+                "source_id": source.source_id if source else None,
+                "source_type": source.source_type if source else None,
+                "text_langs": document.text_langs or [],
+                "title": document.title,
+            }
+
+    class Meta:
+        verbose_name = _("Document")
+        verbose_name_plural = _("Documents")
+        unique_together = (
+            "collection",
+            "document_type",
+            "document_id",
+        )
+        indexes = [
+            models.Index(
+                fields=["collection", "document_type"],
+                name="document_collection_type_idx",
+            ),
+            models.Index(
+                fields=["collection", "scielo_issn"],
+                name="document_collection_issn_idx",
+            ),
+            models.Index(
+                fields=["collection", "pid_v2"],
+                name="document_collection_pidv2_idx",
+            ),
+            models.Index(
+                fields=["collection", "pid_generic"],
+                name="doc_coll_pidgen_idx",
+            ),
+        ]

From f49779e83a1da617b6236ae69a0980305366a7f4 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:07 -0300
Subject: [PATCH 11/31] =?UTF-8?q?feat:=20adicionar=20servi=C3=A7os=20Docum?=
 =?UTF-8?q?ent=20para=20artigos=20(ArticleMeta=20+=20OPAC)=20e=20helpers?=
 =?UTF-8?q?=20comuns?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 document/services/articles.py | 166 ++++++++++++++++++++++++++++++++++
 document/services/common.py   |  58 ++++++++++++
 2 files changed, 224 insertions(+)
 create mode 100644 document/services/articles.py
 create mode 100644 document/services/common.py

diff --git a/document/services/articles.py b/document/services/articles.py
new file mode 100644
index 0000000..09244b3
--- /dev/null
+++ b/document/services/articles.py
@@ -0,0 +1,166 @@
+from document.models import Document
+
+from .common import build_document_id, compact_dict, get_existing_document, normalize_langs, normalize_year
+
+
+def upsert_article_document_from_articlemeta(
+    payload, collection, source=None, user=None, force_update=True
+):
+    """Create or update the article Document described by an ArticleMeta payload.
+
+    The document id is the first non-empty value among ``payload["code"]``
+    (PID v2), ``payload["pid_v3"]`` and ``payload["pid_generic"]``; without
+    one, nothing is saved and None is returned. Fields are written only for
+    new documents or when ``force_update`` is true. ``source`` and
+    ``scielo_issn`` always follow the ``source`` argument; most other fields
+    keep their stored value when the payload value is empty. ``user``, if
+    given, becomes ``creator`` on creation and ``updated_by`` on every call.
+
+    Returns the saved Document, or None when the payload has no identifier.
+    """
+    pid_v2 = payload.get("code")
+    document_id = build_document_id(pid_v2, payload.get("pid_v3"), payload.get("pid_generic"))
+    if not document_id:
+        return None
+
+    document = get_existing_document(
+        collection, Document.DOCUMENT_TYPE_ARTICLE, document_id, pid_v2
+    )
+    created = document is None
+    if created:
+        document = Document(
+            collection=collection,
+            document_type=Document.DOCUMENT_TYPE_ARTICLE,
+            document_id=document_id,
+        )
+        if user:
+            document.creator = user
+
+    if created or force_update:
+        document.source = source
+        document.parent_document = None
+        document.scielo_issn = source.scielo_issn if source else None
+        document.pid_v2 = pid_v2 or document.pid_v2
+        document.pid_v3 = payload.get("pid_v3") or document.pid_v3
+        document.pid_generic = payload.get("pid_generic") or document.pid_generic
+        document.title = payload.get("title") or document.title
+        document.identifiers = _merge_dicts(
+            document.identifiers,
+            _build_articlemeta_identifiers(payload, source),
+        )
+        document.files = payload.get("pdfs") or document.files or {}
+        document.default_lang = payload.get("default_language") or document.default_lang
+        # Keep the stored languages when the payload has none, as the OPAC upsert does.
+        document.text_langs = normalize_langs(payload.get("text_langs")) or document.text_langs or []
+        document.processing_date = payload.get("processing_date") or document.processing_date
+        document.publication_date = payload.get("publication_date") or document.publication_date
+        document.publication_year = normalize_year(
+            payload.get("publication_year"),
+            fallback_date=document.publication_date,
+        )
+        document.extra_data = _merge_dicts(
+            document.extra_data,
+            compact_dict({"provider": "articlemeta", "issn_codes": payload.get("code_title")}),
+        )
+
+    if user:
+        document.updated_by = user
+
+    document.save()
+    return document
+
+
+def upsert_article_document_from_opac(
+    payload, collection, source=None, user=None, force_update=True
+):
+    """Create or update the article Document described by an OPAC payload.
+
+    The document id is the first non-empty value among the payload's
+    ``pid_v2``, ``pid_v3`` and ``pid_generic``; without one, nothing is saved
+    and None is returned. An existing row is looked up by any of those ids
+    before a new Document is built. Fields are written only for new documents
+    or when ``force_update`` is true: ``source`` and ``scielo_issn`` follow
+    the ``source`` argument, while most other fields keep their stored value
+    when the payload value is empty. ``default_media_format`` and
+    ``processing_date`` are left as stored. ``user``, if given, becomes
+    ``creator`` on creation and ``updated_by`` on every call.
+
+    Returns the saved Document, or None when the payload has no identifier.
+    """
+    pid_v2, pid_v3 = payload.get("pid_v2"), payload.get("pid_v3")
+    pid_generic = payload.get("pid_generic")
+    document_id = build_document_id(pid_v2, pid_v3, pid_generic)
+    if not document_id:
+        return None
+
+    article_type = Document.DOCUMENT_TYPE_ARTICLE
+    document = get_existing_document(
+        collection, article_type, document_id, pid_v2, pid_v3, pid_generic
+    )
+    created = document is None
+    if created:
+        document = Document(
+            collection=collection,
+            document_type=article_type,
+            document_id=document_id,
+        )
+        if user:
+            document.creator = user
+
+    if created or force_update:
+        source_issn = source.scielo_issn if source else None
+        langs = normalize_langs(payload.get("text_langs"))
+        provider_data = {"provider": "opac", "journal_acronym": payload.get("journal_acronym")}
+
+        document.source = source
+        document.parent_document = None
+        document.scielo_issn = source_issn
+        document.pid_v2 = pid_v2 or document.pid_v2
+        document.pid_v3 = pid_v3 or document.pid_v3
+        document.pid_generic = pid_generic or document.pid_generic
+        document.title = payload.get("title") or document.title
+        document.identifiers = _merge_dicts(
+            document.identifiers, _build_opac_identifiers(payload, source)
+        )
+        # Files are not read from the payload; just avoid storing None.
+        document.files = document.files or {}
+        document.default_lang = payload.get("default_language") or document.default_lang
+        document.text_langs = langs or document.text_langs or []
+        document.publication_date = payload.get("publication_date") or document.publication_date
+        # The year comes from the payload, else from the leading characters of the date.
+        document.publication_year = normalize_year(
+            payload.get("publication_year"), fallback_date=document.publication_date
+        )
+        document.extra_data = _merge_dicts(document.extra_data, compact_dict(provider_data))
+
+    if user:
+        document.updated_by = user
+
+    document.save()
+    return document
+
+
+def _build_articlemeta_identifiers(payload, source):
+    """Return the non-empty ArticleMeta identifiers: PID v2 and the source's SciELO ISSN."""
+    source_issn = None
+    if source:
+        source_issn = source.scielo_issn
+    identifiers = {"pid_v2": payload.get("code"), "scielo_issn": source_issn}
+    return compact_dict(identifiers)
+
+
+def _build_opac_identifiers(payload, source):
+    """Return the non-empty OPAC identifiers for a payload and its optional source."""
+    source_issn = None
+    if source:
+        source_issn = source.scielo_issn
+    identifiers = {key: payload.get(key) for key in ("pid_v2", "pid_v3")}
+    identifiers["scielo_issn"] = source_issn
+    identifiers["journal_acronym"] = payload.get("journal_acronym")
+    return compact_dict(identifiers)
+
+
+def _merge_dicts(current, new_values):
+    """Return a new dict: ``current`` overlaid with ``new_values`` (falsy counts as empty)."""
+    # Unpacking into a fresh literal copies, so neither argument is mutated.
+    return {**(current or {}), **(new_values or {})}
diff --git a/document/services/common.py b/document/services/common.py
new file mode 100644
index 0000000..91e103d
--- /dev/null
+++ b/document/services/common.py
@@ -0,0 +1,58 @@
+from document.models import Document
+
+
+def build_document_id(*values):
+    """Return the first value that is neither None nor an empty string, as ``str``."""
+    usable = (str(item) for item in values if item not in (None, ""))
+    # next() with a default reproduces the fall-through to None when nothing qualifies.
+    return next(usable, None)
+
+
+def get_existing_document(collection, document_type, *identifiers):
+    """Return the first Document of this collection/type matching an identifier.
+
+    Non-empty identifiers (as ``str``) are tried against ``document_id``, then
+    ``pid_v2``, ``pid_v3`` and ``pid_generic``; None is returned on no match.
+    """
+    # Drop repeated identifiers (order kept) so the same lookup is never issued twice.
+    candidates = list(dict.fromkeys(str(v) for v in identifiers if v not in (None, "")))
+    if not candidates:
+        return None
+    queryset = Document.objects.filter(collection=collection, document_type=document_type)
+    for field_name in ("document_id", "pid_v2", "pid_v3", "pid_generic"):
+        for candidate in candidates:
+            document = queryset.filter(**{field_name: candidate}).first()
+            if document:
+                return document
+    return None
+
+
+def normalize_langs(value):
+    """Coerce a language payload to a list; falsy input yields [].
+
+    Lists lose their None/empty-string items, dicts contribute the keys whose
+    value is truthy, and any other value is wrapped in a one-item list.
+    """
+    if isinstance(value, list):
+        return [lang for lang in value if lang not in (None, "")]
+    if isinstance(value, dict):
+        return [lang for lang in value if value[lang]]
+    return [value] if value else []
+
+
+def normalize_year(value, fallback_date=None):
+    """Return the first four characters of ``value``, else of ``fallback_date``.
+
+    A candidate equal to None or the empty string is skipped; None when both are.
+    """
+    # Lazy generator: the fallback is only stringified when ``value`` is unusable.
+    usable = (str(item)[:4] for item in (value, fallback_date) if item not in (None, ""))
+    return next(usable, None)
+
+
+def compact_dict(data):
+    """Return a copy of ``data`` without entries equal to None, "", [], {} or ()."""
+    empties = (None, "", [], {}, ())
+    # 0 and False compare unequal to every marker above, so they are kept.
+    kept = ((key, data[key]) for key in data if data[key] not in empties)
+    return dict(kept)

From 760222c3856a410588ddca1bed17d3bdbe55990d Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:07 -0300
Subject: [PATCH 12/31] =?UTF-8?q?feat:=20adicionar=20servi=C3=A7os=20Docum?=
 =?UTF-8?q?ent=20para=20books,=20datasets=20e=20preprints?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 document/services/__init__.py  |   1 +
 document/services/books.py     | 256 +++++++++++++++++++++++++++++++++
 document/services/datasets.py  |  69 +++++++++
 document/services/preprints.py |  58 ++++++++
 4 files changed, 384 insertions(+)
 create mode 100644 document/services/__init__.py
 create mode 100644 document/services/books.py
 create mode 100644 document/services/datasets.py
 create mode 100644 document/services/preprints.py

diff --git a/document/services/__init__.py b/document/services/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/document/services/__init__.py
@@ -0,0 +1 @@
+
diff --git a/document/services/books.py b/document/services/books.py
new file mode 100644
index 0000000..96d92e1
--- /dev/null
+++ b/document/services/books.py
@@ -0,0 +1,256 @@
+from document.models import Document
+
+
+def build_book_pid_generic(book_id):
+    """Return ``book:<book_id>``, or None when ``book_id`` is None or an empty string."""
+    has_id = book_id not in (None, "")
+    return f"book:{book_id}" if has_id else None
+
+
+def build_chapter_pid_generic(book_id, chapter_id):
+    """Return ``book:<book_id>/chapter:<chapter_id>``; None if either id is None or empty."""
+    ids_present = all(part not in (None, "") for part in (book_id, chapter_id))
+    return f"book:{book_id}/chapter:{chapter_id}" if ids_present else None
+
+
+def enrich_part_payload(payload, monograph_payload):
+    """Return a copy of ``payload`` carrying ``monograph_*`` fields of its book.
+
+    ``title``, ``language``, ``publication_date``, ``year``, ``publisher``,
+    ``isbn``, ``eisbn``, ``doi_number`` and ``creators`` are copied from
+    ``monograph_payload`` under a ``monograph_`` prefix (None when absent).
+    A falsy ``monograph_payload`` returns ``payload`` itself, not a copy.
+    """
+    if not monograph_payload:
+        return payload
+    fields = (
+        "title", "language", "publication_date", "year", "publisher",
+        "isbn", "eisbn", "doi_number", "creators",
+    )
+    return {**payload, **{f"monograph_{key}": monograph_payload.get(key) for key in fields}}
+
+
+def upsert_monograph_document(
+    payload, collection, source=None, user=None,
+    force_update=True, source_url=None, last_seq=None,
+):
+    """Create or update the book Document for a ``Monograph`` payload.
+
+    Returns None, saving nothing, unless ``payload["TYPE"]`` is "Monograph"
+    and ``payload["id"]`` is non-empty. On creation, or when ``force_update``
+    is true, every descriptive field is rebuilt from the payload. ``user``, if
+    given, becomes ``creator`` on creation and ``updated_by`` on every call.
+    """
+    # A missing id used to be stringified into the bogus key "book:None".
+    if payload.get("TYPE") != "Monograph" or payload.get("id") in (None, ""):
+        return None
+
+    book_id = str(payload.get("id"))
+    pid_generic = build_book_pid_generic(book_id)
+    document, created = Document.objects.get_or_create(
+        collection=collection,
+        document_type=Document.DOCUMENT_TYPE_BOOK,
+        document_id=pid_generic,
+    )
+    if created and user:
+        document.creator = user
+
+    if created or force_update:
+        document.source = source
+        document.parent_document = None
+        document.scielo_issn = None
+        document.pid_v2 = None
+        document.pid_v3 = None
+        document.pid_generic = pid_generic
+        document.title = payload.get("title") or book_id
+        document.identifiers = _build_monograph_identifiers(payload)
+        document.files = {}
+        document.default_lang = payload.get("language") or None
+        document.text_langs = _unique_list(payload.get("language"))
+        document.default_media_format = None
+        document.processing_date = None
+        document.publication_date = payload.get("publication_date") or None
+        document.publication_year = _normalize_year(payload.get("year"))
+        document.extra_data = _build_monograph_extra_data(
+            payload, source_url=source_url, last_seq=last_seq
+        )
+
+    if user:
+        document.updated_by = user
+
+    document.save()
+    return document
+
+
+def upsert_part_document(
+    payload, collection, source=None, parent_document=None, user=None,
+    force_update=True, source_url=None, last_seq=None,
+):
+    """Create or update the chapter Document for a ``Part`` payload.
+
+    Returns None, saving nothing, when ``payload["TYPE"]`` is not ``"Part"``
+    or when ``payload["monograph"]`` / ``payload["id"]`` is missing or empty.
+    On creation, or when ``force_update`` is true, every descriptive field is
+    rebuilt from the payload; the ``monograph_*`` keys it reads are the ones
+    ``enrich_part_payload`` adds. ``parent_document`` is stored as given.
+    ``user``, if given, is recorded as ``creator`` on creation and as
+    ``updated_by`` on every call.
+    """
+    if payload.get("TYPE") != "Part":
+        return None
+
+    book_id = payload.get("monograph")
+    chapter_id = payload.get("id")
+    pid_generic = build_chapter_pid_generic(book_id, chapter_id)
+    if pid_generic is None:
+        # Without both ids there is no document_id to create or look up.
+        return None
+
+    document, created = Document.objects.get_or_create(
+        collection=collection,
+        document_type=Document.DOCUMENT_TYPE_CHAPTER,
+        document_id=pid_generic,
+    )
+    if created and user:
+        document.creator = user
+
+    if created or force_update:
+        language = payload.get("text_language") or payload.get("monograph_language")
+        document.source = source
+        document.parent_document = parent_document
+        document.scielo_issn = None
+        document.pid_v2 = None
+        document.pid_v3 = None
+        document.pid_generic = pid_generic
+        document.title = payload.get("title") or str(chapter_id)
+        document.identifiers = _build_part_identifiers(payload)
+        document.files = {}
+        document.default_lang = language or None
+        document.text_langs = _unique_list(language)
+        document.default_media_format = None
+        document.processing_date = None
+        document.publication_date = payload.get("monograph_publication_date") or None
+        document.publication_year = _normalize_year(payload.get("monograph_year"))
+        document.extra_data = _build_part_extra_data(
+            payload, source_url=source_url, last_seq=last_seq
+        )
+
+    if user:
+        document.updated_by = user
+
+    document.save()
+    return document
+
+
+def delete_book_document(collection, book_id):
+    """Delete the book Document stored under ``book_id``; return ``delete()``'s result."""
+    lookup = {"collection": collection, "document_type": Document.DOCUMENT_TYPE_BOOK}
+    lookup["document_id"] = build_book_pid_generic(book_id)
+    # The queryset's delete() result is handed back to the caller unchanged.
+    return Document.objects.filter(**lookup).delete()
+
+
+def delete_document_by_raw_id(collection, raw_id):
+    """Delete every Document of ``collection`` whose ``extra_data["raw_id"]`` is ``str(raw_id)``."""
+    # No document_type filter: any type carrying this raw id is removed.
+    matches = Document.objects.filter(collection=collection, extra_data__raw_id=str(raw_id))
+    return matches.delete()
+
+
+def has_monograph_document_for_raw_id(collection, raw_id):
+    """Tell whether ``collection`` has a book Document whose raw id is ``str(raw_id)``."""
+    books = Document.objects.filter(
+        collection=collection, document_type=Document.DOCUMENT_TYPE_BOOK
+    )
+    return books.filter(extra_data__raw_id=str(raw_id)).exists()
+
+
+def get_monograph_document(collection, book_id):
+    """Return the book Document stored under ``book_id`` in ``collection``, or None."""
+    books = Document.objects.filter(
+        collection=collection, document_type=Document.DOCUMENT_TYPE_BOOK
+    )
+    return books.filter(document_id=build_book_pid_generic(book_id)).first()
+
+
+def _build_monograph_identifiers(payload):
+    """Return the book's non-empty identifiers: ``book_id``, ``isbn``, ``eisbn`` and ``doi``."""
+    raw_id = payload.get("id")
+    identifiers = {"book_id": None if raw_id is None else str(raw_id)}
+    identifiers["isbn"] = payload.get("isbn")
+    identifiers["eisbn"] = payload.get("eisbn")
+    identifiers["doi"] = payload.get("doi_number")
+    return _compact_dict(identifiers)
+
+
+def _build_part_identifiers(payload):
+    """Return the chapter's non-empty identifiers, including those of its book."""
+    book_id, chapter_id = payload.get("monograph"), payload.get("id")
+    identifiers = {"book_id": None if book_id is None else str(book_id)}
+    identifiers["chapter_id"] = None if chapter_id is None else str(chapter_id)
+    identifiers["isbn"] = payload.get("monograph_isbn")
+    identifiers["eisbn"] = payload.get("monograph_eisbn")
+    identifiers["doi"] = payload.get("doi_number")
+    identifiers["book_doi"] = payload.get("monograph_doi_number")
+    return _compact_dict(identifiers)
+
+
+def _build_monograph_extra_data(payload, source_url=None, last_seq=None):
+    """Return the non-empty provenance and descriptive extras stored for a book."""
+    raw_id = payload.get("id")
+    extra_data = {
+        "raw_id": None if raw_id is None else str(raw_id),
+        "raw_type": payload.get("TYPE"),
+        "source_url": source_url,
+        "last_seq": last_seq,
+    }
+    # These payload keys are stored under the same name; _compact_dict then
+    # drops the ones whose value is None or empty.
+    copied_keys = (
+        "visible", "city", "country", "pages", "publisher",
+        "creators", "translated_titles", "translated_synopses", "synopsis",
+    )
+    extra_data.update((key, payload.get(key)) for key in copied_keys)
+    return _compact_dict(extra_data)
+
+
+def _build_part_extra_data(payload, source_url=None, last_seq=None):
+    """Return the non-empty provenance and descriptive extras stored for a chapter."""
+    raw_id, monograph_id = payload.get("id"), payload.get("monograph")
+    extra_data = {
+        "raw_id": None if raw_id is None else str(raw_id),
+        "raw_type": payload.get("TYPE"),
+        "source_url": source_url,
+        "last_seq": last_seq,
+    }
+    # Both key groups are copied verbatim from the payload; "monograph_id" is
+    # inserted between them so the resulting key order stays the same.
+    chapter_keys = ("visible", "order", "pages", "creators", "translated_titles")
+    monograph_keys = (
+        "monograph_title", "monograph_language", "monograph_publication_date",
+        "monograph_year", "monograph_publisher", "monograph_creators",
+    )
+    extra_data.update((key, payload.get(key)) for key in chapter_keys)
+    extra_data["monograph_id"] = None if monograph_id is None else str(monograph_id)
+    extra_data.update((key, payload.get(key)) for key in monograph_keys)
+    return _compact_dict(extra_data)
+
+
+def _unique_list(value):
+    """Wrap a truthy ``value`` in a one-item list; any falsy value gives []."""
+    # Despite the name nothing is de-duplicated: the result has 0 or 1 items.
+    return [value] if value else []
+
+
+def _normalize_year(value):
+    """Return the first four characters of ``str(value)``; None for None or empty string."""
+    is_blank = value in (None, "")
+    return None if is_blank else str(value)[:4]
+
+
+def _compact_dict(data):
+    """Return a copy of ``data`` without entries equal to None, "", [], {} or ()."""
+    empty_markers = (None, "", [], {}, ())
+    # filter() keeps the (key, value) pairs in their original order.
+    kept_items = filter(lambda item: item[1] not in empty_markers, data.items())
+    return dict(kept_items)
diff --git a/document/services/datasets.py b/document/services/datasets.py
new file mode 100644
index 0000000..2496b20
--- /dev/null
+++ b/document/services/datasets.py
@@ -0,0 +1,69 @@
+from document.models import Document
+
+from .common import compact_dict, normalize_year
+
+
+def upsert_dataset_document(
+    payload,
+    collection,
+    user=None,
+    force_update=True,
+):
+    """Create or update the dataset Document keyed by ``payload["dataset_doi"]``.
+
+    On creation, or when ``force_update`` is true, the file described by the
+    payload (``file_id``, ``file_name``, ``file_url``, ``file_persistent_id``)
+    is merged into the stored ``files`` mapping under ``str(file_id)``, the
+    identifiers and extra data are replaced, and source, parent and SciELO
+    PIDs are cleared. ``user``, if given, becomes ``creator`` on creation and
+    ``updated_by`` on every call.
+
+    Returns the saved Document, or None when the payload has no ``dataset_doi``.
+    """
+    dataset_doi = payload.get("dataset_doi")
+    if not dataset_doi:
+        return None
+
+    document, created = Document.objects.get_or_create(
+        collection=collection,
+        document_type=Document.DOCUMENT_TYPE_DATASET,
+        document_id=dataset_doi,
+    )
+    if created and user:
+        document.creator = user
+
+    if created or force_update:
+        # Merge this payload's file (if any) into a copy of the stored mapping.
+        files = dict(document.files or {})
+        file_id = payload.get("file_id")
+        if file_id:
+            file_entry = {
+                "name": payload.get("file_name"),
+                "url": payload.get("file_url"),
+                "file_persistent_id": payload.get("file_persistent_id"),
+            }
+            files[str(file_id)] = compact_dict(file_entry)
+        document.files = files
+
+        document.source = None
+        document.parent_document = None
+        document.scielo_issn = None
+        document.pid_v2 = None
+        document.pid_v3 = None
+        document.pid_generic = dataset_doi
+        document.title = payload.get("title") or document.title
+        document.identifiers = compact_dict({"dataset_doi": dataset_doi})
+        document.text_langs = document.text_langs or []
+        document.publication_date = payload.get("dataset_published") or document.publication_date
+        # No year is read from the payload; it is derived from the date.
+        document.publication_year = normalize_year(
+            None,
+            fallback_date=document.publication_date,
+        )
+        document.extra_data = compact_dict({"provider": "dataverse"})
+
+    if user:
+        document.updated_by = user
+
+    document.save()
+    return document
diff --git a/document/services/preprints.py b/document/services/preprints.py
new file mode 100644
index 0000000..4be89f1
--- /dev/null
+++ b/document/services/preprints.py
@@ -0,0 +1,58 @@
+from document.models import Document
+
+from .common import compact_dict, normalize_langs, normalize_year
+
+
+def upsert_preprint_document(
+    payload,
+    collection,
+    user=None,
+    force_update=True,
+):
+    """Create or update the preprint Document keyed by ``payload["pid_generic"]``.
+
+    Returns None, saving nothing, when the payload has no ``pid_generic``.
+    On creation, or when ``force_update`` is true, source, parent and SciELO
+    fields are cleared, identifiers and extra data are replaced, and title,
+    languages and publication date come from the payload, keeping the stored
+    value when the payload one is empty. ``user``, if given, becomes
+    ``creator`` on creation and ``updated_by`` on every call.
+    """
+    pid_generic = payload.get("pid_generic")
+    if not pid_generic:
+        return None
+
+    document, created = Document.objects.get_or_create(
+        collection=collection,
+        document_type=Document.DOCUMENT_TYPE_PREPRINT,
+        document_id=pid_generic,
+    )
+    if created and user:
+        document.creator = user
+
+    if created or force_update:
+        document.source = None
+        document.parent_document = None
+        document.scielo_issn = None
+        document.pid_v2 = None
+        document.pid_v3 = None
+        document.pid_generic = pid_generic
+        document.title = payload.get("title") or document.title
+        document.identifiers = compact_dict({"pid_generic": pid_generic})
+        document.files = document.files or {}
+        document.default_lang = payload.get("default_language") or document.default_lang
+        # A payload without languages must not wipe the ones already stored,
+        # matching how title, default_lang and publication_date are handled.
+        document.text_langs = normalize_langs(payload.get("text_langs")) or document.text_langs or []
+        document.publication_date = payload.get("publication_date") or document.publication_date
+        document.publication_year = normalize_year(
+            payload.get("publication_year"),
+            fallback_date=document.publication_date,
+        )
+        document.extra_data = compact_dict({"provider": "preprints"})
+
+    if user:
+        document.updated_by = user
+
+    document.save()
+    return document

From f1f43803953d9f014c3fc603fbf9cc077733bbf1 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:16 -0300
Subject: [PATCH 13/31] feat: adicionar modelo Source com tipos journal, book e
 server

---
 source/migrations/0001_initial.py            | 210 ++++++++++++++++++
 source/migrations/0002_source_access_type.py |  25 +++
 source/migrations/0003_alter_source_title.py |  15 ++
 source/migrations/__init__.py                |   1 +
 source/models.py                             | 219 +++++++++++++++++++
 5 files changed, 470 insertions(+)
 create mode 100644 source/migrations/0001_initial.py
 create mode 100644 source/migrations/0002_source_access_type.py
 create mode 100644 source/migrations/0003_alter_source_title.py
 create mode 100644 source/migrations/__init__.py
 create mode 100644 source/models.py

diff --git a/source/migrations/0001_initial.py b/source/migrations/0001_initial.py
new file mode 100644
index 0000000..cc736e3
--- /dev/null
+++ b/source/migrations/0001_initial.py
@@ -0,0 +1,210 @@
+# Generated by Django 5.0.7 on 2026-03-15 00:00
+
+import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # Initial schema of the "source" app: creates the Source model.
+    initial = True
+
+    dependencies = [  # targets of the foreign keys declared below
+        ("collection", "0001_initial"),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="Source",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "created",
+                    models.DateTimeField(
+                        auto_now_add=True,
+                        verbose_name="Creation date",
+                    ),
+                ),
+                (
+                    "updated",
+                    models.DateTimeField(
+                        auto_now=True,
+                        verbose_name="Last update date",
+                    ),
+                ),
+                (
+                    "source_type",
+                    models.CharField(
+                        choices=[
+                            ("journal", "Journal"),
+                            ("book", "Book"),
+                            ("preprint_server", "Preprint Server"),
+                            ("data_repository", "Data Repository"),
+                            ("other", "Other"),
+                        ],
+                        db_index=True,
+                        max_length=32,
+                        verbose_name="Source Type",
+                    ),
+                ),
+                (
+                    "source_id",
+                    models.CharField(
+                        db_index=True,
+                        max_length=255,
+                        verbose_name="Source ID",
+                    ),
+                ),
+                (
+                    "scielo_issn",
+                    models.CharField(
+                        blank=True,
+                        db_index=True,
+                        max_length=9,
+                        null=True,
+                        verbose_name="SciELO ISSN",
+                    ),
+                ),
+                (
+                    "acronym",
+                    models.CharField(
+                        blank=True,
+                        default="",
+                        max_length=64,
+                        null=True,
+                        verbose_name="Source Acronym",
+                    ),
+                ),
+                (
+                    "title",
+                    models.CharField(
+                        max_length=255,
+                        verbose_name="Source Title",
+                    ),
+                ),
+                (
+                    "identifiers",
+                    models.JSONField(
+                        blank=True,
+                        default=dict,
+                        null=True,
+                        verbose_name="Identifiers",
+                    ),
+                ),
+                (
+                    "publisher_name",
+                    models.JSONField(
+                        blank=True,
+                        default=list,
+                        null=True,
+                        verbose_name="Publisher Name",
+                    ),
+                ),
+                (
+                    "subject_areas",
+                    models.JSONField(
+                        default=list,
+                        verbose_name="Subject Areas (CAPES)",
+                    ),
+                ),
+                (
+                    "wos_subject_areas",
+                    models.JSONField(
+                        default=list,
+                        verbose_name="Subject Areas (WoS)",
+                    ),
+                ),
+                (
+                    "default_lang",
+                    models.CharField(
+                        blank=True,
+                        max_length=8,
+                        null=True,
+                        verbose_name="Default Language",
+                    ),
+                ),
+                (
+                    "publication_date",
+                    models.CharField(
+                        blank=True,
+                        max_length=32,
+                        null=True,
+                        verbose_name="Publication Date",
+                    ),
+                ),
+                (
+                    "publication_year",
+                    models.CharField(
+                        blank=True,
+                        db_index=True,
+                        max_length=4,
+                        null=True,
+                        verbose_name="Publication Year",
+                    ),
+                ),
+                (
+                    "extra_data",
+                    models.JSONField(
+                        blank=True,
+                        default=dict,
+                        null=True,
+                        verbose_name="Extra Data",
+                    ),
+                ),
+                (
+                    "collection",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="collection.collection",
+                        verbose_name="Collection",
+                    ),
+                ),
+                (
+                    "creator",
+                    models.ForeignKey(
+                        editable=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="%(class)s_creator",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Creator",
+                    ),
+                ),
+                (
+                    "updated_by",
+                    models.ForeignKey(
+                        blank=True,
+                        editable=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="%(class)s_last_mod_user",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Updater",
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "Source",
+                "verbose_name_plural": "Sources",
+                "unique_together": {("collection", "source_type", "source_id")},  # one Source per collection + type + id
+                "indexes": [  # both composite indexes lead with "collection"
+                    models.Index(
+                        fields=["collection", "source_type"],
+                        name="source_collection_type_idx",
+                    ),
+                    models.Index(
+                        fields=["collection", "scielo_issn"],
+                        name="source_collection_issn_idx",
+                    ),
+                ],
+            },
+        ),
+    ]
diff --git a/source/migrations/0002_source_access_type.py b/source/migrations/0002_source_access_type.py
new file mode 100644
index 0000000..e148c15
--- /dev/null
+++ b/source/migrations/0002_source_access_type.py
@@ -0,0 +1,25 @@
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # Adds the optional, indexed Source.access_type field.
+    dependencies = [
+        ("source", "0001_initial"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="source",
+            name="access_type",
+            field=models.CharField(
+                blank=True,
+                choices=[
+                    ("open_access", "Open Access"),
+                    ("commercial", "Commercial"),
+                ],
+                db_index=True,
+                max_length=32,
+                null=True,  # nullable, so the field is added without a default
+                verbose_name="Access Type",
+            ),
+        ),
+    ]
diff --git a/source/migrations/0003_alter_source_title.py b/source/migrations/0003_alter_source_title.py
new file mode 100644
index 0000000..354a82a
--- /dev/null
+++ b/source/migrations/0003_alter_source_title.py
@@ -0,0 +1,15 @@
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # Redefines Source.title with max_length=500.
+    dependencies = [
+        ("source", "0002_source_access_type"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="source",
+            name="title",
+            field=models.CharField(max_length=500, verbose_name="Source Title"),  # 0001_initial declared 255
+        ),
+    ]
diff --git a/source/migrations/__init__.py b/source/migrations/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/source/migrations/__init__.py
@@ -0,0 +1 @@
+
diff --git a/source/models.py b/source/models.py
new file mode 100644
index 0000000..48d3e00
--- /dev/null
+++ b/source/models.py
@@ -0,0 +1,219 @@
+from django.db import models
+from django.utils.translation import gettext_lazy as _
+
+from collection.models import Collection
+from core.models import CommonControlField
+
+
+class Source(CommonControlField):  # one publication source (journal, book, ...) of a Collection
+    SOURCE_TYPE_JOURNAL = "journal"
+    SOURCE_TYPE_BOOK = "book"
+    SOURCE_TYPE_PREPRINT_SERVER = "preprint_server"
+    SOURCE_TYPE_DATA_REPOSITORY = "data_repository"
+    SOURCE_TYPE_OTHER = "other"
+    SOURCE_TYPE_CHOICES = (
+        (SOURCE_TYPE_JOURNAL, _("Journal")),
+        (SOURCE_TYPE_BOOK, _("Book")),
+        (SOURCE_TYPE_PREPRINT_SERVER, _("Preprint Server")),
+        (SOURCE_TYPE_DATA_REPOSITORY, _("Data Repository")),
+        (SOURCE_TYPE_OTHER, _("Other")),
+    )
+
+    ACCESS_TYPE_OPEN_ACCESS = "open_access"
+    ACCESS_TYPE_COMMERCIAL = "commercial"
+    ACCESS_TYPE_CHOICES = (
+        (ACCESS_TYPE_OPEN_ACCESS, _("Open Access")),
+        (ACCESS_TYPE_COMMERCIAL, _("Commercial")),
+    )
+
+    collection = models.ForeignKey(
+        Collection,
+        verbose_name=_("Collection"),
+        on_delete=models.CASCADE,
+        blank=False,
+        null=False,
+        db_index=True,
+    )
+
+    source_type = models.CharField(
+        verbose_name=_("Source Type"),
+        max_length=32,
+        choices=SOURCE_TYPE_CHOICES,
+        blank=False,
+        null=False,
+        db_index=True,
+    )
+
+    source_id = models.CharField(
+        verbose_name=_("Source ID"),
+        max_length=255,
+        blank=False,
+        null=False,
+        db_index=True,
+    )
+
+    scielo_issn = models.CharField(
+        verbose_name=_("SciELO ISSN"),
+        max_length=9,  # e.g. "1234-5678"
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    acronym = models.CharField(
+        verbose_name=_("Source Acronym"),
+        max_length=64,
+        blank=True,
+        null=True,
+        default="",
+    )
+
+    title = models.CharField(
+        verbose_name=_("Source Title"),
+        max_length=500,
+        blank=False,
+        null=False,
+    )
+
+    identifiers = models.JSONField(
+        verbose_name=_("Identifiers"),
+        null=True,
+        blank=True,
+        default=dict,
+    )
+
+    publisher_name = models.JSONField(
+        verbose_name=_("Publisher Name"),
+        blank=True,
+        null=True,
+        default=list,
+    )
+
+    subject_areas = models.JSONField(
+        verbose_name=_("Subject Areas (CAPES)"),
+        null=False,
+        blank=False,
+        default=list,
+    )
+
+    wos_subject_areas = models.JSONField(
+        verbose_name=_("Subject Areas (WoS)"),
+        null=False,
+        blank=False,
+        default=list,
+    )
+
+    default_lang = models.CharField(
+        verbose_name=_("Default Language"),
+        max_length=8,
+        blank=True,
+        null=True,
+    )
+
+    publication_date = models.CharField(
+        verbose_name=_("Publication Date"),
+        max_length=32,
+        blank=True,
+        null=True,
+    )
+
+    publication_year = models.CharField(
+        verbose_name=_("Publication Year"),
+        max_length=4,
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    access_type = models.CharField(
+        verbose_name=_("Access Type"),
+        max_length=32,
+        choices=ACCESS_TYPE_CHOICES,
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    extra_data = models.JSONField(
+        verbose_name=_("Extra Data"),
+        null=True,
+        blank=True,
+        default=dict,
+    )
+
+    def __str__(self):
+        return f"{self.collection.acron3} - {self.source_type} - {self.source_id}"
+    # Set of truthy values stored under any identifiers key containing "issn" (case-insensitive).
+    @staticmethod
+    def _extract_issns(identifiers):
+        if not isinstance(identifiers, dict):
+            return set()
+
+        return {
+            value
+            for key, value in identifiers.items()
+            if value and "issn" in str(key).lower()
+        }
+    # Generator: yields one plain dict per Source, optionally limited to a single collection.
+    @classmethod
+    def metadata(cls, collection=None):
+        queryset = cls.objects.select_related("collection").only(
+            "acronym",
+            "collection__acron3",
+            "default_lang",
+            "extra_data",
+            "identifiers",
+            "publication_date",
+            "publication_year",
+            "access_type",
+            "publisher_name",
+            "scielo_issn",
+            "source_id",
+            "source_type",
+            "subject_areas",
+            "title",
+            "wos_subject_areas",
+        )
+        # Narrow to one collection only when a truthy value was passed.
+        if collection:
+            queryset = queryset.filter(collection=collection)
+        # Null JSON columns are replaced by empty containers in the yielded dict.
+        for source in queryset.iterator():
+            identifiers = source.identifiers or {}
+            yield {
+                "acronym": source.acronym,
+                "collection": source.collection.acron3,
+                "default_lang": source.default_lang,
+                "extra_data": source.extra_data or {},
+                "identifiers": identifiers,
+                "issns": cls._extract_issns(identifiers),
+                "publication_date": source.publication_date,
+                "publication_year": source.publication_year,
+                "access_type": source.access_type,
+                "publisher_name": source.publisher_name or [],
+                "scielo_issn": source.scielo_issn,
+                "source_id": source.source_id,
+                "source_type": source.source_type,
+                "subject_areas": source.subject_areas or [],
+                "title": source.title,
+                "wos_subject_areas": source.wos_subject_areas or [],
+            }
+
+    class Meta:
+        verbose_name = _("Source")
+        verbose_name_plural = _("Sources")
+        unique_together = (  # at most one row per (collection, source_type, source_id)
+            "collection",
+            "source_type",
+            "source_id",
+        )
+        indexes = [
+            models.Index(
+                fields=["collection", "source_type"],
+                name="source_collection_type_idx",
+            ),
+            models.Index(
+                fields=["collection", "scielo_issn"],
+                name="source_collection_issn_idx",
+            ),
+        ]

From 685b8d07edc092ccbddd340f56563948a80ef1bb Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:25 -0300
Subject: [PATCH 14/31] =?UTF-8?q?feat:=20adicionar=20servi=C3=A7os=20Sourc?=
 =?UTF-8?q?e,=20tasks,=20wagtail=20hooks=20e=20testes?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 source/__init__.py          |   1 +
 source/apps.py              |   6 ++
 source/services/__init__.py |   1 +
 source/services/books.py    | 137 +++++++++++++++++++++++++++++++++
 source/services/journals.py | 118 ++++++++++++++++++++++++++++
 source/tasks.py             | 148 ++++++++++++++++++++++++++++++++++++
 source/tests.py             | 133 ++++++++++++++++++++++++++++++++
 source/wagtail_hooks.py     |  32 ++++++++
 8 files changed, 576 insertions(+)
 create mode 100644 source/__init__.py
 create mode 100644 source/apps.py
 create mode 100644 source/services/__init__.py
 create mode 100644 source/services/books.py
 create mode 100644 source/services/journals.py
 create mode 100644 source/tasks.py
 create mode 100644 source/tests.py
 create mode 100644 source/wagtail_hooks.py

diff --git a/source/__init__.py b/source/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/source/__init__.py
@@ -0,0 +1 @@
+
diff --git a/source/apps.py b/source/apps.py
new file mode 100644
index 0000000..06d886d
--- /dev/null
+++ b/source/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class SourceConfig(AppConfig):  # Django application config for the "source" app
+    default_auto_field = "django.db.models.BigAutoField"
+    name = "source"
diff --git a/source/services/__init__.py b/source/services/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/source/services/__init__.py
@@ -0,0 +1 @@
+
diff --git a/source/services/books.py b/source/services/books.py
new file mode 100644
index 0000000..df9bd4d
--- /dev/null
+++ b/source/services/books.py
@@ -0,0 +1,137 @@
+from collection.models import Collection
+from source.models import Source
+
+
+BOOKS_COLLECTION_ACRONYM = "books"
+
+
+def get_books_collection(acronym=BOOKS_COLLECTION_ACRONYM):
+    return Collection.objects.get(acron3=acronym)  # .get() raises unless exactly one row matches
+
+
+def upsert_monograph_source(
+    payload,
+    collection,
+    user=None,
+    force_update=True,
+    source_url=None,
+    last_seq=None,
+):
+    """Create or refresh the book Source described by a SciELO Books payload.
+
+    Returns the saved Source, or None when the payload is not a Monograph or
+    has no id: str(None) would otherwise be stored as the source_id "None".
+    """
+    book_id = payload.get("id")
+    if payload.get("TYPE") != "Monograph" or book_id in (None, ""):
+        return None
+    book_id = str(book_id)
+    source, created = Source.objects.get_or_create(
+        collection=collection,
+        source_type=Source.SOURCE_TYPE_BOOK,
+        source_id=book_id,
+    )
+    if created and user:
+        source.creator = user
+    if created or force_update:
+        source.scielo_issn = None
+        source.acronym = ""
+        source.title = payload.get("title") or book_id
+        source.identifiers = _build_source_identifiers(payload)
+        source.publisher_name = _as_list(payload.get("publisher"))
+        source.subject_areas = []
+        source.wos_subject_areas = []
+        source.default_lang = payload.get("language") or None
+        source.publication_date = payload.get("publication_date") or None
+        source.publication_year = _normalize_year(payload.get("year"))
+        source.access_type = _normalize_access_type(payload.get("is_comercial"))
+        source.extra_data = _build_source_extra_data(
+            payload, source_url=source_url, last_seq=last_seq
+        )
+    if user:
+        source.updated_by = user
+    source.save()
+    return source
+
+
+def delete_book_source(collection, book_id):
+    """Delete the book Source with this id; returns what QuerySet.delete() returns."""
+    matches = Source.objects.filter(
+        collection=collection, source_type=Source.SOURCE_TYPE_BOOK, source_id=str(book_id)
+    )
+    return matches.delete()
+
+
+def _build_source_identifiers(payload):
+    """Collect the book id, ISBNs and DOI from the payload, dropping empty entries."""
+    book_id = payload.get("id")
+    identifiers = {"book_id": None if book_id is None else str(book_id)}
+    # (identifier key, payload key) pairs; only the DOI is stored under a new name.
+    for target, origin in (("isbn", "isbn"), ("eisbn", "eisbn"), ("doi", "doi_number")):
+        identifiers[target] = payload.get(origin)
+    return _compact_dict(identifiers)
+
+
+def _build_source_extra_data(payload, source_url=None, last_seq=None):
+    """Gather the remaining book attributes kept in Source.extra_data.
+
+    Entries whose value is empty are removed by _compact_dict.
+    """
+    extra_data = {
+        "raw_type": payload.get("TYPE"),
+        "source_url": source_url,
+        "last_seq": last_seq,
+        "visible": payload.get("visible"),
+        "city": payload.get("city"),
+        "country": payload.get("country"),
+        "pages": payload.get("pages"),
+        "collection_data": payload.get("collection"),
+    }
+    # Every later entry is stored under the same key the payload uses.
+    same_name_keys = (
+        "creators", "is_comercial", "use_licence", "price_reais", "price_dollar",
+        "shopping_info", "serie", "format", "translated_titles",
+        "translated_synopses", "synopsis", "primary_descriptor",
+        "translated_primary_descriptors",
+    )
+    for key in same_name_keys:
+        extra_data[key] = payload.get(key)
+    return _compact_dict(extra_data)
+
+
+def _as_list(value):
+    """Return value as a list: falsy input gives [], a non-empty list is returned as is."""
+    if isinstance(value, list):
+        # An empty list is falsy, so it is swapped for a brand-new empty list.
+        return value if value else []
+    if value:
+        return [value]
+    return []
+
+
+def _normalize_year(value):
+    """Return the first four characters of str(value); None for None or an empty string."""
+    is_missing = value is None or value == ""
+    return None if is_missing else str(value)[:4]
+
+
+def _normalize_access_type(value):
+    """Translate the payload's commercial flag into a Source access type (or None)."""
+    if value is None or value == "":
+        return None
+    commercial = bool(value)  # fallback for non-strings and unrecognised strings
+    if isinstance(value, str):
+        token = value.strip().lower()
+        if token in {"true", "1", "yes", "y", "sim"}:
+            commercial = True
+        elif token in {"false", "0", "no", "n", "nao", "não"}:
+            commercial = False
+    return Source.ACCESS_TYPE_COMMERCIAL if commercial else Source.ACCESS_TYPE_OPEN_ACCESS
+
+
+def _compact_dict(data):
+    """Copy data, leaving out values equal to None or to an empty str, list, dict or tuple."""
+    empty_values = (None, "", [], {}, ())
+    # filter() keeps the (key, value) pairs whose value is not an empty marker.
+    pairs = filter(lambda pair: pair[1] not in empty_values, data.items())
+    return dict(pairs)
diff --git a/source/services/journals.py b/source/services/journals.py
new file mode 100644
index 0000000..ac133f6
--- /dev/null
+++ b/source/services/journals.py
@@ -0,0 +1,118 @@
+from django.db.models import Q
+
+from collection.models import Collection
+from source.models import Source
+
+
+def get_collection(acronym):
+    return Collection.objects.filter(acron3=acronym).first()  # None when nothing matches
+
+
+def upsert_journal_source(
+    journal,
+    collection,
+    user=None,
+    force_update=True,
+    load_mode=None,
+):
+    """Create or refresh the journal Source keyed by the journal's SciELO ISSN.
+
+    ``journal`` may be a dict or an object exposing the same names as
+    attributes. Returns the saved Source, or None when no SciELO ISSN is set.
+    """
+    issn = _value(journal, "scielo_issn")
+    if not issn:
+        return None
+
+    lookup = {
+        "collection": collection,
+        "source_type": Source.SOURCE_TYPE_JOURNAL,
+        "source_id": issn,
+    }
+    source, is_new = Source.objects.get_or_create(**lookup)
+    if is_new and user:
+        source.creator = user
+
+    if is_new or force_update:
+        source.scielo_issn = issn
+        source.acronym = _value(journal, "acronym") or ""
+        source.title = _value(journal, "title") or issn
+        source.identifiers = _build_source_identifiers(journal)
+        source.publisher_name = _as_list(_value(journal, "publisher_name"))
+        source.subject_areas = _as_list(_value(journal, "subject_areas"))
+        source.wos_subject_areas = _as_list(_value(journal, "wos_subject_areas"))
+        source.default_lang = source.publication_date = source.publication_year = None
+        extra = {
+            "collection_acronym": _value(journal, "collection_acronym"),
+            "load_mode": load_mode,
+        }
+        source.extra_data = _compact_dict(extra)
+    if user:
+        source.updated_by = user
+    source.save()
+    return source
+
+
+def find_journal_source_by_issns(collection, issns):
+    """Return the first journal Source in ``collection`` matching one of ``issns``.
+
+    Candidates are tried in order and falsy ones are skipped; None if no match.
+    """
+    journals = Source.objects.filter(
+        collection=collection, source_type=Source.SOURCE_TYPE_JOURNAL
+    )
+    for candidate in issns or []:
+        if not candidate:
+            continue
+        matcher = Q(scielo_issn=candidate) | Q(source_id=candidate)
+        matcher |= Q(identifiers__electronic_issn=candidate)
+        matcher |= Q(identifiers__print_issn=candidate)
+        matcher |= Q(identifiers__scielo_issn=candidate)
+        found = journals.filter(matcher).first()
+        if found:
+            return found
+    return None
+
+
+def find_journal_source_by_acronym(collection, acronym):
+    """Return the first journal Source in ``collection`` with this acronym.
+
+    A falsy acronym returns None without running a query.
+    """
+    if acronym:
+        criteria = {"source_type": Source.SOURCE_TYPE_JOURNAL, "acronym": acronym}
+        return Source.objects.filter(collection=collection, **criteria).first()
+    return None
+
+
+def _build_source_identifiers(journal):
+    """Map the journal's three ISSN fields into a dict, dropping the empty ones."""
+    issn_fields = ("electronic_issn", "print_issn", "scielo_issn")
+    collected = {}
+    for field in issn_fields:
+        collected[field] = _value(journal, field)
+    return _compact_dict(collected)
+
+
+def _as_list(value):
+    """Return value as a list: falsy input gives [], a non-empty list is returned as is."""
+    if isinstance(value, list):
+        # An empty list is falsy, so it is swapped for a brand-new empty list.
+        return value if value else []
+    if value:
+        return [value]
+    return []
+
+
+def _value(data, key, default=None):
+    """Read ``key`` from a dict with .get(), or from any other object with getattr()."""
+    is_mapping = isinstance(data, dict)
+    return data.get(key, default) if is_mapping else getattr(data, key, default)
+
+
+def _compact_dict(data):
+    """Copy data, leaving out values equal to None or to an empty str, list, dict or tuple."""
+    empty_values = (None, "", [], {}, ())
+    # filter() keeps the (key, value) pairs whose value is not an empty marker.
+    pairs = filter(lambda pair: pair[1] not in empty_values, data.items())
+    return dict(pairs)
diff --git a/source/tasks.py b/source/tasks.py
new file mode 100644
index 0000000..eb1633b
--- /dev/null
+++ b/source/tasks.py
@@ -0,0 +1,148 @@
+import logging
+
+from django.utils.translation import gettext as _
+from django.conf import settings
+
+from collection.models import Collection
+from config import celery_app
+from core.collectors import articlemeta as articlemeta_collector
+from core.collectors import scielo_books as scielo_books_collector
+from core.utils.request_utils import _get_user
+from source.services import books as books_service
+from source.services import journals as journal_service
+
+
+def load_sources_from_article_meta(
+    collections=None,
+    force_update=True,
+    user=None,
+    mode="thrift",
+):
+    """Upsert a journal Source for every journal Article Meta lists.
+
+    ``collections`` defaults to Collection.acron3_list(). Always returns True.
+    """
+    found = {}  # acronym -> Collection or None, so each acronym is queried once
+    for collection_code in collections or Collection.acron3_list():
+        logging.info(
+            "Loading sources from Article Meta. Collection: %s, Mode: %s",
+            collection_code,
+            mode,
+        )
+        for journal in articlemeta_collector.iter_journals(
+            collection=collection_code,
+            mode=mode,
+        ):
+            acronym = journal.collection_acronym
+            if acronym not in found:
+                found[acronym] = journal_service.get_collection(acronym)
+            collection = found[acronym]
+            if not collection:
+                logging.error("Collection %s does not exist", acronym)
+                continue
+            source = journal_service.upsert_journal_source(
+                journal,
+                collection=collection,
+                user=user,
+                force_update=force_update,
+                load_mode=mode,
+            )
+            if source is None:
+                # upsert_journal_source returns None for journals without a SciELO ISSN.
+                logging.warning("Journal skipped for collection %s", acronym)
+                continue
+            logging.info("Source %s upserted for collection %s", source.source_id, collection.acron3)
+    return True
+
+
+def load_sources_from_scielo_books(
+    collection="books",
+    db_name=settings.SCIELO_BOOKS_DB_NAME,
+    since=0,
+    limit=settings.SCIELO_BOOKS_LIMIT,
+    force_update=True,
+    headers=None,
+    base_url=None,
+    user=None,
+):
+    """Mirror the SciELO Books change feed into book Source rows.
+
+    Deleted changes remove the matching Source, Monograph payloads are
+    upserted and every other document is ignored. Always returns True.
+    """
+    books = books_service.get_books_collection(collection)
+    logging.info(
+        "Loading sources from SciELO Books. Collection: %s, DB: %s, Since: %s, Limit: %s",
+        collection,
+        db_name,
+        since,
+        limit,
+    )
+
+    feed = scielo_books_collector.iter_change_documents(
+        base_url=base_url,
+        db_name=db_name,
+        since=since,
+        limit=limit,
+        headers=headers,
+    )
+    for entry in feed:
+        change = entry["change"]
+        if entry["deleted"]:
+            books_service.delete_book_source(books, change.get("id"))
+            continue
+        payload = entry["payload"] or {}
+        if payload.get("TYPE") == "Monograph":
+            books_service.upsert_monograph_source(
+                payload,
+                collection=books,
+                user=user,
+                force_update=force_update,
+                source_url=entry.get("source_url"),
+                last_seq=change.get("seq"),
+            )
+    return True
+
+
+@celery_app.task(bind=True, name=_("[Metadata] Sync Sources (Article Meta)"), queue="load")
+def task_load_sources_from_article_meta(
+    self,
+    collections=None,
+    force_update=True,
+    user_id=None,
+    username=None,
+    mode="thrift",
+):
+    """Celery task wrapper around load_sources_from_article_meta.
+
+    The ``user`` handed on is whatever _get_user returns for this request.
+    """
+    options = {"collections": collections, "force_update": force_update, "mode": mode}
+    options["user"] = _get_user(self.request, username=username, user_id=user_id)
+    return load_sources_from_article_meta(**options)
+
+
+@celery_app.task(bind=True, name=_("[Metadata] Sync Sources (SciELO Books)"), queue="load")
+def task_load_sources_from_scielo_books(
+    self,
+    collection="books",
+    db_name=settings.SCIELO_BOOKS_DB_NAME,
+    since=0,
+    limit=settings.SCIELO_BOOKS_LIMIT,
+    force_update=True,
+    headers=None,
+    base_url=None,
+    user_id=None,
+    username=None,
+):
+    """Celery task wrapper around load_sources_from_scielo_books.
+
+    The ``user`` handed on is whatever _get_user returns for this request.
+    """
+    acting_user = _get_user(self.request, username=username, user_id=user_id)
+    feed_options = {"db_name": db_name, "since": since, "limit": limit}
+    request_options = {"headers": headers, "base_url": base_url}
+    return load_sources_from_scielo_books(
+        collection=collection, force_update=force_update, user=acting_user,
+        **feed_options, **request_options,
+    )
diff --git a/source/tests.py b/source/tests.py
new file mode 100644
index 0000000..a182f4e
--- /dev/null
+++ b/source/tests.py
@@ -0,0 +1,133 @@
+from django.test import TestCase
+
+from collection.models import Collection
+
+from .models import Source
+from .services import books as books_service
+from .services import journals as journal_service
+
+
+class SourceMetadataTests(TestCase):  # covers Source.metadata() and the book/journal upsert services
+    def test_source_type_choices_include_scielo_non_journal_sources(self):
+        self.assertIn(
+            (Source.SOURCE_TYPE_PREPRINT_SERVER, "Preprint Server"),
+            [(value, str(label)) for value, label in Source.SOURCE_TYPE_CHOICES],
+        )
+        self.assertIn(
+            (Source.SOURCE_TYPE_DATA_REPOSITORY, "Data Repository"),
+            [(value, str(label)) for value, label in Source.SOURCE_TYPE_CHOICES],
+        )
+
+    def test_metadata_exposes_generic_and_journal_fields(self):
+        collection = Collection.objects.create(acron3="scl", acron2="sc")
+        Source.objects.create(
+            collection=collection,
+            source_type=Source.SOURCE_TYPE_JOURNAL,
+            source_id="1234-5678",
+            scielo_issn="1234-5678",
+            acronym="testjou",
+            title="Test Journal",
+            identifiers={
+                "electronic_issn": "1234-5678",
+                "print_issn": "8765-4321",
+                "doi": "10.1590/example",
+            },
+            publisher_name=["SciELO"],
+            subject_areas=["Health Sciences"],
+            wos_subject_areas=["Medicine"],
+            default_lang="en",
+            publication_date="2024-01-15",
+            publication_year="2024",
+            extra_data={"country": "BR"},
+        )
+        # metadata() is a generator, so it is materialised before asserting.
+        metadata = list(Source.metadata(collection=collection))
+
+        self.assertEqual(len(metadata), 1)
+        self.assertEqual(metadata[0]["source_type"], Source.SOURCE_TYPE_JOURNAL)
+        self.assertEqual(metadata[0]["source_id"], "1234-5678")
+        self.assertEqual(metadata[0]["scielo_issn"], "1234-5678")
+        self.assertEqual(metadata[0]["issns"], {"1234-5678", "8765-4321"})  # "doi" key is not an ISSN key
+        self.assertEqual(metadata[0]["title"], "Test Journal")
+
+    def test_upsert_monograph_source_maps_scielo_books_payload(self):
+        collection = Collection.objects.create(acron3="books", acron2="bk")
+
+        source = books_service.upsert_monograph_source(
+            {
+                "TYPE": "Monograph",
+                "id": "abcd1",
+                "title": "Sample Book",
+                "isbn": "9788578791889",
+                "eisbn": "9788578791880",
+                "doi_number": "10.1234/book",
+                "language": "pt",
+                "publication_date": "2024-05-20",
+                "year": "2024",
+                "publisher": "SciELO Books",
+                "is_comercial": False,
+                "visible": True,
+            },
+            collection=collection,
+        )
+
+        self.assertEqual(source.source_type, Source.SOURCE_TYPE_BOOK)
+        self.assertEqual(source.source_id, "abcd1")
+        self.assertEqual(source.identifiers["isbn"], "9788578791889")
+        self.assertEqual(source.default_lang, "pt")
+        self.assertEqual(source.publication_year, "2024")
+        self.assertEqual(source.access_type, Source.ACCESS_TYPE_OPEN_ACCESS)
+
+    def test_upsert_monograph_source_accepts_long_real_world_title(self):
+        collection = Collection.objects.create(acron3="books", acron2="bk")
+        title = (
+            "O Estado da Arte sobre Refugiados, Deslocados Internos, "
+            "Deslocados Ambientais e Apatridas no Brasil: atualizacao do "
+            "Diretorio Nacional do ACNUR de teses, dissertacoes, trabalhos "
+            "de conclusao de curso de graduacao em Joao Pessoa (Paraiba) e "
+            "artigos (2007 a 2017)"
+        )
+        # The long title must come back from the upsert unchanged.
+        source = books_service.upsert_monograph_source(
+            {
+                "TYPE": "Monograph",
+                "id": "9zzts",
+                "title": title,
+            },
+            collection=collection,
+        )
+
+        self.assertEqual(source.title, title)
+
+    def test_upsert_journal_source_maps_articlemeta_payload(self):
+        collection = Collection.objects.create(acron3="scl", acron2="sc")
+
+        source = journal_service.upsert_journal_source(
+            {
+                "collection_acronym": "scl",
+                "scielo_issn": "1234-5678",
+                "electronic_issn": "1234-5678",
+                "print_issn": "8765-4321",
+                "acronym": "testjou",
+                "title": "Test Journal",
+                "publisher_name": "SciELO",
+                "subject_areas": ["Health Sciences"],
+                "wos_subject_areas": ["Medicine"],
+            },
+            collection=collection,
+            load_mode="thrift",
+        )
+
+        self.assertEqual(source.source_type, Source.SOURCE_TYPE_JOURNAL)
+        self.assertEqual(source.source_id, "1234-5678")
+        self.assertEqual(source.identifiers["electronic_issn"], "1234-5678")
+        self.assertEqual(source.publisher_name, ["SciELO"])
+        self.assertEqual(source.extra_data["load_mode"], "thrift")
+        self.assertEqual(
+            journal_service.find_journal_source_by_issns(collection, ["8765-4321"]).pk,
+            source.pk,
+        )
+        self.assertEqual(
+            journal_service.find_journal_source_by_acronym(collection, "testjou").pk,
+            source.pk,
+        )
diff --git a/source/wagtail_hooks.py b/source/wagtail_hooks.py
new file mode 100644
index 0000000..5ffad62
--- /dev/null
+++ b/source/wagtail_hooks.py
@@ -0,0 +1,32 @@
+from django.utils.translation import gettext_lazy as _
+from wagtail.snippets.views.snippets import SnippetViewSet
+
+from .models import Source
+
+
+class SourceSnippetViewSet(SnippetViewSet):  # Wagtail snippet view set configured for Source
+    model = Source
+    icon = "folder-open-inverse"
+    menu_label = _("Source")
+    menu_order = 200
+
+    list_display = (  # columns of the listing
+        "collection",
+        "source_type",
+        "source_id",
+        "scielo_issn",
+        "acronym",
+        "title",
+        "publication_year",
+    )
+    list_filter = (  # fields offered as listing filters
+        "collection",
+        "source_type",
+        "publication_year",
+    )
+    search_fields = (  # fields matched by the search box
+        "source_id",
+        "scielo_issn",
+        "acronym",
+        "title",
+    )

From c547c3f02b6a50ea99372b4e4a7fa5076cfeedbd Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:34 -0300
Subject: [PATCH 15/31] =?UTF-8?q?feat:=20adicionar=20modelos=20Reports=20(?=
 =?UTF-8?q?semanal,=20mensal,=20anual)=20e=20migra=C3=A7=C3=B5es?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 reports/__init__.py                           |   0
 reports/apps.py                               |   8 +
 reports/migrations/0001_initial.py            | 140 ++++++++++++++++++
 ...alter_monthlylogreport_options_and_more.py |  36 +++++
 reports/migrations/__init__.py                |   0
 reports/models.py                             | 100 +++++++++++++
 6 files changed, 284 insertions(+)
 create mode 100644 reports/__init__.py
 create mode 100644 reports/apps.py
 create mode 100644 reports/migrations/0001_initial.py
 create mode 100644 reports/migrations/0002_alter_monthlylogreport_options_and_more.py
 create mode 100644 reports/migrations/__init__.py
 create mode 100644 reports/models.py

diff --git a/reports/__init__.py b/reports/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/reports/apps.py b/reports/apps.py
new file mode 100644
index 0000000..119ca26
--- /dev/null
+++ b/reports/apps.py
@@ -0,0 +1,8 @@
+from django.apps import AppConfig
+from django.utils.translation import gettext_lazy as _
+
+
+class ReportsConfig(AppConfig):  # Django application config for the "reports" app
+    default_auto_field = "django.db.models.BigAutoField"
+    name = "reports"
+    verbose_name = _("Reports")
diff --git a/reports/migrations/0001_initial.py b/reports/migrations/0001_initial.py
new file mode 100644
index 0000000..2a72923
--- /dev/null
+++ b/reports/migrations/0001_initial.py
@@ -0,0 +1,140 @@
+# Generated by Django 5.2.12 on 2026-05-01 15:50
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # creates the monthly, weekly and yearly log report tables
+    initial = True
+
+    dependencies = [
+        ("collection", "0001_initial"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="MonthlyLogReport",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("total_files", models.IntegerField(default=0)),
+                ("created_files", models.IntegerField(default=0)),
+                ("validated_files", models.IntegerField(default=0)),
+                ("invalidated_files", models.IntegerField(default=0)),
+                ("errored_files", models.IntegerField(default=0)),
+                ("lines_parsed", models.IntegerField(default=0)),
+                ("valid_lines", models.IntegerField(default=0)),
+                ("discarded_lines", models.IntegerField(default=0)),
+                ("ip_local_count", models.IntegerField(default=0)),
+                ("ip_remote_count", models.IntegerField(default=0)),
+                ("ip_unknown_count", models.IntegerField(default=0)),
+                ("generated_at", models.DateTimeField(auto_now=True)),  # auto_now: reset on every save()
+                ("year", models.IntegerField(verbose_name="Year")),
+                ("month", models.IntegerField(verbose_name="Month")),
+                (
+                    "collection",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="collection.collection",
+                        verbose_name="Collection",
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "Monthly Log Report",
+                "verbose_name_plural": "Monthly Log Reports",
+                "ordering": ["-year", "-month", "collection__acron3"],
+                "unique_together": {("collection", "year", "month")},
+            },
+        ),
+        migrations.CreateModel(
+            name="WeeklyLogReport",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("total_files", models.IntegerField(default=0)),
+                ("created_files", models.IntegerField(default=0)),
+                ("validated_files", models.IntegerField(default=0)),
+                ("invalidated_files", models.IntegerField(default=0)),
+                ("errored_files", models.IntegerField(default=0)),
+                ("lines_parsed", models.IntegerField(default=0)),
+                ("valid_lines", models.IntegerField(default=0)),
+                ("discarded_lines", models.IntegerField(default=0)),
+                ("ip_local_count", models.IntegerField(default=0)),
+                ("ip_remote_count", models.IntegerField(default=0)),
+                ("ip_unknown_count", models.IntegerField(default=0)),
+                ("generated_at", models.DateTimeField(auto_now=True)),
+                ("year", models.IntegerField(verbose_name="Year")),
+                ("week", models.IntegerField(verbose_name="ISO Week")),
+                (
+                    "collection",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="collection.collection",
+                        verbose_name="Collection",
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "Weekly Log Report",
+                "verbose_name_plural": "Weekly Log Reports",
+                "ordering": ["-year", "-week", "collection__acron3"],
+                "unique_together": {("collection", "year", "week")},
+            },
+        ),
+        migrations.CreateModel(
+            name="YearlyLogReport",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("total_files", models.IntegerField(default=0)),
+                ("created_files", models.IntegerField(default=0)),
+                ("validated_files", models.IntegerField(default=0)),
+                ("invalidated_files", models.IntegerField(default=0)),
+                ("errored_files", models.IntegerField(default=0)),
+                ("lines_parsed", models.IntegerField(default=0)),
+                ("valid_lines", models.IntegerField(default=0)),
+                ("discarded_lines", models.IntegerField(default=0)),
+                ("ip_local_count", models.IntegerField(default=0)),
+                ("ip_remote_count", models.IntegerField(default=0)),
+                ("ip_unknown_count", models.IntegerField(default=0)),
+                ("generated_at", models.DateTimeField(auto_now=True)),
+                ("year", models.IntegerField(verbose_name="Year")),
+                (
+                    "collection",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="collection.collection",
+                        verbose_name="Collection",
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "Yearly Log Report",
+                "verbose_name_plural": "Yearly Log Reports",
+                "ordering": ["-year", "collection__acron3"],
+                "unique_together": {("collection", "year")},
+            },
+        ),
+    ]
diff --git a/reports/migrations/0002_alter_monthlylogreport_options_and_more.py b/reports/migrations/0002_alter_monthlylogreport_options_and_more.py
new file mode 100644
index 0000000..659215c
--- /dev/null
+++ b/reports/migrations/0002_alter_monthlylogreport_options_and_more.py
@@ -0,0 +1,36 @@
+# Generated by Django 5.2.12 on 2026-05-01 22:23
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):  # contains only AlterModelOptions operations
+    dependencies = [
+        ("reports", "0001_initial"),
+    ]
+    # Each operation below sets ordering to the collection acronym followed by the period fields.
+    operations = [
+        migrations.AlterModelOptions(
+            name="monthlylogreport",
+            options={
+                "ordering": ["collection__acron3", "year", "month"],
+                "verbose_name": "Monthly Log Report",
+                "verbose_name_plural": "Monthly Log Reports",
+            },
+        ),
+        migrations.AlterModelOptions(
+            name="weeklylogreport",
+            options={
+                "ordering": ["collection__acron3", "year", "week"],
+                "verbose_name": "Weekly Log Report",
+                "verbose_name_plural": "Weekly Log Reports",
+            },
+        ),
+        migrations.AlterModelOptions(
+            name="yearlylogreport",
+            options={
+                "ordering": ["collection__acron3", "year"],
+                "verbose_name": "Yearly Log Report",
+                "verbose_name_plural": "Yearly Log Reports",
+            },
+        ),
+    ]
diff --git a/reports/migrations/__init__.py b/reports/migrations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/reports/models.py b/reports/models.py
new file mode 100644
index 0000000..3af1ec8
--- /dev/null
+++ b/reports/models.py
@@ -0,0 +1,100 @@
+from django.db import models
+from django.utils.translation import gettext_lazy as _
+
+from collection.models import Collection
+
+
+class AbstractLogReport(models.Model):
+    """Abstract base with the counters shared by the periodic log reports.
+
+    Subclasses add their period fields and implement period_label.
+    """
+    collection = models.ForeignKey(Collection, on_delete=models.CASCADE, verbose_name=_("Collection"))
+    total_files = models.IntegerField(default=0)
+    created_files = models.IntegerField(default=0)
+    validated_files = models.IntegerField(default=0)
+    invalidated_files = models.IntegerField(default=0)
+    errored_files = models.IntegerField(default=0)
+    lines_parsed = models.IntegerField(default=0)
+    valid_lines = models.IntegerField(default=0)
+    discarded_lines = models.IntegerField(default=0)
+    ip_local_count = models.IntegerField(default=0)
+    ip_remote_count = models.IntegerField(default=0)
+    ip_unknown_count = models.IntegerField(default=0)
+    generated_at = models.DateTimeField(auto_now=True)
+
+    class Meta:
+        abstract = True
+
+    @property
+    def pct_validated(self):
+        """Validated files as a percentage of total files (0 when there are none)."""
+        total = self.total_files
+        return round(self.validated_files / total * 100, 1) if total else 0
+    pct_validated.fget.short_description = _("% Valid Files")
+
+    @property
+    def pct_valid_lines(self):
+        """Valid lines as a percentage of parsed lines (0 when none were parsed)."""
+        parsed = self.lines_parsed
+        return round(self.valid_lines / parsed * 100, 1) if parsed else 0
+    pct_valid_lines.fget.short_description = _("% Valid Lines")
+
+    @property
+    def pct_remote_ip(self):
+        """Remote IPs as a percentage of remote + local IPs; unknown IPs are left out."""
+        classified = self.ip_remote_count + self.ip_local_count
+        return round(self.ip_remote_count / classified * 100, 1) if classified else 0
+    pct_remote_ip.fget.short_description = _("% Remote IP")
+
+    def __str__(self):
+        return f"{self.collection.acron3} {self.period_label}"
+
+    @property
+    def period_label(self):
+        """Text label of the report period; concrete report models override this."""
+        raise NotImplementedError
+
+
+class WeeklyLogReport(AbstractLogReport):  # log report counters for one collection and one week
+    year = models.IntegerField(verbose_name=_("Year"))
+    week = models.IntegerField(verbose_name=_("ISO Week"))
+
+    class Meta:
+        unique_together = [("collection", "year", "week")]  # one row per collection, year and week
+        ordering = ["collection__acron3", "year", "week"]
+        verbose_name = _("Weekly Log Report")
+        verbose_name_plural = _("Weekly Log Reports")
+
+    @property
+    def period_label(self):
+        return f"{self.year}-W{self.week:02d}"  # e.g. "2026-W05" (week zero-padded to 2 digits)
+
+
+class MonthlyLogReport(AbstractLogReport):  # log report counters for one collection and one month
+    year = models.IntegerField(verbose_name=_("Year"))
+    month = models.IntegerField(verbose_name=_("Month"))
+
+    class Meta:
+        unique_together = [("collection", "year", "month")]  # one row per collection, year and month
+        ordering = ["collection__acron3", "year", "month"]
+        verbose_name = _("Monthly Log Report")
+        verbose_name_plural = _("Monthly Log Reports")
+
+    @property
+    def period_label(self):
+        return f"{self.year}-{self.month:02d}"  # e.g. "2026-05" (month zero-padded to 2 digits)
+
+
+class YearlyLogReport(AbstractLogReport):  # log report counters for one collection and one year
+    year = models.IntegerField(verbose_name=_("Year"))
+
+    class Meta:
+        unique_together = [("collection", "year")]  # one row per collection and year
+        ordering = ["collection__acron3", "year"]
+        verbose_name = _("Yearly Log Report")
+        verbose_name_plural = _("Yearly Log Reports")
+
+    @property
+    def period_label(self):
+        return str(self.year)  # the year as text, e.g. "2026"

From d4a32f2ad1f6c7cf91db9973525ca107c6d88db9 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:46 -0300
Subject: [PATCH 16/31] feat: adicionar tasks e wagtail hooks do Reports

---
 reports/tasks.py         | 238 +++++++++++++++++++++++++++++++++++++++
 reports/wagtail_hooks.py |  75 ++++++++++++
 2 files changed, 313 insertions(+)
 create mode 100644 reports/tasks.py
 create mode 100644 reports/wagtail_hooks.py

diff --git a/reports/tasks.py b/reports/tasks.py
new file mode 100644
index 0000000..69a53a1
--- /dev/null
+++ b/reports/tasks.py
@@ -0,0 +1,238 @@
+import logging
+from collections import defaultdict
+
+from django.core.mail import send_mail
+from django.conf import settings
+from django.utils.translation import gettext as _
+
+from config import celery_app
+from core.utils import date_utils
+from collection.models import Collection
+from log_manager import choices
+from log_manager.models import LogFile
+from log_manager_config import models as lmc_models
+
+from reports.models import WeeklyLogReport, MonthlyLogReport, YearlyLogReport
+
+
+def _extract_date_from_log_file(lf):
+    """Return lf.date, else get_date_obj() of validation["probably_date"], else of the
+    first YYYY-MM-DD found in lf.path; None when no source yields a value."""
+    import re  # function-scope import: this module has no file-level "import re"
+
+    if lf.date:
+        return lf.date
+    probably_date = (lf.validation or {}).get("probably_date")
+    if isinstance(probably_date, str) and probably_date:
+        return date_utils.get_date_obj(probably_date)
+    try:
+        match = re.search(r"(\d{4}-\d{2}-\d{2})", lf.path or "")
+        if match:
+            return date_utils.get_date_obj(match.group(1))
+    except Exception:  # best-effort fallback, but no longer silent
+        logging.warning("Could not extract a date from path %r", lf.path, exc_info=True)
+    return None
+
+
+@celery_app.task(bind=True, name=_("[Reports] Populate All Reports"))
+def task_populate_all_reports(self, year=None, collection_acron=None):
+    """Rebuild weekly, monthly and yearly log reports from LogFile rows.
+
+    Args:
+        year: optional calendar year; files dated in any other year are skipped.
+        collection_acron: optional acron3 restricting the scan to one collection.
+
+    Returns:
+        str: summary with the number of weekly, monthly and yearly rows written.
+    """
+    qs = LogFile.objects.select_related("collection")
+    if collection_acron:
+        qs = qs.filter(collection__acron3=collection_acron)
+    qs = qs.only(
+        "id", "collection_id", "date", "path", "status", "summary", "validation"
+    )
+    target_year = int(year) if year else None  # hoisted: parsed once, not per file
+
+    weekly = defaultdict(lambda: defaultdict(int))
+    monthly = defaultdict(lambda: defaultdict(int))
+    yearly = defaultdict(lambda: defaultdict(int))
+
+    for lf in qs.iterator(chunk_size=2000):
+        extracted_date = _extract_date_from_log_file(lf)
+        if not extracted_date:
+            continue
+        if target_year is not None and extracted_date.year != target_year:
+            continue
+
+        # Not named "_": that would shadow the gettext alias inside this function.
+        iso_year, iso_week, _weekday = extracted_date.isocalendar()
+        summary = lf.summary or {}
+        lp = summary.get("lines_parsed", 0) or 0
+        vl = summary.get("valid_lines", 0) or 0
+        # Each level may be missing or null in the stored JSON, hence the "or {}".
+        content = (lf.validation or {}).get("content") or {}
+        ips = (content.get("summary") or {}).get("ips") or {}
+        # Every counter is always present so update_or_create() resets counters
+        # that dropped to zero instead of keeping the value of a previous run.
+        increments = {
+            "total_files": 1,
+            "created_files": int(lf.status == "CRE"),
+            "validated_files": int(lf.status in ("QUE", "PAR", "PRO")),
+            "invalidated_files": int(lf.status == "INV"),
+            "errored_files": int(lf.status == "ERR"),
+            "lines_parsed": lp,
+            "valid_lines": vl,
+            "discarded_lines": max(lp - vl, 0),
+            "ip_local_count": ips.get("local", 0) or 0,
+            "ip_remote_count": ips.get("remote", 0) or 0,
+            "ip_unknown_count": ips.get("unknown", 0) or 0,
+        }
+        for agg, key in (
+            (weekly, (lf.collection_id, iso_year, iso_week)),
+            (monthly, (lf.collection_id, extracted_date.year, extracted_date.month)),
+            (yearly, (lf.collection_id, extracted_date.year)),
+        ):
+            row = agg[key]
+            for name, amount in increments.items():
+                row[name] += amount
+
+    w_count = _upsert_reports(WeeklyLogReport, weekly)
+    m_count = _upsert_reports(MonthlyLogReport, monthly)
+    y_count = _upsert_reports(YearlyLogReport, yearly)
+    logging.info("Reports populated: %s weekly, %s monthly, %s yearly.", w_count, m_count, y_count)
+    return f"Weekly: {w_count}, Monthly: {m_count}, Yearly: {y_count}"
+
+
+def _upsert_reports(model_class, data):
+    """Create or update one model_class row per aggregated key and return how many.
+
+    Each key of data is (collection_id, *period_values); the period field names
+    come from the model's first unique_together entry, minus its leading field.
+    """
+    _first, *period_fields = model_class._meta.unique_together[0]
+    written = 0
+    for (coll_id, *period_values), counters in data.items():
+        lookup = {"collection_id": coll_id, **dict(zip(period_fields, period_values))}
+        model_class.objects.update_or_create(defaults=counters, **lookup)
+        written += 1
+    return written
+
+
+@celery_app.task(
+    bind=True,
+    name=_("[Reports] Generate Log Report Summary (Manual)"),
+    queue="load",
+)
+def task_log_files_count_status_report(
+    self,
+    collections=None,
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    user_id=None,
+    username=None,
+):
+    """E-mail each collection a summary built from its monthly log reports.
+
+    collections defaults to Collection.acron3_list(); the date arguments only
+    label the reported period; user_id and username are accepted but unused here.
+    """
+    from_date_str, until_date_str = date_utils.get_date_range_str(
+        from_date, until_date, days_to_go_back
+    )
+    # Translate a constant msgid and interpolate afterwards: an f-string inside
+    # _() changes the msgid on every call, so no catalog entry could ever match.
+    subject = _("Usage Log Report Summary ({from_date} to {until_date})").format(
+        from_date=from_date_str, until_date=until_date_str
+    )
+
+    for collection_acron in (collections or Collection.acron3_list()):
+        try:
+            collection = Collection.objects.get(acron3=collection_acron)
+        except Collection.DoesNotExist:
+            logging.warning("Collection not found: %s", collection_acron)
+            continue
+
+        message = _build_report_message(collection, from_date_str, until_date_str)
+        if not message:
+            continue
+
+        logging.info(
+            "Sending email to collection %s. Subject: %s.",
+            collection.main_name, subject,
+        )
+        _send_collection_email(subject, message, collection_acron)
+
+
+def _build_report_message(collection, from_date_str, until_date_str):
+    """Return the plain-text report body for a collection, or "" without data.
+
+    Describes the most recent MonthlyLogReport and, when there is one, the month
+    before it; from_date_str and until_date_str only label the "Period:" line.
+    """
+    # A single sliced query replaces exists() + first() + len() + [1]; len() on
+    # the unsliced queryset used to load every monthly row of the collection.
+    monthly = MonthlyLogReport.objects.filter(collection=collection)
+    recent = list(monthly.order_by("-year", "-month")[:2])
+    if not recent:
+        return ""
+
+    latest = recent[0]
+    message = _("Usage Log Report for {acron}\nPeriod: {from_date} to {until_date}\n\n").format(
+        acron=collection.acron3, from_date=from_date_str, until_date=until_date_str
+    )  # constant msgid, interpolated after translation so catalogs can match it
+    message += _("Latest month ({latest}):\n").format(latest=latest.period_label)
+    message += (
+        f"  Total files: {latest.total_files}\n"
+        f"  Validated files: {latest.validated_files} ({latest.pct_validated}%)\n"
+        f"  Invalidated files: {latest.invalidated_files}\n"
+        f"  Errored files: {latest.errored_files}\n"
+        f"  Lines parsed: {latest.lines_parsed}\n"
+        f"  Valid lines: {latest.valid_lines} ({latest.pct_valid_lines}%)\n"
+        f"  Discarded lines: {latest.discarded_lines}\n"
+        f"  Remote IPs: {latest.ip_remote_count} ({latest.pct_remote_ip}%)\n"
+        f"  Local IPs: {latest.ip_local_count}\n"
+    )
+
+    if len(recent) > 1:
+        prev_month = recent[1]
+        message += _("\nPrevious month ({prev}):\n").format(prev=prev_month.period_label)
+        message += (
+            f"  Total files: {prev_month.total_files}\n"
+            f"  Validated files: {prev_month.validated_files} ({prev_month.pct_validated}%)\n"
+            f"  Valid lines: {prev_month.valid_lines} ({prev_month.pct_valid_lines}%)\n"
+            f"  Remote IPs: {prev_month.ip_remote_count} ({prev_month.pct_remote_ip}%)\n"
+        )
+
+        if prev_month.total_files:
+            file_diff = latest.total_files - prev_month.total_files
+            line_diff = latest.lines_parsed - prev_month.lines_parsed
+            message += _("\nMonth-over-month change:\n")
+            message += f"  Files: {file_diff:+d}\n"
+            message += f"  Lines: {line_diff:+d}\n"
+
+    message += "\n---\nThis report is automatically generated by SciELO Usage.\n"
+    return message
+
+
+def _send_collection_email(subject, message, collection):
+    """Mail the report to the active CollectionEmail addresses of a collection.
+
+    collection is the acron3 used in the lookup; with no active address an error
+    is logged and nothing is sent. Delivery failures are logged, never raised.
+    """
+    recipients = list(
+        lmc_models.CollectionEmail.objects.filter(
+            config__collection__acron3=collection, active=True
+        ).values_list("email", flat=True)
+    )
+    if not recipients:
+        logging.error(
+            "Error. Please, add an E-mail Configuration for the collection %s.",
+            collection,
+        )
+        return
+    try:
+        send_mail(subject=subject, message=message, from_email=settings.DEFAULT_FROM_EMAIL, recipient_list=recipients)
+    except Exception as e:
+        logging.error("Error sending log files report for %s: %s", collection, e)
diff --git a/reports/wagtail_hooks.py b/reports/wagtail_hooks.py
new file mode 100644
index 0000000..b2aeac7
--- /dev/null
+++ b/reports/wagtail_hooks.py
@@ -0,0 +1,75 @@
+from django.contrib.auth import get_user_model
+from django.utils.translation import gettext_lazy as _
+from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup
+from wagtail.snippets.models import register_snippet
+from wagtail.permission_policies.base import BasePermissionPolicy
+
+from reports.models import WeeklyLogReport, MonthlyLogReport, YearlyLogReport
+
+
+class ReadOnlyPermissionPolicy(BasePermissionPolicy):
+    """Permission policy that allows every action except add, change and delete."""
+    def user_has_permission(self, user, action):
+        return action not in ("add", "change", "delete")
+
+    def users_with_any_permission(self, actions):
+        """Return all active users, regardless of the requested actions."""
+        return get_user_model().objects.filter(is_active=True)
+
+
+COMMON_LIST_DISPLAY = (  # columns appended to the list_display of all three report viewsets
+    "total_files",
+    "pct_validated",
+    "lines_parsed",
+    "pct_valid_lines",
+    "pct_remote_ip",
+    "generated_at",
+)
+
+
+class WeeklyLogReportSnippetViewSet(SnippetViewSet):  # snippet viewset for WeeklyLogReport
+    model = WeeklyLogReport
+    menu_label = _("Weekly")
+    icon = "info-circle"
+    menu_order = 100
+    list_display = ("collection", "year", "week") + COMMON_LIST_DISPLAY  # period columns, then the shared ones
+    list_filter = ("collection", "year", "week")
+    search_fields = ("collection__acron3",)
+    permission_policy = ReadOnlyPermissionPolicy(WeeklyLogReport)  # denies add, change and delete
+
+
+class MonthlyLogReportSnippetViewSet(SnippetViewSet):  # snippet viewset for MonthlyLogReport
+    model = MonthlyLogReport
+    menu_label = _("Monthly")
+    icon = "info-circle"
+    menu_order = 200
+    list_display = ("collection", "year", "month") + COMMON_LIST_DISPLAY  # period columns, then the shared ones
+    list_filter = ("collection", "year", "month")
+    search_fields = ("collection__acron3",)
+    permission_policy = ReadOnlyPermissionPolicy(MonthlyLogReport)  # denies add, change and delete
+
+
+class YearlyLogReportSnippetViewSet(SnippetViewSet):  # snippet viewset for YearlyLogReport
+    model = YearlyLogReport
+    menu_label = _("Yearly")
+    icon = "info-circle"
+    menu_order = 300
+    list_display = ("collection", "year") + COMMON_LIST_DISPLAY  # period column, then the shared ones
+    list_filter = ("collection", "year")
+    search_fields = ("collection__acron3",)
+    permission_policy = ReadOnlyPermissionPolicy(YearlyLogReport)  # denies add, change and delete
+
+
+class ReportsSnippetViewSetGroup(SnippetViewSetGroup):  # groups the three report viewsets under one menu
+    menu_name = "usage_reports"
+    menu_label = _("Reports")
+    menu_icon = "info-circle"
+    menu_order = 350
+    items = (  # weekly, monthly, yearly (their own menu_order values are 100, 200, 300)
+        WeeklyLogReportSnippetViewSet,
+        MonthlyLogReportSnippetViewSet,
+        YearlyLogReportSnippetViewSet,
+    )
+
+
+register_snippet(ReportsSnippetViewSetGroup)  # registers the whole group at import time

From 3fffbe7d350cef86d32289956dff32de592a70a4 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:46 -0300
Subject: [PATCH 17/31] feat: adicionar core collectors para ArticleMeta e OPAC

---
 core/collectors/articlemeta.py | 60 ++++++++++++++++++++++++++++++++++
 core/collectors/opac.py        | 33 +++++++++++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 core/collectors/articlemeta.py
 create mode 100644 core/collectors/opac.py

diff --git a/core/collectors/articlemeta.py b/core/collectors/articlemeta.py
new file mode 100644
index 0000000..7f6ace0
--- /dev/null
+++ b/core/collectors/articlemeta.py
@@ -0,0 +1,60 @@
+import logging
+
+import requests
+from django.conf import settings
+from articlemeta.client import RestfulClient, ThriftClient
+from time import sleep
+
+
+def fetch_article_counter_dict(
+    from_date,
+    until_date,
+    offset=0,
+    limit=1000,
+    collection=None,
+    issn=None,
+):
+    """Fetch one page from settings.ARTICLEMETA_COLLECT_URL and return its JSON.
+
+    Retries up to settings.ARTICLEMETA_MAX_RETRIES times on HTTP errors, connection
+    failures and timeouts, sleeping settings.ARTICLEMETA_SLEEP_TIME seconds after each
+    failure; returns {} when every attempt fails. Timeout: ARTICLEMETA_TIMEOUT or 60 s.
+    """
+    params = {"from": from_date, "until": until_date, "offset": offset, "limit": limit}
+    if collection:
+        params["collection"] = collection
+    if issn:
+        params["issn"] = issn
+    # Without a timeout requests can block forever on a stalled connection.
+    timeout = getattr(settings, "ARTICLEMETA_TIMEOUT", 60)
+    max_retries = settings.ARTICLEMETA_MAX_RETRIES
+
+    for attempt in range(1, max_retries + 1):
+        try:
+            response = requests.get(settings.ARTICLEMETA_COLLECT_URL, params=params, timeout=timeout)
+            response.raise_for_status()
+        except requests.exceptions.RequestException as exc:
+            # exc.response is None for connection errors and timeouts.
+            failed_url = getattr(exc.response, "url", None) or settings.ARTICLEMETA_COLLECT_URL
+            logging.warning(
+                "Failed to collect data from %s. Waiting %d seconds before retry %d of %d",
+                failed_url, settings.ARTICLEMETA_SLEEP_TIME, attempt, max_retries,
+            )
+            sleep(settings.ARTICLEMETA_SLEEP_TIME)
+        else:
+            logging.info(response.url)
+            return response.json()
+
+    return {}
+
+
+def iter_journals(collection="scl", mode="rest"):
+    """Yield the journals of a collection through the ArticleMeta client for mode.
+
+    mode is "rest" (RestfulClient) or "thrift" (ThriftClient); any other value
+    raises ValueError once the generator is first advanced.
+    """
+    client_classes = {"rest": RestfulClient, "thrift": ThriftClient}
+    if mode not in client_classes:
+        raise ValueError(f"Unsupported ArticleMeta mode: {mode}")
+    yield from client_classes[mode]().journals(collection=collection)
diff --git a/core/collectors/opac.py b/core/collectors/opac.py
new file mode 100644
index 0000000..94122b7
--- /dev/null
+++ b/core/collectors/opac.py
@@ -0,0 +1,33 @@
+import logging
+
+import requests
+from django.conf import settings
+from time import sleep
+
+
+def fetch_counter_dict(from_date, until_date, page=1):
+    """Fetch one page from settings.OPAC_ENDPOINT and return its JSON, or {} on failure.
+
+    Retries settings.OPAC_MAX_RETRIES times, sleeping settings.OPAC_SLEEP_TIME seconds
+    after each HTTP error, connection failure or timeout (OPAC_TIMEOUT, default 60 s).
+    """
+    params = {"begin_date": from_date, "end_date": until_date, "page": page}
+    # Without a timeout requests can block forever on a stalled connection.
+    timeout = getattr(settings, "OPAC_TIMEOUT", 60)
+
+    for attempt in range(1, settings.OPAC_MAX_RETRIES + 1):
+        try:
+            # NOTE(review): verify=False disables TLS certificate checks - confirm it is needed.
+            response = requests.get(url=settings.OPAC_ENDPOINT, params=params, verify=False, timeout=timeout)
+            response.raise_for_status()
+        except requests.exceptions.RequestException as exc:
+            failed_url = getattr(exc.response, "url", None) or settings.OPAC_ENDPOINT
+            logging.warning(
+                "Could not collect data from %s. Waiting %d seconds for attempt %d of %d",
+                failed_url, settings.OPAC_SLEEP_TIME, attempt, settings.OPAC_MAX_RETRIES,
+            )
+            sleep(settings.OPAC_SLEEP_TIME)
+        else:
+            logging.info(response.url)
+            return response.json()
+    return {}

From 224c01d8cbb8457f788ad00abb6b1de686c8b4b2 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:46 -0300
Subject: [PATCH 18/31] feat: adicionar core collectors para Preprints,
 Dataverse e SciELO Books

---
 core/collectors/__init__.py     |   1 +
 core/collectors/dataverse.py    |  75 +++++++++++++
 core/collectors/preprints.py    |  55 ++++++++++
 core/collectors/scielo_books.py | 182 ++++++++++++++++++++++++++++++++
 4 files changed, 313 insertions(+)
 create mode 100644 core/collectors/__init__.py
 create mode 100644 core/collectors/dataverse.py
 create mode 100644 core/collectors/preprints.py
 create mode 100644 core/collectors/scielo_books.py

diff --git a/core/collectors/__init__.py b/core/collectors/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/core/collectors/__init__.py
@@ -0,0 +1 @@
+
diff --git a/core/collectors/dataverse.py b/core/collectors/dataverse.py
new file mode 100644
index 0000000..ca51fd7
--- /dev/null
+++ b/core/collectors/dataverse.py
@@ -0,0 +1,75 @@
+import logging
+
+import requests
+from django.conf import settings
+
+from core.utils import standardizer
+
+
+def _request_json(url):  # GET url and return its decoded JSON; {} when the request fails
+    try:
+        response = requests.get(url, timeout=settings.DATAVERSE_SLEEP_TIME)  # NOTE(review): a "sleep time" setting used as the timeout - confirm
+        response.raise_for_status()
+        return response.json()
+    except requests.exceptions.RequestException as exc:
+        logging.error("Error fetching %s: %s", url, exc)  # the failure is logged, not raised
+        return {}
+
+
+def _get_subdataverses():  # "data" list of the root collection's contents, [] when absent
+    root = f"{settings.DATAVERSE_ENDPOINT}/dataverses/{settings.DATAVERSE_ROOT_COLLECTION}"
+    return _request_json(f"{root}/contents").get("data", [])
+
+
+def _get_datasets(subdataverse_id):  # "data" list of a sub-dataverse's contents, [] when absent
+    contents_url = "{}/dataverses/{}/contents".format(settings.DATAVERSE_ENDPOINT, subdataverse_id)
+    return _request_json(contents_url).get("data", [])
+
+
+def _get_files(dataset_id):  # "data" list of the dataset's ":latest" version files, [] when absent
+    dataset_url = f"{settings.DATAVERSE_ENDPOINT}/datasets/{dataset_id}"
+    return _request_json(f"{dataset_url}/versions/:latest/files").get("data", [])
+
+
+def iter_dataset_metadata(from_date=None, until_date=None):
+    """Yield one metadata dict per file of every dataset under the root collection.
+
+    Datasets without a DOI are skipped with a warning. A dataset whose
+    publicationDate falls outside [from_date, until_date] is skipped; values are
+    compared as given (assumes same-format date strings - TODO confirm).
+    """
+    for subdataverse in _get_subdataverses():
+        if subdataverse.get("type") != "dataverse":
+            continue
+        collection_id, collection_title = subdataverse["id"], subdataverse["title"]
+
+        for dataset in _get_datasets(collection_id):
+            if dataset.get("type") != "dataset":
+                continue
+
+            dataset_id = dataset["id"]
+            doi = standardizer.standardize_doi(dataset.get("persistentUrl"))
+            if not doi:
+                logging.warning("Dataset %s does not have a DOI.", dataset_id)
+                continue
+
+            published = dataset.get("publicationDate")
+            # Datasets without a publication date are never filtered out.
+            if published and (
+                (from_date and published < from_date) or (until_date and published > until_date)
+            ):
+                continue
+
+            for file_data in _get_files(dataset_id):
+                data_file = file_data["dataFile"]
+                raw_pid = data_file.get("persistentId")
+                file_id = data_file["id"]
+                yield {
+                    "title": collection_title,
+                    "dataset_doi": doi,
+                    "dataset_published": published,
+                    "file_id": file_id,
+                    "file_name": file_data["label"],
+                    "file_url": f"{settings.DATAVERSE_ENDPOINT}/access/datafile/{file_id}",
+                    "file_persistent_id": standardizer.standardize_pid_generic(raw_pid) if raw_pid else None,
+                }
diff --git a/core/collectors/preprints.py b/core/collectors/preprints.py
new file mode 100644
index 0000000..bead72c
--- /dev/null
+++ b/core/collectors/preprints.py
@@ -0,0 +1,55 @@
+from django.conf import settings
+from sickle import Sickle
+
+from core.utils import standardizer
+
+
+def iter_records(from_date, until_date):
+    """Yield the records returned by an OAI-PMH ListRecords request for a date range.
+
+    Endpoint, retry count and metadata prefix are read from Django settings.
+    """
+    # NOTE(review): verify=False presumably disables TLS certificate checks - confirm.
+    harvester = Sickle(
+        endpoint=settings.OAI_PMH_PREPRINT_ENDPOINT,
+        max_retries=settings.OAI_PMH_MAX_RETRIES,
+        verify=False,
+    )
+    query = {
+        "metadataPrefix": settings.OAI_METADATA_PREFIX,
+        "from": from_date, "until": until_date,
+    }
+    yield from harvester.ListRecords(**query)
+
+
+def extract_record_data(record):
+    """Return the identifier, languages and publication date/year of an OAI-PMH
+    record as a dict; a missing or empty "date" list yields "" for the date."""
+    metadata = record.metadata
+    text_langs = [
+        standardizer.standardize_language_code(language)
+        for language in metadata.get("language", [])
+    ]
+    # "or" also covers a present-but-empty list, which used to raise IndexError.
+    publication_date = (metadata.get("date") or [""])[0]
+    return {
+        "pid_generic": _extract_compatible_identifier(record.header.identifier),
+        "text_langs": text_langs,
+        "publication_date": publication_date,
+        "default_language": text_langs[0] if text_langs else "",
+        "publication_year": _extract_publication_year_from_date(publication_date),
+    }
+
+
+def _extract_compatible_identifier(identifier):
+    """Return the 2nd "/"-separated part of the text after the last ":", else ""."""
+    tail = identifier.rpartition(":")[2]
+    parts = tail.split("/")
+    return parts[1] if len(parts) > 1 else ""
+
+
+def _extract_publication_year_from_date(date_str):
+    """Return the first four characters of date_str (the year part), or "" if empty."""
+    # Slicing never raises IndexError, so the old try/except was dead code; the
+    # "or" additionally maps None to "" instead of raising TypeError.
+    return (date_str or "")[:4]
diff --git a/core/collectors/scielo_books.py b/core/collectors/scielo_books.py
new file mode 100644
index 0000000..b1f2dd8
--- /dev/null
+++ b/core/collectors/scielo_books.py
@@ -0,0 +1,182 @@
+import logging
+
+import requests
+from django.conf import settings
+from urllib.parse import urlencode
+
+
+
+
+def build_url(base_url, params=None):
+    """Append params to base_url as a query string (doseq encoding); no-op when empty."""
+    query = urlencode(params, doseq=True) if params else ""
+    return f"{base_url}?{query}" if params else base_url
+
+
+def sanitize_raw_data(payload):
+    """Return a copy of a dict payload with its "_id" key renamed to "id".
+
+    Non-dict payloads and dicts without "_id" are returned unchanged (same object).
+    """
+    if isinstance(payload, dict) and "_id" in payload:
+        renamed = {key: value for key, value in payload.items() if key != "_id"}
+        renamed["id"] = payload["_id"]
+        return renamed
+    return payload
+
+
+def fetch_document(doc_id, base_url=None, db_name=None, headers=None):
+    """Fetch one document and return (sanitized payload, source URL); raises
+    ValueError when no base URL is given or configured in settings."""
+    resolved_base_url = base_url or settings.SCIELO_BOOKS_BASE_URL
+    if not resolved_base_url:
+        logging.error("Sem base url definida para coleta de books")
+        raise ValueError("SCIELO_BOOKS_BASE_URL is not configured")
+    # NOTE(review): verify=False disables TLS certificate checks - confirm it is needed.
+    url = f"{resolved_base_url}/{db_name or settings.SCIELO_BOOKS_DB_NAME}/{doc_id}"
+    response = requests.get(url, headers=headers, timeout=settings.SCIELO_BOOKS_TIMEOUT, verify=False)
+    response.raise_for_status()
+    return sanitize_raw_data(response.json()), url
+
+
+def fetch_changes_page(
+    base_url=None,
+    db_name=None,
+    since=0,
+    limit=None,
+    include_docs=False,
+    headers=None,
+):
+    """Fetch one page of the database "_changes" feed and return it as a dict.
+
+    db_name, limit and base URL fall back to settings; returns {} when the decoded
+    body is not a dict. Raises ValueError when no base URL is available.
+    """
+    resolved_base_url = base_url or settings.SCIELO_BOOKS_BASE_URL
+    if not resolved_base_url:
+        logging.error("Sem base url definida para coleta de books")
+        raise ValueError("SCIELO_BOOKS_BASE_URL is not configured")
+
+    params = {"since": since, "limit": limit or settings.SCIELO_BOOKS_LIMIT}
+    if include_docs:
+        params["include_docs"] = "true"
+    feed_url = f"{resolved_base_url}/{db_name or settings.SCIELO_BOOKS_DB_NAME}/_changes"
+    url = build_url(feed_url, params)
+    response = requests.get(url, headers=headers, timeout=settings.SCIELO_BOOKS_TIMEOUT, verify=False)
+    response.raise_for_status()
+    payload = response.json()
+    return payload if isinstance(payload, dict) else {}
+
+
+def extract_changes(payload):
+    """Return payload["results"] when payload is a dict holding a list there, else []."""
+    results = payload.get("results") if isinstance(payload, dict) else None
+    return results if isinstance(results, list) else []
+
+
+def extract_last_seq(payload):
+    """Return the truthy "last_seq" of a dict payload, else its "seq"; None for non-dicts."""
+    is_mapping = isinstance(payload, dict)
+    return (payload.get("last_seq") or payload.get("seq")) if is_mapping else None
+
+
+def iter_changes(
+    base_url=None,
+    db_name=None,
+    since=0,
+    limit=None,
+    headers=None,
+):
+    """Yield every entry of the "_changes" feed, page by page, starting at since.
+
+    Stops on an empty page or when the feed's last sequence is missing or unchanged.
+    """
+    resolved_db = db_name or settings.SCIELO_BOOKS_DB_NAME
+    page_size = limit or settings.SCIELO_BOOKS_LIMIT
+    cursor = since or 0
+
+    while True:
+        payload = fetch_changes_page(
+            base_url=base_url, db_name=resolved_db,
+            since=cursor, limit=page_size,
+            include_docs=False, headers=headers,
+        )
+        page = extract_changes(payload)
+        if not page:
+            return
+
+        yield from page
+
+        next_cursor = extract_last_seq(payload)
+        if next_cursor is None or next_cursor == cursor:
+            return
+        cursor = next_cursor
+
+
+def iter_change_documents(
+    base_url=None,
+    db_name=None,
+    since=0,
+    limit=None,
+    headers=None,
+):
+    """Yield one dict per change: the change, a deleted flag, the payload and its URL.
+
+    Deleted changes carry no payload and no URL. Otherwise the document embedded
+    in the change is sanitized and used; when the change embeds none, the document
+    is fetched individually. Changes without an "id" are skipped. Iteration stops
+    on an empty page or when the feed's last sequence is missing or unchanged.
+    """
+    resolved_db = db_name or settings.SCIELO_BOOKS_DB_NAME
+    page_size = limit or settings.SCIELO_BOOKS_LIMIT
+    cursor = since or 0
+
+    while True:
+        payload = fetch_changes_page(
+            base_url=base_url,
+            db_name=resolved_db,
+            since=cursor,
+            limit=page_size,
+            include_docs=True,
+            headers=headers,
+        )
+        page = extract_changes(payload)
+        if not page:
+            break
+
+        for change in page:
+            # A change without an id cannot be tied to a document.
+            doc_id = change.get("id")
+            if not doc_id:
+                continue
+
+            deleted = bool(change.get("deleted"))
+            embedded_doc = change.get("doc") or {}
+            if deleted:
+                # Deletions are reported without payload or source URL.
+                document, source_url = None, None
+            elif embedded_doc:
+                # The change already embeds the document (include_docs was requested).
+                document = sanitize_raw_data(embedded_doc)
+                source_url = f"{(base_url or settings.SCIELO_BOOKS_BASE_URL)}/{resolved_db}/{doc_id}"
+            else:
+                # No embedded document: fall back to one request per document.
+                document, source_url = fetch_document(
+                    doc_id=doc_id,
+                    base_url=base_url,
+                    db_name=resolved_db,
+                    headers=headers,
+                )
+
+            yield {
+                "change": change,
+                "deleted": deleted,
+                "payload": document,
+                "source_url": source_url,
+            }
+
+        # Advance the cursor; stop when the feed reports no new sequence.
+        next_cursor = extract_last_seq(payload)
+        if next_cursor is None or next_cursor == cursor:
+            break
+        cursor = next_cursor

From 1124f4e99f232f97c2077126ca8400c3f7d5e174 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:57 -0300
Subject: [PATCH 19/31] feat: adicionar document tasks para articlemeta e opac

---
 document/tasks/articlemeta.py | 120 ++++++++++++++++++++++++++++++++++
 document/tasks/common.py      |  43 ++++++++++++
 document/tasks/opac.py        | 107 ++++++++++++++++++++++++++++++
 3 files changed, 270 insertions(+)
 create mode 100644 document/tasks/articlemeta.py
 create mode 100644 document/tasks/common.py
 create mode 100644 document/tasks/opac.py

diff --git a/document/tasks/articlemeta.py b/document/tasks/articlemeta.py
new file mode 100644
index 0000000..75b2689
--- /dev/null
+++ b/document/tasks/articlemeta.py
@@ -0,0 +1,120 @@
+import logging
+
+from django.db import DataError
+from django.utils.translation import gettext as _
+
+from core.collectors import articlemeta as articlemeta_collector
+from core.utils import date_utils
+from core.utils.request_utils import _get_user
+from document.services import articles as article_service
+from source.services import journals as journal_service
+
+from config import celery_app
+
+from .common import _get_collection
+
+
+def load_documents_from_article_meta(  # Load Article Meta records through the article upsert service.
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    collection=None,  # passed to the collector; also the fallback when a record has no "collection"
+    issn=None,
+    force_update=True,
+    user=None,
+):  # Returns True once a page comes back without objects.
+    from_date, until_date = date_utils.get_date_range_str(
+        from_date,
+        until_date,
+        days_to_go_back,
+    )
+    logging.info(
+        "Loading documents from Article Meta. From: %s, Until: %s, Collection: %s, ISSN: %s",
+        from_date,
+        until_date,
+        collection,
+        issn,
+    )
+    # Page through the listing `limit` records at a time until a page comes back empty.
+    offset = 0
+    limit = 1000
+    while True:
+        response = articlemeta_collector.fetch_article_counter_dict(
+            from_date,
+            until_date,
+            offset=offset,
+            limit=limit,
+            collection=collection,
+            issn=issn,
+        )
+        objects = response.get("objects") or []
+        if not objects:
+            break
+        # Records without a resolvable collection or journal source are logged and skipped.
+        for payload in objects:
+            collection_obj = _get_collection(payload.get("collection") or collection)
+            if not collection_obj:
+                logging.info(
+                    "Collection not found for payload %s",
+                    payload.get("code"),
+                )
+                continue
+            # The journal source is looked up with the payload's "code_title" value.
+            source = journal_service.find_journal_source_by_issns(
+                collection_obj,
+                payload.get("code_title"),
+            )
+            if not source:
+                logging.info(
+                    "Source not found for collection %s and ISSNs %s",
+                    collection_obj.acron3,
+                    payload.get("code_title"),
+                )
+                continue
+            # A DataError on one record is logged and the loop moves on to the next record.
+            try:
+                article_service.upsert_article_document_from_articlemeta(
+                    payload,
+                    collection=collection_obj,
+                    source=source,
+                    user=user,
+                    force_update=force_update,
+                )
+            except DataError as exc:
+                logging.error(
+                    "Error saving Document from Article Meta. "
+                    "Collection: %s, Source: %s, PIDv2: %s. Error: %s",
+                    collection_obj,
+                    source.source_id,
+                    payload.get('code'),
+                    exc
+                )
+                continue
+
+        offset += limit
+
+    return True
+
+
+@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (Article Meta)"), timelimit=-1, queue="load")
+def task_load_documents_from_article_meta(  # Celery entry point for load_documents_from_article_meta.
+    self,  # NOTE(review): Celery documents `time_limit`, not `timelimit` - confirm the decorator option is honored.
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    collection=None,
+    issn=None,
+    force_update=True,
+    user_id=None,
+    username=None,
+):
+    user = _get_user(self.request, username=username, user_id=user_id)  # presumably resolves the acting user; helper not shown here
+    return load_documents_from_article_meta(  # all remaining arguments are forwarded unchanged
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        collection=collection,
+        issn=issn,
+        force_update=force_update,
+        user=user,
+    )
diff --git a/document/tasks/common.py b/document/tasks/common.py
new file mode 100644
index 0000000..1645918
--- /dev/null
+++ b/document/tasks/common.py
@@ -0,0 +1,43 @@
+import logging
+
+from collection.models import Collection
+from document.models import Document
+from source.models import Source
+
+
+def _get_collection(acronym):
+    # First Collection whose acron3 equals the acronym; None for a blank acronym or no match.
+    lookup = Collection.objects.filter(acron3=acronym) if acronym else None
+    return None if lookup is None else lookup.first()
+
+
+def get_latest_scielo_books_last_seq(collection="books"):
+    # Highest last_seq stored on the collection's documents or on its book sources.
+    documents = Document.objects.filter(collection__acron3=collection)
+    book_sources = Source.objects.filter(
+        collection__acron3=collection,
+        source_type=Source.SOURCE_TYPE_BOOK,
+    )
+    return max(
+        _get_latest_last_seq_from_queryset(documents.only("extra_data")),
+        _get_latest_last_seq_from_queryset(book_sources.only("extra_data")),
+    )
+
+
+def _get_latest_last_seq_from_queryset(queryset):
+    # Largest positive, parseable "last_seq" found in the rows' extra_data; 0 if there is none.
+    parsed = (
+        _coerce_last_seq((row.extra_data or {}).get("last_seq"))
+        for row in queryset.iterator()
+    )
+    return max((seq for seq in parsed if seq is not None and seq > 0), default=0)
+
+
+def _coerce_last_seq(value):
+    # Parse a stored last_seq as int; None for blank or unparseable values (the latter is logged).
+    if value not in (None, ""):
+        try:
+            return int(value)
+        except (TypeError, ValueError):
+            logging.warning("Ignoring invalid SciELO Books last_seq value: %r", value)
+    return None
diff --git a/document/tasks/opac.py b/document/tasks/opac.py
new file mode 100644
index 0000000..5e1c81e
--- /dev/null
+++ b/document/tasks/opac.py
@@ -0,0 +1,107 @@
+import logging
+
+from django.db import DataError
+from django.utils.translation import gettext as _
+
+from core.collectors import opac as opac_collector
+from core.utils import date_utils
+from core.utils.request_utils import _get_user
+from document.services import articles as article_service
+from source.services import journals as journal_service
+
+from config import celery_app
+
+from .common import _get_collection
+
+
+def load_documents_from_opac(
+    collection="scl",
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    page=1,
+    force_update=True,
+    user=None,
+):
+    """Load OPAC documents page by page; return False if the collection is unknown, else True."""
+    from_date, until_date = date_utils.get_date_range_str(
+        from_date,
+        until_date,
+        days_to_go_back,
+    )
+    logging.info(
+        "Loading documents from OPAC. From: %s, Until: %s, Collection: %s",
+        from_date,
+        until_date,
+        collection,
+    )
+
+    collection_obj = _get_collection(collection)
+    if not collection_obj:
+        logging.error("Collection not found: %s", collection)
+        return False
+
+    while True:
+        response = opac_collector.fetch_counter_dict(from_date, until_date, page=page)
+        documents = response.get("documents") or {}
+        for payload in documents.values():
+            source = journal_service.find_journal_source_by_acronym(
+                collection_obj,
+                payload.get("journal_acronym"),
+            )
+            if not source:
+                logging.info(
+                    "Source not found for collection %s and acronym %s",
+                    collection_obj.acron3,
+                    payload.get("journal_acronym"),
+                )
+                continue
+
+            try:
+                article_service.upsert_article_document_from_opac(
+                    payload,
+                    collection=collection_obj,
+                    source=source,
+                    user=user,
+                    force_update=force_update,
+                )
+            except DataError as exc:
+                logging.error(
+                    "Error saving Document from OPAC. "
+                    "Collection: %s, Source: %s, PIDv2: %s. Error: %s",
+                    collection_obj,
+                    source.source_id,
+                    payload.get('pid_v2'),
+                    exc
+                )
+                continue
+        # "pages" may be missing or null; "or 0" makes both mean "no further pages".
+        page += 1
+        if page > int(response.get("pages") or 0):
+            break
+
+    return True
+
+
+@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (OPAC)"), timelimit=-1, queue="load")
+def task_load_documents_from_opac(  # Celery entry point for load_documents_from_opac.
+    self,  # NOTE(review): Celery documents `time_limit`, not `timelimit` - confirm the decorator option is honored.
+    collection="scl",
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    page=1,  # first OPAC page to request; the loader increments it from there
+    force_update=True,
+    user_id=None,
+    username=None,
+):
+    user = _get_user(self.request, username=username, user_id=user_id)  # presumably resolves the acting user; helper not shown here
+    return load_documents_from_opac(  # all remaining arguments are forwarded unchanged
+        collection=collection,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        page=page,
+        force_update=force_update,
+        user=user,
+    )

From 1e53b38d7a79a201ee9ebef132404e58e3689233 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:57 -0300
Subject: [PATCH 20/31] feat: adicionar document tasks para preprints,
 dataverse e scielo_books

---
 document/tasks/dataverse.py    |  80 +++++++++++
 document/tasks/preprints.py    |  82 +++++++++++
 document/tasks/scielo_books.py | 247 +++++++++++++++++++++++++++++++++
 3 files changed, 409 insertions(+)
 create mode 100644 document/tasks/dataverse.py
 create mode 100644 document/tasks/preprints.py
 create mode 100644 document/tasks/scielo_books.py

diff --git a/document/tasks/dataverse.py b/document/tasks/dataverse.py
new file mode 100644
index 0000000..15618a5
--- /dev/null
+++ b/document/tasks/dataverse.py
@@ -0,0 +1,80 @@
+import logging
+
+from django.db import DataError
+from django.utils.translation import gettext as _
+
+from core.collectors import dataverse as dataverse_collector
+from core.utils import date_utils
+from core.utils.request_utils import _get_user
+from document.services import datasets as dataset_service
+
+from config import celery_app
+
+from .common import _get_collection
+
+
+def load_dataset_metadata_from_dataverse(  # Load Dataverse dataset metadata through the dataset upsert service.
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    force_update=True,
+    user=None,
+):  # Returns False when the "data" collection is missing, True otherwise.
+    from_date, until_date = date_utils.get_date_range_str(
+        from_date,
+        until_date,
+        days_to_go_back,
+    )
+    logging.info(
+        "Loading dataset metadata into documents. From: %s, Until: %s",
+        from_date,
+        until_date,
+    )
+    # Every dataset is saved under the collection whose acron3 is "data".
+    collection_obj = _get_collection("data")
+    if not collection_obj:
+        logging.error("Collection not found: data")
+        return False
+
+    for payload in dataverse_collector.iter_dataset_metadata(from_date, until_date):
+        if not payload.get("dataset_doi"):
+            logging.error("Dataset DOI not found in record: %s", payload)
+            continue
+        # A DataError on one dataset is logged and the loop moves on to the next one.
+        try:
+            dataset_service.upsert_dataset_document(
+                payload,
+                collection=collection_obj,
+                user=user,
+                force_update=force_update,
+            )
+        except DataError as exc:
+            logging.error(
+                "Error saving Dataset Document. Collection: %s, PID: %s. Error: %s",
+                collection_obj,
+                payload.get('dataset_doi'),
+                exc
+            )
+            continue
+
+    return True
+
+
+@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (Dataverse)"), timelimit=-1, queue="load")
+def task_load_dataset_metadata_into_documents(  # Celery entry point for load_dataset_metadata_from_dataverse.
+    self,  # NOTE(review): Celery documents `time_limit`, not `timelimit` - confirm the decorator option is honored.
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    force_update=True,
+    user_id=None,
+    username=None,
+):
+    user = _get_user(self.request, username=username, user_id=user_id)  # presumably resolves the acting user; helper not shown here
+    return load_dataset_metadata_from_dataverse(  # all remaining arguments are forwarded unchanged
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        force_update=force_update,
+        user=user,
+    )
diff --git a/document/tasks/preprints.py b/document/tasks/preprints.py
new file mode 100644
index 0000000..ee63211
--- /dev/null
+++ b/document/tasks/preprints.py
@@ -0,0 +1,82 @@
+import logging
+
+from django.db import DataError
+from django.utils.translation import gettext as _
+
+from core.collectors import preprints as preprints_collector
+from core.utils import date_utils
+from core.utils.request_utils import _get_user
+from document.services import preprints as preprint_service
+
+from config import celery_app
+
+from .common import _get_collection
+
+
+def load_preprints_from_preprints_api(  # Load preprint records through the preprint upsert service.
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    force_update=True,
+    user=None,
+):  # Returns False when the "preprints" collection is missing, True otherwise.
+    from_date, until_date = date_utils.get_date_range_str(
+        from_date,
+        until_date,
+        days_to_go_back,
+    )
+    logging.info(
+        "Loading preprints into documents. From: %s, Until: %s",
+        from_date,
+        until_date,
+    )
+    # Every preprint is saved under the collection whose acron3 is "preprints".
+    collection_obj = _get_collection("preprints")
+    if not collection_obj:
+        logging.error("Collection not found: preprints")
+        return False
+
+    for record in preprints_collector.iter_records(from_date, until_date):
+        payload = preprints_collector.extract_record_data(record)
+        # Records whose extracted payload has no "pid_generic" are logged and skipped.
+        if not payload.get("pid_generic"):
+            logging.error("Preprint ID not found in record: %s", record)
+            continue
+        # A DataError on one preprint is logged and the loop moves on to the next one.
+        try:
+            preprint_service.upsert_preprint_document(
+                payload,
+                collection=collection_obj,
+                user=user,
+                force_update=force_update,
+            )
+        except DataError as exc:
+            logging.error(
+                "Error saving Preprint Document. Collection: %s, PID: %s. Error: %s",
+                collection_obj,
+                payload.get('pid_generic'),
+                exc
+            )
+            continue
+
+    return True
+
+
+@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (Preprints)"), timelimit=-1, queue="load")
+def task_load_preprints_into_documents(  # Celery entry point for load_preprints_from_preprints_api.
+    self,  # NOTE(review): Celery documents `time_limit`, not `timelimit` - confirm the decorator option is honored.
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    force_update=True,
+    user_id=None,
+    username=None,
+):
+    user = _get_user(self.request, username=username, user_id=user_id)  # presumably resolves the acting user; helper not shown here
+    return load_preprints_from_preprints_api(  # all remaining arguments are forwarded unchanged
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        force_update=force_update,
+        user=user,
+    )
diff --git a/document/tasks/scielo_books.py b/document/tasks/scielo_books.py
new file mode 100644
index 0000000..ddbd462
--- /dev/null
+++ b/document/tasks/scielo_books.py
@@ -0,0 +1,247 @@
+import logging
+
+from django.conf import settings
+from django.utils.translation import gettext as _
+
+from core.collectors import scielo_books as scielo_books_collector
+from core.utils.request_utils import _get_user
+from document.services import books as document_books_service
+from source.services import books as source_books_service
+
+from config import celery_app
+
+from .common import get_latest_scielo_books_last_seq
+
+
+def load_documents_from_scielo_books(  # Apply SciELO Books change-feed items to book sources and documents.
+    collection="books",
+    db_name=None,  # falls back to settings.SCIELO_BOOKS_DB_NAME
+    since=0,  # handed to the collector as the starting point of the change feed
+    limit=None,  # falls back to settings.SCIELO_BOOKS_LIMIT
+    force_update=True,
+    headers=None,
+    base_url=None,
+    user=None,
+):  # Returns True after the change iterator is exhausted.
+    db_name = db_name or settings.SCIELO_BOOKS_DB_NAME
+    limit = limit or settings.SCIELO_BOOKS_LIMIT
+    collection_obj = source_books_service.get_books_collection(collection)
+    monograph_cache = {}  # monograph payloads seen or fetched during this run, keyed by str(id)
+
+    logging.info(
+        "Loading documents from SciELO Books. Collection: %s, DB: %s, Since: %s, Limit: %s",
+        collection,
+        db_name,
+        since,
+        limit,
+    )
+
+    for item in scielo_books_collector.iter_change_documents(
+        base_url=base_url,
+        db_name=db_name,
+        since=since,
+        limit=limit,
+        headers=headers,
+    ):
+        change = item["change"]
+        raw_id = change.get("id")
+        # Deletions: remove the document and, when the monograph check matched, the book source.
+        if item["deleted"]:
+            delete_source = document_books_service.has_monograph_document_for_raw_id(
+                collection_obj,
+                raw_id,
+            )  # evaluated before the delete below, while the document still exists
+            document_books_service.delete_document_by_raw_id(collection_obj, raw_id)
+            if delete_source:
+                source_books_service.delete_book_source(collection_obj, raw_id)
+            continue
+
+        payload = item["payload"] or {}
+        source_url = item.get("source_url")
+        last_seq = change.get("seq")
+        # Monographs yield a book source plus a document and are cached for later parts.
+        if payload.get("TYPE") == "Monograph":
+            source = source_books_service.upsert_monograph_source(
+                payload,
+                collection=collection_obj,
+                user=user,
+                force_update=force_update,
+                source_url=source_url,
+                last_seq=last_seq,
+            )
+            document_books_service.upsert_monograph_document(
+                payload,
+                collection=collection_obj,
+                source=source,
+                user=user,
+                force_update=force_update,
+                source_url=source_url,
+                last_seq=last_seq,
+            )
+            monograph_cache[str(payload.get("id"))] = payload
+            continue
+        # Any record whose TYPE is neither "Monograph" nor "Part" is ignored.
+        if payload.get("TYPE") != "Part":
+            continue
+        # A part needs its parent monograph, taken from the cache or fetched on demand.
+        monograph_payload = _get_monograph_payload(
+            payload,
+            monograph_cache=monograph_cache,
+            base_url=base_url,
+            db_name=db_name,
+            headers=headers,
+        )
+        if not monograph_payload:
+            logging.warning(
+                "Skipping part %s because monograph %s could not be loaded.",
+                payload.get("id"),
+                payload.get("monograph"),
+            )
+            continue
+        # The parent source and document are upserted first; the part is then linked to both.
+        source = source_books_service.upsert_monograph_source(
+            monograph_payload,
+            collection=collection_obj,
+            user=user,
+            force_update=force_update,
+            source_url=None,
+            last_seq=last_seq,  # NOTE(review): this is the part's change seq, passed for the parent - confirm intended
+        )
+        parent_document = document_books_service.upsert_monograph_document(
+            monograph_payload,
+            collection=collection_obj,
+            source=source,
+            user=user,
+            force_update=force_update,
+            source_url=None,
+            last_seq=last_seq,
+        )
+        enriched_payload = document_books_service.enrich_part_payload(
+            payload,
+            monograph_payload,
+        )
+        document_books_service.upsert_part_document(
+            enriched_payload,
+            collection=collection_obj,
+            source=source,
+            parent_document=parent_document,
+            user=user,
+            force_update=force_update,
+            source_url=source_url,
+            last_seq=last_seq,
+        )
+
+    return True
+
+
+def sync_documents_from_scielo_books(  # Incremental variant of load_documents_from_scielo_books.
+    collection="books",
+    db_name=None,
+    limit=None,
+    force_update=True,
+    headers=None,
+    base_url=None,
+    user=None,
+):  # Returns whatever load_documents_from_scielo_books returns.
+    db_name = db_name or settings.SCIELO_BOOKS_DB_NAME
+    limit = limit or settings.SCIELO_BOOKS_LIMIT
+    since = get_latest_scielo_books_last_seq(collection=collection)  # start from the last_seq returned by the helper
+    logging.info(
+        "Syncing documents from SciELO Books incrementally. Collection: %s, Since: %s, Limit: %s",
+        collection,
+        since,
+        limit,
+    )
+    return load_documents_from_scielo_books(
+        collection=collection,
+        db_name=db_name,
+        since=since,
+        limit=limit,
+        force_update=force_update,
+        headers=headers,
+        base_url=base_url,
+        user=user,
+    )
+
+
+@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (SciELO Books - Manual)"), queue="load")
+def task_load_documents_from_scielo_books(  # Celery entry point for load_documents_from_scielo_books.
+    self,
+    collection="books",
+    db_name=None,
+    since=0,  # caller-chosen starting sequence; the default 0 is forwarded as-is
+    limit=None,
+    force_update=True,
+    headers=None,
+    base_url=None,
+    user_id=None,
+    username=None,
+):
+    db_name = db_name or settings.SCIELO_BOOKS_DB_NAME  # the loader applies the same fallback again
+    limit = limit or settings.SCIELO_BOOKS_LIMIT
+    user = _get_user(self.request, username=username, user_id=user_id)  # presumably resolves the acting user; helper not shown here
+    return load_documents_from_scielo_books(
+        collection=collection,
+        db_name=db_name,
+        since=since,
+        limit=limit,
+        force_update=force_update,
+        headers=headers,
+        base_url=base_url,
+        user=user,
+    )
+
+
+@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (SciELO Books - Incremental)"), queue="load")
+def task_sync_documents_from_scielo_books(  # Celery entry point for sync_documents_from_scielo_books.
+    self,
+    collection="books",
+    db_name=None,
+    limit=None,
+    force_update=True,
+    headers=None,
+    base_url=None,
+    user_id=None,
+    username=None,
+):
+    db_name = db_name or settings.SCIELO_BOOKS_DB_NAME  # the sync function applies the same fallback again
+    limit = limit or settings.SCIELO_BOOKS_LIMIT
+    user = _get_user(self.request, username=username, user_id=user_id)  # presumably resolves the acting user; helper not shown here
+    return sync_documents_from_scielo_books(
+        collection=collection,
+        db_name=db_name,
+        limit=limit,
+        force_update=force_update,
+        headers=headers,
+        base_url=base_url,
+        user=user,
+    )
+
+
+def _get_monograph_payload(payload, monograph_cache, base_url=None, db_name=None, headers=None):
+    """Return the monograph payload referenced by a part, or None when it cannot be obtained."""
+    monograph_id = payload.get("monograph")
+    if not monograph_id:
+        return None
+    # Cache keys are always strings; a hit avoids a second fetch of the same monograph.
+    monograph_key = str(monograph_id)
+    if monograph_key in monograph_cache:
+        return monograph_cache[monograph_key]
+    # Cache miss: fetch it; "_" is avoided below because it is this module's gettext alias.
+    try:
+        monograph_payload, _source_url = scielo_books_collector.fetch_document(
+            doc_id=monograph_id,
+            base_url=base_url,
+            db_name=db_name or settings.SCIELO_BOOKS_DB_NAME,
+            headers=headers,
+        )
+    except Exception as exc:
+        logging.warning(
+            "Failed to fetch monograph %s for part %s: %s",
+            monograph_id,
+            payload.get("id"),
+            exc,
+        )
+        return None
+    monograph_cache[monograph_key] = monograph_payload
+    return monograph_payload

From 0ccb0e9874944bd2680d4ceba0802e8380a97772 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:33:57 -0300
Subject: [PATCH 21/31] feat: adicionar document task pipeline, wagtail hooks e
 management command

---
 document/__init__.py                          |   1 +
 document/apps.py                              |   6 +
 document/management/__init__.py               |   1 +
 document/management/commands/__init__.py      |   1 +
 .../commands/load_articles_by_year.py         |  80 ++++++
 document/tasks/__init__.py                    |  28 ++
 document/tasks/pipeline.py                    |  24 ++
 document/tests.py                             | 255 ++++++++++++++++++
 document/wagtail_hooks.py                     |  35 +++
 9 files changed, 431 insertions(+)
 create mode 100644 document/__init__.py
 create mode 100644 document/apps.py
 create mode 100644 document/management/__init__.py
 create mode 100644 document/management/commands/__init__.py
 create mode 100644 document/management/commands/load_articles_by_year.py
 create mode 100644 document/tasks/__init__.py
 create mode 100644 document/tasks/pipeline.py
 create mode 100644 document/tests.py
 create mode 100644 document/wagtail_hooks.py

diff --git a/document/__init__.py b/document/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/document/__init__.py
@@ -0,0 +1 @@
+
diff --git a/document/apps.py b/document/apps.py
new file mode 100644
index 0000000..eb482d2
--- /dev/null
+++ b/document/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class DocumentConfig(AppConfig):  # Django AppConfig for the "document" app.
+    default_auto_field = "django.db.models.BigAutoField"  # models without an explicit primary key get a BigAutoField
+    name = "document"
diff --git a/document/management/__init__.py b/document/management/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/document/management/__init__.py
@@ -0,0 +1 @@
+
diff --git a/document/management/commands/__init__.py b/document/management/commands/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/document/management/commands/__init__.py
@@ -0,0 +1 @@
+
diff --git a/document/management/commands/load_articles_by_year.py b/document/management/commands/load_articles_by_year.py
new file mode 100644
index 0000000..a922456
--- /dev/null
+++ b/document/management/commands/load_articles_by_year.py
@@ -0,0 +1,80 @@
+from django.core.management.base import BaseCommand
+
+from document.tasks import task_load_documents_from_article_meta
+from document.tasks import task_load_documents_from_opac
+
+
+class Command(BaseCommand):  # Queues one document-loading Celery task per year in a range.
+    help = "Generate task requests for loading document data by year"
+
+    def add_arguments(self, parser):  # Declares --start-year, --end-year, --collection and --task.
+        parser.add_argument(
+            "--start-year",
+            type=int,
+            default=1990,
+            help="Start year (default: 1990)",
+        )
+        parser.add_argument(
+            "--end-year",
+            type=int,
+            default=2025,  # NOTE(review): hard-coded upper bound; later years must be requested with --end-year
+            help="End year (default: 2025)",
+        )
+        parser.add_argument(
+            "--collection",
+            type=str,
+            default="scl",
+            help="Collection code (default: scl)",
+        )
+        parser.add_argument(
+            "--task",
+            choices=["load_documents_from_opac", "load_documents_from_article_meta"],
+            default="load_documents_from_opac",
+            help="Task to execute (default: load_documents_from_opac)",
+        )
+
+    def handle(self, *args, **options):  # Queues the tasks with .delay(); it does not wait for their results.
+        start_year = options["start_year"]
+        end_year = options["end_year"]
+        collection = options["collection"]
+
+        self.stdout.write(
+            self.style.SUCCESS(
+                f"Generating task requests from {start_year} to {end_year} for collection: {collection}"
+            )
+        )
+
+        total_tasks = 0
+        # One task per calendar year (both bounds included), covering Jan 1st to Dec 31st.
+        for year in range(start_year, end_year + 1):
+            from_date = f"{year}-01-01"
+            until_date = f"{year}-12-31"
+
+            self.stdout.write(f"Queuing task for year {year}...")
+            # --task picks the Article Meta loader; any other value falls through to OPAC.
+            if options["task"] == "load_documents_from_article_meta":
+                task_result = task_load_documents_from_article_meta.delay(
+                    from_date=from_date,
+                    until_date=until_date,
+                    collection=collection,
+                )
+            else:
+                task_result = task_load_documents_from_opac.delay(
+                    from_date=from_date,
+                    until_date=until_date,
+                    collection=collection,
+                )
+
+            total_tasks += 1
+
+            self.stdout.write(
+                self.style.SUCCESS(
+                    f"✓ Task queued for year {year}: {from_date} to {until_date} (Task ID: {task_result.id})"
+                )
+            )
+
+        self.stdout.write(
+            self.style.SUCCESS(
+                f"\nCompleted! {total_tasks} tasks have been queued successfully."
+            )
+        )
diff --git a/document/tasks/__init__.py b/document/tasks/__init__.py
new file mode 100644
index 0000000..95a0ba5
--- /dev/null
+++ b/document/tasks/__init__.py
@@ -0,0 +1,28 @@
+from .articlemeta import (
+    load_documents_from_article_meta,
+    task_load_documents_from_article_meta,
+)
+from .common import (
+    get_latest_scielo_books_last_seq,
+)
+from .dataverse import (
+    load_dataset_metadata_from_dataverse,
+    task_load_dataset_metadata_into_documents,
+)
+from .opac import (
+    load_documents_from_opac,
+    task_load_documents_from_opac,
+)
+from .pipeline import (
+    task_daily_metadata_sync_pipeline,
+)
+from .preprints import (
+    load_preprints_from_preprints_api,
+    task_load_preprints_into_documents,
+)
+from .scielo_books import (
+    load_documents_from_scielo_books,
+    sync_documents_from_scielo_books,
+    task_load_documents_from_scielo_books,
+    task_sync_documents_from_scielo_books,
+)
diff --git a/document/tasks/pipeline.py b/document/tasks/pipeline.py
new file mode 100644
index 0000000..97bef7c
--- /dev/null
+++ b/document/tasks/pipeline.py
@@ -0,0 +1,24 @@
+import logging
+
+from celery import group
+from django.utils.translation import gettext as _
+
+from config import celery_app
+
+from .articlemeta import task_load_documents_from_article_meta
+from .dataverse import task_load_dataset_metadata_into_documents
+from .opac import task_load_documents_from_opac
+from .preprints import task_load_preprints_into_documents
+from .scielo_books import task_sync_documents_from_scielo_books
+
+
+@celery_app.task(bind=True, name=_("[Metadata] Daily Sync Routine (Auto)"), queue="load")
+def task_daily_metadata_sync_pipeline(self):  # Dispatches the five metadata sync tasks as one Celery group.
+    logging.info("Starting Daily Metadata Sync Pipeline")
+    group([  # every signature is built with no arguments, so each task runs with its defaults
+        task_load_documents_from_article_meta.s(),
+        task_load_documents_from_opac.s(),
+        task_load_preprints_into_documents.s(),
+        task_load_dataset_metadata_into_documents.s(),
+        task_sync_documents_from_scielo_books.s(),
+    ]).apply_async()  # the group result is neither awaited nor returned; this task returns None
diff --git a/document/tests.py b/document/tests.py
new file mode 100644
index 0000000..14d9bcd
--- /dev/null
+++ b/document/tests.py
@@ -0,0 +1,255 @@
+from django.test import TestCase
+from unittest.mock import patch
+
+from collection.models import Collection
+from document import tasks as document_tasks
+from source.services import books as source_books_service
+from source.models import Source
+
+from .models import Document
+from .services import articles as article_service
+from .services import books as books_service
+from .services import datasets as dataset_service
+from .services import preprints as preprint_service
+
+
+class DocumentMetadataTests(TestCase):  # Covers Document.metadata() and the per-origin upsert services.
+    def test_metadata_includes_source_context_and_legacy_identifiers(self):  # one metadata row carries both document and source identifiers
+        collection = Collection.objects.create(acron3="scl", acron2="sc")
+        source = Source.objects.create(
+            collection=collection,
+            source_type=Source.SOURCE_TYPE_JOURNAL,
+            source_id="1234-5678",
+            scielo_issn="1234-5678",
+            title="Test Journal",
+            identifiers={"scielo_issn": "1234-5678"},
+        )
+        Document.objects.create(
+            collection=collection,
+            source=source,
+            document_type=Document.DOCUMENT_TYPE_ARTICLE,
+            document_id="S123456782024000100001",
+            scielo_issn="1234-5678",
+            pid_v2="S123456782024000100001",
+            pid_v3="abc123",
+            title="Test Article",
+            identifiers={"doi": "10.1590/example"},
+            files={"pt": {"path": "/pdf/test.pdf"}},
+            default_lang="en",
+            text_langs=["en", "pt"],
+            publication_date="2024-01-15",
+            publication_year="2024",
+        )
+
+        metadata = list(Document.metadata(collection=collection))  # turned into a list so it can be counted and indexed
+
+        self.assertEqual(len(metadata), 1)
+        self.assertEqual(metadata[0]["document_type"], Document.DOCUMENT_TYPE_ARTICLE)
+        self.assertEqual(metadata[0]["document_id"], "S123456782024000100001")
+        self.assertEqual(metadata[0]["source_type"], Source.SOURCE_TYPE_JOURNAL)
+        self.assertEqual(metadata[0]["source_id"], "1234-5678")
+        self.assertEqual(metadata[0]["scielo_issn"], "1234-5678")
+
+    def test_upsert_monograph_and_part_documents_from_books_payload(self):  # book payloads become a parent book document plus a chapter linked to it
+        collection = Collection.objects.create(acron3="books", acron2="bk")
+        monograph_payload = {
+            "TYPE": "Monograph",
+            "id": "abcd1",
+            "title": "Sample Book",
+            "isbn": "9788578791889",
+            "eisbn": "9788578791880",
+            "doi_number": "10.1234/book",
+            "language": "pt",
+            "publication_date": "2024-05-20",
+            "year": "2024",
+            "publisher": "SciELO Books",
+        }
+        part_payload = {
+            "TYPE": "Part",
+            "id": "18",
+            "monograph": "abcd1",
+            "title": "Chapter 18",
+            "text_language": "es",
+            "order": "18",
+        }
+
+        source = source_books_service.upsert_monograph_source(
+            monograph_payload,
+            collection=collection,
+        )
+        parent_document = books_service.upsert_monograph_document(
+            monograph_payload,
+            collection=collection,
+            source=source,
+        )
+        chapter = books_service.upsert_part_document(
+            books_service.enrich_part_payload(part_payload, monograph_payload),  # the part payload is enriched with its monograph before the upsert
+            collection=collection,
+            source=source,
+            parent_document=parent_document,
+        )
+
+        self.assertEqual(parent_document.document_type, Document.DOCUMENT_TYPE_BOOK)
+        self.assertEqual(parent_document.document_id, "book:abcd1")
+        self.assertEqual(parent_document.pid_generic, "book:abcd1")
+        self.assertEqual(chapter.document_type, Document.DOCUMENT_TYPE_CHAPTER)
+        self.assertEqual(chapter.document_id, "book:abcd1/chapter:18")
+        self.assertEqual(chapter.parent_document, parent_document)
+        self.assertEqual(chapter.identifiers["book_id"], "abcd1")
+        self.assertEqual(chapter.default_lang, "es")  # matches the part's text_language ("es"), not the monograph's language ("pt")
+
+    def test_articlemeta_and_opac_upsert_same_document(self):  # the same pid_v2 arriving from ArticleMeta and then OPAC must end up in a single row
+        collection = Collection.objects.create(acron3="scl", acron2="sc")
+        source = Source.objects.create(
+            collection=collection,
+            source_type=Source.SOURCE_TYPE_JOURNAL,
+            source_id="1234-5678",
+            scielo_issn="1234-5678",
+            acronym="testjou",
+            title="Test Journal",
+            identifiers={"scielo_issn": "1234-5678"},
+        )
+
+        first = article_service.upsert_article_document_from_articlemeta(
+            {
+                "code": "S123456782024000100001",
+                "title": "Article Title",
+                "pdfs": {"en": {"url": "/pdf/en.pdf"}},
+                "processing_date": "2024-02-10",
+                "publication_date": "2024-01-15",
+                "publication_year": "2024",
+                "default_language": "en",
+                "text_langs": ["en", "pt"],
+                "code_title": ["1234-5678"],
+            },
+            collection=collection,
+            source=source,
+        )
+        second = article_service.upsert_article_document_from_opac(
+            {
+                "pid_v2": "S123456782024000100001",
+                "pid_v3": "S1234-56782024000100001",
+                "title": "Article Title",
+                "journal_acronym": "testjou",
+                "publication_date": "2024-01-15",
+                "default_language": "en",
+                "text_langs": ["en", "pt"],
+            },
+            collection=collection,
+            source=source,
+        )
+
+        self.assertEqual(first.pk, second.pk)  # both upserts resolved to the same database row
+        self.assertEqual(Document.objects.count(), 1)
+        second.refresh_from_db()  # assert against what was actually persisted
+        self.assertEqual(second.pid_v3, "S1234-56782024000100001")
+        self.assertEqual(second.identifiers["journal_acronym"], "testjou")
+
+    def test_upsert_preprint_document_maps_metadata(self):  # preprint payload fields are mapped onto the Document
+        collection = Collection.objects.create(acron3="preprints", acron2="pp")
+
+        document = preprint_service.upsert_preprint_document(
+            {
+                "pid_generic": "preprint/123",
+                "title": "Preprint Title",
+                "text_langs": ["en", "pt"],
+                "default_language": "en",
+                "publication_date": "2024-01-20",
+                "publication_year": "2024",
+            },
+            collection=collection,
+        )
+
+        self.assertEqual(document.document_type, Document.DOCUMENT_TYPE_PREPRINT)
+        self.assertEqual(document.document_id, "preprint/123")
+        self.assertEqual(document.pid_generic, "preprint/123")
+        self.assertEqual(document.default_lang, "en")
+
+    def test_upsert_dataset_document_accumulates_files(self):  # two payloads sharing a dataset_doi add their files to one document
+        collection = Collection.objects.create(acron3="data", acron2="dt")
+
+        dataset_service.upsert_dataset_document(
+            {
+                "title": "Dataset Title",
+                "dataset_doi": "10.1234/dataset",
+                "dataset_published": "2024-03-15",
+                "file_id": "1",
+                "file_name": "first.csv",
+                "file_url": "https://example.org/first.csv",
+                "file_persistent_id": "pid:first",
+            },
+            collection=collection,
+        )
+        document = dataset_service.upsert_dataset_document(
+            {
+                "title": "Dataset Title",
+                "dataset_doi": "10.1234/dataset",
+                "dataset_published": "2024-03-15",
+                "file_id": "2",
+                "file_name": "second.csv",
+                "file_url": "https://example.org/second.csv",
+                "file_persistent_id": "pid:second",
+            },
+            collection=collection,
+        )
+
+        self.assertEqual(document.document_type, Document.DOCUMENT_TYPE_DATASET)
+        self.assertEqual(document.document_id, "10.1234/dataset")
+        self.assertEqual(set(document.files.keys()), {"1", "2"})  # files end up keyed by the payloads' file_id values
+
+
+class DocumentBooksSyncTests(TestCase):  # Covers last_seq discovery and the SciELO Books sync wrapper.
+    def test_get_latest_scielo_books_last_seq_uses_documents_and_sources(self):  # both Source and Document rows carry a last_seq in extra_data
+        collection = Collection.objects.create(acron3="books", acron2="bk")
+        source = Source.objects.create(
+            collection=collection,
+            source_type=Source.SOURCE_TYPE_BOOK,
+            source_id="book-1",
+            title="Book 1",
+            extra_data={"last_seq": 120},
+        )
+        Document.objects.create(
+            collection=collection,
+            source=source,
+            document_type=Document.DOCUMENT_TYPE_BOOK,
+            document_id="book:book-1",
+            extra_data={"last_seq": "135"},  # stored as a string here; the assertion below expects the int 135
+        )
+
+        self.assertEqual(document_tasks.get_latest_scielo_books_last_seq("books"), 135)  # the document's 135 is returned rather than the source's 120
+
+    def test_sync_documents_from_scielo_books_uses_computed_since(self):  # the sync wrapper must forward a `since` derived from stored data
+        collection = Collection.objects.create(acron3="books", acron2="bk")
+        source = Source.objects.create(
+            collection=collection,
+            source_type=Source.SOURCE_TYPE_BOOK,
+            source_id="book-1",
+            title="Book 1",
+            extra_data={"last_seq": 120},
+        )
+        Document.objects.create(
+            collection=collection,
+            source=source,
+            document_type=Document.DOCUMENT_TYPE_BOOK,
+            document_id="book:book-1",
+            extra_data={"last_seq": 135},
+        )
+
+        with patch("document.tasks.scielo_books.load_documents_from_scielo_books", return_value=True) as mocked:  # replace the loader so only the forwarded arguments are checked
+            result = document_tasks.sync_documents_from_scielo_books(
+                collection="books",
+                db_name="scielobooks_1a",
+                limit=500,
+            )
+
+        self.assertTrue(result)
+        mocked.assert_called_once_with(
+            collection="books",
+            db_name="scielobooks_1a",
+            since=135,  # not passed by the test: expected to come from the last_seq values created above
+            limit=500,
+            force_update=True,  # not passed by the test either, so supplied by the sync function itself
+            headers=None,
+            base_url=None,
+            user=None,
+        )
diff --git a/document/wagtail_hooks.py b/document/wagtail_hooks.py
new file mode 100644
index 0000000..de291c9
--- /dev/null
+++ b/document/wagtail_hooks.py
@@ -0,0 +1,35 @@
+from django.utils.translation import gettext_lazy as _
+from wagtail.snippets.views.snippets import SnippetViewSet
+
+from .models import Document
+
+
+class DocumentSnippetViewSet(SnippetViewSet):  # Wagtail snippet admin configuration for Document. NOTE(review): this module never registers the viewset -- confirm it is registered elsewhere.
+    model = Document
+    icon = "folder-open-inverse"
+    menu_label = _("Document")
+    menu_order = 300
+
+    list_display = (  # columns shown in the snippet listing
+        "collection",
+        "document_type",
+        "document_id",
+        "source",
+        "title",
+        "pid_v2",
+        "pid_v3",
+        "pid_generic",
+        "publication_year",
+    )
+    list_filter = (  # filters offered on the listing
+        "collection",
+        "document_type",
+        "publication_year",
+    )
+    search_fields = (  # fields matched by the listing search
+        "document_id",
+        "title",
+        "pid_v2",
+        "pid_v3",
+        "pid_generic",
+    )

From 9c1e3820acfb8271e71be0b13750a079894ed997 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:34:06 -0300
Subject: [PATCH 22/31] feat: registrar novos apps em INSTALLED_APPS, menu e
 wagtail hooks

---
 config/menu.py           | 15 +++-----
 config/settings/base.py  | 82 +++++++++++++++++++++++++---------------
 metrics/wagtail_hooks.py | 22 +++++++++++
 3 files changed, 80 insertions(+), 39 deletions(-)
 create mode 100644 metrics/wagtail_hooks.py

diff --git a/config/menu.py b/config/menu.py
index 13371c6..844ce0c 100644
--- a/config/menu.py
+++ b/config/menu.py
@@ -1,13 +1,10 @@
 WAGTAIL_MENU_APPS_ORDER = {
-    "collection": 100,
-    "article": 200,
-    "journal": 300,
-    "resources": 400,
-    "log_manager": 500,
-    "log_manager_config": 600,
-    "metrics": 700,
-    "tasks": 800,
-    "unexpected-error": 900,
+    "metadata": 100,
+    "resources": 200,
+    "log_manager": 300,
+    "tracker": 400,
+    "metrics": 500,
+    "tasks": 600,
 }
 
 def get_menu_order(app_name):
diff --git a/config/settings/base.py b/config/settings/base.py
index 9638274..e4a99fa 100644
--- a/config/settings/base.py
+++ b/config/settings/base.py
@@ -5,7 +5,8 @@
 from pathlib import Path
 
 import environ
-from django.utils.translation import gettext_lazy as _
+
+from config.collections import COLLECTION_ACRON3_SIZE_MAP  # noqa: F401
 
 ROOT_DIR = Path(__file__).resolve(strict=True).parent.parent.parent
 # core/
@@ -116,10 +117,13 @@
     # Your stuff: custom apps go here
     "collection",
     "core",
+    "document",
     "log_manager",
     "log_manager_config",
     "metrics",
+    "reports",
     "resources",
+    "source",
     "tracker",
 ]
 
@@ -402,36 +406,54 @@
 
 SEARCH_PAGINATION_ITEMS_PER_PAGE = 10
 
-# Elasticsearch
+# OpenSearch
 # ------------------------------------------------------------------------------
-ES_URL = env("ES_URL", default="http://192.168.0.33:9200/")
-ES_INDEX_NAME = env("ES_INDEX_NAME", default="usage")
-ES_API_KEY = env("ES_API_KEY", default="")
-ES_BASIC_AUTH = env("ES_BASIC_AUTH", default=("elastic", "iHktg66E"))
-ES_VERIFY_CERTS = env.bool("ES_VERIFY_CERTS", default=False)
+OPENSEARCH_URL = env("OPENSEARCH_URL", default="http://localhost:9200/")
+OPENSEARCH_INDEX_NAME = env("OPENSEARCH_INDEX_NAME", default="usage")
+OPENSEARCH_API_KEY = env("OPENSEARCH_API_KEY", default="")
+OPENSEARCH_BASIC_AUTH = env(  # NOTE(review): no cast is applied, so a value read from the environment is a plain string while the default is a tuple -- confirm consumers accept both
+    "OPENSEARCH_BASIC_AUTH",
+    default=("admin", "admin"),  # placeholder credentials; override them outside local development
+)
+OPENSEARCH_VERIFY_CERTS = env.bool(
+    "OPENSEARCH_VERIFY_CERTS",
+    default=False,  # certificate verification stays off unless the variable enables it
+)
+
+# Collectors configuration
+# ------------------------------------------------------------------------------
+# ArticleMeta
+ARTICLEMETA_COLLECT_URL = env(
+    "ARTICLEMETA_COLLECT_URL",
+    default="http://articlemeta.scielo.org/api/v1/article/counter_dict",
+)
+ARTICLEMETA_MAX_RETRIES = env.int("ARTICLEMETA_MAX_RETRIES", default=5)
+ARTICLEMETA_SLEEP_TIME = env.int("ARTICLEMETA_SLEEP_TIME", default=30)  # presumably seconds, like the other *_SLEEP_TIME values -- TODO confirm
+
+# Dataverse
+DATAVERSE_ENDPOINT = env("DATAVERSE_ENDPOINT", default="https://data.scielo.org/api")
+DATAVERSE_ROOT_COLLECTION = env("DATAVERSE_ROOT_COLLECTION", default="scielodata")
+DATAVERSE_SLEEP_TIME = env.int("DATAVERSE_SLEEP_TIME", default=30)
+
+# OPAC
+OPAC_ENDPOINT = env("OPAC_ENDPOINT", default="https://www.scielo.br/api/v1/counter_dict")
+OPAC_MAX_RETRIES = env.int("OPAC_MAX_RETRIES", default=5)
+OPAC_SLEEP_TIME = env.int("OPAC_SLEEP_TIME", default=30)
+
+# Preprints
+OAI_PMH_PREPRINT_ENDPOINT = env(
+    "OAI_PMH_PREPRINT_ENDPOINT",
+    default="https://preprints.scielo.org/index.php/scielo/oai",
+)
+OAI_METADATA_PREFIX = env("OAI_METADATA_PREFIX", default="oai_dc")
+OAI_PMH_MAX_RETRIES = env.int("OAI_PMH_MAX_RETRIES", default=5)
+
+# SciELO Books
+SCIELO_BOOKS_BASE_URL = env("SCIELO_BOOKS_BASE_URL", default="http://localhost:5984")  # local default only; real deployments must set the variable
+SCIELO_BOOKS_TIMEOUT = env.int("SCIELO_BOOKS_TIMEOUT", default=60)
+SCIELO_BOOKS_DB_NAME = env("SCIELO_BOOKS_DB_NAME", default="scielobooks_1a")
+SCIELO_BOOKS_LIMIT = env.int("SCIELO_BOOKS_LIMIT", default=1000)
 
-# Collection size categories
+# Supported log file extensions
 # ------------------------------------------------------------------------------
-EXTRA_LARGE_COLLECTIONS = env.list("EXTRA_LARGE_COLLECTIONS", default=["scl"])
-LARGE_COLLECTIONS = env.list("LARGE_COLLECTIONS", default=["chl", "col", "mex"])
-MEDIUM_COLLECTIONS = env.list("MEDIUM_COLLECTIONS", default=["cri", "esp", "psi", "prt", "ven"])
-SMALL_COLLECTIONS = env.list("SMALL_COLLECTIONS", default=["arg", "bol", "cub", "data", "ecu", "per", "preprints", "pry", "rve", "spa", "sss", "sza", "ury", "wid"])
-
-# Collection size mapping
-def _build_collection_size_map():
-    """Build mapping of collection acronyms to their size categories."""
-    size_map = {}
-    size_categories = {
-        "xlarge": EXTRA_LARGE_COLLECTIONS,
-        "large": LARGE_COLLECTIONS,
-        "medium": MEDIUM_COLLECTIONS,
-        "small": SMALL_COLLECTIONS,
-    }
-    
-    for size, collections in size_categories.items():
-        for acron3 in collections:
-            size_map[acron3] = size
-    
-    return size_map
-
-COLLECTION_ACRON3_SIZE_MAP = _build_collection_size_map()
+SUPPORTED_LOGFILE_EXTENSIONS = env.list("SUPPORTED_LOGFILE_EXTENSIONS", default=[".log", ".gz", ".zip"])
diff --git a/metrics/wagtail_hooks.py b/metrics/wagtail_hooks.py
new file mode 100644
index 0000000..94c2ffb
--- /dev/null
+++ b/metrics/wagtail_hooks.py
@@ -0,0 +1,22 @@
+from django.utils.translation import gettext_lazy as _
+from wagtail.snippets.views.snippets import SnippetViewSet
+
+from metrics.models import DailyMetricJob
+
+class DailyMetricJobSnippetViewSet(SnippetViewSet):  # Wagtail snippet admin configuration for DailyMetricJob. NOTE(review): this module never registers the viewset -- confirm it is registered elsewhere.
+    model = DailyMetricJob
+    menu_label = _("Daily Metric Jobs")
+    icon = "history"
+    menu_order = 600
+    list_display = (  # columns shown in the snippet listing
+        "collection",
+        "access_date",
+        "status",
+        "input_log_count",
+        "attempts",
+        "export_started_at",
+        "exported_at",
+        "updated",
+    )
+    list_filter = ("status", "collection", "access_date")  # filters offered on the listing
+    search_fields = ("collection__acron3", "error_message")  # search spans the related collection's acron3 and the job's error message

From 95b5ab5d520ec97a2b445cf7cf21279f53b46585 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:40:37 -0300
Subject: [PATCH 23/31] refactor: simplificar modelo LogFile, adicionar
 parse_heartbeat_at

---
 log_manager/choices.py |  10 --
 log_manager/models.py  | 240 ++++++-----------------------------------
 2 files changed, 30 insertions(+), 220 deletions(-)

diff --git a/log_manager/choices.py b/log_manager/choices.py
index e98c8f2..c6e461a 100644
--- a/log_manager/choices.py
+++ b/log_manager/choices.py
@@ -19,13 +19,3 @@
     (LOG_FILE_STATUS_IGNORED, _("Ignored")),
 ]
 
-
-COLLECTION_LOG_FILE_DATE_COUNT_OK = 'OK'
-COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES = 'MIS'
-COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES = 'EXT'
-
-COLLECTION_LOG_FILE_DATE_COUNT = [
-    (COLLECTION_LOG_FILE_DATE_COUNT_OK, _("OK")),
-    (COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES, _("Missing Files")),
-    (COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES, _("Extra files")),
-]
diff --git a/log_manager/models.py b/log_manager/models.py
index fc3a8b6..6bf04d8 100644
--- a/log_manager/models.py
+++ b/log_manager/models.py
@@ -1,209 +1,20 @@
 import logging
 
-from django.db import models
-from django.db.models import Q
+from django.db import IntegrityError, models
 from django.utils import timezone
 from django.utils.translation import gettext_lazy as _
 from wagtail.admin.panels import FieldPanel
 from wagtailautocomplete.edit_handlers import AutocompletePanel
 
 from collection.models import Collection
-from core.forms import CoreAdminModelForm
-from core.models import CommonControlField
 
 from . import choices
 
 
-class LogFileDate(CommonControlField):
-    date = models.DateField(
-        verbose_name=_("Date"),
-        null=False,
-        blank=False,
-        db_index=True,
-    )
-
-    log_file = models.ForeignKey(
-        'LogFile',
-        verbose_name=_('Log File'),
-        blank=True,
-        on_delete=models.DO_NOTHING,
-        db_index=True,
-    )
-
-    base_form_class = CoreAdminModelForm
-
-    panel = [
-        FieldPanel('date'),
-        AutocompletePanel('log_file')
-    ]
-
-    class Meta:
-        ordering = ['-date']
-        verbose_name = _("Log File Date")
-        verbose_name_plural = _("Log File Dates")
-        unique_together = (
-            'date',
-            'log_file',
-        )
-        indexes = [
-            models.Index(fields=['date', 'log_file']),
-        ]
-
-    @classmethod
-    def create_or_update(cls, user, log_file, date):
-        obj, created = cls.objects.get_or_create(
-            log_file=log_file, 
-            date=date,
-        )
-
-        if not created:
-            obj.updated_by = user
-            obj.updated = timezone.now()
-        else:
-            obj.creator = user
-            obj.created = timezone.now()
-
-        return obj
-    
-    @classmethod
-    def filter_by_collection_and_date(cls, collection, date):
-        return cls.objects.filter(
-            ~Q(log_file__status__in=[
-                choices.LOG_FILE_STATUS_CREATED, 
-                choices.LOG_FILE_STATUS_INVALIDATED
-            ]),
-            log_file__collection__acron3=collection,
-            date=date,
-        )
-        
-    @classmethod
-    def get_number_of_found_files_for_date(cls, collection, date):
-        return cls.objects.filter(
-            ~Q(log_file__status__in=[
-                choices.LOG_FILE_STATUS_CREATED, 
-                choices.LOG_FILE_STATUS_INVALIDATED
-            ]),
-            log_file__collection__acron3=collection,
-            date=date,
-        ).count()
-
-    def __str__(self):
-        return f'{self.log_file.path}-{self.date}'
-
-
-class CollectionLogFileDateCount(CommonControlField):
-    collection = models.ForeignKey(
-        Collection, 
-        verbose_name=_('Collection'), 
-        on_delete=models.DO_NOTHING, 
-        null=False, 
-        blank=False,
-    )
-
-    date = models.DateField(
-        _('Date'),
-        null=False,
-        blank=False,
-    )
-    
-    year = models.IntegerField(
-        _('Year'),
-        null=False,
-        blank=False,
-    )
-    
-    month = models.IntegerField(
-        _('Month'),
-        null=False,
-        blank=False,
-    )
-
-    found_log_files = models.IntegerField(
-        verbose_name=_('Number of Found Valid Log Files'), 
-        default=0,
-    )
-
-    expected_log_files = models.IntegerField(
-        verbose_name=_('Number of Expected Valid Log Files'),
-        blank=True,
-        null=True,
-    )
-
-    is_usage_metric_computed = models.BooleanField(
-        verbose_name=_('Is Usage Metric Computed'),
-        default=False,
-    )
-
-    exported_files_count = models.SmallIntegerField(
-        verbose_name=_('Exported Files Count'),
-        default=0,
-    )
-    
-    status = models.CharField(
-        verbose_name=_('Status'),
-        choices=choices.COLLECTION_LOG_FILE_DATE_COUNT,
-        max_length=3,
-    )
-
-    def set_status(self):
-        if self.found_log_files < self.expected_log_files:
-            self.status = choices.COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES
-        elif self.found_log_files > self.expected_log_files:
-            self.status = choices.COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES
-        else:
-            self.status = choices.COLLECTION_LOG_FILE_DATE_COUNT_OK
-
-    def set_is_usage_metric_computed(self):
-        if self.exported_files_count == self.found_log_files:
-            self.is_usage_metric_computed = True
-             
-    @classmethod
-    def create_or_update(cls, user, collection, date, expected_log_files, found_log_files):
-        obj, created = cls.objects.get_or_create(
-            collection=collection, 
-            date=date,
-            month=date.month,
-            year=date.year,
-        )
-
-        if not created:
-            obj.updated_by = user
-            obj.updated = timezone.now()
-        else:
-            obj.creator = user
-            obj.created = timezone.now()
-
-        obj.expected_log_files = expected_log_files            
-        obj.found_log_files = found_log_files
-        obj.set_status()
-        
-        obj.save()
-        return obj
-    
-    class Meta:
-        ordering = ['-date']
-        verbose_name = _("Collection Log File Date Count")
-        unique_together = (
-            'collection',
-            'date',
-        )
-
-    panels = [
-        AutocompletePanel('collection'),
-        FieldPanel('date'),
-        FieldPanel('year'),
-        FieldPanel('month'),
-        FieldPanel('found_log_files'),
-        FieldPanel('expected_log_files'),
-        FieldPanel('status'),
-        FieldPanel('is_usage_metric_computed'),
-    ]
-
-    def __str__(self):
-        return f'{self.collection.acron3}-{self.date}'
-    
-
-class LogFile(CommonControlField):
+class LogFile(models.Model):
+    created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True)
+    updated = models.DateTimeField(verbose_name=_("Last update date"), auto_now=True)
+    date = models.DateField(verbose_name=_("Date"), null=True, blank=True, db_index=True)
     hash = models.CharField(_("Hash MD5"), max_length=32, null=True, blank=True, unique=True)
 
     path = models.CharField(_("Name"), max_length=255, null=False, blank=False)
@@ -246,19 +57,25 @@ class LogFile(CommonControlField):
         default=0,
     )
 
+    parse_heartbeat_at = models.DateTimeField(
+        _("Parse Heartbeat At"),
+        null=True,
+        blank=True,
+    )
+
     panels = [
         FieldPanel('hash'),
+        FieldPanel('date'),
         FieldPanel('path'),
         FieldPanel('stat_result'),
         FieldPanel('status'),
         FieldPanel('validation'),
         FieldPanel('summary'),
         FieldPanel('last_processed_line'),
+        FieldPanel('parse_heartbeat_at'),
         AutocompletePanel('collection'),
     ]
 
-    base_form_class = CoreAdminModelForm
-
     class Meta:
         verbose_name = _("Log File")
         verbose_name_plural = _("Log Files")
@@ -268,25 +85,36 @@ def get(cls, hash):
         return cls.objects.get(hash=hash)
 
     @classmethod
-    def create_or_update(cls, user, collection, path, stat_result, hash, status=None):
-        try:
-            obj = cls.get(hash=hash)
-            obj.updated_by = user
-            obj.updated = timezone.now()
+    def create_or_update(cls, collection, path, stat_result, hash, status=None):
+        """Return the LogFile identified by ``hash``, creating it when missing.
+
+        ``collection``, ``path``, ``stat_result`` and ``status`` are only used
+        when a new row is created (``status`` falls back to
+        ``choices.LOG_FILE_STATUS_CREATED``); an existing row only has its
+        ``updated`` timestamp refreshed.
+        """
+        # get_or_create() already repeats the lookup when a concurrent insert
+        # makes its own create fail, so an IntegrityError that still escapes is
+        # a genuine failure; it is left to propagate instead of being replaced
+        # by the DoesNotExist that one more lookup would raise.
+        obj, created = cls.objects.get_or_create(
+            hash=hash,
+            defaults={
+                "collection": collection,
+                "path": path,
+                "stat_result": stat_result,
+                "status": status or choices.LOG_FILE_STATUS_CREATED,
+            },
+        )
+
+        if created:
+            logging.info(f'File {path} added to the database.')
+        else:
+            # ``updated`` is declared with auto_now=True, so saving it is enough
+            # to refresh the timestamp.
+            obj.save(update_fields=["updated"])
             logging.info(f'File {path} already exists in the database.')
 
-        except cls.DoesNotExist:
-            obj = cls()
-            obj.creator = user
-            obj.created = timezone.now()
-            obj.collection = collection
-            obj.path = path
-            obj.stat_result = stat_result
-            obj.hash = hash
-            obj.status = status or choices.LOG_FILE_STATUS_CREATED
-            logging.info(f'File {path} added to the database.')
-        
-        obj.save()
         return obj
         
     def __str__(self):

From dfeaf9711715cd20db2857c491829a7072b8852d Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:40:37 -0300
Subject: [PATCH 24/31] =?UTF-8?q?refactor:=20adicionar=20migra=C3=A7=C3=A3?=
 =?UTF-8?q?o=20log=5Fmanager=200010=20(remover=20modelos=20legados)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...er_logfiledate_unique_together_and_more.py | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 log_manager/migrations/0010_alter_logfiledate_unique_together_and_more.py

diff --git a/log_manager/migrations/0010_alter_logfiledate_unique_together_and_more.py b/log_manager/migrations/0010_alter_logfiledate_unique_together_and_more.py
new file mode 100644
index 0000000..d30cdf4
--- /dev/null
+++ b/log_manager/migrations/0010_alter_logfiledate_unique_together_and_more.py
@@ -0,0 +1,52 @@
+# Generated by Django 5.2.12 on 2026-05-01 22:23
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # Drops the legacy LogFileDate / CollectionLogFileDateCount models and reshapes LogFile.
+    dependencies = [
+        ("log_manager", "0009_collectionlogfiledatecount_exported_files_count"),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name="logfiledate",
+            name="creator",
+        ),
+        migrations.RemoveField(
+            model_name="logfiledate",
+            name="log_file",  # NOTE(review): the file name mentions alter_logfiledate_unique_together but no AlterUniqueTogether is listed, and LogFileDate declared unique_together plus an index on (date, log_file) -- confirm this removal applies cleanly on every backend
+        ),
+        migrations.RemoveField(
+            model_name="logfiledate",
+            name="updated_by",
+        ),
+        migrations.RemoveField(  # LogFile no longer keeps creator / updated_by
+            model_name="logfile",
+            name="creator",
+        ),
+        migrations.RemoveField(
+            model_name="logfile",
+            name="updated_by",
+        ),
+        migrations.AddField(
+            model_name="logfile",
+            name="date",
+            field=models.DateField(  # nullable, so no default is needed for existing rows
+                blank=True, db_index=True, null=True, verbose_name="Date"
+            ),
+        ),
+        migrations.AddField(
+            model_name="logfile",
+            name="parse_heartbeat_at",
+            field=models.DateTimeField(  # nullable as well
+                blank=True, null=True, verbose_name="Parse Heartbeat At"
+            ),
+        ),
+        migrations.DeleteModel(  # both legacy models are dropped; nothing in this migration copies their rows elsewhere
+            name="CollectionLogFileDateCount",
+        ),
+        migrations.DeleteModel(
+            name="LogFileDate",
+        ),
+    ]

From 495500549cbab1d0ac698cf8629523d881076ccd Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:40:37 -0300
Subject: [PATCH 25/31] refactor: refatorar modelos log_manager_config com
 ParentalKey

---
 log_manager_config/exceptions.py |   6 -
 log_manager_config/models.py     | 408 ++++++++-----------------------
 2 files changed, 108 insertions(+), 306 deletions(-)

diff --git a/log_manager_config/exceptions.py b/log_manager_config/exceptions.py
index ad7581a..0a6a6a9 100644
--- a/log_manager_config/exceptions.py
+++ b/log_manager_config/exceptions.py
@@ -4,11 +4,5 @@ class UndefinedCollectionLogDirectoryError(Exception):
 class UndefinedCollectionEmailError(Exception):
     ...
 
-class UndefinedCollectionFilesPerDayError(Exception):
-    ...
-
 class UndefinedSupportedLogFile(Exception):
     ...
-
-class MultipleFilesPerDayForTheSameDateError(Exception):
-    ...
diff --git a/log_manager_config/models.py b/log_manager_config/models.py
index 384368e..8cf3e34 100644
--- a/log_manager_config/models.py
+++ b/log_manager_config/models.py
@@ -4,38 +4,57 @@
 from django.utils import timezone
 from django.utils.translation import gettext_lazy as _
 
+from modelcluster.models import ClusterableModel
+from modelcluster.fields import ParentalKey
+from wagtail.models import Orderable
+from wagtail.admin.panels import FieldPanel, InlinePanel
+from wagtailautocomplete.edit_handlers import AutocompletePanel
+
 from collection.models import Collection
 from core.models import CommonControlField
 
-from .exceptions import MultipleFilesPerDayForTheSameDateError, UndefinedCollectionFilesPerDayError
 
 
-class CollectionLogDirectory(CommonControlField):
-    collection = models.ForeignKey(
+class LogManagerCollectionConfig(ClusterableModel, CommonControlField):
+    collection = models.OneToOneField(
         Collection,
         verbose_name=_('Collection'),
-        on_delete=models.DO_NOTHING,
+        on_delete=models.CASCADE,
+        related_name="log_manager_config"
     )
-    path = models.CharField(
-        verbose_name=_('Path'),
-        max_length=255, 
-        blank=False, 
+    sample_size = models.FloatField(
+        verbose_name=_('Sample Size'),
+        blank=False,
         null=False,
+        default=0.1,
     )
-    directory_name = models.CharField(
-        verbose_name=_('Directory Name'),
-        max_length=255, 
-        blank=True,
-        null=True,
+    buffer_size = models.IntegerField(
+        verbose_name=_('Buffer Size'),
+        blank=False,
+        null=False,
+        default=2048,
     )
-    active = models.BooleanField(
-        verbose_name=_('Active'),
-        default=True,
+    expected_logs_per_day = models.IntegerField(
+        verbose_name=_('Expected Logs Per Day'),
+        default=1,
     )
 
+    panels = [
+        AutocompletePanel("collection"),
+        FieldPanel("sample_size"),
+        FieldPanel("buffer_size"),
+        FieldPanel("expected_logs_per_day"),
+        InlinePanel("directories", label=_("Directories")),
+        InlinePanel("emails", label=_("Emails")),
+    ]
+
     def __str__(self):
-        return f'{self.collection} - {self.path} - {self.directory_name}'
-    
+        return f'{self.collection.acron3} Config'
+
+    class Meta:
+        verbose_name = _('Log Manager Collection Config')
+        verbose_name_plural = _('Log Manager Collection Configs')
+
     @classmethod
     def load(cls, data, user):
         for item in data:
@@ -45,13 +64,12 @@ def load(cls, data, user):
                 logging.warning(f'Collection {item.get("acronym")} not found.')
                 continue
 
-            logging.info(item)
             cls.create_or_update(
                 user=user,
                 collection=collection,
-                directory_name=item.get('directory_name'),
-                path=item.get('path'),
-                active=item.get('active', True),
+                sample_size=item.get('sample_size', 0.1),
+                buffer_size=item.get('buffer_size', 2048),
+                expected_logs_per_day=item.get('quantity', 1),
             )
 
     @classmethod
@@ -59,81 +77,66 @@ def create_or_update(
         cls,
         user,
         collection,
-        directory_name,
-        path,
-        active,
+        sample_size,
+        buffer_size,
+        expected_logs_per_day,
     ):
-        try:
-            obj = cls.objects.get(collection=collection, path=path)
-        except cls.DoesNotExist:
-            obj = cls()
+        obj, created = cls.objects.get_or_create(collection=collection)
+        if created:
             obj.creator = user
             obj.created = timezone.now()
-            obj.collection = collection
         
         obj.updated_by = user
         obj.updated = timezone.now()
-        obj.directory_name = directory_name
-        obj.path = path
-        obj.active = active
-     
+        obj.sample_size = sample_size
+        obj.buffer_size = buffer_size
+        obj.expected_logs_per_day = expected_logs_per_day
         obj.save()
-        logging.info(f'{collection.acron3} - {directory_name} - {path}')
+        logging.info(f'Config for {collection.acron3} updated.')
         return obj
 
-    class Meta:
-        verbose_name = _('Collection Log Directory')
-        verbose_name_plural = _('Collection Log Directories')
-        constraints = [
-            models.UniqueConstraint(fields=['collection', 'path'], name='unique_collection_path')
-        ]
 
 
-class CollectionLogFilesPerDay(CommonControlField):
-    collection = models.ForeignKey(
-        Collection,
-        verbose_name=_('Collection'),
-        on_delete=models.DO_NOTHING,
+class CollectionLogDirectory(Orderable, CommonControlField):
+    config = ParentalKey(
+        'LogManagerCollectionConfig',
+        related_name='directories',
+        on_delete=models.CASCADE,
+        null=True,
+        blank=True,
     )
-    start_date = models.DateField(
-        verbose_name=_('Start Date'),
-        blank=False,
+    path = models.CharField(
+        verbose_name=_('Path'),
+        max_length=255, 
+        blank=False, 
         null=False,
     )
-    end_date = models.DateField(
-        verbose_name=_('End Date'),
+    directory_name = models.CharField(
+        verbose_name=_('Directory Name'),
+        max_length=255, 
         blank=True,
         null=True,
     )
-    quantity = models.IntegerField(
-        verbose_name=_('Quantity'),
-        default=1,
+    active = models.BooleanField(
+        verbose_name=_('Active'),
+        default=True,
+    )
+    translator_class = models.CharField(
+        verbose_name=_('URL Translator Class'),
+        blank=False,
+        null=False,
+        default='URLTranslatorClassicSite',
     )
 
     def __str__(self):
-        return f'{self.start_date} - {self.quantity}'
+        return f'{self.config.collection if self.config else None} - {self.path} - {self.directory_name}'  # config is nullable (null=True): guard so rows without a parent config do not raise AttributeError
     
-    @classmethod
-    def get_number_of_expected_files_by_day(cls, collection, date):
-        files_by_day = cls.objects.filter(
-            models.Q(collection__acron3=collection) &
-            models.Q(start_date__lte=date) &
-            (models.Q(end_date__gte=date) | models.Q(end_date__isnull=True))
-        )
-
-        if files_by_day.count() > 1:
-            raise MultipleFilesPerDayForTheSameDateError(_("ERROR. Please, set the field end_date for the collection {collection}."))
-
-        if files_by_day.count() == 0:
-            raise UndefinedCollectionFilesPerDayError(_("ERROR. Please, set the number of files per day for the collection {collection}."))
-        
-        return int(files_by_day.get().quantity)
-
     @classmethod
     def load(cls, data, user):
         for item in data:
             try:
                 collection = Collection.objects.get(acron3=item.get('acronym'))
+                config, _created = LogManagerCollectionConfig.objects.get_or_create(collection=collection)  # keep `_` free: this module uses it as the translation callable
             except Collection.DoesNotExist:
                 logging.warning(f'Collection {item.get("acronym")} not found.')
                 continue
@@ -141,52 +144,55 @@ def load(cls, data, user):
             logging.info(item)
             cls.create_or_update(
                 user=user,
-                collection=collection,
-                start_date=item.get('start_date'),
-                quantity=item.get('quantity'),
-                end_date=item.get('end_date'),
+                config=config,
+                directory_name=item.get('directory_name'),
+                path=item.get('path'),
+                active=item.get('active', True),
             )
 
     @classmethod
     def create_or_update(
         cls,
         user,
-        collection,
-        start_date,
-        quantity,
-        end_date,
+        config,
+        directory_name,
+        path,
+        active,
     ):
         try:
-            obj = cls.objects.get(collection=collection, start_date=start_date)
+            obj = cls.objects.get(config=config, path=path)
         except cls.DoesNotExist:
             obj = cls()
             obj.creator = user
             obj.created = timezone.now()
-            obj.collection = collection
-
+            obj.config = config
+        
         obj.updated_by = user
         obj.updated = timezone.now()
-        obj.start_date = start_date
-        obj.quantity = quantity
-        obj.end_date = end_date
-        
+        obj.directory_name = directory_name
+        obj.path = path
+        obj.active = active
+     
         obj.save()
-        logging.info(f'{collection.acron3} - {start_date} - {quantity}')
+        logging.info(f'{config.collection.acron3} - {directory_name} - {path}')
         return obj
 
     class Meta:
-        verbose_name = _('Collection Log Files Per Day')
-        verbose_name_plural = _('Collection Log Files Per Day')
+        verbose_name = _('Collection Log Directory')
+        verbose_name_plural = _('Collection Log Directories')
         constraints = [
-            models.UniqueConstraint(fields=['collection', 'start_date'], name='unique_collection_start_date')
+            models.UniqueConstraint(fields=['config', 'path'], name='unique_config_path')
         ]
 
 
-class CollectionEmail(CommonControlField):
-    collection = models.ForeignKey(
-        Collection,
-        verbose_name=_('Collection'),
-        on_delete=models.DO_NOTHING,
+
+class CollectionEmail(Orderable, CommonControlField):
+    config = ParentalKey(
+        'LogManagerCollectionConfig',
+        related_name='emails',
+        on_delete=models.CASCADE,
+        null=True,
+        blank=True,
     )
     name = models.CharField(
         verbose_name=_('Name'),
@@ -218,6 +224,7 @@ def load(cls, data, user):
         for item in data:
             try:
                 collection = Collection.objects.get(acron3=item.get('acronym'))
+                config, _created = LogManagerCollectionConfig.objects.get_or_create(collection=collection)  # keep `_` free: this module uses it as the translation callable
             except Collection.DoesNotExist:
                 logging.warning(f'Collection {item.get("acronym")} not found.')
                 continue
@@ -225,7 +232,7 @@ def load(cls, data, user):
             logging.info(item)
             cls.create_or_update(
                 user=user,
-                collection=collection,
+                config=config,
                 email=item.get('e-mail'),
                 name=item.get('name'),
                 position=item.get('position'),
@@ -236,19 +243,19 @@ def load(cls, data, user):
     def create_or_update(
         cls,
         user,
-        collection,
+        config,
         email,
         name,
         position,
         active,
     ):
         try:
-            obj = cls.objects.get(collection=collection, email=email)
+            obj = cls.objects.get(config=config, email=email)
         except cls.DoesNotExist:
             obj = cls()
             obj.creator = user
             obj.created = timezone.now()
-            obj.collection = collection
+            obj.config = config
             obj.email = email
 
         obj.updated_by = user
@@ -258,213 +265,14 @@ def create_or_update(
         obj.active = active
         
         obj.save()
-        logging.info(f'{collection.acron3} - {name} - {position} - {email}')
+        logging.info(f'{config.collection.acron3} - {name} - {position} - {email}')
         return obj
     
     class Meta:
         verbose_name = _('Collection Email')
         verbose_name_plural = _('Collection Emails')
         constraints = [
-            models.UniqueConstraint(fields=['collection', 'email'], name='unique_collection_email')
-        ]
-
-
-class CollectionValidationParameters(CommonControlField):
-    collection = models.ForeignKey(
-        Collection,
-        verbose_name=_('Collection'),
-        on_delete=models.DO_NOTHING,
-        primary_key=True,
-    )
-    sample_size = models.FloatField(
-        verbose_name=_('Sample Size'),
-        blank=False,
-        null=False,
-        default=0.1,
-    )
-    buffer_size = models.IntegerField(
-        verbose_name=_('Buffer Size'),
-        blank=False,
-        null=False,
-        default=2048,
-    )
-
-    def __str__(self):
-        return f'{self.collection.acron3} - {self.sample_size} - {self.buffer_size}'
-
-    @classmethod
-    def load(cls, data, user):
-        for item in data:
-            try:
-                collection = Collection.objects.get(acron3=item.get('acronym'))
-            except Collection.DoesNotExist:
-                logging.warning(f'Collection {item.get("acronym")} not found.')
-                continue
-
-            logging.info(item)
-            cls.create_or_update(
-                user=user,
-                collection=collection,
-                sample_size=item.get('sample_size'),
-                buffer_size=item.get('buffer_size'),
-            )
-
-    @classmethod
-    def create_or_update(
-        cls,
-        user,
-        collection,
-        sample_size,
-        buffer_size,
-    ):
-        try:
-            obj = cls.objects.get(collection=collection)
-        except cls.DoesNotExist:
-            obj = cls()
-            obj.creator = user
-            obj.created = timezone.now()
-            obj.collection = collection
-
-        obj.updated_by = user
-        obj.updated = timezone.now()
-        obj.sample_size = sample_size
-        obj.buffer_size = buffer_size
-        
-        obj.save()
-        logging.info(f'{collection.acron3} - {sample_size} - {buffer_size}')
-        return obj
-    
-    class Meta:
-        verbose_name = _('Collection Validation Parameters')
-        verbose_name_plural = _('Collection Validation Parameters')
-
-
-class CollectionURLTranslatorClass(CommonControlField):
-    collection = models.ForeignKey(
-        Collection,
-        verbose_name=_('Collection'),
-        on_delete=models.DO_NOTHING,
-    )
-    directory = models.ForeignKey(
-        CollectionLogDirectory,
-        verbose_name=_('Directory'),
-        on_delete=models.DO_NOTHING,
-    )
-    translator_class = models.CharField(
-        verbose_name=_('URL Translator Class'),
-        blank=False,
-        null=False,
-        default='URLTranslatorClassicSite',
-    )
-
-    def __str__(self):
-        return f'{self.collection.acron3} - {self.directory} - {self.translator_class}'
-
-    class Meta:
-        verbose_name = _('Collection URL Translator Class')
-        verbose_name_plural = _('Collection URL Translator Classes')
-        constraints = [
-            models.UniqueConstraint(fields=['collection', 'directory'], name='unique_collection_directory')
+            models.UniqueConstraint(fields=['config', 'email'], name='unique_config_email')
         ]
 
-    @classmethod
-    def load(cls, data, user):
-        for item in data:
-            try:
-                collection = Collection.objects.get(acron3=item.get('acronym'))
-            except Collection.DoesNotExist:
-                logging.warning(f'Collection {item.get("acronym")} not found.')
-                continue
-
-            try:
-                directory = CollectionLogDirectory.objects.get(collection=collection, path=item.get('path'))
-                logging.info(item)
-                cls.create_or_update(
-                    user=user,
-                    collection=collection,
-                    directory=directory,
-                    translator_class=item.get('translator_class'),
-                )
-            except CollectionLogDirectory.DoesNotExist:
-                logging.warning(f'Directory {item.get("path")} not found.')
-                continue
 
-    @classmethod
-    def create_or_update(
-        cls,
-        user,
-        collection,
-        directory,
-        translator_class,
-    ):
-        try:
-            obj = cls.objects.get(collection=collection)
-        except cls.DoesNotExist:
-            obj = cls()
-            obj.creator = user
-            obj.created = timezone.now()
-            obj.collection = collection
-            obj.directory = directory
-
-        obj.updated_by = user
-        obj.updated = timezone.now()
-        obj.translator_class = translator_class
-        
-        obj.save()
-        logging.info(f'{collection.acron3} - {directory.path} - {translator_class}')
-        return obj
-    
-
-class SupportedLogFile(CommonControlField):
-    file_extension = models.CharField(
-        verbose_name=_('File Extension'),
-        max_length=255, 
-        unique=True,
-        blank=False,
-        null=False,
-    )
-    description = models.TextField(
-        verbose_name=_('Description'),
-        blank=True,
-        null=True,
-    )
-
-    def __str__(self):
-        return f'{self.file_extension}'
-
-    @classmethod
-    def load(cls, data, user):
-        for item in data:
-            logging.info(item)
-            cls.create_or_update(
-                user=user,
-                file_extension=item.get('file_extension'),
-                description=item.get('description'),
-            )
-
-    @classmethod
-    def create_or_update(
-        cls,
-        user,
-        file_extension,
-        description,
-    ):
-        try:
-            obj = cls.objects.get(file_extension=file_extension)
-        except cls.DoesNotExist:
-            obj = cls()
-            obj.creator = user
-            obj.created = timezone.now()
-
-        obj.updated_by = user
-        obj.updated = timezone.now()
-        obj.file_extension = file_extension
-        obj.description = description
-        
-        obj.save()
-        logging.info(f'{file_extension}')
-        return obj
-    
-    class Meta:
-        verbose_name = _('Supported Log File')
-        verbose_name_plural = _('Supported Log Files')

From 2b69fc13e40f5f1ed428eb1b5c93c73b1aaba5e3 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:40:37 -0300
Subject: [PATCH 26/31] =?UTF-8?q?refactor:=20adicionar=20migra=C3=A7=C3=A3?=
 =?UTF-8?q?o=20log=5Fmanager=5Fconfig=200004?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...004_logmanagercollectionconfig_and_more.py | 223 ++++++++++++++++++
 1 file changed, 223 insertions(+)
 create mode 100644 log_manager_config/migrations/0004_logmanagercollectionconfig_and_more.py

diff --git a/log_manager_config/migrations/0004_logmanagercollectionconfig_and_more.py b/log_manager_config/migrations/0004_logmanagercollectionconfig_and_more.py
new file mode 100644
index 0000000..5b6351c
--- /dev/null
+++ b/log_manager_config/migrations/0004_logmanagercollectionconfig_and_more.py
@@ -0,0 +1,223 @@
+# Generated by Django 5.2.12 on 2026-05-01 22:27
+
+import django.db.models.deletion
+import modelcluster.fields
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("collection", "0001_initial"),
+        ("log_manager_config", "0003_alter_collectionemail_options_and_more"),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="LogManagerCollectionConfig",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "created",
+                    models.DateTimeField(
+                        auto_now_add=True, verbose_name="Creation date"
+                    ),
+                ),
+                (
+                    "updated",
+                    models.DateTimeField(
+                        auto_now=True, verbose_name="Last update date"
+                    ),
+                ),
+                (
+                    "sample_size",
+                    models.FloatField(default=0.1, verbose_name="Sample Size"),
+                ),
+                (
+                    "buffer_size",
+                    models.IntegerField(default=2048, verbose_name="Buffer Size"),
+                ),
+                (
+                    "expected_logs_per_day",
+                    models.IntegerField(
+                        default=1, verbose_name="Expected Logs Per Day"
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "Log Manager Collection Config",
+                "verbose_name_plural": "Log Manager Collection Configs",
+            },
+        ),
+        migrations.RemoveField(
+            model_name="collectionlogfilesperday",
+            name="collection",
+        ),
+        migrations.RemoveField(
+            model_name="collectionlogfilesperday",
+            name="creator",
+        ),
+        migrations.RemoveField(
+            model_name="collectionlogfilesperday",
+            name="updated_by",
+        ),
+        migrations.RemoveField(
+            model_name="collectionurltranslatorclass",
+            name="collection",
+        ),
+        migrations.RemoveField(
+            model_name="collectionurltranslatorclass",
+            name="creator",
+        ),
+        migrations.RemoveField(
+            model_name="collectionurltranslatorclass",
+            name="directory",
+        ),
+        migrations.RemoveField(
+            model_name="collectionurltranslatorclass",
+            name="updated_by",
+        ),
+        migrations.RemoveField(
+            model_name="collectionvalidationparameters",
+            name="collection",
+        ),
+        migrations.RemoveField(
+            model_name="collectionvalidationparameters",
+            name="creator",
+        ),
+        migrations.RemoveField(
+            model_name="collectionvalidationparameters",
+            name="updated_by",
+        ),
+        migrations.RemoveField(
+            model_name="supportedlogfile",
+            name="creator",
+        ),
+        migrations.RemoveField(
+            model_name="supportedlogfile",
+            name="updated_by",
+        ),
+        migrations.RemoveConstraint(
+            model_name="collectionemail",
+            name="unique_collection_email",
+        ),
+        migrations.RemoveConstraint(
+            model_name="collectionlogdirectory",
+            name="unique_collection_path",
+        ),
+        migrations.RemoveField(  # NOTE(review): drops the email -> collection link; the replacement `config` key is added later in this migration as nullable and there is no RunPython step here, so existing rows presumably end up with config=NULL - confirm a backfill exists elsewhere
+            model_name="collectionemail",
+            name="collection",
+        ),
+        migrations.RemoveField(  # NOTE(review): same concern for log directories - the collection link is removed before the nullable `config` key is added, with no data copy in this migration
+            model_name="collectionlogdirectory",
+            name="collection",
+        ),
+        migrations.AddField(
+            model_name="collectionemail",
+            name="sort_order",
+            field=models.IntegerField(blank=True, editable=False, null=True),
+        ),
+        migrations.AddField(
+            model_name="collectionlogdirectory",
+            name="sort_order",
+            field=models.IntegerField(blank=True, editable=False, null=True),
+        ),
+        migrations.AddField(
+            model_name="collectionlogdirectory",
+            name="translator_class",
+            field=models.CharField(
+                default="URLTranslatorClassicSite", verbose_name="URL Translator Class"
+            ),
+        ),
+        migrations.AddField(
+            model_name="logmanagercollectionconfig",
+            name="collection",
+            field=models.OneToOneField(
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name="log_manager_config",
+                to="collection.collection",
+                verbose_name="Collection",
+            ),
+        ),
+        migrations.AddField(
+            model_name="logmanagercollectionconfig",
+            name="creator",
+            field=models.ForeignKey(
+                editable=False,
+                null=True,
+                on_delete=django.db.models.deletion.SET_NULL,
+                related_name="%(class)s_creator",
+                to=settings.AUTH_USER_MODEL,
+                verbose_name="Creator",
+            ),
+        ),
+        migrations.AddField(
+            model_name="logmanagercollectionconfig",
+            name="updated_by",
+            field=models.ForeignKey(
+                blank=True,
+                editable=False,
+                null=True,
+                on_delete=django.db.models.deletion.SET_NULL,
+                related_name="%(class)s_last_mod_user",
+                to=settings.AUTH_USER_MODEL,
+                verbose_name="Updater",
+            ),
+        ),
+        migrations.AddField(
+            model_name="collectionemail",
+            name="config",
+            field=modelcluster.fields.ParentalKey(
+                blank=True,
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name="emails",
+                to="log_manager_config.logmanagercollectionconfig",
+            ),
+        ),
+        migrations.AddField(
+            model_name="collectionlogdirectory",
+            name="config",
+            field=modelcluster.fields.ParentalKey(
+                blank=True,
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name="directories",
+                to="log_manager_config.logmanagercollectionconfig",
+            ),
+        ),
+        migrations.AddConstraint(
+            model_name="collectionemail",
+            constraint=models.UniqueConstraint(
+                fields=("config", "email"), name="unique_config_email"
+            ),
+        ),
+        migrations.AddConstraint(
+            model_name="collectionlogdirectory",
+            constraint=models.UniqueConstraint(
+                fields=("config", "path"), name="unique_config_path"
+            ),
+        ),
+        migrations.DeleteModel(
+            name="CollectionLogFilesPerDay",
+        ),
+        migrations.DeleteModel(
+            name="CollectionURLTranslatorClass",
+        ),
+        migrations.DeleteModel(
+            name="CollectionValidationParameters",
+        ),
+        migrations.DeleteModel(
+            name="SupportedLogFile",
+        ),
+    ]

From efdc7e06ef93e96425bb89a4ae4f0cfd9f43d708 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:40:49 -0300
Subject: [PATCH 27/31] refactor: simplificar modelos tracker, remover
 ArticleEvent e UnexpectedEvent

---
 tracker/choices.py                            |  46 +---
 tracker/exceptions.py                         |  24 --
 .../0002_top100articlesfileevent.py           |  93 -------
 ...rdedline_delete_top100articlesfileevent.py |  98 --------
 ...4_alter_logfilediscardedline_error_type.py |  28 ---
 tracker/migrations/0005_articleevent.py       |  86 -------
 ...6_alter_logfilediscardedline_error_type.py |  29 ---
 ...7_alter_logfilediscardedline_error_type.py |  29 ---
 tracker/models.py                             | 228 +-----------------
 tracker/tasks.py                              |  34 ---
 10 files changed, 7 insertions(+), 688 deletions(-)
 delete mode 100644 tracker/migrations/0002_top100articlesfileevent.py
 delete mode 100644 tracker/migrations/0003_logfilediscardedline_delete_top100articlesfileevent.py
 delete mode 100644 tracker/migrations/0004_alter_logfilediscardedline_error_type.py
 delete mode 100644 tracker/migrations/0005_articleevent.py
 delete mode 100644 tracker/migrations/0006_alter_logfilediscardedline_error_type.py
 delete mode 100644 tracker/migrations/0007_alter_logfilediscardedline_error_type.py
 delete mode 100644 tracker/tasks.py

diff --git a/tracker/choices.py b/tracker/choices.py
index e2c80e2..dfc562c 100644
--- a/tracker/choices.py
+++ b/tracker/choices.py
@@ -1,54 +1,16 @@
 from django.utils.translation import gettext_lazy as _
 
-ERROR = "ERROR"
-EXCEPTION = "EXCEPTION"
-INFO = "INFO"
-WARNING = "WARNING"
-
-EVENT_MSG_TYPE = [
-    (ERROR, _("error")),
-    (WARNING, _("warning")),
-    (INFO, _("info")),
-    (EXCEPTION, _("exception")),
-]
-
-
-PROGRESS_STATUS_IGNORED = "IGNORED"
-PROGRESS_STATUS_REPROC = "REPROC"
-PROGRESS_STATUS_TODO = "TODO"
-PROGRESS_STATUS_DOING = "DOING"
-PROGRESS_STATUS_DONE = "DONE"
-PROGRESS_STATUS_PENDING = "PENDING"
-
-PROGRESS_STATUS = (
-    (PROGRESS_STATUS_REPROC, _("To reprocess")),
-    (PROGRESS_STATUS_TODO, _("To do")),
-    (PROGRESS_STATUS_DONE, _("Done")),
-    (PROGRESS_STATUS_DOING, _("Doing")),
-    (PROGRESS_STATUS_PENDING, _("Pending")),
-    (PROGRESS_STATUS_IGNORED, _("ignored")),
-)
-
 
 LOG_FILE_DISCARDED_LINE_REASON_MISSING_METADATA = 'MET'
-LOG_FILE_DISCARDED_LINE_REASON_MISSING_ARTICLE = 'ART'
-LOG_FILE_DISCARDED_LINE_REASON_MISSING_JOURNAL = 'JOU'
+LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT = 'DOC'
+LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE = 'SRC'
 LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION = 'URL'
 LOG_FILE_DISCARDED_LINE_REASON_DATABASE_ERROR = 'DBE'
 
 LOG_FILE_DISCARDED_LINE_REASON = [
     (LOG_FILE_DISCARDED_LINE_REASON_MISSING_METADATA, _("Missing Metadata")),
-    (LOG_FILE_DISCARDED_LINE_REASON_MISSING_ARTICLE, _("Missing PIDv2 or PIDv3 or PID Generic")),
-    (LOG_FILE_DISCARDED_LINE_REASON_MISSING_JOURNAL, _("Missing ISSN")),
+    (LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT, _("Missing Document")),
+    (LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE, _("Missing Source")),
     (LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION, _("URL Translation")),
     (LOG_FILE_DISCARDED_LINE_REASON_DATABASE_ERROR, _("Database Error")),
 ]
-
-
-ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED = 'MUL'
-ARTICLE_EVENT_TYPE_DATA_ERROR = 'ERR'
-
-ARTICLE_EVENT_TYPE = [
-    (ARTICLE_EVENT_TYPE_MULTIPLE_OBJS_RETURNED, _("Multiple Articles Returned")),
-    (ARTICLE_EVENT_TYPE_DATA_ERROR, _("Data Error")),
-]
diff --git a/tracker/exceptions.py b/tracker/exceptions.py
index 31ed8c8..9ef3267 100644
--- a/tracker/exceptions.py
+++ b/tracker/exceptions.py
@@ -1,26 +1,2 @@
-class ProcEventCreateError(Exception):
-    ...
-
-class UnexpectedEventCreateError(Exception):
-    ...
-
-class EventCreateError(Exception):
-    ...
-
-class EventReportCreateError(Exception):
-    ...
-
-class EventReportSaveFileError(Exception):
-    ...
-
-class EventReportCreateError(Exception):
-    ...
-
-class EventReportDeleteEventsError(Exception):
-    ...
-
 class LogFileDiscardedLineCreateError(Exception):
     ...
-
-class ArticleEventError(Exception):
-    ...
diff --git a/tracker/migrations/0002_top100articlesfileevent.py b/tracker/migrations/0002_top100articlesfileevent.py
deleted file mode 100644
index 230fb8a..0000000
--- a/tracker/migrations/0002_top100articlesfileevent.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# Generated by Django 5.0.7 on 2024-08-30 21:52
-
-import django.db.models.deletion
-from django.conf import settings
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("metrics", "0002_alter_top100articlesfile_status"),
-        ("tracker", "0001_initial"),
-        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name="Top100ArticlesFileEvent",
-            fields=[
-                (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
-                ),
-                (
-                    "created",
-                    models.DateTimeField(
-                        auto_now_add=True, verbose_name="Creation date"
-                    ),
-                ),
-                (
-                    "updated",
-                    models.DateTimeField(
-                        auto_now=True, verbose_name="Last update date"
-                    ),
-                ),
-                (
-                    "status",
-                    models.CharField(
-                        blank=True, max_length=64, null=True, verbose_name="Status"
-                    ),
-                ),
-                (
-                    "lines",
-                    models.IntegerField(
-                        blank=True, default=0, null=True, verbose_name="Lines"
-                    ),
-                ),
-                (
-                    "message",
-                    models.TextField(blank=True, null=True, verbose_name="Message"),
-                ),
-                (
-                    "creator",
-                    models.ForeignKey(
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_creator",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Creator",
-                    ),
-                ),
-                (
-                    "file",
-                    models.ForeignKey(
-                        blank=True,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        to="metrics.top100articlesfile",
-                    ),
-                ),
-                (
-                    "updated_by",
-                    models.ForeignKey(
-                        blank=True,
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_last_mod_user",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Updater",
-                    ),
-                ),
-            ],
-            options={
-                "verbose_name_plural": "Top 100 Article File Events",
-            },
-        ),
-    ]
diff --git a/tracker/migrations/0003_logfilediscardedline_delete_top100articlesfileevent.py b/tracker/migrations/0003_logfilediscardedline_delete_top100articlesfileevent.py
deleted file mode 100644
index 6e37a9f..0000000
--- a/tracker/migrations/0003_logfilediscardedline_delete_top100articlesfileevent.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Generated by Django 5.0.7 on 2025-03-07 16:55
-
-import django.db.models.deletion
-from django.conf import settings
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("log_manager", "0002_alter_collectionconfig_unique_together_and_more"),
-        ("tracker", "0002_top100articlesfileevent"),
-        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name="LogFileDiscardedLine",
-            fields=[
-                (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
-                ),
-                (
-                    "created",
-                    models.DateTimeField(
-                        auto_now_add=True, verbose_name="Creation date"
-                    ),
-                ),
-                (
-                    "updated",
-                    models.DateTimeField(
-                        auto_now=True, verbose_name="Last update date"
-                    ),
-                ),
-                (
-                    "error_type",
-                    models.CharField(
-                        blank=True,
-                        choices=[
-                            ("MET", "Missing Metadata"),
-                            ("ART", "Missing Article"),
-                            ("JOU", "Missing Journal"),
-                        ],
-                        max_length=3,
-                        null=True,
-                        verbose_name="Error Type",
-                    ),
-                ),
-                ("data", models.JSONField(default=dict, verbose_name="Data")),
-                (
-                    "message",
-                    models.TextField(blank=True, null=True, verbose_name="Message"),
-                ),
-                ("handled", models.BooleanField(default=False, verbose_name="Handled")),
-                (
-                    "creator",
-                    models.ForeignKey(
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_creator",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Creator",
-                    ),
-                ),
-                (
-                    "log_file",
-                    models.ForeignKey(
-                        on_delete=django.db.models.deletion.CASCADE,
-                        to="log_manager.logfile",
-                    ),
-                ),
-                (
-                    "updated_by",
-                    models.ForeignKey(
-                        blank=True,
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_last_mod_user",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Updater",
-                    ),
-                ),
-            ],
-            options={
-                "abstract": False,
-            },
-        ),
-        migrations.DeleteModel(
-            name="Top100ArticlesFileEvent",
-        ),
-    ]
diff --git a/tracker/migrations/0004_alter_logfilediscardedline_error_type.py b/tracker/migrations/0004_alter_logfilediscardedline_error_type.py
deleted file mode 100644
index 1061793..0000000
--- a/tracker/migrations/0004_alter_logfilediscardedline_error_type.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Generated by Django 5.0.7 on 2025-03-27 20:40
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("tracker", "0003_logfilediscardedline_delete_top100articlesfileevent"),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name="logfilediscardedline",
-            name="error_type",
-            field=models.CharField(
-                blank=True,
-                choices=[
-                    ("MET", "Missing Metadata"),
-                    ("ART", "Missing Article"),
-                    ("JOU", "Missing Journal"),
-                    ("URL", "URL Translation"),
-                ],
-                max_length=3,
-                null=True,
-                verbose_name="Error Type",
-            ),
-        ),
-    ]
diff --git a/tracker/migrations/0005_articleevent.py b/tracker/migrations/0005_articleevent.py
deleted file mode 100644
index 859910e..0000000
--- a/tracker/migrations/0005_articleevent.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Generated by Django 5.0.7 on 2025-05-23 17:27
-
-import django.db.models.deletion
-from django.conf import settings
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("tracker", "0004_alter_logfilediscardedline_error_type"),
-        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name="ArticleEvent",
-            fields=[
-                (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
-                ),
-                (
-                    "created",
-                    models.DateTimeField(
-                        auto_now_add=True, verbose_name="Creation date"
-                    ),
-                ),
-                (
-                    "updated",
-                    models.DateTimeField(
-                        auto_now=True, verbose_name="Last update date"
-                    ),
-                ),
-                (
-                    "event_type",
-                    models.CharField(
-                        blank=True,
-                        choices=[
-                            ("MUL", "Multiple Articles Returned"),
-                            ("ERR", "Data Error"),
-                        ],
-                        max_length=3,
-                        null=True,
-                        verbose_name="Event Type",
-                    ),
-                ),
-                (
-                    "message",
-                    models.TextField(blank=True, null=True, verbose_name="Message"),
-                ),
-                ("data", models.JSONField(default=dict, verbose_name="Data")),
-                ("handled", models.BooleanField(default=False, verbose_name="Handled")),
-                (
-                    "creator",
-                    models.ForeignKey(
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_creator",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Creator",
-                    ),
-                ),
-                (
-                    "updated_by",
-                    models.ForeignKey(
-                        blank=True,
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_last_mod_user",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Updater",
-                    ),
-                ),
-            ],
-            options={
-                "abstract": False,
-            },
-        ),
-    ]
diff --git a/tracker/migrations/0006_alter_logfilediscardedline_error_type.py b/tracker/migrations/0006_alter_logfilediscardedline_error_type.py
deleted file mode 100644
index fb7f74a..0000000
--- a/tracker/migrations/0006_alter_logfilediscardedline_error_type.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Generated by Django 5.0.7 on 2025-06-14 10:46
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("tracker", "0005_articleevent"),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name="logfilediscardedline",
-            name="error_type",
-            field=models.CharField(
-                blank=True,
-                choices=[
-                    ("MET", "Missing Metadata"),
-                    ("ART", "Missing Article"),
-                    ("JOU", "Missing Journal"),
-                    ("URL", "URL Translation"),
-                    ("DBE", "Database Error"),
-                ],
-                max_length=3,
-                null=True,
-                verbose_name="Error Type",
-            ),
-        ),
-    ]
diff --git a/tracker/migrations/0007_alter_logfilediscardedline_error_type.py b/tracker/migrations/0007_alter_logfilediscardedline_error_type.py
deleted file mode 100644
index f9ffebe..0000000
--- a/tracker/migrations/0007_alter_logfilediscardedline_error_type.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Generated by Django 5.0.7 on 2025-08-09 21:04
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("tracker", "0006_alter_logfilediscardedline_error_type"),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name="logfilediscardedline",
-            name="error_type",
-            field=models.CharField(
-                blank=True,
-                choices=[
-                    ("MET", "Missing Metadata"),
-                    ("ART", "Missing PIDv2 or PIDv3 or PID Generic"),
-                    ("JOU", "Missing ISSN"),
-                    ("URL", "URL Translation"),
-                    ("DBE", "Database Error"),
-                ],
-                max_length=3,
-                null=True,
-                verbose_name="Error Type",
-            ),
-        ),
-    ]
diff --git a/tracker/models.py b/tracker/models.py
index 77086ee..a394ed6 100644
--- a/tracker/models.py
+++ b/tracker/models.py
@@ -1,65 +1,13 @@
-import json
-import logging
-import traceback
-import uuid
-
-from datetime import datetime
-
-from django.core.files.base import ContentFile
 from django.db import models
 from django.utils.translation import gettext_lazy as _
 
-from core.models import CommonControlField
 from log_manager.models import LogFile
 from tracker import choices
-
-from .exceptions import *
+from .exceptions import LogFileDiscardedLineCreateError
 
 
-class ArticleEvent(CommonControlField):
-    event_type = models.CharField(
-        _("Event Type"),
-        choices=choices.ARTICLE_EVENT_TYPE,
-        max_length=3,
-        null=True,
-        blank=True,
-    )
-
-    message = models.TextField(
-        _("Message"),
-        null=True,
-        blank=True,
-    )
-
-    data = models.JSONField(
-        _("Data"),
-        default=dict,
-    )
-
-    handled = models.BooleanField(
-        _("Handled"),
-        default=False
-    )
-
-    @classmethod
-    def create(cls, event_type, message, data):
-        try:
-            obj = cls()
-            obj.event_type = event_type
-            obj.message = message
-            obj.data = data
-            obj.save()
-        except Exception as exc:
-            raise ArticleEventError(
-                f"Unable to create ArticleEvent ({data} - {event_type} - {message}). EXCEPTION {exc}"
-            )
-        return obj
-        
-    def __str__(self):
-        return f"{self.event_type} - {self.message}"
-
-
-class LogFileDiscardedLine(CommonControlField):
+class LogFileDiscardedLine(models.Model):
+    created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True)
     log_file = models.ForeignKey(
         LogFile, 
         on_delete=models.CASCADE, 
@@ -108,174 +56,4 @@ def __str__(self):
         return f"{self.data} - {self.message}"
 
 
-class UnexpectedEvent(models.Model):
-    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
-    created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True)
-    exception_type = models.TextField(_("Exception Type"), null=True, blank=True)
-    exception_msg = models.TextField(_("Exception Msg"), null=True, blank=True)
-    traceback = models.JSONField(null=True, blank=True)
-    detail = models.JSONField(null=True, blank=True)
-
-    class Meta:
-        indexes = [
-            models.Index(fields=["exception_type"]),
-        ]
-
-    def __str__(self):
-        return f"{self.exception_msg}"
-
-    @property
-    def data(self):
-        return dict(
-            created=self.created.isoformat(),
-            exception_type=self.exception_type,
-            exception_msg=self.exception_msg,
-            traceback=json.dumps(self.traceback),
-            detail=json.dumps(self.detail),
-        )
-
-    @classmethod
-    def create(
-        cls,
-        exception=None,
-        exc_traceback=None,
-        detail=None,
-    ):
-        try:
-            if exception:
-                logging.exception(exception)
-
-            obj = cls()
-            obj.exception_msg = str(exception)
-            obj.exception_type = str(type(exception))
-            try:
-                json.dumps(detail)
-                obj.detail = detail
-            except Exception as e:
-                obj.detail = str(detail)
 
-            if exc_traceback:
-                obj.traceback = traceback.format_tb(exc_traceback)
-            obj.save()
-            return obj
-        except Exception as exc:
-            raise UnexpectedEventCreateError(
-                f"Unable to create unexpected event ({exception} {exc_traceback}). EXCEPTION {exc}"
-            )
-
-
-class Event(CommonControlField):
-    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
-    message = models.TextField(_("Message"), null=True, blank=True)
-    message_type = models.CharField(
-        _("Message type"),
-        choices=choices.EVENT_MSG_TYPE,
-        max_length=16,
-        null=True,
-        blank=True,
-    )
-    detail = models.JSONField(null=True, blank=True)
-    unexpected_event = models.ForeignKey(
-        'UnexpectedEvent', on_delete=models.SET_NULL, null=True, blank=True
-    )
-
-    class Meta:
-        abstract = True
-        indexes = [
-            models.Index(fields=["message_type"]),
-        ]
-
-    @property
-    def data(self):
-        d = {}
-        d["created"] = self.created.isoformat()
-        d["user"] = self.user.username
-        d.update(
-            dict(
-                message=self.message, message_type=self.message_type, detail=self.detail
-            )
-        )
-        if self.unexpected_event:
-            d.update(self.unexpected_event.data)
-        return d
-
-    @classmethod
-    def create(
-        cls,
-        user=None,
-        message_type=None,
-        message=None,
-        e=None,
-        exc_traceback=None,
-        detail=None,
-    ):
-        try:
-            obj = cls()
-            obj.creator = user
-            obj.message = message
-            obj.message_type = message_type
-            obj.detail = detail
-            obj.save()
-
-            if e:
-                logging.exception(f"{message}: {e}")
-                obj.unexpected_event = UnexpectedEvent.create(
-                    exception=e,
-                    exc_traceback=exc_traceback,
-                )
-                obj.save()
-        except Exception as exc:
-            raise EventCreateError(
-                f"Unable to create Event ({message} {e}). EXCEPTION: {exc}"
-            )
-        return obj
-
-
-def tracker_file_directory_path(instance, filename):
-    d = datetime.now(datetime.timezone.utc)
-    return f"tracker/{d.year}/{d.month}/{d.day}/{filename}"
-
-
-class EventReport(CommonControlField):
-    file = models.FileField(
-        upload_to=tracker_file_directory_path, null=True, blank=True
-    )
-
-    class Meta:
-        abstract = True
-
-    def save_file(self, events, ext=None):
-        if not events:
-            return
-        try:
-            ext = ".json"
-            content = json.dumps(list([item.data for item in events]))
-            name = datetime.now(datetime.timezone.utc).isoformat() + ext
-            self.file.save(name, ContentFile(content))
-            self.delete_events(events)
-        except Exception as e:
-            raise EventReportSaveFileError(
-                f"Unable to save EventReport.file ({name}). Exception: {e}"
-            )
-
-    def delete_events(self, events):
-        for item in events:
-            try:
-                item.unexpected_event.delete()
-            except:
-                pass
-            try:
-                item.delete()
-            except:
-                pass
-
-    @classmethod
-    def create(cls, user):
-        try:
-            obj = cls()
-            obj.creator = user
-            obj.save()
-        except Exception as e:
-            raise EventReportCreateError(
-                f"Unable to create EventReport. Exception: {e}"
-            )
diff --git a/tracker/tasks.py b/tracker/tasks.py
deleted file mode 100644
index ace8145..0000000
--- a/tracker/tasks.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# tasks.py
-from datetime import datetime
-
-from django.contrib.auth import get_user_model
-
-from config import celery_app
-from core.utils.utils import _get_user
-
-from .models import UnexpectedEvent
-
-
-User = get_user_model()
-
-
-@celery_app.task(bind=True, name="Cleanup unexpected events")
-def delete_unexpected_events(self, exception_type, start_date=None, end_date=None, user_id=None, username=None):
-    """
-    Delete UnexpectedEvent records based on exception type and optional date range.
-    """
-    user = _get_user(self.request, username=username, user_id=user_id)
-
-    if exception_type == '__all__':
-        UnexpectedEvent.objects.all().delete()
-        return
-
-    filters = {'exception_type__icontains': exception_type}
-    if start_date:
-        start_date = datetime.fromisoformat(start_date)
-        filters['created__gte'] = start_date
-    if end_date:
-        end_date = datetime.fromisoformat(end_date)
-        filters['created__lte'] = end_date
-
-    UnexpectedEvent.objects.filter(**filters).delete()

From 0873ba7edd5096d05c2173517002e09028fb8565 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:40:49 -0300
Subject: [PATCH 28/31] =?UTF-8?q?refactor:=20adicionar=20migra=C3=A7=C3=A3?=
 =?UTF-8?q?o=20tracker=200002=20e=20atualizar=200001=5Finitial?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tracker/migrations/0001_initial.py            | 162 ++++++++++++++++--
 ...02_remove_articleevent_creator_and_more.py |  38 ++++
 2 files changed, 188 insertions(+), 12 deletions(-)
 create mode 100644 tracker/migrations/0002_remove_articleevent_creator_and_more.py

diff --git a/tracker/migrations/0001_initial.py b/tracker/migrations/0001_initial.py
index f207722..04fdc35 100644
--- a/tracker/migrations/0001_initial.py
+++ b/tracker/migrations/0001_initial.py
@@ -1,13 +1,18 @@
-# Generated by Django 5.0.7 on 2024-08-30 00:52
+# Generated by Codex on 2026-04-27
 
+import django.db.models.deletion
 import uuid
+from django.conf import settings
 from django.db import migrations, models
 
 
 class Migration(migrations.Migration):
     initial = True
 
-    dependencies = []
+    dependencies = [
+        ("log_manager", "0001_initial"),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
 
     operations = [
         migrations.CreateModel(
@@ -24,21 +29,15 @@ class Migration(migrations.Migration):
                 ),
                 (
                     "created",
-                    models.DateTimeField(
-                        auto_now_add=True, verbose_name="Creation date"
-                    ),
+                    models.DateTimeField(auto_now_add=True, verbose_name="Creation date"),
                 ),
                 (
                     "exception_type",
-                    models.TextField(
-                        blank=True, null=True, verbose_name="Exception Type"
-                    ),
+                    models.TextField(blank=True, null=True, verbose_name="Exception Type"),
                 ),
                 (
                     "exception_msg",
-                    models.TextField(
-                        blank=True, null=True, verbose_name="Exception Msg"
-                    ),
+                    models.TextField(blank=True, null=True, verbose_name="Exception Msg"),
                 ),
                 ("traceback", models.JSONField(blank=True, null=True)),
                 ("detail", models.JSONField(blank=True, null=True)),
@@ -46,9 +45,148 @@ class Migration(migrations.Migration):
             options={
                 "indexes": [
                     models.Index(
-                        fields=["exception_type"], name="tracker_une_excepti_47ede4_idx"
+                        fields=["exception_type"],
+                        name="tracker_une_excepti_47ede4_idx",
                     )
                 ],
             },
         ),
+        migrations.CreateModel(
+            name="ArticleEvent",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "created",
+                    models.DateTimeField(auto_now_add=True, verbose_name="Creation date"),
+                ),
+                (
+                    "updated",
+                    models.DateTimeField(auto_now=True, verbose_name="Last update date"),
+                ),
+                (
+                    "event_type",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("MUL", "Multiple Articles Returned"),
+                            ("ERR", "Data Error"),
+                        ],
+                        max_length=3,
+                        null=True,
+                        verbose_name="Event Type",
+                    ),
+                ),
+                (
+                    "message",
+                    models.TextField(blank=True, null=True, verbose_name="Message"),
+                ),
+                ("data", models.JSONField(default=dict, verbose_name="Data")),
+                ("handled", models.BooleanField(default=False, verbose_name="Handled")),
+                (
+                    "creator",
+                    models.ForeignKey(
+                        editable=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="%(class)s_creator",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Creator",
+                    ),
+                ),
+                (
+                    "updated_by",
+                    models.ForeignKey(
+                        blank=True,
+                        editable=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="%(class)s_last_mod_user",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Updater",
+                    ),
+                ),
+            ],
+        ),
+        migrations.CreateModel(
+            name="LogFileDiscardedLine",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "created",
+                    models.DateTimeField(auto_now_add=True, verbose_name="Creation date"),
+                ),
+                (
+                    "updated",
+                    models.DateTimeField(auto_now=True, verbose_name="Last update date"),
+                ),
+                (
+                    "error_type",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("MET", "Missing Metadata"),
+                            ("DOC", "Missing Document"),
+                            ("SRC", "Missing Source"),
+                            ("URL", "URL Translation"),
+                            ("DBE", "Database Error"),
+                        ],
+                        max_length=3,
+                        null=True,
+                        verbose_name="Error Type",
+                    ),
+                ),
+                ("data", models.JSONField(default=dict, verbose_name="Data")),
+                (
+                    "message",
+                    models.TextField(blank=True, null=True, verbose_name="Message"),
+                ),
+                ("handled", models.BooleanField(default=False, verbose_name="Handled")),
+                (
+                    "creator",
+                    models.ForeignKey(
+                        editable=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="%(class)s_creator",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Creator",
+                    ),
+                ),
+                (
+                    "log_file",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="log_manager.logfile",
+                    ),
+                ),
+                (
+                    "updated_by",
+                    models.ForeignKey(
+                        blank=True,
+                        editable=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="%(class)s_last_mod_user",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Updater",
+                    ),
+                ),
+            ],
+        ),
     ]
diff --git a/tracker/migrations/0002_remove_articleevent_creator_and_more.py b/tracker/migrations/0002_remove_articleevent_creator_and_more.py
new file mode 100644
index 0000000..ee23c85
--- /dev/null
+++ b/tracker/migrations/0002_remove_articleevent_creator_and_more.py
@@ -0,0 +1,38 @@
+# Generated by Django 5.2.12 on 2026-05-01 22:23
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("tracker", "0001_initial"),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name="articleevent",
+            name="creator",
+        ),
+        migrations.RemoveField(
+            model_name="articleevent",
+            name="updated_by",
+        ),
+        migrations.DeleteModel(
+            name="UnexpectedEvent",
+        ),
+        migrations.RemoveField(
+            model_name="logfilediscardedline",
+            name="creator",
+        ),
+        migrations.RemoveField(
+            model_name="logfilediscardedline",
+            name="updated",
+        ),
+        migrations.RemoveField(
+            model_name="logfilediscardedline",
+            name="updated_by",
+        ),
+        migrations.DeleteModel(
+            name="ArticleEvent",
+        ),
+    ]

From 38944b7ae2b8a1a6b0373cdc6424131ef6e8706e Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:40:59 -0300
Subject: [PATCH 29/31] refactor: simplificar modelos resources (remover
 CommonControlField)

---
 resources/constants.py     |  2 +-
 resources/models.py        | 83 +++++++++++++++++++++++++++++++++++---
 resources/tasks.py         | 56 ++++++++++++++-----------
 resources/tests.py         |  3 --
 resources/wagtail_hooks.py | 12 ++++++
 5 files changed, 122 insertions(+), 34 deletions(-)
 delete mode 100644 resources/tests.py

diff --git a/resources/constants.py b/resources/constants.py
index feba18d..2ce64da 100644
--- a/resources/constants.py
+++ b/resources/constants.py
@@ -1,2 +1,2 @@
 DEFAULT_COUNTER_ROBOTS_URL = 'https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json'
-DEFAULT_MMDB_URL = 'https://download.db-ip.com/free/dbip-country-lite-2025-02.mmdb.gz'
+DEFAULT_MMDB_URL = 'https://download.db-ip.com/free/dbip-country-lite-2026-03.mmdb.gz'
diff --git a/resources/models.py b/resources/models.py
index a30b8d3..22663e2 100644
--- a/resources/models.py
+++ b/resources/models.py
@@ -2,11 +2,26 @@
 
 from django.db import models
 from django.utils.translation import gettext_lazy as _
+from wagtail.admin.panels import FieldPanel
 
-from core.models import CommonControlField
+class RobotUserAgent(models.Model):
+    SOURCE_ALL = "all"
+    SOURCE_COUNTER = "counter"
+    SOURCE_SCIELO = "scielo"
+    SOURCE_CHOICES = [SOURCE_ALL, SOURCE_COUNTER, SOURCE_SCIELO]
 
+    panels = [
+        FieldPanel("pattern"),
+        FieldPanel("source_counter"),
+        FieldPanel("source_scielo"),
+        FieldPanel("is_active"),
+        FieldPanel("source_url"),
+        FieldPanel("last_changed"),
+    ]
+
+    created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True)
+    updated = models.DateTimeField(verbose_name=_("Last update date"), auto_now=True)
 
-class RobotUserAgent(CommonControlField):
     pattern = models.CharField(
         verbose_name=_('Pattern'),
         max_length=255,
@@ -14,21 +29,77 @@ class RobotUserAgent(CommonControlField):
         blank=False,
         primary_key=True,
     )
+    source_counter = models.BooleanField(
+        verbose_name=_("From Atmire/COUNTER"),
+        default=False,
+        db_index=True,
+    )
+    source_scielo = models.BooleanField(
+        verbose_name=_("From SciELO"),
+        default=False,
+        db_index=True,
+    )
+    is_active = models.BooleanField(
+        verbose_name=_("Active"),
+        default=True,
+        db_index=True,
+    )
+    source_url = models.URLField(
+        verbose_name=_("Source URL"),
+        max_length=255,
+        null=True,
+        blank=True,
+    )
     last_changed = models.DateField(
         verbose_name=_('Last Changed'),
-        null=False,
-        blank=False,
+        null=True,
+        blank=True,
     )
 
     @classmethod
     def get_all_patterns(cls):
-        return cls.objects.values_list('pattern', flat=True)
+        return cls.get_patterns(source=cls.SOURCE_ALL)
+
+    @classmethod
+    def normalize_source(cls, source=None):
+        normalized = (source or cls.SOURCE_ALL).lower()
+        if normalized not in cls.SOURCE_CHOICES:
+            raise ValueError(f"Unsupported robots source: {source}")
+        return normalized
+
+    @classmethod
+    def get_patterns(cls, source=None):
+        source = cls.normalize_source(source)
+        queryset = cls.objects.filter(is_active=True)
+
+        if source == cls.SOURCE_COUNTER:
+            queryset = queryset.filter(source_counter=True)
+        elif source == cls.SOURCE_SCIELO:
+            queryset = queryset.filter(source_scielo=True)
+
+        return queryset.values_list("pattern", flat=True)
+
+    @property
+    def source_labels(self):
+        labels = []
+        if self.source_counter:
+            labels.append("Atmire/COUNTER")
+        if self.source_scielo:
+            labels.append("SciELO")
+        return ", ".join(labels) or "-"
+
+    def save(self, *args, **kwargs):
+        if not self.source_counter and not self.source_scielo:
+            self.source_scielo = True
+        super().save(*args, **kwargs)
 
     def __str__(self):
         return self.pattern
 
 
-class MMDB(CommonControlField):
+class MMDB(models.Model):
+    created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True)
+    updated = models.DateTimeField(verbose_name=_("Last update date"), auto_now=True)
     id = models.CharField(
         verbose_name=_('ID (HASH)'),
         max_length=64, 
diff --git a/resources/tasks.py b/resources/tasks.py
index e67cea1..4df60a9 100644
--- a/resources/tasks.py
+++ b/resources/tasks.py
@@ -1,19 +1,13 @@
 import logging
 
-from django.contrib.auth import get_user_model
-from django.utils import timezone
 from django.utils.translation import gettext as _
 
 from config import celery_app
-from core.utils.utils import _get_user
 
 from . import constants, models, utils
 
-
-User = get_user_model()
-
-@celery_app.task(bind=True, name=_('Load robots data'))
-def task_load_robots(self, url_robots=None, user_id=None, username=None):
+@celery_app.task(bind=True, name=_('[Resources] Load Robots Data'))
+def task_load_robots(self, url_robots=None):
     """
     Load robots from a given URL and save them to the database.
     This function fetches robot data from a specified URL (or a default URL if none is provided),
@@ -32,8 +26,6 @@ def task_load_robots(self, url_robots=None, user_id=None, username=None):
         - Error if there is an issue downloading or saving the robots.
         - Debug information for each robot saved.
     """
-    user = _get_user(self.request, username=username, user_id=user_id)
-    
     if not url_robots:
         url_robots = constants.DEFAULT_COUNTER_ROBOTS_URL
         logging.warning(f'No robots URL provided. Using default: {url_robots}')
@@ -45,43 +37,63 @@ def task_load_robots(self, url_robots=None, user_id=None, username=None):
         return False
 
     cleaned_robots_data = utils.clean_robots_list(robots_data)
+    fetched_patterns = set()
 
     try:
         for r_str in cleaned_robots_data:
             pattern = r_str.get('pattern')
             last_changed = r_str.get('last_changed')
+            fetched_patterns.add(pattern)
 
-            r_obj, created = models.RobotUserAgent.objects.get_or_create(pattern=pattern, last_changed=last_changed)
+            r_obj = models.RobotUserAgent.objects.filter(pattern=pattern).first()
+            created = r_obj is None
 
             if created:
-                r_obj.creator = user
-
-            r_obj.updated = timezone.now()
-            r_obj.updated_by = user
+                r_obj = models.RobotUserAgent(
+                    pattern=pattern,
+                    source_counter=True,
+                    source_scielo=False,
+                )
+            r_obj.source_counter = True
+            r_obj.is_active = True
+            r_obj.source_url = url_robots
+            r_obj.last_changed = last_changed
 
             r_obj.save()
             logging.debug(f'Robot saved: {r_obj}')
+
+        stale_counter_patterns = models.RobotUserAgent.objects.filter(
+            source_counter=True
+        ).exclude(pattern__in=fetched_patterns)
+
+        for r_obj in stale_counter_patterns:
+            r_obj.source_counter = False
+            r_obj.source_url = None
+            r_obj.last_changed = None
+            if not r_obj.source_scielo:
+                r_obj.is_active = False
+            r_obj.save()
+            logging.debug(f'Robot deactivated or detached from COUNTER source: {r_obj}')
+
         return True
 
     except Exception as e:
         logging.error(f'Error saving robots: {e}')
+        return False
 
 
-@celery_app.task(bind=True, name=_('Load geolocation and country data'))
-def task_load_geoip(self, url_geoip=None, user_id=None, username=None, validate=True):
+@celery_app.task(bind=True, name=_('[Resources] Load Geolocation Data'))
+def task_load_geoip(self, url_geoip=None, validate=True):
     """
     Load GeoIP data from a specified URL, validate it, and save it to the database.
     Args:
         url_geoip (str, optional): The URL to download the GeoIP data from. Defaults to None.
-        user_id (int, optional): The ID of the user performing the task. Defaults to None.
-        username (str, optional): The username of the user performing the task. Defaults to None.
         validate (bool, optional): Whether to validate the GeoIP data. Defaults to True.
     Returns:
         bool: True if the GeoIP data was successfully loaded and saved, False otherwise.
     Raises:
         Exception: If there is an error downloading, decompressing, or validating the GeoIP data.
     """
-    user = _get_user(self.request, username=username, user_id=user_id)
 
     if not url_geoip:
         url_geoip = constants.DEFAULT_MMDB_URL
@@ -115,10 +127,6 @@ def task_load_geoip(self, url_geoip=None, user_id=None, username=None, validate=
     except models.MMDB.DoesNotExist:
         mmdb_obj = models.MMDB.objects.create(id=mmdb_hash, data=mmdb_data)
         mmdb_obj.url = url_geoip or constants.DEFAULT_MMDB_URL
-        mmdb_obj.creator = user
-
-    mmdb_obj.updated = timezone.now()
-    mmdb_obj.updated_by = user
 
     mmdb_obj.save()
     logging.debug(f'GeoIP data has been saved: {mmdb_obj}')
diff --git a/resources/tests.py b/resources/tests.py
deleted file mode 100644
index 7ce503c..0000000
--- a/resources/tests.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.test import TestCase
-
-# Create your tests here.
diff --git a/resources/wagtail_hooks.py b/resources/wagtail_hooks.py
index 758bb53..c347b22 100644
--- a/resources/wagtail_hooks.py
+++ b/resources/wagtail_hooks.py
@@ -15,13 +15,25 @@ class RobotUserAgentSnippetViewSet(SnippetViewSet):
 
     list_display = (
         "pattern",
+        "source_labels",
+        "is_active",
         "last_changed",
     )
     search_fields = (
         "pattern",
+        "source_url",
+    )
+    list_filter = (
+        "source_counter",
+        "source_scielo",
+        "is_active",
     )
     list_export = (
         "pattern",
+        "source_counter",
+        "source_scielo",
+        "is_active",
+        "source_url",
         "last_changed",
     )
     export_filename = "robots"

From 970c8310d38f0b6c9bb213f53a0b693d7afae6cb Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:40:59 -0300
Subject: [PATCH 30/31] =?UTF-8?q?refactor:=20adicionar=20migra=C3=A7=C3=A3?=
 =?UTF-8?q?o=20resources=200002?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...creator_remove_mmdb_updated_by_and_more.py | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 resources/migrations/0002_remove_mmdb_creator_remove_mmdb_updated_by_and_more.py

diff --git a/resources/migrations/0002_remove_mmdb_creator_remove_mmdb_updated_by_and_more.py b/resources/migrations/0002_remove_mmdb_creator_remove_mmdb_updated_by_and_more.py
new file mode 100644
index 0000000..80bb0cc
--- /dev/null
+++ b/resources/migrations/0002_remove_mmdb_creator_remove_mmdb_updated_by_and_more.py
@@ -0,0 +1,61 @@
+# Generated by Django 5.2.12 on 2026-05-01 22:23
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("resources", "0001_initial"),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name="mmdb",
+            name="creator",
+        ),
+        migrations.RemoveField(
+            model_name="mmdb",
+            name="updated_by",
+        ),
+        migrations.RemoveField(
+            model_name="robotuseragent",
+            name="creator",
+        ),
+        migrations.RemoveField(
+            model_name="robotuseragent",
+            name="updated_by",
+        ),
+        migrations.AddField(
+            model_name="robotuseragent",
+            name="is_active",
+            field=models.BooleanField(
+                db_index=True, default=True, verbose_name="Active"
+            ),
+        ),
+        migrations.AddField(
+            model_name="robotuseragent",
+            name="source_counter",
+            field=models.BooleanField(
+                db_index=True, default=False, verbose_name="From Atmire/COUNTER"
+            ),
+        ),
+        migrations.AddField(
+            model_name="robotuseragent",
+            name="source_scielo",
+            field=models.BooleanField(
+                db_index=True, default=False, verbose_name="From SciELO"
+            ),
+        ),
+        migrations.AddField(
+            model_name="robotuseragent",
+            name="source_url",
+            field=models.URLField(
+                blank=True, max_length=255, null=True, verbose_name="Source URL"
+            ),
+        ),
+        migrations.AlterField(
+            model_name="robotuseragent",
+            name="last_changed",
+            field=models.DateField(blank=True, null=True, verbose_name="Last Changed"),
+        ),
+    ]

From e563354ad3ac41affaf14890f0273c79e5d37ae3 Mon Sep 17 00:00:00 2001
From: Rafael JP Damaceno <rafael@pitangainnovare.com.br>
Date: Fri, 1 May 2026 22:41:11 -0300
Subject: [PATCH 31/31] =?UTF-8?q?refactor:=20novo=20modelo=20DailyMetricJo?=
 =?UTF-8?q?b,=20remover=20ES=20e=20migra=C3=A7=C3=B5es=20antigas?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 metrics/es.py                                 | 385 ------------------
 metrics/fixtures/top100articles.csv           |  97 -----
 metrics/fixtures/top100articles.tar.gz        | Bin 1675 -> 0 bytes
 metrics/migrations/0001_initial.py            | 171 +++-----
 .../0002_alter_top100articlesfile_status.py   |  27 --
 ..._top100articlesfile_attachment_and_more.py | 187 ---------
 ...0004_delete_top100articlesfile_and_more.py |  49 ---
 ...ter_itemaccess_unique_together_and_more.py |  49 ---
 .../0006_alter_itemaccess_content_type.py     |  17 -
 ...007_alter_usersession_datetime_and_more.py |  23 --
 .../migrations/0008_remove_a_few_models.py    |  48 ---
 metrics/models.py                             | 108 +++++
 .../indexes/metrics/top100articles_text.txt   |  10 -
 13 files changed, 172 insertions(+), 999 deletions(-)
 delete mode 100644 metrics/es.py
 delete mode 100755 metrics/fixtures/top100articles.csv
 delete mode 100644 metrics/fixtures/top100articles.tar.gz
 delete mode 100644 metrics/migrations/0002_alter_top100articlesfile_status.py
 delete mode 100644 metrics/migrations/0003_remove_top100articlesfile_attachment_and_more.py
 delete mode 100644 metrics/migrations/0004_delete_top100articlesfile_and_more.py
 delete mode 100644 metrics/migrations/0005_alter_itemaccess_unique_together_and_more.py
 delete mode 100644 metrics/migrations/0006_alter_itemaccess_content_type.py
 delete mode 100644 metrics/migrations/0007_alter_usersession_datetime_and_more.py
 delete mode 100644 metrics/migrations/0008_remove_a_few_models.py
 delete mode 100644 metrics/templates/search/indexes/metrics/top100articles_text.txt

diff --git a/metrics/es.py b/metrics/es.py
deleted file mode 100644
index 25ad701..0000000
--- a/metrics/es.py
+++ /dev/null
@@ -1,385 +0,0 @@
-import logging
-
-from elasticsearch import Elasticsearch, helpers, NotFoundError
-from django.conf import settings
-
-from .utils import index_utils
-
-
-DEFAULT_ES_INDEX_USAGE_MAPPINGS = {
-    "properties": {
-        "collection": {
-            "type": "keyword"
-        },
-        "journal": {
-            "properties": {
-                "scielo_issn": {
-                    "type": "keyword"
-                },
-                "main_title": {
-                    "type": "keyword"
-                },
-                "subject_area_capes": {
-                    "type": "keyword"
-                },
-                "subject_area_wos": {
-                    "type": "keyword"
-                },
-                "acronym": {
-                    "type": "keyword"
-                },
-                "publisher": {
-                    "type": "keyword"
-                }
-            }
-        },
-        "pid": {
-            "type": "keyword"
-        },
-        "pid_v2": {
-            "type": "keyword"
-        },
-        "pid_v3": {
-            "type": "keyword"
-        },
-        "pid_generic": {
-            "type": "keyword"
-        },
-        "year_of_publication": {
-            "type": "integer"
-        },
-        "media_language": {
-            "type": "keyword"
-        },
-        "country_code": {
-            "type": "keyword"
-        },
-        "date": {
-            "type": "date",
-            "format": "yyyy-MM-dd"
-        },
-        "total_requests": {
-            "type": "integer"
-        },
-        "total_investigations": {
-            "type": "integer"
-        },
-        "unique_requests": {
-            "type": "integer"
-        },
-        "unique_investigations": {
-            "type": "integer"
-        }
-    }
-}
-
-
-class ElasticSearchUsageWrapper:
-    """
-    Wrapper for Elasticsearch usage metrics operations.
-    This class provides methods to interact with Elasticsearch for indexing,
-    deleting, and managing usage metrics data.
-    """
-
-    def __init__(self, url=None, basic_auth=None, api_key=None, verify_certs=False):
-        self.client = self.get_elasticsearch_client(url, basic_auth, api_key, verify_certs)
-
-
-    def get_elasticsearch_client(self, url=None, basic_auth=None, api_key=None, verify_certs=False):
-        """
-        Create an Elasticsearch client instance using Django settings.
-
-        :param url: Elasticsearch URL. If None, it will be taken from Django settings.
-        :param basic_auth: Basic authentication credentials. If None, it will be taken from Django settings.
-        :param api_key: API key. If None, it will be taken from Django settings.
-        :param verify_certs: Whether to verify SSL certificates. If None, it will be taken from Django settings.
-        """
-        if not url:
-            url = getattr(settings, "ES_URL", None)
-
-        if not basic_auth:
-            basic_auth = getattr(settings, "ES_BASIC_AUTH", None)
-
-        if not api_key:
-            api_key = getattr(settings, "ES_API_KEY", None)
-
-        if not verify_certs:
-            verify_certs = getattr(settings, "ES_VERIFY_CERTS", False)
-
-        if basic_auth:
-            client = Elasticsearch(url, basic_auth=basic_auth, verify_certs=verify_certs)
-        elif api_key:
-            client = Elasticsearch(url, api_key=api_key, verify_certs=verify_certs)
-        else:
-            client = Elasticsearch(url, verify_certs=verify_certs)
-
-        return client
-    
-
-    def ping(self):
-        """
-        Check if the Elasticsearch client is available.
-        Returns True if the client is available, False otherwise.
-        """
-        try:
-            return self.client.ping()
-        except Exception as e:
-            logging.error(f"Error pinging Elasticsearch client: {e}")
-            return False
-
-
-    def create_index(self, index_name, mappings=None, ping_client=False):
-        """
-        Create an Elasticsearch index. 
-
-        :param index_name: Name of the index to create.
-        :param mappings: Mappings for the index. If None, default mappings will be used.
-        :param ping_client: If True, checks if the Elasticsearch client is available before creating the index.
-        """
-        if ping_client and not self.ping():
-            return
-
-        if not mappings:
-            mappings = DEFAULT_ES_INDEX_USAGE_MAPPINGS
-
-        resp = self.client.indices.create(
-            index=index_name,
-            mappings=mappings,
-        )
-        logging.info(f"Index {index_name} created: {resp}")
-
-
-    def create_index_if_not_exists(self, index_name, mappings=None, ping_client=False):
-        """
-        Create an Elasticsearch index if it does not already exist.
-
-        :param index_name: Name of the index to create.
-        :param mappings: Mappings for the index. If None, default mappings will be used.
-        :param ping_client: If True, checks if the Elasticsearch client is available before creating the index.
-        """
-        if ping_client and not self.ping():
-            return
-
-        if not self.client.indices.exists(index=index_name):
-            self.create_index(index_name, mappings, ping_client)
-        else:
-            logging.info(f"Index {index_name} already exists. Skipping creation.")
-
-
-    def delete_index(self, index_name, ping_client=False):
-        """
-        Delete an Elasticsearch index.
-
-        :param index_name: Name of the index to delete.
-        :param ping_client: If True, checks if the Elasticsearch client is available before deleting the index.
-        """
-        if ping_client and not self.ping():
-            return
-        
-        self.client.indices.delete(index=index_name)
-
-
-    def index_document(self, index_name, doc_id, document, ping_client=False):
-        """
-        Index a document in Elasticsearch.
-
-        :param index_name: Name of the index.
-        :param doc_id: ID of the document.
-        :param document: Document to index.
-        :param ping_client: If True, checks if the Elasticsearch client is available before indexing the document.
-        """
-        if ping_client and not self.ping():
-            return
-            
-        self.client.index(index=index_name, id=doc_id, document=document)
-
-
-    def index_documents(self, index_name, documents, ping_client=False):
-        """
-        Index multiple documents in Elasticsearch.
-
-        :param index_name: Name of the index.
-        :param documents: Dictionary of documents to index, where keys are document IDs and values are the documents.
-        :param ping_client: If True, checks if the Elasticsearch client is available before indexing the documents.
-        """
-        if ping_client and not self.ping():
-            return
-        
-        helpers.bulk(
-            self.client,
-            (
-                {
-                    "_index": index_name,
-                    "_id": doc_id,
-                    "_source": document,
-                }
-                for doc_id, document in documents.items()
-            ),
-        )
-
-
-    def delete_document(self, index_name, doc_id, ping_client=False):
-        """
-        Delete a document from Elasticsearch.
-
-        :param index_name: Name of the index.
-        :param doc_id: ID of the document to delete.
-        :param ping_client: If True, checks if the Elasticsearch client is available before deleting the document.
-        """
-        if ping_client and not self.ping():
-            return
-
-        try:
-            self.client.delete(index=index_name, id=doc_id)
-        except NotFoundError as e:
-            logging.error(f"Failed to delete document {doc_id} from Elasticsearch: {e}")
-
-
-    def delete_documents(self, index_name, doc_ids, ping_client=False):
-        """
-        Delete multiple documents from Elasticsearch using bulk.
-        :param index_name: Name of the index.
-        :param doc_ids: List of document IDs to delete.
-        :param ping_client: If True, checks if the Elasticsearch client is available before deleting the documents.
-        """
-        if ping_client and not self.ping():
-            return
-                    
-        actions = (
-            {
-                "_op_type": "delete",
-                "_index": index_name,
-                "_id": doc_id,
-            }
-            for doc_id in doc_ids
-        )
-
-        try:
-            helpers.bulk(self.client, actions)
-        except helpers.BulkIndexError as e:
-            logging.error(f"BulkIndexError occurred: {e.errors}")
-
-
-    def delete_documents_by_key(self, index_name, data, ping_client=False):
-        """
-        Delete multiple documents from Elasticsearch based on specific key-value pairs.
-
-        :param index_name: Name of the index.
-        :param data: Dictionary where keys are field names and values are single values or lists of values.
-        :param ping_client: If True, checks if the Elasticsearch client is available before deleting the documents.
-        """
-        if ping_client and not self.ping():
-            return
-
-        query = {
-            "query": {
-                "bool": {
-                    "must": [
-                        {
-                            "terms": {
-                                key: values if isinstance(values, list) else [values]
-                            }
-                        }
-                        for key, values in data.items()
-                    ]
-                }
-            }
-        }
-
-        try:
-            self.client.delete_by_query(index=index_name, body=query)
-            return True
-        except Exception as e:
-            logging.error(f"Failed to delete documents: {e}")
-
-        return False
-
-
-    def fetch_and_update_documents_locally(self, index_name, documents, batch_size=5000, ping_client=False):
-        """
-        Fetch existing documents from Elasticsearch and update local documents with accumulated metrics.
-        This function retrieves documents from Elasticsearch in batches and merges their metric fields
-        with the provided local documents. The merge operation adds values for specific metric fields
-        or sets them if they don't exist in the local documents.
-
-        Args:
-            index_name (str): Name of the Elasticsearch index to fetch documents from.
-            documents (dict): Dictionary of documents to be updated, where keys are document IDs and values
-                are dictionaries containing metric data.
-            batch_size (int, optional): Number of documents to fetch in each batch from Elasticsearch.
-                Defaults to 5000.
-            ping_client (bool, optional): If True, checks if the Elasticsearch client is available before
-                fetching documents. Defaults to False.
-        
-        Returns:
-            None: The function modifies the input documents dictionary in-place.
-        """
-        if ping_client and not self.ping():
-            return
-
-        existing_docs = {}
-        ids = list(documents.keys())
-
-        for i in range(0, len(ids), batch_size):
-            batch_ids = ids[i:i+batch_size]
-            resp = self.client.mget(index=index_name, ids=batch_ids)
-            for doc in resp.get('docs', []):
-                if doc.get('found'):
-                    existing_docs[doc['_id']] = doc['_source']
-        logging.info(f'Found {len(existing_docs)} existing documents in Elasticsearch for update.')
-
-        for doc_id, existing in existing_docs.items():
-            current = documents[doc_id]
-            for field in [
-                "total_requests",
-                "unique_requests",
-                "total_investigations",
-                "unique_investigations",
-            ]:
-                if field in existing and field in current:
-                    current[field] += existing[field]
-                elif field in existing:
-                    current[field] = existing[field]
-        
-
-    def export_to_index(self, index_name, data, batch_size=5000, ping_client=False):
-        """
-        Export data to Elasticsearch index in bulk operations.
-        This function converts input data to index documents, processes them locally,
-        and then indexes them to Elasticsearch in batches to optimize performance.
-        
-        Args:
-            index_name (str): Name of the Elasticsearch index to export data to.
-            data: The data to be exported to the Elasticsearch index
-            batch_size (int, optional): Number of documents to process in each bulk operation.
-                Defaults to 5000.
-            ping_client (bool, optional): If True, checks if the Elasticsearch client is available
-
-        Returns:
-            None: Function performs side effects by indexing data to Elasticsearch
-        """
-        if ping_client and not self.ping():
-            return
-        
-        bulk_data = []
-        documents = index_utils.convert_to_index_documents(data)
-        self.fetch_and_update_documents_locally(index_name=index_name, documents=documents)
-
-        for key, metric_data in documents.items():
-            metric_data['pid'] = metric_data.get('pid_v3') or metric_data.get('pid_v2') or metric_data.get('pid_generic', '')
-            bulk_data.append({
-                "_id": key,
-                "_source": metric_data,
-            })
-
-            if len(bulk_data) >= batch_size:
-                self.index_documents(
-                    index_name=index_name,
-                    documents={doc["_id"]: doc["_source"] for doc in bulk_data},
-                )
-                bulk_data = []
-
-        self.index_documents(
-            index_name=index_name,
-            documents={doc["_id"]: doc["_source"] for doc in bulk_data},
-        )
diff --git a/metrics/fixtures/top100articles.csv b/metrics/fixtures/top100articles.csv
deleted file mode 100755
index 9d979f3..0000000
--- a/metrics/fixtures/top100articles.csv
+++ /dev/null
@@ -1,97 +0,0 @@
-print_issn	online_issn	pid_issn	collection	pid	yop	year_month_day	total_item_requests	total_item_investigations	unique_item_requests	unique_item_investigations
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000300005	2005	2024-05-26	13	16	13	16
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000100020	2009	2024-05-26	9	10	8	9
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200012	2009	2024-05-26	8	9	8	9
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200018	2009	2024-05-26	8	8	8	8
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000300004	2005	2024-05-26	8	11	8	11
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200011	2009	2024-05-26	8	9	8	9
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200001	2009	2024-05-26	7	7	7	7
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200010	2009	2024-05-26	7	9	7	9
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000300007	2005	2024-05-26	7	10	7	10
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200003	2009	2024-05-26	7	9	7	9
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000400010	2008	2024-05-26	7	7	7	7
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000300008	2005	2024-05-26	7	9	7	9
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000400008	2009	2024-05-26	7	7	7	7
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000400009	2006	2024-05-26	6	6	6	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000200009	2006	2024-05-26	6	7	6	7
-0002-7014	1851-8044	0002-7014	arg	S0002-70142010000100007	2010	2024-05-26	6	6	6	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142007000300003	2007	2024-05-26	6	6	6	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000100022	2009	2024-05-26	6	6	6	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142010000100006	2010	2024-05-26	6	6	6	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200002	2009	2024-05-26	6	7	6	7
-0002-7014	1851-8044	0002-7014	arg	S0002-70142010000100002	2010	2024-05-26	6	6	6	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142007000200014	2007	2024-05-26	6	6	6	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000100021	2009	2024-05-26	6	6	6	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142010000400010	2010	2024-05-26	6	6	6	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142010000200001	2010	2024-05-26	6	6	6	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142010000200002	2010	2024-05-26	6	7	6	7
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200014	2009	2024-05-26	5	6	5	6
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000100014	2009	2024-05-26	5	5	5	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000200009	2005	2024-05-26	5	5	5	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200004	2009	2024-05-26	5	5	5	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000100016	2006	2024-05-26	5	5	5	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000200015	2006	2024-05-26	5	5	5	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142007000300005	2007	2024-05-26	5	5	5	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000300009	2009	2024-05-26	5	5	5	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142010000200010	2010	2024-05-26	4	4	4	4
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000100015	2008	2024-05-26	3	4	3	4
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000300002	2005	2024-05-26	2	5	2	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200015	2009	2024-05-26	2	3	2	3
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000300001	2005	2024-05-26	2	5	2	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000300009	2005	2024-05-26	2	4	2	4
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200005	2009	2024-05-26	2	4	2	4
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200008	2009	2024-05-26	2	3	2	3
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000300006	2005	2024-05-26	2	5	2	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000300010	2005	2024-05-26	2	2	2	2
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000300003	2005	2024-05-26	2	5	2	5
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000300001	2006	2024-05-26	2	2	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000100005	2009	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200016	2009	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000400004	2005	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000100009	2008	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000100014	2005	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200009	2009	2024-05-26	1	2	1	2
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000100019	2006	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200013	2009	2024-05-26	1	3	1	3
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000400007	2008	2024-05-26	1	2	1	2
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000300010	2008	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200006	2009	2024-05-26	1	3	1	3
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000200018	2006	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000400002	2008	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142010000300005	2010	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000200007	2006	2024-05-26	1	3	1	3
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000400004	2006	2024-05-26	1	2	1	2
-0002-7014	1851-8044	0002-7014	arg	S0002-70142007000100004	2007	2024-05-26	1	3	1	3
-0002-7014	1851-8044	0002-7014	arg	S0002-70142007000200021	2007	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142007000100002	2007	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000100004	2009	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000400004	2009	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000400006	2008	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000400005	2006	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000300006	2008	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000400011	2008	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142007000300001	2007	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142007000100020	2007	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000400002	2006	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000100005	2005	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200017	2009	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000100005	2008	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000200007	2009	2024-05-26	1	4	1	4
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000100023	2009	2024-05-26	1	1	1	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000100008	2008	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000400008	2006	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000400005	2005	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000200006	2006	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000400007	2005	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000200013	2008	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000400003	2006	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142009000400006	2009	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142007000300008	2007	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000200008	2005	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000200006	2008	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000400004	2008	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142005000400006	2005	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142007000300006	2007	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000300003	2006	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142008000100007	2008	2024-05-26	0	1	0	1
-0002-7014	1851-8044	0002-7014	arg	S0002-70142006000100009	2006	2024-05-26	0	1	0	1
diff --git a/metrics/fixtures/top100articles.tar.gz b/metrics/fixtures/top100articles.tar.gz
deleted file mode 100644
index cd49556706cc65863c0fc842519af13978638501..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1675
zcmV;626Xu!iwFP?L#bu}1MOOCZreBz?qBXHa)B)PCUuTIfMFzV>qTm(wo@RtUy?FM
zl844Z!$FDyk!U+sjj!LlXCz7~^<$9AZnV_)QMyO%|9(3E`oeffDT8wl=~D$K=b!mV
ze>?S{tT)bR7mRw4%6X%|JV;Iw_l~#oWqV3gPTQ~B-OInP&v$<JfB!xT^8lZJ$nkpG
zA1<5y`Fs$^!^{4#8~=XYKaYPr9baB{PnZ3%TododSMk2vo;E*@hs*cP^Y&d_j+gDr
zW`EiJ*qnAh-*)HAd46qw_?0g0zil(dS-c(g={Gk&pI==5{x4bmQ>f_FW0cm4>Du=6
zP5c92wUjP>G1&l4=<%&OwNOT=>-$>NA-ie5AT#PPM;(NcB8q_msWYrbkO{2|F*Xp(
zNmiys%8Wv(@<wK<xaF1@F^ngfBQ|$P9K(8!pvizV8L|WAH?rJ@i)GBF2Du}HCPD7l
zkekGDxN?-(%ZVI)Ma*E*&k?UD1ySEa4EePZQORopD*jW<)5|eb%M|Yk42X~<aYmyd
zPEva=sUc=s9Q!PzIW1GNk7c(E&X66JflR?6^{B#{#d#51mZ8{sstXqB(TVb+TBeCu
zPD;j|a*ty`gqUID=&RHo<;M=hVj0JU2>H>WDD2Rq0n0Gx5j?yRg?2nC9e@HoE;;~{
z@XUHd=m0eGN1bHWbblN<4ogInUu2Goiz6W7DAY-hdc7VY3R_b5A>vM|SC5`sk7GxA
zgIG>7N5jR@FTBU!a#VseBdNBn)p(L)#&aQNJzA&=jsdaeo-t-36UV8#a&Uy0!8u$N
zTd(#<sD*e|=(RPT3ss71wLY%vTsTgzkwNOXh-(g8gJoJCPLdjWipXUd(4%IOx`YdK
zDa#Ik$Ps4O(2$y}$0<{I2JZ<qQ{V}S$g$2`<Af9#^ava^7vivxnyV^URz&b{&lT6;
zJ-D;);AmJm!hJKvg$UCJyFZS%usIOHGMdXWwO+Sw-gnl*!0F)=N7l;K;%M`We7~pF
z0rW{7k<>Eb$qw(q)s=_{9Vzyn4vte#!O9Uo2OuKG>fwgnGKNcPs57`XBO*e6j3}FR
zSZ11rIW5zuGaZOiz2-o43(Igw4VLj-mVx=2%^x9~;d!Qm<21WAIsWbk{dbP6*GHp*
zoc4bVAG`lUORH?&|KYSYMh0K@e`x!;|KlT$KVbjI=$}IQAv<RbV!F0Fzm`B8za|eh
z^MqFVpX$_NNQW@q3X>*!qvKlWoXdUfFUmtsfL6+-SAK|}=2>aiBCC1DEh8u(dsCK5
zYb~m*c{5iFE$67t!J?o#;JQGyQP}G{*=!D(&HA!zS`>>p(7?iC)|F+_qHy(hxJqG_
zFhQ8r-eoFR?6}64rPHFImPr6}H3k+1wdi)hs4|6@(5a@b_5oeFYGkd9dR=6H^~hiI
z|B>P~g)fOKO#C;wAe5CF2a>-Qz1~I5i=qMD&I5Tr+;x$&A&#w_3>{4a)w4oGo|W-N
zR8r1@eIz#;Fl8k-SRGrSSA|hEEUIUX`SqsClqIiw#Yj=m@Bun1M1e$H$J3;eODTph
zKrD;lP&NpX3b$*k^m>w$wL)$>+iFR0d?j&8oo#Ddi)h*$I}qW9A1_DCj))7MFI#k0
zCl4q~0eR1CW71j=*Wize0kIYi@3}aFhhxjkHmi^jv1MkPrzpk32QH3s!BH|G)@(L5
z3W@<yFF0x@M9Yd;Ei<rLChIY^8#+d$$JtgVQUimlf-PH}NNurc=SL#PI%<psmID!_
z#!YqyBHYsEMQn_1WvkQbrb^YAVe=k{YuQ93aIB+RBjXbN5u~<=qZv<f4sQ&8ort*M
zCnDDT*l0?Li0W#YMh@@bIE^_R9A)($d|ReNYIwwgr6xupJP=_)ENQ2R&LuVU;kaw=
zuuQmb8KTs_E%j_uB&l-Yxb8*79RLv#^oV;iBBHJwaV?x}+@wrMje6ao$LTf;mmV#v
z9>FpLC*m|AaoM_LTzqS-!`3BJ;2X9b96jVm#IZx_$c9*ZibmhlVe2UhS+kkpw9J$r
zIrIo|&7U75ryd<xhJ$16q%@oNfMxKk(7|!?M=p-jGf7tO!IqhA@FG=(#tmW`;&oy-
z4LuSO>x86mZ`Q#Pw%)OEbnF}_@8RHBqtN1crc;lN38}*$r|E;kA0boV`xF)Nckjjc
VJU)-l<Ma5)<3H^5bS(f%004Y*ID7yA

diff --git a/metrics/migrations/0001_initial.py b/metrics/migrations/0001_initial.py
index 30ccc96..9746d5f 100644
--- a/metrics/migrations/0001_initial.py
+++ b/metrics/migrations/0001_initial.py
@@ -1,4 +1,4 @@
-# Generated by Django 5.0.7 on 2024-08-30 00:52
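+# Generated by Codex on 2026-04-27. Rewrites the initial migration in place; the former 0002-0008 migrations are removed by the same change.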
 
 import django.db.models.deletion
 from django.conf import settings
@@ -9,13 +9,13 @@ class Migration(migrations.Migration):
     initial = True
 
     dependencies = [
-        ("wagtaildocs", "0013_delete_uploadeddocument"),
+        ("collection", "0001_initial"),
         migrations.swappable_dependency(settings.AUTH_USER_MODEL),
     ]
 
     operations = [
         migrations.CreateModel(
-            name="Top100ArticlesFile",
+            name="DailyMetricJob",
             fields=[
                 (
                     "id",
@@ -28,133 +28,85 @@ class Migration(migrations.Migration):
                 ),
                 (
                     "created",
-                    models.DateTimeField(
-                        auto_now_add=True, verbose_name="Creation date"
-                    ),
+                    models.DateTimeField(auto_now_add=True, verbose_name="Creation date"),
                 ),
                 (
                     "updated",
-                    models.DateTimeField(
-                        auto_now=True, verbose_name="Last update date"
-                    ),
+                    models.DateTimeField(auto_now=True, verbose_name="Last update date"),
+                ),
+                (
+                    "access_date",
+                    models.DateField(db_index=True, verbose_name="Access Date"),
                 ),
                 (
                     "status",
                     models.CharField(
                         choices=[
-                            ("QUE", "Queued"),
-                            ("PAR", "Parsing"),
-                            ("PRO", "Processed"),
-                            ("INV", "Invalidated"),
+                            ("PEN", "Pending"),
+                            ("EXP", "Exporting"),
+                            ("SUC", "Exported"),
+                            ("ERR", "Error"),
                         ],
-                        default="QUE",
-                        max_length=5,
+                        db_index=True,
+                        default="PEN",
+                        max_length=3,
+                        verbose_name="Status",
                     ),
                 ),
                 (
-                    "attachment",
-                    models.ForeignKey(
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="+",
-                        to="wagtaildocs.document",
-                        verbose_name="Attachment",
-                    ),
+                    "input_log_hashes",
+                    models.JSONField(default=list, verbose_name="Input Log Hashes"),
                 ),
                 (
-                    "creator",
-                    models.ForeignKey(
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_creator",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Creator",
+                    "storage_path",
+                    models.CharField(
+                        blank=True,
+                        default="",
+                        max_length=500,
+                        verbose_name="Storage Path",
                     ),
                 ),
                 (
-                    "updated_by",
-                    models.ForeignKey(
+                    "payload_hash",
+                    models.CharField(
                         blank=True,
-                        editable=False,
-                        null=True,
-                        on_delete=django.db.models.deletion.SET_NULL,
-                        related_name="%(class)s_last_mod_user",
-                        to=settings.AUTH_USER_MODEL,
-                        verbose_name="Updater",
+                        default="",
+                        max_length=64,
+                        verbose_name="Payload Hash",
                     ),
                 ),
-            ],
-            options={
-                "verbose_name": "Top 100 Articles File",
-                "verbose_name_plural": "Top 100 Articles Files",
-            },
-        ),
-        migrations.CreateModel(
-            name="Top100Articles",
-            fields=[
                 (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
+                    "summary",
+                    models.JSONField(blank=True, default=dict, verbose_name="Summary"),
                 ),
                 (
-                    "created",
-                    models.DateTimeField(
-                        auto_now_add=True, verbose_name="Creation date"
-                    ),
+                    "attempts",
+                    models.PositiveIntegerField(default=0, verbose_name="Attempts"),
                 ),
                 (
-                    "updated",
-                    models.DateTimeField(
-                        auto_now=True, verbose_name="Last update date"
-                    ),
+                    "error_message",
+                    models.TextField(blank=True, default="", verbose_name="Error Message"),
                 ),
-                ("pid_issn", models.CharField(max_length=9, verbose_name="PID ISSN")),
-                ("year_month_day", models.DateField(verbose_name="Date of access")),
                 (
-                    "print_issn",
-                    models.CharField(
-                        blank=True, max_length=9, null=True, verbose_name="Print ISSN"
+                    "export_started_at",
+                    models.DateTimeField(
+                        blank=True,
+                        null=True,
+                        verbose_name="Export Started At",
                     ),
                 ),
                 (
-                    "online_issn",
-                    models.CharField(
-                        blank=True, max_length=9, null=True, verbose_name="Online ISSN"
-                    ),
+                    "exported_at",
+                    models.DateTimeField(blank=True, null=True, verbose_name="Exported At"),
                 ),
                 (
                     "collection",
-                    models.CharField(max_length=3, verbose_name="Collection Acronym 3"),
-                ),
-                ("pid", models.CharField(verbose_name="Publication ID")),
-                (
-                    "yop",
-                    models.PositiveSmallIntegerField(
-                        verbose_name="Year of Publication"
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="collection.collection",
+                        verbose_name="Collection",
                     ),
                 ),
-                (
-                    "total_item_requests",
-                    models.IntegerField(verbose_name="Total Item Requests"),
-                ),
-                (
-                    "total_item_investigations",
-                    models.IntegerField(verbose_name="Total Item Investigations"),
-                ),
-                (
-                    "unique_item_requests",
-                    models.IntegerField(verbose_name="Unique Item Requests"),
-                ),
-                (
-                    "unique_item_investigations",
-                    models.IntegerField(verbose_name="Unique Item Investigations"),
-                ),
                 (
                     "creator",
                     models.ForeignKey(
@@ -180,18 +132,23 @@ class Migration(migrations.Migration):
                 ),
             ],
             options={
-                "verbose_name_plural": "Top 100 Articles",
-                "indexes": [
-                    models.Index(
-                        fields=["pid_issn"], name="metrics_top_pid_iss_c1fba9_idx"
-                    ),
-                    models.Index(
-                        fields=["year_month_day"], name="metrics_top_year_mo_8cda7b_idx"
-                    ),
-                ],
-                "unique_together": {
-                    ("collection", "pid_issn", "pid", "year_month_day")
-                },
+                "verbose_name": "Daily Metric Job",
+                "verbose_name_plural": "Daily Metric Jobs",
+                "unique_together": {("collection", "access_date")},
             },
         ),
+        migrations.AddIndex(
+            model_name="dailymetricjob",
+            index=models.Index(
+                fields=["collection", "access_date"],
+                name="metrics_daily_coll_date_idx",
+            ),
+        ),
+        migrations.AddIndex(
+            model_name="dailymetricjob",
+            index=models.Index(
+                fields=["status", "export_started_at"],
+                name="metrics_daily_status_exp_idx",
+            ),
+        ),
     ]
diff --git a/metrics/migrations/0002_alter_top100articlesfile_status.py b/metrics/migrations/0002_alter_top100articlesfile_status.py
deleted file mode 100644
index b2b98c5..0000000
--- a/metrics/migrations/0002_alter_top100articlesfile_status.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Generated by Django 5.0.7 on 2024-08-30 21:27
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("metrics", "0001_initial"),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name="top100articlesfile",
-            name="status",
-            field=models.CharField(
-                choices=[
-                    ("QUE", "Queued"),
-                    ("PAR", "Parsing"),
-                    ("PRO", "Processed"),
-                    ("ERR", "Error"),
-                    ("INV", "Invalidated"),
-                ],
-                default="QUE",
-                max_length=5,
-            ),
-        ),
-    ]
diff --git a/metrics/migrations/0003_remove_top100articlesfile_attachment_and_more.py b/metrics/migrations/0003_remove_top100articlesfile_attachment_and_more.py
deleted file mode 100644
index 8b01d80..0000000
--- a/metrics/migrations/0003_remove_top100articlesfile_attachment_and_more.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# Generated by Django 5.0.7 on 2025-03-07 16:55
-
-import django.db.models.deletion
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("article", "0001_initial"),
-        ("collection", "0001_initial"),
-        ("journal", "0001_initial"),
-        ("metrics", "0002_alter_top100articlesfile_status"),
-    ]
-
-    operations = [
-        migrations.RemoveField(
-            model_name="top100articlesfile",
-            name="attachment",
-        ),
-        migrations.RemoveField(
-            model_name="top100articlesfile",
-            name="creator",
-        ),
-        migrations.RemoveField(
-            model_name="top100articlesfile",
-            name="updated_by",
-        ),
-        migrations.CreateModel(
-            name="Item",
-            fields=[
-                (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
-                ),
-                (
-                    "article",
-                    models.ForeignKey(
-                        on_delete=django.db.models.deletion.CASCADE,
-                        to="article.article",
-                        verbose_name="Article",
-                    ),
-                ),
-                (
-                    "collection",
-                    models.ForeignKey(
-                        on_delete=django.db.models.deletion.CASCADE,
-                        to="collection.collection",
-                        verbose_name="Collection",
-                    ),
-                ),
-                (
-                    "journal",
-                    models.ForeignKey(
-                        on_delete=django.db.models.deletion.CASCADE,
-                        to="journal.journal",
-                        verbose_name="Journal",
-                    ),
-                ),
-            ],
-            options={
-                "verbose_name": "Item",
-                "verbose_name_plural": "Items",
-            },
-        ),
-        migrations.CreateModel(
-            name="UserAgent",
-            fields=[
-                (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
-                ),
-                (
-                    "name",
-                    models.CharField(
-                        db_index=True, max_length=255, verbose_name="Name"
-                    ),
-                ),
-                (
-                    "version",
-                    models.CharField(
-                        db_index=True, max_length=255, verbose_name="Version"
-                    ),
-                ),
-            ],
-            options={
-                "verbose_name": "User Agent",
-                "verbose_name_plural": "User Agents",
-                "unique_together": {("name", "version")},
-            },
-        ),
-        migrations.CreateModel(
-            name="UserSession",
-            fields=[
-                (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
-                ),
-                ("datetime", models.DateTimeField(verbose_name="Datetime")),
-                (
-                    "user_ip",
-                    models.CharField(
-                        db_index=True, max_length=255, verbose_name="User IP"
-                    ),
-                ),
-                (
-                    "user_agent",
-                    models.ForeignKey(
-                        on_delete=django.db.models.deletion.CASCADE,
-                        to="metrics.useragent",
-                        verbose_name="User Agent",
-                    ),
-                ),
-            ],
-            options={
-                "verbose_name": "User Session",
-                "verbose_name_plural": "User Sessions",
-            },
-        ),
-        migrations.CreateModel(
-            name="ItemAccess",
-            fields=[
-                (
-                    "id",
-                    models.BigAutoField(
-                        auto_created=True,
-                        primary_key=True,
-                        serialize=False,
-                        verbose_name="ID",
-                    ),
-                ),
-                (
-                    "country_code",
-                    models.CharField(
-                        db_index=True, max_length=2, verbose_name="Country"
-                    ),
-                ),
-                (
-                    "media_language",
-                    models.CharField(
-                        db_index=True, max_length=2, verbose_name="Media Language"
-                    ),
-                ),
-                (
-                    "media_format",
-                    models.CharField(max_length=10, verbose_name="Media Format"),
-                ),
-                (
-                    "item",
-                    models.ForeignKey(
-                        on_delete=django.db.models.deletion.CASCADE,
-                        to="metrics.item",
-                        verbose_name="Item",
-                    ),
-                ),
-                (
-                    "user_session",
-                    models.ForeignKey(
-                        on_delete=django.db.models.deletion.CASCADE,
-                        to="metrics.usersession",
-                        verbose_name="User Session",
-                    ),
-                ),
-            ],
-            options={
-                "verbose_name": "Item Access",
-                "verbose_name_plural": "Items Access",
-            },
-        ),
-        migrations.DeleteModel(
-            name="Top100Articles",
-        ),
-    ]
diff --git a/metrics/migrations/0004_delete_top100articlesfile_and_more.py b/metrics/migrations/0004_delete_top100articlesfile_and_more.py
deleted file mode 100644
index b10c41b..0000000
--- a/metrics/migrations/0004_delete_top100articlesfile_and_more.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Generated by Django 5.0.7 on 2025-03-07 16:55
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("metrics", "0003_remove_top100articlesfile_attachment_and_more"),
-        ("tracker", "0003_logfilediscardedline_delete_top100articlesfileevent"),
-    ]
-
-    operations = [
-        migrations.DeleteModel(
-            name="Top100ArticlesFile",
-        ),
-        migrations.AddIndex(
-            model_name="item",
-            index=models.Index(
-                fields=["collection", "journal", "article"],
-                name="metrics_ite_collect_6971a5_idx",
-            ),
-        ),
-        migrations.AddIndex(
-            model_name="item",
-            index=models.Index(
-                fields=["collection", "journal"], name="metrics_ite_collect_b5f79b_idx"
-            ),
-        ),
-        migrations.AlterUniqueTogether(
-            name="item",
-            unique_together={("collection", "journal", "article")},
-        ),
-        migrations.AlterUniqueTogether(
-            name="usersession",
-            unique_together={("datetime", "user_agent", "user_ip")},
-        ),
-        migrations.AlterUniqueTogether(
-            name="itemaccess",
-            unique_together={
-                (
-                    "item",
-                    "user_session",
-                    "country_code",
-                    "media_format",
-                    "media_language",
-                )
-            },
-        ),
-    ]
diff --git a/metrics/migrations/0005_alter_itemaccess_unique_together_and_more.py b/metrics/migrations/0005_alter_itemaccess_unique_together_and_more.py
deleted file mode 100644
index 7bfafff..0000000
--- a/metrics/migrations/0005_alter_itemaccess_unique_together_and_more.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Generated by Django 5.0.7 on 2025-03-27 20:40
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("metrics", "0004_delete_top100articlesfile_and_more"),
-    ]
-
-    operations = [
-        migrations.AlterUniqueTogether(
-            name="itemaccess",
-            unique_together=set(),
-        ),
-        migrations.AddField(
-            model_name="itemaccess",
-            name="click_timestamps",
-            field=models.JSONField(default=dict, verbose_name="Click Timestamps"),
-        ),
-        migrations.AddField(
-            model_name="itemaccess",
-            name="content_type",
-            field=models.CharField(
-                default="undefined", max_length=16, verbose_name="Content Type"
-            ),
-            preserve_default=False,
-        ),
-        migrations.AlterField(
-            model_name="itemaccess",
-            name="media_format",
-            field=models.CharField(
-                db_index=True, max_length=10, verbose_name="Media Format"
-            ),
-        ),
-        migrations.AlterUniqueTogether(
-            name="itemaccess",
-            unique_together={
-                (
-                    "item",
-                    "user_session",
-                    "country_code",
-                    "media_format",
-                    "media_language",
-                    "content_type",
-                )
-            },
-        ),
-    ]
diff --git a/metrics/migrations/0006_alter_itemaccess_content_type.py b/metrics/migrations/0006_alter_itemaccess_content_type.py
deleted file mode 100644
index 0e81287..0000000
--- a/metrics/migrations/0006_alter_itemaccess_content_type.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# Generated by Django 5.0.7 on 2025-03-31 21:07
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("metrics", "0005_alter_itemaccess_unique_together_and_more"),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name="itemaccess",
-            name="content_type",
-            field=models.CharField(max_length=32, verbose_name="Content Type"),
-        ),
-    ]
diff --git a/metrics/migrations/0007_alter_usersession_datetime_and_more.py b/metrics/migrations/0007_alter_usersession_datetime_and_more.py
deleted file mode 100644
index e45036e..0000000
--- a/metrics/migrations/0007_alter_usersession_datetime_and_more.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Generated by Django 5.0.7 on 2025-06-12 17:16
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("metrics", "0006_alter_itemaccess_content_type"),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name="usersession",
-            name="datetime",
-            field=models.DateTimeField(db_index=True, verbose_name="Datetime"),
-        ),
-        migrations.AddIndex(
-            model_name="itemaccess",
-            index=models.Index(
-                fields=["item", "user_session"], name="metrics_ite_item_id_8799c9_idx"
-            ),
-        ),
-    ]
diff --git a/metrics/migrations/0008_remove_a_few_models.py b/metrics/migrations/0008_remove_a_few_models.py
deleted file mode 100644
index dfd14ec..0000000
--- a/metrics/migrations/0008_remove_a_few_models.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Generated by Django 5.0.7 on 2025-06-22 17:45
-
-from django.db import migrations
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("metrics", "0007_alter_usersession_datetime_and_more"),
-    ]
-
-    operations = [
-        migrations.AlterUniqueTogether(
-            name="itemaccess",
-            unique_together=None,
-        ),
-        migrations.AlterUniqueTogether(
-            name="useragent",
-            unique_together=None,
-        ),
-        migrations.AlterUniqueTogether(
-            name="usersession",
-            unique_together=None,
-        ),
-        migrations.RemoveField(
-            model_name="itemaccess",
-            name="user_session",
-        ),
-        migrations.RemoveField(
-            model_name="usersession",
-            name="user_agent",
-        ),
-        migrations.RemoveField(
-            model_name="itemaccess",
-            name="item",
-        ),
-        migrations.DeleteModel(
-            name="Item",
-        ),
-        migrations.DeleteModel(
-            name="ItemAccess",
-        ),
-        migrations.DeleteModel(
-            name="UserAgent",
-        ),
-        migrations.DeleteModel(
-            name="UserSession",
-        ),
-    ]
diff --git a/metrics/models.py b/metrics/models.py
index e69de29..aa789b5 100644
--- a/metrics/models.py
+++ b/metrics/models.py
@@ -0,0 +1,140 @@
+from django.db import models
+from django.utils.translation import gettext_lazy as _
+
+from collection.models import Collection
+from core.models import CommonControlField
+
+
+class DailyMetricJob(CommonControlField):
+    """Per-collection, per-day job record.
+
+    At most one row exists for each (collection, access_date) pair, as
+    enforced by ``Meta.unique_together``. The other fields hold a status
+    code, input log hashes, a storage path, a payload hash, a summary, an
+    attempt counter, an error message and two export timestamps; the code
+    that fills them in lives outside this module.
+    """
+
+    # Three-letter codes stored in ``status`` (its max_length is 3).
+    STATUS_PENDING = "PEN"
+    STATUS_EXPORTING = "EXP"
+    STATUS_EXPORTED = "SUC"  # stored as "SUC" although the label is "Exported"
+    STATUS_ERROR = "ERR"
+    STATUS_CHOICES = (
+        (STATUS_PENDING, _("Pending")),
+        (STATUS_EXPORTING, _("Exporting")),
+        (STATUS_EXPORTED, _("Exported")),
+        (STATUS_ERROR, _("Error")),
+    )
+
+    # Deleting a Collection also deletes its jobs (on_delete=CASCADE).
+    collection = models.ForeignKey(
+        Collection,
+        verbose_name=_("Collection"),
+        on_delete=models.CASCADE,
+        db_index=True,
+    )
+
+    # Unique per collection (see Meta.unique_together).
+    access_date = models.DateField(
+        verbose_name=_("Access Date"),
+        db_index=True,
+    )
+
+    # One of the STATUS_* codes above; new rows start as STATUS_PENDING.
+    status = models.CharField(
+        verbose_name=_("Status"),
+        max_length=3,
+        choices=STATUS_CHOICES,
+        default=STATUS_PENDING,
+        db_index=True,
+    )
+
+    # JSON list, empty by default.
+    # NOTE(review): presumably hashes of the log files used as input - confirm.
+    input_log_hashes = models.JSONField(
+        verbose_name=_("Input Log Hashes"),
+        default=list,
+    )
+
+    # Empty string (the default) when no path has been recorded.
+    storage_path = models.CharField(
+        verbose_name=_("Storage Path"),
+        max_length=500,
+        blank=True,
+        default="",
+    )
+
+    # Empty string by default; ``job_id`` returns "" while this is empty.
+    # NOTE(review): 64 chars would fit a hex SHA-256 digest - confirm the algorithm.
+    payload_hash = models.CharField(
+        verbose_name=_("Payload Hash"),
+        max_length=64,
+        blank=True,
+        default="",
+    )
+
+    # JSON object, ``{}`` by default; its keys are not defined in this module.
+    summary = models.JSONField(
+        verbose_name=_("Summary"),
+        default=dict,
+        blank=True,
+    )
+
+    # Starts at 0; nothing in this module increments it.
+    attempts = models.PositiveIntegerField(
+        verbose_name=_("Attempts"),
+        default=0,
+    )
+
+    # Empty string by default.
+    error_message = models.TextField(
+        verbose_name=_("Error Message"),
+        blank=True,
+        default="",
+    )
+
+    # NULL until a value is assigned; part of the (status, export_started_at) index.
+    export_started_at = models.DateTimeField(
+        verbose_name=_("Export Started At"),
+        null=True,
+        blank=True,
+    )
+
+    # NULL until a value is assigned.
+    exported_at = models.DateTimeField(
+        verbose_name=_("Exported At"),
+        null=True,
+        blank=True,
+    )
+
+    @property
+    def input_log_count(self):
+        """Return the number of entries in ``input_log_hashes`` (0 when it is empty or None)."""
+        return len(self.input_log_hashes or [])
+
+    @property
+    def job_id(self):
+        """Return ``"<acron3>|<access_date ISO>|<payload_hash>"``, or ``""`` if no hash is set.
+
+        Reads ``self.collection``, which can trigger a database query when the
+        related Collection is not already loaded.
+        """
+        if not self.payload_hash:
+            return ""
+        return f"{self.collection.acron3}|{self.access_date.isoformat()}|{self.payload_hash}"
+
+    class Meta:
+        verbose_name = _("Daily Metric Job")
+        verbose_name_plural = _("Daily Metric Jobs")
+        unique_together = (("collection", "access_date"),)
+        # Index names are repeated in metrics/migrations/0001_initial.py; keep both in sync.
+        # NOTE(review): the first index covers the same columns as unique_together and may be redundant.
+        indexes = [
+            models.Index(fields=["collection", "access_date"], name="metrics_daily_coll_date_idx"),
+            models.Index(fields=["status", "export_started_at"], name="metrics_daily_status_exp_idx"),
+        ]
+
+    def __str__(self):
+        """Return ``"<collection acron3>-<access_date>"``."""
+        return f"{self.collection.acron3}-{self.access_date}"
diff --git a/metrics/templates/search/indexes/metrics/top100articles_text.txt b/metrics/templates/search/indexes/metrics/top100articles_text.txt
deleted file mode 100644
index ccf5e94..0000000
--- a/metrics/templates/search/indexes/metrics/top100articles_text.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-{{ object.collection }}
-{{ object.key_issn }}
-{{ object.pid }}
-{{ object.yop }}
-{{ object.language }}
-{{ object.country }}
-{{ object.total_item_requests }}
-{{ object.total_item_investigations }}
-{{ object.unique_item_requests }}
-{{ object.unique_item_investigations }}
\ No newline at end of file