diff --git a/ENV.md b/ENV.md index 2a203d7d..b0811247 100644 --- a/ENV.md +++ b/ENV.md @@ -22,10 +22,36 @@ Please ensure these are properly defined in a `.env` file in the root directory. | `DISCORD_WEBHOOK_URL` | The URL for the Discord webhook used for notifications | `abc123` | | `HUGGINGFACE_INFERENCE_API_KEY` | The API key required for accessing the Hugging Face Inference API. | `abc123` | | `HUGGINGFACE_HUB_TOKEN` | The API key required for uploading to the PDAP HuggingFace account via Hugging Face Hub API. | `abc123` | -| `SCHEDULED_TASKS_FLAG` | Set to `1` to enable running scheduled tasks. | `1` | + + [^1:] The user account in question will require elevated permissions to access certain endpoints. At a minimum, the user will require the `source_collector` and `db_write` permissions. +## Task Flags +Task flags are used to enable/disable certain tasks. They are set to `1` to enable the task and `0` to disable the task. By default, all tasks are enabled. + +The following flags are available: + +| Flag | Description | +|---------------------------------------|-------------------------------------------------------| +| `SCHEDULED_TASKS_FLAG` | All scheduled tasks. | +| `URL_HTML_TASK_FLAG` | URL HTML scraping task. | +| `URL_RECORD_TYPE_TASK_FLAG` | Automatically assigns Record Types to URLs. | +| `URL_AGENCY_IDENTIFICATION_TASK_FLAG` | Automatically assigns and suggests Agencies for URLs. | +| `URL_SUBMIT_APPROVED_TASK_FLAG` | Submits approved URLs to the Data Sources App. | +| `URL_DUPLICATE_TASK_FLAG` | Identifies duplicate URLs. | +| `URL_MISC_METADATA_TASK_FLAG` | Adds misc metadata to URLs. | +| `URL_404_PROBE_TASK_FLAG` | Probes URLs for 404 errors. | +| `URL_AUTO_RELEVANCE_TASK_FLAG` | Automatically assigns Relevances to URLs. | +| `URL_PROBE_TASK_FLAG` | Probes URLs for web metadata. | +| `SYNC_AGENCIES_TASK_FLAG` | Synchronize agencies from Data Sources App. | +| `SYNC_DATA_SOURCES_TASK_FLAG` | Synchronize data sources from Data Sources App. 
| +| `PUSH_TO_HUGGING_FACE_TASK_FLAG` | Pushes data to HuggingFace. | +| `POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG` | Populates the backlog snapshot. | +| `DELETE_OLD_LOGS_TASK_FLAG` | Deletes old logs. | +| `RUN_URL_TASKS_TASK_FLAG` | Runs URL tasks. | + + ## Foreign Data Wrapper (FDW) ``` FDW_DATA_SOURCES_HOST=127.0.0.1 # The host of the Data Sources Database, used for FDW setup diff --git a/alembic/versions/2025_08_10_1032-11ece61d7ac2_add_scheduled_tasks.py b/alembic/versions/2025_08_10_1032-11ece61d7ac2_add_scheduled_tasks.py new file mode 100644 index 00000000..97fbd655 --- /dev/null +++ b/alembic/versions/2025_08_10_1032-11ece61d7ac2_add_scheduled_tasks.py @@ -0,0 +1,63 @@ +"""Add scheduled tasks + +Revision ID: 11ece61d7ac2 +Revises: 8cd5aa7670ff +Create Date: 2025-08-10 10:32:11.400714 + +""" +from typing import Sequence, Union + +from src.util.alembic_helpers import switch_enum_type + +# revision identifiers, used by Alembic. +revision: str = '11ece61d7ac2' +down_revision: Union[str, None] = '8cd5aa7670ff' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=[ + 'HTML', + 'Relevancy', + 'Record Type', + 'Agency Identification', + 'Misc Metadata', + 'Submit Approved URLs', + 'Duplicate Detection', + '404 Probe', + 'Sync Agencies', + 'Sync Data Sources', + 'Push to Hugging Face', + 'URL Probe', + 'Populate Backlog Snapshot', + 'Delete Old Logs', + 'Run URL Task Cycles' + ] + ) + + +def downgrade() -> None: + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=[ + 'HTML', + 'Relevancy', + 'Record Type', + 'Agency Identification', + 'Misc Metadata', + 'Submit Approved URLs', + 'Duplicate Detection', + '404 Probe', + 'Sync Agencies', + 'Sync Data Sources', + 'Push to Hugging Face', + 'URL Probe' + ] + ) diff --git 
a/src/api/main.py b/src/api/main.py index e9916724..4e587a2a 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -24,6 +24,7 @@ from src.core.tasks.handler import TaskHandler from src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader from src.core.tasks.scheduled.manager import AsyncScheduledTaskManager +from src.core.tasks.scheduled.registry.core import ScheduledJobRegistry from src.core.tasks.url.loader import URLTaskOperatorLoader from src.core.tasks.url.manager import TaskManager from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser @@ -98,15 +99,16 @@ async def lifespan(app: FastAPI): collector_manager=async_collector_manager ) async_scheduled_task_manager = AsyncScheduledTaskManager( - async_core=async_core, handler=task_handler, loader=ScheduledTaskOperatorLoader( adb_client=adb_client, pdap_client=pdap_client, hf_client=HuggingFaceHubClient( token=env_var_manager.hf_hub_token - ) - ) + ), + async_core=async_core, + ), + registry=ScheduledJobRegistry() ) await async_scheduled_task_manager.setup() diff --git a/src/core/core.py b/src/core/core.py index ec82e3c5..f2c084c5 100644 --- a/src/core/core.py +++ b/src/core/core.py @@ -48,6 +48,9 @@ class AsyncCore: + task_manager: TaskManager | None = None + adb_client: AsyncDatabaseClient | None = None + collector_manager: AsyncCollectorManager | None = None def __init__( self, @@ -57,7 +60,6 @@ def __init__( ): self.task_manager = task_manager self.adb_client = adb_client - self.collector_manager = collector_manager diff --git a/src/core/tasks/scheduled/huggingface/__init__.py b/src/core/tasks/scheduled/impl/__init__.py similarity index 100% rename from src/core/tasks/scheduled/huggingface/__init__.py rename to src/core/tasks/scheduled/impl/__init__.py diff --git a/src/core/tasks/scheduled/huggingface/queries/__init__.py b/src/core/tasks/scheduled/impl/backlog/__init__.py similarity index 100% rename from src/core/tasks/scheduled/huggingface/queries/__init__.py rename to 
src/core/tasks/scheduled/impl/backlog/__init__.py diff --git a/src/core/tasks/scheduled/impl/backlog/operator.py b/src/core/tasks/scheduled/impl/backlog/operator.py new file mode 100644 index 00000000..d628c91c --- /dev/null +++ b/src/core/tasks/scheduled/impl/backlog/operator.py @@ -0,0 +1,16 @@ +from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase +from src.db.client.async_ import AsyncDatabaseClient +from src.db.enums import TaskType + + +class PopulateBacklogSnapshotTaskOperator(ScheduledTaskOperatorBase): + + def __init__(self, adb_client: AsyncDatabaseClient): + super().__init__(adb_client) + + @property + def task_type(self) -> TaskType: + return TaskType.POPULATE_BACKLOG_SNAPSHOT + + async def inner_task_logic(self) -> None: + await self.adb_client.populate_backlog_snapshot() \ No newline at end of file diff --git a/src/core/tasks/scheduled/huggingface/queries/check/__init__.py b/src/core/tasks/scheduled/impl/delete_logs/__init__.py similarity index 100% rename from src/core/tasks/scheduled/huggingface/queries/check/__init__.py rename to src/core/tasks/scheduled/impl/delete_logs/__init__.py diff --git a/src/core/tasks/scheduled/impl/delete_logs/operator.py b/src/core/tasks/scheduled/impl/delete_logs/operator.py new file mode 100644 index 00000000..fa7a6ae4 --- /dev/null +++ b/src/core/tasks/scheduled/impl/delete_logs/operator.py @@ -0,0 +1,16 @@ +from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase +from src.db.client.async_ import AsyncDatabaseClient +from src.db.enums import TaskType + + +class DeleteOldLogsTaskOperator(ScheduledTaskOperatorBase): + + def __init__(self, adb_client: AsyncDatabaseClient): + super().__init__(adb_client) + + @property + def task_type(self) -> TaskType: + return TaskType.DELETE_OLD_LOGS + + async def inner_task_logic(self) -> None: + await self.adb_client.delete_old_logs() \ No newline at end of file diff --git 
a/src/core/tasks/scheduled/huggingface/queries/get/__init__.py b/src/core/tasks/scheduled/impl/huggingface/__init__.py similarity index 100% rename from src/core/tasks/scheduled/huggingface/queries/get/__init__.py rename to src/core/tasks/scheduled/impl/huggingface/__init__.py diff --git a/src/core/tasks/scheduled/huggingface/operator.py b/src/core/tasks/scheduled/impl/huggingface/operator.py similarity index 100% rename from src/core/tasks/scheduled/huggingface/operator.py rename to src/core/tasks/scheduled/impl/huggingface/operator.py diff --git a/src/core/tasks/scheduled/sync/__init__.py b/src/core/tasks/scheduled/impl/huggingface/queries/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/__init__.py rename to src/core/tasks/scheduled/impl/huggingface/queries/__init__.py diff --git a/src/core/tasks/scheduled/sync/agency/__init__.py b/src/core/tasks/scheduled/impl/huggingface/queries/check/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/agency/__init__.py rename to src/core/tasks/scheduled/impl/huggingface/queries/check/__init__.py diff --git a/src/core/tasks/scheduled/huggingface/queries/check/core.py b/src/core/tasks/scheduled/impl/huggingface/queries/check/core.py similarity index 79% rename from src/core/tasks/scheduled/huggingface/queries/check/core.py rename to src/core/tasks/scheduled/impl/huggingface/queries/check/core.py index 7b724a30..c76fa2e1 100644 --- a/src/core/tasks/scheduled/huggingface/queries/check/core.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/check/core.py @@ -1,6 +1,6 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.core.tasks.scheduled.huggingface.queries.check.requester import CheckValidURLsUpdatedRequester +from src.core.tasks.scheduled.impl.huggingface.queries.check.requester import CheckValidURLsUpdatedRequester from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/core/tasks/scheduled/huggingface/queries/check/requester.py 
b/src/core/tasks/scheduled/impl/huggingface/queries/check/requester.py similarity index 100% rename from src/core/tasks/scheduled/huggingface/queries/check/requester.py rename to src/core/tasks/scheduled/impl/huggingface/queries/check/requester.py diff --git a/src/core/tasks/scheduled/sync/agency/dtos/__init__.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/agency/dtos/__init__.py rename to src/core/tasks/scheduled/impl/huggingface/queries/get/__init__.py diff --git a/src/core/tasks/scheduled/huggingface/queries/get/convert.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py similarity index 66% rename from src/core/tasks/scheduled/huggingface/queries/get/convert.py rename to src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py index 0f8e26a6..9d5c4135 100644 --- a/src/core/tasks/scheduled/huggingface/queries/get/convert.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py @@ -1,7 +1,7 @@ from src.collectors.enums import URLStatus from src.core.enums import RecordType -from src.core.tasks.scheduled.huggingface.queries.get.enums import RecordTypeCoarse -from src.core.tasks.scheduled.huggingface.queries.get.mappings import FINE_COARSE_RECORD_TYPE_MAPPING, \ +from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse +from src.core.tasks.scheduled.impl.huggingface.queries.get.mappings import FINE_COARSE_RECORD_TYPE_MAPPING, \ OUTCOME_RELEVANCY_MAPPING diff --git a/src/core/tasks/scheduled/huggingface/queries/get/core.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/core.py similarity index 91% rename from src/core/tasks/scheduled/huggingface/queries/get/core.py rename to src/core/tasks/scheduled/impl/huggingface/queries/get/core.py index 906f4d4f..90d448dc 100644 --- a/src/core/tasks/scheduled/huggingface/queries/get/core.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/core.py 
@@ -2,9 +2,9 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.collectors.enums import URLStatus -from src.core.tasks.scheduled.huggingface.queries.get.convert import convert_url_status_to_relevant, \ +from src.core.tasks.scheduled.impl.huggingface.queries.get.convert import convert_url_status_to_relevant, \ convert_fine_to_coarse_record_type -from src.core.tasks.scheduled.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput +from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput from src.db.models.instantiations.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.instantiations.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/core/tasks/scheduled/huggingface/queries/get/enums.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/enums.py similarity index 100% rename from src/core/tasks/scheduled/huggingface/queries/get/enums.py rename to src/core/tasks/scheduled/impl/huggingface/queries/get/enums.py diff --git a/src/core/tasks/scheduled/huggingface/queries/get/mappings.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py similarity index 97% rename from src/core/tasks/scheduled/huggingface/queries/get/mappings.py rename to src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py index 2196a927..a6ceb233 100644 --- a/src/core/tasks/scheduled/huggingface/queries/get/mappings.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py @@ -1,6 +1,6 @@ from src.collectors.enums import URLStatus from src.core.enums import RecordType -from src.core.tasks.scheduled.huggingface.queries.get.enums import RecordTypeCoarse +from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse FINE_COARSE_RECORD_TYPE_MAPPING = { # Police and Public diff --git a/src/core/tasks/scheduled/huggingface/queries/get/model.py 
b/src/core/tasks/scheduled/impl/huggingface/queries/get/model.py similarity index 75% rename from src/core/tasks/scheduled/huggingface/queries/get/model.py rename to src/core/tasks/scheduled/impl/huggingface/queries/get/model.py index 8aa52b16..187b2ee2 100644 --- a/src/core/tasks/scheduled/huggingface/queries/get/model.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/model.py @@ -1,7 +1,7 @@ from pydantic import BaseModel from src.core.enums import RecordType -from src.core.tasks.scheduled.huggingface.queries.get.enums import RecordTypeCoarse +from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse class GetForLoadingToHuggingFaceOutput(BaseModel): diff --git a/src/core/tasks/scheduled/huggingface/queries/state.py b/src/core/tasks/scheduled/impl/huggingface/queries/state.py similarity index 100% rename from src/core/tasks/scheduled/huggingface/queries/state.py rename to src/core/tasks/scheduled/impl/huggingface/queries/state.py diff --git a/src/core/tasks/scheduled/sync/agency/queries/__init__.py b/src/core/tasks/scheduled/impl/run_url_tasks/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/agency/queries/__init__.py rename to src/core/tasks/scheduled/impl/run_url_tasks/__init__.py diff --git a/src/core/tasks/scheduled/impl/run_url_tasks/operator.py b/src/core/tasks/scheduled/impl/run_url_tasks/operator.py new file mode 100644 index 00000000..ef76fbac --- /dev/null +++ b/src/core/tasks/scheduled/impl/run_url_tasks/operator.py @@ -0,0 +1,17 @@ +from src.core.core import AsyncCore +from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase +from src.db.enums import TaskType + + +class RunURLTasksTaskOperator(ScheduledTaskOperatorBase): + + def __init__(self, async_core: AsyncCore): + super().__init__(async_core.adb_client) + self.async_core = async_core + + @property + def task_type(self) -> TaskType: + return TaskType.RUN_URL_TASKS + + async def inner_task_logic(self) -> 
None: + await self.async_core.run_tasks() \ No newline at end of file diff --git a/src/core/tasks/scheduled/sync/data_sources/__init__.py b/src/core/tasks/scheduled/impl/sync/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/__init__.py rename to src/core/tasks/scheduled/impl/sync/__init__.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/__init__.py rename to src/core/tasks/scheduled/impl/sync/agency/__init__.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/dtos/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/__init__.py rename to src/core/tasks/scheduled/impl/sync/agency/dtos/__init__.py diff --git a/src/core/tasks/scheduled/sync/agency/dtos/parameters.py b/src/core/tasks/scheduled/impl/sync/agency/dtos/parameters.py similarity index 100% rename from src/core/tasks/scheduled/sync/agency/dtos/parameters.py rename to src/core/tasks/scheduled/impl/sync/agency/dtos/parameters.py diff --git a/src/core/tasks/scheduled/sync/agency/operator.py b/src/core/tasks/scheduled/impl/sync/agency/operator.py similarity index 89% rename from src/core/tasks/scheduled/sync/agency/operator.py rename to src/core/tasks/scheduled/impl/sync/agency/operator.py index 333d0195..db20acf1 100644 --- a/src/core/tasks/scheduled/sync/agency/operator.py +++ b/src/core/tasks/scheduled/impl/sync/agency/operator.py @@ -1,5 +1,5 @@ -from src.core.tasks.scheduled.sync.check import check_max_sync_requests_not_exceeded -from src.core.tasks.scheduled.sync.agency.dtos.parameters import AgencySyncParameters +from src.core.tasks.scheduled.impl.sync.check import check_max_sync_requests_not_exceeded +from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import 
AgencySyncParameters from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/__init__.py b/src/core/tasks/scheduled/impl/sync/agency/queries/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/__init__.py rename to src/core/tasks/scheduled/impl/sync/agency/queries/__init__.py diff --git a/src/core/tasks/scheduled/sync/agency/queries/get_sync_params.py b/src/core/tasks/scheduled/impl/sync/agency/queries/get_sync_params.py similarity index 91% rename from src/core/tasks/scheduled/sync/agency/queries/get_sync_params.py rename to src/core/tasks/scheduled/impl/sync/agency/queries/get_sync_params.py index a502a156..106211df 100644 --- a/src/core/tasks/scheduled/sync/agency/queries/get_sync_params.py +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/get_sync_params.py @@ -2,7 +2,7 @@ from sqlalchemy.exc import NoResultFound from sqlalchemy.ext.asyncio import AsyncSession -from src.core.tasks.scheduled.sync.agency.dtos.parameters import AgencySyncParameters +from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters from src.db.models.instantiations.state.sync.agencies import AgenciesSyncState from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/core/tasks/scheduled/sync/agency/queries/mark_full_sync.py b/src/core/tasks/scheduled/impl/sync/agency/queries/mark_full_sync.py similarity index 100% rename from src/core/tasks/scheduled/sync/agency/queries/mark_full_sync.py rename to src/core/tasks/scheduled/impl/sync/agency/queries/mark_full_sync.py diff --git a/src/core/tasks/scheduled/sync/agency/queries/update_sync_progress.py b/src/core/tasks/scheduled/impl/sync/agency/queries/update_sync_progress.py similarity index 100% rename from 
src/core/tasks/scheduled/sync/agency/queries/update_sync_progress.py rename to src/core/tasks/scheduled/impl/sync/agency/queries/update_sync_progress.py diff --git a/src/core/tasks/scheduled/sync/agency/queries/upsert.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert.py similarity index 100% rename from src/core/tasks/scheduled/sync/agency/queries/upsert.py rename to src/core/tasks/scheduled/impl/sync/agency/queries/upsert.py diff --git a/src/core/tasks/scheduled/sync/check.py b/src/core/tasks/scheduled/impl/sync/check.py similarity index 69% rename from src/core/tasks/scheduled/sync/check.py rename to src/core/tasks/scheduled/impl/sync/check.py index 449506c5..3dfe75dc 100644 --- a/src/core/tasks/scheduled/sync/check.py +++ b/src/core/tasks/scheduled/impl/sync/check.py @@ -1,5 +1,5 @@ -from src.core.tasks.scheduled.sync.constants import MAX_SYNC_REQUESTS -from src.core.tasks.scheduled.sync.exceptions import MaxRequestsExceededError +from src.core.tasks.scheduled.impl.sync.constants import MAX_SYNC_REQUESTS +from src.core.tasks.scheduled.impl.sync.exceptions import MaxRequestsExceededError def check_max_sync_requests_not_exceeded(request_count: int) -> None: diff --git a/src/core/tasks/scheduled/sync/constants.py b/src/core/tasks/scheduled/impl/sync/constants.py similarity index 100% rename from src/core/tasks/scheduled/sync/constants.py rename to src/core/tasks/scheduled/impl/sync/constants.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/helpers/__init__.py b/src/core/tasks/scheduled/impl/sync/data_sources/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/helpers/__init__.py rename to src/core/tasks/scheduled/impl/sync/data_sources/__init__.py diff --git a/src/core/tasks/scheduled/sync/data_sources/operator.py b/src/core/tasks/scheduled/impl/sync/data_sources/operator.py similarity index 89% rename from src/core/tasks/scheduled/sync/data_sources/operator.py rename to 
src/core/tasks/scheduled/impl/sync/data_sources/operator.py index cfae9459..ad595919 100644 --- a/src/core/tasks/scheduled/sync/data_sources/operator.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/operator.py @@ -1,6 +1,6 @@ from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase -from src.core.tasks.scheduled.sync.check import check_max_sync_requests_not_exceeded -from src.core.tasks.scheduled.sync.data_sources.params import DataSourcesSyncParameters +from src.core.tasks.scheduled.impl.sync.check import check_max_sync_requests_not_exceeded +from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType from src.external.pdap.client import PDAPClient diff --git a/src/core/tasks/scheduled/sync/data_sources/params.py b/src/core/tasks/scheduled/impl/sync/data_sources/params.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/params.py rename to src/core/tasks/scheduled/impl/sync/data_sources/params.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/__init__.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/__init__.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/__init__.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/get_sync_params.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/get_sync_params.py similarity index 91% rename from src/core/tasks/scheduled/sync/data_sources/queries/get_sync_params.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/get_sync_params.py index 5608dfe4..26e76921 100644 --- a/src/core/tasks/scheduled/sync/data_sources/queries/get_sync_params.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/get_sync_params.py @@ -2,7 +2,7 @@ from 
sqlalchemy.exc import NoResultFound from sqlalchemy.ext.asyncio import AsyncSession -from src.core.tasks.scheduled.sync.data_sources.params import DataSourcesSyncParameters +from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters from src.db.models.instantiations.state.sync.data_sources import DataSourcesSyncState from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/mark_full_sync.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/mark_full_sync.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/mark_full_sync.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/mark_full_sync.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/update_sync_progress.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/update_sync_progress.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/update_sync_progress.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/update_sync_progress.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/insert/__init__.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/insert/__init__.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/__init__.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/lookup/__init__.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/lookup/__init__.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/__init__.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/convert.py 
b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/convert.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/convert.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/convert.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/core.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py similarity index 57% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/core.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py index e1820898..6222d1fd 100644 --- a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/core.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py @@ -1,7 +1,7 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.query import URLAgencyLinkUpdateQueryBuilder -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyForDataSourcesSyncParams +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.query import URLAgencyLinkUpdateQueryBuilder +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyForDataSourcesSyncParams async def update_agency_links( diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/params.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/params.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/params.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/params.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/query.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/query.py similarity index 91% rename 
from src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/query.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/query.py index 4850be39..fa807acc 100644 --- a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/agency/query.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/query.py @@ -3,10 +3,10 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.convert import convert_to_link_url_agency_models +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.convert import convert_to_link_url_agency_models from src.db.helpers.session import session_helper as sh from src.db.models.instantiations.link.url_agency.pydantic import LinkURLAgencyPydantic -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyForDataSourcesSyncParams +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyForDataSourcesSyncParams from src.db.models.instantiations.link.url_agency.sqlalchemy import LinkURLAgency from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/core.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/core.py similarity index 87% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/core.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/core.py index a0517b45..44737be7 100644 --- a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/core.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/core.py @@ -3,13 +3,13 @@ from sqlalchemy.ext.asyncio import AsyncSession from typing_extensions import override -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.helpers.filter import filter_for_urls_with_ids, \ +from 
src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.helpers.filter import filter_for_urls_with_ids, \ get_mappings_for_urls_without_data_sources -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.mapper import URLSyncInfoMapper -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.param_manager import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.mapper import URLSyncInfoMapper +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.param_manager import \ UpsertURLsFromDataSourcesParamManager -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.requester import UpsertURLsFromDataSourcesDBRequester -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.response import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.requester import UpsertURLsFromDataSourcesDBRequester +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import \ LookupURLForDataSourcesSyncResponse from src.db.dtos.url.mapping import URLMapping from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/update/__init__.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/__init__.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/update/__init__.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/__init__.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/helpers/convert.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/convert.py similarity index 92% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/helpers/convert.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/convert.py index f0933b04..d26b51b1 100644 --- 
a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/helpers/convert.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/convert.py @@ -1,7 +1,7 @@ from src.collectors.enums import URLStatus -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.insert.params import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.insert.params import \ InsertURLForDataSourcesSyncParams -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.update.params import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.update.params import \ UpdateURLForDataSourcesSyncParams from src.external.pdap.dtos.sync.data_sources import DataSourcesSyncResponseInnerInfo from src.external.pdap.enums import DataSourcesURLStatus, ApprovalStatus diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/helpers/filter.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/filter.py similarity index 89% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/helpers/filter.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/filter.py index ef23fcd2..d7e6ba73 100644 --- a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/helpers/filter.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/filter.py @@ -1,4 +1,4 @@ -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.response import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import \ LookupURLForDataSourcesSyncResponse from src.db.dtos.url.mapping import URLMapping diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/mapper.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/mapper.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/mapper.py rename to 
src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/mapper.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/param_manager.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py similarity index 83% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/param_manager.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py index 19d8a0cd..ffbe61f9 100644 --- a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/param_manager.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py @@ -1,13 +1,13 @@ -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.params import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import \ UpdateLinkURLAgencyForDataSourcesSyncParams -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.helpers.convert import convert_to_url_update_params, \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.helpers.convert import convert_to_url_update_params, \ convert_to_url_insert_params -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.mapper import URLSyncInfoMapper -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.insert.params import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.mapper import URLSyncInfoMapper +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.insert.params import \ InsertURLForDataSourcesSyncParams -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.response import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import \ LookupURLForDataSourcesSyncResponse -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.update.params import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.update.params import \ 
UpdateURLForDataSourcesSyncParams from src.db.dtos.url.mapping import URLMapping from src.db.models.instantiations.link.url_agency.pydantic import LinkURLAgencyPydantic diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/requester.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/requester.py similarity index 80% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/requester.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/requester.py index 14a73ce8..c0d6eaa1 100644 --- a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/requester.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/requester.py @@ -1,16 +1,16 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.params import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import \ UpdateLinkURLAgencyForDataSourcesSyncParams -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.query import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.query import \ URLAgencyLinkUpdateQueryBuilder -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.insert.params import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.insert.params import \ InsertURLForDataSourcesSyncParams -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.query import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.query import \ LookupURLForDataSourcesSyncQueryBuilder -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.response import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import \ LookupURLForDataSourcesSyncResponse -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.update.params import \ +from 
src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.update.params import \ UpdateURLForDataSourcesSyncParams from src.db.dtos.url.mapping import URLMapping from src.db.helpers.session import session_helper as sh diff --git a/tests/automated/integration/tasks/scheduled/huggingface/__init__.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/huggingface/__init__.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/huggingface/setup/__init__.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/insert/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/huggingface/setup/__init__.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/insert/__init__.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/insert/params.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/insert/params.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/insert/params.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/insert/params.py diff --git a/tests/automated/integration/tasks/scheduled/huggingface/setup/models/__init__.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/lookup/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/huggingface/setup/models/__init__.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/lookup/__init__.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/lookup/format.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/lookup/format.py similarity index 100% rename from 
src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/lookup/format.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/lookup/format.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/lookup/query.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/lookup/query.py similarity index 90% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/lookup/query.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/lookup/query.py index f24c84ae..cf232a4a 100644 --- a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/lookup/query.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/lookup/query.py @@ -1,9 +1,9 @@ from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.format import format_agency_ids_result +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.format import format_agency_ids_result from src.db.helpers.session import session_helper as sh -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.response import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import \ LookupURLForDataSourcesSyncResponse, URLDataSyncInfo from src.db.models.instantiations.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.instantiations.url.core.sqlalchemy import URL diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/lookup/response.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/lookup/response.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/lookup/response.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/lookup/response.py diff --git 
a/tests/automated/integration/tasks/scheduled/huggingface/setup/queries/__init__.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/update/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/huggingface/setup/queries/__init__.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/update/__init__.py diff --git a/src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/update/params.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/update/params.py similarity index 100% rename from src/core/tasks/scheduled/sync/data_sources/queries/upsert/url/update/params.py rename to src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/url/update/params.py diff --git a/src/core/tasks/scheduled/sync/exceptions.py b/src/core/tasks/scheduled/impl/sync/exceptions.py similarity index 100% rename from src/core/tasks/scheduled/sync/exceptions.py rename to src/core/tasks/scheduled/impl/sync/exceptions.py diff --git a/src/core/tasks/scheduled/loader.py b/src/core/tasks/scheduled/loader.py index 36f28db5..b738a0c9 100644 --- a/src/core/tasks/scheduled/loader.py +++ b/src/core/tasks/scheduled/loader.py @@ -1,6 +1,14 @@ -from src.core.tasks.scheduled.huggingface.operator import PushToHuggingFaceTaskOperator -from src.core.tasks.scheduled.sync.agency.operator import SyncAgenciesTaskOperator -from src.core.tasks.scheduled.sync.data_sources.operator import SyncDataSourcesTaskOperator +from environs import Env + +from src.core.core import AsyncCore +from src.core.tasks.scheduled.enums import IntervalEnum +from src.core.tasks.scheduled.impl.backlog.operator import PopulateBacklogSnapshotTaskOperator +from src.core.tasks.scheduled.impl.delete_logs.operator import DeleteOldLogsTaskOperator +from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator +from src.core.tasks.scheduled.impl.run_url_tasks.operator import RunURLTasksTaskOperator +from 
src.core.tasks.scheduled.impl.sync.agency.operator import SyncAgenciesTaskOperator +from src.core.tasks.scheduled.impl.sync.data_sources.operator import SyncDataSourcesTaskOperator +from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.hub.client import HuggingFaceHubClient from src.external.pdap.client import PDAPClient @@ -10,30 +18,71 @@ class ScheduledTaskOperatorLoader: def __init__( self, + async_core: AsyncCore, adb_client: AsyncDatabaseClient, pdap_client: PDAPClient, hf_client: HuggingFaceHubClient ): # Dependencies + self.async_core = async_core self.adb_client = adb_client self.pdap_client = pdap_client self.hf_client = hf_client + self.env = Env() + self.env.read_env() + + + async def load_entries(self) -> list[ScheduledTaskEntry]: + scheduled_task_flag = self.env.bool("SCHEDULED_TASKS_FLAG", default=True) + if not scheduled_task_flag: + print("Scheduled tasks are disabled.") + return [] + - async def get_sync_agencies_task_operator(self) -> SyncAgenciesTaskOperator: - return SyncAgenciesTaskOperator( - adb_client=self.adb_client, - pdap_client=self.pdap_client - ) + return [ + ScheduledTaskEntry( + operator=RunURLTasksTaskOperator(async_core=self.async_core), + interval=IntervalEnum.HOURLY, + enabled=self.env.bool("RUN_URL_TASKS_TASK_FLAG", default=True) - async def get_sync_data_sources_task_operator(self) -> SyncDataSourcesTaskOperator: - return SyncDataSourcesTaskOperator( - adb_client=self.adb_client, - pdap_client=self.pdap_client - ) + ), + ScheduledTaskEntry( + operator=DeleteOldLogsTaskOperator(adb_client=self.async_core.adb_client), + interval=IntervalEnum.DAILY, + enabled=self.env.bool("DELETE_OLD_LOGS_TASK_FLAG", default=True) + ), + ScheduledTaskEntry( + operator=PopulateBacklogSnapshotTaskOperator(adb_client=self.async_core.adb_client), + interval=IntervalEnum.DAILY, + enabled=self.env.bool("POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG", default=True) 
+ ), + ScheduledTaskEntry( + operator=SyncDataSourcesTaskOperator( + adb_client=self.async_core.adb_client, + pdap_client=self.pdap_client + ), + interval=IntervalEnum.DAILY, + enabled=self.env.bool("SYNC_DATA_SOURCES_TASK_FLAG", default=True) + ), + ScheduledTaskEntry( + operator=SyncAgenciesTaskOperator( + adb_client=self.async_core.adb_client, + pdap_client=self.pdap_client + ), + interval=IntervalEnum.DAILY, + enabled=self.env.bool("SYNC_AGENCIES_TASK_FLAG", default=True) + ), + ScheduledTaskEntry( + operator=PushToHuggingFaceTaskOperator( + adb_client=self.async_core.adb_client, + hf_client=self.hf_client + ), + interval=IntervalEnum.DAILY, + enabled=self.env.bool( + "PUSH_TO_HUGGING_FACE_TASK_FLAG", + default=True + ) + ) - async def get_push_to_hugging_face_task_operator(self) -> PushToHuggingFaceTaskOperator: - return PushToHuggingFaceTaskOperator( - adb_client=self.adb_client, - hf_client=self.hf_client - ) + ] diff --git a/src/core/tasks/scheduled/manager.py b/src/core/tasks/scheduled/manager.py index e946b590..0006af41 100644 --- a/src/core/tasks/scheduled/manager.py +++ b/src/core/tasks/scheduled/manager.py @@ -1,118 +1,58 @@ -from datetime import datetime, timedelta - from apscheduler.job import Job from apscheduler.schedulers.asyncio import AsyncIOScheduler -from apscheduler.triggers.interval import IntervalTrigger -from src.core.core import AsyncCore + from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.handler import TaskHandler -from src.core.tasks.scheduled.convert import convert_interval_enum_to_hours -from src.core.tasks.scheduled.enums import IntervalEnum from src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry +from src.core.tasks.scheduled.registry.core import ScheduledJobRegistry from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase -from environs import Env - class AsyncScheduledTaskManager: def 
__init__( self, - async_core: AsyncCore, handler: TaskHandler, - loader: ScheduledTaskOperatorLoader + loader: ScheduledTaskOperatorLoader, + registry: ScheduledJobRegistry ): + # Dependencies - self.async_core = async_core - self.handler = handler - self.loader = loader + self._handler = handler + self._loader = loader + self._registry = registry # Main objects self.scheduler = AsyncIOScheduler() - # Jobs - self._jobs: dict[str, Job] = {} - async def setup(self): - env = Env() - env.read_env() - - scheduled_task_flag = env.bool("SCHEDULED_TASKS_FLAG", default=True) - if not scheduled_task_flag: - print("Scheduled tasks are disabled.") - return - - self.scheduler.start() + self._registry.start_scheduler() await self.add_scheduled_tasks() - async def _get_entries(self) -> list[ScheduledTaskEntry]: - return [ - ScheduledTaskEntry( - name="Run Task Cycles", - function=self.async_core.run_tasks, - interval=IntervalEnum.HOURLY - ), - ScheduledTaskEntry( - name="Delete Old Logs", - function=self.async_core.adb_client.delete_old_logs, - interval=IntervalEnum.DAILY - ), - ScheduledTaskEntry( - name="Populate Backlog Snapshot", - function=self.async_core.adb_client.populate_backlog_snapshot, - interval=IntervalEnum.DAILY - ), - ScheduledTaskEntry( - name="Sync Agencies", - function=self.run_task, - interval=IntervalEnum.DAILY, - kwargs={ - "operator": await self.loader.get_sync_agencies_task_operator() - } - ), - ScheduledTaskEntry( - name="Sync Data Sources", - function=self.run_task, - interval=IntervalEnum.DAILY, - kwargs={ - "operator": await self.loader.get_sync_data_sources_task_operator() - } - ), - # ScheduledTaskEntry( - # name="Push to Hugging Face", - # function=self.run_task, - # interval=IntervalEnum.DAILY, - # kwargs={ - # "operator": await self.loader.get_push_to_hugging_face_task_operator() - # } - # ) - ] - async def add_scheduled_tasks(self): """ Modifies: - self._jobs + self._registry """ - entries: list[ScheduledTaskEntry] = await self._get_entries() + 
entries: list[ScheduledTaskEntry] = await self._loader.load_entries() for idx, entry in enumerate(entries): - self._jobs[entry.name] = self.scheduler.add_job( - entry.function, - trigger=IntervalTrigger( - hours=convert_interval_enum_to_hours(entry.interval), - start_date=datetime.now() + timedelta(minutes=idx + 1) - ), - misfire_grace_time=60, - kwargs=entry.kwargs + if not entry.enabled: + print(f"{entry.operator.task_type.value} is disabled. Skipping add to scheduler.") + continue + + await self._registry.add_job( + func=self.run_task, + entry=entry, + minute_lag=idx ) def shutdown(self): - if self.scheduler.running: - self.scheduler.shutdown() + self._registry.shutdown_scheduler() async def run_task(self, operator: ScheduledTaskOperatorBase): print(f"Running {operator.task_type.value} Task") - task_id = await self.handler.initiate_task_in_db(task_type=operator.task_type) + task_id = await self._handler.initiate_task_in_db(task_type=operator.task_type) run_info: TaskOperatorRunInfo = await operator.run_task(task_id) - await self.handler.handle_outcome(run_info) + await self._handler.handle_outcome(run_info) diff --git a/src/core/tasks/scheduled/models/entry.py b/src/core/tasks/scheduled/models/entry.py index 8413baea..e3d647d0 100644 --- a/src/core/tasks/scheduled/models/entry.py +++ b/src/core/tasks/scheduled/models/entry.py @@ -3,6 +3,7 @@ from pydantic import BaseModel from src.core.tasks.scheduled.enums import IntervalEnum +from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase class ScheduledTaskEntry(BaseModel): @@ -10,7 +11,6 @@ class ScheduledTaskEntry(BaseModel): class Config: arbitrary_types_allowed = True - name: str - function: Any + operator: ScheduledTaskOperatorBase interval: IntervalEnum - kwargs: dict[str, Any] = {} \ No newline at end of file + enabled: bool diff --git a/tests/automated/integration/tasks/scheduled/sync/__init__.py b/src/core/tasks/scheduled/registry/__init__.py similarity index 100% rename from 
tests/automated/integration/tasks/scheduled/sync/__init__.py rename to src/core/tasks/scheduled/registry/__init__.py diff --git a/src/core/tasks/scheduled/convert.py b/src/core/tasks/scheduled/registry/convert.py similarity index 100% rename from src/core/tasks/scheduled/convert.py rename to src/core/tasks/scheduled/registry/convert.py diff --git a/src/core/tasks/scheduled/registry/core.py b/src/core/tasks/scheduled/registry/core.py new file mode 100644 index 00000000..a7af830f --- /dev/null +++ b/src/core/tasks/scheduled/registry/core.py @@ -0,0 +1,52 @@ +from datetime import datetime, timedelta +from typing import Awaitable, Callable + +from apscheduler.job import Job +from apscheduler.schedulers.asyncio import AsyncIOScheduler +from apscheduler.triggers.interval import IntervalTrigger + +from src.core.tasks.scheduled.registry.convert import convert_interval_enum_to_hours +from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry +from src.db.enums import TaskType + + +class ScheduledJobRegistry: + + + def __init__(self): + # Main objects + self.scheduler = AsyncIOScheduler() + + # Jobs + self._jobs: dict[TaskType, Job] = {} + + async def add_job( + self, + func: Callable, + entry: ScheduledTaskEntry, + minute_lag: int + ) -> None: + """ + Modifies: + self._jobs + """ + self._jobs[entry.operator.task_type] = self.scheduler.add_job( + func, + trigger=IntervalTrigger( + hours=convert_interval_enum_to_hours(entry.interval), + start_date=datetime.now() + timedelta(minutes=minute_lag) + ), + misfire_grace_time=60, + kwargs={"operator": entry.operator} + ) + + def start_scheduler(self) -> None: + """ + Modifies: + self.scheduler + """ + self.scheduler.start() + + def shutdown_scheduler(self) -> None: + if self.scheduler.running: + self.scheduler.shutdown() \ No newline at end of file diff --git a/src/core/tasks/url/loader.py b/src/core/tasks/url/loader.py index bee76770..e381c486 100644 --- a/src/core/tasks/url/loader.py +++ b/src/core/tasks/url/loader.py @@ 
-2,24 +2,26 @@ The task loader loads task a task operator and all dependencies. """ +from environs import Env + from src.collectors.source_collectors.muckrock.api_interface.core import MuckrockAPIInterface +from src.core.tasks.url.models.entry import URLTaskEntry from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.core.tasks.url.operators.agency_identification.subtasks.loader import AgencyIdentificationSubtaskLoader from src.core.tasks.url.operators.auto_relevant.core import URLAutoRelevantTaskOperator -from src.core.tasks.url.operators.base import URLTaskOperatorBase +from src.core.tasks.url.operators.duplicate.core import URLDuplicateTaskOperator +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser +from src.core.tasks.url.operators.misc_metadata.core import URLMiscellaneousMetadataTaskOperator from src.core.tasks.url.operators.probe.core import URLProbeTaskOperator from src.core.tasks.url.operators.probe_404.core import URL404ProbeTaskOperator from src.core.tasks.url.operators.record_type.core import URLRecordTypeTaskOperator from src.core.tasks.url.operators.record_type.llm_api.record_classifier.openai import OpenAIRecordClassifier from src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator -from src.core.tasks.url.operators.duplicate.core import URLDuplicateTaskOperator -from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator -from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser -from src.external.url_request.core import URLRequestInterface -from src.core.tasks.url.operators.misc_metadata.core import URLMiscellaneousMetadataTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.inference.client import HuggingFaceInferenceClient from src.external.pdap.client import PDAPClient +from 
src.external.url_request.core import URLRequestInterface class URLTaskOperatorLoader: @@ -37,28 +39,41 @@ def __init__( self.adb_client = adb_client self.url_request_interface = url_request_interface self.html_parser = html_parser + self.env = Env() # External clients and interfaces self.pdap_client = pdap_client self.muckrock_api_interface = muckrock_api_interface self.hf_inference_client = hf_inference_client - async def get_url_html_task_operator(self): + async def _get_url_html_task_operator(self) -> URLTaskEntry: operator = URLHTMLTaskOperator( adb_client=self.adb_client, url_request_interface=self.url_request_interface, html_parser=self.html_parser ) - return operator + return URLTaskEntry( + operator=operator, + enabled=self.env.bool( + "URL_HTML_TASK_FLAG", + default=True + ) + ) - async def get_url_record_type_task_operator(self): + async def _get_url_record_type_task_operator(self) -> URLTaskEntry: operator = URLRecordTypeTaskOperator( adb_client=self.adb_client, classifier=OpenAIRecordClassifier() ) - return operator + return URLTaskEntry( + operator=operator, + enabled=self.env.bool( + "URL_RECORD_TYPE_TASK_FLAG", + default=True + ) + ) - async def get_agency_identification_task_operator(self): + async def _get_agency_identification_task_operator(self) -> URLTaskEntry: operator = AgencyIdentificationTaskOperator( adb_client=self.adb_client, loader=AgencyIdentificationSubtaskLoader( @@ -66,58 +81,100 @@ async def get_agency_identification_task_operator(self): muckrock_api_interface=self.muckrock_api_interface ) ) - return operator + return URLTaskEntry( + operator=operator, + enabled=self.env.bool( + "URL_AGENCY_IDENTIFICATION_TASK_FLAG", + default=True + ) + ) - async def get_submit_approved_url_task_operator(self): + async def _get_submit_approved_url_task_operator(self) -> URLTaskEntry: operator = SubmitApprovedURLTaskOperator( adb_client=self.adb_client, pdap_client=self.pdap_client ) - return operator + return URLTaskEntry( + operator=operator, + 
enabled=self.env.bool( + "URL_SUBMIT_APPROVED_TASK_FLAG", + default=True + ) + ) - async def get_url_miscellaneous_metadata_task_operator(self): + async def _get_url_miscellaneous_metadata_task_operator(self) -> URLTaskEntry: operator = URLMiscellaneousMetadataTaskOperator( adb_client=self.adb_client ) - return operator + return URLTaskEntry( + operator=operator, + enabled=self.env.bool( + "URL_MISC_METADATA_TASK_FLAG", + default=True + ) + ) - async def get_url_duplicate_task_operator(self): + async def _get_url_duplicate_task_operator(self) -> URLTaskEntry: operator = URLDuplicateTaskOperator( adb_client=self.adb_client, pdap_client=self.pdap_client ) - return operator + return URLTaskEntry( + operator=operator, + enabled=self.env.bool( + "URL_DUPLICATE_TASK_FLAG", + default=True + ) + ) - async def get_url_404_probe_task_operator(self): + async def _get_url_404_probe_task_operator(self) -> URLTaskEntry: operator = URL404ProbeTaskOperator( adb_client=self.adb_client, url_request_interface=self.url_request_interface ) - return operator + return URLTaskEntry( + operator=operator, + enabled=self.env.bool( + "URL_404_PROBE_TASK_FLAG", + default=True + ) + ) - async def get_url_auto_relevance_task_operator(self): + async def _get_url_auto_relevance_task_operator(self) -> URLTaskEntry: operator = URLAutoRelevantTaskOperator( adb_client=self.adb_client, hf_client=self.hf_inference_client ) - return operator + return URLTaskEntry( + operator=operator, + enabled=self.env.bool( + "URL_AUTO_RELEVANCE_TASK_FLAG", + default=True + ) + ) - async def get_url_probe_task_operator(self): + async def _get_url_probe_task_operator(self) -> URLTaskEntry: operator = URLProbeTaskOperator( adb_client=self.adb_client, url_request_interface=self.url_request_interface ) - return operator + return URLTaskEntry( + operator=operator, + enabled=self.env.bool( + "URL_PROBE_TASK_FLAG", + default=True + ) + ) - async def get_task_operators(self) -> list[URLTaskOperatorBase]: + async def 
load_entries(self) -> list[URLTaskEntry]: return [ - # await self.get_url_probe_task_operator(), - await self.get_url_html_task_operator(), - await self.get_url_duplicate_task_operator(), - # await self.get_url_404_probe_task_operator(), - await self.get_url_record_type_task_operator(), - await self.get_agency_identification_task_operator(), - await self.get_url_miscellaneous_metadata_task_operator(), - await self.get_submit_approved_url_task_operator(), - await self.get_url_auto_relevance_task_operator() + await self._get_url_probe_task_operator(), + await self._get_url_html_task_operator(), + await self._get_url_duplicate_task_operator(), + await self._get_url_404_probe_task_operator(), + await self._get_url_record_type_task_operator(), + await self._get_agency_identification_task_operator(), + await self._get_url_miscellaneous_metadata_task_operator(), + await self._get_submit_approved_url_task_operator(), + await self._get_url_auto_relevance_task_operator() ] diff --git a/src/core/tasks/url/manager.py b/src/core/tasks/url/manager.py index 1d843b95..8d4973a1 100644 --- a/src/core/tasks/url/manager.py +++ b/src/core/tasks/url/manager.py @@ -2,6 +2,7 @@ from src.core.tasks.handler import TaskHandler from src.core.tasks.url.loader import URLTaskOperatorLoader +from src.core.tasks.url.models.entry import URLTaskEntry from src.db.enums import TaskType from src.core.tasks.dtos.run_info import URLTaskOperatorRunInfo from src.core.tasks.url.enums import TaskOperatorOutcome @@ -28,37 +29,46 @@ def __init__( #region Tasks - async def set_manager_status(self, task_type: TaskType): + async def set_manager_status(self, task_type: TaskType) -> None: + """ + Modifies: + self.manager_status + """ self.manager_status = task_type - async def run_tasks(self): - operators = await self.loader.get_task_operators() - for operator in operators: - count = 0 - await self.set_manager_status(task_type=operator.task_type) + async def run_tasks(self) -> None: + entries: list[URLTaskEntry] = 
await self.loader.load_entries() + for entry in entries: + if not entry.enabled: + continue + await self._run_task(entry) + await self.set_manager_status(task_type=TaskType.IDLE) + async def _run_task(self, entry: URLTaskEntry) -> None: + operator = entry.operator + count = 0 + await self.set_manager_status(task_type=operator.task_type) + meets_prereq = await operator.meets_task_prerequisites() + while meets_prereq: + print(f"Running {operator.task_type.value} Task") + if count > TASK_REPEAT_THRESHOLD: + message = f"Task {operator.task_type.value} has been run more than {TASK_REPEAT_THRESHOLD} times in a row. Task loop terminated." + print(message) + await self.handler.post_to_discord(message=message) + break + task_id = await self.handler.initiate_task_in_db(task_type=operator.task_type) + run_info: URLTaskOperatorRunInfo = await operator.run_task(task_id) + await self.conclude_task(run_info) + if run_info.outcome == TaskOperatorOutcome.ERROR: + break + count += 1 meets_prereq = await operator.meets_task_prerequisites() - while meets_prereq: - print(f"Running {operator.task_type.value} Task") - if count > TASK_REPEAT_THRESHOLD: - message = f"Task {operator.task_type.value} has been run more than {TASK_REPEAT_THRESHOLD} times in a row. Task loop terminated." 
- print(message) - await self.handler.post_to_discord(message=message) - break - task_id = await self.handler.initiate_task_in_db(task_type=operator.task_type) - run_info: URLTaskOperatorRunInfo = await operator.run_task(task_id) - await self.conclude_task(run_info) - if run_info.outcome == TaskOperatorOutcome.ERROR: - break - count += 1 - meets_prereq = await operator.meets_task_prerequisites() - await self.set_manager_status(task_type=TaskType.IDLE) - async def trigger_task_run(self): + async def trigger_task_run(self) -> None: await self.task_trigger.trigger_or_rerun() - async def conclude_task(self, run_info: URLTaskOperatorRunInfo): + async def conclude_task(self, run_info: URLTaskOperatorRunInfo) -> None: await self.handler.link_urls_to_task( task_id=run_info.task_id, url_ids=run_info.linked_url_ids diff --git a/tests/automated/integration/tasks/scheduled/sync/agency/__init__.py b/src/core/tasks/url/models/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/agency/__init__.py rename to src/core/tasks/url/models/__init__.py diff --git a/src/core/tasks/url/models/entry.py b/src/core/tasks/url/models/entry.py new file mode 100644 index 00000000..eeb09047 --- /dev/null +++ b/src/core/tasks/url/models/entry.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel + +from src.core.tasks.url.operators.base import URLTaskOperatorBase + + +class URLTaskEntry(BaseModel): + + class Config: + arbitrary_types_allowed = True + + operator: URLTaskOperatorBase + enabled: bool \ No newline at end of file diff --git a/src/db/client/async_.py b/src/db/client/async_.py index 475d8404..1fa4376e 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -52,22 +52,22 @@ from src.collectors.enums import URLStatus, CollectorType from src.core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus from src.core.env_var_manager import EnvVarManager -from src.core.tasks.scheduled.huggingface.queries.check.core import 
CheckValidURLsUpdatedQueryBuilder -from src.core.tasks.scheduled.huggingface.queries.get.core import GetForLoadingToHuggingFaceQueryBuilder -from src.core.tasks.scheduled.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput -from src.core.tasks.scheduled.huggingface.queries.state import SetHuggingFaceUploadStateQueryBuilder -from src.core.tasks.scheduled.sync.agency.dtos.parameters import AgencySyncParameters -from src.core.tasks.scheduled.sync.agency.queries.get_sync_params import GetAgenciesSyncParametersQueryBuilder -from src.core.tasks.scheduled.sync.agency.queries.mark_full_sync import get_mark_full_agencies_sync_query -from src.core.tasks.scheduled.sync.agency.queries.update_sync_progress import get_update_agencies_sync_progress_query -from src.core.tasks.scheduled.sync.agency.queries.upsert import \ +from src.core.tasks.scheduled.impl.huggingface.queries.check.core import CheckValidURLsUpdatedQueryBuilder +from src.core.tasks.scheduled.impl.huggingface.queries.get.core import GetForLoadingToHuggingFaceQueryBuilder +from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput +from src.core.tasks.scheduled.impl.huggingface.queries.state import SetHuggingFaceUploadStateQueryBuilder +from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters +from src.core.tasks.scheduled.impl.sync.agency.queries.get_sync_params import GetAgenciesSyncParametersQueryBuilder +from src.core.tasks.scheduled.impl.sync.agency.queries.mark_full_sync import get_mark_full_agencies_sync_query +from src.core.tasks.scheduled.impl.sync.agency.queries.update_sync_progress import get_update_agencies_sync_progress_query +from src.core.tasks.scheduled.impl.sync.agency.queries.upsert import \ convert_agencies_sync_response_to_agencies_upsert -from src.core.tasks.scheduled.sync.data_sources.params import DataSourcesSyncParameters -from src.core.tasks.scheduled.sync.data_sources.queries.get_sync_params 
import GetDataSourcesSyncParametersQueryBuilder -from src.core.tasks.scheduled.sync.data_sources.queries.mark_full_sync import get_mark_full_data_sources_sync_query -from src.core.tasks.scheduled.sync.data_sources.queries.update_sync_progress import \ +from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters +from src.core.tasks.scheduled.impl.sync.data_sources.queries.get_sync_params import GetDataSourcesSyncParametersQueryBuilder +from src.core.tasks.scheduled.impl.sync.data_sources.queries.mark_full_sync import get_mark_full_data_sources_sync_query +from src.core.tasks.scheduled.impl.sync.data_sources.queries.update_sync_progress import \ get_update_data_sources_sync_progress_query -from src.core.tasks.scheduled.sync.data_sources.queries.upsert.core import \ +from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.core import \ UpsertURLsFromDataSourcesQueryBuilder from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo from src.core.tasks.url.operators.agency_identification.dtos.tdo import AgencyIdentificationTDO diff --git a/src/db/enums.py b/src/db/enums.py index c8ed9840..27d64402 100644 --- a/src/db/enums.py +++ b/src/db/enums.py @@ -32,6 +32,8 @@ class URLHTMLContentType(PyEnum): DIV = "Div" class TaskType(PyEnum): + + # URL Tasks HTML = "HTML" RELEVANCY = "Relevancy" RECORD_TYPE = "Record Type" @@ -41,10 +43,15 @@ class TaskType(PyEnum): DUPLICATE_DETECTION = "Duplicate Detection" IDLE = "Idle" PROBE_404 = "404 Probe" + PROBE_URL = "URL Probe" + + # Scheduled Tasks + PUSH_TO_HUGGINGFACE = "Push to Hugging Face" SYNC_AGENCIES = "Sync Agencies" SYNC_DATA_SOURCES = "Sync Data Sources" - PUSH_TO_HUGGINGFACE = "Push to Hugging Face" - PROBE_URL = "URL Probe" + POPULATE_BACKLOG_SNAPSHOT = "Populate Backlog Snapshot" + DELETE_OLD_LOGS = "Delete Old Logs" + RUN_URL_TASKS = "Run URL Task Cycles" class ChangeLogOperationType(PyEnum): INSERT = "INSERT" diff --git 
a/src/external/huggingface/hub/client.py b/src/external/huggingface/hub/client.py index 9bb63391..9cb2ba34 100644 --- a/src/external/huggingface/hub/client.py +++ b/src/external/huggingface/hub/client.py @@ -3,7 +3,7 @@ from src.external.huggingface.hub.constants import DATA_SOURCES_RAW_REPO_ID from src.external.huggingface.hub.format import format_as_huggingface_dataset -from src.core.tasks.scheduled.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput +from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput class HuggingFaceHubClient: diff --git a/src/external/huggingface/hub/format.py b/src/external/huggingface/hub/format.py index b103d31d..c870ec17 100644 --- a/src/external/huggingface/hub/format.py +++ b/src/external/huggingface/hub/format.py @@ -1,6 +1,6 @@ from datasets import Dataset -from src.core.tasks.scheduled.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput +from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput def format_as_huggingface_dataset(outputs: list[GetForLoadingToHuggingFaceOutput]) -> Dataset: diff --git a/src/external/pdap/client.py b/src/external/pdap/client.py index ee442600..29f99154 100644 --- a/src/external/pdap/client.py +++ b/src/external/pdap/client.py @@ -2,8 +2,8 @@ from pdap_access_manager import AccessManager, DataSourcesNamespaces, RequestInfo, RequestType -from src.core.tasks.scheduled.sync.agency.dtos.parameters import AgencySyncParameters -from src.core.tasks.scheduled.sync.data_sources.params import DataSourcesSyncParameters +from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters +from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters from src.core.tasks.url.operators.submit_approved.tdo import SubmitApprovedURLTDO, SubmittedURLInfo from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo, 
AgenciesSyncResponseInfo from src.external.pdap.dtos.match_agency.post import MatchAgencyInfo diff --git a/tests/automated/integration/core/async_/run_task/test_break_loop.py b/tests/automated/integration/core/async_/run_task/test_break_loop.py index 303ee39d..17ce5e51 100644 --- a/tests/automated/integration/core/async_/run_task/test_break_loop.py +++ b/tests/automated/integration/core/async_/run_task/test_break_loop.py @@ -1,8 +1,10 @@ import types -from unittest.mock import AsyncMock +from unittest.mock import AsyncMock, create_autospec import pytest +from src.core.tasks.url.models.entry import URLTaskEntry +from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.enums import TaskType from src.core.tasks.dtos.run_info import URLTaskOperatorRunInfo from src.core.tasks.url.enums import TaskOperatorOutcome @@ -30,12 +32,16 @@ async def run_task(self, task_id: int) -> URLTaskOperatorRunInfo: core = setup_async_core(db_data_creator.adb_client) core.task_manager.conclude_task = AsyncMock() - mock_operator = AsyncMock() + mock_operator = create_autospec(URLTaskOperatorBase, instance=True) mock_operator.meets_task_prerequisites = AsyncMock(return_value=True) mock_operator.task_type = TaskType.HTML mock_operator.run_task = types.MethodType(run_task, mock_operator) + entry = URLTaskEntry( + operator=mock_operator, + enabled=True + ) - core.task_manager.loader.get_task_operators = AsyncMock(return_value=[mock_operator]) + core.task_manager.loader.load_entries = AsyncMock(return_value=[entry]) await core.task_manager.trigger_task_run() core.task_manager.handler.discord_poster.post_to_discord.assert_called_once_with( diff --git a/tests/automated/integration/core/async_/run_task/test_prereq_met.py b/tests/automated/integration/core/async_/run_task/test_prereq_met.py index 00484e15..fa8ed93b 100644 --- a/tests/automated/integration/core/async_/run_task/test_prereq_met.py +++ b/tests/automated/integration/core/async_/run_task/test_prereq_met.py @@ -1,11 
+1,13 @@ import types -from unittest.mock import AsyncMock, call +from unittest.mock import AsyncMock, call, create_autospec import pytest from src.core.enums import BatchStatus from src.core.tasks.dtos.run_info import URLTaskOperatorRunInfo from src.core.tasks.url.enums import TaskOperatorOutcome +from src.core.tasks.url.models.entry import URLTaskEntry +from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.enums import TaskType from src.db.models.instantiations.task.core import Task from tests.automated.integration.core.async_.helpers import setup_async_core @@ -30,14 +32,18 @@ async def run_task(self, task_id: int) -> URLTaskOperatorRunInfo: core = setup_async_core(db_data_creator.adb_client) core.task_manager.conclude_task = AsyncMock() - mock_operator = AsyncMock() + mock_operator = create_autospec(URLTaskOperatorBase, instance=True) mock_operator.meets_task_prerequisites = AsyncMock( side_effect=[True, False] ) mock_operator.task_type = TaskType.HTML mock_operator.run_task = types.MethodType(run_task, mock_operator) + entry = URLTaskEntry( + operator=mock_operator, + enabled=True + ) - core.task_manager.loader.get_task_operators = AsyncMock(return_value=[mock_operator]) + core.task_manager.loader.load_entries = AsyncMock(return_value=[entry]) await core.run_tasks() # There should be two calls to meets_task_prerequisites diff --git a/tests/automated/integration/core/async_/run_task/test_prereq_not_met.py b/tests/automated/integration/core/async_/run_task/test_prereq_not_met.py index ef068cd5..286c14dd 100644 --- a/tests/automated/integration/core/async_/run_task/test_prereq_not_met.py +++ b/tests/automated/integration/core/async_/run_task/test_prereq_not_met.py @@ -1,7 +1,9 @@ -from unittest.mock import AsyncMock +from unittest.mock import AsyncMock, create_autospec import pytest +from src.core.tasks.url.models.entry import URLTaskEntry +from src.core.tasks.url.operators.base import URLTaskOperatorBase from 
tests.automated.integration.core.async_.helpers import setup_async_core @@ -12,9 +14,10 @@ async def test_run_task_prereq_not_met(): """ core = setup_async_core(AsyncMock()) - mock_operator = AsyncMock() + mock_operator = create_autospec(URLTaskOperatorBase, instance=True) mock_operator.meets_task_prerequisites = AsyncMock(return_value=False) - core.task_manager.loader.get_task_operators = AsyncMock(return_value=[mock_operator]) + entry = URLTaskEntry(operator=mock_operator, enabled=True) + core.task_manager.loader.load_entries = AsyncMock(return_value=[entry]) await core.run_tasks() mock_operator.meets_task_prerequisites.assert_called_once() diff --git a/tests/automated/integration/db/structure/test_task_enums.py b/tests/automated/integration/db/structure/test_task_enums.py new file mode 100644 index 00000000..709808a3 --- /dev/null +++ b/tests/automated/integration/db/structure/test_task_enums.py @@ -0,0 +1,13 @@ +import pytest + +from src.db.client.async_ import AsyncDatabaseClient +from src.db.enums import TaskType + + +@pytest.mark.asyncio +async def test_task_enums(adb_client_test: AsyncDatabaseClient) -> None: + + for task_type in TaskType: + if task_type == TaskType.IDLE: + continue + await adb_client_test.initiate_task(task_type=task_type) \ No newline at end of file diff --git a/tests/automated/integration/tasks/conftest.py b/tests/automated/integration/tasks/conftest.py index 807157cb..a06da58c 100644 --- a/tests/automated/integration/tasks/conftest.py +++ b/tests/automated/integration/tasks/conftest.py @@ -1,8 +1,8 @@ from unittest.mock import MagicMock, AsyncMock import pytest - from pdap_access_manager import AccessManager + from src.external.pdap.client import PDAPClient @@ -20,4 +20,4 @@ def mock_pdap_client() -> PDAPClient: pdap_client = PDAPClient( access_manager=mock_access_manager ) - return pdap_client \ No newline at end of file + return pdap_client diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/__init__.py 
b/tests/automated/integration/tasks/scheduled/impl/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/__init__.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/huggingface/conftest.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/conftest.py similarity index 76% rename from tests/automated/integration/tasks/scheduled/huggingface/conftest.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/conftest.py index 29d397b4..687f0dce 100644 --- a/tests/automated/integration/tasks/scheduled/huggingface/conftest.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/conftest.py @@ -2,7 +2,7 @@ import pytest -from src.core.tasks.scheduled.huggingface.operator import PushToHuggingFaceTaskOperator +from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator from src.external.huggingface.hub.client import HuggingFaceHubClient diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/__init__.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/setup/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/huggingface/setup/data.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/data.py similarity index 85% rename from 
tests/automated/integration/tasks/scheduled/huggingface/setup/data.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/setup/data.py index d28aa8f2..d7ece710 100644 --- a/tests/automated/integration/tasks/scheduled/huggingface/setup/data.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/data.py @@ -1,11 +1,11 @@ from src.collectors.enums import URLStatus from src.core.enums import RecordType -from src.core.tasks.scheduled.huggingface.queries.get.enums import RecordTypeCoarse -from tests.automated.integration.tasks.scheduled.huggingface.setup.models.entry \ +from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.entry \ import TestPushToHuggingFaceURLSetupEntry as Entry -from tests.automated.integration.tasks.scheduled.huggingface.setup.models.output import \ +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.output import \ TestPushToHuggingFaceURLSetupExpectedOutput as Output -from tests.automated.integration.tasks.scheduled.huggingface.setup.models.input import \ +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.input import \ TestPushToHuggingFaceURLSetupEntryInput as Input ENTRIES = [ diff --git a/tests/automated/integration/tasks/scheduled/huggingface/setup/manager.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/manager.py similarity index 76% rename from tests/automated/integration/tasks/scheduled/huggingface/setup/manager.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/setup/manager.py index 9b6606d2..d6438472 100644 --- a/tests/automated/integration/tasks/scheduled/huggingface/setup/manager.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/manager.py @@ -1,13 +1,10 @@ -from src.core.tasks.scheduled.huggingface.queries.get.model import 
GetForLoadingToHuggingFaceOutput +from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput from src.db.client.async_ import AsyncDatabaseClient -from tests.automated.integration.tasks.scheduled.huggingface.setup.data import ENTRIES -from tests.automated.integration.tasks.scheduled.huggingface.setup.models.output import \ - TestPushToHuggingFaceURLSetupExpectedOutput -from tests.automated.integration.tasks.scheduled.huggingface.setup.models.record import \ +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.data import ENTRIES +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.record import \ TestPushToHuggingFaceRecordSetupRecord as Record, TestPushToHuggingFaceRecordSetupRecord -from tests.automated.integration.tasks.scheduled.huggingface.setup.queries.setup import \ +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.queries.setup import \ SetupTestPushToHuggingFaceEntryQueryBuilder -from tests.helpers.data_creator.core import DBDataCreator class PushToHuggingFaceTestSetupManager: diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/queries/__init__.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/queries/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/huggingface/setup/models/entry.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/entry.py similarity index 61% rename from tests/automated/integration/tasks/scheduled/huggingface/setup/models/entry.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/entry.py index e072a1b6..16bb74aa 100644 --- 
a/tests/automated/integration/tasks/scheduled/huggingface/setup/models/entry.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/entry.py @@ -1,8 +1,8 @@ from pydantic import BaseModel -from tests.automated.integration.tasks.scheduled.huggingface.setup.models.input import \ +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.input import \ TestPushToHuggingFaceURLSetupEntryInput -from tests.automated.integration.tasks.scheduled.huggingface.setup.models.output import \ +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.output import \ TestPushToHuggingFaceURLSetupExpectedOutput diff --git a/tests/automated/integration/tasks/scheduled/huggingface/setup/models/input.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/input.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/huggingface/setup/models/input.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/input.py diff --git a/tests/automated/integration/tasks/scheduled/huggingface/setup/models/output.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/output.py similarity index 84% rename from tests/automated/integration/tasks/scheduled/huggingface/setup/models/output.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/output.py index c1303543..736bd97e 100644 --- a/tests/automated/integration/tasks/scheduled/huggingface/setup/models/output.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/output.py @@ -2,8 +2,7 @@ from pydantic import BaseModel, model_validator -from src.core.enums import RecordType -from src.core.tasks.scheduled.huggingface.queries.get.enums import RecordTypeCoarse +from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse class TestPushToHuggingFaceURLSetupExpectedOutput(BaseModel): diff --git 
a/tests/automated/integration/tasks/scheduled/huggingface/setup/models/record.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/record.py similarity index 75% rename from tests/automated/integration/tasks/scheduled/huggingface/setup/models/record.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/record.py index becabc17..4ce15770 100644 --- a/tests/automated/integration/tasks/scheduled/huggingface/setup/models/record.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/models/record.py @@ -1,7 +1,7 @@ from pydantic import BaseModel from src.core.enums import RecordType -from tests.automated.integration.tasks.scheduled.huggingface.setup.models.output import \ +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.output import \ TestPushToHuggingFaceURLSetupExpectedOutput diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/__init__.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/huggingface/setup/queries/setup.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py similarity index 91% rename from tests/automated/integration/tasks/scheduled/huggingface/setup/queries/setup.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py index d4fd84ad..b8bd2175 100644 --- a/tests/automated/integration/tasks/scheduled/huggingface/setup/queries/setup.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py @@ -5,9 +5,9 @@ from src.db.models.instantiations.url.core.sqlalchemy import URL from 
src.db.queries.base.builder import QueryBuilderBase from src.db.utils.compression import compress_html -from tests.automated.integration.tasks.scheduled.huggingface.setup.models.entry import \ +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.entry import \ TestPushToHuggingFaceURLSetupEntry as Entry -from tests.automated.integration.tasks.scheduled.huggingface.setup.models.record import \ +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.record import \ TestPushToHuggingFaceRecordSetupRecord as Record diff --git a/tests/automated/integration/tasks/scheduled/huggingface/test_happy_path.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_happy_path.py similarity index 73% rename from tests/automated/integration/tasks/scheduled/huggingface/test_happy_path.py rename to tests/automated/integration/tasks/scheduled/impl/huggingface/test_happy_path.py index d5eca4a7..ddb85104 100644 --- a/tests/automated/integration/tasks/scheduled/huggingface/test_happy_path.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_happy_path.py @@ -2,10 +2,10 @@ import pytest -from src.core.tasks.scheduled.huggingface.operator import PushToHuggingFaceTaskOperator -from src.core.tasks.scheduled.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput -from tests.automated.integration.tasks.asserts import assert_task_ran_without_error -from tests.automated.integration.tasks.scheduled.huggingface.setup.manager import PushToHuggingFaceTestSetupManager +from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator +from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.manager import PushToHuggingFaceTestSetupManager from 
tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/sync/__init__.py diff --git a/tests/automated/integration/tasks/url/agency_identification/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/agency_identification/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/sync/agency/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/sync/agency/conftest.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/conftest.py similarity index 66% rename from tests/automated/integration/tasks/scheduled/sync/agency/conftest.py rename to tests/automated/integration/tasks/scheduled/impl/sync/agency/conftest.py index 8ba4221f..5b0539e7 100644 --- a/tests/automated/integration/tasks/scheduled/sync/agency/conftest.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/conftest.py @@ -1,7 +1,7 @@ import pytest_asyncio -from src.core.tasks.scheduled.sync.agency.operator import SyncAgenciesTaskOperator -from tests.automated.integration.tasks.scheduled.sync.agency.helpers import update_existing_agencies_updated_at, \ +from src.core.tasks.scheduled.impl.sync.agency.operator import SyncAgenciesTaskOperator +from tests.automated.integration.tasks.scheduled.impl.sync.agency.helpers import update_existing_agencies_updated_at, \ add_existing_agencies @pytest_asyncio.fixture diff --git a/tests/automated/integration/tasks/scheduled/sync/agency/data.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/data.py similarity index 100% rename from 
tests/automated/integration/tasks/scheduled/sync/agency/data.py rename to tests/automated/integration/tasks/scheduled/impl/sync/agency/data.py diff --git a/tests/automated/integration/tasks/scheduled/sync/agency/existence_checker.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/existence_checker.py similarity index 87% rename from tests/automated/integration/tasks/scheduled/sync/agency/existence_checker.py rename to tests/automated/integration/tasks/scheduled/impl/sync/agency/existence_checker.py index 292f4aea..e99f6112 100644 --- a/tests/automated/integration/tasks/scheduled/sync/agency/existence_checker.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/existence_checker.py @@ -1,6 +1,6 @@ from src.db.models.instantiations.agency.sqlalchemy import Agency from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInnerInfo -from tests.automated.integration.tasks.scheduled.sync.agency.data import FIRST_CALL_RESPONSE, SECOND_CALL_RESPONSE +from tests.automated.integration.tasks.scheduled.impl.sync.agency.data import FIRST_CALL_RESPONSE, SECOND_CALL_RESPONSE class AgencyChecker: diff --git a/tests/automated/integration/tasks/scheduled/sync/agency/helpers.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/helpers.py similarity index 95% rename from tests/automated/integration/tasks/scheduled/sync/agency/helpers.py rename to tests/automated/integration/tasks/scheduled/impl/sync/agency/helpers.py index 7c35a654..0fbe64bc 100644 --- a/tests/automated/integration/tasks/scheduled/sync/agency/helpers.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/helpers.py @@ -8,7 +8,7 @@ from src.db.models.instantiations.agency.sqlalchemy import Agency from src.db.models.instantiations.state.sync.agencies import AgenciesSyncState from src.external.pdap.client import PDAPClient -from tests.automated.integration.tasks.scheduled.sync.agency.data import PREEXISTING_AGENCIES +from 
tests.automated.integration.tasks.scheduled.impl.sync.agency.data import PREEXISTING_AGENCIES async def check_sync_concluded( diff --git a/tests/automated/integration/tasks/scheduled/sync/agency/test_happy_path.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_happy_path.py similarity index 74% rename from tests/automated/integration/tasks/scheduled/sync/agency/test_happy_path.py rename to tests/automated/integration/tasks/scheduled/impl/sync/agency/test_happy_path.py index 02cefa3e..8b3d8294 100644 --- a/tests/automated/integration/tasks/scheduled/sync/agency/test_happy_path.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_happy_path.py @@ -3,12 +3,12 @@ import pytest from sqlalchemy import select -from src.core.tasks.scheduled.sync.agency.dtos.parameters import AgencySyncParameters -from src.core.tasks.scheduled.sync.agency.operator import SyncAgenciesTaskOperator +from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters +from src.core.tasks.scheduled.impl.sync.agency.operator import SyncAgenciesTaskOperator from src.db.models.instantiations.agency.sqlalchemy import Agency -from tests.automated.integration.tasks.scheduled.sync.agency.data import AGENCIES_SYNC_RESPONSES -from tests.automated.integration.tasks.scheduled.sync.agency.existence_checker import AgencyChecker -from tests.automated.integration.tasks.scheduled.sync.agency.helpers import check_sync_concluded, patch_sync_agencies +from tests.automated.integration.tasks.scheduled.impl.sync.agency.data import AGENCIES_SYNC_RESPONSES +from tests.automated.integration.tasks.scheduled.impl.sync.agency.existence_checker import AgencyChecker +from tests.automated.integration.tasks.scheduled.impl.sync.agency.helpers import check_sync_concluded, patch_sync_agencies from tests.helpers.asserts import assert_task_run_success diff --git a/tests/automated/integration/tasks/scheduled/sync/agency/test_interruption.py 
b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_interruption.py similarity index 84% rename from tests/automated/integration/tasks/scheduled/sync/agency/test_interruption.py rename to tests/automated/integration/tasks/scheduled/impl/sync/agency/test_interruption.py index 2f112175..d1af6417 100644 --- a/tests/automated/integration/tasks/scheduled/sync/agency/test_interruption.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_interruption.py @@ -1,14 +1,14 @@ import pytest from sqlalchemy import select -from src.core.tasks.scheduled.sync.agency.operator import SyncAgenciesTaskOperator +from src.core.tasks.scheduled.impl.sync.agency.operator import SyncAgenciesTaskOperator from src.core.tasks.url.enums import TaskOperatorOutcome from src.db.models.instantiations.agency.sqlalchemy import Agency from src.db.models.instantiations.state.sync.agencies import AgenciesSyncState -from tests.automated.integration.tasks.scheduled.sync.agency.data import FIRST_CALL_RESPONSE, \ +from tests.automated.integration.tasks.scheduled.impl.sync.agency.data import FIRST_CALL_RESPONSE, \ THIRD_CALL_RESPONSE, SECOND_CALL_RESPONSE -from tests.automated.integration.tasks.scheduled.sync.agency.existence_checker import AgencyChecker -from tests.automated.integration.tasks.scheduled.sync.agency.helpers import patch_sync_agencies, check_sync_concluded +from tests.automated.integration.tasks.scheduled.impl.sync.agency.existence_checker import AgencyChecker +from tests.automated.integration.tasks.scheduled.impl.sync.agency.helpers import patch_sync_agencies, check_sync_concluded @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/scheduled/sync/agency/test_no_new_results.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_no_new_results.py similarity index 73% rename from tests/automated/integration/tasks/scheduled/sync/agency/test_no_new_results.py rename to 
tests/automated/integration/tasks/scheduled/impl/sync/agency/test_no_new_results.py index 18fd263b..8c7b9abd 100644 --- a/tests/automated/integration/tasks/scheduled/sync/agency/test_no_new_results.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_no_new_results.py @@ -4,13 +4,13 @@ import pytest from sqlalchemy import select -from src.core.tasks.scheduled.sync.agency.dtos.parameters import AgencySyncParameters -from src.core.tasks.scheduled.sync.agency.operator import SyncAgenciesTaskOperator +from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters +from src.core.tasks.scheduled.impl.sync.agency.operator import SyncAgenciesTaskOperator from src.db.models.instantiations.agency.sqlalchemy import Agency from src.db.models.instantiations.state.sync.agencies import AgenciesSyncState -from tests.automated.integration.tasks.scheduled.sync.agency.data import THIRD_CALL_RESPONSE -from tests.automated.integration.tasks.scheduled.sync.agency.existence_checker import AgencyChecker -from tests.automated.integration.tasks.scheduled.sync.agency.helpers import patch_sync_agencies, check_sync_concluded +from tests.automated.integration.tasks.scheduled.impl.sync.agency.data import THIRD_CALL_RESPONSE +from tests.automated.integration.tasks.scheduled.impl.sync.agency.existence_checker import AgencyChecker +from tests.automated.integration.tasks.scheduled.impl.sync.agency.helpers import patch_sync_agencies, check_sync_concluded from tests.helpers.asserts import assert_task_run_success diff --git a/tests/automated/integration/tasks/url/agency_identification/happy_path/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/agency_identification/happy_path/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/__init__.py diff --git 
a/tests/automated/integration/tasks/scheduled/sync/data_sources/check.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/check.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/check.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/check.py diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/conftest.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/conftest.py similarity index 80% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/conftest.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/conftest.py index 017a9894..44239db8 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/conftest.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/conftest.py @@ -1,6 +1,6 @@ import pytest_asyncio -from src.core.tasks.scheduled.sync.data_sources.operator import SyncDataSourcesTaskOperator +from src.core.tasks.scheduled.impl.sync.data_sources.operator import SyncDataSourcesTaskOperator from src.external.pdap.client import PDAPClient from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/existence_checker.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/existence_checker.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/existence_checker.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/existence_checker.py diff --git a/tests/automated/integration/tasks/url/agency_identification/subtasks/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/agency_identification/subtasks/__init__.py rename to 
tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/core.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/core.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/core.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/core.py diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/data.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/data.py similarity index 87% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/data.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/data.py index 5c3df730..e4094b38 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/data.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/data.py @@ -1,10 +1,10 @@ from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.external.pdap.enums import DataSourcesURLStatus, ApprovalStatus -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.data_sources import TestDSURLSetupEntry -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.enums import SyncResponseOrder, AgencyAssigned -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.source_collector import TestSCURLSetupEntry -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.core import TestURLSetupEntry +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.data_sources import TestDSURLSetupEntry +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.enums import SyncResponseOrder, AgencyAssigned +from 
tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.source_collector import TestSCURLSetupEntry +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.core import TestURLSetupEntry ENTRIES = [ TestURLSetupEntry( diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/enums.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/enums.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/enums.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/enums.py diff --git a/tests/automated/integration/tasks/url/auto_relevant/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/auto_relevant/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/agency.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/agency.py similarity index 90% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/agency.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/agency.py index f7fd5765..c7a0ad41 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/agency.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/agency.py @@ -2,7 +2,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.models.instantiations.agency.sqlalchemy import Agency -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.enums import AgencyAssigned +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.enums import 
AgencyAssigned class AgencyAssignmentManager: diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/core.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/core.py similarity index 84% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/core.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/core.py index 79f44f88..8f1ab8fa 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/core.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/core.py @@ -2,13 +2,13 @@ from src.db.client.async_ import AsyncDatabaseClient from src.external.pdap.dtos.sync.data_sources import DataSourcesSyncResponseInnerInfo, DataSourcesSyncResponseInfo -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.enums import SyncResponseOrder -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.manager.agency import AgencyAssignmentManager -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.manager.queries.check import \ +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.enums import SyncResponseOrder +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.manager.agency import AgencyAssignmentManager +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.manager.queries.check import \ CheckURLQueryBuilder -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.manager.url import URLSetupFunctor -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.core import TestURLSetupEntry -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.post import TestURLPostSetupRecord +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.manager.url import 
URLSetupFunctor +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.core import TestURLSetupEntry +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.post import TestURLPostSetupRecord class DataSourcesSyncTestSetupManager: diff --git a/tests/automated/integration/tasks/url/duplicate/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/queries/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/duplicate/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/queries/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/queries/check.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/queries/check.py similarity index 93% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/queries/check.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/queries/check.py index c9055749..c31748d2 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/queries/check.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/queries/check.py @@ -5,7 +5,7 @@ from src.db.models.instantiations.url.core.sqlalchemy import URL from src.db.models.instantiations.url.data_source.sqlalchemy import URLDataSource from src.db.queries.base.builder import QueryBuilderBase -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.post import TestURLPostSetupRecord +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.post import TestURLPostSetupRecord from src.db.helpers.session import session_helper as sh diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/url.py 
b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/url.py similarity index 83% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/url.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/url.py index a4bd93f8..4c9fdeca 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/manager/url.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/manager/url.py @@ -5,13 +5,13 @@ from src.db.models.instantiations.url.core.enums import URLSource from src.db.models.instantiations.url.core.sqlalchemy import URL from src.external.pdap.dtos.sync.data_sources import DataSourcesSyncResponseInnerInfo -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.enums import AgencyAssigned -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.manager.agency import AgencyAssignmentManager -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.core import TestURLSetupEntry -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.data_sources import \ +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.enums import AgencyAssigned +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.manager.agency import AgencyAssignmentManager +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.core import TestURLSetupEntry +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.data_sources import \ TestDSURLSetupEntry -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.post import TestURLPostSetupRecord -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.source_collector import \ +from 
tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.post import TestURLPostSetupRecord +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.source_collector import \ TestSCURLSetupEntry diff --git a/tests/automated/integration/tasks/url/html/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/html/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/__init__.py diff --git a/tests/automated/integration/tasks/url/html/check/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/html/check/__init__.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/core.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/core.py similarity index 59% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/core.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/core.py index 54360b35..155a3ace 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/core.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/core.py @@ -1,8 +1,8 @@ from pydantic import BaseModel from src.collectors.enums import URLStatus -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.data_sources import TestDSURLSetupEntry -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.source_collector import \ +from 
tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.data_sources import TestDSURLSetupEntry +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.source_collector import \ TestSCURLSetupEntry diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/data_sources.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/data_sources.py similarity index 81% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/data_sources.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/data_sources.py index 5112dd1f..47809293 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/data_sources.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/data_sources.py @@ -2,7 +2,7 @@ from src.core.enums import RecordType from src.external.pdap.enums import DataSourcesURLStatus, ApprovalStatus -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.enums import AgencyAssigned, SyncResponseOrder +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.enums import AgencyAssigned, SyncResponseOrder class TestDSURLSetupEntry(BaseModel): diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/post.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/post.py similarity index 87% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/post.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/post.py index b16233da..e535cd56 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/post.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/post.py 
@@ -3,9 +3,9 @@ from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.external.pdap.dtos.sync.data_sources import DataSourcesSyncResponseInnerInfo -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.data_sources import \ +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.data_sources import \ TestDSURLSetupEntry -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.models.url.source_collector import \ +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.models.url.source_collector import \ TestSCURLSetupEntry diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/source_collector.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/source_collector.py similarity index 79% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/source_collector.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/source_collector.py index 83092f7e..c151d783 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/setup/models/url/source_collector.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/models/url/source_collector.py @@ -2,7 +2,7 @@ from src.collectors.enums import URLStatus from src.core.enums import RecordType -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.enums import AgencyAssigned +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.enums import AgencyAssigned class TestSCURLSetupEntry(BaseModel): diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/test_happy_path.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_happy_path.py similarity index 68% rename from 
tests/automated/integration/tasks/scheduled/sync/data_sources/test_happy_path.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_happy_path.py index 0b71b28c..d1042e66 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/test_happy_path.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_happy_path.py @@ -2,13 +2,13 @@ import pytest -from src.core.tasks.scheduled.sync.data_sources.params import DataSourcesSyncParameters -from src.core.tasks.scheduled.sync.data_sources.operator import SyncDataSourcesTaskOperator -from tests.automated.integration.tasks.scheduled.sync.data_sources.check import check_sync_concluded -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.core import patch_sync_data_sources -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.data import ENTRIES -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.enums import SyncResponseOrder -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.manager.core import \ +from src.core.tasks.scheduled.impl.sync.data_sources.operator import SyncDataSourcesTaskOperator +from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.check import check_sync_concluded +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.core import patch_sync_data_sources +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.data import ENTRIES +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.enums import SyncResponseOrder +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.manager.core import \ DataSourcesSyncTestSetupManager from tests.helpers.asserts import assert_task_run_success diff --git 
a/tests/automated/integration/tasks/scheduled/sync/data_sources/test_interruption.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_interruption.py similarity index 74% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/test_interruption.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_interruption.py index 81fb8806..4b98094f 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/test_interruption.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_interruption.py @@ -1,14 +1,14 @@ import pytest from sqlalchemy import select -from src.core.tasks.scheduled.sync.data_sources.operator import SyncDataSourcesTaskOperator +from src.core.tasks.scheduled.impl.sync.data_sources.operator import SyncDataSourcesTaskOperator from src.core.tasks.url.enums import TaskOperatorOutcome from src.db.models.instantiations.state.sync.data_sources import DataSourcesSyncState -from tests.automated.integration.tasks.scheduled.sync.data_sources.check import check_sync_concluded -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.core import patch_sync_data_sources -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.data import ENTRIES -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.enums import SyncResponseOrder -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.manager.core import \ +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.check import check_sync_concluded +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.core import patch_sync_data_sources +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.data import ENTRIES +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.enums import SyncResponseOrder +from 
tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.manager.core import \ DataSourcesSyncTestSetupManager diff --git a/tests/automated/integration/tasks/scheduled/sync/data_sources/test_no_new_results.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_no_new_results.py similarity index 68% rename from tests/automated/integration/tasks/scheduled/sync/data_sources/test_no_new_results.py rename to tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_no_new_results.py index 880c2ef3..d3181f90 100644 --- a/tests/automated/integration/tasks/scheduled/sync/data_sources/test_no_new_results.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_no_new_results.py @@ -3,14 +3,14 @@ import pytest -from src.core.tasks.scheduled.sync.data_sources.operator import SyncDataSourcesTaskOperator -from src.core.tasks.scheduled.sync.data_sources.params import DataSourcesSyncParameters +from src.core.tasks.scheduled.impl.sync.data_sources.operator import SyncDataSourcesTaskOperator +from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters from src.db.models.instantiations.state.sync.data_sources import DataSourcesSyncState -from tests.automated.integration.tasks.scheduled.sync.data_sources.check import check_sync_concluded -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.core import patch_sync_data_sources -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.data import ENTRIES -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.enums import SyncResponseOrder -from tests.automated.integration.tasks.scheduled.sync.data_sources.setup.manager.core import \ +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.check import check_sync_concluded +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.core import patch_sync_data_sources +from 
tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.data import ENTRIES +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.enums import SyncResponseOrder +from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.manager.core import \ DataSourcesSyncTestSetupManager from tests.helpers.asserts import assert_task_run_success diff --git a/tests/automated/integration/tasks/url/html/mocks/__init__.py b/tests/automated/integration/tasks/scheduled/loader/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/html/mocks/__init__.py rename to tests/automated/integration/tasks/scheduled/loader/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/loader/conftest.py b/tests/automated/integration/tasks/scheduled/loader/conftest.py new file mode 100644 index 00000000..67f18283 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/loader/conftest.py @@ -0,0 +1,20 @@ +from unittest.mock import AsyncMock, create_autospec + +import pytest + +from src.core.core import AsyncCore +from src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader +from src.db.client.async_ import AsyncDatabaseClient +from src.external.huggingface.hub.client import HuggingFaceHubClient +from src.external.pdap.client import PDAPClient + + +@pytest.fixture(scope="session") +def loader() -> ScheduledTaskOperatorLoader: + """Setup loader with mock dependencies""" + return ScheduledTaskOperatorLoader( + async_core=create_autospec(AsyncCore, instance=True), + adb_client=AsyncMock(spec=AsyncDatabaseClient), + pdap_client=AsyncMock(spec=PDAPClient), + hf_client=AsyncMock(spec=HuggingFaceHubClient) + ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/loader/test_flags.py b/tests/automated/integration/tasks/scheduled/loader/test_flags.py new file mode 100644 index 00000000..8176dc11 --- /dev/null +++ 
b/tests/automated/integration/tasks/scheduled/loader/test_flags.py @@ -0,0 +1,62 @@ +import pytest +from pydantic import BaseModel + +from src.core.tasks.scheduled.impl.backlog.operator import PopulateBacklogSnapshotTaskOperator +from src.core.tasks.scheduled.impl.delete_logs.operator import DeleteOldLogsTaskOperator +from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator +from src.core.tasks.scheduled.impl.run_url_tasks.operator import RunURLTasksTaskOperator +from src.core.tasks.scheduled.impl.sync.agency.operator import SyncAgenciesTaskOperator +from src.core.tasks.scheduled.impl.sync.data_sources.operator import SyncDataSourcesTaskOperator +from src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader +from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry +from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase + + +class FlagTestParams(BaseModel): + + class Config: + arbitrary_types_allowed = True + + env_var: str + operator: type[ScheduledTaskOperatorBase] + +params: list[FlagTestParams] = [ + FlagTestParams( + env_var="SYNC_AGENCIES_TASK_FLAG", + operator=SyncAgenciesTaskOperator + ), + FlagTestParams( + env_var="SYNC_DATA_SOURCES_TASK_FLAG", + operator=SyncDataSourcesTaskOperator + ), + FlagTestParams( + env_var="PUSH_TO_HUGGING_FACE_TASK_FLAG", + operator=PushToHuggingFaceTaskOperator + ), + FlagTestParams( + env_var="POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG", + operator=PopulateBacklogSnapshotTaskOperator + ), + FlagTestParams( + env_var="DELETE_OLD_LOGS_TASK_FLAG", + operator=DeleteOldLogsTaskOperator + ), + FlagTestParams( + env_var="RUN_URL_TASKS_TASK_FLAG", + operator=RunURLTasksTaskOperator + ) +] + + +@pytest.mark.asyncio +@pytest.mark.parametrize("flag_test_params", params) +async def test_flag_enabled( + flag_test_params: FlagTestParams, + monkeypatch, + loader: ScheduledTaskOperatorLoader +): + monkeypatch.setenv(flag_test_params.env_var, "0") + entries: 
list[ScheduledTaskEntry] = await loader.load_entries() + for entry in entries: + if isinstance(entry.operator, flag_test_params.operator): + assert not entry.enabled, f"Flag associated with env_var {flag_test_params.env_var} should be disabled" diff --git a/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py b/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py new file mode 100644 index 00000000..1fbf24a7 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py @@ -0,0 +1,15 @@ +import pytest + +from src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader + +NUMBER_OF_ENTRIES = 6 + +@pytest.mark.asyncio +async def test_happy_path( + loader: ScheduledTaskOperatorLoader +): + """ + Under normal circumstances, all task operators should be returned + """ + entries = await loader.load_entries() + assert len(entries) == NUMBER_OF_ENTRIES \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/html/mocks/url_request_interface/__init__.py b/tests/automated/integration/tasks/scheduled/manager/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/html/mocks/url_request_interface/__init__.py rename to tests/automated/integration/tasks/scheduled/manager/__init__.py diff --git a/tests/automated/integration/tasks/scheduled/manager/conftest.py b/tests/automated/integration/tasks/scheduled/manager/conftest.py new file mode 100644 index 00000000..5cd92c57 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/manager/conftest.py @@ -0,0 +1,41 @@ +from unittest.mock import create_autospec + +import pytest +from discord_poster import DiscordPoster + +from src.core.tasks.handler import TaskHandler +from src.core.tasks.scheduled.enums import IntervalEnum +from src.core.tasks.scheduled.impl.backlog.operator import PopulateBacklogSnapshotTaskOperator +from src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader +from 
src.core.tasks.scheduled.manager import AsyncScheduledTaskManager +from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry +from src.core.tasks.scheduled.registry.core import ScheduledJobRegistry +from src.db.client.async_ import AsyncDatabaseClient + + +@pytest.fixture +def manager(adb_client_test: AsyncDatabaseClient) -> AsyncScheduledTaskManager: + mock_discord_poster = create_autospec(DiscordPoster, instance=True) + + task_handler = TaskHandler( + adb_client=adb_client_test, + discord_poster=mock_discord_poster + ) + mock_loader = create_autospec( + ScheduledTaskOperatorLoader, + instance=True + ) + mock_loader.load_entries.return_value = [ + ScheduledTaskEntry( + operator=PopulateBacklogSnapshotTaskOperator(adb_client=adb_client_test), + interval=IntervalEnum.DAILY, + enabled=True + ) + ] + registry = ScheduledJobRegistry() + + return AsyncScheduledTaskManager( + handler=task_handler, + loader=mock_loader, + registry=registry + ) diff --git a/tests/automated/integration/tasks/scheduled/manager/test_add_scheduled_tasks.py b/tests/automated/integration/tasks/scheduled/manager/test_add_scheduled_tasks.py new file mode 100644 index 00000000..c8282cce --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/manager/test_add_scheduled_tasks.py @@ -0,0 +1,11 @@ +import pytest + +from src.core.tasks.scheduled.manager import AsyncScheduledTaskManager + + +@pytest.mark.asyncio +async def test_add_scheduled_tasks(manager: AsyncScheduledTaskManager): + await manager.setup() + + assert len(manager._registry._jobs) == 1 + diff --git a/tests/automated/integration/tasks/url/html/setup/__init__.py b/tests/automated/integration/tasks/url/impl/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/html/setup/__init__.py rename to tests/automated/integration/tasks/url/impl/__init__.py diff --git a/tests/automated/integration/tasks/url/html/setup/models/__init__.py 
b/tests/automated/integration/tasks/url/impl/agency_identification/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/html/setup/models/__init__.py rename to tests/automated/integration/tasks/url/impl/agency_identification/__init__.py diff --git a/tests/automated/integration/tasks/url/probe/__init__.py b/tests/automated/integration/tasks/url/impl/agency_identification/happy_path/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/__init__.py rename to tests/automated/integration/tasks/url/impl/agency_identification/happy_path/__init__.py diff --git a/tests/automated/integration/tasks/url/agency_identification/happy_path/asserts.py b/tests/automated/integration/tasks/url/impl/agency_identification/happy_path/asserts.py similarity index 100% rename from tests/automated/integration/tasks/url/agency_identification/happy_path/asserts.py rename to tests/automated/integration/tasks/url/impl/agency_identification/happy_path/asserts.py diff --git a/tests/automated/integration/tasks/url/agency_identification/happy_path/conftest.py b/tests/automated/integration/tasks/url/impl/agency_identification/happy_path/conftest.py similarity index 89% rename from tests/automated/integration/tasks/url/agency_identification/happy_path/conftest.py rename to tests/automated/integration/tasks/url/impl/agency_identification/happy_path/conftest.py index d3a95856..68e33158 100644 --- a/tests/automated/integration/tasks/url/agency_identification/happy_path/conftest.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/happy_path/conftest.py @@ -7,7 +7,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.loader import AgencyIdentificationSubtaskLoader from src.db.client.async_ import AsyncDatabaseClient from src.external.pdap.client import PDAPClient -from tests.automated.integration.tasks.url.agency_identification.happy_path.mock import mock_run_subtask +from 
tests.automated.integration.tasks.url.impl.agency_identification.happy_path.mock import mock_run_subtask @pytest.fixture diff --git a/tests/automated/integration/tasks/url/agency_identification/happy_path/data.py b/tests/automated/integration/tasks/url/impl/agency_identification/happy_path/data.py similarity index 100% rename from tests/automated/integration/tasks/url/agency_identification/happy_path/data.py rename to tests/automated/integration/tasks/url/impl/agency_identification/happy_path/data.py diff --git a/tests/automated/integration/tasks/url/agency_identification/happy_path/mock.py b/tests/automated/integration/tasks/url/impl/agency_identification/happy_path/mock.py similarity index 83% rename from tests/automated/integration/tasks/url/agency_identification/happy_path/mock.py rename to tests/automated/integration/tasks/url/impl/agency_identification/happy_path/mock.py index cec98d3c..a4dcb227 100644 --- a/tests/automated/integration/tasks/url/agency_identification/happy_path/mock.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/happy_path/mock.py @@ -2,7 +2,7 @@ from typing import Optional from src.core.enums import SuggestionType -from tests.automated.integration.tasks.url.agency_identification.happy_path.data import SAMPLE_AGENCY_SUGGESTIONS +from tests.automated.integration.tasks.url.impl.agency_identification.happy_path.data import SAMPLE_AGENCY_SUGGESTIONS async def mock_run_subtask( diff --git a/tests/automated/integration/tasks/url/agency_identification/happy_path/test_happy_path.py b/tests/automated/integration/tasks/url/impl/agency_identification/happy_path/test_happy_path.py similarity index 98% rename from tests/automated/integration/tasks/url/agency_identification/happy_path/test_happy_path.py rename to tests/automated/integration/tasks/url/impl/agency_identification/happy_path/test_happy_path.py index 7eb5a7f9..57c62fc3 100644 --- 
a/tests/automated/integration/tasks/url/agency_identification/happy_path/test_happy_path.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/happy_path/test_happy_path.py @@ -10,7 +10,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.impl.muckrock import \ MuckrockAgencyIdentificationSubtask from src.core.tasks.url.operators.agency_identification.subtasks.impl.unknown import UnknownAgencyIdentificationSubtask -from tests.automated.integration.tasks.url.agency_identification.happy_path.asserts import \ +from tests.automated.integration.tasks.url.impl.agency_identification.happy_path.asserts import \ assert_expected_confirmed_and_auto_suggestions from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters diff --git a/tests/automated/integration/tasks/url/probe/check/__init__.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/check/__init__.py rename to tests/automated/integration/tasks/url/impl/agency_identification/subtasks/__init__.py diff --git a/tests/automated/integration/tasks/url/agency_identification/subtasks/test_ckan.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/test_ckan.py similarity index 100% rename from tests/automated/integration/tasks/url/agency_identification/subtasks/test_ckan.py rename to tests/automated/integration/tasks/url/impl/agency_identification/subtasks/test_ckan.py diff --git a/tests/automated/integration/tasks/url/agency_identification/subtasks/test_muckrock.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/test_muckrock.py similarity index 100% rename from tests/automated/integration/tasks/url/agency_identification/subtasks/test_muckrock.py rename to 
tests/automated/integration/tasks/url/impl/agency_identification/subtasks/test_muckrock.py diff --git a/tests/automated/integration/tasks/url/agency_identification/subtasks/test_unknown.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/test_unknown.py similarity index 100% rename from tests/automated/integration/tasks/url/agency_identification/subtasks/test_unknown.py rename to tests/automated/integration/tasks/url/impl/agency_identification/subtasks/test_unknown.py diff --git a/tests/automated/integration/tasks/asserts.py b/tests/automated/integration/tasks/url/impl/asserts.py similarity index 100% rename from tests/automated/integration/tasks/asserts.py rename to tests/automated/integration/tasks/url/impl/asserts.py diff --git a/tests/automated/integration/tasks/url/probe/mocks/__init__.py b/tests/automated/integration/tasks/url/impl/auto_relevant/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/mocks/__init__.py rename to tests/automated/integration/tasks/url/impl/auto_relevant/__init__.py diff --git a/tests/automated/integration/tasks/url/auto_relevant/setup.py b/tests/automated/integration/tasks/url/impl/auto_relevant/setup.py similarity index 100% rename from tests/automated/integration/tasks/url/auto_relevant/setup.py rename to tests/automated/integration/tasks/url/impl/auto_relevant/setup.py diff --git a/tests/automated/integration/tasks/url/auto_relevant/test_task.py b/tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py similarity index 88% rename from tests/automated/integration/tasks/url/auto_relevant/test_task.py rename to tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py index fab2edfe..be44c42a 100644 --- a/tests/automated/integration/tasks/url/auto_relevant/test_task.py +++ b/tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py @@ -7,9 +7,9 @@ from src.db.models.instantiations.url.core.sqlalchemy import URL from 
src.db.models.instantiations.url.error_info.sqlalchemy import URLErrorInfo from src.db.models.instantiations.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion -from tests.automated.integration.tasks.asserts import assert_prereqs_not_met, assert_url_task_has_expected_run_info, \ +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_url_task_has_expected_run_info, \ assert_prereqs_met -from tests.automated.integration.tasks.url.auto_relevant.setup import setup_operator, setup_urls +from tests.automated.integration.tasks.url.impl.auto_relevant.setup import setup_operator, setup_urls @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/url/probe/models/__init__.py b/tests/automated/integration/tasks/url/impl/duplicate/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/models/__init__.py rename to tests/automated/integration/tasks/url/impl/duplicate/__init__.py diff --git a/tests/automated/integration/tasks/url/duplicate/constants.py b/tests/automated/integration/tasks/url/impl/duplicate/constants.py similarity index 100% rename from tests/automated/integration/tasks/url/duplicate/constants.py rename to tests/automated/integration/tasks/url/impl/duplicate/constants.py diff --git a/tests/automated/integration/tasks/url/duplicate/test_url_duplicate_task.py b/tests/automated/integration/tasks/url/impl/duplicate/test_url_duplicate_task.py similarity index 96% rename from tests/automated/integration/tasks/url/duplicate/test_url_duplicate_task.py rename to tests/automated/integration/tasks/url/impl/duplicate/test_url_duplicate_task.py index 2f4e64b5..e20fd883 100644 --- a/tests/automated/integration/tasks/url/duplicate/test_url_duplicate_task.py +++ b/tests/automated/integration/tasks/url/impl/duplicate/test_url_duplicate_task.py @@ -9,7 +9,7 @@ from src.db.models.instantiations.url.core.sqlalchemy import URL from src.collectors.enums import URLStatus from 
src.core.tasks.url.enums import TaskOperatorOutcome -from tests.automated.integration.tasks.url.duplicate.constants import BATCH_CREATION_PARAMETERS +from tests.automated.integration.tasks.url.impl.duplicate.constants import BATCH_CREATION_PARAMETERS from tests.helpers.data_creator.core import DBDataCreator from pdap_access_manager import ResponseInfo from src.external.pdap.client import PDAPClient diff --git a/tests/automated/integration/tasks/url/probe/no_redirect/__init__.py b/tests/automated/integration/tasks/url/impl/html/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/no_redirect/__init__.py rename to tests/automated/integration/tasks/url/impl/html/__init__.py diff --git a/tests/automated/integration/tasks/url/probe/redirect/__init__.py b/tests/automated/integration/tasks/url/impl/html/check/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/redirect/__init__.py rename to tests/automated/integration/tasks/url/impl/html/check/__init__.py diff --git a/tests/automated/integration/tasks/url/html/check/manager.py b/tests/automated/integration/tasks/url/impl/html/check/manager.py similarity index 96% rename from tests/automated/integration/tasks/url/html/check/manager.py rename to tests/automated/integration/tasks/url/impl/html/check/manager.py index 71a48b42..9b30a4f8 100644 --- a/tests/automated/integration/tasks/url/html/check/manager.py +++ b/tests/automated/integration/tasks/url/impl/html/check/manager.py @@ -3,7 +3,7 @@ from src.db.models.instantiations.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.instantiations.url.scrape_info.sqlalchemy import URLScrapeInfo from src.db.models.instantiations.url.web_metadata.sqlalchemy import URLWebMetadata -from tests.automated.integration.tasks.url.html.setup.models.record import TestURLHTMLTaskSetupRecord +from tests.automated.integration.tasks.url.impl.html.setup.models.record import TestURLHTMLTaskSetupRecord 
class TestURLHTMLTaskCheckManager: diff --git a/tests/automated/integration/tasks/url/probe/redirect/dest_new/__init__.py b/tests/automated/integration/tasks/url/impl/html/mocks/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/redirect/dest_new/__init__.py rename to tests/automated/integration/tasks/url/impl/html/mocks/__init__.py diff --git a/tests/automated/integration/tasks/url/html/mocks/methods.py b/tests/automated/integration/tasks/url/impl/html/mocks/methods.py similarity index 100% rename from tests/automated/integration/tasks/url/html/mocks/methods.py rename to tests/automated/integration/tasks/url/impl/html/mocks/methods.py diff --git a/tests/automated/integration/tasks/url/probe/setup/__init__.py b/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/setup/__init__.py rename to tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/__init__.py diff --git a/tests/automated/integration/tasks/url/html/mocks/url_request_interface/core.py b/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/core.py similarity index 75% rename from tests/automated/integration/tasks/url/html/mocks/url_request_interface/core.py rename to tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/core.py index a8dde5b5..49e6b1f3 100644 --- a/tests/automated/integration/tasks/url/html/mocks/url_request_interface/core.py +++ b/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/core.py @@ -1,5 +1,5 @@ from src.external.url_request.dtos.url_response import URLResponseInfo -from tests.automated.integration.tasks.url.html.mocks.url_request_interface.setup import setup_url_to_response_info +from tests.automated.integration.tasks.url.impl.html.mocks.url_request_interface.setup import setup_url_to_response_info class MockURLRequestInterface: diff 
--git a/tests/automated/integration/tasks/url/html/mocks/url_request_interface/setup.py b/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/setup.py similarity index 85% rename from tests/automated/integration/tasks/url/html/mocks/url_request_interface/setup.py rename to tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/setup.py index cff46013..76f1969e 100644 --- a/tests/automated/integration/tasks/url/html/mocks/url_request_interface/setup.py +++ b/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/setup.py @@ -1,8 +1,8 @@ from http import HTTPStatus from src.external.url_request.dtos.url_response import URLResponseInfo -from tests.automated.integration.tasks.url.html.setup.data import TEST_ENTRIES -from tests.automated.integration.tasks.url.html.setup.models.entry import TestURLHTMLTaskSetupEntry, TestErrorType +from tests.automated.integration.tasks.url.impl.html.setup.data import TEST_ENTRIES +from tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry, TestErrorType from tests.helpers.simple_test_data_functions import generate_test_html diff --git a/tests/automated/integration/tasks/url/submit_approved/__init__.py b/tests/automated/integration/tasks/url/impl/html/setup/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/submit_approved/__init__.py rename to tests/automated/integration/tasks/url/impl/html/setup/__init__.py diff --git a/tests/automated/integration/tasks/url/html/setup/data.py b/tests/automated/integration/tasks/url/impl/html/setup/data.py similarity index 96% rename from tests/automated/integration/tasks/url/html/setup/data.py rename to tests/automated/integration/tasks/url/impl/html/setup/data.py index 9c488484..7d3f0028 100644 --- a/tests/automated/integration/tasks/url/html/setup/data.py +++ b/tests/automated/integration/tasks/url/impl/html/setup/data.py @@ -2,7 +2,7 @@ from 
src.collectors.enums import URLStatus from src.db.models.instantiations.url.scrape_info.enums import ScrapeStatus -from tests.automated.integration.tasks.url.html.setup.models.entry import TestURLHTMLTaskSetupEntry, TestURLInfo, \ +from tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry, TestURLInfo, \ TestWebMetadataInfo, ExpectedResult, TestErrorType TEST_ENTRIES = [ diff --git a/tests/automated/integration/tasks/url/html/setup/manager.py b/tests/automated/integration/tasks/url/impl/html/setup/manager.py similarity index 88% rename from tests/automated/integration/tasks/url/html/setup/manager.py rename to tests/automated/integration/tasks/url/impl/html/setup/manager.py index 7cfac879..eee71462 100644 --- a/tests/automated/integration/tasks/url/html/setup/manager.py +++ b/tests/automated/integration/tasks/url/impl/html/setup/manager.py @@ -8,10 +8,10 @@ from src.db.models.instantiations.url.core.enums import URLSource from src.db.models.instantiations.url.core.pydantic.insert import URLInsertModel from src.db.models.instantiations.url.web_metadata.insert import URLWebMetadataPydantic -from tests.automated.integration.tasks.url.html.mocks.methods import mock_get_from_cache, mock_parse -from tests.automated.integration.tasks.url.html.mocks.url_request_interface.core import MockURLRequestInterface -from tests.automated.integration.tasks.url.html.setup.data import TEST_ENTRIES -from tests.automated.integration.tasks.url.html.setup.models.record import TestURLHTMLTaskSetupRecord +from tests.automated.integration.tasks.url.impl.html.mocks.methods import mock_get_from_cache, mock_parse +from tests.automated.integration.tasks.url.impl.html.mocks.url_request_interface.core import MockURLRequestInterface +from tests.automated.integration.tasks.url.impl.html.setup.data import TEST_ENTRIES +from tests.automated.integration.tasks.url.impl.html.setup.models.record import TestURLHTMLTaskSetupRecord class 
TestURLHTMLTaskSetupManager: diff --git a/tests/automated/integration/tasks/url/impl/html/setup/models/__init__.py b/tests/automated/integration/tasks/url/impl/html/setup/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/html/setup/models/entry.py b/tests/automated/integration/tasks/url/impl/html/setup/models/entry.py similarity index 100% rename from tests/automated/integration/tasks/url/html/setup/models/entry.py rename to tests/automated/integration/tasks/url/impl/html/setup/models/entry.py diff --git a/tests/automated/integration/tasks/url/html/setup/models/record.py b/tests/automated/integration/tasks/url/impl/html/setup/models/record.py similarity index 55% rename from tests/automated/integration/tasks/url/html/setup/models/record.py rename to tests/automated/integration/tasks/url/impl/html/setup/models/record.py index 7902dd81..022c9639 100644 --- a/tests/automated/integration/tasks/url/html/setup/models/record.py +++ b/tests/automated/integration/tasks/url/impl/html/setup/models/record.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from tests.automated.integration.tasks.url.html.setup.models.entry import TestURLHTMLTaskSetupEntry +from tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry class TestURLHTMLTaskSetupRecord(BaseModel): diff --git a/tests/automated/integration/tasks/url/html/test_task.py b/tests/automated/integration/tasks/url/impl/html/test_task.py similarity index 74% rename from tests/automated/integration/tasks/url/html/test_task.py rename to tests/automated/integration/tasks/url/impl/html/test_task.py index fe059838..8d4de418 100644 --- a/tests/automated/integration/tasks/url/html/test_task.py +++ b/tests/automated/integration/tasks/url/impl/html/test_task.py @@ -2,10 +2,10 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType -from tests.automated.integration.tasks.asserts import 
assert_prereqs_not_met, assert_prereqs_met, \ +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_prereqs_met, \ assert_task_ran_without_error -from tests.automated.integration.tasks.url.html.check.manager import TestURLHTMLTaskCheckManager -from tests.automated.integration.tasks.url.html.setup.manager import setup_operator, \ +from tests.automated.integration.tasks.url.impl.html.check.manager import TestURLHTMLTaskCheckManager +from tests.automated.integration.tasks.url.impl.html.setup.manager import setup_operator, \ TestURLHTMLTaskSetupManager diff --git a/tests/automated/integration/tasks/url/impl/probe/__init__.py b/tests/automated/integration/tasks/url/impl/probe/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/impl/probe/check/__init__.py b/tests/automated/integration/tasks/url/impl/probe/check/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/probe/check/manager.py b/tests/automated/integration/tasks/url/impl/probe/check/manager.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/check/manager.py rename to tests/automated/integration/tasks/url/impl/probe/check/manager.py diff --git a/tests/automated/integration/tasks/url/probe/conftest.py b/tests/automated/integration/tasks/url/impl/probe/conftest.py similarity index 68% rename from tests/automated/integration/tasks/url/probe/conftest.py rename to tests/automated/integration/tasks/url/impl/probe/conftest.py index 45d3d820..1c390288 100644 --- a/tests/automated/integration/tasks/url/probe/conftest.py +++ b/tests/automated/integration/tasks/url/impl/probe/conftest.py @@ -1,8 +1,8 @@ import pytest from src.db.client.async_ import AsyncDatabaseClient -from tests.automated.integration.tasks.url.probe.check.manager import TestURLProbeCheckManager -from tests.automated.integration.tasks.url.probe.setup.manager import 
TestURLProbeSetupManager +from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager +from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @pytest.fixture diff --git a/tests/automated/integration/tasks/url/probe/constants.py b/tests/automated/integration/tasks/url/impl/probe/constants.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/constants.py rename to tests/automated/integration/tasks/url/impl/probe/constants.py diff --git a/tests/automated/integration/tasks/url/impl/probe/mocks/__init__.py b/tests/automated/integration/tasks/url/impl/probe/mocks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/probe/mocks/url_request_interface.py b/tests/automated/integration/tasks/url/impl/probe/mocks/url_request_interface.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/mocks/url_request_interface.py rename to tests/automated/integration/tasks/url/impl/probe/mocks/url_request_interface.py diff --git a/tests/automated/integration/tasks/url/impl/probe/models/__init__.py b/tests/automated/integration/tasks/url/impl/probe/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/probe/models/entry.py b/tests/automated/integration/tasks/url/impl/probe/models/entry.py similarity index 100% rename from tests/automated/integration/tasks/url/probe/models/entry.py rename to tests/automated/integration/tasks/url/impl/probe/models/entry.py diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/__init__.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/probe/no_redirect/test_error.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py similarity index 81% 
rename from tests/automated/integration/tasks/url/probe/no_redirect/test_error.py rename to tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py index c62498c2..924efb5c 100644 --- a/tests/automated/integration/tasks/url/probe/no_redirect/test_error.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py @@ -1,9 +1,9 @@ import pytest from src.collectors.enums import URLStatus -from tests.automated.integration.tasks.asserts import assert_task_ran_without_error -from tests.automated.integration.tasks.url.probe.check.manager import TestURLProbeCheckManager -from tests.automated.integration.tasks.url.probe.setup.manager import TestURLProbeSetupManager +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error +from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager +from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/url/probe/no_redirect/test_not_found.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py similarity index 81% rename from tests/automated/integration/tasks/url/probe/no_redirect/test_not_found.py rename to tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py index 44dab7f5..400cf3d1 100644 --- a/tests/automated/integration/tasks/url/probe/no_redirect/test_not_found.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py @@ -1,9 +1,9 @@ import pytest from src.collectors.enums import URLStatus -from tests.automated.integration.tasks.asserts import assert_task_ran_without_error -from tests.automated.integration.tasks.url.probe.check.manager import TestURLProbeCheckManager -from tests.automated.integration.tasks.url.probe.setup.manager import TestURLProbeSetupManager +from tests.automated.integration.tasks.url.impl.asserts import 
assert_task_ran_without_error +from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager +from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/url/probe/no_redirect/test_ok.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py similarity index 81% rename from tests/automated/integration/tasks/url/probe/no_redirect/test_ok.py rename to tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py index 607e503d..2d0dd641 100644 --- a/tests/automated/integration/tasks/url/probe/no_redirect/test_ok.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py @@ -1,9 +1,9 @@ import pytest from src.collectors.enums import URLStatus -from tests.automated.integration.tasks.asserts import assert_task_ran_without_error -from tests.automated.integration.tasks.url.probe.check.manager import TestURLProbeCheckManager -from tests.automated.integration.tasks.url.probe.setup.manager import TestURLProbeSetupManager +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error +from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager +from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/url/probe/no_redirect/test_two_urls.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py similarity index 81% rename from tests/automated/integration/tasks/url/probe/no_redirect/test_two_urls.py rename to tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py index a67d7713..aa531de0 100644 --- a/tests/automated/integration/tasks/url/probe/no_redirect/test_two_urls.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py @@ 
-2,9 +2,9 @@ from src.collectors.enums import URLStatus from src.db.models.instantiations.url.core.sqlalchemy import URL -from tests.automated.integration.tasks.asserts import assert_task_ran_without_error -from tests.automated.integration.tasks.url.probe.check.manager import TestURLProbeCheckManager -from tests.automated.integration.tasks.url.probe.setup.manager import TestURLProbeSetupManager +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error +from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager +from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/__init__.py b/tests/automated/integration/tasks/url/impl/probe/redirect/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/probe/redirect/dest_new/README.md b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/README.md similarity index 100% rename from tests/automated/integration/tasks/url/probe/redirect/dest_new/README.md rename to tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/README.md diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/__init__.py b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/probe/redirect/dest_new/test_dest_ok.py b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py similarity index 84% rename from tests/automated/integration/tasks/url/probe/redirect/dest_new/test_dest_ok.py rename to tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py index acb7c1a8..7c589bd7 100644 --- a/tests/automated/integration/tasks/url/probe/redirect/dest_new/test_dest_ok.py +++ 
b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py @@ -1,9 +1,9 @@ import pytest from src.collectors.enums import URLStatus -from tests.automated.integration.tasks.asserts import assert_task_ran_without_error -from tests.automated.integration.tasks.url.probe.check.manager import TestURLProbeCheckManager -from tests.automated.integration.tasks.url.probe.setup.manager import TestURLProbeSetupManager +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error +from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager +from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/url/probe/redirect/test_dest_exists_in_db.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py similarity index 85% rename from tests/automated/integration/tasks/url/probe/redirect/test_dest_exists_in_db.py rename to tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py index 9dbb03d6..398b6828 100644 --- a/tests/automated/integration/tasks/url/probe/redirect/test_dest_exists_in_db.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py @@ -2,10 +2,10 @@ from src.collectors.enums import URLStatus from src.db.models.instantiations.url.web_metadata.insert import URLWebMetadataPydantic -from tests.automated.integration.tasks.asserts import assert_task_ran_without_error -from tests.automated.integration.tasks.url.probe.check.manager import TestURLProbeCheckManager -from tests.automated.integration.tasks.url.probe.constants import TEST_DEST_URL -from tests.automated.integration.tasks.url.probe.setup.manager import TestURLProbeSetupManager +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error +from 
tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager +from tests.automated.integration.tasks.url.impl.probe.constants import TEST_DEST_URL +from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/url/probe/redirect/test_redirect_infinite.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_redirect_infinite.py similarity index 81% rename from tests/automated/integration/tasks/url/probe/redirect/test_redirect_infinite.py rename to tests/automated/integration/tasks/url/impl/probe/redirect/test_redirect_infinite.py index 637c3a63..c6ef468f 100644 --- a/tests/automated/integration/tasks/url/probe/redirect/test_redirect_infinite.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_redirect_infinite.py @@ -1,9 +1,9 @@ import pytest from src.collectors.enums import URLStatus -from tests.automated.integration.tasks.url.probe.check.manager import TestURLProbeCheckManager -from tests.automated.integration.tasks.url.probe.constants import TEST_URL -from tests.automated.integration.tasks.url.probe.setup.manager import TestURLProbeSetupManager +from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager +from tests.automated.integration.tasks.url.impl.probe.constants import TEST_URL +from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/url/probe/redirect/test_two_urls_same_dest.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py similarity index 85% rename from tests/automated/integration/tasks/url/probe/redirect/test_two_urls_same_dest.py rename to tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py index 0104b5ee..47d2ae34 100644 --- 
a/tests/automated/integration/tasks/url/probe/redirect/test_two_urls_same_dest.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py @@ -1,9 +1,9 @@ import pytest from src.collectors.enums import URLStatus -from tests.automated.integration.tasks.asserts import assert_task_ran_without_error -from tests.automated.integration.tasks.url.probe.check.manager import TestURLProbeCheckManager -from tests.automated.integration.tasks.url.probe.setup.manager import TestURLProbeSetupManager +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error +from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager +from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/url/impl/probe/setup/__init__.py b/tests/automated/integration/tasks/url/impl/probe/setup/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/probe/setup/manager.py b/tests/automated/integration/tasks/url/impl/probe/setup/manager.py similarity index 93% rename from tests/automated/integration/tasks/url/probe/setup/manager.py rename to tests/automated/integration/tasks/url/impl/probe/setup/manager.py index 3e0635ed..fe52e133 100644 --- a/tests/automated/integration/tasks/url/probe/setup/manager.py +++ b/tests/automated/integration/tasks/url/impl/probe/setup/manager.py @@ -8,8 +8,8 @@ from src.external.url_request.probe.models.redirect import URLProbeRedirectResponsePair from src.external.url_request.probe.models.response import URLProbeResponse from src.external.url_request.probe.models.wrapper import URLProbeResponseOuterWrapper -from tests.automated.integration.tasks.url.probe.constants import TEST_URL, TEST_DEST_URL, TEST_SOURCE -from tests.automated.integration.tasks.url.probe.mocks.url_request_interface import MockURLRequestInterface 
+from tests.automated.integration.tasks.url.impl.probe.constants import TEST_URL, TEST_DEST_URL, TEST_SOURCE +from tests.automated.integration.tasks.url.impl.probe.mocks.url_request_interface import MockURLRequestInterface class TestURLProbeSetupManager: diff --git a/tests/automated/integration/tasks/url/impl/submit_approved/__init__.py b/tests/automated/integration/tasks/url/impl/submit_approved/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/submit_approved/mock.py b/tests/automated/integration/tasks/url/impl/submit_approved/mock.py similarity index 100% rename from tests/automated/integration/tasks/url/submit_approved/mock.py rename to tests/automated/integration/tasks/url/impl/submit_approved/mock.py diff --git a/tests/automated/integration/tasks/url/submit_approved/setup.py b/tests/automated/integration/tasks/url/impl/submit_approved/setup.py similarity index 100% rename from tests/automated/integration/tasks/url/submit_approved/setup.py rename to tests/automated/integration/tasks/url/impl/submit_approved/setup.py diff --git a/tests/automated/integration/tasks/url/submit_approved/test_submit_approved_url_task.py b/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py similarity index 96% rename from tests/automated/integration/tasks/url/submit_approved/test_submit_approved_url_task.py rename to tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py index ce9861e0..e07e9064 100644 --- a/tests/automated/integration/tasks/url/submit_approved/test_submit_approved_url_task.py +++ b/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py @@ -8,8 +8,8 @@ from src.db.models.instantiations.url.core.sqlalchemy import URL from src.collectors.enums import URLStatus from src.core.tasks.url.enums import TaskOperatorOutcome -from tests.automated.integration.tasks.url.submit_approved.mock import 
mock_make_request -from tests.automated.integration.tasks.url.submit_approved.setup import setup_validated_urls +from tests.automated.integration.tasks.url.impl.submit_approved.mock import mock_make_request +from tests.automated.integration.tasks.url.impl.submit_approved.setup import setup_validated_urls from pdap_access_manager import RequestInfo, RequestType, DataSourcesNamespaces from src.external.pdap.client import PDAPClient diff --git a/tests/automated/integration/tasks/url/test_example_task.py b/tests/automated/integration/tasks/url/impl/test_example_task.py similarity index 100% rename from tests/automated/integration/tasks/url/test_example_task.py rename to tests/automated/integration/tasks/url/impl/test_example_task.py diff --git a/tests/automated/integration/tasks/url/test_url_404_probe.py b/tests/automated/integration/tasks/url/impl/test_url_404_probe.py similarity index 100% rename from tests/automated/integration/tasks/url/test_url_404_probe.py rename to tests/automated/integration/tasks/url/impl/test_url_404_probe.py diff --git a/tests/automated/integration/tasks/url/test_url_miscellaneous_metadata_task.py b/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py similarity index 100% rename from tests/automated/integration/tasks/url/test_url_miscellaneous_metadata_task.py rename to tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py diff --git a/tests/automated/integration/tasks/url/test_url_record_type_task.py b/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py similarity index 100% rename from tests/automated/integration/tasks/url/test_url_record_type_task.py rename to tests/automated/integration/tasks/url/impl/test_url_record_type_task.py diff --git a/tests/automated/integration/tasks/url/loader/__init__.py b/tests/automated/integration/tasks/url/loader/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/tests/automated/integration/tasks/url/loader/conftest.py b/tests/automated/integration/tasks/url/loader/conftest.py new file mode 100644 index 00000000..1e5c69ae --- /dev/null +++ b/tests/automated/integration/tasks/url/loader/conftest.py @@ -0,0 +1,24 @@ +from unittest.mock import AsyncMock + +import pytest + +from src.collectors.source_collectors.muckrock.api_interface.core import MuckrockAPIInterface +from src.core.tasks.url.loader import URLTaskOperatorLoader +from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser +from src.db.client.async_ import AsyncDatabaseClient +from src.external.huggingface.inference.client import HuggingFaceInferenceClient +from src.external.pdap.client import PDAPClient +from src.external.url_request.core import URLRequestInterface + + +@pytest.fixture(scope="session") +def loader() -> URLTaskOperatorLoader: + """Setup loader with mock dependencies""" + return URLTaskOperatorLoader( + adb_client=AsyncMock(spec=AsyncDatabaseClient), + url_request_interface=AsyncMock(spec=URLRequestInterface), + html_parser=AsyncMock(spec=HTMLResponseParser), + pdap_client=AsyncMock(spec=PDAPClient), + muckrock_api_interface=AsyncMock(spec=MuckrockAPIInterface), + hf_inference_client=AsyncMock(spec=HuggingFaceInferenceClient) + ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/loader/test_flags.py b/tests/automated/integration/tasks/url/loader/test_flags.py new file mode 100644 index 00000000..f184397d --- /dev/null +++ b/tests/automated/integration/tasks/url/loader/test_flags.py @@ -0,0 +1,76 @@ +import pytest +from pydantic import BaseModel + +from src.core.tasks.url.loader import URLTaskOperatorLoader +from src.core.tasks.url.models.entry import URLTaskEntry +from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator +from src.core.tasks.url.operators.auto_relevant.core import URLAutoRelevantTaskOperator +from src.core.tasks.url.operators.base 
import URLTaskOperatorBase +from src.core.tasks.url.operators.duplicate.core import URLDuplicateTaskOperator +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.core.tasks.url.operators.misc_metadata.core import URLMiscellaneousMetadataTaskOperator +from src.core.tasks.url.operators.probe.core import URLProbeTaskOperator +from src.core.tasks.url.operators.probe_404.core import URL404ProbeTaskOperator +from src.core.tasks.url.operators.record_type.core import URLRecordTypeTaskOperator +from src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator + + +class FlagTestParams(BaseModel): + + class Config: + arbitrary_types_allowed = True + + env_var: str + operator: type[URLTaskOperatorBase] + +params = [ + FlagTestParams( + env_var="URL_HTML_TASK_FLAG", + operator=URLHTMLTaskOperator + ), + FlagTestParams( + env_var="URL_RECORD_TYPE_TASK_FLAG", + operator=URLRecordTypeTaskOperator + ), + FlagTestParams( + env_var="URL_AGENCY_IDENTIFICATION_TASK_FLAG", + operator=AgencyIdentificationTaskOperator + ), + FlagTestParams( + env_var="URL_SUBMIT_APPROVED_TASK_FLAG", + operator=SubmitApprovedURLTaskOperator + ), + FlagTestParams( + env_var="URL_DUPLICATE_TASK_FLAG", + operator=URLDuplicateTaskOperator + ), + FlagTestParams( + env_var="URL_MISC_METADATA_TASK_FLAG", + operator=URLMiscellaneousMetadataTaskOperator + ), + FlagTestParams( + env_var="URL_404_PROBE_TASK_FLAG", + operator=URL404ProbeTaskOperator + ), + FlagTestParams( + env_var="URL_AUTO_RELEVANCE_TASK_FLAG", + operator=URLAutoRelevantTaskOperator + ), + FlagTestParams( + env_var="URL_PROBE_TASK_FLAG", + operator=URLProbeTaskOperator + ), +] + +@pytest.mark.asyncio +@pytest.mark.parametrize("flag_test_params", params) +async def test_flag_enabled( + flag_test_params: FlagTestParams, + monkeypatch, + loader: URLTaskOperatorLoader +): + monkeypatch.setenv(flag_test_params.env_var, "0") + entries: list[URLTaskEntry] = await loader.load_entries() + for entry in 
entries: + if isinstance(entry.operator, flag_test_params.operator): + assert not entry.enabled, f"Flag associated with env_var {flag_test_params.env_var} should be disabled" diff --git a/tests/automated/integration/tasks/url/loader/test_happy_path.py b/tests/automated/integration/tasks/url/loader/test_happy_path.py new file mode 100644 index 00000000..769204d7 --- /dev/null +++ b/tests/automated/integration/tasks/url/loader/test_happy_path.py @@ -0,0 +1,15 @@ +import pytest + +from src.core.tasks.url.loader import URLTaskOperatorLoader + +NUMBER_OF_TASK_OPERATORS = 9 + +@pytest.mark.asyncio +async def test_happy_path( + loader: URLTaskOperatorLoader +): + """ + Under normal circumstances, all task operators should be returned + """ + task_operators = await loader.load_entries() + assert len(task_operators) == NUMBER_OF_TASK_OPERATORS \ No newline at end of file diff --git a/tests/manual/external/pdap/sync/test_sync_agencies.py b/tests/manual/external/pdap/sync/test_sync_agencies.py index 16be5d9d..f5af7a7e 100644 --- a/tests/manual/external/pdap/sync/test_sync_agencies.py +++ b/tests/manual/external/pdap/sync/test_sync_agencies.py @@ -3,7 +3,7 @@ from pendulum import tomorrow -from src.core.tasks.scheduled.sync.agency.dtos.parameters import AgencySyncParameters +from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters @pytest.mark.asyncio