Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion ENV.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,36 @@ Please ensure these are properly defined in a `.env` file in the root directory.
| `DISCORD_WEBHOOK_URL` | The URL for the Discord webhook used for notifications | `abc123` |
| `HUGGINGFACE_INFERENCE_API_KEY` | The API key required for accessing the Hugging Face Inference API. | `abc123` |
| `HUGGINGFACE_HUB_TOKEN` | The API key required for uploading to the PDAP HuggingFace account via Hugging Face Hub API. | `abc123` |
| `SCHEDULED_TASKS_FLAG` | Set to `1` to enable running scheduled tasks. | `1` |



[^1]: The user account in question will require elevated permissions to access certain endpoints. At a minimum, the user will require the `source_collector` and `db_write` permissions.

## Task Flags
Task flags are used to enable/disable certain tasks. They are set to `1` to enable the task and `0` to disable the task. By default, all tasks are enabled.

The following flags are available:

| Flag | Description |
|---------------------------------------|-------------------------------------------------------|
| `SCHEDULED_TASKS_FLAG` | All scheduled tasks. |
| `URL_HTML_TASK_FLAG` | URL HTML scraping task. |
| `URL_RECORD_TYPE_TASK_FLAG` | Automatically assigns Record Types to URLs. |
| `URL_AGENCY_IDENTIFICATION_TASK_FLAG` | Automatically assigns and suggests Agencies for URLs. |
| `URL_SUBMIT_APPROVED_TASK_FLAG` | Submits approved URLs to the Data Sources App. |
| `URL_DUPLICATE_TASK_FLAG` | Identifies duplicate URLs. |
| `URL_MISC_METADATA_TASK_FLAG` | Adds misc metadata to URLs. |
| `URL_404_PROBE_TASK_FLAG` | Probes URLs for 404 errors. |
| `URL_AUTO_RELEVANCE_TASK_FLAG` | Automatically assigns Relevances to URLs. |
| `URL_PROBE_TASK_FLAG` | Probes URLs for web metadata. |
| `SYNC_AGENCIES_TASK_FLAG` | Synchronizes agencies from the Data Sources App. |
| `SYNC_DATA_SOURCES_TASK_FLAG` | Synchronizes data sources from the Data Sources App. |
| `PUSH_TO_HUGGING_FACE_TASK_FLAG` | Pushes data to HuggingFace. |
| `POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG` | Populates the backlog snapshot. |
| `DELETE_OLD_LOGS_TASK_FLAG` | Deletes old logs. |
| `RUN_URL_TASKS_TASK_FLAG` | Runs URL tasks. |


## Foreign Data Wrapper (FDW)
```
FDW_DATA_SOURCES_HOST=127.0.0.1 # The host of the Data Sources Database, used for FDW setup
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Add scheduled tasks

Revision ID: 11ece61d7ac2
Revises: 8cd5aa7670ff
Create Date: 2025-08-10 10:32:11.400714

"""
from typing import Sequence, Union

from src.util.alembic_helpers import switch_enum_type

# revision identifiers, used by Alembic.
revision: str = '11ece61d7ac2'
down_revision: Union[str, None] = '8cd5aa7670ff'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Switch the ``task_type`` enum on ``tasks.task_type`` to the new values.

    The full set of enum values is handed to ``switch_enum_type``; the list
    ends with the scheduled-task entries 'Populate Backlog Snapshot',
    'Delete Old Logs' and 'Run URL Task Cycles'.
    """
    task_type_values = [
        'HTML',
        'Relevancy',
        'Record Type',
        'Agency Identification',
        'Misc Metadata',
        'Submit Approved URLs',
        'Duplicate Detection',
        '404 Probe',
        'Sync Agencies',
        'Sync Data Sources',
        'Push to Hugging Face',
        'URL Probe',
        'Populate Backlog Snapshot',
        'Delete Old Logs',
        'Run URL Task Cycles',
    ]
    switch_enum_type(
        table_name='tasks',
        column_name='task_type',
        enum_name='task_type',
        new_enum_values=task_type_values,
    )


def downgrade() -> None:
    """Switch the ``task_type`` enum on ``tasks.task_type`` back.

    Passes ``switch_enum_type`` the value list without
    'Populate Backlog Snapshot', 'Delete Old Logs' and 'Run URL Task Cycles'.
    """
    # NOTE(review): how switch_enum_type treats existing rows that still hold
    # one of the dropped values is not visible here -- confirm before running
    # this downgrade against populated data.
    switch_enum_type(
        table_name='tasks',
        column_name='task_type',
        enum_name='task_type',
        new_enum_values=[
            'HTML',
            'Relevancy',
            'Record Type',
            'Agency Identification',
            'Misc Metadata',
            'Submit Approved URLs',
            'Duplicate Detection',
            '404 Probe',
            'Sync Agencies',
            # The trailing comma matters: without it Python concatenates this
            # literal with the next one into a single bogus enum value.
            'Sync Data Sources',
            'Push to Hugging Face',
            'URL Probe',
        ]
    )
8 changes: 5 additions & 3 deletions src/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from src.core.tasks.handler import TaskHandler
from src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader
from src.core.tasks.scheduled.manager import AsyncScheduledTaskManager
from src.core.tasks.scheduled.registry.core import ScheduledJobRegistry
from src.core.tasks.url.loader import URLTaskOperatorLoader
from src.core.tasks.url.manager import TaskManager
from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser
Expand Down Expand Up @@ -98,15 +99,16 @@ async def lifespan(app: FastAPI):
collector_manager=async_collector_manager
)
async_scheduled_task_manager = AsyncScheduledTaskManager(
async_core=async_core,
handler=task_handler,
loader=ScheduledTaskOperatorLoader(
adb_client=adb_client,
pdap_client=pdap_client,
hf_client=HuggingFaceHubClient(
token=env_var_manager.hf_hub_token
)
)
),
async_core=async_core,
),
registry=ScheduledJobRegistry()
)
await async_scheduled_task_manager.setup()

Expand Down
4 changes: 3 additions & 1 deletion src/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@


class AsyncCore:
task_manager: TaskManager | None = None
adb_client: AsyncDatabaseClient | None = None
collector_manager: AsyncCollectorManager | None = None

def __init__(
self,
Expand All @@ -57,7 +60,6 @@ def __init__(
):
self.task_manager = task_manager
self.adb_client = adb_client

self.collector_manager = collector_manager


Expand Down
16 changes: 16 additions & 0 deletions src/core/tasks/scheduled/impl/backlog/operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase

Check warning on line 1 in src/core/tasks/scheduled/impl/backlog/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/backlog/operator.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/backlog/operator.py:1:1: D100 Missing docstring in public module
from src.db.client.async_ import AsyncDatabaseClient
from src.db.enums import TaskType


class PopulateBacklogSnapshotTaskOperator(ScheduledTaskOperatorBase):
    """Scheduled task operator that populates the backlog snapshot."""

    def __init__(self, adb_client: AsyncDatabaseClient) -> None:
        """Hand the async database client to the base operator."""
        super().__init__(adb_client)

    @property
    def task_type(self) -> TaskType:
        """Return the task type associated with this operator."""
        return TaskType.POPULATE_BACKLOG_SNAPSHOT

    async def inner_task_logic(self) -> None:
        """Call ``populate_backlog_snapshot`` on the database client."""
        db_client = self.adb_client
        await db_client.populate_backlog_snapshot()

Check warning on line 16 in src/core/tasks/scheduled/impl/backlog/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/backlog/operator.py#L16 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/backlog/operator.py:16:58: W292 no newline at end of file
16 changes: 16 additions & 0 deletions src/core/tasks/scheduled/impl/delete_logs/operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase

Check warning on line 1 in src/core/tasks/scheduled/impl/delete_logs/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_logs/operator.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/delete_logs/operator.py:1:1: D100 Missing docstring in public module
from src.db.client.async_ import AsyncDatabaseClient
from src.db.enums import TaskType


class DeleteOldLogsTaskOperator(ScheduledTaskOperatorBase):
    """Scheduled task operator that deletes old logs."""

    def __init__(self, adb_client: AsyncDatabaseClient) -> None:
        """Hand the async database client to the base operator."""
        super().__init__(adb_client)

    @property
    def task_type(self) -> TaskType:
        """Return the task type associated with this operator."""
        return TaskType.DELETE_OLD_LOGS

    async def inner_task_logic(self) -> None:
        """Call ``delete_old_logs`` on the database client."""
        db_client = self.adb_client
        await db_client.delete_old_logs()

Check warning on line 16 in src/core/tasks/scheduled/impl/delete_logs/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_logs/operator.py#L16 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/delete_logs/operator.py:16:48: W292 no newline at end of file
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.tasks.scheduled.huggingface.queries.check.requester import CheckValidURLsUpdatedRequester
from src.core.tasks.scheduled.impl.huggingface.queries.check.requester import CheckValidURLsUpdatedRequester
from src.db.queries.base.builder import QueryBuilderBase


Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from src.collectors.enums import URLStatus
from src.core.enums import RecordType
from src.core.tasks.scheduled.huggingface.queries.get.enums import RecordTypeCoarse
from src.core.tasks.scheduled.huggingface.queries.get.mappings import FINE_COARSE_RECORD_TYPE_MAPPING, \
from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse
from src.core.tasks.scheduled.impl.huggingface.queries.get.mappings import FINE_COARSE_RECORD_TYPE_MAPPING, \
OUTCOME_RELEVANCY_MAPPING


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
from sqlalchemy.ext.asyncio import AsyncSession

from src.collectors.enums import URLStatus
from src.core.tasks.scheduled.huggingface.queries.get.convert import convert_url_status_to_relevant, \
from src.core.tasks.scheduled.impl.huggingface.queries.get.convert import convert_url_status_to_relevant, \
convert_fine_to_coarse_record_type
from src.core.tasks.scheduled.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput
from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput
from src.db.models.instantiations.url.html.compressed.sqlalchemy import URLCompressedHTML
from src.db.models.instantiations.url.core.sqlalchemy import URL
from src.db.queries.base.builder import QueryBuilderBase
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from src.collectors.enums import URLStatus
from src.core.enums import RecordType
from src.core.tasks.scheduled.huggingface.queries.get.enums import RecordTypeCoarse
from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse

FINE_COARSE_RECORD_TYPE_MAPPING = {
# Police and Public
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pydantic import BaseModel

from src.core.enums import RecordType
from src.core.tasks.scheduled.huggingface.queries.get.enums import RecordTypeCoarse
from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse


class GetForLoadingToHuggingFaceOutput(BaseModel):
Expand Down
17 changes: 17 additions & 0 deletions src/core/tasks/scheduled/impl/run_url_tasks/operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from src.core.core import AsyncCore

Check warning on line 1 in src/core/tasks/scheduled/impl/run_url_tasks/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/run_url_tasks/operator.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/run_url_tasks/operator.py:1:1: D100 Missing docstring in public module
from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase
from src.db.enums import TaskType


class RunURLTasksTaskOperator(ScheduledTaskOperatorBase):
    """Scheduled task operator that runs URL tasks through ``AsyncCore``."""

    def __init__(self, async_core: AsyncCore) -> None:
        """Initialise the base with the core's database client and keep the core."""
        super().__init__(async_core.adb_client)
        self.async_core = async_core

    @property
    def task_type(self) -> TaskType:
        """Return the task type associated with this operator."""
        return TaskType.RUN_URL_TASKS

    async def inner_task_logic(self) -> None:
        """Call ``run_tasks`` on the stored ``AsyncCore`` instance."""
        core = self.async_core
        await core.run_tasks()

Check warning on line 17 in src/core/tasks/scheduled/impl/run_url_tasks/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/run_url_tasks/operator.py#L17 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/run_url_tasks/operator.py:17:42: W292 no newline at end of file
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from src.core.tasks.scheduled.sync.check import check_max_sync_requests_not_exceeded
from src.core.tasks.scheduled.sync.agency.dtos.parameters import AgencySyncParameters
from src.core.tasks.scheduled.impl.sync.check import check_max_sync_requests_not_exceeded

Check warning on line 1 in src/core/tasks/scheduled/impl/sync/agency/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/sync/agency/operator.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/sync/agency/operator.py:1:1: D100 Missing docstring in public module
from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters
from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase
from src.db.client.async_ import AsyncDatabaseClient
from src.db.enums import TaskType
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from sqlalchemy.exc import NoResultFound
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.tasks.scheduled.sync.agency.dtos.parameters import AgencySyncParameters
from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters
from src.db.models.instantiations.state.sync.agencies import AgenciesSyncState
from src.db.queries.base.builder import QueryBuilderBase

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from src.core.tasks.scheduled.sync.constants import MAX_SYNC_REQUESTS
from src.core.tasks.scheduled.sync.exceptions import MaxRequestsExceededError
from src.core.tasks.scheduled.impl.sync.constants import MAX_SYNC_REQUESTS

Check warning on line 1 in src/core/tasks/scheduled/impl/sync/check.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/sync/check.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/sync/check.py:1:1: D100 Missing docstring in public module
from src.core.tasks.scheduled.impl.sync.exceptions import MaxRequestsExceededError


def check_max_sync_requests_not_exceeded(request_count: int) -> None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase
from src.core.tasks.scheduled.sync.check import check_max_sync_requests_not_exceeded
from src.core.tasks.scheduled.sync.data_sources.params import DataSourcesSyncParameters
from src.core.tasks.scheduled.impl.sync.check import check_max_sync_requests_not_exceeded
from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters
from src.db.client.async_ import AsyncDatabaseClient
from src.db.enums import TaskType
from src.external.pdap.client import PDAPClient
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from sqlalchemy.exc import NoResultFound
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.tasks.scheduled.sync.data_sources.params import DataSourcesSyncParameters
from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters
from src.db.models.instantiations.state.sync.data_sources import DataSourcesSyncState
from src.db.queries.base.builder import QueryBuilderBase

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.query import URLAgencyLinkUpdateQueryBuilder
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyForDataSourcesSyncParams
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.query import URLAgencyLinkUpdateQueryBuilder
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyForDataSourcesSyncParams


async def update_agency_links(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.convert import convert_to_link_url_agency_models
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.convert import convert_to_link_url_agency_models
from src.db.helpers.session import session_helper as sh
from src.db.models.instantiations.link.url_agency.pydantic import LinkURLAgencyPydantic
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyForDataSourcesSyncParams
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyForDataSourcesSyncParams
from src.db.models.instantiations.link.url_agency.sqlalchemy import LinkURLAgency
from src.db.queries.base.builder import QueryBuilderBase

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
from sqlalchemy.ext.asyncio import AsyncSession
from typing_extensions import override

from src.core.tasks.scheduled.sync.data_sources.queries.upsert.helpers.filter import filter_for_urls_with_ids, \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.helpers.filter import filter_for_urls_with_ids, \
get_mappings_for_urls_without_data_sources
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.mapper import URLSyncInfoMapper
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.param_manager import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.mapper import URLSyncInfoMapper
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.param_manager import \
UpsertURLsFromDataSourcesParamManager
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.requester import UpsertURLsFromDataSourcesDBRequester
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.response import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.requester import UpsertURLsFromDataSourcesDBRequester
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import \
LookupURLForDataSourcesSyncResponse
from src.db.dtos.url.mapping import URLMapping
from src.db.queries.base.builder import QueryBuilderBase
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from src.collectors.enums import URLStatus
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.insert.params import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.insert.params import \
InsertURLForDataSourcesSyncParams
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.update.params import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.update.params import \
UpdateURLForDataSourcesSyncParams
from src.external.pdap.dtos.sync.data_sources import DataSourcesSyncResponseInnerInfo
from src.external.pdap.enums import DataSourcesURLStatus, ApprovalStatus
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.response import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import \

Check warning on line 1 in src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/filter.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/filter.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/helpers/filter.py:1:1: D100 Missing docstring in public module
LookupURLForDataSourcesSyncResponse
from src.db.dtos.url.mapping import URLMapping

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.params import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import \

Check warning on line 1 in src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py:1:1: D100 Missing docstring in public module
UpdateLinkURLAgencyForDataSourcesSyncParams
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.helpers.convert import convert_to_url_update_params, \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.helpers.convert import convert_to_url_update_params, \
convert_to_url_insert_params
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.mapper import URLSyncInfoMapper
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.insert.params import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.mapper import URLSyncInfoMapper
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.insert.params import \
InsertURLForDataSourcesSyncParams
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.response import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import \
LookupURLForDataSourcesSyncResponse
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.update.params import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.update.params import \
UpdateURLForDataSourcesSyncParams
from src.db.dtos.url.mapping import URLMapping
from src.db.models.instantiations.link.url_agency.pydantic import LinkURLAgencyPydantic
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.params import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import \
UpdateLinkURLAgencyForDataSourcesSyncParams
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.agency.query import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.query import \
URLAgencyLinkUpdateQueryBuilder
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.insert.params import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.insert.params import \
InsertURLForDataSourcesSyncParams
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.query import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.query import \
LookupURLForDataSourcesSyncQueryBuilder
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.lookup.response import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import \
LookupURLForDataSourcesSyncResponse
from src.core.tasks.scheduled.sync.data_sources.queries.upsert.url.update.params import \
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.update.params import \
UpdateURLForDataSourcesSyncParams
from src.db.dtos.url.mapping import URLMapping
from src.db.helpers.session import session_helper as sh
Expand Down
Loading