Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Change URL outcome to URL status

Revision ID: 5930e70660c5
Revises: 11ece61d7ac2
Create Date: 2025-08-10 20:46:58.576623

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_08_10_2046-5930e70660c5_change_url_outcome_to_url_status.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_08_10_2046-5930e70660c5_change_url_outcome_to_url_status.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_08_10_2046-5930e70660c5_change_url_outcome_to_url_status.py:11:1: F401 'sqlalchemy as sa' imported but unused


# revision identifiers, used by Alembic.
revision: str = '5930e70660c5'
down_revision: Union[str, None] = '11ece61d7ac2'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Rename the ``outcome`` column of the ``urls`` table to ``status``."""
    # Only the column name changes; no type, nullability or default is passed.
    op.alter_column('urls', 'outcome', new_column_name='status')


def downgrade() -> None:
    """Rename the ``status`` column of the ``urls`` table back to ``outcome``."""
    # Exact inverse of upgrade(): only the column name is reverted.
    op.alter_column('urls', 'status', new_column_name='outcome')
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ async def run(
)

common_where_clause = [
URL.outcome == URLStatus.PENDING.value,
URL.status == URLStatus.PENDING.value,
LinkBatchURL.batch_id == self.batch_id,
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ async def run(self, session: AsyncSession):

query = (
query
.where(URL.outcome == URLStatus.PENDING.value)
.where(URL.status == URLStatus.PENDING.value)
# URL must not have user suggestion
.where(
StatementComposer.user_suggestion_not_exists(self.user_suggestion_model_to_exclude)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ async def run(

# Must not have confirmed agencies
query = query.where(
URL.outcome == URLStatus.PENDING.value
URL.status == URLStatus.PENDING.value
)


Expand Down
2 changes: 1 addition & 1 deletion src/api/endpoints/annotate/all/get/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ async def run(
query
.where(
and_(
URL.outcome == URLStatus.PENDING.value,
URL.status == URLStatus.PENDING.value,
StatementComposer.user_suggestion_not_exists(UserUrlAgencySuggestion),
StatementComposer.user_suggestion_not_exists(UserRecordTypeSuggestion),
StatementComposer.user_suggestion_not_exists(UserRelevantSuggestion),
Expand Down
2 changes: 1 addition & 1 deletion src/api/endpoints/collector/manual/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ async def run(self, session: AsyncSession) -> ManualBatchResponseDTO:
name=entry.name,
description=entry.description,
collector_metadata=entry.collector_metadata,
outcome=URLStatus.PENDING.value,
status=URLStatus.PENDING.value,
record_type=entry.record_type.value if entry.record_type is not None else None,
source=URLSource.MANUAL
)
Expand Down
2 changes: 1 addition & 1 deletion src/api/endpoints/metrics/batches/aggregated/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def url_column(status: URLStatus, label):
return sc.count_distinct(
case(
(
URL.outcome == status.value,
URL.status == status.value,
URL.id
)
),
Expand Down
2 changes: 1 addition & 1 deletion src/api/endpoints/metrics/batches/breakdown/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def url_column(status: URLStatus, label):
return sc.count_distinct(
case(
(
URL.outcome == status.value,
URL.status == status.value,
URL.id
)
),
Expand Down
2 changes: 1 addition & 1 deletion src/api/endpoints/review/approve/query_/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ async def run(self, session: AsyncSession) -> None:

# If it does, do nothing

url.outcome = URLStatus.VALIDATED.value
url.status = URLStatus.VALIDATED.value

update_if_not_none(url, "name", self.approval_info.name, required=True)
update_if_not_none(url, "description", self.approval_info.description, required=False)
Expand Down
6 changes: 3 additions & 3 deletions src/api/endpoints/review/next/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _build_base_query(
query = (
query.where(
and_(
URL.outcome == URLStatus.PENDING.value,
URL.status == URLStatus.PENDING.value,
*where_exist_clauses
)
)
Expand Down Expand Up @@ -189,7 +189,7 @@ async def get_count_ready_query(self):
)
.where(
LinkBatchURL.batch_id == self.batch_id,
URL.outcome == URLStatus.PENDING.value,
URL.status == URLStatus.PENDING.value,
*self._get_where_exist_clauses(
builder.query
)
Expand All @@ -209,7 +209,7 @@ async def get_count_reviewed_query(self):
.join(LinkBatchURL)
.outerjoin(URL, URL.id == LinkBatchURL.url_id)
.where(
URL.outcome.in_(
URL.status.in_(
[
URLStatus.VALIDATED.value,
URLStatus.NOT_RELEVANT.value,
Expand Down
6 changes: 3 additions & 3 deletions src/api/endpoints/review/reject/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ async def run(self, session) -> None:

match self.rejection_reason:
case RejectionReason.INDIVIDUAL_RECORD:
url.outcome = URLStatus.INDIVIDUAL_RECORD.value
url.status = URLStatus.INDIVIDUAL_RECORD.value
case RejectionReason.BROKEN_PAGE_404:
url.outcome = URLStatus.NOT_FOUND.value
url.status = URLStatus.NOT_FOUND.value
case RejectionReason.NOT_RELEVANT:
url.outcome = URLStatus.NOT_RELEVANT.value
url.status = URLStatus.NOT_RELEVANT.value
case _:
raise HTTPException(
status_code=HTTP_400_BAD_REQUEST,
Expand Down
2 changes: 1 addition & 1 deletion src/api/endpoints/task/by_id/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ async def run(self, session: AsyncSession) -> TaskInfo:
batch_id=url.batch.id,
url=url.url,
collector_metadata=url.collector_metadata,
outcome=URLStatus(url.outcome),
status=URLStatus(url.status),
updated_at=url.updated_at
)
url_infos.append(url_info)
Expand Down
2 changes: 1 addition & 1 deletion src/api/endpoints/url/get/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ async def run(self, session: AsyncSession) -> GetURLsResponseInfo:
id=result.id,
batch_id=result.batch.id if result.batch is not None else None,
url=result.url,
status=URLStatus(result.outcome),
status=URLStatus(result.status),
collector_metadata=result.collector_metadata,
updated_at=result.updated_at,
created_at=result.created_at,
Expand Down
2 changes: 1 addition & 1 deletion src/collectors/queries/insert/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ async def run(self, session: AsyncSession) -> int:
url_entry = URL(
url=self.url_info.url,
collector_metadata=self.url_info.collector_metadata,
outcome=self.url_info.outcome.value,
status=self.url_info.status.value,
source=self.url_info.source
)
if self.url_info.created_at is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ async def has_valid_urls(self, last_upload_at: datetime | None) -> bool:
URL.id == URLCompressedHTML.url_id
)
.where(
URL.outcome.in_(
URL.status.in_(
[
URLStatus.VALIDATED,
URLStatus.NOT_RELEVANT.value,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ async def run(self, session: AsyncSession) -> list[GetForLoadingToHuggingFaceOut
select(
URL.id.label(label_url_id),
URL.url.label(label_url),
URL.outcome.label(label_url_status),
URL.status.label(label_url_status),
URL.record_type.label(label_record_type_fine),
URLCompressedHTML.compressed_html.label(label_html)
)
Expand All @@ -35,7 +35,7 @@ async def run(self, session: AsyncSession) -> list[GetForLoadingToHuggingFaceOut
URL.id == URLCompressedHTML.url_id
)
.where(
URL.outcome.in_([
URL.status.in_([
URLStatus.VALIDATED,
URLStatus.NOT_RELEVANT,
URLStatus.SUBMITTED
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.param_manager import \
UpsertURLsFromDataSourcesParamManager
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.requester import UpsertURLsFromDataSourcesDBRequester
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.insert.params import \
InsertURLForDataSourcesSyncParams
from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import \
LookupURLForDataSourcesSyncResponse
from src.db.dtos.url.mapping import URLMapping
Expand Down Expand Up @@ -84,7 +86,7 @@ async def _add_new_data_sources(self, url_mappings: list[URLMapping]):
await self.requester.add_new_data_sources(url_ds_insert_params)

async def _add_new_urls(self, urls: list[str]):
url_insert_params = self.param_manager.add_new_urls(urls)
url_insert_params: list[InsertURLForDataSourcesSyncParams] = self.param_manager.add_new_urls(urls)
url_mappings = await self.requester.add_new_urls(url_insert_params)
return url_mappings

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def convert_to_url_update_params(
id=url_id,
name=sync_info.name,
description=sync_info.description,
outcome=convert_to_source_collector_url_status(
status=convert_to_source_collector_url_status(
ds_url_status=sync_info.url_status,
ds_approval_status=sync_info.approval_status
),
Expand All @@ -56,7 +56,7 @@ def convert_to_url_insert_params(
url=url,
name=sync_info.name,
description=sync_info.description,
outcome=convert_to_source_collector_url_status(
status=convert_to_source_collector_url_status(
ds_url_status=sync_info.url_status,
ds_approval_status=sync_info.approval_status
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class InsertURLForDataSourcesSyncParams(BulkInsertableModel):
url: str
name: str
description: str | None
outcome: URLStatus
status: URLStatus
record_type: RecordType
source: URLSource = URLSource.DATA_SOURCES

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ def sa_model(cls) -> type[URL]:
id: int
name: str
description: str | None
outcome: URLStatus
status: URLStatus
record_type: RecordType
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ async def run(self, session: AsyncSession) -> list[AgencyIdentificationTDO]:
Batch.strategy
)
.select_from(URL)
.where(URL.outcome == URLStatus.PENDING.value)
.where(URL.status == URLStatus.PENDING.value)
.outerjoin(LinkBatchURL)
.outerjoin(Batch)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ async def run(
select(
URL.id
).where(
URL.outcome == URLStatus.PENDING.value
URL.status == URLStatus.PENDING.value
)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ async def run(self, session: AsyncSession) -> list[URLRelevantTDO]:
)
.join(URLCompressedHTML)
.where(
URL.outcome == URLStatus.PENDING.value,
URL.status == URLStatus.PENDING.value,
)
)
query = StatementComposer.exclude_urls_with_extant_model(
Expand Down
2 changes: 1 addition & 1 deletion src/core/tasks/url/operators/html/queries/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ async def run(self, session: AsyncSession) -> list[URLInfo]:
batch_id=url.batch.id if url.batch is not None else None,
url=url.url,
collector_metadata=url.collector_metadata,
outcome=url.outcome,
status=url.status,
created_at=url.created_at,
updated_at=url.updated_at,
name=url.name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ async def _process_results(self, urls):
async def _build_query():
query = (
select(URL)
.where(URL.outcome == URLStatus.VALIDATED.value)
.where(URL.status == URLStatus.VALIDATED.value)
.options(
selectinload(URL.optional_data_source_metadata),
selectinload(URL.confirmed_agencies),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class HasValidatedURLsQueryBuilder(QueryBuilderBase):
async def run(self, session: AsyncSession) -> bool:
query = (
select(URL)
.where(URL.outcome == URLStatus.VALIDATED.value)
.where(URL.status == URLStatus.VALIDATED.value)
)
urls = await session.execute(query)
urls = urls.scalars().all()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ async def run(self, session: AsyncSession):
update(URL)
.where(URL.id == url_id)
.values(
outcome=URLStatus.SUBMITTED.value
status=URLStatus.SUBMITTED.value
)
)

Expand Down
Loading