diff --git a/ENV.md b/ENV.md index 427861d6..95d15551 100644 --- a/ENV.md +++ b/ENV.md @@ -28,6 +28,14 @@ Please ensure these are properly defined in a `.env` file in the root directory. [^1:] The user account in question will require elevated permissions to access certain endpoints. At a minimum, the user will require the `source_collector` and `db_write` permissions. +# Variables With Defaults + +The following environment variables have default values that will be used if not otherwise defined. + +| Variable | Description | Default | +|-------------------------------|------------------------------------------------------------------|---------| +| `URL_TASKS_FREQUENCY_MINUTES` | The frequency for the `RUN_URL_TASKS` Scheduled Task, in minutes | `60` | + # Flags Flags are used to enable/disable certain features. They are set to `1` to enable the feature and `0` to disable the feature. By default, all flags are enabled. @@ -77,6 +85,7 @@ URL Task Flags are collectively controlled by the `RUN_URL_TASKS_TASK_FLAG` flag | `URL_AUTO_RELEVANCE_TASK_FLAG` | Automatically assigns Relevances to URLs. | | `URL_PROBE_TASK_FLAG` | Probes URLs for web metadata. | | `URL_ROOT_URL_TASK_FLAG` | Extracts and links Root URLs to URLs. | +| `URL_SCREENSHOT_TASK_FLAG` | Takes screenshots of URLs. | ### Agency ID Subtasks diff --git a/alembic/versions/2025_09_12_2040-e7189dc92a83_create_url_screenshot_task.py b/alembic/versions/2025_09_12_2040-e7189dc92a83_create_url_screenshot_task.py new file mode 100644 index 00000000..0348c6c3 --- /dev/null +++ b/alembic/versions/2025_09_12_2040-e7189dc92a83_create_url_screenshot_task.py @@ -0,0 +1,122 @@ +"""Create url screenshot task + +Revision ID: e7189dc92a83 +Revises: 70baaee0dd79 +Create Date: 2025-09-12 20:40:45.950204 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from src.util.alembic_helpers import switch_enum_type, id_column, url_id_column, created_at_column, updated_at_column + +# revision identifiers, used by Alembic. +revision: str = 'e7189dc92a83' +down_revision: Union[str, None] = '70baaee0dd79' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +URL_SCREENSHOT_TABLE_NAME = "url_screenshot" +SCREENSHOT_ERROR_TABLE_NAME = "error_url_screenshot" + + + +def upgrade() -> None: + _add_url_screenshot_task() + _add_url_screenshot_table() + _add_screenshot_error_table() + + + +def downgrade() -> None: + _remove_url_screenshot_task() + _remove_url_screenshot_table() + _remove_screenshot_error_table() + + +def _add_screenshot_error_table(): + op.create_table( + SCREENSHOT_ERROR_TABLE_NAME, + url_id_column(), + sa.Column('error', sa.String(), nullable=False), + created_at_column(), + sa.PrimaryKeyConstraint('url_id') + ) + + +def _add_url_screenshot_table(): + op.create_table( + URL_SCREENSHOT_TABLE_NAME, + url_id_column(), + sa.Column('content', sa.LargeBinary(), nullable=False), + sa.Column('file_size', sa.Integer(), nullable=False), + created_at_column(), + updated_at_column(), + sa.UniqueConstraint('url_id', name='uq_url_id_url_screenshot') + ) + + +def _remove_url_screenshot_table(): + op.drop_table(URL_SCREENSHOT_TABLE_NAME) + + +def _remove_screenshot_error_table(): + op.drop_table(SCREENSHOT_ERROR_TABLE_NAME) + + +def _add_url_screenshot_task(): + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=[ + 'HTML', + 'Relevancy', + 'Record Type', + 'Agency Identification', + 'Misc Metadata', + 'Submit Approved URLs', + 'Duplicate Detection', + '404 Probe', + 'Sync Agencies', + 'Sync Data Sources', + 'Push to Hugging Face', + 'URL Probe', + 'Populate Backlog Snapshot', + 'Delete Old Logs', + 'Run URL Task Cycles', + 'Root URL', + 'Internet Archives Probe', + 'Internet Archives Archive', + 'Screenshot' + ] + ) + +def _remove_url_screenshot_task(): + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=[ + 'HTML', + 'Relevancy', + 'Record Type', + 'Agency Identification', + 'Misc Metadata', + 'Submit Approved URLs', + 'Duplicate Detection', + '404 Probe', + 'Sync Agencies', + 'Sync Data Sources', + 'Push to Hugging Face', + 'URL Probe', + 'Populate Backlog Snapshot', + 'Delete Old Logs', + 'Run URL Task Cycles', + 'Root URL', + 'Internet Archives Probe', + 'Internet Archives Archive' + ] + ) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index afe4a89a..2846bf88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "marshmallow~=3.23.2", "openai~=1.60.1", "pdap-access-manager==0.3.6", + "pillow>=11.3.0", "pip>=25.2", "playwright~=1.49.1", "psycopg2-binary~=2.9.6", diff --git a/src/api/endpoints/task/by_id/dto.py b/src/api/endpoints/task/by_id/dto.py index 1cac74d1..d10c3930 100644 --- a/src/api/endpoints/task/by_id/dto.py +++ b/src/api/endpoints/task/by_id/dto.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from src.db.models.impl.url.core.pydantic.info import URLInfo -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.enums import TaskType from src.core.enums import BatchStatus @@ -15,4 +15,4 @@ class TaskInfo(BaseModel): updated_at: datetime.datetime error_info: str | None = None urls: list[URLInfo] - url_errors: list[URLErrorPydanticInfo] \ No newline at end of file + url_errors: list[URLErrorInfoPydantic] \ No newline at end of file diff --git a/src/api/endpoints/task/by_id/query.py b/src/api/endpoints/task/by_id/query.py index 45917d3a..40321333 100644 --- a/src/api/endpoints/task/by_id/query.py +++ b/src/api/endpoints/task/by_id/query.py @@ -6,7 +6,7 @@ from src.collectors.enums import URLStatus from src.core.enums import BatchStatus from src.db.models.impl.url.core.pydantic.info import URLInfo -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.enums import TaskType from src.db.models.impl.task.core import Task from src.db.models.impl.url.core.sqlalchemy import URL @@ -50,7 +50,7 @@ async def run(self, session: AsyncSession) -> TaskInfo: errored_urls = [] for url in task.errored_urls: - url_error_info = URLErrorPydanticInfo( + url_error_info = URLErrorInfoPydantic( task_id=url.task_id, url_id=url.url_id, error=url.error, diff --git a/src/api/endpoints/url/by_id/__init__.py b/src/api/endpoints/url/by_id/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/url/by_id/screenshot/__init__.py b/src/api/endpoints/url/by_id/screenshot/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/url/by_id/screenshot/query.py b/src/api/endpoints/url/by_id/screenshot/query.py new file mode 100644 index 00000000..93a38b23 --- /dev/null +++ b/src/api/endpoints/url/by_id/screenshot/query.py @@ -0,0 +1,28 @@ +from typing import Any + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot +from src.db.queries.base.builder import QueryBuilderBase + +from src.db.helpers.session import session_helper as sh + +class GetURLScreenshotQueryBuilder(QueryBuilderBase): + + def __init__(self, url_id: int): + super().__init__() + self.url_id = url_id + + async def run(self, session: AsyncSession) -> bytes | None: + + query = ( + select(URLScreenshot.content) + .where(URLScreenshot.url_id == self.url_id) + ) + + return await sh.one_or_none( + session=session, + query=query + ) + diff --git a/src/api/endpoints/url/by_id/screenshot/wrapper.py b/src/api/endpoints/url/by_id/screenshot/wrapper.py new file mode 100644 index 00000000..9de38cbb --- /dev/null +++ b/src/api/endpoints/url/by_id/screenshot/wrapper.py @@ -0,0 +1,22 @@ +from http import HTTPStatus + +from fastapi import HTTPException + +from src.api.endpoints.url.by_id.screenshot.query import GetURLScreenshotQueryBuilder +from src.db.client.async_ import AsyncDatabaseClient + + +async def get_url_screenshot_wrapper( + url_id: int, + adb_client: AsyncDatabaseClient, +) -> bytes: + + raw_result: bytes | None = await adb_client.run_query_builder( + GetURLScreenshotQueryBuilder(url_id=url_id) + ) + if raw_result is None: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail="URL not found" + ) + return raw_result \ No newline at end of file diff --git a/src/api/endpoints/url/routes.py b/src/api/endpoints/url/routes.py index 225dd5d6..c7bb59b0 100644 --- a/src/api/endpoints/url/routes.py +++ b/src/api/endpoints/url/routes.py @@ -1,6 +1,7 @@ -from fastapi import APIRouter, Query, Depends +from fastapi import APIRouter, Query, Depends, Response from src.api.dependencies import get_async_core +from src.api.endpoints.url.by_id.screenshot.wrapper import get_url_screenshot_wrapper from src.api.endpoints.url.get.dto import GetURLsResponseInfo from src.core.core import AsyncCore from src.security.manager import get_access_info @@ -27,3 +28,18 @@ async def get_urls( ) -> GetURLsResponseInfo: result = await async_core.get_urls(page=page, errors=errors) return result + +@url_router.get("/{url_id}/screenshot") +async def get_url_screenshot( + url_id: int, + async_core: AsyncCore = Depends(get_async_core), +) -> Response: + + raw_result: bytes = await get_url_screenshot_wrapper( + url_id=url_id, + adb_client=async_core.adb_client + ) + return Response( + content=raw_result, + media_type="image/webp" + ) diff --git a/src/core/tasks/scheduled/impl/internet_archives/probe/operator.py b/src/core/tasks/scheduled/impl/internet_archives/probe/operator.py index 5afeded4..05f58554 100644 --- a/src/core/tasks/scheduled/impl/internet_archives/probe/operator.py +++ b/src/core/tasks/scheduled/impl/internet_archives/probe/operator.py @@ -13,7 +13,7 @@ from src.db.dtos.url.mapping import URLMapping from src.db.enums import TaskType from src.db.models.impl.flag.checked_for_ia.pydantic import FlagURLCheckedForInternetArchivesPydantic -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.models.impl.url.internet_archives.probe.pydantic import URLInternetArchiveMetadataPydantic from src.external.internet_archives.client import InternetArchivesClient from src.external.internet_archives.models.ia_url_mapping import InternetArchivesURLMapping @@ -60,10 +60,10 @@ async def inner_task_logic(self) -> None: await self._add_ia_metadata_to_db(mapper, ia_mappings=subsets.has_metadata) async def _add_errors_to_db(self, mapper: URLMapper, ia_mappings: list[InternetArchivesURLMapping]) -> None: - url_error_info_list: list[URLErrorPydanticInfo] = [] + url_error_info_list: list[URLErrorInfoPydantic] = [] for ia_mapping in ia_mappings: url_id = mapper.get_id(ia_mapping.url) - url_error_info = URLErrorPydanticInfo( + url_error_info = URLErrorInfoPydantic( url_id=url_id, error=ia_mapping.error, task_id=self.task_id diff --git a/src/core/tasks/scheduled/impl/internet_archives/save/operator.py b/src/core/tasks/scheduled/impl/internet_archives/save/operator.py index a52b313d..8a5b3cdb 100644 --- a/src/core/tasks/scheduled/impl/internet_archives/save/operator.py +++ b/src/core/tasks/scheduled/impl/internet_archives/save/operator.py @@ -14,7 +14,7 @@ from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.models.impl.url.internet_archives.save.pydantic import URLInternetArchiveSaveMetadataPydantic from src.external.internet_archives.client import InternetArchivesClient from src.external.internet_archives.models.save_response import InternetArchivesSaveResponseInfo @@ -89,10 +89,10 @@ async def _add_errors_to_db( mapper: URLToEntryMapper, responses: list[InternetArchivesSaveResponseInfo] ) -> None: - error_info_list: list[URLErrorPydanticInfo] = [] + error_info_list: list[URLErrorInfoPydantic] = [] for response in responses: url_id = mapper.get_url_id(response.url) - url_error_info = URLErrorPydanticInfo( + url_error_info = URLErrorInfoPydantic( url_id=url_id, error=response.error, task_id=self.task_id diff --git a/src/core/tasks/scheduled/loader.py b/src/core/tasks/scheduled/loader.py index 76c707ea..da3a6e4b 100644 --- a/src/core/tasks/scheduled/loader.py +++ b/src/core/tasks/scheduled/loader.py @@ -53,7 +53,7 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: adb_client=self.adb_client, ia_client=self.ia_client ), - interval=IntervalEnum.TEN_MINUTES, + interval_minutes=IntervalEnum.TEN_MINUTES.value, enabled=self.env.bool("IA_PROBE_TASK_FLAG", default=True), ), ScheduledTaskEntry( @@ -61,12 +61,12 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: adb_client=self.adb_client, ia_client=self.ia_client ), - interval=IntervalEnum.TEN_MINUTES, + interval_minutes=IntervalEnum.TEN_MINUTES.value, enabled=self.env.bool("IA_SAVE_TASK_FLAG", default=True), ), ScheduledTaskEntry( operator=DeleteOldLogsTaskOperator(adb_client=self.adb_client), - interval=IntervalEnum.DAILY, + interval_minutes=IntervalEnum.DAILY.value, enabled=self.env.bool("DELETE_OLD_LOGS_TASK_FLAG", default=True) ), ScheduledTaskEntry( @@ -74,7 +74,7 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: adb_client=self.adb_client, pdap_client=self.pdap_client ), - interval=IntervalEnum.DAILY, + interval_minutes=IntervalEnum.DAILY.value, enabled=self.env.bool("SYNC_DATA_SOURCES_TASK_FLAG", default=True) ), ScheduledTaskEntry( @@ -82,18 +82,21 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: adb_client=self.async_core.adb_client, pdap_client=self.pdap_client ), - interval=IntervalEnum.DAILY, + interval_minutes=IntervalEnum.DAILY.value, enabled=self.env.bool("SYNC_AGENCIES_TASK_FLAG", default=True) ), ScheduledTaskEntry( operator=RunURLTasksTaskOperator(async_core=self.async_core), - interval=IntervalEnum.HOURLY, + interval_minutes=self.env.int( + "URL_TASKS_FREQUENCY_MINUTES", + default=IntervalEnum.HOURLY.value + ), enabled=self.env.bool("RUN_URL_TASKS_TASK_FLAG", default=True) ), ScheduledTaskEntry( operator=PopulateBacklogSnapshotTaskOperator(adb_client=self.async_core.adb_client), - interval=IntervalEnum.DAILY, + interval_minutes=IntervalEnum.DAILY.value, enabled=self.env.bool("POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG", default=True) ), ScheduledTaskEntry( @@ -101,7 +104,7 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: adb_client=self.async_core.adb_client, hf_client=self.hf_client ), - interval=IntervalEnum.DAILY, + interval_minutes=IntervalEnum.DAILY.value, enabled=self.env.bool( "PUSH_TO_HUGGING_FACE_TASK_FLAG", default=True diff --git a/src/core/tasks/scheduled/models/entry.py b/src/core/tasks/scheduled/models/entry.py index 22430a42..32abb913 100644 --- a/src/core/tasks/scheduled/models/entry.py +++ b/src/core/tasks/scheduled/models/entry.py @@ -10,5 +10,5 @@ class Config: arbitrary_types_allowed = True operator: ScheduledTaskOperatorBase - interval: IntervalEnum + interval_minutes: int enabled: bool diff --git a/src/core/tasks/scheduled/registry/core.py b/src/core/tasks/scheduled/registry/core.py index a622346c..e9fc205b 100644 --- a/src/core/tasks/scheduled/registry/core.py +++ b/src/core/tasks/scheduled/registry/core.py @@ -34,7 +34,7 @@ async def add_job( id=entry.operator.task_type.value, func=func, trigger=IntervalTrigger( - minutes=entry.interval.value, + minutes=entry.interval_minutes, start_date=datetime.now() + timedelta(minutes=minute_lag) ), misfire_grace_time=60, diff --git a/src/core/tasks/url/loader.py b/src/core/tasks/url/loader.py index 600ea1d2..8405a3bb 100644 --- a/src/core/tasks/url/loader.py +++ b/src/core/tasks/url/loader.py @@ -19,6 +19,7 @@ from src.core.tasks.url.operators.record_type.core import URLRecordTypeTaskOperator from src.core.tasks.url.operators.record_type.llm_api.record_classifier.openai import OpenAIRecordClassifier from src.core.tasks.url.operators.root_url.core import URLRootURLTaskOperator +from src.core.tasks.url.operators.screenshot.core import URLScreenshotTaskOperator from src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.inference.client import HuggingFaceInferenceClient @@ -50,7 +51,7 @@ def __init__( self.muckrock_api_interface = muckrock_api_interface self.hf_inference_client = hf_inference_client - async def _get_url_html_task_operator(self) -> URLTaskEntry: + def _get_url_html_task_operator(self) -> URLTaskEntry: operator = URLHTMLTaskOperator( adb_client=self.adb_client, url_request_interface=self.url_request_interface, @@ -64,7 +65,7 @@ async def _get_url_html_task_operator(self) -> URLTaskEntry: ) ) - async def _get_url_record_type_task_operator(self) -> URLTaskEntry: + def _get_url_record_type_task_operator(self) -> URLTaskEntry: operator = URLRecordTypeTaskOperator( adb_client=self.adb_client, classifier=OpenAIRecordClassifier() @@ -77,7 +78,7 @@ async def _get_url_record_type_task_operator(self) -> URLTaskEntry: ) ) - async def _get_agency_identification_task_operator(self) -> URLTaskEntry: + def _get_agency_identification_task_operator(self) -> URLTaskEntry: operator = AgencyIdentificationTaskOperator( adb_client=self.adb_client, loader=AgencyIdentificationSubtaskLoader( @@ -95,7 +96,7 @@ async def _get_agency_identification_task_operator(self) -> URLTaskEntry: ) ) - async def _get_submit_approved_url_task_operator(self) -> URLTaskEntry: + def _get_submit_approved_url_task_operator(self) -> URLTaskEntry: operator = SubmitApprovedURLTaskOperator( adb_client=self.adb_client, pdap_client=self.pdap_client @@ -108,7 +109,7 @@ async def _get_submit_approved_url_task_operator(self) -> URLTaskEntry: ) ) - async def _get_url_miscellaneous_metadata_task_operator(self) -> URLTaskEntry: + def _get_url_miscellaneous_metadata_task_operator(self) -> URLTaskEntry: operator = URLMiscellaneousMetadataTaskOperator( adb_client=self.adb_client ) @@ -120,7 +121,7 @@ async def _get_url_miscellaneous_metadata_task_operator(self) -> URLTaskEntry: ) ) - async def _get_url_404_probe_task_operator(self) -> URLTaskEntry: + def _get_url_404_probe_task_operator(self) -> URLTaskEntry: operator = URL404ProbeTaskOperator( adb_client=self.adb_client, url_request_interface=self.url_request_interface @@ -133,7 +134,7 @@ async def _get_url_404_probe_task_operator(self) -> URLTaskEntry: ) ) - async def _get_url_auto_relevance_task_operator(self) -> URLTaskEntry: + def _get_url_auto_relevance_task_operator(self) -> URLTaskEntry: operator = URLAutoRelevantTaskOperator( adb_client=self.adb_client, hf_client=self.hf_inference_client @@ -146,7 +147,7 @@ async def _get_url_auto_relevance_task_operator(self) -> URLTaskEntry: ) ) - async def _get_url_probe_task_operator(self) -> URLTaskEntry: + def _get_url_probe_task_operator(self) -> URLTaskEntry: operator = URLProbeTaskOperator( adb_client=self.adb_client, url_request_interface=self.url_request_interface @@ -159,7 +160,7 @@ async def _get_url_probe_task_operator(self) -> URLTaskEntry: ) ) - async def _get_url_root_url_task_operator(self) -> URLTaskEntry: + def _get_url_root_url_task_operator(self) -> URLTaskEntry: operator = URLRootURLTaskOperator( adb_client=self.adb_client ) @@ -171,16 +172,29 @@ async def _get_url_root_url_task_operator(self) -> URLTaskEntry: ) ) + def _get_url_screenshot_task_operator(self) -> URLTaskEntry: + operator = URLScreenshotTaskOperator( + adb_client=self.adb_client, + ) + return URLTaskEntry( + operator=operator, + enabled=self.env.bool( + "URL_SCREENSHOT_TASK_FLAG", + default=True + ) + ) + async def load_entries(self) -> list[URLTaskEntry]: return [ - await self._get_url_root_url_task_operator(), - await self._get_url_probe_task_operator(), - await self._get_url_html_task_operator(), - await self._get_url_404_probe_task_operator(), - await self._get_url_record_type_task_operator(), - await self._get_agency_identification_task_operator(), - await self._get_url_miscellaneous_metadata_task_operator(), - await self._get_submit_approved_url_task_operator(), - await self._get_url_auto_relevance_task_operator() + self._get_url_root_url_task_operator(), + self._get_url_probe_task_operator(), + self._get_url_html_task_operator(), + self._get_url_404_probe_task_operator(), + self._get_url_record_type_task_operator(), + self._get_agency_identification_task_operator(), + self._get_url_miscellaneous_metadata_task_operator(), + self._get_submit_approved_url_task_operator(), + self._get_url_auto_relevance_task_operator(), + self._get_url_screenshot_task_operator() ] diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py b/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py index 4085b6dd..efd89ef9 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py @@ -5,7 +5,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.models.run_info import AgencyIDSubtaskRunInfo from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic @@ -66,11 +66,11 @@ async def _upload_subtask_data( models=suggestions, ) - error_infos: list[URLErrorPydanticInfo] = [] + error_infos: list[URLErrorInfoPydantic] = [] for subtask_info in subtask_data_list: if not subtask_info.has_error: continue - error_info = URLErrorPydanticInfo( + error_info = URLErrorInfoPydantic( url_id=subtask_info.url_id, error=subtask_info.error, task_id=self.task_id, diff --git a/src/core/tasks/url/operators/auto_relevant/core.py b/src/core/tasks/url/operators/auto_relevant/core.py index 386b4be7..4cb36a27 100644 --- a/src/core/tasks/url/operators/auto_relevant/core.py +++ b/src/core/tasks/url/operators/auto_relevant/core.py @@ -4,7 +4,7 @@ from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.suggestion.relevant.auto.pydantic.input import AutoRelevancyAnnotationInput -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.enums import TaskType from src.external.huggingface.inference.client import HuggingFaceInferenceClient from src.external.huggingface.inference.models.input import BasicInput @@ -79,7 +79,7 @@ async def put_results_into_database(self, tdos: list[URLRelevantTDO]) -> None: async def update_errors_in_database(self, tdos: list[URLRelevantTDO]) -> None: error_infos = [] for tdo in tdos: - error_info = URLErrorPydanticInfo( + error_info = URLErrorInfoPydantic( task_id=self.task_id, url_id=tdo.url_id, error=tdo.error diff --git a/src/core/tasks/url/operators/html/queries/insert/convert.py b/src/core/tasks/url/operators/html/queries/insert/convert.py index b07118bb..d689edac 100644 --- a/src/core/tasks/url/operators/html/queries/insert/convert.py +++ b/src/core/tasks/url/operators/html/queries/insert/convert.py @@ -3,7 +3,7 @@ from src.core.tasks.url.operators.html.content_info_getter import HTMLContentInfoGetter from src.core.tasks.url.operators.html.tdo import UrlHtmlTDO from src.db.dtos.url.html_content import URLHTMLContentInfo -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.models.impl.url.html.compressed.pydantic import URLCompressedHTMLPydantic from src.db.models.impl.url.scrape_info.enums import ScrapeStatus from src.db.models.impl.url.scrape_info.pydantic import URLScrapeInfoInsertModel @@ -59,12 +59,12 @@ def convert_to_scrape_infos(tdos: list[UrlHtmlTDO]) -> list[URLScrapeInfoInsertM def convert_to_url_errors( tdos: list[UrlHtmlTDO], task_id: int -) -> list[URLErrorPydanticInfo]: +) -> list[URLErrorInfoPydantic]: models = [] for tdo in tdos: if tdo.url_response_info.success: continue - model = URLErrorPydanticInfo( + model = URLErrorInfoPydantic( url_id=tdo.url_info.id, error=tdo.url_response_info.exception, task_id=task_id diff --git a/src/core/tasks/url/operators/misc_metadata/core.py b/src/core/tasks/url/operators/misc_metadata/core.py index 20e2fcd2..c34c2df7 100644 --- a/src/core/tasks/url/operators/misc_metadata/core.py +++ b/src/core/tasks/url/operators/misc_metadata/core.py @@ -1,7 +1,7 @@ from typing import Optional from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.enums import TaskType from src.collectors.enums import CollectorType from src.core.tasks.url.operators.misc_metadata.tdo import URLMiscellaneousMetadataTDO @@ -69,7 +69,7 @@ async def inner_task_logic(self) -> None: subtask.process(tdo) await self.html_default_logic(tdo) except Exception as e: - error_info = URLErrorPydanticInfo( + error_info = URLErrorInfoPydantic( task_id=self.task_id, url_id=tdo.url_id, error=str(e), diff --git a/src/core/tasks/url/operators/record_type/core.py b/src/core/tasks/url/operators/record_type/core.py index 2efbe28f..bc40e572 100644 --- a/src/core/tasks/url/operators/record_type/core.py +++ b/src/core/tasks/url/operators/record_type/core.py @@ -1,5 +1,5 @@ from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.enums import TaskType from src.core.tasks.url.operators.record_type.tdo import URLRecordTypeTDO from src.core.tasks.url.operators.base import URLTaskOperatorBase @@ -44,7 +44,7 @@ async def inner_task_logic(self): async def update_errors_in_database(self, tdos: list[URLRecordTypeTDO]): error_infos = [] for tdo in tdos: - error_info = URLErrorPydanticInfo( + error_info = URLErrorInfoPydantic( task_id=self.task_id, url_id=tdo.url_with_html.url_id, error=tdo.error diff --git a/src/core/tasks/url/operators/screenshot/__init__.py b/src/core/tasks/url/operators/screenshot/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/screenshot/constants.py b/src/core/tasks/url/operators/screenshot/constants.py new file mode 100644 index 00000000..676a06ab --- /dev/null +++ b/src/core/tasks/url/operators/screenshot/constants.py @@ -0,0 +1,4 @@ + + + +TASK_URL_LIMIT: int = 50 \ No newline at end of file diff --git a/src/core/tasks/url/operators/screenshot/convert.py b/src/core/tasks/url/operators/screenshot/convert.py new file mode 100644 index 00000000..b2527f42 --- /dev/null +++ b/src/core/tasks/url/operators/screenshot/convert.py @@ -0,0 +1,30 @@ +from src.core.tasks.url.operators.screenshot.models.outcome import URLScreenshotOutcome +from src.db.models.impl.url.error.url_screenshot.pydantic import ErrorURLScreenshotPydantic +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic +from src.db.models.impl.url.screenshot.pydantic import URLScreenshotPydantic + + +def convert_to_url_screenshot_pydantic( + outcomes: list[URLScreenshotOutcome] +) -> list[URLScreenshotPydantic]: + results: list[URLScreenshotPydantic] = [] + for outcome in outcomes: + result = URLScreenshotPydantic( + url_id=outcome.url_id, + content=outcome.screenshot, + file_size=len(outcome.screenshot), + ) + results.append(result) + return results + +def convert_to_error_url_screenshot_pydantic( + outcomes: list[URLScreenshotOutcome] +) -> list[ErrorURLScreenshotPydantic]: + results: list[ErrorURLScreenshotPydantic] = [] + for outcome in outcomes: + result = ErrorURLScreenshotPydantic( + url_id=outcome.url_id, + error=outcome.error, + ) + results.append(result) + return results diff --git a/src/core/tasks/url/operators/screenshot/core.py b/src/core/tasks/url/operators/screenshot/core.py new file mode 100644 index 00000000..2e54f501 --- /dev/null +++ b/src/core/tasks/url/operators/screenshot/core.py @@ -0,0 +1,63 @@ +from src.core.tasks.url.operators.base import URLTaskOperatorBase +from src.core.tasks.url.operators.screenshot.convert import convert_to_url_screenshot_pydantic, \ + convert_to_error_url_screenshot_pydantic +from src.core.tasks.url.operators.screenshot.filter import filter_success_outcomes +from src.core.tasks.url.operators.screenshot.get import get_url_screenshots +from src.core.tasks.url.operators.screenshot.models.outcome import URLScreenshotOutcome +from src.core.tasks.url.operators.screenshot.models.subsets import URLScreenshotOutcomeSubsets +from src.core.tasks.url.operators.screenshot.queries.get import GetURLsForScreenshotTaskQueryBuilder +from src.core.tasks.url.operators.screenshot.queries.prereq import URLsForScreenshotTaskPrerequisitesQueryBuilder +from src.db.client.async_ import AsyncDatabaseClient +from src.db.dtos.url.mapping import URLMapping +from src.db.enums import TaskType +from src.db.models.impl.url.error.url_screenshot.pydantic import ErrorURLScreenshotPydantic +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic +from src.db.models.impl.url.screenshot.pydantic import URLScreenshotPydantic + + +class URLScreenshotTaskOperator(URLTaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + ): + super().__init__(adb_client) + + @property + def task_type(self) -> TaskType: + return TaskType.SCREENSHOT + + async def meets_task_prerequisites(self) -> bool: + return await self.adb_client.run_query_builder( + URLsForScreenshotTaskPrerequisitesQueryBuilder() + ) + + async def get_urls_without_screenshot(self) -> list[URLMapping]: + return await self.adb_client.run_query_builder( + GetURLsForScreenshotTaskQueryBuilder() + ) + + async def upload_screenshots(self, outcomes: list[URLScreenshotOutcome]) -> None: + insert_models: list[URLScreenshotPydantic] = convert_to_url_screenshot_pydantic(outcomes) + await self.adb_client.bulk_insert(insert_models) + + async def upload_errors(self, outcomes: list[URLScreenshotOutcome]) -> None: + insert_models: list[ErrorURLScreenshotPydantic] = convert_to_error_url_screenshot_pydantic( + outcomes=outcomes, + ) + await self.adb_client.bulk_insert(insert_models) + + async def inner_task_logic(self) -> None: + url_mappings: list[URLMapping] = await self.get_urls_without_screenshot() + await self.link_urls_to_task( + url_ids=[url_mapping.url_id for url_mapping in url_mappings] + ) + + outcomes: list[URLScreenshotOutcome] = await get_url_screenshots( + mappings=url_mappings + ) + + subsets: URLScreenshotOutcomeSubsets = filter_success_outcomes(outcomes) + await self.upload_screenshots(subsets.success) + await self.upload_errors(subsets.failed) + diff --git a/src/core/tasks/url/operators/screenshot/filter.py b/src/core/tasks/url/operators/screenshot/filter.py new file mode 100644 index 00000000..97cb5c89 --- /dev/null +++ b/src/core/tasks/url/operators/screenshot/filter.py @@ -0,0 +1,13 @@ +from src.core.tasks.url.operators.screenshot.models.outcome import URLScreenshotOutcome +from src.core.tasks.url.operators.screenshot.models.subsets import URLScreenshotOutcomeSubsets + + +def filter_success_outcomes(outcomes: list[URLScreenshotOutcome]) -> URLScreenshotOutcomeSubsets: + success: list[URLScreenshotOutcome] = [] + failed: list[URLScreenshotOutcome] = [] + for outcome in outcomes: + if outcome.success: + success.append(outcome) + else: + failed.append(outcome) + return URLScreenshotOutcomeSubsets(success=success, failed=failed) \ No newline at end of file diff --git a/src/core/tasks/url/operators/screenshot/get.py b/src/core/tasks/url/operators/screenshot/get.py new file mode 100644 index 00000000..7c0d6a42 --- /dev/null +++ b/src/core/tasks/url/operators/screenshot/get.py @@ -0,0 +1,22 @@ +from src.core.tasks.url.operators.screenshot.models.outcome import URLScreenshotOutcome +from src.db.dtos.url.mapping import URLMapping +from src.external.url_request.dtos.screenshot_response import URLScreenshotResponse +from src.external.url_request.screenshot_.core import get_screenshots +from src.util.url_mapper import URLMapper + + +async def get_url_screenshots(mappings: list[URLMapping]) -> list[URLScreenshotOutcome]: + mapper = URLMapper(mappings) + responses: list[URLScreenshotResponse] = await get_screenshots( + urls=mapper.get_all_urls() + ) + outcomes: list[URLScreenshotOutcome] = [] + for response in responses: + url_id: int = mapper.get_id(response.url) + outcome = URLScreenshotOutcome( + url_id=url_id, + screenshot=response.screenshot, + error=response.error, + ) + outcomes.append(outcome) + return outcomes diff --git a/src/core/tasks/url/operators/screenshot/models/__init__.py b/src/core/tasks/url/operators/screenshot/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/screenshot/models/outcome.py b/src/core/tasks/url/operators/screenshot/models/outcome.py new file mode 100644 index 00000000..4940b903 --- /dev/null +++ b/src/core/tasks/url/operators/screenshot/models/outcome.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel + + +class URLScreenshotOutcome(BaseModel): + url_id: int + screenshot: bytes | None + error: str | None + + @property + def success(self) -> bool: + return self.error is None \ No newline at end of file diff --git a/src/core/tasks/url/operators/screenshot/models/subsets.py b/src/core/tasks/url/operators/screenshot/models/subsets.py new file mode 100644 index 00000000..070171e6 --- /dev/null +++ b/src/core/tasks/url/operators/screenshot/models/subsets.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + +from src.core.tasks.url.operators.screenshot.models.outcome import URLScreenshotOutcome + + +class URLScreenshotOutcomeSubsets(BaseModel): + success: list[URLScreenshotOutcome] + failed: list[URLScreenshotOutcome] \ No newline at end of file diff --git a/src/core/tasks/url/operators/screenshot/queries/__init__.py b/src/core/tasks/url/operators/screenshot/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/screenshot/queries/cte.py b/src/core/tasks/url/operators/screenshot/queries/cte.py new file mode 100644 index 00000000..e1bbf763 --- /dev/null +++ b/src/core/tasks/url/operators/screenshot/queries/cte.py @@ -0,0 +1,37 @@ +from sqlalchemy import CTE, select, exists, Column + +from src.db.helpers.query import url_not_validated, not_exists_url +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.error.url_screenshot.sqlalchemy import ErrorURLScreenshot +from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata + + +class URLScreenshotPrerequisitesCTEContainer: + + def __init__(self): + self._cte: CTE = ( + select( + URL.id.label("url_id"), + URL.url, + ) + .join( + URLWebMetadata, + URL.id == URLWebMetadata.url_id + ) + .where( + url_not_validated(), + not_exists_url(URLScreenshot), + not_exists_url(ErrorURLScreenshot), + URLWebMetadata.status_code == 200, + ) + .cte("url_screenshot_prerequisites") + ) + + @property + def url_id(self) -> Column[int]: + return self._cte.c.url_id + + @property + def url(self) -> Column[str]: + return self._cte.c.url \ No newline at end of file diff --git a/src/core/tasks/url/operators/screenshot/queries/get.py b/src/core/tasks/url/operators/screenshot/queries/get.py new file mode 100644 index 00000000..e2dd94df --- /dev/null +++ b/src/core/tasks/url/operators/screenshot/queries/get.py @@ -0,0 +1,25 @@ +from typing import Any, Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.screenshot.constants import TASK_URL_LIMIT +from src.core.tasks.url.operators.screenshot.queries.cte import URLScreenshotPrerequisitesCTEContainer +from src.db.dtos.url.mapping import URLMapping +from src.db.queries.base.builder import QueryBuilderBase + +from src.db.helpers.session import session_helper as sh + +class GetURLsForScreenshotTaskQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[URLMapping]: + cte = URLScreenshotPrerequisitesCTEContainer() + + query = select( + cte.url_id, + cte.url, + ).limit(TASK_URL_LIMIT) + + mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) + + return [URLMapping(**mapping) for mapping in mappings] diff --git a/src/core/tasks/url/operators/screenshot/queries/prereq.py b/src/core/tasks/url/operators/screenshot/queries/prereq.py new file mode 100644 index 00000000..885b8ad4 --- /dev/null +++ b/src/core/tasks/url/operators/screenshot/queries/prereq.py @@ -0,0 +1,21 @@ +from typing import Any + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.screenshot.queries.cte import URLScreenshotPrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + +from src.db.helpers.session import session_helper as sh + +class URLsForScreenshotTaskPrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> Any: + cte = URLScreenshotPrerequisitesCTEContainer() + + query = select( + cte.url_id, + cte.url, + ).limit(1) + + return await sh.results_exist(session=session, query=query) diff --git a/src/core/tasks/url/operators/submit_approved/core.py b/src/core/tasks/url/operators/submit_approved/core.py index 107130eb..618f7f2f 100644 --- a/src/core/tasks/url/operators/submit_approved/core.py +++ b/src/core/tasks/url/operators/submit_approved/core.py @@ -1,5 +1,5 @@ from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.enums import TaskType from src.core.tasks.url.operators.submit_approved.tdo import SubmitApprovedURLTDO from src.core.tasks.url.operators.base import URLTaskOperatorBase @@ -50,13 +50,13 @@ async def get_success_infos(self, submitted_url_infos): return success_infos async def get_error_infos(self, submitted_url_infos): - error_infos: list[URLErrorPydanticInfo] = [] + error_infos: list[URLErrorInfoPydantic] = [] error_response_objects = [ response_object for response_object in submitted_url_infos if response_object.request_error is not None ] for error_response_object in error_response_objects: - error_info = URLErrorPydanticInfo( + error_info = URLErrorInfoPydantic( task_id=self.task_id, url_id=error_response_object.url_id, error=error_response_object.request_error, diff --git a/src/db/client/async_.py b/src/db/client/async_.py index 19cbc3f5..cd266b1d 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -113,7 +113,7 @@ from src.db.models.impl.url.core.pydantic.info import URLInfo from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.models.impl.url.error_info.sqlalchemy import URLErrorInfo from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent @@ -449,7 +449,7 @@ async def add_user_record_type_suggestion( # endregion record_type @session_manager - async def add_url_error_infos(self, session: AsyncSession, url_error_infos: list[URLErrorPydanticInfo]): + async def add_url_error_infos(self, session: AsyncSession, url_error_infos: list[URLErrorInfoPydantic]): for url_error_info in url_error_infos: statement = select(URL).where(URL.id == url_error_info.url_id) scalar_result = await session.scalars(statement) @@ -460,7 +460,7 @@ async def add_url_error_infos(self, session: AsyncSession, url_error_infos: list session.add(url_error) @session_manager - async def get_urls_with_errors(self, session: AsyncSession) -> list[URLErrorPydanticInfo]: + async def get_urls_with_errors(self, session: AsyncSession) -> list[URLErrorInfoPydantic]: statement = (select(URL, URLErrorInfo.error, URLErrorInfo.updated_at, URLErrorInfo.task_id) .join(URLErrorInfo) .where(URL.status == URLStatus.ERROR.value) @@ -470,7 +470,7 @@ async def get_urls_with_errors(self, session: AsyncSession) -> list[URLErrorPyda final_results = [] for url, error, updated_at, task_id in results: final_results.append( - URLErrorPydanticInfo( + URLErrorInfoPydantic( url_id=url.id, error=error, updated_at=updated_at, diff --git a/src/db/enums.py b/src/db/enums.py index 1b85e9b1..25a4a728 100644 --- a/src/db/enums.py +++ b/src/db/enums.py @@ -47,6 +47,7 @@ class TaskType(PyEnum): ROOT_URL = "Root URL" IA_PROBE = "Internet Archives Probe" IA_SAVE = "Internet Archives Archive" + SCREENSHOT = "Screenshot" # Scheduled Tasks PUSH_TO_HUGGINGFACE = "Push to Hugging Face" diff --git a/src/db/helpers.py b/src/db/helpers.py deleted file mode 100644 index 10151935..00000000 --- a/src/db/helpers.py +++ /dev/null @@ -1,3 +0,0 @@ -from src.core.env_var_manager import EnvVarManager - - diff --git a/src/db/helpers/query.py b/src/db/helpers/query.py new file mode 100644 index 00000000..b5eda268 --- /dev/null +++ b/src/db/helpers/query.py @@ -0,0 +1,16 @@ +from sqlalchemy import exists, ColumnElement + +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.mixins import URLDependentMixin + + +def url_not_validated() -> ColumnElement[bool]: + return not_exists_url(FlagURLValidated) + +def not_exists_url( + model: type[URLDependentMixin] +) -> ColumnElement[bool]: + return ~exists().where( + model.url_id == URL.id + ) \ No newline at end of file diff --git a/src/db/helpers/session/session_helper.py b/src/db/helpers/session/session_helper.py index aebf236f..bf92f686 100644 --- a/src/db/helpers/session/session_helper.py +++ b/src/db/helpers/session/session_helper.py @@ -5,7 +5,7 @@ from typing import Any, Optional, Sequence import sqlalchemy as sa -from sqlalchemy import update, ColumnElement, Row +from sqlalchemy import update, ColumnElement, Row, Select from sqlalchemy.dialects import postgresql from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.ext.asyncio import AsyncSession @@ -191,6 +191,14 @@ async def bulk_insert( return_ids=return_ids ) +async def results_exist( + session: AsyncSession, + query: Select +) -> bool: + query = query.limit(1) + result: sa.Row | None = await one_or_none(session=session, query=query) + return result is not None + async def bulk_update( session: AsyncSession, models: list[BulkUpdatableModel], diff --git a/src/db/models/helpers.py b/src/db/models/helpers.py index 50f3d43e..e4b941ed 100644 --- a/src/db/models/helpers.py +++ b/src/db/models/helpers.py @@ -1,4 +1,4 @@ -from sqlalchemy import Column, TIMESTAMP, func, Integer, ForeignKey, Enum as SAEnum +from sqlalchemy import Column, TIMESTAMP, func, Integer, ForeignKey, Enum as SAEnum, PrimaryKeyConstraint from enum import Enum as PyEnum def get_created_at_column() -> Column: @@ -38,3 +38,6 @@ def url_id_column() -> Column[int]: ) CURRENT_TIME_SERVER_DEFAULT = func.now() + +def url_id_primary_key_constraint() -> PrimaryKeyConstraint: + return PrimaryKeyConstraint('url_id') \ No newline at end of file diff --git a/src/db/models/impl/url/error/__init__.py b/src/db/models/impl/url/error/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/error/url_screenshot/__init__.py b/src/db/models/impl/url/error/url_screenshot/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/error/url_screenshot/pydantic.py b/src/db/models/impl/url/error/url_screenshot/pydantic.py new file mode 100644 index 00000000..ffecc86d --- /dev/null +++ b/src/db/models/impl/url/error/url_screenshot/pydantic.py @@ -0,0 +1,13 @@ +from pydantic import BaseModel + +from src.db.models.impl.url.error.url_screenshot.sqlalchemy import ErrorURLScreenshot +from src.db.models.templates_.base import Base + + +class ErrorURLScreenshotPydantic(BaseModel): + url_id: int + error: str + + @classmethod + def sa_model(cls) -> type[Base]: + return ErrorURLScreenshot \ No newline at end of file diff --git a/src/db/models/impl/url/error/url_screenshot/sqlalchemy.py b/src/db/models/impl/url/error/url_screenshot/sqlalchemy.py new file mode 100644 index 00000000..e06bf6dd --- /dev/null +++ b/src/db/models/impl/url/error/url_screenshot/sqlalchemy.py @@ -0,0 +1,20 @@ +from sqlalchemy import Column, String + +from src.db.models.helpers import url_id_primary_key_constraint +from src.db.models.mixins import URLDependentMixin, CreatedAtMixin +from src.db.models.templates_.base import Base + + +class ErrorURLScreenshot( + Base, + URLDependentMixin, + CreatedAtMixin, +): + + __tablename__ = "error_url_screenshot" + __table_args__ = ( + url_id_primary_key_constraint(), + ) + + + error = Column(String, nullable=False) \ No newline at end of file diff --git a/src/db/models/impl/url/error_info/pydantic.py b/src/db/models/impl/url/error_info/pydantic.py index 2de814c8..013584cb 100644 --- a/src/db/models/impl/url/error_info/pydantic.py +++ b/src/db/models/impl/url/error_info/pydantic.py @@ -5,7 +5,7 @@ from src.db.templates.markers.bulk.insert import BulkInsertableModel -class URLErrorPydanticInfo(BulkInsertableModel): +class URLErrorInfoPydantic(BulkInsertableModel): task_id: int url_id: int error: str diff --git a/src/db/models/impl/url/screenshot/__init__.py b/src/db/models/impl/url/screenshot/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/screenshot/pydantic.py b/src/db/models/impl/url/screenshot/pydantic.py new file mode 100644 index 00000000..027bec19 --- /dev/null +++ b/src/db/models/impl/url/screenshot/pydantic.py @@ -0,0 +1,13 @@ +from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot +from src.db.models.templates_.base import Base +from src.db.templates.markers.bulk.insert import BulkInsertableModel + + +class URLScreenshotPydantic(BulkInsertableModel): + url_id: int + content: bytes + file_size: int + + @classmethod + def sa_model(cls) -> type[Base]: + return URLScreenshot diff --git a/src/db/models/impl/url/screenshot/sqlalchemy.py b/src/db/models/impl/url/screenshot/sqlalchemy.py new file mode 100644 index 00000000..e61a77ea --- /dev/null +++ b/src/db/models/impl/url/screenshot/sqlalchemy.py @@ -0,0 +1,22 @@ +from sqlalchemy import Column, LargeBinary, Integer, UniqueConstraint, PrimaryKeyConstraint + +from src.db.models.helpers import url_id_primary_key_constraint +from src.db.models.mixins import URLDependentMixin, CreatedAtMixin, UpdatedAtMixin +from src.db.models.templates_.base import Base + + +class URLScreenshot( + Base, + URLDependentMixin, + CreatedAtMixin, + UpdatedAtMixin, +): + __tablename__ = "url_screenshot" + __table_args__ = ( + url_id_primary_key_constraint(), + ) + + + content = Column(LargeBinary, nullable=False) + file_size = Column(Integer, nullable=False) + diff --git a/src/external/url_request/constants.py b/src/external/url_request/constants.py index dc832aff..178b0fad 100644 --- a/src/external/url_request/constants.py +++ b/src/external/url_request/constants.py @@ -1,2 +1,6 @@ +from typing import Literal + HTML_CONTENT_TYPE = "text/html" MAX_CONCURRENCY = 5 + +NETWORK_IDLE: Literal["networkidle"] = "networkidle" \ No newline at end of file diff --git a/src/external/url_request/core.py b/src/external/url_request/core.py index 2f37f90d..7a6920fe 100644 --- a/src/external/url_request/core.py +++ b/src/external/url_request/core.py @@ -19,3 +19,4 @@ async def probe_urls(urls: list[str]) -> list[URLProbeResponseOuterWrapper]: async with ClientSession(timeout=ClientTimeout(total=30)) as session: manager = URLProbeManager(session=session) return await manager.probe_urls(urls=urls) + diff --git a/src/external/url_request/dtos/screenshot_response.py b/src/external/url_request/dtos/screenshot_response.py new file mode 100644 index 00000000..bb36b258 --- /dev/null +++ b/src/external/url_request/dtos/screenshot_response.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel + + +class URLScreenshotResponse(BaseModel): + url: str + screenshot: bytes | None + error: str | None = None + + @property + def is_success(self) -> bool: + return self.error is None \ No newline at end of file diff --git a/src/external/url_request/screenshot_/__init__.py b/src/external/url_request/screenshot_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/url_request/screenshot_/constants.py b/src/external/url_request/screenshot_/constants.py new file mode 100644 index 00000000..a45c37f5 --- /dev/null +++ b/src/external/url_request/screenshot_/constants.py @@ -0,0 +1,5 @@ + + + +SCREENSHOT_HEIGHT: int = 800 +SCREENSHOT_WIDTH: int = 800 diff --git a/src/external/url_request/screenshot_/convert.py b/src/external/url_request/screenshot_/convert.py new file mode 100644 index 00000000..618487c5 --- /dev/null +++ b/src/external/url_request/screenshot_/convert.py @@ -0,0 +1,11 @@ +from PIL import Image +from io import BytesIO + +from PIL.ImageFile import ImageFile + + +def convert_png_to_webp(png: bytes) -> bytes: + image: ImageFile = Image.open(BytesIO(png)) + output = BytesIO() + image.save(output, format="WEBP", lossless=True) + return output.getvalue() diff --git a/src/external/url_request/screenshot_/core.py b/src/external/url_request/screenshot_/core.py new file mode 100644 index 00000000..c7e3c3d4 --- /dev/null +++ b/src/external/url_request/screenshot_/core.py @@ -0,0 +1,54 @@ +from playwright.async_api import async_playwright, Browser, ViewportSize, Page +from tqdm.asyncio import tqdm_asyncio + +from src.external.url_request.constants import NETWORK_IDLE +from src.external.url_request.dtos.screenshot_response import URLScreenshotResponse +from src.external.url_request.screenshot_.constants import SCREENSHOT_HEIGHT, SCREENSHOT_WIDTH +from src.external.url_request.screenshot_.convert import convert_png_to_webp +from src.util.progress_bar import get_progress_bar_disabled + + +async def get_screenshots( + urls: list[str] +) -> list[URLScreenshotResponse]: + responses: list[URLScreenshotResponse] = [] + async with async_playwright() as playwright: + browser: Browser = await playwright.chromium.launch(headless=True) + page: Page = await browser.new_page( + viewport=ViewportSize( + { + "width": SCREENSHOT_WIDTH, + "height": SCREENSHOT_HEIGHT, + } + ) + ) + for url in tqdm_asyncio(urls, disable=get_progress_bar_disabled()): + try: + response: URLScreenshotResponse = await get_screenshot( + page=page, url=url + ) + responses.append(response) + except Exception as e: + responses.append( + URLScreenshotResponse( + url=url, + screenshot=None, + error=str(e) + ) + ) + await page.close() + await browser.close() + return responses + +async def get_screenshot( + page: Page, + url: str, +) -> URLScreenshotResponse: + await page.goto(url) + await page.wait_for_load_state(NETWORK_IDLE) + screenshot_png: bytes = await page.screenshot(type="png") + screenshot_webp: bytes = convert_png_to_webp(screenshot_png) + return URLScreenshotResponse( + url=url, + screenshot=screenshot_webp, + ) diff --git a/tests/automated/integration/api/_helpers/RequestValidator.py b/tests/automated/integration/api/_helpers/RequestValidator.py index afa19afe..c5ff4eaf 100644 --- a/tests/automated/integration/api/_helpers/RequestValidator.py +++ b/tests/automated/integration/api/_helpers/RequestValidator.py @@ -1,7 +1,7 @@ from http import HTTPStatus from typing import Optional, Annotated -from fastapi import HTTPException +from fastapi import HTTPException, Response from pydantic import BaseModel from starlette.testclient import TestClient @@ -462,4 +462,10 @@ async def get_urls_aggregated_pending_metrics(self) -> GetMetricsURLsAggregatedP data = self.get_v2( url="/metrics/urls/aggregate/pending", ) - return GetMetricsURLsAggregatedPendingResponseDTO(**data) \ No newline at end of file + return GetMetricsURLsAggregatedPendingResponseDTO(**data) + + async def get_url_screenshot(self, url_id: int) -> Response: + return self.client.get( + url=f"/url/{url_id}/screenshot", + headers={"Authorization": f"Bearer token"} + ) \ No newline at end of file diff --git a/tests/automated/integration/api/url/__init__.py b/tests/automated/integration/api/url/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/url/by_id/__init__.py b/tests/automated/integration/api/url/by_id/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/url/by_id/snapshot/__init__.py b/tests/automated/integration/api/url/by_id/snapshot/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py b/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py new file mode 100644 index 00000000..cce84649 --- /dev/null +++ b/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py @@ -0,0 +1,10 @@ +import pytest + +from tests.helpers.api_test_helper import APITestHelper +from fastapi import Response + +@pytest.mark.asyncio +async def test_get_url_screenshot_not_found(api_test_helper: APITestHelper): + + response: Response = await api_test_helper.request_validator.get_url_screenshot(url_id=1) + assert response.status_code == 404 \ No newline at end of file diff --git a/tests/automated/integration/api/url/by_id/snapshot/test_success.py b/tests/automated/integration/api/url/by_id/snapshot/test_success.py new file mode 100644 index 00000000..e3ea9d73 --- /dev/null +++ b/tests/automated/integration/api/url/by_id/snapshot/test_success.py @@ -0,0 +1,32 @@ +import pytest + +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot +from tests.automated.integration.api._helpers.RequestValidator import RequestValidator +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.core import DBDataCreator + + +@pytest.mark.asyncio +async def test_get_url_screenshot_success( + api_test_helper: APITestHelper +): + ath: APITestHelper = api_test_helper + ddc: DBDataCreator = api_test_helper.db_data_creator + rv: RequestValidator = ath.request_validator + + url_mapping: URLMapping = (await ddc.create_urls())[0] + url_id: int = url_mapping.url_id + + url_screenshot = URLScreenshot( + url_id=url_id, + content=b"test", + file_size=4 + ) + await ddc.adb_client.add(url_screenshot) + + response = await rv.get_url_screenshot(url_id=url_id) + assert response.status_code == 200 + assert response.headers["Content-Type"] == "image/webp" + assert response.content == b"test" + assert response.headers["Content-Length"] == "4" diff --git a/tests/automated/integration/api/test_url.py b/tests/automated/integration/api/url/test_get.py similarity index 92% rename from tests/automated/integration/api/test_url.py rename to tests/automated/integration/api/url/test_get.py index e59c8299..c4bb6bbf 100644 --- a/tests/automated/integration/api/test_url.py +++ b/tests/automated/integration/api/url/test_get.py @@ -2,10 +2,11 @@ from src.api.endpoints.url.get.dto import GetURLsResponseInfo from src.db.dtos.url.insert import InsertURLsInfo +from tests.helpers.api_test_helper import APITestHelper @pytest.mark.asyncio -async def test_get_urls(api_test_helper): +async def test_get_urls(api_test_helper: APITestHelper): # Basic test, no results data: GetURLsResponseInfo = api_test_helper.request_validator.get_urls() diff --git a/tests/automated/integration/db/client/test_add_url_error_info.py b/tests/automated/integration/db/client/test_add_url_error_info.py index 32564f6b..bdcdd498 100644 --- a/tests/automated/integration/db/client/test_add_url_error_info.py +++ b/tests/automated/integration/db/client/test_add_url_error_info.py @@ -1,7 +1,7 @@ import pytest from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from tests.helpers.data_creator.core import DBDataCreator @@ -16,7 +16,7 @@ async def test_add_url_error_info(db_data_creator: DBDataCreator): error_infos = [] for url_mapping in url_mappings: - uei = URLErrorPydanticInfo( + uei = URLErrorInfoPydantic( url_id=url_mapping.url_id, error="test error", task_id=task_id diff --git a/tests/automated/integration/tasks/scheduled/manager/conftest.py b/tests/automated/integration/tasks/scheduled/manager/conftest.py index 3daf2a44..65c6cacb 100644 --- a/tests/automated/integration/tasks/scheduled/manager/conftest.py +++ b/tests/automated/integration/tasks/scheduled/manager/conftest.py @@ -31,7 +31,7 @@ def manager( mock_loader.load_entries.return_value = [ ScheduledTaskEntry( operator=PopulateBacklogSnapshotTaskOperator(adb_client=adb_client_test), - interval=IntervalEnum.DAILY, + interval_minutes=IntervalEnum.DAILY.value, enabled=True ) ] diff --git a/tests/automated/integration/tasks/url/impl/screenshot/__init__.py b/tests/automated/integration/tasks/url/impl/screenshot/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/impl/screenshot/conftest.py b/tests/automated/integration/tasks/url/impl/screenshot/conftest.py new file mode 100644 index 00000000..41c38366 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/screenshot/conftest.py @@ -0,0 +1,14 @@ +import pytest_asyncio + +from src.core.tasks.url.operators.screenshot.core import URLScreenshotTaskOperator +from src.db.client.async_ import AsyncDatabaseClient + + +@pytest_asyncio.fixture +async def operator( + adb_client_test: AsyncDatabaseClient, +) -> URLScreenshotTaskOperator: + operator = URLScreenshotTaskOperator( + adb_client=adb_client_test, + ) + return operator \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/screenshot/test_core.py b/tests/automated/integration/tasks/url/impl/screenshot/test_core.py new file mode 100644 index 00000000..cb627f72 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/screenshot/test_core.py @@ -0,0 +1,76 @@ +from unittest.mock import AsyncMock + +import pytest + +from src.core.tasks.url.operators.screenshot.core import URLScreenshotTaskOperator +from src.core.tasks.url.operators.screenshot.models.outcome import URLScreenshotOutcome +from src.db.dtos.url.mapping import URLMapping +from src.db.models.impl.url.error.url_screenshot.sqlalchemy import ErrorURLScreenshot +from src.db.models.impl.url.error_info.sqlalchemy import URLErrorInfo +from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot +from src.external.url_request.dtos.screenshot_response import URLScreenshotResponse +from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.run import run_task_and_confirm_success + +# src/core/tasks/url/operators/screenshot/get.py +MOCK_ROOT_PATH = "src.core.tasks.url.operators.screenshot.get.get_screenshots" + +@pytest.mark.asyncio +async def test_core( + operator: URLScreenshotTaskOperator, + db_data_creator: DBDataCreator, + monkeypatch +) -> None: + + # Should not yet meet task prerequisites + assert not await operator.meets_task_prerequisites() + + # Add two URLs to database + url_mappings: list[URLMapping] = await db_data_creator.create_urls(count=2) + screenshot_mapping: URLMapping = url_mappings[0] + error_mapping: URLMapping = url_mappings[1] + url_ids: list[int] = [url_mapping.url_id for url_mapping in url_mappings] + + # Add web metadata for 200 responses + await db_data_creator.create_web_metadata( + url_ids=url_ids, + status_code=200, + ) + + # Should now meet task prerequisites + assert await operator.meets_task_prerequisites() + + mock_get_screenshots = AsyncMock(return_value=[ + URLScreenshotResponse( + url=screenshot_mapping.url, + screenshot=bytes(124536), + ), + URLScreenshotResponse( + url=error_mapping.url, + screenshot=None, + error="error", + ) + ]) + + # Mock get_url_screenshots to return one success and one failure + monkeypatch.setattr( + MOCK_ROOT_PATH, + mock_get_screenshots + ) + + await run_task_and_confirm_success(operator) + + # Get screenshots from database, confirm only one + screenshots: list[URLScreenshot] = await db_data_creator.adb_client.get_all(URLScreenshot) + assert len(screenshots) == 1 + assert screenshots[0].url_id == screenshot_mapping.url_id + + # Get errors from database, confirm only one + errors: list[ErrorURLScreenshot] = await db_data_creator.adb_client.get_all(ErrorURLScreenshot) + assert len(errors) == 1 + assert errors[0].url_id == error_mapping.url_id + + + + + diff --git a/tests/automated/integration/tasks/url/loader/test_happy_path.py b/tests/automated/integration/tasks/url/loader/test_happy_path.py index 769204d7..cee1bb86 100644 --- a/tests/automated/integration/tasks/url/loader/test_happy_path.py +++ b/tests/automated/integration/tasks/url/loader/test_happy_path.py @@ -2,7 +2,7 @@ from src.core.tasks.url.loader import URLTaskOperatorLoader -NUMBER_OF_TASK_OPERATORS = 9 +NUMBER_OF_TASK_OPERATORS = 10 @pytest.mark.asyncio async def test_happy_path( diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index 57ee3576..439f0459 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -14,12 +14,13 @@ from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL from src.db.models.impl.url.core.enums import URLSource -from src.db.models.impl.url.error_info.pydantic import URLErrorPydanticInfo +from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic from src.db.client.sync import DatabaseClient from src.db.enums import TaskType from src.collectors.enums import CollectorType, URLStatus from src.core.tasks.url.operators.misc_metadata.tdo import URLMiscellaneousMetadataTDO from src.core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters @@ -314,7 +315,7 @@ async def error_info( task_id = await self.task() error_infos = [] for url_id in url_ids: - url_error_info = URLErrorPydanticInfo( + url_error_info = URLErrorInfoPydantic( url_id=url_id, error="test error", task_id=task_id @@ -544,4 +545,20 @@ async def link_urls_to_agencies(self, url_ids: list[int], agency_ids: list[int]) agency_id=agency_id ) links.append(link) - await self.adb_client.add_all(links) \ No newline at end of file + await self.adb_client.add_all(links) + + async def create_web_metadata( + self, + url_ids: list[int], + status_code: int = 200, + ): + web_metadata: list[URLWebMetadata] = [ + URLWebMetadata( + url_id=url_id, + status_code=status_code, + accessed=True, + content_type="text/html", + ) + for url_id in url_ids + ] + await self.adb_client.add_all(web_metadata) \ No newline at end of file diff --git a/tests/helpers/run.py b/tests/helpers/run.py new file mode 100644 index 00000000..aa889f7f --- /dev/null +++ b/tests/helpers/run.py @@ -0,0 +1,15 @@ +from src.core.tasks.base.run_info import TaskOperatorRunInfo +from src.core.tasks.url.operators.base import URLTaskOperatorBase +from tests.helpers.asserts import assert_task_run_success + + +async def run_task_and_confirm_success( + operator: URLTaskOperatorBase, +) -> None: + """ + Run task, confirm success, and assert task no longer meets prerequisites. + """ + + run_info: TaskOperatorRunInfo = await operator.run_task() + assert_task_run_success(run_info) + assert not await operator.meets_task_prerequisites() \ No newline at end of file diff --git a/tests/manual/external/url_request/test_url_screenshot.py b/tests/manual/external/url_request/test_url_screenshot.py new file mode 100644 index 00000000..b16535d6 --- /dev/null +++ b/tests/manual/external/url_request/test_url_screenshot.py @@ -0,0 +1,21 @@ +import pytest + +from src.external.url_request.dtos.screenshot_response import URLScreenshotResponse +from src.external.url_request.screenshot_.core import get_screenshots + + +@pytest.mark.asyncio +async def test_url_screenshot(): + """ + Note that this will save a file to the working directory + Be sure to remove it after inspection. + """ + + urls: list[str] = [ + "https://www.example.com" + ] + + responses: list[URLScreenshotResponse] = await get_screenshots(urls=urls) + for idx, response in enumerate(responses): + with open(f"screenshot_{idx}.webp", "wb") as f: + f.write(response.screenshot) \ No newline at end of file diff --git a/uv.lock b/uv.lock index 3dffe619..739c9411 100644 --- a/uv.lock +++ b/uv.lock @@ -508,6 +508,7 @@ dependencies = [ { name = "marshmallow" }, { name = "openai" }, { name = "pdap-access-manager" }, + { name = "pillow" }, { name = "pip" }, { name = "playwright" }, { name = "psycopg", extra = ["binary"] }, @@ -559,6 +560,7 @@ requires-dist = [ { name = "marshmallow", specifier = "~=3.23.2" }, { name = "openai", specifier = "~=1.60.1" }, { name = "pdap-access-manager", specifier = "==0.3.6" }, + { name = "pillow", specifier = ">=11.3.0" }, { name = "pip", specifier = ">=25.2" }, { name = "playwright", specifier = "~=1.49.1" }, { name = "psycopg", extras = ["binary"], specifier = "~=3.1.20" }, @@ -1643,6 +1645,90 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/23/e98758924d1b3aac11a626268eabf7f3cf177e7837c28d47bf84c64532d0/pendulum-3.1.0-py3-none-any.whl", hash = "sha256:f9178c2a8e291758ade1e8dd6371b1d26d08371b4c7730a6e9a3ef8b16ebae0f", size = 111799, upload_time = "2025-04-19T14:02:34.739Z" }, ] +[[package]] +name = "pillow" +version = "11.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3d6767bb38a8fc10e33796ba4ba210cbab9354b6d238/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523", size = 47113069, upload_time = "2025-07-01T09:16:30.666Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/26/77f8ed17ca4ffd60e1dcd220a6ec6d71210ba398cfa33a13a1cd614c5613/pillow-11.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1cd110edf822773368b396281a2293aeb91c90a2db00d78ea43e7e861631b722", size = 5316531, upload_time = "2025-07-01T09:13:59.203Z" }, + { url = "https://files.pythonhosted.org/packages/cb/39/ee475903197ce709322a17a866892efb560f57900d9af2e55f86db51b0a5/pillow-11.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c412fddd1b77a75aa904615ebaa6001f169b26fd467b4be93aded278266b288", size = 4686560, upload_time = "2025-07-01T09:14:01.101Z" }, + { url = "https://files.pythonhosted.org/packages/d5/90/442068a160fd179938ba55ec8c97050a612426fae5ec0a764e345839f76d/pillow-11.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1aa4de119a0ecac0a34a9c8bde33f34022e2e8f99104e47a3ca392fd60e37d", size = 5870978, upload_time = "2025-07-03T13:09:55.638Z" }, + { url = "https://files.pythonhosted.org/packages/13/92/dcdd147ab02daf405387f0218dcf792dc6dd5b14d2573d40b4caeef01059/pillow-11.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:91da1d88226663594e3f6b4b8c3c8d85bd504117d043740a8e0ec449087cc494", size = 7641168, upload_time = "2025-07-03T13:10:00.37Z" }, + { url = "https://files.pythonhosted.org/packages/6e/db/839d6ba7fd38b51af641aa904e2960e7a5644d60ec754c046b7d2aee00e5/pillow-11.3.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:643f189248837533073c405ec2f0bb250ba54598cf80e8c1e043381a60632f58", size = 5973053, upload_time = "2025-07-01T09:14:04.491Z" }, + { url = "https://files.pythonhosted.org/packages/f2/2f/d7675ecae6c43e9f12aa8d58b6012683b20b6edfbdac7abcb4e6af7a3784/pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:106064daa23a745510dabce1d84f29137a37224831d88eb4ce94bb187b1d7e5f", size = 6640273, upload_time = "2025-07-01T09:14:06.235Z" }, + { url = "https://files.pythonhosted.org/packages/45/ad/931694675ede172e15b2ff03c8144a0ddaea1d87adb72bb07655eaffb654/pillow-11.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd8ff254faf15591e724dc7c4ddb6bf4793efcbe13802a4ae3e863cd300b493e", size = 6082043, upload_time = "2025-07-01T09:14:07.978Z" }, + { url = "https://files.pythonhosted.org/packages/3a/04/ba8f2b11fc80d2dd462d7abec16351b45ec99cbbaea4387648a44190351a/pillow-11.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:932c754c2d51ad2b2271fd01c3d121daaa35e27efae2a616f77bf164bc0b3e94", size = 6715516, upload_time = "2025-07-01T09:14:10.233Z" }, + { url = "https://files.pythonhosted.org/packages/48/59/8cd06d7f3944cc7d892e8533c56b0acb68399f640786313275faec1e3b6f/pillow-11.3.0-cp311-cp311-win32.whl", hash = "sha256:b4b8f3efc8d530a1544e5962bd6b403d5f7fe8b9e08227c6b255f98ad82b4ba0", size = 6274768, upload_time = "2025-07-01T09:14:11.921Z" }, + { url = "https://files.pythonhosted.org/packages/f1/cc/29c0f5d64ab8eae20f3232da8f8571660aa0ab4b8f1331da5c2f5f9a938e/pillow-11.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:1a992e86b0dd7aeb1f053cd506508c0999d710a8f07b4c791c63843fc6a807ac", size = 6986055, upload_time = "2025-07-01T09:14:13.623Z" }, + { url = "https://files.pythonhosted.org/packages/c6/df/90bd886fabd544c25addd63e5ca6932c86f2b701d5da6c7839387a076b4a/pillow-11.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:30807c931ff7c095620fe04448e2c2fc673fcbb1ffe2a7da3fb39613489b1ddd", size = 2423079, upload_time = "2025-07-01T09:14:15.268Z" }, + { url = "https://files.pythonhosted.org/packages/40/fe/1bc9b3ee13f68487a99ac9529968035cca2f0a51ec36892060edcc51d06a/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4", size = 5278800, upload_time = "2025-07-01T09:14:17.648Z" }, + { url = "https://files.pythonhosted.org/packages/2c/32/7e2ac19b5713657384cec55f89065fb306b06af008cfd87e572035b27119/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69", size = 4686296, upload_time = "2025-07-01T09:14:19.828Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1e/b9e12bbe6e4c2220effebc09ea0923a07a6da1e1f1bfbc8d7d29a01ce32b/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d", size = 5871726, upload_time = "2025-07-03T13:10:04.448Z" }, + { url = "https://files.pythonhosted.org/packages/8d/33/e9200d2bd7ba00dc3ddb78df1198a6e80d7669cce6c2bdbeb2530a74ec58/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6", size = 7644652, upload_time = "2025-07-03T13:10:10.391Z" }, + { url = "https://files.pythonhosted.org/packages/41/f1/6f2427a26fc683e00d985bc391bdd76d8dd4e92fac33d841127eb8fb2313/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7", size = 5977787, upload_time = "2025-07-01T09:14:21.63Z" }, + { url = "https://files.pythonhosted.org/packages/e4/c9/06dd4a38974e24f932ff5f98ea3c546ce3f8c995d3f0985f8e5ba48bba19/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024", size = 6645236, upload_time = "2025-07-01T09:14:23.321Z" }, + { url = "https://files.pythonhosted.org/packages/40/e7/848f69fb79843b3d91241bad658e9c14f39a32f71a301bcd1d139416d1be/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809", size = 6086950, upload_time = "2025-07-01T09:14:25.237Z" }, + { url = "https://files.pythonhosted.org/packages/0b/1a/7cff92e695a2a29ac1958c2a0fe4c0b2393b60aac13b04a4fe2735cad52d/pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d", size = 6723358, upload_time = "2025-07-01T09:14:27.053Z" }, + { url = "https://files.pythonhosted.org/packages/26/7d/73699ad77895f69edff76b0f332acc3d497f22f5d75e5360f78cbcaff248/pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149", size = 6275079, upload_time = "2025-07-01T09:14:30.104Z" }, + { url = "https://files.pythonhosted.org/packages/8c/ce/e7dfc873bdd9828f3b6e5c2bbb74e47a98ec23cc5c74fc4e54462f0d9204/pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d", size = 6986324, upload_time = "2025-07-01T09:14:31.899Z" }, + { url = "https://files.pythonhosted.org/packages/16/8f/b13447d1bf0b1f7467ce7d86f6e6edf66c0ad7cf44cf5c87a37f9bed9936/pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542", size = 2423067, upload_time = "2025-07-01T09:14:33.709Z" }, + { url = "https://files.pythonhosted.org/packages/1e/93/0952f2ed8db3a5a4c7a11f91965d6184ebc8cd7cbb7941a260d5f018cd2d/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd", size = 2128328, upload_time = "2025-07-01T09:14:35.276Z" }, + { url = "https://files.pythonhosted.org/packages/4b/e8/100c3d114b1a0bf4042f27e0f87d2f25e857e838034e98ca98fe7b8c0a9c/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8", size = 2170652, upload_time = "2025-07-01T09:14:37.203Z" }, + { url = "https://files.pythonhosted.org/packages/aa/86/3f758a28a6e381758545f7cdb4942e1cb79abd271bea932998fc0db93cb6/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f", size = 2227443, upload_time = "2025-07-01T09:14:39.344Z" }, + { url = "https://files.pythonhosted.org/packages/01/f4/91d5b3ffa718df2f53b0dc109877993e511f4fd055d7e9508682e8aba092/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c", size = 5278474, upload_time = "2025-07-01T09:14:41.843Z" }, + { url = "https://files.pythonhosted.org/packages/f9/0e/37d7d3eca6c879fbd9dba21268427dffda1ab00d4eb05b32923d4fbe3b12/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd", size = 4686038, upload_time = "2025-07-01T09:14:44.008Z" }, + { url = "https://files.pythonhosted.org/packages/ff/b0/3426e5c7f6565e752d81221af9d3676fdbb4f352317ceafd42899aaf5d8a/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e", size = 5864407, upload_time = "2025-07-03T13:10:15.628Z" }, + { url = "https://files.pythonhosted.org/packages/fc/c1/c6c423134229f2a221ee53f838d4be9d82bab86f7e2f8e75e47b6bf6cd77/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1", size = 7639094, upload_time = "2025-07-03T13:10:21.857Z" }, + { url = "https://files.pythonhosted.org/packages/ba/c9/09e6746630fe6372c67c648ff9deae52a2bc20897d51fa293571977ceb5d/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805", size = 5973503, upload_time = "2025-07-01T09:14:45.698Z" }, + { url = "https://files.pythonhosted.org/packages/d5/1c/a2a29649c0b1983d3ef57ee87a66487fdeb45132df66ab30dd37f7dbe162/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8", size = 6642574, upload_time = "2025-07-01T09:14:47.415Z" }, + { url = "https://files.pythonhosted.org/packages/36/de/d5cc31cc4b055b6c6fd990e3e7f0f8aaf36229a2698501bcb0cdf67c7146/pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2", size = 6084060, upload_time = "2025-07-01T09:14:49.636Z" }, + { url = "https://files.pythonhosted.org/packages/d5/ea/502d938cbaeec836ac28a9b730193716f0114c41325db428e6b280513f09/pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b", size = 6721407, upload_time = "2025-07-01T09:14:51.962Z" }, + { url = "https://files.pythonhosted.org/packages/45/9c/9c5e2a73f125f6cbc59cc7087c8f2d649a7ae453f83bd0362ff7c9e2aee2/pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3", size = 6273841, upload_time = "2025-07-01T09:14:54.142Z" }, + { url = "https://files.pythonhosted.org/packages/23/85/397c73524e0cd212067e0c969aa245b01d50183439550d24d9f55781b776/pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51", size = 6978450, upload_time = "2025-07-01T09:14:56.436Z" }, + { url = "https://files.pythonhosted.org/packages/17/d2/622f4547f69cd173955194b78e4d19ca4935a1b0f03a302d655c9f6aae65/pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580", size = 2423055, upload_time = "2025-07-01T09:14:58.072Z" }, + { url = "https://files.pythonhosted.org/packages/dd/80/a8a2ac21dda2e82480852978416cfacd439a4b490a501a288ecf4fe2532d/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e", size = 5281110, upload_time = "2025-07-01T09:14:59.79Z" }, + { url = "https://files.pythonhosted.org/packages/44/d6/b79754ca790f315918732e18f82a8146d33bcd7f4494380457ea89eb883d/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d", size = 4689547, upload_time = "2025-07-01T09:15:01.648Z" }, + { url = "https://files.pythonhosted.org/packages/49/20/716b8717d331150cb00f7fdd78169c01e8e0c219732a78b0e59b6bdb2fd6/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced", size = 5901554, upload_time = "2025-07-03T13:10:27.018Z" }, + { url = "https://files.pythonhosted.org/packages/74/cf/a9f3a2514a65bb071075063a96f0a5cf949c2f2fce683c15ccc83b1c1cab/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c", size = 7669132, upload_time = "2025-07-03T13:10:33.01Z" }, + { url = "https://files.pythonhosted.org/packages/98/3c/da78805cbdbee9cb43efe8261dd7cc0b4b93f2ac79b676c03159e9db2187/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8", size = 6005001, upload_time = "2025-07-01T09:15:03.365Z" }, + { url = "https://files.pythonhosted.org/packages/6c/fa/ce044b91faecf30e635321351bba32bab5a7e034c60187fe9698191aef4f/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59", size = 6668814, upload_time = "2025-07-01T09:15:05.655Z" }, + { url = "https://files.pythonhosted.org/packages/7b/51/90f9291406d09bf93686434f9183aba27b831c10c87746ff49f127ee80cb/pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe", size = 6113124, upload_time = "2025-07-01T09:15:07.358Z" }, + { url = "https://files.pythonhosted.org/packages/cd/5a/6fec59b1dfb619234f7636d4157d11fb4e196caeee220232a8d2ec48488d/pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c", size = 6747186, upload_time = "2025-07-01T09:15:09.317Z" }, + { url = "https://files.pythonhosted.org/packages/49/6b/00187a044f98255225f172de653941e61da37104a9ea60e4f6887717e2b5/pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788", size = 6277546, upload_time = "2025-07-01T09:15:11.311Z" }, + { url = "https://files.pythonhosted.org/packages/e8/5c/6caaba7e261c0d75bab23be79f1d06b5ad2a2ae49f028ccec801b0e853d6/pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31", size = 6985102, upload_time = "2025-07-01T09:15:13.164Z" }, + { url = "https://files.pythonhosted.org/packages/f3/7e/b623008460c09a0cb38263c93b828c666493caee2eb34ff67f778b87e58c/pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e", size = 2424803, upload_time = "2025-07-01T09:15:15.695Z" }, + { url = "https://files.pythonhosted.org/packages/73/f4/04905af42837292ed86cb1b1dabe03dce1edc008ef14c473c5c7e1443c5d/pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12", size = 5278520, upload_time = "2025-07-01T09:15:17.429Z" }, + { url = "https://files.pythonhosted.org/packages/41/b0/33d79e377a336247df6348a54e6d2a2b85d644ca202555e3faa0cf811ecc/pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a", size = 4686116, upload_time = "2025-07-01T09:15:19.423Z" }, + { url = "https://files.pythonhosted.org/packages/49/2d/ed8bc0ab219ae8768f529597d9509d184fe8a6c4741a6864fea334d25f3f/pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632", size = 5864597, upload_time = "2025-07-03T13:10:38.404Z" }, + { url = "https://files.pythonhosted.org/packages/b5/3d/b932bb4225c80b58dfadaca9d42d08d0b7064d2d1791b6a237f87f661834/pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673", size = 7638246, upload_time = "2025-07-03T13:10:44.987Z" }, + { url = "https://files.pythonhosted.org/packages/09/b5/0487044b7c096f1b48f0d7ad416472c02e0e4bf6919541b111efd3cae690/pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027", size = 5973336, upload_time = "2025-07-01T09:15:21.237Z" }, + { url = "https://files.pythonhosted.org/packages/a8/2d/524f9318f6cbfcc79fbc004801ea6b607ec3f843977652fdee4857a7568b/pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77", size = 6642699, upload_time = "2025-07-01T09:15:23.186Z" }, + { url = "https://files.pythonhosted.org/packages/6f/d2/a9a4f280c6aefedce1e8f615baaa5474e0701d86dd6f1dede66726462bbd/pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874", size = 6083789, upload_time = "2025-07-01T09:15:25.1Z" }, + { url = "https://files.pythonhosted.org/packages/fe/54/86b0cd9dbb683a9d5e960b66c7379e821a19be4ac5810e2e5a715c09a0c0/pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a", size = 6720386, upload_time = "2025-07-01T09:15:27.378Z" }, + { url = "https://files.pythonhosted.org/packages/e7/95/88efcaf384c3588e24259c4203b909cbe3e3c2d887af9e938c2022c9dd48/pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214", size = 6370911, upload_time = "2025-07-01T09:15:29.294Z" }, + { url = "https://files.pythonhosted.org/packages/2e/cc/934e5820850ec5eb107e7b1a72dd278140731c669f396110ebc326f2a503/pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635", size = 7117383, upload_time = "2025-07-01T09:15:31.128Z" }, + { url = "https://files.pythonhosted.org/packages/d6/e9/9c0a616a71da2a5d163aa37405e8aced9a906d574b4a214bede134e731bc/pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6", size = 2511385, upload_time = "2025-07-01T09:15:33.328Z" }, + { url = "https://files.pythonhosted.org/packages/1a/33/c88376898aff369658b225262cd4f2659b13e8178e7534df9e6e1fa289f6/pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae", size = 5281129, upload_time = "2025-07-01T09:15:35.194Z" }, + { url = "https://files.pythonhosted.org/packages/1f/70/d376247fb36f1844b42910911c83a02d5544ebd2a8bad9efcc0f707ea774/pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653", size = 4689580, upload_time = "2025-07-01T09:15:37.114Z" }, + { url = "https://files.pythonhosted.org/packages/eb/1c/537e930496149fbac69efd2fc4329035bbe2e5475b4165439e3be9cb183b/pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6", size = 5902860, upload_time = "2025-07-03T13:10:50.248Z" }, + { url = "https://files.pythonhosted.org/packages/bd/57/80f53264954dcefeebcf9dae6e3eb1daea1b488f0be8b8fef12f79a3eb10/pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36", size = 7670694, upload_time = "2025-07-03T13:10:56.432Z" }, + { url = "https://files.pythonhosted.org/packages/70/ff/4727d3b71a8578b4587d9c276e90efad2d6fe0335fd76742a6da08132e8c/pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b", size = 6005888, upload_time = "2025-07-01T09:15:39.436Z" }, + { url = "https://files.pythonhosted.org/packages/05/ae/716592277934f85d3be51d7256f3636672d7b1abfafdc42cf3f8cbd4b4c8/pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477", size = 6670330, upload_time = "2025-07-01T09:15:41.269Z" }, + { url = "https://files.pythonhosted.org/packages/e7/bb/7fe6cddcc8827b01b1a9766f5fdeb7418680744f9082035bdbabecf1d57f/pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50", size = 6114089, upload_time = "2025-07-01T09:15:43.13Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f5/06bfaa444c8e80f1a8e4bff98da9c83b37b5be3b1deaa43d27a0db37ef84/pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b", size = 6748206, upload_time = "2025-07-01T09:15:44.937Z" }, + { url = "https://files.pythonhosted.org/packages/f0/77/bc6f92a3e8e6e46c0ca78abfffec0037845800ea38c73483760362804c41/pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12", size = 6377370, upload_time = "2025-07-01T09:15:46.673Z" }, + { url = "https://files.pythonhosted.org/packages/4a/82/3a721f7d69dca802befb8af08b7c79ebcab461007ce1c18bd91a5d5896f9/pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db", size = 7121500, upload_time = "2025-07-01T09:15:48.512Z" }, + { url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload_time = "2025-07-01T09:15:50.399Z" }, + { url = "https://files.pythonhosted.org/packages/9e/e3/6fa84033758276fb31da12e5fb66ad747ae83b93c67af17f8c6ff4cc8f34/pillow-11.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7c8ec7a017ad1bd562f93dbd8505763e688d388cde6e4a010ae1486916e713e6", size = 5270566, upload_time = "2025-07-01T09:16:19.801Z" }, + { url = "https://files.pythonhosted.org/packages/5b/ee/e8d2e1ab4892970b561e1ba96cbd59c0d28cf66737fc44abb2aec3795a4e/pillow-11.3.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:9ab6ae226de48019caa8074894544af5b53a117ccb9d3b3dcb2871464c829438", size = 4654618, upload_time = "2025-07-01T09:16:21.818Z" }, + { url = "https://files.pythonhosted.org/packages/f2/6d/17f80f4e1f0761f02160fc433abd4109fa1548dcfdca46cfdadaf9efa565/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe27fb049cdcca11f11a7bfda64043c37b30e6b91f10cb5bab275806c32f6ab3", size = 4874248, upload_time = "2025-07-03T13:11:20.738Z" }, + { url = "https://files.pythonhosted.org/packages/de/5f/c22340acd61cef960130585bbe2120e2fd8434c214802f07e8c03596b17e/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:465b9e8844e3c3519a983d58b80be3f668e2a7a5db97f2784e7079fbc9f9822c", size = 6583963, upload_time = "2025-07-03T13:11:26.283Z" }, + { url = "https://files.pythonhosted.org/packages/31/5e/03966aedfbfcbb4d5f8aa042452d3361f325b963ebbadddac05b122e47dd/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5418b53c0d59b3824d05e029669efa023bbef0f3e92e75ec8428f3799487f361", size = 4957170, upload_time = "2025-07-01T09:16:23.762Z" }, + { url = "https://files.pythonhosted.org/packages/cc/2d/e082982aacc927fc2cab48e1e731bdb1643a1406acace8bed0900a61464e/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:504b6f59505f08ae014f724b6207ff6222662aab5cc9542577fb084ed0676ac7", size = 5581505, upload_time = "2025-07-01T09:16:25.593Z" }, + { url = "https://files.pythonhosted.org/packages/34/e7/ae39f538fd6844e982063c3a5e4598b8ced43b9633baa3a85ef33af8c05c/pillow-11.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c84d689db21a1c397d001aa08241044aa2069e7587b398c8cc63020390b1c1b8", size = 6984598, upload_time = "2025-07-01T09:16:27.732Z" }, +] + [[package]] name = "pip" version = "25.2"