Fairfetch-co
diff --git a/‎api/dependencies.py‎
Lines changed: 3 additions & 1 deletion b/‎api/dependencies.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎api/main.py‎
Lines changed: 17 additions & 7 deletions b/‎api/main.py‎
Lines changed: 17 additions & 7 deletions
diff --git a/‎api/negotiation.py‎
Lines changed: 25 additions & 8 deletions b/‎api/negotiation.py‎
Lines changed: 25 additions & 8 deletions
diff --git a/‎api/routes.py‎
Lines changed: 36 additions & 19 deletions b/‎api/routes.py‎
Lines changed: 36 additions & 19 deletions
diff --git a/‎compliance/copyright.py‎
Lines changed: 8 additions & 8 deletions b/‎compliance/copyright.py‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎compliance/lineage.py‎
Lines changed: 12 additions & 7 deletions b/‎compliance/lineage.py‎
Lines changed: 12 additions & 7 deletions
diff --git a/‎core/converter.py‎
Lines changed: 3 additions & 1 deletion b/‎core/converter.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎core/knowledge_packet.py‎
Lines changed: 3 additions & 5 deletions b/‎core/knowledge_packet.py‎
Lines changed: 3 additions & 5 deletions
diff --git a/‎core/signatures.py‎
Lines changed: 3 additions & 1 deletion b/‎core/signatures.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎deploy/cloudfront/viewer_request.py‎
Lines changed: 38 additions & 9 deletions b/‎deploy/cloudfront/viewer_request.py‎
Lines changed: 38 additions & 9 deletions
@@ -59,7 +59,9 @@ def from_env(cls) -> FairFetchConfig:
             llms_txt_url=os.getenv("FAIRFETCH_LLMS_TXT_URL", "/.well-known/llms.txt"),
             mcp_endpoint=os.getenv("FAIRFETCH_MCP_ENDPOINT", "/mcp"),
             enable_usage_grants=os.getenv("FAIRFETCH_ENABLE_GRANTS", "true").lower() == "true",
-            enable_preferred_access=os.getenv("FAIRFETCH_PREFERRED_ACCESS", "true").lower() == "true",
+            enable_preferred_access=(
+                os.getenv("FAIRFETCH_PREFERRED_ACCESS", "true").lower() == "true"
+            ),
         )
 
 
 
@@ -46,20 +46,30 @@ def create_app() -> FastAPI:
         allow_origins=["*"],
         allow_methods=["*"],
         allow_headers=[
-            "*", "X-PAYMENT", "X-PAYMENT-RECEIPT",
-            "X-FairFetch-License-ID", "X-FairFetch-Origin-Signature",
+            "*",
+            "X-PAYMENT",
+            "X-PAYMENT-RECEIPT",
+            "X-FairFetch-License-ID",
+            "X-FairFetch-Origin-Signature",
         ],
         expose_headers=[
-            "X-PAYMENT-RECEIPT", "X-Data-Origin-Verified", "X-AI-License-Type",
-            "X-FairFetch-License-ID", "X-FairFetch-Origin-Signature",
-            "X-FairFetch-Preferred-Access", "X-FairFetch-LLMS-Txt",
-            "X-FairFetch-MCP-Endpoint", "Link",
+            "X-PAYMENT-RECEIPT",
+            "X-Data-Origin-Verified",
+            "X-AI-License-Type",
+            "X-FairFetch-License-ID",
+            "X-FairFetch-Origin-Signature",
+            "X-FairFetch-Preferred-Access",
+            "X-FairFetch-LLMS-Txt",
+            "X-FairFetch-MCP-Endpoint",
+            "Link",
         ],
     )
 
     facilitator = build_facilitator(config)
     requirement = build_payment_requirement(config)
-    license_provider = build_license_provider(config, signer) if config.enable_usage_grants else None
+    license_provider = (
+        build_license_provider(config, signer) if config.enable_usage_grants else None
+    )
 
     application.add_middleware(
         X402Middleware,
 
@@ -8,10 +8,10 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from enum import Enum
+from enum import StrEnum
 
 
-class ContentFormat(str, Enum):
+class ContentFormat(StrEnum):
     MARKDOWN = "text/markdown"
     JSON_LD = "application/ld+json"
     AI_CONTEXT = "application/ai-context+json"
@@ -36,12 +36,29 @@ class ContentFormat(str, Enum):
 ]
 
 KNOWN_CRAWLER_UAS = [
-    "chatgpt", "claude", "anthropic", "openai", "perplexity",
-    "google-extended", "gptbot", "ccbot", "cohere-ai",
-    "bytespider", "claudebot", "ia_archiver", "amazonbot",
-    "facebookexternalhit", "twitterbot", "applebot",
-    "diffbot", "semrushbot", "ahrefsbot", "mj12bot",
-    "dotbot", "petalbot", "barkrowler",
+    "chatgpt",
+    "claude",
+    "anthropic",
+    "openai",
+    "perplexity",
+    "google-extended",
+    "gptbot",
+    "ccbot",
+    "cohere-ai",
+    "bytespider",
+    "claudebot",
+    "ia_archiver",
+    "amazonbot",
+    "facebookexternalhit",
+    "twitterbot",
+    "applebot",
+    "diffbot",
+    "semrushbot",
+    "ahrefsbot",
+    "mj12bot",
+    "dotbot",
+    "petalbot",
+    "barkrowler",
 ]
 
 
 
@@ -121,7 +121,11 @@ async def fetch_content(
     tracker = DataLineageTracker(source_url=url)
 
     result = await converter.from_url(url)
-    tracker.record("extract", tool="trafilatura", output_hash=DataLineageTracker.hash_content(result.markdown))
+    tracker.record(
+        "extract",
+        tool="trafilatura",
+        output_hash=DataLineageTracker.hash_content(result.markdown),
+    )
 
     content_format = negotiate(accept)
 
@@ -130,8 +134,11 @@ async def fetch_content(
     if content_format == ContentFormat.MARKDOWN:
         response = PlainTextResponse(result.markdown, media_type="text/markdown")
         _attach_compliance_headers(
-            response, signer=signer, content=result.markdown,
-            license_type=license_type, license_id=grant_header,
+            response,
+            signer=signer,
+            content=result.markdown,
+            license_type=license_type,
+            license_id=grant_header,
         )
         _attach_preferred_access(response, request)
         return response
@@ -164,8 +171,11 @@ async def fetch_content(
         response = JSONResponse(content=packet.to_jsonld())
 
     _attach_compliance_headers(
-        response, signer=signer, content=result.markdown,
-        license_type=license_type, license_id=grant_header,
+        response,
+        signer=signer,
+        content=result.markdown,
+        license_type=license_type,
+        license_id=grant_header,
     )
     _attach_preferred_access(response, request)
     return response
@@ -183,13 +193,15 @@ async def get_summary(
     result = await converter.from_url(url)
     summary_result = await summarizer.summarize(result.markdown)
 
-    return JSONResponse(content={
-        "url": url,
-        "title": result.title,
-        "author": result.author,
-        "summary": summary_result.summary,
-        "model": summary_result.model,
-    })
+    return JSONResponse(
+        content={
+            "url": url,
+            "title": result.title,
+            "author": result.author,
+            "summary": summary_result.summary,
+            "model": summary_result.model,
+        }
+    )
 
 
 @router.get("/content/markdown")
@@ -206,8 +218,11 @@ async def get_markdown(
     grant_header = await _issue_grant_for_response(request, result.markdown, url, license_type)
     response = PlainTextResponse(result.markdown, media_type="text/markdown")
     _attach_compliance_headers(
-        response, signer=signer, content=result.markdown,
-        license_type=license_type, license_id=grant_header,
+        response,
+        signer=signer,
+        content=result.markdown,
+        license_type=license_type,
+        license_id=grant_header,
     )
     _attach_preferred_access(response, request)
     return response
@@ -236,8 +251,10 @@ async def get_optout_status(
     opted_out = log.is_opted_out(domain)
     entries = log.get_entries(domain)
 
-    return JSONResponse(content={
-        "domain": domain,
-        "opted_out": opted_out,
-        "entries": [e.model_dump() for e in entries],
-    })
+    return JSONResponse(
+        content={
+            "domain": domain,
+            "opted_out": opted_out,
+            "entries": [e.model_dump() for e in entries],
+        }
+    )
@@ -7,8 +7,7 @@
 
 from __future__ import annotations
 
-import json
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from pathlib import Path
 
 from pydantic import BaseModel, Field
@@ -24,9 +23,7 @@ class OptOutEntry(BaseModel):
         description="Scope: 'training', 'all', or 'none'",
     )
     declared_by: str = ""
-    timestamp: str = Field(
-        default_factory=lambda: datetime.now(timezone.utc).isoformat()
-    )
+    timestamp: str = Field(default_factory=lambda: datetime.now(UTC).isoformat())
     reference: str = Field(
         default="",
         description="Link to the publisher's opt-out declaration (e.g. robots.txt, TDM policy)",
@@ -59,9 +56,12 @@ def add(self, entry: OptOutEntry) -> None:
     def is_opted_out(self, domain: str, url: str = "") -> bool:
         """Check if a domain/URL has opted out of AI training."""
         for entry in self._entries:
-            if entry.domain == domain and entry.opt_out_scope in ("training", "all"):
-                if entry.url_pattern == "*" or url.startswith(entry.url_pattern):
-                    return True
+            if (
+                entry.domain == domain
+                and entry.opt_out_scope in ("training", "all")
+                and (entry.url_pattern == "*" or url.startswith(entry.url_pattern))
+            ):
+                return True
         return False
 
     def get_entries(self, domain: str | None = None) -> list[OptOutEntry]:
 
@@ -7,7 +7,7 @@
 from __future__ import annotations
 
 import hashlib
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 
 from pydantic import BaseModel, Field
 
@@ -16,9 +16,7 @@ class LineageRecord(BaseModel):
     """A single step in the data lineage chain."""
 
     step: str
-    timestamp: str = Field(
-        default_factory=lambda: datetime.now(timezone.utc).isoformat()
-    )
+    timestamp: str = Field(default_factory=lambda: datetime.now(UTC).isoformat())
     tool: str = ""
     input_hash: str = ""
     output_hash: str = ""
@@ -28,11 +26,18 @@ class LineageRecord(BaseModel):
 class DataLineageTracker:
     """Accumulates lineage records through the content processing pipeline.
 
-    Example:
+    Example::
+
         tracker = DataLineageTracker(source_url="https://example.com/article")
         tracker.record("fetch", tool="httpx", output_hash=hash_of_html)
-        tracker.record("extract", tool="trafilatura", input_hash=hash_of_html, output_hash=hash_of_md)
-        tracker.record("summarize", tool="litellm/gpt-4o-mini", input_hash=hash_of_md, output_hash=hash_of_summary)
+        tracker.record(
+            "extract", tool="trafilatura",
+            input_hash=hash_of_html, output_hash=hash_of_md,
+        )
+        tracker.record(
+            "summarize", tool="litellm/gpt-4o-mini",
+            input_hash=hash_of_md, output_hash=hash_of_summary,
+        )
         lineage = tracker.to_dict()
     """
 
 
@@ -32,7 +32,9 @@ def __init__(self, *, timeout: float = 30.0) -> None:
 
     async def from_url(self, url: str) -> ConversionResult:
         async with httpx.AsyncClient(timeout=self._timeout, follow_redirects=True) as client:
-            resp = await client.get(url, headers={"User-Agent": "Fairfetch/0.1 (+https://fairfetch.dev)"})
+            resp = await client.get(
+                url, headers={"User-Agent": "Fairfetch/0.1 (+https://fairfetch.dev)"}
+            )
             resp.raise_for_status()
             return await self.from_html(resp.text, url=url)
 
 
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 import hashlib
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from typing import Any
 
 from pydantic import BaseModel, Field
@@ -16,9 +16,7 @@ class DataLineage(BaseModel):
 
     source_url: str
     extraction_method: str = "trafilatura"
-    extraction_timestamp: str = Field(
-        default_factory=lambda: datetime.now(timezone.utc).isoformat()
-    )
+    extraction_timestamp: str = Field(default_factory=lambda: datetime.now(UTC).isoformat())
     content_hash: str = ""
     license_type: str = "publisher-terms"
     opt_out_respected: bool = True
@@ -98,7 +96,7 @@ def build(
             headline=title,
             author=author,
             canonical_url=url,
-            date_published=date or datetime.now(timezone.utc).isoformat(),
+            date_published=date or datetime.now(UTC).isoformat(),
             summary=summary,
             markdown_content=markdown,
             origin_signature=signature,
 
@@ -36,7 +36,9 @@ def private_key_b64(self) -> str:
 
     def sign(self, payload: bytes) -> SignatureBundle:
         signed = self._signing_key.sign(payload, encoder=Base64Encoder)
-        sig_b64 = signed.signature.decode() if isinstance(signed.signature, bytes) else signed.signature
+        sig_b64 = (
+            signed.signature.decode() if isinstance(signed.signature, bytes) else signed.signature
+        )
         return SignatureBundle(
             signature=sig_b64,
             public_key=self.public_key_b64,
 
@@ -18,10 +18,24 @@
 ]
 
 KNOWN_CRAWLER_UAS = [
-    "chatgpt", "claude", "anthropic", "openai", "perplexity",
-    "gptbot", "ccbot", "cohere-ai", "google-extended",
-    "bytespider", "claudebot", "amazonbot", "diffbot",
-    "semrushbot", "ahrefsbot", "mj12bot", "dotbot", "petalbot",
+    "chatgpt",
+    "claude",
+    "anthropic",
+    "openai",
+    "perplexity",
+    "gptbot",
+    "ccbot",
+    "cohere-ai",
+    "google-extended",
+    "bytespider",
+    "claudebot",
+    "amazonbot",
+    "diffbot",
+    "semrushbot",
+    "ahrefsbot",
+    "mj12bot",
+    "dotbot",
+    "petalbot",
 ]
 
 PAYMENT_REQUIREMENT = {
@@ -67,11 +81,26 @@ def is_scraper_html(request: dict[str, Any]) -> bool:
 
 def steering_headers() -> dict[str, list[dict[str, str]]]:
     return {
-        "x-fairfetch-preferred-access": [{"key": "X-FairFetch-Preferred-Access", "value": "mcp+json-ld"}],
-        "x-fairfetch-llms-txt": [{"key": "X-FairFetch-LLMS-Txt", "value": LLMS_TXT_URL}],
-        "x-fairfetch-mcp-endpoint": [{"key": "X-FairFetch-MCP-Endpoint", "value": MCP_ENDPOINT}],
-        "link": [{"key": "Link", "value": f'<{LLMS_TXT_URL}>; rel="ai-policy", <{MCP_ENDPOINT}>; rel="ai-content-api"'}],
-        "x-fairfetch-scraper-intercepted": [{"key": "X-FairFetch-Scraper-Intercepted", "value": "true"}],
+        "x-fairfetch-preferred-access": [
+            {"key": "X-FairFetch-Preferred-Access", "value": "mcp+json-ld"},
+        ],
+        "x-fairfetch-llms-txt": [
+            {"key": "X-FairFetch-LLMS-Txt", "value": LLMS_TXT_URL},
+        ],
+        "x-fairfetch-mcp-endpoint": [
+            {"key": "X-FairFetch-MCP-Endpoint", "value": MCP_ENDPOINT},
+        ],
+        "link": [
+            {
+                "key": "Link",
+                "value": (
+                    f'<{LLMS_TXT_URL}>; rel="ai-policy", <{MCP_ENDPOINT}>; rel="ai-content-api"'
+                ),
+            },
+        ],
+        "x-fairfetch-scraper-intercepted": [
+            {"key": "X-FairFetch-Scraper-Intercepted", "value": "true"},
+        ],
     }
Original file line number	Diff line number	Diff line change
`@@ -59,7 +59,9 @@ def from_env(cls) -> FairFetchConfig:`
`59`	`59`	`llms_txt_url=os.getenv("FAIRFETCH_LLMS_TXT_URL", "/.well-known/llms.txt"),`
`60`	`60`	`mcp_endpoint=os.getenv("FAIRFETCH_MCP_ENDPOINT", "/mcp"),`
`61`	`61`	`enable_usage_grants=os.getenv("FAIRFETCH_ENABLE_GRANTS", "true").lower() == "true",`
`62`		`- enable_preferred_access=os.getenv("FAIRFETCH_PREFERRED_ACCESS", "true").lower() == "true",`
	`62`	`+ enable_preferred_access=(`
	`63`	`+ os.getenv("FAIRFETCH_PREFERRED_ACCESS", "true").lower() == "true"`
	`64`	`+ ),`
`63`	`65`	`)`
`64`	`66`
`65`	`67`