Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions .envs.example/.production/.django
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# =============================================================================
# OpenContracts — Django service env (production)
# Copy to .envs/.production/.django and fill in every <REPLACE-ME> below.
# Generate strong secrets with: ./scripts/easypanel/generate-env.sh
# =============================================================================

# --- Django core ----------------------------------------------------------
DJANGO_SETTINGS_MODULE=config.settings.production
DJANGO_DEBUG=False
DJANGO_SECRET_KEY=<REPLACE-ME-64-char-random>
DJANGO_ADMIN_URL=admin/<REPLACE-ME-30-char-random>/
DJANGO_ALLOWED_HOSTS=<REPLACE-ME-domain>,django

# Initial superuser (created via `createsuperuser` or signal)
DJANGO_SUPERUSER_USERNAME=admin
DJANGO_SUPERUSER_EMAIL=<REPLACE-ME-email>
DJANGO_SUPERUSER_PASSWORD=<REPLACE-ME-superuser-password>

# Worker timeout for daphne (seconds). 60 is safe for most production loads.
DJANGO_WORKER_TIMEOUT=60

# --- Storage --------------------------------------------------------------
# LOCAL = files on the django container's disk (fine for a single-host setup).
# Set to AWS or GCP for object storage; see docs/deployment/.
STORAGE_BACKEND=LOCAL

# --- Redis / Celery -------------------------------------------------------
REDIS_URL=redis://redis:6379/0

# Flower (Celery monitor) — protect with strong creds; port 5555 is exposed
# by the bundled traefik. Set both to a long random string.
CELERY_FLOWER_USER=<REPLACE-ME-flower-user>
CELERY_FLOWER_PASSWORD=<REPLACE-ME-flower-password>

# --- Auth --------------------------------------------------------------------
# Auth0 is OFF by default; OC falls back to JWT/session auth.
USE_AUTH0=false

# --- LLM credentials ------------------------------------------------------
# Required for embeddings + agent answers. The Bolivian-laws RAG service
# uses these for both per-area specialists and the orchestrator.
OPENAI_API_KEY=<REPLACE-ME-openai-key>
OPENAI_MODEL=gpt-4o
ANTHROPIC_API_KEY=

# --- Pipeline microservices (in-network URLs) ----------------------------
EMBEDDINGS_MICROSERVICE_URL=http://vector-embedder:8000
VECTOR_EMBEDDER_API_KEY=<REPLACE-ME-vector-embedder-key>
DOCLING_PARSER_SERVICE_URL=http://docling-parser:8000/parse/
DOCXODUS_PARSER_SERVICE_URL=http://docxodus-parser:8080/parse
DOCXODUS_PARSER_TIMEOUT=120

# --- Bolivian Laws RAG (overrides; sane defaults baked into settings) ----
# Override only if a target site moves or you mirror it elsewhere.
# BOLIVIAN_LAWS_GACETA_BASE_URL=https://gacetaoficialdebolivia.gob.bo/
# BOLIVIAN_LAWS_GACETA_LISTING_PATHS=/
# BOLIVIAN_LAWS_TSJ_BASE_URL=https://tsj.bo/
# BOLIVIAN_LAWS_TSJ_LISTING_PATHS=/jurisprudencia/
# BOLIVIAN_LAWS_TCP_BASE_URL=https://tcpbolivia.bo/
# BOLIVIAN_LAWS_TCP_LISTING_PATHS=/jurisprudencia/
BOLIVIAN_LAWS_SCRAPER_USER_AGENT=OpenContractsBolivianLawsBot/1.0 (+contact: <REPLACE-ME-email>)
BOLIVIAN_LAWS_SCRAPE_LOOKBACK_DAYS=30
BOLIVIAN_LAWS_REQUEST_DELAY_SECONDS=1.0
16 changes: 16 additions & 0 deletions .envs.example/.production/.frontend
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# =============================================================================
# OpenContracts — Frontend env (production build)
# Copy to .envs/.production/.frontend and replace <REPLACE-ME-domain>.
# These are baked into the Vite bundle at container build time.
# =============================================================================

REACT_APP_API_ROOT_URL=https://<REPLACE-ME-domain>
REACT_APP_APPLICATION_DOMAIN=<REPLACE-ME-domain>

# Auth0 off — match the django service's USE_AUTH0 setting.
REACT_APP_USE_AUTH0=false

# Optional: pre-fill an Auth0 client if you switch USE_AUTH0=true.
# REACT_APP_AUTH0_DOMAIN=
# REACT_APP_AUTH0_CLIENT_ID=
# REACT_APP_AUTH0_AUDIENCE=
12 changes: 12 additions & 0 deletions .envs.example/.production/.postgres
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# =============================================================================
# OpenContracts — Postgres env (production)
# Copy to .envs/.production/.postgres and fill in <REPLACE-ME>.
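# POSTGRES_PASSWORD MUST match the password embedded in DATABASE_URL.
# If the password contains URL-reserved characters (@ : / ? # %), percent-encode
# them inside DATABASE_URL only; POSTGRES_PASSWORD keeps the raw value.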
# =============================================================================

POSTGRES_HOST=postgres
POSTGRES_PORT=5432
POSTGRES_DB=opencontractserver
POSTGRES_USER=opencontractserver
POSTGRES_PASSWORD=<REPLACE-ME-strong-password>
DATABASE_URL=postgres://opencontractserver:<REPLACE-ME-same-strong-password>@postgres:5432/opencontractserver
26 changes: 26 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- **EasyPanel deployment kit**:
- **`easypanel.yml`** — dedicated Compose file parameterised entirely by environment variables (no `.envs/.production/*` files), with no bundled Traefik service: EasyPanel's built-in proxy handles TLS and domain routing. Missing required secrets fail-fast thanks to `${VAR:?error}` syntax.
- **`scripts/easypanel/print-env.sh`** — prints a ready-to-paste `KEY=value` block for the EasyPanel app's Environment tab, with all random secrets (`DJANGO_SECRET_KEY`, admin URL slug, Postgres password, Flower creds, vector-embedder API key) pre-generated.
- **`scripts/easypanel/deploy.sh`** — optional one-command bootstrap for folks SSH-ing into the host (wraps generate-env + configure-traefik + docker compose build/migrate/up + smoke test of the Bolivian-laws scrape). Used with the legacy `production.yml`.
- Building blocks for the legacy flow: commit-able env templates under `.envs.example/.production/`, `scripts/easypanel/generate-env.sh`, `scripts/easypanel/configure-traefik.sh`.
- Docs (`docs/deployment/easypanel.md`) rewritten around the GitHub-native flow: paste env vars → wire domain → deploy.
- **Bolivian Laws RAG service** (`opencontractserver/bolivian_laws/`): multi-agent RAG over Bolivian legal sources, organised by legal area to keep embeddings cost-aware and retrieval precise.
- One Corpus per `LegalArea` (constitucional, penal, civil, administrativo, laboral, tributario, familia, comercial, agrario, ambiental, otros), seeded idempotently from `AREA_PROFILES` (`opencontractserver/bolivian_laws/constants.py`).
- Tracking model `BolivianLegalDocument` with global SHA-256 dedupe and source attribution (`gaceta`, `tsj`, `tcp`, `manual`).
- Bulk ingestion via management command `python manage.py ingest_bolivian_laws --path ... --area ...` with optional LLM-based `--auto-classify`, dry-run and async (Celery) modes.
- Specialist agents per area + orchestrator agent (pydantic_ai) that routes questions to one or more specialists and synthesises answers (`opencontractserver/bolivian_laws/services/agents.py`).
- GraphQL mutation `askBolivianLaw(question, areas?)` returns the synthesised answer plus area-tagged source citations (`config/graphql/bolivian_laws_mutations.py`).
- Settings: `BOLIVIAN_LAWS_DEFAULT_EMBEDDER`, `BOLIVIAN_LAWS_CLASSIFIER_MODEL`, `BOLIVIAN_LAWS_ORCHESTRATOR_MODEL`, `BOLIVIAN_LAWS_SPECIALIST_MODEL` (`config/settings/base.py`).
- Documentation in `docs/features/bolivian_laws_rag.md`.
- **Bolivian Laws automatic scrapers** (`opencontractserver/bolivian_laws/scrapers/`): three pluggable scrapers that fetch legal PDFs from the Gaceta Oficial (`gacetaoficialdebolivia.gob.bo`), Tribunal Supremo de Justicia (`tsj.bo`) and Tribunal Constitucional Plurinacional (`tcpbolivia.bo`).
- `BaseScraper` with injectable `httpx.Client`, configurable User-Agent, rate limiting, and defensive per-listing error handling so a single broken page cannot abort a batch.
- Per-source classes (`GacetaOficialScraper`, `TribunalSupremoJusticiaScraper`, `TribunalConstitucionalScraper`) extract best-effort metadata: external ID (e.g. `LEY-1178`, `AS-123/2023`, `SCP-0250/2012`), publication date, and a suggested `LegalArea` via keyword heuristics (sala name for TSJ, SAFCO/tributario/etc. for Gaceta, always `constitucional` for TCP).
- Celery tasks `scrape_and_ingest_source(source_key)` and `scrape_and_ingest_all()` (`opencontractserver/bolivian_laws/tasks.py`). A SHA-256 pre-check before `ingest_pdf` makes re-runs cheap; download failures are logged and counted per-source without aborting the batch.
- New Beat schedule entry `bolivian-laws-scrape-all` running once daily (`config/settings/base.py`).
- Management command `python manage.py scrape_bolivian_laws [--source gaceta|tsj|tcp | --all] [--since-days N] [--max-entries N] [--sync]` for on-demand runs.
- Settings: `BOLIVIAN_LAWS_{GACETA,TSJ,TCP}_BASE_URL` / `_LISTING_PATHS`, `BOLIVIAN_LAWS_SCRAPER_USER_AGENT`, `BOLIVIAN_LAWS_SCRAPE_LOOKBACK_DAYS`, `BOLIVIAN_LAWS_REQUEST_DELAY_SECONDS`.
- Added `beautifulsoup4>=4.12,<5` to `requirements/base.txt` for HTML parsing.
- Tests use `httpx.MockTransport` with inline HTML fixtures; no real HTTP traffic.

### Fixed

- **GraphQL security hardening cleanup** (Issue #1198):
Expand Down
138 changes: 138 additions & 0 deletions config/graphql/bolivian_laws_mutations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""GraphQL mutation for the Bolivian Laws RAG service.

Single mutation: ``askBolivianLaw(question, areas?)``.

- If ``areas`` is empty / null: routes through the orchestrator agent
which decides which specialist(s) to consult.
- If ``areas`` is given: skips orchestration and consults the listed
specialists directly in parallel (cheaper, deterministic).
"""

from __future__ import annotations

import logging

import graphene
from asgiref.sync import async_to_sync
from graphql_jwt.decorators import login_required

from opencontractserver.bolivian_laws.constants import LegalArea
from opencontractserver.bolivian_laws.services.agents import (
ask_orchestrator,
ask_specialists,
)

logger = logging.getLogger(__name__)


class BolivianLawSourceType(graphene.ObjectType):
    """One citation backing an answer, tagged with the legal area it came from.

    Instances are built by ``AskBolivianLawMutation`` from the ``sources``
    of the agent response.
    """

    # Legal area the citation is attributed to.
    area = graphene.String(required=True)
    # Nullable: graphene fields are optional unless ``required=True`` is given.
    document_id = graphene.Int()
    # Text excerpt supporting the answer.
    snippet = graphene.String(required=True)
    # Retrieval similarity score reported for the excerpt.
    similarity_score = graphene.Float(required=True)


class AskBolivianLawMutation(graphene.Mutation):
    """Query the Bolivian Laws RAG service.

    Returns a synthesised answer plus per-source citations tagged by
    legal area.

    Routing:
        * ``areas`` empty / null -> ``ask_orchestrator`` decides which
          specialist(s) to consult; ``conversation_id`` is forwarded.
        * ``areas`` given -> ``ask_specialists`` is called directly with the
          normalised, de-duplicated list.

    Every failure path returns ``ok=False`` with a human-readable ``message``
    and empty ``answer`` / ``consulted_areas`` / ``sources`` instead of
    raising, so clients can branch on ``ok``.
    """

    class Arguments:
        question = graphene.String(
            required=True,
            description="Pregunta del usuario en lenguaje natural.",
        )
        areas = graphene.List(
            graphene.String,
            required=False,
            description=(
                "Lista opcional de áreas (constitucional, penal, civil, "
                "administrativo, laboral, tributario, familia, comercial, "
                "agrario, ambiental, otros). Si se provee, se omite el "
                "orquestador y se consultan en paralelo."
            ),
        )
        conversation_id = graphene.Int(
            required=False,
            description="ID de conversación a continuar (opcional).",
        )

    ok = graphene.Boolean()
    message = graphene.String()
    answer = graphene.String()
    consulted_areas = graphene.List(graphene.String)
    sources = graphene.List(BolivianLawSourceType)
    conversation_id = graphene.Int()

    @staticmethod
    def _failure(message):
        """Build the uniform ``ok=False`` payload shared by all error paths."""
        return AskBolivianLawMutation(
            ok=False,
            message=message,
            answer="",
            consulted_areas=[],
            sources=[],
        )

    @staticmethod
    @login_required
    def mutate(root, info, question, areas=None, conversation_id=None):
        """Validate the input, dispatch to the agents and shape the response.

        Args:
            root: Unused graphene root value.
            info: Resolver info; ``info.context.user`` supplies the user id.
            question: Free-text question; surrounding whitespace is stripped.
            areas: Optional list of legal-area names. Each entry is stripped
                and lower-cased, then checked against ``LegalArea`` values.
            conversation_id: Optional conversation to continue. Only the
                orchestrator path receives it.

        Returns:
            An ``AskBolivianLawMutation`` payload; ``ok`` is ``False`` for an
            empty question, an unknown area or an agent failure.
        """
        question = (question or "").strip()
        if not question:
            return AskBolivianLawMutation._failure("Question must be non-empty.")

        if areas:
            valid_areas = {member.value for member in LegalArea}
            cleaned: list[str] = []
            for raw_area in areas:
                normalised = (raw_area or "").strip().lower()
                if normalised not in valid_areas:
                    return AskBolivianLawMutation._failure(
                        f"Unknown area: {raw_area!r}"
                    )
                cleaned.append(normalised)
            # Drop repeats (order preserved) so one specialist is never
            # consulted twice for the same question.
            areas = list(dict.fromkeys(cleaned))

        user_id = info.context.user.pk if info.context.user else None

        try:
            if areas:
                # NOTE(review): conversation_id is not forwarded on this path,
                # matching the original call; confirm whether ask_specialists
                # should accept it.
                response = async_to_sync(ask_specialists)(
                    areas, question, user_id=user_id
                )
            else:
                response = async_to_sync(ask_orchestrator)(
                    question,
                    user_id=user_id,
                    conversation_id=conversation_id,
                )
        except Exception:
            # Top-level API boundary: the traceback goes to the server log
            # only. Exception text is not echoed to the client because it may
            # expose internals (provider errors, hosts, SQL, file paths).
            logger.exception("askBolivianLaw failed")
            return AskBolivianLawMutation._failure(
                "Internal error. Please try again later."
            )

        return AskBolivianLawMutation(
            ok=True,
            message="ok",
            answer=response.answer,
            consulted_areas=response.consulted_areas,
            conversation_id=response.conversation_id,
            sources=[
                BolivianLawSourceType(
                    area=source.area,
                    document_id=source.document_id,
                    snippet=source.snippet,
                    similarity_score=source.similarity_score,
                )
                for source in response.sources
            ],
        )
6 changes: 6 additions & 0 deletions config/graphql/mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@
UpdateModeratorPermissionsMutation,
)

# Import Bolivian Laws mutations
from config.graphql.bolivian_laws_mutations import AskBolivianLawMutation

# Import notification mutations
from config.graphql.notification_mutations import (
DeleteNotificationMutation,
Expand Down Expand Up @@ -397,6 +400,9 @@ class Mutation(graphene.ObjectType):
update_agent_configuration = UpdateAgentConfigurationMutation.Field()
delete_agent_configuration = DeleteAgentConfigurationMutation.Field()

# BOLIVIAN LAWS RAG ##########################################################
ask_bolivian_law = AskBolivianLawMutation.Field()

# PIPELINE SETTINGS MUTATIONS (Superuser only) ###############################
update_pipeline_settings = UpdatePipelineSettingsMutation.Field()
reset_pipeline_settings = ResetPipelineSettingsMutation.Field()
Expand Down
57 changes: 57 additions & 0 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@
"opencontractserver.agents",
"opencontractserver.worker_uploads",
"opencontractserver.discovery",
"opencontractserver.bolivian_laws",
]

# https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
Expand Down Expand Up @@ -680,8 +681,48 @@
"schedule": 300.0, # every 5 minutes
"options": {"queue": "worker_uploads"},
},
# Bolivian-laws RAG: fan out a scrape+ingest task per registered
# source (Gaceta Oficial, TSJ, TCP) once a day. Ingestion dedupes
# by SHA-256 so re-running is cheap.
"bolivian-laws-scrape-all": {
"task": "bolivian_laws.scrape_and_ingest_all",
"schedule": 86400.0, # daily
},
}

# Bolivian Laws RAG service
# ------------------------------------------------------------------------------
# Base URLs can be overridden per-deployment (useful for staging mirrors
# or archive snapshots). Listing paths are comma-separated strings.
# Gaceta Oficial: site root, then the listing path(s) under it.
BOLIVIAN_LAWS_GACETA_BASE_URL = env(
"BOLIVIAN_LAWS_GACETA_BASE_URL", default="https://gacetaoficialdebolivia.gob.bo/"
)
BOLIVIAN_LAWS_GACETA_LISTING_PATHS = env.list(
"BOLIVIAN_LAWS_GACETA_LISTING_PATHS", default=["/"]
)
# Tribunal Supremo de Justicia (TSJ): site root and listing path(s).
BOLIVIAN_LAWS_TSJ_BASE_URL = env(
"BOLIVIAN_LAWS_TSJ_BASE_URL", default="https://tsj.bo/"
)
BOLIVIAN_LAWS_TSJ_LISTING_PATHS = env.list(
"BOLIVIAN_LAWS_TSJ_LISTING_PATHS", default=["/jurisprudencia/"]
)
# Tribunal Constitucional Plurinacional (TCP): site root and listing path(s).
BOLIVIAN_LAWS_TCP_BASE_URL = env(
"BOLIVIAN_LAWS_TCP_BASE_URL", default="https://tcpbolivia.bo/"
)
BOLIVIAN_LAWS_TCP_LISTING_PATHS = env.list(
"BOLIVIAN_LAWS_TCP_LISTING_PATHS", default=["/jurisprudencia/"]
)
# User-Agent string for the scrapers; the default points at the upstream
# project repository, override it to advertise a deployment-specific contact.
BOLIVIAN_LAWS_SCRAPER_USER_AGENT = env(
"BOLIVIAN_LAWS_SCRAPER_USER_AGENT",
default="OpenContractsBolivianLawsBot/1.0 (+https://github.com/JSv4/OpenContracts)",
)
# NOTE(review): presumably how many days back a scrape run looks for new
# publications - confirm against the scraper/task code.
BOLIVIAN_LAWS_SCRAPE_LOOKBACK_DAYS = env.int(
"BOLIVIAN_LAWS_SCRAPE_LOOKBACK_DAYS", default=30
)
# NOTE(review): presumably the pause between scraper HTTP requests (rate
# limiting), in seconds - confirm against BaseScraper.
BOLIVIAN_LAWS_REQUEST_DELAY_SECONDS = env.float(
"BOLIVIAN_LAWS_REQUEST_DELAY_SECONDS", default=1.0
)

# Worker Upload Processing
# ------------------------------------------------------------------------------
# Documents per batch when draining the staging table
Expand Down Expand Up @@ -1241,3 +1282,19 @@
},
"cache_ttl": env.int("MCP_CACHE_TTL", default=300),
}


# Bolivian Laws RAG Service
# ------------------------------------------------------------------------------
# See docs/features/bolivian_laws_rag.md
# Embedder for the Bolivian-laws service; falls back to DEFAULT_EMBEDDER
# (defined elsewhere in this settings module) when the env var is unset.
BOLIVIAN_LAWS_DEFAULT_EMBEDDER = env.str(
"BOLIVIAN_LAWS_DEFAULT_EMBEDDER",
default=DEFAULT_EMBEDDER,
)
# NOTE(review): presumably the LLM used to classify documents into a legal
# area during ingestion - confirm against the ingestion code.
BOLIVIAN_LAWS_CLASSIFIER_MODEL = env.str(
"BOLIVIAN_LAWS_CLASSIFIER_MODEL", default="gpt-4o-mini"
)
# NOTE(review): presumably the LLM behind the orchestrator agent - confirm
# against services/agents.py.
BOLIVIAN_LAWS_ORCHESTRATOR_MODEL = env.str(
"BOLIVIAN_LAWS_ORCHESTRATOR_MODEL", default="gpt-4o-mini"
)
# NOTE(review): the default is an empty string, unlike the two settings above;
# presumably consumers treat "" as "use a fallback model" - confirm.
BOLIVIAN_LAWS_SPECIALIST_MODEL = env.str("BOLIVIAN_LAWS_SPECIALIST_MODEL", default="")
Loading