diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..756bcfa9 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +.git/ +helm/ +.venv/ +.github/ diff --git a/.github/workflows/deploy_sidecar.yml b/.github/workflows/deploy_sidecar.yml new file mode 100644 index 00000000..b3779ee1 --- /dev/null +++ b/.github/workflows/deploy_sidecar.yml @@ -0,0 +1,59 @@ +name: Deploy sidecar for client +on: + workflow_dispatch: + inputs: + pdp_key: + description: 'Provide a PDP_API_KEY env var' + required: true + type: string + name: + description: 'Provide the name of the sidecar' + required: true + type: string + +env: + AWS_REGION: "us-east-2" + +permissions: + id-token: write + contents: write + +jobs: + deploy-new-sidecar: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + + - name: AWS Auth - Assume OIDC Github Role + uses: aws-actions/configure-aws-credentials@v1.6.1 + with: + role-to-assume: ${{ secrets.ROLE_ARN }} + aws-region: ${{ env.AWS_REGION }} + role-session-name: githubactions + + - name: Amazon ECR Login - Root Account + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - uses: azure/k8s-set-context@v2 + with: + method: kubeconfig + kubeconfig: ${{ secrets.KUBE_CONFIG }} + context: prod + + - name: Helm Dependency Build + shell: bash + env: + NAMESPACE: permitio + run: + helm dependency build ./helm/ ; + + - name: Helm Install + shell: bash + env: + NAMESPACE: sidecars + CHART_DIR: ./helm + run: + helm upgrade --install -f ${{ env.CHART_DIR }}/values-prod.yaml ${{ inputs.name }}-sidecar ${{ env.CHART_DIR }} -n ${{ env.NAMESPACE }} --wait --set expose.dnsHostName="${{ inputs.name }}-sidecar.permit.io" --set config.env.PDP_API_KEY="${{ inputs.pdp_key }}" diff --git a/.github/workflows/dockerhub_push.yml b/.github/workflows/dockerhub_push.yml deleted file mode 100644 index 5913a7ce..00000000 --- a/.github/workflows/dockerhub_push.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: Build and publish docker image 
-on: - push: - # disable push new image on merge to master (because we don't want to push a "master" tag) - # branches: - # - 'master' - # on every tags push, we will publish both the latest tag and the versioned tag (semver) - tags: - - 'v*' - - # Disabled: Allows you to run this workflow manually from the Actions tab (because auto tagging won't work) - # workflow_dispatch: - -jobs: - # ====== Todos Backend (build and deploy) ====== - build-and-publish-image: - runs-on: ubuntu-latest - steps: - - - name: Checkout - uses: actions/checkout@v2 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Docker meta - id: meta - uses: docker/metadata-action@v3 - with: - images: permitio/pdp - tags: | - type=ref,event=branch - type=semver,pattern={{version}} - - - name: Echo published tags - run: | - echo "Published docker tags: ${{ steps.meta.outputs.tags }}" - - - name: Build image and push - id: docker_build - uses: docker/build-push-action@v2 - with: - push: true - tags: ${{ steps.meta.outputs.tags }} - context: . 
- file: ./Dockerfile - #All available platforms: linux/arm64,linux/amd64,linux/riscv64,linux/ppc64le,linux/s390x,linux/386,linux/arm/v7,linux/arm/v6 - platforms: linux/arm64,linux/amd64 - build-args: | - READ_ONLY_GITHUB_TOKEN=${{ secrets.READ_ONLY_GITHUB_TOKEN }} - - - name: Image digest - run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/.github/workflows/helm_release.yml b/.github/workflows/helm_release.yml new file mode 100644 index 00000000..d96e4d92 --- /dev/null +++ b/.github/workflows/helm_release.yml @@ -0,0 +1,34 @@ +name: Helm Release Workflow + +on: + push: + paths: + - 'charts/pdp/Chart.yaml' + +jobs: + helm-release: + if: github.event_name == 'push' && github.ref == 'refs/heads/v2' + permissions: + contents: write + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Configure Git + run: | + git config user.name "elimoshkovich" + git config user.email "eli@permit.io" + + - name: Install Helm + uses: azure/setup-helm@v3 + + - name: Run chart-releaser + uses: helm/chart-releaser-action@v1.6.0 + env: + CR_TOKEN: "${{ secrets.PAGES }}" + with: + skip_existing: true + mark_as_latest: false diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml deleted file mode 100644 index b75d06ea..00000000 --- a/.github/workflows/pre-commit.yml +++ /dev/null @@ -1,14 +0,0 @@ -name: pre-commit - -on: - pull_request: - push: - branches: [master, main] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - - uses: pre-commit/action@v2.0.3 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..a8ba70a0 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,146 @@ +name: Build and Push PDP Docker Image + +on: + release: + types: [published] + +permissions: + id-token: write + contents: read + +jobs: + pdp-tests: + # Call the reusable tests 
workflow. + uses: ./.github/workflows/tests.yml + secrets: inherit + + build-and-push-pdp-vanilla: + needs: pdp-tests + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Pre build - for PDP-Vanilla + run: echo "${{ github.event.release.tag_name }}" | cut -d '-' -f 1 > permit_pdp_version + + - name: Build and push PDP-Vanilla - (official release) + if: "!github.event.release.prerelease" + uses: docker/build-push-action@v5 + with: + push: true + context: . + platforms: linux/amd64,linux/arm64 + tags: permitio/pdp-v2-vanilla:${{ github.event.release.tag_name }}, permitio/pdp-v2-vanilla:latest + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Build and push PDP-Vanilla image - (pre-release) + if: "github.event.release.prerelease" + uses: docker/build-push-action@v5 + with: + push: true + context: . 
+ platforms: linux/amd64,linux/arm64 + tags: permitio/pdp-v2-vanilla:${{ github.event.release.tag_name }} + cache-from: type=gha + cache-to: type=gha,mode=max + + build-and-push-pdp: + needs: pdp-tests + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - uses: actions/checkout@v3 + with: + repository: permitio/permit-opa + ref: main + path: './permit-opa' + token: ${{ secrets.CLONE_REPO_TOKEN }} + + - name: Pre build PDP + run: | + echo "${{ github.event.release.tag_name }}" | cut -d '-' -f 1 > permit_pdp_version + rm -rf custom + mkdir custom + build_root="$PWD" + cd ./permit-opa + find * \( -name '*go*' -o -name 'LICENSE.md' \) -print0 | xargs -0 tar -czf "$build_root"/custom/custom_opa.tar.gz --exclude '.*' + + - uses: robinraju/release-downloader@v1 + with: + repository: permitio/datasync + latest: true + fileName: factstore_server* + token: ${{ secrets.CLONE_REPO_TOKEN }} + out-file-path: "factdb" + + - name: Build and push PDP image - (pre-release) + if: "github.event.release.prerelease" + uses: docker/build-push-action@v5 + with: + push: true + context: . + platforms: linux/amd64,linux/arm64 + build-args: | + ALLOW_MISSING_FACTSTORE=false + tags: permitio/pdp-v2:${{ github.event.release.tag_name }} + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Build and push PDP image - (official release) + if: "!github.event.release.prerelease" + uses: docker/build-push-action@v5 + with: + push: true + context: . 
+ platforms: linux/amd64,linux/arm64 + build-args: | + ALLOW_MISSING_FACTSTORE=false + tags: permitio/pdp-v2:${{ github.event.release.tag_name }},permitio/pdp-v2:latest + cache-from: type=gha + cache-to: type=gha,mode=max + + update-pdp-api-ecs-service: + needs: build-and-push-pdp + runs-on: ubuntu-latest + if: "!github.event.release.prerelease" + steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.PDP_CICD_AWS_ROLE }} + aws-region: us-east-1 + + - name: Redeploy ECS service - pdp-general-redoc-service + run: | + aws ecs update-service \ + --cluster public-pdps-us-east-1 \ + --service pdp-general-redoc-service-731a74c \ + --force-new-deployment diff --git a/.github/workflows/test_release.yml b/.github/workflows/test_release.yml new file mode 100644 index 00000000..e0dafed8 --- /dev/null +++ b/.github/workflows/test_release.yml @@ -0,0 +1,28 @@ +name: Helm test + +on: + pull_request: + paths: + - 'charts/pdp/**' + +jobs: + helm_test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Create k8s Kind Cluster + uses: helm/kind-action@v1 + + - name: Deploy Helm Chart in Kind Cluster + working-directory: ./charts/pdp + run: helm install pdp . 
--set pdp.ApiKey=${{ secrets.PDP_API_KEY }} --create-namespace --namespace pdp --wait + + - name: Show PDP logs in case of failure + run: kubectl logs svc/permitio-pdp -n pdp + if: failure() + + - name: Show PDP pod status + run: kubectl get pods -n pdp + if: always() diff --git a/.github/workflows/test_release_skipping.yml b/.github/workflows/test_release_skipping.yml new file mode 100644 index 00000000..b15494ee --- /dev/null +++ b/.github/workflows/test_release_skipping.yml @@ -0,0 +1,16 @@ +name: Helm test + +on: + pull_request: + paths-ignore: + - 'charts/pdp/**' # Ignore changes in 'charts/pdp' directory + +jobs: + helm_test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Skipping helm_test workflow + run: echo "Skipping helm_test since 'charts/pdp' has not been touched" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..cce0a3e0 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,133 @@ +name: PDP CI Tests + +on: + pull_request: + push: + branches: [master, main, v*] + workflow_call: + secrets: + PDP_TESTER_API_KEY: + required: true + CLONE_REPO_TOKEN: + required: true + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + - uses: pre-commit/action@v2.0.3 + + pytests: + runs-on: ubuntu-latest + steps: + - name: Python setup + uses: actions/setup-python@v5 + with: + python-version: '3.11.8' + + - name: Checkout code + uses: actions/checkout@v4 + + - name: Run Pytests + run: | + python -m pip install --upgrade pip + pip install ".[dev]" + pytest -s --cache-clear horizon/tests/ + + pdp-tester: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - uses: actions/checkout@v4 + with: + repository: permitio/permit-opa + ref: main + path: './permit-opa' + token: ${{ secrets.CLONE_REPO_TOKEN }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + 
+ - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Pre build PDP tests + run: | + echo "next" > permit_pdp_version + rm -rf custom + mkdir custom + build_root="$PWD" + cd ./permit-opa + find * \( -name '*go*' -o -name 'LICENSE.md' \) -print0 | xargs -0 tar -czf "$build_root"/custom/custom_opa.tar.gz --exclude '.*' + + - uses: robinraju/release-downloader@v1 + with: + repository: permitio/datasync + latest: true + fileName: factstore_server* + token: ${{ secrets.CLONE_REPO_TOKEN }} + out-file-path: "factdb" + + - name: Build and load PDP Docker image + uses: docker/build-push-action@v5 + with: + push: false + load: true + context: . + build-args: | + ALLOW_MISSING_FACTSTORE=false + platforms: linux/amd64 + tags: permitio/pdp-v2:next + cache-from: type=gha + cache-to: type=gha,mode=max + + # Checkout the pdp-tester repository + - name: Checkout pdp-tester repository + uses: actions/checkout@v3 + with: + repository: "permitio/pdp-tester" + token: ${{ secrets.CLONE_REPO_TOKEN }} + path: './pdp-tester' + + # Setup Python environment + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: "3.12" + + # Install dependencies for pdp-tester + - name: Install pdp-tester dependencies + working-directory: ./pdp-tester + run: | + pip install -r requirements.txt + + # Run pdp-tester + - name: Run pdp-tester + working-directory: ./pdp-tester + env: + TOKEN: ${{ secrets.PDP_TESTER_API_KEY }} + LOCAL_TAGS: '["next"]' + INCLUDE_TAGS: '[]' + AUTO_REMOVE: "False" + SKIP_GENERATE: "True" + ENVIRONMENT: '{"PDP_ENABLE_FACTDB": true}' + ENABLE_APM: "False" + run: | + python -m pdp_tester.main + + - name: Print Docker container logs + if: always() + run: | + echo "Fetching logs for all Docker containers..." 
+ for container in $(docker ps -aq); do + echo "========================================" + echo "Logs for container: $container" + echo "----------------------------------------" + docker logs "$container" || true + echo "========================================" + echo "" + done diff --git a/.isort.cfg b/.isort.cfg deleted file mode 100644 index b9fb3f3e..00000000 --- a/.isort.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[settings] -profile=black diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ac3e32bc..73b6df9d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,39 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.2.0 + rev: v5.0.0 hooks: - - id: check-yaml - - id: end-of-file-fixer - id: trailing-whitespace - - repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black - - repo: https://github.com/pycqa/isort - rev: 5.10.1 + - id: end-of-file-fixer + - id: check-added-large-files + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-json + - id: check-toml + - id: check-yaml + exclude: "^charts/.*" + - id: check-xml + - id: check-merge-conflict + - id: mixed-line-ending + args: [ --fix=lf ] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.3 hooks: - - id: isort + - id: ruff + args: [--fix] + files: \.py$ + types: [ file ] + - id: ruff-format + files: \.py$ + types: [ file ] + +# - repo: https://github.com/pre-commit/mirrors-mypy +# rev: v1.13.0 +# hooks: +# - id: mypy +# pass_filenames: false +# additional_dependencies: +# - pydantic +# - types-requests +# files: \.py$ +# types: [ file ] diff --git a/Dockerfile b/Dockerfile index 648d492c..1224ffcd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,75 +1,162 @@ -# BUILD STAGE --------------------------------------- -# split this stage to save time and reduce image size +# OPA BUILD STAGE ----------------------------------- +# Build OPA from source or download precompiled binary # 
--------------------------------------------------- -FROM python:3.8-alpine3.11 as BuildStage -# update apk cache -RUN apk update -# TODO: remove this when upgrading to a new alpine version -# more details: https://github.com/pyca/cryptography/issues/5771 -ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1 -# install linux libraries necessary to compile some python packages -RUN apk add --update --no-cache --virtual .build-deps gcc git build-base alpine-sdk python3-dev musl-dev postgresql-dev libffi-dev libressl-dev -# from now on, work in the /app directory -WORKDIR /app/ -# Layer dependency install (for caching) -COPY requirements.txt requirements.txt -# install python deps -RUN pip install --upgrade pip && pip install --user -r requirements.txt +FROM golang:bullseye AS opa_build + +COPY custom* /custom +COPY factdb* /factdb + +# Build OPA binary if custom_opa.tar.gz is provided +RUN if [ -f /custom/custom_opa.tar.gz ]; \ + then \ + cd /custom && \ + tar xzf custom_opa.tar.gz && \ + go build -ldflags="-extldflags=-static" -o /opa && \ + rm -rf /custom; \ + else \ + case $(uname -m) in \ + x86_64) curl -L -o /opa https://openpolicyagent.org/downloads/latest/opa_linux_amd64_static ;; \ + aarch64) curl -L -o /opa https://openpolicyagent.org/downloads/latest/opa_linux_arm64_static ;; \ + *) echo "Unknown architecture." 
&& exit 1 ;; \ + esac; \ + fi + +# Build or copy factdb binary +RUN if [ -f /factdb/factdb.tar.gz ]; \ + then \ + cd /factdb && \ + tar xzf factdb.tar.gz && \ + go build -ldflags="-extldflags=-static" -o /bin/factdb ./cmd/factstore_server && \ + rm -rf /factdb; \ + else \ + case $(uname -m) in \ + x86_64) \ + if [ -f /factdb/factstore_server-linux-amd64 ]; then \ + cp /factdb/factstore_server-linux-amd64 /bin/factdb; \ + else \ + echo "factstore_server-linux-amd64 not found."; \ + if [ "$ALLOW_MISSING_FACTSTORE" = "false" ]; then \ + echo "Missing Factstore is not allowed, exiting..."; \ + exit 1; \ + else \ + echo "Missing Factstore is allowed, continuing..."; \ + touch /bin/factdb; \ + fi; \ + fi \ + ;; \ + aarch64) \ + if [ -f /factdb/factstore_server-linux-arm64 ]; then \ + cp /factdb/factstore_server-linux-arm64 /bin/factdb; \ + else \ + echo "factstore_server-linux-arm64 not found."; \ + if [ "$ALLOW_MISSING_FACTSTORE" = "false" ]; then \ + echo "Missing Factstore is not allowed, exiting..."; \ + exit 1; \ + else \ + echo "Missing Factstore is allowed, continuing..."; \ + touch /bin/factdb; \ + fi; \ + fi \ + ;; \ + *) \ + echo "Unknown architecture."; \ + exit 1; \ + ;; \ + esac; \ + fi # MAIN IMAGE ---------------------------------------- -# most of the time only this image should be built +# Main image setup (optimized) # --------------------------------------------------- -FROM python:3.8-alpine3.11 -# bash is needed for ./start/sh script -RUN apk add --update --no-cache bash curl -# needed for rookout -RUN apk add g++ python3-dev linux-headers -# copy opa from official image (main binary and lib for web assembly) -RUN curl -L -o /opa https://openpolicyagent.org/downloads/latest/opa_linux_amd64_static && chmod 755 /opa -# copy libraries from build stage -COPY --from=BuildStage /root/.local /root/.local -# copy wait-for-it (use only for development! 
e.g: docker compose) -COPY scripts/wait-for-it.sh /usr/wait-for-it.sh -RUN chmod +x /usr/wait-for-it.sh -# copy startup script -COPY ./scripts/start.sh /start.sh -RUN chmod +x /start.sh -# copy gunicorn_config -COPY ./scripts/gunicorn_conf.py /gunicorn_conf.py -# copy app code -COPY . ./ -# install sidecar package -RUN python setup.py install -# Make sure scripts in .local are usable: -ENV PATH=/:/root/.local/bin:$PATH -# uvicorn config ------------------------------------ - -# WARNING: do not change the number of workers on the opal client! -# only one worker is currently supported for the client. - -# number of uvicorn workers +FROM python:3.10-alpine + +WORKDIR /app + +# Create necessary user and group in a single step +RUN addgroup -S permit -g 1001 && \ + adduser -S -s /bin/bash -u 1000 -G permit -h /home/permit permit + +# Create backup directory with permissions +RUN mkdir -p /app/backup && chmod -R 777 /app/backup + +# Install necessary libraries in a single RUN command +RUN apk update && \ + apk add --no-cache bash build-base libffi-dev libressl-dev musl-dev zlib-dev gcompat + +# Copy OPA and factdb binaries from the build stage +COPY --from=opa_build --chmod=755 /opa /app/bin/opa +COPY --from=opa_build --chmod=755 /bin/factdb /app/bin/factdb + +# Environment variables for OPA and FactDB +ENV OPAL_INLINE_OPA_EXEC_PATH="/app/bin/opa" +ENV PDP_FACTDB_BINARY_PATH="/app/bin/factdb" + +# Copy required scripts +COPY scripts /scripts + +# Set permissions and ownership for the application +RUN mkdir -p /config && chown -R permit:permit /config +RUN chmod +x /scripts/wait-for-it.sh && \ + chmod +x /scripts/start.sh + +# Ensure the `permit` user has the correct permissions for home directory and binaries +RUN chown -R permit:permit /home/permit /app /usr/local/bin /scripts + +# Switch to permit user +USER permit + +# Copy Kong routes and Gunicorn config +COPY kong_routes.json /config/kong_routes.json +COPY ./scripts/gunicorn_conf.py ./gunicorn_conf.py + +USER root + 
+# Install python dependencies in one command to optimize layer size +COPY ./requirements.txt ./requirements.txt +RUN pip install --upgrade pip setuptools && \ + pip install -r requirements.txt && \ + python -m pip uninstall -y pip setuptools && \ + rm -r /usr/local/lib/python3.10/ensurepip + +USER permit + +# Copy the application code +COPY ./horizon /app/horizon + +# Version file for the application +COPY ./permit_pdp_version /app/permit_pdp_version + +# Set the PATH to ensure the local binary paths are used +ENV PATH="/app/bin:/home/permit/.local/bin:$PATH" + +# Uvicorn configuration ENV UVICORN_NUM_WORKERS=1 -# uvicorn asgi app -ENV UVICORN_ASGI_APP=horizon.main:app -# uvicorn port +ENV UVICORN_ASGI_APP="horizon.main:app" ENV UVICORN_PORT=7000 # opal configuration -------------------------------- -ENV OPAL_SERVER_URL=https://opal.permit.io -ENV OPAL_LOG_DIAGNOSE=false -ENV OPAL_LOG_TRACEBACK=false +ENV OPAL_SERVER_URL="https://opal.permit.io" +ENV OPAL_LOG_DIAGNOSE="false" +ENV OPAL_LOG_TRACEBACK="false" ENV OPAL_LOG_MODULE_EXCLUDE_LIST="[]" -ENV OPAL_INLINE_OPA_ENABLED=true -ENV OPAL_INLINE_OPA_LOG_FORMAT=http +ENV OPAL_INLINE_OPA_ENABLED="true" +ENV OPAL_INLINE_OPA_LOG_FORMAT="http" # horizon configuration ----------------------------- # by default, the backend is at port 8000 on the docker host # in prod, you must pass the correct url -ENV PDP_CONTROL_PLANE=https://api.permit.io +ENV PDP_CONTROL_PLANE="https://api.permit.io" ENV PDP_API_KEY="MUST BE DEFINED" -# expose sidecar port -EXPOSE 7000 -# expose opa directly -EXPOSE 8181 -# run gunicorn -CMD ["/start.sh"] +ENV PDP_REMOTE_CONFIG_ENDPOINT="/v2/pdps/me/config" +ENV PDP_REMOTE_STATE_ENDPOINT="/v2/pdps/me/state" +ENV PDP_VERSION_FILE_PATH="/app/permit_pdp_version" +ENV PDP_FACTDB_BINARY_PATH="/app/bin/factdb" +# This is a default PUBLIC (not secret) key, +# and it is here as a safety measure on purpose. 
+ENV OPAL_AUTH_PUBLIC_KEY="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDe2iQ+/E01P2W5/EZwD5NpRiSQ8/r/k18pFnym+vWCSNMWpd9UVpgOUWfA9CAX4oEo5G6RfVVId/epPH/qVSL87uh5PakkLZ3E+PWVnYtbzuFPs/lHZ9HhSqNtOQ3WcPDTcY/ST2jyib2z0sURYDMInSc1jnYKqPQ6YuREdoaNdPHwaTFN1tEKhQ1GyyhL5EDK97qU1ejvcYjpGm+EeE2sjauHYn2iVXa2UA9fC+FAKUwKqNcwRTf3VBLQTE6EHGWbxVzXv1Feo8lPZgL7Yu/UPgp7ivCZhZCROGDdagAfK9sveYjkKiWCLNUSpado/E5Vb+/1EVdAYj6fCzk45AdQzA9vwZefP0sVg7EuZ8VQvlz7cU9m+XYIeWqduN4Qodu87rtBYtSEAsru/8YDCXBDWlLJfuZb0p/klbte3TayKnQNSWD+tNYSJHrtA/3ZewP+tGDmtgLeB38NLy1xEsgd31v6ISOSCTHNS8ku9yWQXttv0/xRnuITr8a3TCLuqtUrNOhCx+nKLmYF2cyjYeQjOWWpn/Z6VkZvOa35jhG1ETI8IwE+t5zXqrf2s505mh18LwA1DhC8L/wHk8ZG7bnUe56QwxEo32myUBN8nHdu7XmPCVP8MWQNLh406QRAysishWhXVs/+0PbgfBJ/FxKP8BXW9zqzeIG+7b/yk8tRHQ==" +# 7000 sidecar port +# 8181 opa port +EXPOSE 7000 8181 + +# Run the application using the startup script +CMD ["/scripts/start.sh"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..a52023a5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2025 Or Weis and Asaf Cohen + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile index bcf3d2d1..d636d717 100644 --- a/Makefile +++ b/Makefile @@ -1,29 +1,30 @@ -.PHONY: help build +.PHONY: help build prepare .DEFAULT_GOAL := help -# DOCKER TASKS -# Build the container -build: ## Build the container - @docker build -t permitio/pdp . - -build-local: ## Build the container - @docker build -t permitio/pdp:local . - -run: ## Run the container locally - @docker run -it \ - -e "OPAL_SERVER_URL=http://host.docker.internal:7002" \ - -e "PDP_CONTROL_PLANE=http://host.docker.internal:8000" \ - -e "PDP_API_KEY=$(DEV_MODE_CLIENT_TOKEN)" \ - -p 7000:7000 \ - -p 8181:8181 \ - permitio/pdp - -run-against-prod: ## Run the container against prod - @docker run -it \ - -e "PDP_PRINT_CONFIG_ON_STARTUP=true" \ - -e "PDP_API_KEY=$(AUTPDP_PROD_CLIENT_TOKEN)" \ - -e "OPAL_CLIENT_TOKEN=$(OPAL_PROD_CLIENT_TOKEN)" \ - -p 7000:7000 \ - -p 8181:8181 \ - permitio/pdp +prepare: +ifndef VERSION + $(error You must set VERSION variable to build local image) +endif + + ./build_opal_bundle.sh + +run-prepare: +ifndef API_KEY + $(error You must set API_KEY variable to run pdp locally) +endif +ifndef VERSION + $(error You must set VERSION variable to run pdp locally) +endif + +build-amd64: prepare + @docker buildx build --platform linux/amd64 -t permitio/pdp-v2:$(VERSION) . 
--load + +build-arm64: prepare + @docker buildx build --build-arg ALLOW_MISSING_FACTSTORE=false --platform linux/arm64 -t permitio/pdp-v2:$(VERSION) . --load + +run: run-prepare + @docker run -it --rm -p 7766:7000 --env PDP_API_KEY=$(API_KEY) --env PDP_DEBUG=true permitio/pdp-v2:$(VERSION) + +run-on-background: run-prepare + @docker run -it --rm -d -p 7766:7000 --env PDP_API_KEY=$(API_KEY) --env PDP_DEBUG=true permitio/pdp-v2:$(VERSION) diff --git a/README.md b/README.md index e8db9185..ce7d1fb0 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,50 @@ +![PDP.png](imgs/PDP.png) # Permit.io PDP The PDP (Policy decision point) syncs with the authorization service and maintains up-to-date policy cache for open policy agent. -## Running locally (during development) +## Running a PDP +PDPs are connected to your [Permit.io account](https://docs.permit.io/quickstart) using an API Key. +Check out the [Permit.io documentation](https://docs.permit.io/manage-your-account/projects-and-env#fetching-and-rotating-the-api-key) to learn how to get an Environment API Key. + +You can run a PDP in a docker container by running the following command: +```bash +docker run -it -p 7766:7000 -e PDP_API_KEY= -e PDP_DEBUG=True permitio/pdp-v2:latest ``` -uvicorn horizon.main:app --reload --port=7000 + +### Deploying PDP to Production +You can deploy the PDP to production in multiple designs. See the [Permit.io documentation](https://docs.permit.io/concepts/pdp/overview) for more information. + +## Contributing + +### Setting up the development environment +1. Clone the repository +2. 
Install the dependencies +```bash +pip install ".[dev]" ``` -you can pass environment variables to control the behavior of the sidecar: -e.g, running a local sidecar against production backend: +### Running locally (during development) ``` -AUTHZ_SERVICE_URL=https://api.permit.io CLIENT_TOKEN= uvicorn horizon.main:app --reload --port=7000 +PDP_API_KEY= uvicorn horizon.main:app --reload --port=7000 ``` -## Installing and running in production - -Pull the image from docker hub +You can pass environment variables to control the behavior of the PDP image. +For example, running a local PDP against the Permit API: ``` -docker pull permitio/pdp +PDP_CONTROL_PLANE=https://api.permit.io PDP_API_KEY= uvicorn horizon.main:app --reload --port=7000 ``` -Run the image: don't forget to pass your authorization service API KEY: +## Building a Custom PDP Docker image +For ARM architecture: ``` -docker run -it -e "CLIENT_TOKEN=" -p 7000:7000 permitio/pdp +VERSION= make build-arm64 ``` - -By default the image exposes port 7000 but you can change it. - -## Building the docker image yourself +For AMD64 architecture: ``` -READ_ONLY_GITHUB_TOKEN= make build +VERSION= make build-amd64 ``` -you must declare the environment variable `READ_ONLY_GITHUB_TOKEN` for this command to work. -## Running the image in development mode +### Running the image in development mode ``` -DEV_MODE_CLIENT_TOKEN= make run +VERSION= API_KEY= make run ``` -you must declare the environment variable `DEV_MODE_CLIENT_TOKEN` for this command to work. diff --git a/build_opal_bundle.sh b/build_opal_bundle.sh new file mode 100755 index 00000000..ccbff5e7 --- /dev/null +++ b/build_opal_bundle.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +set -e + +# Check if PDP_VANILLA is set to true from command line argument +if [ "$PDP_VANILLA" == "true" ]; then + echo "Building for pdp-vanilla environment." +fi + +# Check if permit-opa directory already exists +if [ ! 
-d "../permit-opa" ]; then + # Clone the permit-opa repository into the parent directory if it doesn't exist + git clone git@github.com:permitio/permit-opa.git ../permit-opa +else + echo "permit-opa directory already exists. Skipping clone operation." +fi + +# Check if factdb directory already exists +if [ ! -d "../factdb" ]; then + # Clone the permit-opa repository into the parent directory if it doesn't exist + git clone git@github.com:permitio/datasync.git ../factdb +else + echo "factdb directory already exists. Skipping clone operation." +fi + +# Conditionally execute the custom OPA tarball creation section based on the value of PDP_VANILLA +if [ "$PDP_VANILLA" != "true" ]; then + # Custom OPA tarball creation section + rm -rf custom + mkdir custom + build_root="$PWD" + cd "../permit-opa" + find * \( -name '*go*' -o -name 'LICENSE.md' \) -print0 | xargs -0 tar -czf "$build_root"/custom/custom_opa.tar.gz --exclude '.*' + cd "$build_root" + # factdb tarball creation section + rm -rf factdb + mkdir factdb + build_root="$PWD" + cd "../factdb" + find * \( -name '*go*' -o -name 'LICENSE.md' \) -print0 | xargs -0 tar -czf "$build_root"/factdb/factdb.tar.gz --exclude '.*' + cd "$build_root" +else + echo "Skipping custom OPA tarball creation for pdp-vanilla environment." 
+fi diff --git a/charts/pdp/Chart.yaml b/charts/pdp/Chart.yaml new file mode 100644 index 00000000..facac28f --- /dev/null +++ b/charts/pdp/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v2 +name: pdp +description: An official Helm chart for Permit.io PDP (Policy Decision Point) +version: 0.0.3 diff --git a/charts/pdp/templates/deployment.yaml b/charts/pdp/templates/deployment.yaml new file mode 100644 index 00000000..8fb9c340 --- /dev/null +++ b/charts/pdp/templates/deployment.yaml @@ -0,0 +1,123 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: permitio-pdp + labels: + app: permitio-pdp +spec: + replicas: {{ .Values.pdp.replicas }} + selector: + matchLabels: + app: permitio-pdp + template: + metadata: + labels: + app: permitio-pdp + {{- if .Values.labels }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + annotations: + {{- if .Values.annotations }} + {{- range $key, $value := .Values.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + spec: + containers: + - name: permitio-pdp + image: "{{ .Values.pdp.image.repository }}:{{ .Values.pdp.image.tag }}" + imagePullPolicy: {{ .Values.pdp.image.pullPolicy }} + ports: + - containerPort: {{ .Values.pdp.port }} + env: + - name: PDP_API_KEY + valueFrom: + secretKeyRef: + name: permitio-pdp-secret + key: ApiKey + {{- if .Values.pdp.pdpEnvs }} + {{- range .Values.pdp.pdpEnvs }} + - name: {{ .name }} + value: {{ .value | quote }} + {{- end }} + {{- end }} + {{- if .Values.pdp.logs_forwarder.enabled }} + - name: PDP_OPA_DECISION_LOG_CONSOLE + value: "true" + - name: OPAL_LOG_TO_FILE + value: "true" + - name: OPAL_LOG_FILE_PATH + value: "/tmp/pdp.log" + {{- end }} + {{- if .Values.pdp.debug_mode }} + - name: PDP_DEBUG + value: "true" + {{- end }} + {{- if hasKey .Values "dd_agent_envref_enabled" }} + {{- if .Values.dd_agent_envref_enabled }} + - name: DD_AGENT_HOST + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: DD_ENV + 
valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/env'] + - name: DD_SERVICE + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/service'] + - name: DD_VERSION + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/version'] + {{- end }} + {{- end }} + resources: + requests: + cpu: "{{ .Values.resources.requests.cpu }}" + memory: "{{ .Values.resources.requests.memory }}" + limits: + memory: "{{ .Values.resources.limits.memory }}" + livenessProbe: + httpGet: + path: /health + port: 7000 + initialDelaySeconds: 10 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /healthy + port: 7000 + initialDelaySeconds: 10 + periodSeconds: 10 + {{- if .Values.pdp.logs_forwarder.enabled }} + volumeMounts: + - name: logs + mountPath: /tmp/ + {{- end }} + {{- if .Values.pdp.logs_forwarder.enabled }} + - name: fluentbit + image: fluent/fluent-bit:3.1.4 + volumeMounts: + - name: fluent-bit-config + mountPath: /fluent-bit/etc + readOnly: true + - name: logs + mountPath: /tmp/ + env: + - name: APP_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['app'] + {{- end }} + {{- if .Values.pdp.logs_forwarder.enabled }} + volumes: + - name: fluent-bit-config + configMap: + name: fluentbit-config + - name: logs + emptyDir: {} + {{- end }} diff --git a/charts/pdp/templates/logs-forwarder-cm.yaml b/charts/pdp/templates/logs-forwarder-cm.yaml new file mode 100644 index 00000000..7bfb4325 --- /dev/null +++ b/charts/pdp/templates/logs-forwarder-cm.yaml @@ -0,0 +1,70 @@ +{{ if .Values.pdp.logs_forwarder.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: fluentbit-config +data: + fluent-bit.conf: | + [SERVICE] + Flush 1 + Log_Level {{ if .Values.pdp.logs_forwarder.debug_mode }}debug{{ else }}info{{ end }} + HTTP_Server On + HTTP_Listen 0.0.0.0 + HTTP_Port 2020 + + [INPUT] + Name tail + Path /tmp/pdp.log + Tag kube.* + DB /var/log/flb_kube.db + Mem_Buf_Limit 5MB + Skip_Long_Lines On + + [FILTER] + Name grep + Match 
kube.* + Regex log "Decision Log.*{.*}" + + [FILTER] + Name lua + Match kube.* + script /fluent-bit/etc/filter.lua + call process_log + + {{- if eq .Values.pdp.logs_forwarder.type "stdout" }} + [OUTPUT] + Name stdout + Match * + {{- else if eq .Values.pdp.logs_forwarder.type "elasticsearch" }} + [OUTPUT] + Name es + Match * + Host {{ .Values.pdp.logs_forwarder.elasticsearch.host }} + Index {{ .Values.pdp.logs_forwarder.elasticsearch.index }} + Port {{ .Values.pdp.logs_forwarder.elasticsearch.port }} + tls On + tls.verify Off + Cloud_Auth {{ .Values.pdp.logs_forwarder.elasticsearch.cloud_auth }} + Suppress_Type_Name On + Retry_Limit False + {{- end }} + + filter.lua: | + function process_log(tag, timestamp, record) + local log_message = record["log"] + + if log_message then + log_message = log_message:gsub('\\\\"', '\\"') + log_message = log_message:gsub('\\"', '"') + log_message = log_message:gsub('\\\\', '\\') + end + + local json_data = string.match(log_message, '{"decision_id":.*}') + + if json_data then + return 1, timestamp, { log = json_data } + else + return -1, timestamp, record + end + end +{{- end }} diff --git a/charts/pdp/templates/secret.yaml b/charts/pdp/templates/secret.yaml new file mode 100644 index 00000000..add49afc --- /dev/null +++ b/charts/pdp/templates/secret.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: permitio-pdp-secret +type: Opaque +data: + ApiKey: {{ .Values.pdp.ApiKey | b64enc | quote }} diff --git a/charts/pdp/templates/service.yaml b/charts/pdp/templates/service.yaml new file mode 100644 index 00000000..78b4f383 --- /dev/null +++ b/charts/pdp/templates/service.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Service +metadata: + name: permitio-pdp +spec: + selector: + app: permitio-pdp + ports: + - protocol: TCP + port: {{ .Values.pdp.port }} + targetPort: 7000 diff --git a/charts/pdp/values.yaml b/charts/pdp/values.yaml new file mode 100644 index 00000000..6190a47f --- /dev/null +++ b/charts/pdp/values.yaml @@ 
-0,0 +1,28 @@ +pdp: + pdpEnvs: [] + # - name: custom_env + # value: "custom_env" + ApiKey: "" + port: 7766 + replicas: 1 + image: + repository: permitio/pdp-v2 + tag: latest + pullPolicy: Always + logs_forwarder: + enabled: false + debug_mode: false + type: "stdout" # 'stdout' or 'elasticsearch' + elasticsearch: + host: "" + cloud_auth: "<{user}:{password}>" + port: 443 + index: "" + debug_mode: false + +resources: + requests: + cpu: "256m" + memory: "512Mi" + limits: + memory: "1Gi" diff --git a/horizon/authentication.py b/horizon/authentication.py index fb6a18fa..c0596da9 100644 --- a/horizon/authentication.py +++ b/horizon/authentication.py @@ -1,14 +1,30 @@ +from typing import Annotated + from fastapi import Header, HTTPException, status -from horizon.config import sidecar_config +from horizon.config import MOCK_API_KEY, sidecar_config +from horizon.startup.api_keys import get_env_api_key -def enforce_pdp_token(authorization=Header(None)): +def enforce_pdp_token(authorization: Annotated[str | None, Header()]): if authorization is None: + raise HTTPException(status.HTTP_401_UNAUTHORIZED, detail="Missing Authorization header") + schema, token = authorization.split(" ") + + if schema.strip().lower() != "bearer" or token.strip() != get_env_api_key(): + raise HTTPException(status.HTTP_401_UNAUTHORIZED, detail="Invalid PDP token") + + +def enforce_pdp_control_key(authorization: Annotated[str | None, Header()]): + if sidecar_config.CONTAINER_CONTROL_KEY == MOCK_API_KEY: raise HTTPException( - status.HTTP_401_UNAUTHORIZED, detail="Missing Authorization header" + status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Control API disabled. 
Set a PDP_CONTAINER_CONTROL_KEY variable to enable.", ) + + if authorization is None: + raise HTTPException(status.HTTP_401_UNAUTHORIZED, detail="Missing Authorization header") schema, token = authorization.split(" ") - if schema.strip().lower() != "bearer" or token.strip() != sidecar_config.API_KEY: + if schema.strip().lower() != "bearer" or token.strip() != sidecar_config.CONTAINER_CONTROL_KEY: raise HTTPException(status.HTTP_401_UNAUTHORIZED, detail="Invalid PDP token") diff --git a/horizon/config.py b/horizon/config.py index 7e88e85a..e5792b2c 100644 --- a/horizon/config.py +++ b/horizon/config.py @@ -1,28 +1,123 @@ -import os +from typing import Any from opal_common.confi import Confi, confi +from opal_common.schemas.data import CallbackEntry +from pydantic import parse_obj_as, parse_raw_as + +MOCK_API_KEY = "MUST BE DEFINED" + + +# scopes enum +class ApiKeyLevel(str): + ORGANIZATION = "organization" + PROJECT = "project" + ENVIRONMENT = "environment" class SidecarConfig(Confi): + def __new__(cls, *, prefix=None, is_model=True): # noqa: ARG003 + """creates a singleton object, if it is not created, + or else returns the previous singleton object""" + if not hasattr(cls, "instance"): + cls.instance = super().__new__(cls) + return cls.instance + + FACTDB_ENABLED = confi.bool( + "FACTDB_ENABLED", + False, + description="if true, the sidecar will enable the FactDB service to manage the PDP data in " "FactDB", + ) + + FACTDB_BINARY_PATH = confi.str( + "FACTDB_BINARY_PATH", + "/factstore", + description="path in which to find the FactDB executable", + ) + + FACTDB_SERVICE_URL = confi.str( + "FACTDB_SERVICE_URL", + "http://localhost:8080", + description="URL to the FactDB service that manages the PDP data, will only be used if " + "FACTDB_ENABLED is true", + ) + + FACTDB_BACKUP_SERVER_URL = confi.str( + "FACTDB_BACKUP_SERVER_URL", + None, + description="URL from which the FactDB service will fetch the PDP data backup from, will only be used if " + 
"FACTDB_ENABLE_REMOTE_BACKUP is true", + ) + + SHARD_ID = confi.str( + "SHARD_ID", + None, + description="The shard id of this PDP, used to identify the PDP in the control plane", + ) + CONTROL_PLANE = confi.str( "CONTROL_PLANE", "http://localhost:8000", description="URL to the control plane that manages this PDP, typically Permit.io cloud (api.permit.io)", ) - # backend api url, where proxy requests go - BACKEND_SERVICE_URL = confi.str( - "BACKEND_SERVICE_URL", confi.delay("{CONTROL_PLANE}/v1") + CONTROL_PLANE_PDP_DELTAS_API = confi.str( + "CONTROL_PLANE_PDP_DELTAS_API", + "http://localhost:8000", + description="URL to the control plane's PDP deltas API", + ) + + CONTROL_PLANE_RELAY_API = confi.str( + "CONTROL_PLANE_RELAY_API", + "http://localhost:8001", ) - BACKEND_LEGACY_URL = confi.str( - "BACKEND_LEGACY_URL", confi.delay("{CONTROL_PLANE}/sdk") + + CONTROL_PLANE_RELAY_JWT_TIER = confi.str( + "CONTROL_PLANE_RELAY_JWT_TIER", + "http://localhost:8000", + description="the backend tier that will be used to generate relay API JWTs", ) + # backend api url, where proxy requests go + BACKEND_SERVICE_URL = confi.str("BACKEND_SERVICE_URL", confi.delay("{CONTROL_PLANE}/v1")) + BACKEND_LEGACY_URL = confi.str("BACKEND_LEGACY_URL", confi.delay("{CONTROL_PLANE}/sdk")) + # backend route to fetch policy data topics - REMOTE_CONFIG_ENDPOINT = confi.str("REMOTE_CONFIG_ENDPOINT", "pdps/me/config") + REMOTE_CONFIG_ENDPOINT = confi.str("REMOTE_CONFIG_ENDPOINT", "/v2/pdps/me/config") + + # backend route to push state changes + REMOTE_STATE_ENDPOINT = confi.str("REMOTE_STATE_ENDPOINT", "/v2/pdps/me/state") # access token to access backend api - API_KEY = confi.str("API_KEY", "PJUKkuwiJkKxbIoC4o4cguWxB_2gX6MyATYKc2OCM") + API_KEY = confi.str( + "API_KEY", + MOCK_API_KEY, + description="set this to your environment's API key if you prefer to use the environment level API key.", + ) + + # access token to your organization + ORG_API_KEY = confi.str( + "ORG_API_KEY", + None, + 
description="set this to your organization's API key if you prefer to use the organization level API key. " + "By default, the PDP will use the project level API key", + ) + + # access token to your project + PROJECT_API_KEY = confi.str( + "PROJECT_API_KEY", + None, + description="set this to your project's API key if you prefer to use the project level API key. " + "By default, the PDP will use the default project API key", + ) + + # chosen project id/key to use for the PDP + ACTIVE_PROJECT = confi.str("ACTIVE_PROJECT", None, description="the project id/key to use for the PDP") + + # chosen environment id/key to use for the PDP + ACTIVE_ENV = confi.str("ACTIVE_ENV", None, description="the environment id/key to use for the PDP") + + # access token to perform system control operations + CONTAINER_CONTROL_KEY = confi.str("CONTAINER_CONTROL_KEY", MOCK_API_KEY) # if enabled, will output to log more data for each "is allowed" decision DECISION_LOG_DEBUG_INFO = confi.bool("DECISION_LOG_DEBUG_INFO", True) @@ -33,14 +128,62 @@ class SidecarConfig(Confi): # enable datadog APM tracing ENABLE_MONITORING = confi.bool("ENABLE_MONITORING", False) - # centralized logging - CENTRAL_LOG_DRAIN_URL = confi.str( - "CENTRAL_LOG_DRAIN_URL", "https://listener.logz.io:8071" + ENABLE_OFFLINE_MODE = confi.bool( + "ENABLE_OFFLINE_MODE", + False, + description="When true, sidecar will use a file backup to restore configuration and policy data when " + "cloud services are unavailable", + ) + + OFFLINE_MODE_BACKUP_DIR = confi.str( + "OFFLINE_MODE_BACKUP_DIR", + "/app/backup", + description="Dir path where pdp would backup its cloud configuration when in offline mode", + ) + OFFLINE_MODE_BACKUP_FILENAME = confi.str( + "OFFLINE_MODE_BACKUP_FILENAME", + "pdp_cloud_config_backup.json", + description="Filename for offline mode's cloud configuration backup", + ) + OFFLINE_MODE_POLICY_BACKUP_FILENAME = confi.str( + "OFFLINE_MODE_POLICY_BACKUP_FILENAME", + "policy_store_backup.json", + 
description="Filename for offline mode's policy backup (OPAL's offline mode backup)", + ) + + CONFIG_FETCH_MAX_RETRIES = confi.int( + "CONFIG_FETCH_MAX_RETRIES", + 6, + description="Number of times to retry fetching the sidecar configuration from control plane", ) + + # centralized logging + CENTRAL_LOG_DRAIN_URL = confi.str("CENTRAL_LOG_DRAIN_URL", "https://listener.logz.io:8071") CENTRAL_LOG_DRAIN_TIMEOUT = confi.int("CENTRAL_LOG_DRAIN_TIMEOUT", 5) CENTRAL_LOG_TOKEN = confi.str("CENTRAL_LOG_TOKEN", None) CENTRAL_LOG_ENABLED = confi.bool("CENTRAL_LOG_ENABLED", False) + PING_INTERVAL = confi.int( + "PING_INTERVAL", + 10, + ) + + OPA_CLIENT_QUERY_TIMEOUT = confi.float( + "OPA_CLIENT_QUERY_TIMEOUT", + 1, # aiohttp's default timeout is 5m, we want to be more aggressive + description="the timeout for querying OPA for an allow decision, in seconds. 0 means no timeout", + ) + OPA_CLIENT_FAILURE_THRESHOLD_PERCENTAGE = confi.float( + "OPA_CLIENT_FAILURE_THRESHOLD", + 0.1, + description="the percentage of failed requests to OPA that will trigger a failure threshold", + ) + OPA_CLIENT_FAILURE_THRESHOLD_INTERVAL = confi.float( + "OPA_CLIENT_FAILURE_THRESHOLD_INTERVAL", + 60, + description="the interval (in seconds) to calculate the failure threshold", + ) + # internal OPA config OPA_CONFIG_FILE_PATH = confi.str( "OPA_CONFIG_FILE_PATH", @@ -65,13 +208,19 @@ class SidecarConfig(Confi): OPA_DECISION_LOG_CONSOLE = confi.bool( "OPA_DECISION_LOG_CONSOLE", False, - description="if true, OPA decision logs will also be printed to console (only relevant if `OPA_DECISION_LOG_ENABLED` is true)", + description="if true, OPA decision logs will also be printed to console " + "(only relevant if `OPA_DECISION_LOG_ENABLED` is true)", ) OPA_DECISION_LOG_INGRESS_ROUTE = confi.str( "OPA_DECISION_LOG_INGRESS_ROUTE", "/v1/decision_logs/ingress", description="the route on the backend the decision logs will be uploaded to", ) + OPA_DECISION_LOG_INGRESS_BACKEND_TIER_URL = confi.str( + 
"OPA_DECISION_LOG_INGRESS_BACKEND_TIER_URL", + None, + description="the backend tier that decision logs will be uploaded to", + ) OPA_DECISION_LOG_MIN_DELAY = confi.int( "OPA_DECISION_LOG_MIN_DELAY", 1, @@ -82,6 +231,18 @@ class SidecarConfig(Confi): 10, description="max amount of time (in seconds) to wait between decision log uploads", ) + OPA_DECISION_LOG_UPLOAD_SIZE_LIMIT = confi.int( + "OPA_DECISION_LOG_UPLOAD_SIZE_LIMIT", + 65536, # This is twice as much the default OPA value (32768) + description="log upload size limit in bytes. OPA will chunk uploads to cap message body to this limit", + ) + + # allow access to metrics endpoint without auth + ALLOW_METRICS_UNAUTHENTICATED = confi.bool( + "ALLOW_METRICS_UNAUTHENTICATED", + False, + description="if true, the /metrics endpoint will be accessible without authentication", + ) # temp log format (until cloud config is received) TEMP_LOG_FORMAT = confi.str( @@ -89,10 +250,43 @@ class SidecarConfig(Confi): "{time} | {process} | {name: <40}|{level:^6} | {message}", ) + IS_DEBUG_MODE = confi.bool("DEBUG", None) + + # enables the Kong integration endpoint. 
This shouldn't be enabled unless needed, as it's unauthenticated + KONG_INTEGRATION = confi.bool("KONG_INTEGRATION", False) + # enables debug output for the Kong integration endpoint + KONG_INTEGRATION_DEBUG = confi.bool("KONG_INTEGRATION_DEBUG", False) + + LOCAL_FACTS_WAIT_TIMEOUT = confi.float( + "LOCAL_FACTS_WAIT_TIMEOUT", + 10, + description="The amount of time in seconds to wait for the local facts to be synced before timing out", + ) + VERSION_FILE_PATH = confi.str( + "VERSION_FILE_PATH", + "/permit_pdp_version", + description="The path to the file that contains the PDP version", + ) + + @staticmethod + def parse_callbacks(value: Any) -> list[CallbackEntry]: + if isinstance(value, str): + return parse_raw_as(list[CallbackEntry], value) + else: + return parse_obj_as(list[CallbackEntry], value) + + DATA_UPDATE_CALLBACKS: list[CallbackEntry] = confi.str( + "DATA_UPDATE_CALLBACKS", + [], + description="List of callbacks to be triggered when data is updated", + cast=parse_callbacks, + cast_from_json=parse_callbacks, + ) + + # non configurable values ------------------------------------------------- + # redoc configuration (openapi schema) - OPENAPI_TAGS_METADATA = [ + OPENAPI_TAGS_METADATA = [ # noqa: RUF012 { "name": "Authorization API", "description": "Authorization queries to OPA. These queries are answered locally by OPA " @@ -110,11 +304,14 @@ class SidecarConfig(Confi): }, { "name": "Cloud API Proxy", - "description": "These endpoints proxy the Permit.io cloud api, and therefore **incur high-latency**. " - + "You should not use the cloud API in the standard request flow of users, i.e in places where the incurred " - + "added latency will affect your entire api. A good place to call the cloud API will be in one-time user events " - + "such as user registration (i.e: calling sync user, assigning initial user roles, etc.). 
" - + "The sidecar will proxy to the cloud every request prefixed with '/sdk'.", + "description": ( + "These endpoints proxy the Permit.io cloud api, and therefore **incur high-latency**. " + "You should not use the cloud API in the standard request flow of users, i.e in places " + "where the incurred added latency will affect your entire api. " + "A good place to call the cloud API will be in one-time user events such as user registration " + "(i.e: calling sync user, assigning initial user roles, etc.). " + "The sidecar will proxy to the cloud every request prefixed with '/sdk'." + ), "externalDocs": { "description": "The cloud api complete docs are located here:", "url": "https://api.permit.io/redoc", diff --git a/horizon/enforcer/api.py b/horizon/enforcer/api.py index 6256a26a..a933f42c 100644 --- a/horizon/enforcer/api.py +++ b/horizon/enforcer/api.py @@ -1,126 +1,749 @@ +import asyncio import json -from typing import Dict, Optional +import re +from collections.abc import Callable +from pathlib import Path +from typing import Annotated, cast -from fastapi import APIRouter, Depends, Response, status +import aiohttp +from fastapi import APIRouter, Depends, Header, HTTPException, Query, Request, Response, status +from fastapi.encoders import jsonable_encoder +from opal_client.config import opal_client_config from opal_client.logger import logger from opal_client.policy_store.base_policy_store_client import BasePolicyStoreClient -from opal_client.policy_store.opa_client import fail_silently from opal_client.policy_store.policy_store_client_factory import ( DEFAULT_POLICY_STORE_GETTER, ) +from opal_client.utils import proxy_response +from pydantic import PositiveInt, parse_obj_as +from starlette.responses import JSONResponse from horizon.authentication import enforce_pdp_token from horizon.config import sidecar_config -from horizon.enforcer.schemas import AuthorizationQuery, AuthorizationResult +from horizon.enforcer.schemas import ( + 
AllTenantsAuthorizationResult, + AuthorizationQuery, + AuthorizationResult, + AuthorizedUsersAuthorizationQuery, + AuthorizedUsersResult, + BaseSchema, + BulkAuthorizationQuery, + BulkAuthorizationResult, + MappingRuleData, + Resource, + UrlAuthorizationQuery, + User, + UserPermissionsQuery, + UserPermissionsResult, + UserTenantsQuery, + UserTenantsResult, +) +from horizon.enforcer.schemas_kong import ( + KongAuthorizationInput, + KongAuthorizationQuery, + KongAuthorizationResult, + KongWrappedAuthorizationQuery, +) +from horizon.enforcer.schemas_v1 import AuthorizationQueryV1 +from horizon.enforcer.utils.mapping_rules_utils import MappingRulesUtils +from horizon.enforcer.utils.statistics_utils import StatisticsManager +from horizon.state import PersistentStateHandler +AUTHZ_HEADER = "Authorization" +MAIN_POLICY_PACKAGE = "permit.root" +BULK_POLICY_PACKAGE = "permit.bulk" +ALL_TENANTS_POLICY_PACKAGE = "permit.any_tenant" +USER_PERMISSIONS_POLICY_PACKAGE = "permit.user_permissions" +AUTHORIZED_USERS_POLICY_PACKAGE = "permit.authorized_users.authorized_users" +USER_TENANTS_POLICY_PACKAGE = USER_PERMISSIONS_POLICY_PACKAGE + ".tenants" +KONG_ROUTES_TABLE_FILE = "/config/kong_routes.json" -def init_enforcer_api_router(policy_store: BasePolicyStoreClient = None): - policy_store = policy_store or DEFAULT_POLICY_STORE_GETTER() - router = APIRouter(dependencies=[Depends(enforce_pdp_token)]) +stats_manager = StatisticsManager( + interval_seconds=sidecar_config.OPA_CLIENT_FAILURE_THRESHOLD_INTERVAL, + failures_threshold_percentage=sidecar_config.OPA_CLIENT_FAILURE_THRESHOLD_PERCENTAGE, +) - def log_query_and_result(query: AuthorizationQuery, response: Response): - params = "({}, {}, {})".format(query.user, query.action, query.resource.type) - try: - result: dict = json.loads(response.body).get("result", {}) - allowed = result.get("allow", False) - permission = None - granting_role = None + +def extract_pdp_api_key(request: Request) -> str: + authorization: str = 
request.headers.get(AUTHZ_HEADER, "") + parts = authorization.split(" ") + if len(parts) != 2: + raise HTTPException( + status.HTTP_401_UNAUTHORIZED, + detail=f"bad authz header: {authorization}", + ) + schema, token = parts + if schema.strip().lower() != "bearer": + raise HTTPException(status.HTTP_401_UNAUTHORIZED, detail="Invalid PDP token") + return token + + +def transform_headers(request: Request) -> dict: + token = extract_pdp_api_key(request) + return { + AUTHZ_HEADER: f"Bearer {token}", + "Content-Type": "application/json", + } + + +def log_query_result(query: BaseSchema, response: Response, *, is_inner: bool = False): + """ + formats a nice log to default logger with the results of permit.check() + """ + params = repr(query) + try: + response_json = json.loads(response.body) + result: dict = response_json if is_inner else response_json.get("result", {}) + allowed: bool | list[dict] = result.get("allow") + color = "" + allow_output = False + if isinstance(allowed, bool): + allow_output = allowed if allowed: - granting_permissions = result.get("granting_permission", []) - granting_permission = ( - {} if len(granting_permissions) == 0 else granting_permissions[0] - ) - permission = granting_permission.get("permission", {}) - granting_role: Optional[Dict] = granting_permission.get( - "granting_role", None + color = "" + elif isinstance(allowed, list): + allow_output = any(a.get("allow", False) for a in allowed) + if allow_output: + color = "" + + if allowed is None: + allowed_tenants = result.get("allowed_tenants") + allow_output = [f"({a.get('tenant', {}).get('key')}, {a.get('allow', False)})" for a in allowed_tenants] + if len(allow_output) > 0: + color = "" + + debug = result.get("debug", {}) + + format = color + "is allowed = {allowed} " + format += " | {api_params}" + if sidecar_config.DECISION_LOG_DEBUG_INFO: + format += " | full_input={input} | debug={debug}" + logger.opt(colors=True).info( + format, + allowed=allow_output, + api_params=params, + 
input=query.dict(), + debug=debug, + ) + except Exception: # noqa: BLE001 + try: + body = str(response.body, "utf-8") + except ValueError: + body = None + data = {} if body is None else {"response_body": body} + logger.info( + "is allowed", + params=params, + query=query.dict(), + response_status=response.status_code, + **data, + ) + + +def log_query_result_kong(input: KongAuthorizationInput, response: Response): + """ + formats a nice log to default logger with the results of permit.check() + """ + params = f"({input.consumer.username}, {input.request.http.method}, {input.request.http.path})" + try: + result: dict = json.loads(response.body).get("result", {}) + allowed = result.get("allow", False) + debug = result.get("debug", {}) + + color = "" + if not allowed: + color = "" + format = color + "is allowed = {allowed} " + format += " | {api_params}" + if sidecar_config.DECISION_LOG_DEBUG_INFO: + format += " | full_input={input} | debug={debug}" + logger.opt(colors=True).info( + format, + allowed=allowed, + api_params=params, + input=input.dict(), + debug=debug, + ) + except Exception: # noqa: BLE001 + try: + body = str(response.body, "utf-8") + except ValueError: + body = None + data = {} if body is None else {"response_body": body} + logger.info( + "is allowed", + params=params, + query=input.dict(), + response_status=response.status_code, + **data, + ) + + +def get_v1_processed_query(result: dict) -> dict | None: + if "authorization_query" not in result: + return None # not a v1 query result + + processed_input = result.get("authorization_query", {}) + return { + "user": processed_input.get("user", {}), + "action": processed_input.get("action", ""), + "resource": processed_input.get("resource", {}), + } + + +def get_v2_processed_query(result: dict) -> dict | None: + return (result.get("debug", {}) or {}).get("input", None) + + +async def notify_seen_sdk( + x_permit_sdk_language: str | None = Header(default=None), +) -> str | None: + if x_permit_sdk_language is 
not None: + await PersistentStateHandler.get_instance().seen_sdk(x_permit_sdk_language) + return x_permit_sdk_language + + +async def post_to_opa(request: Request, path: str, data: dict | None): + headers = transform_headers(request) + url = f"{opal_client_config.POLICY_STORE_URL}/v1/data/{path}" + exc = None + _set_use_debugger(data) + try: + logger.debug(f"calling OPA at '{url}' with input: {data}") + async with aiohttp.ClientSession() as session: # noqa: SIM117 + async with session.post( + url, + data=json.dumps(data) if data is not None else None, + headers=headers, + timeout=sidecar_config.OPA_CLIENT_QUERY_TIMEOUT, + raise_for_status=True, + ) as opa_response: + stats_manager.report_success() + return await proxy_response(opa_response) + except asyncio.exceptions.TimeoutError: + stats_manager.report_failure() + exc = HTTPException( + status.HTTP_504_GATEWAY_TIMEOUT, + detail=f"OPA request timed out (url: {url}, timeout: {sidecar_config.OPA_CLIENT_QUERY_TIMEOUT}s)", + ) + except aiohttp.ClientResponseError as e: + stats_manager.report_failure() + exc = HTTPException( + status.HTTP_502_BAD_GATEWAY, # 502 indicates server got an error from another server + detail=f"OPA request failed (url: {url}, status: {e.status}, message: {e.message})", + ) + except aiohttp.ClientError as e: + stats_manager.report_failure() + exc = HTTPException( + status.HTTP_502_BAD_GATEWAY, + detail=f"OPA request failed (url: {url}, error: {e!s}", + ) + logger.warning(exc.detail) + raise exc + + +def _set_use_debugger(data: dict | None) -> None: + if ( + data is not None + and data.get("input") is not None + and "use_debugger" not in data["input"] + and sidecar_config.IS_DEBUG_MODE is not None + ): + data["input"]["use_debugger"] = sidecar_config.IS_DEBUG_MODE + + +async def _is_allowed(query: BaseSchema, request: Request, policy_package: str): + opa_input = {"input": query.dict()} + path = policy_package.replace(".", "/") + return await post_to_opa(request, path, opa_input) + + +async def 
conditional_is_allowed( + query: BaseSchema, + request: Request, + *, + policy_package: str = MAIN_POLICY_PACKAGE, + factdb_path: str = "/check", + factdb_method: str = "POST", + factdb_params: dict | None = None, + legacy_parse_func: Callable[[dict | list], dict] | None = None, +) -> dict: + if sidecar_config.FACTDB_ENABLED: + response = await _is_allowed_factdb( + query if factdb_method != "GET" else None, + request, + path=factdb_path, + method=factdb_method, + params=factdb_params, + ) + raw_result = json.loads(response.body) + log_query_result(query, response, is_inner=True) + else: + response = await _is_allowed(query, request, policy_package) + raw_result = json.loads(response.body).get("result", {}) + log_query_result(query, response) + if legacy_parse_func: + try: + raw_result = legacy_parse_func(raw_result) + except Exception as e: # noqa: BLE001 + logger.opt(exception=e).warning( + "is allowed (fallback response)", + reason="cannot parse opa response", ) - if granting_role: - role_id = granting_role.get("id", "__NO_ID__") - roles = [ - r - for r in result.get("user_roles", []) - if r.get("id", "") == role_id - ] - granting_role = granting_role if not roles else roles[0] - - debug = { - "opa_warnings": result.get("debug", []), - "opa_processed_input": result.get("authorization_query", {}), + return {} + return raw_result + + +async def _is_allowed_factdb( + query: BaseSchema | list[BaseSchema] | None, + request: Request, + *, + path: str = "/check", + method: str = "POST", + params: dict | None = None, +): + headers = transform_headers(request) + url = f"{sidecar_config.FACTDB_SERVICE_URL}/v1/authz{path}" + _encoded_query = jsonable_encoder(query) + payload = None if query is None else {"input": _encoded_query} + exc = None + if _encoded_query is not None and isinstance(_encoded_query, dict): + _set_use_debugger(payload) + try: + logger.info(f"calling FactDB at '{url}' with input: {payload} and params {params}") + async with aiohttp.ClientSession() as 
session: # noqa: SIM117 + async with session.request( + method, + url, + data=json.dumps(payload["input"]) if payload is not None else None, + params=params, + headers=headers, + timeout=sidecar_config.OPA_CLIENT_QUERY_TIMEOUT, + raise_for_status=True, + ) as opa_response: + stats_manager.report_success() + return await proxy_response(opa_response) + except asyncio.exceptions.TimeoutError: + stats_manager.report_failure() + exc = HTTPException( + status.HTTP_504_GATEWAY_TIMEOUT, + detail=f"FactDB request timed out (url: {url}, timeout: {sidecar_config.OPA_CLIENT_QUERY_TIMEOUT}s)", + ) + except aiohttp.ClientResponseError as e: + stats_manager.report_failure() + exc = HTTPException( + status.HTTP_502_BAD_GATEWAY, # 502 indicates server got an error from another server + detail=f"FactDB request failed (url: {url}, status: {e.status}, message: {e.message})", + ) + except aiohttp.ClientError as e: + stats_manager.report_failure() + exc = HTTPException( + status.HTTP_502_BAD_GATEWAY, + detail=f"FactDB request failed (url: {url}, error: {e!s}", + ) + logger.warning(exc.detail) + raise exc + + +def init_enforcer_api_router(policy_store: BasePolicyStoreClient = None): # noqa: C901 + policy_store = policy_store or DEFAULT_POLICY_STORE_GETTER() + router = APIRouter() + if sidecar_config.KONG_INTEGRATION: + with Path(KONG_ROUTES_TABLE_FILE).open() as f: + kong_routes_table_raw = json.load(f) + kong_routes_table = [(re.compile(regex), resource) for regex, resource in kong_routes_table_raw] + logger.info(f"Kong integration: Loaded {len(kong_routes_table)} translation rules.") + + @router.get("/health", status_code=status.HTTP_200_OK, include_in_schema=False) + async def health(): + if await stats_manager.status(): + return JSONResponse( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + content={"status": "unavailable"}, + ) + + return JSONResponse(status_code=status.HTTP_200_OK, content={"status": "ok"}) + + def authorized_users_parse_func(result: dict | list) -> dict: + if 
isinstance(result, list): + raise TypeError("Invalid result for authorized users from OPA") + return result.get("result", {}) + + @router.post( + "/authorized_users", + response_model=AuthorizedUsersResult, + status_code=status.HTTP_200_OK, + response_model_exclude_none=True, + dependencies=[Depends(enforce_pdp_token)], + ) + async def authorized_users(request: Request, query: AuthorizedUsersAuthorizationQuery): + raw_result = await conditional_is_allowed( + query, + request, + policy_package=AUTHORIZED_USERS_POLICY_PACKAGE, + factdb_path="/authorized-users", + legacy_parse_func=authorized_users_parse_func, + ) + try: + result = parse_obj_as(AuthorizedUsersResult, raw_result) + except Exception: # noqa: BLE001 + result = AuthorizedUsersResult.empty(query.resource) + logger.warning( + "authorized users (fallback response), response: {res}", + reason="cannot decode opa response", + res=raw_result, + ) + return result + + @router.post( + "/allowed_url", + response_model=AuthorizationResult, + status_code=status.HTTP_200_OK, + response_model_exclude_none=True, + dependencies=[Depends(enforce_pdp_token)], + ) + async def is_allowed_url( + request: Request, + query: UrlAuthorizationQuery, + ): + data = await post_to_opa(request, "mapping_rules", None) + + mapping_rules = [] + data_result = json.loads(data.body).get("result") + mapping_rules_json = data_result.get("all", []) if data_result is not None else [] + + for mapping_rule in mapping_rules_json: + mapping_rules.append(parse_obj_as(MappingRuleData, mapping_rule)) + matched_mapping_rule = MappingRulesUtils.extract_mapping_rule_by_request( + mapping_rules, query.http_method, query.url + ) + if matched_mapping_rule is None: + return { + "allow": False, + "result": False, + "query": query.dict(), + "debug": { + "reason": "Matched mapping rule not found for the requested URL and HTTP method", + "mapping_rules": mapping_rules_json, + }, } - if allowed and permission is not None and granting_role is not None: - 
debug["opa_granting_permision"] = permission - debug["opa_granting_role"] = granting_role + path_attributes = MappingRulesUtils.extract_attributes_from_url(matched_mapping_rule.url, query.url) + query_params_attributes = MappingRulesUtils.extract_attributes_from_query_params( + matched_mapping_rule.url, query.url + ) + attributes = {**path_attributes, **query_params_attributes} + allowed_query = AuthorizationQuery( + user=query.user, + action=matched_mapping_rule.action, + resource=Resource( + type=matched_mapping_rule.resource, + tenant=query.tenant, + attributes=attributes, + ), + context=query.context, + sdk=query.sdk, + ) + return await is_allowed(request, allowed_query) - if allowed: - format = "is allowed = {allowed} " - else: - format = "is allowed = {allowed}" - format += " | {api_params}" - if sidecar_config.DECISION_LOG_DEBUG_INFO: - format += " | full_input={input} | debug={debug}" - logger.opt(colors=True).info( - format, - allowed=allowed, - api_params=params, - input=query.dict(), - debug=debug, + @router.post( + "/user-permissions", + response_model=UserPermissionsResult, + name="Get User Permissions", + status_code=status.HTTP_200_OK, + response_model_exclude_none=True, + dependencies=[Depends(enforce_pdp_token), Depends(notify_seen_sdk)], + ) + async def user_permissions( + request: Request, + query: UserPermissionsQuery, + page: Annotated[ + PositiveInt | None, + Query( + description="Page number for pagination, must be set together with per_page", + ), + ] = None, + per_page: Annotated[PositiveInt | None, Query(description="Number of items per page for pagination")] = None, + ): + paginated = query.set_pagination(page, per_page) + if paginated: + if query.context.get("enable_abac_user_permissions", False) is True: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Pagination is not supported for ABAC user permissions", + ) + logger.info("User permissions query with pagination") + + def parse_func(result: dict) -> dict | 
list: + results = result.get("permissions", {}) + if not query._offset and not query._limit: + return results + + resource_keys = sorted(results.keys()) + if query._offset and query._limit: + resource_keys = resource_keys[query._offset : query._offset + query._limit] + elif query._offset: + resource_keys = resource_keys[query._offset :] + elif query._limit: + resource_keys = resource_keys[: query._limit] + return {resource: results[resource] for resource in resource_keys} + + response = await conditional_is_allowed( + query, + request, + policy_package=USER_PERMISSIONS_POLICY_PACKAGE, + factdb_path="/user-permissions", + factdb_params=query.get_params(), + legacy_parse_func=parse_func, + ) + try: + result = parse_obj_as(UserPermissionsResult, response) + except Exception: # noqa: BLE001 + result = parse_obj_as(UserPermissionsResult, {}) + logger.warning( + "user permissions (fallback response)", + reason="cannot decode opa response", ) - except: - try: - body = str(response.body, "utf-8") - except: - body = None - data = {} if body is None else {"response_body": body} - logger.info( - "is allowed", - params=params, - query=query.dict(), - response_status=response.status_code, - **data + return result + + def parse_user_tenants_result(result: dict | list) -> dict | list: + if isinstance(result, dict): + tenants = result.get("tenants", []) + elif isinstance(result, list): + tenants = result + else: + raise TypeError(f"Expected raw result to be dict or list, got {type(result)}") + return tenants + + @router.post( + "/user-tenants", + response_model=UserTenantsResult, + name="Get User Tenants", + status_code=status.HTTP_200_OK, + response_model_exclude_none=True, + dependencies=[Depends(enforce_pdp_token), Depends(notify_seen_sdk)], + ) + async def user_tenants( + request: Request, + query: UserTenantsQuery, + ): + raw_result = await conditional_is_allowed( + query, + request, + policy_package=USER_TENANTS_POLICY_PACKAGE, + 
factdb_path=f"/users/{query.user.key}/tenants", + factdb_method="GET", + legacy_parse_func=parse_user_tenants_result, + ) + try: + result = parse_obj_as(UserTenantsResult, raw_result) + except Exception: # noqa: BLE001 + result = parse_obj_as(UserTenantsResult, []) + logger.warning( + "get user tenants (fallback response)", + reason="cannot decode opa response", + ) + return result + + @router.post( + "/allowed/all-tenants", + response_model=AllTenantsAuthorizationResult, + status_code=status.HTTP_200_OK, + response_model_exclude_none=True, + dependencies=[Depends(enforce_pdp_token), Depends(notify_seen_sdk)], + ) + async def is_allowed_all_tenants( + request: Request, + query: AuthorizationQuery, + ): + if sidecar_config.FACTDB_ENABLED: + response = await _is_allowed_factdb(query, request, path="/check/all-tenants") + raw_result = json.loads(response.body) + log_query_result(query, response, is_inner=True) + else: + response = await _is_allowed(query, request, ALL_TENANTS_POLICY_PACKAGE) + raw_result = json.loads(response.body).get("result", {}) + log_query_result(query, response) + + try: + result = AllTenantsAuthorizationResult( + allowed_tenants=raw_result.get("allowed_tenants", []), ) + except Exception: # noqa: BLE001 + result = AllTenantsAuthorizationResult(allowed_tenants=[]) + logger.warning("is allowed (fallback response)", reason="cannot decode opa response") + return result + + @router.post( + "/allowed/bulk", + response_model=BulkAuthorizationResult, + status_code=status.HTTP_200_OK, + response_model_exclude_none=True, + dependencies=[Depends(enforce_pdp_token), Depends(notify_seen_sdk)], + ) + async def is_allowed_bulk( + request: Request, + queries: list[AuthorizationQuery], + ): + if sidecar_config.FACTDB_ENABLED: + response = await _is_allowed_factdb(queries, request, path="/check/bulk") + raw_result = json.loads(response.body) + else: + bulk_query = BulkAuthorizationQuery(checks=queries) + response = await _is_allowed(bulk_query, request, 
BULK_POLICY_PACKAGE) + raw_result = json.loads(response.body).get("result", {}).get("allow", []) + log_query_result(bulk_query, response) + try: + result = BulkAuthorizationResult( + allow=raw_result, + ) + except Exception: # noqa: BLE001 + result = BulkAuthorizationResult( + allow=[], + ) + logger.warning("is allowed (fallback response)", reason="cannot decode opa response") + return result @router.post( "/allowed", response_model=AuthorizationResult, status_code=status.HTTP_200_OK, response_model_exclude_none=True, + dependencies=[Depends(enforce_pdp_token), Depends(notify_seen_sdk)], ) - async def is_allowed(query: AuthorizationQuery): - async def _is_allowed(): - return await policy_store.get_data_with_input(path="rbac", input=query) + async def is_allowed( + request: Request, + query: AuthorizationQuery | AuthorizationQueryV1, + ): + if isinstance(query, AuthorizationQueryV1): + raise HTTPException( + status_code=status.HTTP_421_MISDIRECTED_REQUEST, + detail="Mismatch between client version and PDP version," + " required v2 request body, got v1. 
" + "hint: try to update your client version to v2", + ) + query = cast(AuthorizationQuery, query) + + raw_result = await conditional_is_allowed(query, request) + try: + processed_query = get_v1_processed_query(raw_result) or get_v2_processed_query(raw_result) or {} + result = { + "allow": raw_result.get("allow", False), + "result": raw_result.get("allow", False), # fallback for older sdks (TODO: remove) + "query": processed_query, + "debug": raw_result.get("debug", {}), + } + except Exception: # noqa: BLE001 + result = {"allow": False, "result": False} + logger.warning("is allowed (fallback response)", reason="cannot decode opa response") + return result - fallback_response = dict(result=dict(allow=False, debug="OPA not responding")) - is_allowed_with_fallback = fail_silently(fallback=fallback_response)( - _is_allowed + @router.post( + "/nginx_allowed", + response_model=AuthorizationResult, + status_code=status.HTTP_200_OK, + response_model_exclude_none=True, + dependencies=[Depends(enforce_pdp_token)], + ) + async def is_allowed_nginx( + request: Request, + permit_user_key: Annotated[str | None, Header()] = None, + permit_tenant_id: Annotated[str | None, Header()] = None, + permit_action: Annotated[str | None, Header()] = None, + permit_resource_type: Annotated[str | None, Header()] = None, + ): + query = AuthorizationQuery( + user=User(key=permit_user_key), + action=permit_action, + resource=Resource(type=permit_resource_type, tenant=permit_tenant_id), ) - response = await is_allowed_with_fallback() - log_query_and_result(query, response) + + raw_result = await conditional_is_allowed(query, request) try: - raw_result = json.loads(response.body).get("result", {}) - processed_query = raw_result.get("authorization_query", {}) + processed_query = get_v1_processed_query(raw_result) or get_v2_processed_query(raw_result) or {} result = { "allow": raw_result.get("allow", False), - "result": raw_result.get( - "allow", False - ), # fallback for older sdks (TODO: remove) - 
"query": { - "user": processed_query.get("user", {"id": query.user}), - "action": processed_query.get("action", query.action), - "resource": processed_query.get( - "resource", query.resource.dict(exclude_none=True) - ), + "result": raw_result.get("allow", False), # fallback for older sdks (TODO: remove) + "query": processed_query, + "debug": raw_result.get("debug", {}), + } + except Exception: # noqa: BLE001 + result = {"allow": False, "result": False} + logger.warning("is allowed (fallback response)", reason="cannot decode opa response") + return result + + @router.post( + "/kong", + response_model=KongAuthorizationResult, + status_code=status.HTTP_200_OK, + response_model_exclude_none=True, + ) + async def is_allowed_kong(request: Request, query: KongAuthorizationQuery): + # Short circuit if disabled + if sidecar_config.KONG_INTEGRATION is False: + raise HTTPException( + status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Kong integration is disabled. " + "Please set the PDP_KONG_INTEGRATION variable to true to enable it.", + ) + + await PersistentStateHandler.get_instance().seen_sdk("kong") + + if sidecar_config.KONG_INTEGRATION_DEBUG: + payload = await request.json() + logger.info(f"Got request from Kong with payload {payload}") + + if query.input.consumer is None: + logger.warning( + "Got request from Kong with no consumer " + "(perhaps you forgot to check 'Config.include Consumer In Opa Input' in the Kong OPA plugin config?), " + "returning allowed=False" + ) + return { + "result": False, + } + + object_type = None + for regex, resource in kong_routes_table: + r = regex.match(query.input.request.http.path) + if r is not None: + if isinstance(resource, str): + object_type = resource + elif isinstance(resource, int): + object_type = r.groups()[resource] + break + + if object_type is None: + logger.warning( + "Got request from Kong to path {} with no matching route, returning allowed=False", + query.input.request.http.path, + ) + return { + "result": False, + } + + 
response = await _is_allowed( + KongWrappedAuthorizationQuery( + user={ + "key": query.input.consumer.username, }, - "debug": { - "warnings": raw_result.get("debug", []), - "user_roles": raw_result.get("user_roles", []), - "granting_permission": raw_result.get("granting_permission", []), - "user_permissions": raw_result.get("user_permissions", []), + resource={ + "tenant": "default", + "type": object_type, }, + action=query.input.request.http.method.lower(), + ), + request, + ) + try: + raw_result = json.loads(response.body).get("result", {}) + result = { + "result": raw_result.get("allow", False), } - except: - result = dict(allow=False, result=False) + except Exception: # noqa: BLE001 + result = {"allow": False, "result": False} logger.warning( - "is allowed (fallback response)", reason="cannot decode opa response" + "is allowed (fallback response)", + reason="cannot decode opa response", ) return result diff --git a/horizon/enforcer/opa/config_maker.py b/horizon/enforcer/opa/config_maker.py index 8c3b72a7..d9115038 100644 --- a/horizon/enforcer/opa/config_maker.py +++ b/horizon/enforcer/opa/config_maker.py @@ -1,27 +1,23 @@ -import os +from pathlib import Path import jinja2 from opal_common.logger import logger from horizon.config import SidecarConfig +from horizon.startup.api_keys import get_env_api_key + +TEMPLATES_PATH = Path(__file__).parent.parent.parent / "static" / "templates" def get_jinja_environment() -> jinja2.Environment: - path = os.path.join(os.path.dirname(__file__), "../../static/templates") - return jinja2.Environment(loader=jinja2.FileSystemLoader(path)) + return jinja2.Environment(loader=jinja2.FileSystemLoader(TEMPLATES_PATH)) def persist_to_file(contents: str, path: str) -> str: - path = os.path.expanduser(path) - - # make sure the directory exists - os.makedirs(os.path.dirname(path), exist_ok=True) - - # persist to file - with open(path, "w") as f: - f.write(contents) - - return path + path_obj = Path(path).expanduser() + 
path_obj.parent.mkdir(parents=True, exist_ok=True) + path_obj.write_text(contents) + return str(path_obj) def get_opa_config_file_path( @@ -38,15 +34,23 @@ def get_opa_config_file_path( """ env = get_jinja_environment() target_path = sidecar_config.OPA_CONFIG_FILE_PATH + decision_logs_backend_tier = ( + sidecar_config.OPA_DECISION_LOG_INGRESS_BACKEND_TIER_URL or sidecar_config.CONTROL_PLANE + ) + logger.info( + "Uploading decision logs to backend tier: {tier}", + tier=decision_logs_backend_tier, + ) try: template = env.get_template(template_path) contents = template.render( - cloud_service_url=sidecar_config.CONTROL_PLANE, - bearer_token=sidecar_config.API_KEY, + cloud_service_url=decision_logs_backend_tier, + bearer_token=get_env_api_key(), log_ingress_endpoint=sidecar_config.OPA_DECISION_LOG_INGRESS_ROUTE, min_delay_seconds=sidecar_config.OPA_DECISION_LOG_MIN_DELAY, max_delay_seconds=sidecar_config.OPA_DECISION_LOG_MAX_DELAY, + upload_size_limit_bytes=sidecar_config.OPA_DECISION_LOG_UPLOAD_SIZE_LIMIT, log_to_console=sidecar_config.OPA_DECISION_LOG_CONSOLE, ) except jinja2.TemplateNotFound: @@ -74,7 +78,10 @@ def get_opa_authz_policy_file_path( try: template = env.get_template(template_path) - contents = template.render(bearer_token=sidecar_config.API_KEY) + contents = template.render( + bearer_token=get_env_api_key(), + allow_metrics_unauthenticated=sidecar_config.ALLOW_METRICS_UNAUTHENTICATED, + ) except jinja2.TemplateNotFound: logger.error(f"could not find the template: {template_path}") raise diff --git a/horizon/enforcer/schemas.py b/horizon/enforcer/schemas.py index e57b9989..f73f72bf 100644 --- a/horizon/enforcer/schemas.py +++ b/horizon/enforcer/schemas.py @@ -1,19 +1,30 @@ -from typing import Any, Dict, List, Optional +from __future__ import annotations -from pydantic import BaseModel +from typing import Any + +from pydantic import AnyHttpUrl, BaseModel, Field, PositiveInt, PrivateAttr class BaseSchema(BaseModel): class Config: orm_mode = True + 
allow_population_by_field_name = True + + +class User(BaseSchema): + key: str + first_name: str | None = Field(None, alias="firstName") + last_name: str | None = Field(None, alias="lastName") + email: str | None = None + attributes: dict[str, Any] | None = Field(default_factory=dict) class Resource(BaseSchema): type: str - id: Optional[str] = None - tenant: Optional[str] = None - attributes: Optional[Dict[str, Any]] = None - context: Optional[Dict[str, Any]] = {} + key: str | None = None + tenant: str | None = None + attributes: dict[str, Any] | None = Field(default_factory=dict) + context: dict[str, Any] | None = Field(default_factory=dict) class AuthorizationQuery(BaseSchema): @@ -21,27 +32,209 @@ class AuthorizationQuery(BaseSchema): the format of is_allowed() input """ - user: str # user_id or jwt + user: User action: str resource: Resource - context: Optional[Dict[str, Any]] = {} + context: dict[str, Any] | None = Field(default_factory=dict) + sdk: str | None = None + def __repr__(self) -> str: + return f"({self.user.key}, {self.action}, {self.resource.type})" -class ProcessedQuery(BaseSchema): - user: Dict[str, Any] - action: str - resource: Dict[str, Any] + +class BulkAuthorizationQuery(BaseSchema): + checks: list[AuthorizationQuery] + + def __repr__(self) -> str: + return " | ".join([repr(query) for query in self.checks]) + + +class UrlAuthorizationQuery(BaseSchema): + """ + the format of is_allowed_url() input + """ + + user: User + http_method: str + url: AnyHttpUrl + tenant: str + context: dict[str, Any] | None = Field(default_factory=dict) + sdk: str | None + + +class UserTenantsQuery(BaseSchema): + user: User + context: dict[str, Any] | None = Field(default_factory=dict) -class DebugInformation(BaseSchema): - warnings: Optional[List[str]] - user_roles: Optional[List[Dict[str, Any]]] - granting_permission: Optional[List[Dict[str, Any]]] - user_permissions: Optional[List[Dict[str, Any]]] +class UserPermissionsQuery(BaseSchema): + user: User + tenants: 
list[str] | None = None + resources: list[str] | None = None + resource_types: list[str] | None = None + context: dict[str, Any] | None = Field(default_factory=dict) + _offset: PositiveInt | None = PrivateAttr(None) + _limit: PositiveInt | None = PrivateAttr(None) + + def set_pagination(self, page: PositiveInt | None, per_page: PositiveInt | None) -> bool: + if per_page: + self._limit = per_page + if page: + self._offset = (page - 1) * per_page + return True + return False + + def get_params(self) -> dict[str, Any]: + params = {} + if self.tenants: + params["tenants"] = self.tenants + if self.resources: + params["resource_instances"] = self.resources + if self.resource_types: + params["resource_types"] = self.resource_types + if self._offset: + params["offset"] = str(self._offset) + if self._limit: + params["limit"] = str(self._limit) + + return params class AuthorizationResult(BaseSchema): allow: bool = False - query: Optional[ProcessedQuery] - debug: Optional[DebugInformation] + query: dict | None = None + debug: dict | None result: bool = False # fallback for older sdks (TODO: remove) + + +class BulkAuthorizationResult(BaseSchema): + allow: list[AuthorizationResult] = [] + + +class _TenantDetails(BaseSchema): + key: str + attributes: dict = Field(default_factory=dict) + + +class _ResourceDetails(_TenantDetails): + type: str + + +class _UserPermissionsResult(BaseSchema): + tenant: _TenantDetails | None + resource: _ResourceDetails | None + permissions: list[str] = Field(default_factory=list, regex="^.+:.+$") + roles: list[str] | None = None + + +UserPermissionsResult = dict[str, _UserPermissionsResult] +UserTenantsResult = list[_TenantDetails] + + +class _AllTenantsAuthorizationResult(AuthorizationResult): + tenant: _TenantDetails + + +class AllTenantsAuthorizationResult(BaseSchema): + allowed_tenants: list[_AllTenantsAuthorizationResult] = [] + + +class MappingRuleData(BaseSchema): + url: AnyHttpUrl + http_method: str + resource: str + action: str + priority: 
int | None = None + + @property + def resource_action(self) -> str: + return self.action or self.http_method + + +class AuthorizedUserAssignment(BaseSchema): + user: str = Field(..., description="The user that is authorized") + tenant: str = Field(..., description="The tenant that the user is authorized for") + resource: str = Field(..., description="The resource that the user is authorized for") + role: str = Field(..., description="The role that the user is assigned to") + + +AuthorizedUsersDict = dict[str, list[AuthorizedUserAssignment]] + + +class AuthorizedUsersResult(BaseSchema): + resource: str = Field( + ..., + description="The resource that the result is about." + "Can be either 'resource:*' or 'resource:resource_instance'", + ) + tenant: str = Field(..., description="The tenant that the result is about") + users: AuthorizedUsersDict = Field( + ..., + description="A key value mapping of the users that are " + "authorized for the resource." + "The key is the user key and the value is a list of assignments allowing the user to perform" + "the requested action", + ) + + @classmethod + def empty(cls, resource: Resource) -> AuthorizedUsersResult: + resource_key = "*" if resource.key is None else resource.key + return cls( + resource=f"{resource.type}:{resource_key}", + tenant=resource.tenant or "default", + users={}, + ) + + class Config: + schema_extra = { # noqa: RUF012 + "examples": [ + { + "resource": "repo:*", + "tenant": "default", + "users": { + "user1": [ + { + "user": "user1", + "tenant": "default", + "resource": "__tenant:default", + "role": "admin", + } + ] + }, + }, + { + "resource": "repo:OPAL", + "tenant": "default", + "users": { + "user1": [ + { + "user": "user1", + "tenant": "default", + "resource": "repo:OPAL", + "role": "admin", + }, + { + "user": "user1", + "tenant": "default", + "resource": "__tenant:default", + "role": "admin", + }, + ] + }, + }, + ] + } + + +class AuthorizedUsersAuthorizationQuery(BaseSchema): + """ + the format of 
authorized_users input + """ + + action: str + resource: Resource + context: dict[str, Any] | None = Field(default_factory=dict) + sdk: str | None + + def __repr__(self) -> str: + return f"({self.action}, {self.resource.type})" diff --git a/horizon/enforcer/schemas_kong.py b/horizon/enforcer/schemas_kong.py new file mode 100644 index 00000000..d810dffe --- /dev/null +++ b/horizon/enforcer/schemas_kong.py @@ -0,0 +1,85 @@ +from uuid import UUID + +from .schemas import BaseSchema + + +class KongAuthorizationInputRequestHttp(BaseSchema): + host: str + port: int + tls: dict + method: str + scheme: str + path: str + querystring: dict[str, str] + headers: dict[str, str] + + +class KongAuthorizationInputRequest(BaseSchema): + http: KongAuthorizationInputRequestHttp + + +class KongAuthorizationInputService(BaseSchema): + host: str + created_at: int + connect_timeout: int + id: UUID + procotol: str + name: str + read_timeout: int + port: int + updated_at: int + ws_id: UUID + retries: int + write_timeout: int + + +class KongAuthorizationInputRouteService(BaseSchema): + id: UUID + + +class KongAuthorizationInputRoute(BaseSchema): + id: UUID + paths: list[str] + protocols: list[str] + strip_path: bool + created_at: int + ws_id: UUID + request_buffering: bool + updated_at: int + preserve_host: bool + regex_priority: int + response_buffering: bool + https_redirect_status_code: int + path_handling: str + service: KongAuthorizationInputRouteService + + +class KongAuthorizationInputConsumer(BaseSchema): + id: UUID + username: str + + +class KongAuthorizationInput(BaseSchema): + request: KongAuthorizationInputRequest + client_ip: str | None + service: KongAuthorizationInputService | None + route: KongAuthorizationInputRoute | None + consumer: KongAuthorizationInputConsumer | None + + +class KongAuthorizationQuery(BaseSchema): + """ + the format of is_allowed_kong() input + """ + + input: KongAuthorizationInput + + +class KongWrappedAuthorizationQuery(BaseSchema): + user: dict + 
resource: dict + action: str + + +class KongAuthorizationResult(BaseSchema): + result: bool = False diff --git a/horizon/enforcer/schemas_v1.py b/horizon/enforcer/schemas_v1.py new file mode 100644 index 00000000..d7a9361f --- /dev/null +++ b/horizon/enforcer/schemas_v1.py @@ -0,0 +1,29 @@ +from typing import Any + +from pydantic import Field + +from horizon.enforcer.schemas import BaseSchema + + +class BaseSchemaV1(BaseSchema): + class Config: + schema_extra = {"deprecated": True} # noqa: RUF012 + + +class ResourceV1(BaseSchemaV1): + type: str + id: str | None = None + tenant: str | None = None + attributes: dict[str, Any] | None = None + context: dict[str, Any] | None = Field(default_factory=dict) + + +class AuthorizationQueryV1(BaseSchema): + """ + the format of is_allowed() input + """ + + user: str # user_id or jwt + action: str + resource: ResourceV1 + context: dict[str, Any] | None = Field(default_factory=dict) diff --git a/horizon/enforcer/utils/__init__.py b/horizon/enforcer/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/horizon/enforcer/utils/headers_utils.py b/horizon/enforcer/utils/headers_utils.py new file mode 100644 index 00000000..64bd9bd7 --- /dev/null +++ b/horizon/enforcer/utils/headers_utils.py @@ -0,0 +1,4 @@ +def get_case_insensitive(dictionary, key) -> str | None: + if isinstance(key, str): + return next((dictionary[k] for k in dictionary if k.lower() == key.lower()), None) + return dictionary.get(key, None) diff --git a/horizon/enforcer/utils/mapping_rules_utils.py b/horizon/enforcer/utils/mapping_rules_utils.py new file mode 100644 index 00000000..a4744b71 --- /dev/null +++ b/horizon/enforcer/utils/mapping_rules_utils.py @@ -0,0 +1,112 @@ +from pydantic import AnyHttpUrl +from starlette.datastructures import QueryParams + +from horizon.enforcer.schemas import MappingRuleData + + +class MappingRulesUtils: + @staticmethod + def _compare_urls(mapping_rule_url: AnyHttpUrl, request_url: AnyHttpUrl) -> bool: + if 
mapping_rule_url.scheme != request_url.scheme: + return False + if mapping_rule_url.host != request_url.host: + return False + if not MappingRulesUtils._compare_url_path(mapping_rule_url.path, request_url.path): + return False + if not MappingRulesUtils._compare_query_params(mapping_rule_url.query, request_url.query): # noqa: SIM103 + return False + return True + + @staticmethod + def _compare_url_path(mapping_rule_url: str | None, request_url: str | None) -> bool: + if mapping_rule_url is None and request_url is None: + return True + if not (mapping_rule_url is not None and request_url is not None): + return False + mapping_rule_url_parts = mapping_rule_url.split("/") + request_url_parts = request_url.split("/") + if len(mapping_rule_url_parts) != len(request_url_parts): + return False + for i in range(len(mapping_rule_url_parts)): + if mapping_rule_url_parts[i].startswith("{") and mapping_rule_url_parts[i].endswith("}"): + continue + if mapping_rule_url_parts[i] != request_url_parts[i]: + return False + return True + + @staticmethod + def _compare_query_params(mapping_rule_query_string: str | None, request_url_query_string: str | None) -> bool: + if mapping_rule_query_string is None and request_url_query_string is None: + # if both are None, they are equal + return True + if mapping_rule_query_string is not None and request_url_query_string is None: + # if the request query string is None, but the mapping rule query string is not + # then the request does not match the mapping rule + return False + if mapping_rule_query_string is None and request_url_query_string is not None: + # if the mapping rule query string is None, but the request query string is not + # then the request matches the query string rules it has additional data to the rule + return True + + mapping_rule_query_params = QueryParams(mapping_rule_query_string) + request_query_params = QueryParams(request_url_query_string) + + for key in mapping_rule_query_params: + if key not in 
request_query_params: + return False + + if mapping_rule_query_params[key].startswith("{") and mapping_rule_query_params[key].endswith("}"): + # if the value is an attribute + # we just need to make sure the attribute is in the request query params + continue + elif mapping_rule_query_params[key] != request_query_params[key]: + # if the value is not an attribute, verify that the values are the same + return False + return True + + @staticmethod + def extract_attributes_from_url(rule_url: str, request_url: str) -> dict: + rule_url_parts = rule_url.split("/") + request_url_parts = request_url.split("/") + attributes = {} + if len(rule_url_parts) != len(request_url_parts): + return {} + for i in range(len(rule_url_parts)): + if rule_url_parts[i].startswith("{") and rule_url_parts[i].endswith("}"): + attributes[rule_url_parts[i][1:-1]] = request_url_parts[i] + return attributes + + @staticmethod + def extract_attributes_from_query_params(rule_url: str, request_url: str) -> dict: + if "?" not in rule_url or "?" 
not in request_url: + return {} + rule_query_params = QueryParams(rule_url.split("?")[1]) + request_query_params = QueryParams(request_url.split("?")[1]) + attributes = {} + for key in rule_query_params: + if rule_query_params[key].startswith("{") and rule_query_params[key].endswith("}"): + attributes[rule_query_params[key][1:-1]] = request_query_params[key] + return attributes + + @classmethod + def extract_mapping_rule_by_request( + cls, + mapping_rules: list[MappingRuleData], + http_method: str, + url: AnyHttpUrl, + ) -> MappingRuleData | None: + matched_mapping_rules = [] + for mapping_rule in mapping_rules: + if mapping_rule.http_method != http_method.lower(): + # if the method is not the same, we don't need to check the url + continue + if not cls._compare_urls(mapping_rule.url, url): + # if the urls doesn't match, we don't need to check the headers + continue + matched_mapping_rules.append(mapping_rule) + # most priority first + matched_mapping_rules.sort(key=lambda rule: rule.priority or 0, reverse=True) + if len(matched_mapping_rules) > 0: + return matched_mapping_rules[0] + + return None diff --git a/horizon/enforcer/utils/statistics_utils.py b/horizon/enforcer/utils/statistics_utils.py new file mode 100644 index 00000000..b38c5cb7 --- /dev/null +++ b/horizon/enforcer/utils/statistics_utils.py @@ -0,0 +1,83 @@ +import asyncio + +from loguru import logger + + +class StatisticsManager: + def __init__(self, interval_seconds: int = 60, failures_threshold_percentage: float = 0.1): + self._requests = 0 + self._failures = 0 + self._messages: asyncio.Queue[bool] = asyncio.Queue() + self._lock = asyncio.Lock() + self._restarter_task: asyncio.Task | None = None + self._interval_task: asyncio.Task | None = None + self._interval_seconds = interval_seconds + self._failures_threshold_percentage = failures_threshold_percentage + self._had_failure = False + + async def restarter_task(self) -> None: + while True: + message = await self._messages.get() + try: + async with 
self._lock: + logger.debug("Statistics message: {message}", message=message) + self._requests += 1 + if message is False: + self._failures += 1 + finally: + self._messages.task_done() + + async def reset_stats(self) -> None: + async with self._lock: + logger.debug( + "Resetting error rate current status is requests={requests}, failures={failures}", + requests=self._requests, + failures=self._failures, + ) + self._requests = 0 + self._failures = 0 + + async def interval_task(self) -> None: + while True: + await asyncio.sleep(self._interval_seconds) + await self.reset_stats() + + async def run(self) -> None: + logger.debug("Starting statistics manager") + if self._restarter_task is None: + self._restarter_task = asyncio.create_task(self.restarter_task()) + if self._interval_task is None: + self._interval_task = asyncio.create_task(self.interval_task()) + + async def stop_tasks(self) -> None: + if self._restarter_task is not None: + self._restarter_task.cancel() + self._restarter_task = None + if self._interval_task is not None: + self._interval_task.cancel() + self._interval_task = None + + async def stop(self) -> None: + logger.debug("Stopping statistics manager") + await self._messages.join() + await self.stop_tasks() + + def report_success(self) -> None: + logger.debug("Reporting success") + self._messages.put_nowait(item=True) + + def report_failure(self) -> None: + logger.debug("Reporting failure") + self._messages.put_nowait(item=False) + + async def current_rate(self) -> float: + current_requests, current_failures = float(self._requests), float(self._failures) + if current_requests == 0: + return 0.0 + return current_failures / current_requests + + async def status(self) -> bool: + rate = await self.current_rate() + if rate > self._failures_threshold_percentage: + self._had_failure = True + return self._had_failure diff --git a/horizon/factdb/__init__.py b/horizon/factdb/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/horizon/factdb/client.py b/horizon/factdb/client.py new file mode 100644 index 00000000..fec3285c --- /dev/null +++ b/horizon/factdb/client.py @@ -0,0 +1,245 @@ +import asyncio +from collections.abc import Awaitable, Callable +from pathlib import Path + +from fastapi import FastAPI +from loguru import logger +from opal_client import OpalClient +from opal_client.config import EngineLogFormat, opal_client_config +from opal_client.data.api import init_data_router +from opal_client.data.updater import DataUpdater +from opal_client.engine.options import CedarServerOptions, OpaServerOptions +from opal_client.engine.runner import PolicyEngineRunner +from opal_client.policy.api import init_policy_router +from opal_client.policy.updater import PolicyUpdater +from opal_client.policy_store.api import init_policy_store_router +from opal_client.policy_store.base_policy_store_client import BasePolicyStoreClient +from opal_client.policy_store.schemas import PolicyStoreTypes +from opal_common.authentication.deps import JWTAuthenticator +from opal_common.authentication.verifier import JWTVerifier +from opal_common.fetcher.providers.http_fetch_provider import ( + HttpFetcherConfig, + HttpMethods, +) +from scalar_fastapi import get_scalar_api_reference +from starlette import status +from starlette.responses import JSONResponse + +from horizon.config import sidecar_config +from horizon.factdb.policy_store import FactDBPolicyStoreClient +from horizon.factdb.runner import FactDBRunner + + +class ExtendedOpalClient(OpalClient): + """ + Extended OpalClient that allows for additional healthchecks besides of the + policy store one + it is only used in FactDB and will later be removed when we add FactDB Policy Store implementation + """ + + async def check_healthy(self) -> bool: + return await self.policy_store.is_healthy() + + async def check_ready(self) -> bool: + return self._backup_loaded or await self.policy_store.is_ready() + + def _init_fast_api_app(self) -> FastAPI: + # Called at 
the end of OPALClient.__init__ + self._inject_extra_callbacks() + return super()._init_fast_api_app() + + def _configure_api_routes(self, app: FastAPI): + """mounts the api routes on the app object.""" + + @app.get("/scalar", include_in_schema=False) + async def scalar_html(): + return get_scalar_api_reference( + openapi_url="/openapi.json", + title="Permit.io PDP API", + ) + + authenticator = JWTAuthenticator(self.verifier) + + # Init api routers with required dependencies + policy_router = init_policy_router(policy_updater=self.policy_updater) + data_router = init_data_router(data_updater=self.data_updater) + policy_store_router = init_policy_store_router(authenticator) + + # mount the api routes on the app object + app.include_router(policy_router, tags=["Policy Updater"]) + app.include_router(data_router, tags=["Data Updater"]) + app.include_router(policy_store_router, tags=["Policy Store"]) + + # excluded callbacks api from the main api, since we use it internally. + # Use the DATA_UPDATE_CALLBACKS config to configure callbacks instead + + # top level routes (i.e: healthchecks) + @app.get("/healthcheck", include_in_schema=False) + @app.get("/", include_in_schema=False) + @app.get("/healthy", include_in_schema=False) + async def healthy(): + """returns 200 if updates keep being successfully fetched from the + server and applied to the policy store.""" + healthy = await self.check_healthy() + if healthy: + return JSONResponse(status_code=status.HTTP_200_OK, content={"status": "ok"}) + else: + return JSONResponse( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + content={"status": "unavailable"}, + ) + + @app.get("/ready", include_in_schema=False) + async def ready(): + """returns 200 if the policy store is ready to serve requests.""" + ready = await self.check_ready() + if ready: + return JSONResponse(status_code=status.HTTP_200_OK, content={"status": "ok"}) + else: + return JSONResponse( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + 
content={"status": "unavailable"}, + ) + + return app + + def _inject_extra_callbacks(self) -> None: + register = self._callbacks_register + default_config = HttpFetcherConfig( + method=HttpMethods.POST, + headers={"content-type": "application/json"}, + process_data=False, + fetcher=None, + ) + for entry in sidecar_config.DATA_UPDATE_CALLBACKS: + entry.config = entry.config or default_config + entry.key = entry.key or register.calc_hash(entry.url, entry.config) + + if register.get(entry.key): + raise RuntimeError(f"Callback with key '{entry.key}' already exists. Please specify a different key.") + + logger.info(f"Registering data update callback to url '{entry.url}' with key '{entry.key}'") + register.put(entry.url, entry.config, entry.key) + + +class FactDBClient(ExtendedOpalClient): + def __init__( + self, + *, + policy_store_type: PolicyStoreTypes = None, + policy_store: BasePolicyStoreClient = None, + data_updater: DataUpdater = None, + data_topics: list[str] | None = None, + policy_updater: PolicyUpdater = None, + inline_opa_enabled: bool | None = None, + inline_opa_options: OpaServerOptions = None, + inline_cedar_enabled: bool | None = None, + inline_cedar_options: CedarServerOptions = None, + verifier: JWTVerifier | None = None, + store_backup_path: str | None = None, + store_backup_interval: int | None = None, + offline_mode_enabled: bool = False, + shard_id: str | None = None, + ): + self._factdb_enabled = sidecar_config.FACTDB_ENABLED + if self._factdb_enabled: + self._factdb_runner = FactDBRunner( + storage_path=Path(sidecar_config.OFFLINE_MODE_BACKUP_DIR) / "factdb", + factdb_url=sidecar_config.FACTDB_SERVICE_URL, + factdb_binary_path=sidecar_config.FACTDB_BINARY_PATH, + factdb_token=opal_client_config.CLIENT_TOKEN, + factdb_backup_server_url=sidecar_config.FACTDB_BACKUP_SERVER_URL, + # Limit retires when in offline mode or 0 (infinite retries) when online + backup_fetch_max_retries=sidecar_config.CONFIG_FETCH_MAX_RETRIES + if 
sidecar_config.ENABLE_OFFLINE_MODE + else 0, + engine_token=sidecar_config.API_KEY, + piped_logs_format=EngineLogFormat.FULL, + ) + policy_store = policy_store or FactDBPolicyStoreClient( + factdb_client=lambda: self._factdb_runner.client, + opa_server_url=opal_client_config.POLICY_STORE_URL, + opa_auth_token=opal_client_config.POLICY_STORE_AUTH_TOKEN, + auth_type=opal_client_config.POLICY_STORE_AUTH_TYPE, + oauth_client_id=opal_client_config.POLICY_STORE_AUTH_OAUTH_CLIENT_ID, + oauth_client_secret=opal_client_config.POLICY_STORE_AUTH_OAUTH_CLIENT_SECRET, + oauth_server=opal_client_config.POLICY_STORE_AUTH_OAUTH_SERVER, + data_updater_enabled=opal_client_config.DATA_UPDATER_ENABLED, + policy_updater_enabled=opal_client_config.POLICY_UPDATER_ENABLED, + cache_policy_data=opal_client_config.OFFLINE_MODE_ENABLED, + tls_client_cert=opal_client_config.POLICY_STORE_TLS_CLIENT_CERT, + tls_client_key=opal_client_config.POLICY_STORE_TLS_CLIENT_KEY, + tls_ca=opal_client_config.POLICY_STORE_TLS_CA, + ) + super().__init__( + policy_store_type=policy_store_type, + policy_store=policy_store, + data_updater=data_updater, + data_topics=data_topics, + policy_updater=policy_updater, + inline_opa_enabled=inline_opa_enabled, + inline_opa_options=inline_opa_options, + inline_cedar_enabled=inline_cedar_enabled, + inline_cedar_options=inline_cedar_options, + verifier=verifier, + store_backup_path=store_backup_path, + store_backup_interval=store_backup_interval, + offline_mode_enabled=offline_mode_enabled, + shard_id=shard_id, + ) + + @staticmethod + async def _run_engine_runner( + callback: Callable[[], Awaitable] | None, + engine_runner: PolicyEngineRunner, + ): + # runs the callback after policy store is up + engine_runner.register_process_initial_start_callbacks([callback] if callback else []) + async with engine_runner: + await engine_runner.wait_until_done() + + async def start_factdb_runner(self): + await self._run_engine_runner(None, self._factdb_runner) + + async def 
stop_factdb_runner(self): + logger.info("Stopping FactDB runner") + await self._factdb_runner.stop() + + async def check_healthy(self) -> bool: + try: + opal_health = await super().check_healthy() + if not opal_health: + return False + if self._factdb_enabled: + return await self._factdb_runner.is_healthy() + except Exception as e: # noqa: BLE001 + logger.exception("Error checking health: {e}", e=e) + return False + else: + return True + + async def check_ready(self) -> bool: + try: + opal_ready = await super().check_ready() + if not opal_ready: + return False + if self._factdb_enabled: + return await self._factdb_runner.is_ready() + except Exception as e: # noqa: BLE001 + logger.exception("Error checking ready: {e}", e=e) + return False + else: + return True + + async def start_client_background_tasks(self): + tasks = [super().start_client_background_tasks()] + if self._factdb_enabled: + logger.info("Starting FactDB runner") + tasks.append(self.start_factdb_runner()) + await asyncio.gather(*tasks) + + async def stop_client_background_tasks(self): + """stops all background tasks (called on shutdown event)""" + await super().stop_client_background_tasks() + if self._factdb_enabled: + await self.stop_factdb_runner() diff --git a/horizon/factdb/data_update.py b/horizon/factdb/data_update.py new file mode 100644 index 00000000..2d58132a --- /dev/null +++ b/horizon/factdb/data_update.py @@ -0,0 +1,39 @@ +from collections.abc import Iterator + +from pydantic import BaseModel +from typing_extensions import Self + + +class Fact(BaseModel): + type: str + attributes: dict[str, str] + + +class InsertOperation(BaseModel): + fact: Fact + + +class DeleteOperation(BaseModel): + fact: Fact + + +AnyOperation = InsertOperation | DeleteOperation + + +class DataUpdate(BaseModel): + inserts: list[InsertOperation] + deletes: list[DeleteOperation] + + @classmethod + def from_operations(cls, operations: Iterator[AnyOperation]) -> Self: + inserts, deletes = [], [] + for operation in 
operations: + if isinstance(operation, InsertOperation): + inserts.append(operation) + elif isinstance(operation, DeleteOperation): + deletes.append(operation) + + return cls( + inserts=inserts, + deletes=deletes, + ) diff --git a/horizon/factdb/policy_store.py b/horizon/factdb/policy_store.py new file mode 100644 index 00000000..fae528e2 --- /dev/null +++ b/horizon/factdb/policy_store.py @@ -0,0 +1,145 @@ +import time +from collections.abc import Callable, Iterator +from typing import Any + +import aiohttp +from aiohttp import ClientSession +from loguru import logger +from opal_client.policy_store.opa_client import OpaClient +from opal_client.policy_store.schemas import PolicyStoreAuth +from opal_common.schemas.data import JsonableValue + +from horizon.factdb.data_update import AnyOperation, DataUpdate +from horizon.factdb.update_operations import ( + _get_operations_for_update_relationship_tuple, + _get_operations_for_update_resource_instance, + _get_operations_for_update_role_assigment, + _get_operations_for_update_user, +) + + +class FactDBPolicyStoreClient(OpaClient): + def __init__( + self, + factdb_client: ClientSession | Callable[[], ClientSession], + *, + opa_server_url=None, + opa_auth_token: str | None = None, + auth_type: PolicyStoreAuth = PolicyStoreAuth.NONE, + oauth_client_id: str | None = None, + oauth_client_secret: str | None = None, + oauth_server: str | None = None, + tls_client_cert: str | None = None, + tls_client_key: str | None = None, + tls_ca: str | None = None, + data_updater_enabled: bool = True, + policy_updater_enabled: bool = True, + cache_policy_data: bool = False, + ): + super().__init__( + opa_server_url, + opa_auth_token, + auth_type, + oauth_client_id, + oauth_client_secret, + oauth_server, + data_updater_enabled, + policy_updater_enabled, + cache_policy_data, + tls_client_cert, + tls_client_key, + tls_ca, + ) + self._client = factdb_client + + @property + def client(self): + if isinstance(self._client, ClientSession): + return 
self._client + return self._client() + + async def set_policy_data( + self, + policy_data: JsonableValue, + path: str = "", + transaction_id: str | None = None, + ): + parts = path.lstrip("/").split("/") + try: + update = DataUpdate.from_operations( + self._generate_operations(parts, policy_data), + ) + except NotImplementedError as e: + logger.warning(f"{e}, storing in OPA directly...") + return await super().set_policy_data(policy_data=policy_data, path=path, transaction_id=transaction_id) + + return await self._apply_data_update(update) + + def _generate_operations(self, parts: list[str], data: JsonableValue) -> Iterator[AnyOperation]: # noqa: C901 + match parts: + case ["relationships"]: + for obj, _data in data.items(): + yield from _get_operations_for_update_relationship_tuple(obj, _data) + case ["relationships", obj]: + yield from _get_operations_for_update_relationship_tuple(obj, data) + case ["role_assignments"]: + for full_user_key, _data in data.items(): + yield from _get_operations_for_update_role_assigment(full_user_key, _data) + case ["role_assignments", full_user_key]: + yield from _get_operations_for_update_role_assigment(full_user_key, data) + case ["users"]: + for user_key, _data in data.items(): + yield from _get_operations_for_update_user(user_key, _data) + case ["users", user_key]: + yield from _get_operations_for_update_user(user_key, data) + case ["resource_instances"]: + for instance_key, _data in data.items(): + yield from _get_operations_for_update_resource_instance(instance_key, _data) + case ["resource_instances", instance_key]: + yield from _get_operations_for_update_resource_instance(instance_key, data) + case _: + raise NotImplementedError(f"Unsupported path for FactDB: {parts}") + + async def _apply_data_update(self, data_update: DataUpdate) -> aiohttp.ClientResponse: + start_time = time.perf_counter_ns() + res = await self.client.post( + "/v1/facts/applyUpdate", + json=data_update.dict(), + ) + elapsed_time_ms = 
(time.perf_counter_ns() - start_time) / 1_000_000 + if res.status != 200: + logger.error( + "Failed to apply data update to FactDB: {}", + await res.text(), + ) + else: + logger.info(f"Data update applied to FactDB: status={res.status} duration={elapsed_time_ms:.2f}ms") + return res + + async def list_facts_by_type( + self, + fact_type: str, + page: int = 1, + per_page: int = 30, + filters: dict[str, Any] | None = None, + ) -> aiohttp.ClientResponse: + logger.info( + "Performing list facts for '{fact_type}' fact type from the FactDB", + fact_type=fact_type, + ) + query_params = { + "page": page, + "per_page": per_page, + } | (filters or {}) + res = await self.client.get( + f"/v1/facts/{fact_type}", + params=query_params, + ) + if res.status != 200: + logger.error( + "Failed to list '{fact_type}' facts from FactDB: {res}", + fact_type=fact_type, + res=await res.text(), + ) + res.raise_for_status() + return res diff --git a/horizon/factdb/runner.py b/horizon/factdb/runner.py new file mode 100644 index 00000000..192d657e --- /dev/null +++ b/horizon/factdb/runner.py @@ -0,0 +1,97 @@ +import json +import logging +import os +from pathlib import Path + +import aiohttp +from opal_client.config import EngineLogFormat +from opal_client.engine.logger import log_entire_dict, logging_level_from_string +from opal_client.engine.runner import PolicyEngineRunner +from opal_client.logger import logger + + +class FactDBRunner(PolicyEngineRunner): + def __init__( + self, + storage_path: Path, + engine_token: str, + factdb_url: str, + factdb_binary_path: str, + factdb_token: str | None, + factdb_backup_server_url: str | None, + backup_fetch_max_retries: int, + piped_logs_format: EngineLogFormat = EngineLogFormat.NONE, + ): + super().__init__(piped_logs_format=piped_logs_format) + self._storage_path = storage_path + self._engine_token = engine_token + self._factdb_url = factdb_url + self._factdb_binary_path = factdb_binary_path + self._factdb_token = factdb_token + 
self._factdb_backup_server_url = factdb_backup_server_url + self._backup_fetch_max_retries = backup_fetch_max_retries + self._client = None + + self._storage_path.mkdir(parents=True, exist_ok=True) + + @property + def client(self) -> aiohttp.ClientSession: + if self._client is None: + self._client = aiohttp.ClientSession( + base_url=self._factdb_url, + headers={"Authorization": f"Bearer {self._engine_token}"}, + ) + return self._client + + async def handle_log_line(self, line: bytes) -> None: + try: + log_line = json.loads(line) + level = logging.getLevelName(logging_level_from_string(log_line.pop("level", "info"))) + msg = log_line.pop("msg", None) + log_entire_dict(level, msg, log_line) + except json.JSONDecodeError: + logger.info(line.decode("utf-8")) + + async def __aenter__(self): + self.set_envs() + await super().__aenter__() + await self.client.__aenter__() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await super().__aexit__(exc_type, exc_val, exc_tb) + await self.client.__aexit__(exc_type, exc_val, exc_tb) + + async def is_healthy(self) -> bool: + async with self.client.get("/healthy") as resp: + try: + resp.raise_for_status() + except aiohttp.ClientResponseError: + return False + else: + return True + + async def is_ready(self) -> bool: + async with self.client.get("/ready") as resp: + try: + resp.raise_for_status() + except aiohttp.ClientResponseError: + return False + else: + return True + + def set_envs(self) -> None: + os.environ["PDP_ENGINE_TOKEN"] = self._engine_token + os.environ["PDP_FACT_STORE_DSN"] = str(self._storage_path / "fact.db") + os.environ["PDP_BACKUP_MAX_RETRIES"] = str(self._backup_fetch_max_retries) + if self._factdb_token: + os.environ["PDP_TOKEN"] = self._factdb_token + os.environ["PDP_BACKUP_ENABLED"] = "true" + if self._factdb_backup_server_url: + os.environ["PDP_BACKUP_URL"] = self._factdb_backup_server_url + + def get_executable_path(self) -> str: + return self._factdb_binary_path + + def 
get_arguments(self) -> list[str]: + return [] diff --git a/horizon/factdb/update_operations.py b/horizon/factdb/update_operations.py new file mode 100644 index 00000000..777ca2e2 --- /dev/null +++ b/horizon/factdb/update_operations.py @@ -0,0 +1,143 @@ +import json +from collections.abc import Iterator + +from opal_common.schemas.data import JsonableValue + +from horizon.factdb.data_update import ( + AnyOperation, + DeleteOperation, + Fact, + InsertOperation, +) + + +def _get_operations_for_update_relationship_tuple(obj: str, data: JsonableValue) -> Iterator[AnyOperation]: + yield DeleteOperation( + fact=Fact( + type="relationship_tuples", + attributes={"object": obj}, + ), + ) + for full_relation, targets in data.items(): + relation = full_relation.removeprefix("relation:") + for target_type, target_objects in targets.items(): + for target in target_objects: + yield InsertOperation( + fact=Fact( + type="relationship_tuples", + attributes={ + "subject": f"{target_type}:{target}", + "relation": relation, + "object": obj, + }, + ), + ) + + +def _get_operations_for_update_role_assigment(full_user_key: str, data: JsonableValue) -> Iterator[AnyOperation]: + user_key = full_user_key.removeprefix("user:") + yield DeleteOperation( + fact=Fact( + type="role_assignments", + attributes={"actor": f"user:{user_key}"}, + ), + ) + for subject, roles in data.items(): + subject_type, subject_key = subject.split(":", 1) + for role_key in roles: + if subject_type == "__tenant": + yield InsertOperation( + fact=Fact( + type="role_assignments", + attributes={ + "actor": f"user:{user_key}", + "tenant": subject_key, + "role": role_key, + "resource": subject, + }, + ), + ) + else: + yield InsertOperation( + fact=Fact( + type="role_assignments", + attributes={ + "actor": f"user:{user_key}", + "tenant": "", + "role": role_key, + "resource": subject, + }, + ), + ) + + +def _get_operations_for_update_user(user_key: str, data: JsonableValue) -> Iterator[AnyOperation]: + attributes = 
data.get("attributes", {}) + if attributes: + yield InsertOperation( + fact=Fact( + type="users", + attributes={ + "id": f"user:{user_key}", + "attributes": json.dumps(attributes), + # TODO remove the json.dumps after fixing the map[string]string issue in Go + }, + ), + ) + else: + # When an object is deleted, a data update with an empty attributes object is sent + # We cascade the deletion to all related facts + yield DeleteOperation( + fact=Fact( + type="users", + attributes={"id": f"user:{user_key}"}, + ), + ) + yield DeleteOperation( + fact=Fact( + type="role_assignments", + attributes={"actor": f"user:{user_key}"}, + ), + ) + + +def _get_operations_for_update_resource_instance(instance_key: str, data: JsonableValue) -> Iterator[AnyOperation]: + attributes = data.get("attributes", {}) + if attributes: + yield InsertOperation( + fact=Fact( + type="instances", + attributes={ + "id": instance_key, + "attributes": json.dumps(attributes), + # TODO remove the json.dumps after fixing the map[string]string issue in Go + }, + ), + ) + else: + # When an object is deleted, a data update with an empty attributes object is sent + # We cascade the deletion to all related facts + yield DeleteOperation( + fact=Fact( + type="instances", + attributes={"id": instance_key}, + ), + ) + yield DeleteOperation( + fact=Fact( + type="relationship_tuples", + attributes={"object": instance_key}, + ), + ) + yield DeleteOperation( + fact=Fact( + type="relationship_tuples", + attributes={"subject": instance_key}, + ), + ) + yield DeleteOperation( + fact=Fact( + type="role_assignments", + attributes={"resource": instance_key}, + ), + ) diff --git a/horizon/facts/__init__.py b/horizon/facts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/horizon/facts/client.py b/horizon/facts/client.py new file mode 100644 index 00000000..de41b66b --- /dev/null +++ b/horizon/facts/client.py @@ -0,0 +1,125 @@ +from typing import Annotated +from urllib.parse import urljoin + +from fastapi 
import Depends, HTTPException +from httpx import AsyncClient +from httpx import Request as HttpxRequest +from httpx import Response as HttpxResponse +from loguru import logger +from starlette import status +from starlette.requests import Request as FastApiRequest +from starlette.responses import Response as FastApiResponse +from starlette.responses import StreamingResponse + +from horizon.config import sidecar_config +from horizon.startup.api_keys import get_env_api_key +from horizon.startup.remote_config import get_remote_config + + +class FactsClient: + def __init__(self): + self._client: AsyncClient | None = None + + @property + def client(self) -> AsyncClient: + if self._client is None: + env_api_key = get_env_api_key() + self._client = AsyncClient( + base_url=sidecar_config.CONTROL_PLANE, + headers={"Authorization": f"Bearer {env_api_key}"}, + ) + return self._client + + async def build_forward_request(self, request: FastApiRequest, path: str) -> HttpxRequest: + """ + Build an HTTPX request from a FastAPI request to forward to the facts service. 
+ :param request: FastAPI request + :param path: Backend facts service path to forward to + :return: HTTPX request + """ + forward_headers = { + key: value for key, value in request.headers.items() if key.lower() in {"authorization", "content-type"} + } + remote_config = get_remote_config() + project_id = remote_config.context.get("project_id") + environment_id = remote_config.context.get("env_id") + if project_id is None or environment_id is None: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="PDP API Key for environment is required.", + ) + + full_path = urljoin(f"/v2/facts/{project_id}/{environment_id}/", path.removeprefix("/")) + return self.client.build_request( + method=request.method, + url=full_path, + params=request.query_params, + headers=forward_headers, + content=request.stream(), + ) + + async def send(self, request: HttpxRequest, *, stream: bool = False) -> HttpxResponse: + logger.info(f"Forwarding facts request: {request.method} {request.url}") + return await self.client.send(request, stream=stream) + + async def send_forward_request(self, request: FastApiRequest, path: str) -> HttpxResponse: + """ + Send a forward request to the facts service. + :param request: FastAPI request + :param path: Backend facts service path to forward to + :return: HTTPX response + """ + forward_request = await self.build_forward_request(request, path) + return await self.send(forward_request) + + @staticmethod + def convert_response(response: HttpxResponse, *, stream: bool = False) -> FastApiResponse: + """ + Convert an HTTPX response to a FastAPI response. + :param response: HTTPX response + :param stream: Stream the response content (automatic by default if content has not loaded) + :return: + """ + if stream or not hasattr(response, "_content"): + # if the response content has not loaded yet, optimize it to stream the response. 
+ return StreamingResponse( + content=response.aiter_bytes(), + status_code=response.status_code, + headers=response.headers, + ) + else: + return FastApiResponse( + content=response.content, + status_code=response.status_code, + headers=response.headers, + ) + + @staticmethod + def extract_body(response: HttpxResponse): + if not response.is_success: + logger.warning( + f"Response status code is not successful ( {response.status_code} ), " f"skipping wait for update." + ) + return None + + try: + body = response.json() + except Exception: # noqa: BLE001 + logger.exception("Failed to parse response body as JSON, skipping wait for update.") + return None + else: + return body + + +_facts_client: FactsClient | None = None + + +def get_facts_client() -> FactsClient: + global _facts_client + if _facts_client is None: + _facts_client = FactsClient() + + return _facts_client + + +FactsClientDependency = Annotated[FactsClient, Depends(get_facts_client)] diff --git a/horizon/facts/dependencies.py b/horizon/facts/dependencies.py new file mode 100644 index 00000000..898ba6a3 --- /dev/null +++ b/horizon/facts/dependencies.py @@ -0,0 +1,51 @@ +from typing import Annotated + +from fastapi import Depends, HTTPException, Request +from loguru import logger +from opal_client import OpalClient + +from horizon.config import sidecar_config +from horizon.facts.update_subscriber import DataUpdateSubscriber + + +def get_opal_client(request: Request) -> OpalClient: + return request.app.state.opal_client + + +OpalClientDependency = Annotated[OpalClient, Depends(get_opal_client)] + +_data_update_subscriber: DataUpdateSubscriber | None = None + + +def get_data_update_subscriber( + opal_client: OpalClientDependency, +) -> DataUpdateSubscriber: + global _data_update_subscriber + if _data_update_subscriber is None: + _data_update_subscriber = DataUpdateSubscriber(opal_client.data_updater) + + return _data_update_subscriber + + +DataUpdateSubscriberDependency = Annotated[DataUpdateSubscriber, 
Depends(get_data_update_subscriber)] + + +def get_wait_timeout(request: Request) -> float | None: + wait_timeout = request.headers.get("X-Wait-timeout", sidecar_config.LOCAL_FACTS_WAIT_TIMEOUT) + if not wait_timeout: + return None + try: + wait_timeout = float(wait_timeout) + except ValueError as e: + logger.error(f"Invalid X-Wait-timeout header, expected float, got {wait_timeout!r}") + raise HTTPException( + status_code=400, + detail=f"Invalid X-Wait-timeout header, expected float, got {wait_timeout!r}", + ) from e + if wait_timeout < 0: + return None + else: + return wait_timeout + + +WaitTimeoutDependency = Annotated[float | None, Depends(get_wait_timeout)] diff --git a/horizon/facts/opal_forwarder.py b/horizon/facts/opal_forwarder.py new file mode 100644 index 00000000..da00fc74 --- /dev/null +++ b/horizon/facts/opal_forwarder.py @@ -0,0 +1,68 @@ +from functools import cache +from urllib.parse import urljoin +from uuid import uuid4 + +from opal_common.fetcher.providers.http_fetch_provider import HttpFetcherConfig +from opal_common.schemas.data import DataSourceEntry, DataUpdate + +from horizon.config import sidecar_config +from horizon.startup.remote_config import get_remote_config + + +@cache +def get_opal_data_base_url() -> str: + remote_config = get_remote_config() + org_id = remote_config.context.get("org_id") + proj_id = remote_config.context.get("project_id") + env_id = remote_config.context.get("env_id") + return urljoin( + sidecar_config.CONTROL_PLANE_PDP_DELTAS_API, + f"v2/internal/opal_data/{org_id}/{proj_id}/{env_id}/", + ) + + +@cache +def get_opal_data_topic() -> str: + remote_config = get_remote_config() + pdp_client_id = remote_config.context.get("client_id") + topic = f"{pdp_client_id}:data:policy_data" + return topic + + +def create_data_source_entry( + obj_type: str, + obj_id: str, + obj_key: str, + authorization_header: str, +) -> DataSourceEntry: + obj_id = obj_id.replace("-", "") # convert UUID to Hex + url = urljoin( + 
get_opal_data_base_url(), + f"{obj_type}/{obj_id}", + ) + + topic = get_opal_data_topic() + + headers = { + "Authorization": authorization_header, + } + if sidecar_config.SHARD_ID: + headers["X-Shard-Id"] = sidecar_config.SHARD_ID + + return DataSourceEntry( + url=url, + data=None, + dst_path=f"{obj_type}/{obj_key}", + save_method="PUT", + topics=[topic], + config=HttpFetcherConfig(headers=headers).dict(), + ) + + +def create_data_update_entry(entries: list[DataSourceEntry]) -> DataUpdate: + entries_text = ", ".join(entry.dst_path for entry in entries) + return DataUpdate( + id=uuid4().hex, + entries=entries, + reason=f"Local facts upload for {entries_text}", + ) diff --git a/horizon/facts/router.py b/horizon/facts/router.py new file mode 100644 index 00000000..ce123620 --- /dev/null +++ b/horizon/facts/router.py @@ -0,0 +1,296 @@ +from collections.abc import Callable, Iterable +from typing import Any + +from fastapi import ( + APIRouter, + Depends, + Response, +) +from fastapi import ( + Request as FastApiRequest, +) +from loguru import logger +from opal_common.schemas.data import DataSourceEntry + +from horizon.authentication import enforce_pdp_token +from horizon.facts.client import FactsClient, FactsClientDependency +from horizon.facts.dependencies import ( + DataUpdateSubscriberDependency, + WaitTimeoutDependency, +) +from horizon.facts.opal_forwarder import ( + create_data_source_entry, + create_data_update_entry, +) +from horizon.facts.update_subscriber import DataUpdateSubscriber + +facts_router = APIRouter(dependencies=[Depends(enforce_pdp_token)]) + + +@facts_router.post("/users") +async def create_user( + request: FastApiRequest, + client: FactsClientDependency, + update_subscriber: DataUpdateSubscriberDependency, + wait_timeout: WaitTimeoutDependency, +): + return await forward_request_then_wait_for_update( + client, + request, + update_subscriber, + wait_timeout, + path="/users", + entries_callback=lambda r, body: [ + create_data_source_entry( + 
obj_type="users", + obj_id=body["id"], + obj_key=body["key"], + authorization_header=r.headers.get("Authorization"), + ) + ], + ) + + +@facts_router.post("/tenants") +async def create_tenant( + request: FastApiRequest, + client: FactsClientDependency, + update_subscriber: DataUpdateSubscriberDependency, + wait_timeout: WaitTimeoutDependency, +): + return await forward_request_then_wait_for_update( + client, + request, + update_subscriber, + wait_timeout, + path="/tenants", + entries_callback=lambda r, body: [ + create_data_source_entry( + obj_type="tenants", + obj_id=body["id"], + obj_key=body["key"], + authorization_header=r.headers.get("Authorization"), + ) + ], + ) + + +@facts_router.put("/users/{user_id}") +async def sync_user( + request: FastApiRequest, + client: FactsClientDependency, + update_subscriber: DataUpdateSubscriberDependency, + wait_timeout: WaitTimeoutDependency, + user_id: str, +): + return await forward_request_then_wait_for_update( + client, + request, + update_subscriber, + wait_timeout, + path=f"/users/{user_id}", + entries_callback=lambda r, body: [ + create_data_source_entry( + obj_type="users", + obj_id=body["id"], + obj_key=body["key"], + authorization_header=r.headers.get("Authorization"), + ) + ], + ) + + +@facts_router.patch("/users/{user_id}") +async def update_user( + request: FastApiRequest, + client: FactsClientDependency, + update_subscriber: DataUpdateSubscriberDependency, + wait_timeout: WaitTimeoutDependency, + user_id: str, +): + return await forward_request_then_wait_for_update( + client, + request, + update_subscriber, + wait_timeout, + path=f"/users/{user_id}", + entries_callback=lambda r, body: [ + create_data_source_entry( + obj_type="users", + obj_id=body["id"], + obj_key=body["key"], + authorization_header=r.headers.get("Authorization"), + ) + ], + ) + + +def create_role_assignment_data_entries(request: FastApiRequest, body: dict[str, Any]) -> Iterable[DataSourceEntry]: + if not body.get("resource_instance"): + yield 
create_data_source_entry( + obj_type="role_assignments", + obj_id=body["id"], + obj_key=f"user:{body['user']}", + authorization_header=request.headers.get("Authorization"), + ) + yield create_data_source_entry( + obj_type="users", + obj_id=body["user_id"], + obj_key=body["user"], + authorization_header=request.headers.get("Authorization"), + ) + else: + # note that user_id == subject_id, + # and user == user_key == subject_key == subject_str + yield create_data_source_entry( + obj_type="role_assignments", + obj_id=body["user_id"], + obj_key=body["user"], + authorization_header=request.headers.get("Authorization"), + ) + + +@facts_router.post("/users/{user_id}/roles") +async def assign_user_role( + request: FastApiRequest, + client: FactsClientDependency, + update_subscriber: DataUpdateSubscriberDependency, + wait_timeout: WaitTimeoutDependency, + user_id: str, +): + return await forward_request_then_wait_for_update( + client, + request, + update_subscriber, + wait_timeout, + path=f"/users/{user_id}/roles", + entries_callback=create_role_assignment_data_entries, + ) + + +@facts_router.post("/role_assignments") +async def create_role_assignment( + request: FastApiRequest, + client: FactsClientDependency, + update_subscriber: DataUpdateSubscriberDependency, + wait_timeout: WaitTimeoutDependency, +): + return await forward_request_then_wait_for_update( + client, + request, + update_subscriber, + wait_timeout, + path="/role_assignments", + entries_callback=create_role_assignment_data_entries, + ) + + +@facts_router.post("/resource_instances") +async def create_resource_instance( + request: FastApiRequest, + client: FactsClientDependency, + update_subscriber: DataUpdateSubscriberDependency, + wait_timeout: WaitTimeoutDependency, +): + return await forward_request_then_wait_for_update( + client, + request, + update_subscriber, + wait_timeout, + path="/resource_instances", + entries_callback=lambda r, body: [ + create_data_source_entry( + obj_type="resource_instances", + 
obj_id=body["id"], + obj_key=f"{body['resource']}:{body['key']}", + authorization_header=r.headers.get("Authorization"), + ), + ], + ) + + +@facts_router.patch("/resource_instances/{instance_id}") +async def update_resource_instance( + request: FastApiRequest, + client: FactsClientDependency, + update_subscriber: DataUpdateSubscriberDependency, + wait_timeout: WaitTimeoutDependency, + instance_id: str, +): + return await forward_request_then_wait_for_update( + client, + request, + update_subscriber, + wait_timeout, + path=f"/resource_instances/{instance_id}", + entries_callback=lambda r, body: [ + create_data_source_entry( + obj_type="resource_instances", + obj_id=body["id"], + obj_key=f"{body['resource']}:{body['key']}", + authorization_header=r.headers.get("Authorization"), + ), + ], + ) + + +@facts_router.post("/relationship_tuples") +async def create_relationship_tuple( + request: FastApiRequest, + client: FactsClientDependency, + update_subscriber: DataUpdateSubscriberDependency, + wait_timeout: WaitTimeoutDependency, +): + return await forward_request_then_wait_for_update( + client, + request, + update_subscriber, + wait_timeout, + path="/relationship_tuples", + entries_callback=lambda r, body: [ + create_data_source_entry( + obj_type="relationships", + obj_id=body["object_id"], + obj_key=body["object"], + authorization_header=r.headers.get("Authorization"), + ), + ], + ) + + +async def forward_request_then_wait_for_update( + client: FactsClient, + request: FastApiRequest, + update_subscriber: DataUpdateSubscriber, + wait_timeout: float | None, + *, + path: str, + entries_callback: Callable[[FastApiRequest, dict[str, Any]], Iterable[DataSourceEntry]], +) -> Response: + response = await client.send_forward_request(request, path) + body = client.extract_body(response) + if body is None: + return client.convert_response(response) + + try: + data_update_entry = create_data_update_entry(list(entries_callback(request, body))) + except KeyError as e: + 
logger.warning(f"Missing required field {e.args[0]} in the response body, skipping wait for update.") + return client.convert_response(response) + + await update_subscriber.publish_and_wait( + data_update_entry, + timeout=wait_timeout, + ) + return client.convert_response(response) + + +@facts_router.api_route( + "/{full_path:path}", + methods=["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"], + include_in_schema=False, +) +async def forward_remaining_requests(request: FastApiRequest, client: FactsClientDependency, full_path: str): + forward_request = await client.build_forward_request(request, full_path) + response = await client.send(forward_request, stream=True) + return client.convert_response(response, stream=True) diff --git a/horizon/facts/update_subscriber.py b/horizon/facts/update_subscriber.py new file mode 100644 index 00000000..a1c4355d --- /dev/null +++ b/horizon/facts/update_subscriber.py @@ -0,0 +1,99 @@ +import asyncio +from collections import defaultdict +from functools import wraps +from uuid import uuid4 + +from loguru import logger +from opal_client.data.updater import DataUpdater +from opal_common.schemas.data import DataUpdate, DataUpdateReport + + +class DataUpdateSubscriber: + def __init__(self, updater: DataUpdater): + self._updater = updater + self._updater._should_send_reports = True + self._notifier_id = uuid4().hex + self._update_listeners: dict[str, asyncio.Event] = defaultdict(asyncio.Event) + self._inject_subscriber() + + def _inject_subscriber(self): + reporter = self._updater.callbacks_reporter + reporter.report_update_results = self._reports_callback_decorator(reporter.report_update_results) + + def _reports_callback_decorator(self, func): + @wraps(func) + async def wrapper(report: DataUpdateReport, *args, **kwargs): + if report.update_id is not None: + self._resolve_listeners(report.update_id) + else: + logger.debug("Received report without update ID") + return await func(report, *args, **kwargs) + + return wrapper + + 
def _resolve_listeners(self, update_id: str) -> None: + event = self._update_listeners.get(update_id) + if event is not None: + logger.debug(f"Received acknowledgment for update ID {update_id!r}, resolving listener(s)") + event.set() + else: + logger.debug(f"Received acknowledgment for update ID {update_id!r}, but no listener found") + + async def wait_for_message(self, update_id: str, timeout: float | None = None) -> bool: + """ + Wait for a message with the given update ID to be received by the PubSub client. + :param update_id: id of the update to wait for + :param timeout: timeout in seconds + :return: True if the message was received, False if the timeout was reached + """ + logger.info(f"Waiting for update id={update_id!r}") + event = self._update_listeners[update_id] + try: + await asyncio.wait_for( + event.wait(), + timeout=timeout, + ) + return True + except asyncio.TimeoutError: + logger.warning(f"Timeout waiting for update id={update_id!r}") + return False + finally: + self._update_listeners.pop(update_id, None) + + async def publish(self, data_update: DataUpdate) -> bool: + await asyncio.sleep(0) # allow other wait task to run before publishing + topics = [topic for entry in data_update.entries for topic in entry.topics] + logger.debug( + f"Publishing data update with id={data_update.id!r} to topics {topics} as {self._notifier_id=}: " + f"{data_update}" + ) + return await self._updater._client.publish( + topics=topics, + data=data_update.dict(), + notifier_id=self._notifier_id, # we fake a different notifier id to make the other side broadcast + # the message back to our main channel + sync=False, # sync=False means we don't wait for the other side to acknowledge the message, + # as it causes a deadlock because we fake a different notifier id + ) + + async def publish_and_wait(self, data_update: DataUpdate, timeout: float | None = None) -> bool: + """ + Publish a data update and wait for it to be received by the PubSub client. 
+ :param data_update: DataUpdate object to publish + :param timeout: Wait timeout in seconds + :return: True if the message was received, False if the timeout was reached or the message failed to publish + """ + if timeout == 0: + return await self.publish(data_update) + + # Start waiting before publishing, to avoid the message being received before we start waiting + wait_task = asyncio.create_task( + self.wait_for_message(data_update.id, timeout=timeout), + ) + + if not await self.publish(data_update): + logger.warning("Failed to publish data entry. Aborting wait.") + wait_task.cancel() + return False + + return await wait_task diff --git a/horizon/local/api.py b/horizon/local/api.py index 5c14772b..7d382a80 100644 --- a/horizon/local/api.py +++ b/horizon/local/api.py @@ -1,290 +1,123 @@ -from typing import Any, Dict, List, Optional +from typing import Annotated, cast -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, Query +from loguru import logger from opal_client.policy_store.base_policy_store_client import BasePolicyStoreClient from opal_client.policy_store.policy_store_client_factory import ( DEFAULT_POLICY_STORE_GETTER, ) +from pydantic import parse_obj_as, parse_raw_as +from starlette.responses import Response from horizon.authentication import enforce_pdp_token -from horizon.local.schemas import Message, SyncedRole, SyncedUser +from horizon.config import sidecar_config +from horizon.factdb.policy_store import FactDBPolicyStoreClient +from horizon.local.schemas import ( + ListRoleAssignmentsFilters, + ListRoleAssignmentsPagination, + ListRoleAssignmentsPDPBody, + RoleAssignment, + RoleAssignmentFactDBFact, + WrappedResponse, +) + +PageQuery = Annotated[int, Query(ge=1, description="The page number (starts from 1).")] +PerPageQuery = Annotated[int, Query(ge=1, le=100, description="The number of results per page (max 100).")] def init_local_cache_api_router(policy_store: BasePolicyStoreClient = None): 
policy_store = policy_store or DEFAULT_POLICY_STORE_GETTER() router = APIRouter(dependencies=[Depends(enforce_pdp_token)]) - def error_message(msg: str): - return { - "model": Message, - "description": msg, - } - - async def get_data_for_synced_user(user_id: str) -> Dict[str, Any]: - response = await policy_store.get_data(f"/user_roles/{user_id}") - result = response.get("result", None) - if result is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"user with id '{user_id}' was not found in OPA cache! (not synced)", - ) - return result - - def permission_shortname(permission: Dict[str, Any]) -> Optional[str]: - resource = permission.get("resource", {}).get("type", None) - action = permission.get("action", None) - - if resource is None or action is None: - return None - return f"{resource}:{action}" - @router.get( - "/users/{user_id}", - response_model=SyncedUser, - responses={ - 404: error_message( - "User not found (i.e: not synced to Authorization service)" - ), - }, + "/role_assignments", + response_model=list[RoleAssignment], ) - async def get_user(user_id: str): - """ - Get user data directly from OPA cache. - - If user does not exist in OPA cache (i.e: not synced), returns 404. - """ - result = await get_data_for_synced_user(user_id) - roles = result.get("roles", []) - roles = [ - SyncedRole( - id=r.get("id"), - name=r.get("name"), - tenant_id=r.get("scope", {}).get("tenant", None), - ) - for r in roles - ] - user = SyncedUser( - id=user_id, - email=result.get("email", None), - name=result.get("name", None), - metadata=result.get("metadata", {}), - roles=roles, - ) - return user - - @router.get( - "/users", - response_model=List[SyncedUser], - responses={ - 404: error_message("OPA has no users stored in cache"), - }, - ) - async def list_users(): - """ - Get all users stored in OPA cache. - - Be advised, if you have many (i.e: few hundreds or more) users this query might be expensive latency-wise. 
- """ - response = await policy_store.get_data(f"/user_roles") - result = response.get("result", None) - if result is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"OPA has no users stored in cache! Did you synced users yet via the sdk or the cloud console?", - ) - users = [] - for user_id, user_data in iter(result.items()): - roles = user_data.get("roles", []) - roles = [ - SyncedRole( - id=r.get("id"), - name=r.get("name"), - tenant_id=r.get("scope", {}).get("tenant", None), - ) - for r in roles - ] - users.append( - SyncedUser( - id=user_id, - email=user_data.get("email", None), - name=user_data.get("name", None), - metadata=user_data.get("metadata", {}), - roles=roles, - ) - ) - return users - - @router.get( - "/users/{user_id}/roles", - response_model=List[SyncedRole], - responses={ - 404: error_message( - "User not found (i.e: not synced to Authorization service)" + async def list_role_assignments( + user: Annotated[ + str | None, + Query( + description="optional user filter, will only return role assignments granted to this user.", ), - }, - ) - async def get_user_roles(user_id: str): - """ - Get roles **assigned to user** directly from OPA cache. - - If user does not exist in OPA cache (i.e: not synced), returns 404. 
- """ - # will issue an opa request to get cached user data - result = await get_data_for_synced_user(user_id) - # will issue *another* opa request to list all roles, not just the roles for this user - cached_roles: List[SyncedRole] = await list_roles() - role_data = {role.id: role for role in cached_roles} - - raw_roles = result.get("roles", []) - - roles = [] - for r in raw_roles: - role_id = r.get("id") - roles.append( - SyncedRole( - id=role_id, - name=r.get("name"), - tenant_id=r.get("scope", {}).get("tenant", None), - metadata=role_data.get(role_id, {}).metadata, - permissions=role_data.get(role_id, {}).permissions, - ) - ) - return roles - - @router.get( - "/users/{user_id}/tenants", - response_model=List[str], - responses={ - 404: error_message( - "User not found (i.e: not synced to Authorization service)" + ] = None, + role: Annotated[ + str | None, + Query( + description="optional role filter, will only return role assignments granting this role.", ), - }, - ) - async def get_user_tenants(user_id: str): - """ - Get tenants **assigned to user** directly from OPA cache. - This endpoint only returns tenants that the user **has an assigned role in**. - i.e: if the user is assigned to tenant "tenant1" but has no roles in that tenant, - "tenant1" will not be returned by this endpoint. - - If user does not exist in OPA cache (i.e: not synced), returns 404. - """ - result = await get_data_for_synced_user(user_id) - roles = result.get("roles", []) - tenants = [r.get("scope", {}).get("tenant", None) for r in roles] - tenants = [tenant for tenant in tenants if tenant is not None] - return tenants - - @router.get( - "/roles", - response_model=List[SyncedRole], - responses={ - 404: error_message("OPA has no roles stored in cache"), - }, - ) - async def list_roles(): - """ - Get all roles stored in OPA cache. 
- """ - response = await policy_store.get_data(f"/role_permissions") - result = response.get("result", None) - if result is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"OPA has no roles stored in cache! Did you define roles yet via the sdk or the cloud console?", - ) - roles = [] - for role_id, role_data in iter(result.items()): - permissions = [ - permission_shortname(p) for p in role_data.get("permissions", []) - ] - permissions = [p for p in permissions if p is not None] - roles.append( - SyncedRole( - id=role_id, - name=role_data.get("name"), - metadata=role_data.get("metadata", {}), - permissions=permissions, - ) - ) - return roles - - @router.get( - "/roles/{role_id}", - response_model=SyncedRole, - responses={ - 404: error_message("Role not found"), - }, - ) - async def get_role_by_id(role_id: str): + ] = None, + tenant: Annotated[ + str | None, + Query( + description="optional tenant filter, will only return role assignments granted in that tenant.", + ), + ] = None, + resource: Annotated[ + str | None, + Query( + description="optional resource **type** filter, " + "will only return role assignments granted on that resource type.", + ), + ] = None, + resource_instance: Annotated[ + str | None, + Query( + description="optional resource instance filter, " + "will only return role assignments granted on that resource instance.", + ), + ] = None, + page: PageQuery = 1, + per_page: PerPageQuery = 30, + ) -> list[RoleAssignment]: """ - Get role (by the role id) directly from OPA cache. + Get all role assignments stored in the PDP. - If role is not found, returns 404. - Possible reasons are either: - - - role was never created via SDK or via the cloud console. - - role was (very) recently created and the policy update did not propagate yet. + You can filter the results by providing optional filters. 
""" - response = await policy_store.get_data(f"/role_permissions/{role_id}") - role_data = response.get("result", None) - if role_data is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"No such role in OPA cache!", - ) - permissions = [ - permission_shortname(p) for p in role_data.get("permissions", []) - ] - permissions = [p for p in permissions if p is not None] - role = SyncedRole( - id=role_id, - name=role_data.get("name"), - metadata=role_data.get("metadata", {}), - permissions=permissions, + filters = ListRoleAssignmentsFilters.construct( + user=user, + role=role, + tenant=tenant, + resource=resource, + resource_instance=resource_instance, + ).dict(exclude_none=True) + pagination = ListRoleAssignmentsPagination.construct( + page=page, + per_page=per_page, ) - return role - - @router.get( - "/roles/by-name/{role_name}", - response_model=SyncedRole, - responses={ - 404: error_message("Role not found"), - }, - ) - async def get_role_by_name(role_name: str): - """ - Get role (by the role name - case sensitive) directly from OPA cache. - If role is not found, returns 404. - Possible reasons are either: - - - role with such name was never created via SDK or via the cloud console. - - role was (very) recently created and the policy update did not propagate yet. 
- """ - response = await policy_store.get_data(f"/role_permissions") - result = response.get("result", None) - if result is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"OPA has no roles stored in cache!", + async def legacy_list_role_assignments() -> list[RoleAssignment]: + # the type hint of the get_data_with_input is incorrect, it claims it returns a dict but it + # actually returns a Response + result = cast( + Response | dict, + await policy_store.get_data_with_input( + "/permit/api/role_assignments/list_role_assignments", + ListRoleAssignmentsPDPBody.construct(filters=filters, pagination=pagination), + ), ) - for role_id, role_data in iter(result.items()): - name = role_data.get("name") - if name is None or name != role_name: - continue - permissions = [ - permission_shortname(p) for p in role_data.get("permissions", []) - ] - permissions = [p for p in permissions if p is not None] - return SyncedRole( - id=role_id, - name=name, - metadata=role_data.get("metadata", {}), - permissions=permissions, - ) - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, detail=f"No such role in OPA cache!" 
- ) + if isinstance(result, Response): + return parse_raw_as(WrappedResponse, result.body).result + else: + return parse_obj_as(WrappedResponse, result).result + + if sidecar_config.FACTDB_ENABLED: + if not isinstance(policy_store, FactDBPolicyStoreClient): + logger.warning( + "FactDB is enabled by policy store is not set to {store_type}", + store_type=FactDBPolicyStoreClient.__name__, + ) + return await legacy_list_role_assignments() + else: + res = await policy_store.list_facts_by_type( + "role_assignments", + page=page, + per_page=per_page, + filters=filters, + ) + res_json = parse_obj_as(list[RoleAssignmentFactDBFact], await res.json()) + return [fact.into_role_assignment() for fact in res_json] + else: + return await legacy_list_role_assignments() return router diff --git a/horizon/local/schemas.py b/horizon/local/schemas.py index dc720cde..d38f7e2e 100644 --- a/horizon/local/schemas.py +++ b/horizon/local/schemas.py @@ -1,6 +1,6 @@ -from typing import Any, Dict, List, Optional +from typing import Any -from pydantic import BaseModel +from pydantic import BaseModel, Field class BaseSchema(BaseModel): @@ -14,15 +14,114 @@ class Message(BaseModel): class SyncedRole(BaseSchema): id: str - name: str - tenant_id: Optional[str] - metadata: Optional[Dict[str, Any]] - permissions: Optional[List[str]] + name: str | None + tenant_id: str | None + metadata: dict[str, Any] | None + permissions: list[str] | None class SyncedUser(BaseSchema): id: str - name: Optional[str] - email: Optional[str] - metadata: Optional[Dict[str, Any]] - roles: List[SyncedRole] + name: str | None + email: str | None + metadata: dict[str, Any] | None + roles: list[SyncedRole] + + +class ListRoleAssignmentsFilters(BaseSchema): + user: str | None = None + role: str | None = None + tenant: str | None = None + resource: str | None = None + resource_instance: str | None = None + + +class ListRoleAssignmentsPagination(BaseSchema): + page: int = Field(1, ge=1, description="The page number to return") + 
per_page: int = Field(10, ge=1, le=100, description="The number of items to return per page") + + +class ListRoleAssignmentsPDPBody(BaseSchema): + filters: ListRoleAssignmentsFilters = Field(..., description="The filters to apply to the list") + pagination: ListRoleAssignmentsPagination = Field(..., description="The pagination settings") + + +class RoleAssignment(BaseSchema): + """ + The format of a role assignment + """ + + user: str = Field(..., description="the user the role is assigned to") + role: str = Field(..., description="the role that is assigned") + tenant: str = Field(..., description="the tenant the role is associated with") + resource_instance: str | None = Field(None, description="the resource instance the role is associated with") + + class Config: + schema_extra = { # noqa: RUF012 + "example": [ + { + "user": "jane@coolcompany.com", + "role": "admin", + "tenant": "stripe-inc", + }, + { + "user": "jane@coolcompany.com", + "role": "admin", + "tenant": "stripe-inc", + "resource_instance": "document:doc-1234", + }, + ] + } + + +class WrappedResponse(BaseSchema): + result: list[RoleAssignment] + + +class FactDBFact(BaseSchema): + type: str + attributes: dict[str, Any] + + +class RoleAssignmentFactDBFact(FactDBFact): + @property + def user(self) -> str: + return self.attributes.get("actor", "").removeprefix("user:") + + @property + def role(self) -> str: + return self.attributes.get("role", "") + + @property + def tenant(self) -> str: + return self.attributes.get("tenant", "") + + @property + def resource_instance(self) -> str | None: + resource = self.attributes.get("resource", "") + if not resource or resource.startswith("__tenant:"): + return None + return resource + + def into_role_assignment(self) -> RoleAssignment: + return RoleAssignment( + user=self.user, + role=self.role, + tenant=self.tenant, + resource_instance=self.resource_instance, + ) + + class Config: + schema_extra = { # noqa: RUF012 + "example": { + "type": "role_assignments", + 
"attributes": { + "actor": "user:author-user", + "id": "user:author-user-author-document:doc-1", + "last_modified": "2024-09-23 09:10:10 +0000 UTC", + "resource": "document:doc-1", + "role": "author", + "tenant": "default", + }, + } + } diff --git a/horizon/main.py b/horizon/main.py index c8d44567..d36f2dae 100644 --- a/horizon/main.py +++ b/horizon/main.py @@ -1,280 +1,13 @@ -import logging -import sys -from typing import List -from uuid import uuid4 - -from fastapi import Depends, FastAPI, status -from fastapi.responses import RedirectResponse -from logzio.handler import LogzioHandler -from opal_client.client import OpalClient -from opal_client.config import OpaLogFormat, opal_client_config, opal_common_config -from opal_client.opa.options import OpaServerOptions -from opal_common.confi import Confi -from opal_common.logger import Formatter, logger - -from horizon.authentication import enforce_pdp_token -from horizon.config import sidecar_config -from horizon.enforcer.api import init_enforcer_api_router -from horizon.enforcer.opa.config_maker import ( - get_opa_authz_policy_file_path, - get_opa_config_file_path, -) -from horizon.local.api import init_local_cache_api_router -from horizon.proxy.api import router as proxy_router -from horizon.startup.remote_config import RemoteConfigFetcher - -OPA_LOGGER_MODULE = "opal_client.opa.logger" - - -def apply_config(overrides_dict: dict, config_object: Confi): - """ - apply config values from dict into a confi object - """ - for key, value in overrides_dict.items(): - prefixed_key = config_object._prefix_key(key) - if key in config_object.entries: - setattr(config_object, key, value) - logger.info(f"Overriden config key: {prefixed_key}") - else: - logger.warning(f"Ignored non-existing config key: {prefixed_key}") - - -class PermitPDP: - """ - Permit.io PDP (Policy Decision Point) - - This process acts as a policy agents that is automatically configured by Permit.io cloud. 
- You only need an API key to configure this correctly. - - ----- - Implementation details: - The PDP is a thin wrapper on top of opal client. - - by extending opal client, it runs: - - a subprocess running the OPA agent (with opal client's opa runner) - - policy updater - - data updater - - it also run directly Permit.io specific apis: - - proxy api (proxies the REST api at api.permit.io to the sdks) - - local api (wrappers on top of opa cache) - - enforcer api (implementation of is_allowed()) - """ - - def __init__(self): - self._setup_temp_logger() - # fetch and apply config override from cloud control plane - remote_config = RemoteConfigFetcher().fetch_config() - - if not remote_config: - logger.warning( - "Could not fetch config from cloud control plane, reverting to local config!" - ) - else: - logger.info("Applying config overrides from cloud control plane...") - apply_config(remote_config.opal_common or {}, opal_common_config) - apply_config(remote_config.opal_client or {}, opal_client_config) - apply_config(remote_config.pdp or {}, sidecar_config) - - if ( - sidecar_config.OPA_BEARER_TOKEN_REQUIRED - or sidecar_config.OPA_DECISION_LOG_ENABLED - ): - # we need to pass to OPAL a custom inline OPA config to enable these features - self._configure_inline_opa_config() - - if sidecar_config.PRINT_CONFIG_ON_STARTUP: - logger.info( - "sidecar is loading with the following config:\n\n{sidecar_config}\n\n{opal_client_config}\n\n{opal_common_config}", - sidecar_config=sidecar_config.debug_repr(), - opal_client_config=opal_client_config.debug_repr(), - opal_common_config=opal_common_config.debug_repr(), - ) - - if sidecar_config.ENABLE_MONITORING: - self._configure_monitoring() - - self._opal = OpalClient() - self._configure_cloud_logging(remote_config.context) - - # use opal client app and add sidecar routes on top - app: FastAPI = self._opal.app - self._override_app_metadata(app) - self._configure_api_routes(app) - - self._app: FastAPI = app - - def 
_setup_temp_logger(self): - """ - until final config is set, we need to make sure sane defaults are in place - """ - # Clean slate - logger.remove() - # Logger configuration - logger.add( - sys.stdout, - format=sidecar_config.TEMP_LOG_FORMAT, - level="INFO", - backtrace=False, - diagnose=False, - colorize=True, - serialize=False, - ) - - def _configure_monitoring(self): - """ - patch fastapi to enable tracing and monitoring - """ - from ddtrace import config, patch - - # Datadog APM - patch(fastapi=True) - # Override service name - config.fastapi["service_name"] = "permit-pdp" - config.fastapi["request_span_name"] = "permit-pdp" - - def _configure_cloud_logging(self, remote_context: dict = {}): - if not sidecar_config.CENTRAL_LOG_ENABLED: - return - - if ( - not sidecar_config.CENTRAL_LOG_TOKEN - or len(sidecar_config.CENTRAL_LOG_TOKEN) == 0 - ): - logger.warning( - "Centralized log is enabled, but token is not valid. Disabling sink." - ) - return - - logzio_handler = LogzioHandler( - token=sidecar_config.CENTRAL_LOG_TOKEN, - logs_drain_timeout=sidecar_config.CENTRAL_LOG_DRAIN_TIMEOUT, - url=sidecar_config.CENTRAL_LOG_DRAIN_URL, - ) - formatter = Formatter(opal_common_config.LOG_FORMAT) - - # adds extra context to all loggers, helps identify between different sidecars. 
- extra_context = {} - extra_context["run_id"] = uuid4().hex - extra_context.update(remote_context) - - logger.info(f"Adding the following context to all loggers: {extra_context}") - - logger.configure(extra=extra_context) - logger.add( - logzio_handler, - serialize=True, - level=logging.INFO, - format=formatter.format, - colorize=False, # no colors - enqueue=True, # make sure logging to cloud is done asyncronously and thread-safe - catch=True, # if sink throws exceptions, swallow them as not critical - ) - - def _configure_inline_opa_config(self): - inline_opa_config = {} - - if sidecar_config.OPA_DECISION_LOG_ENABLED: - # decision logs needs to be configured via the config file - config_file_path = get_opa_config_file_path(sidecar_config) - - # append the config file to inline OPA config - inline_opa_config.update({"config_file": config_file_path}) - - if sidecar_config.OPA_BEARER_TOKEN_REQUIRED: - # overrides OPAL client config so that OPAL passes the bearer token in requests - opal_client_config.POLICY_STORE_AUTH_TOKEN = sidecar_config.API_KEY - - # append the bearer token authz policy to inline OPA config - auth_policy_file_path = get_opa_authz_policy_file_path(sidecar_config) - inline_opa_config.update( - { - "authorization": "basic", - "authentication": "token", - "files": [auth_policy_file_path], - } - ) - - logger.debug(f"setting OPAL_INLINE_OPA_CONFIG={inline_opa_config}") - - # apply inline OPA config to OPAL client config var - opal_client_config.INLINE_OPA_CONFIG = OpaServerOptions(**inline_opa_config) - - # override OPAL client default config to show OPA logs - if sidecar_config.OPA_DECISION_LOG_CONSOLE: - opal_client_config.INLINE_OPA_LOG_FORMAT = OpaLogFormat.FULL - exclude_list: List[str] = opal_common_config.LOG_MODULE_EXCLUDE_LIST.copy() - if OPA_LOGGER_MODULE in exclude_list: - exclude_list.remove(OPA_LOGGER_MODULE) - opal_common_config.LOG_MODULE_EXCLUDE_LIST = exclude_list - - def _override_app_metadata(self, app: FastAPI): - app.title = 
"Permit.io PDP" - app.description = ( - "The PDP (Policy decision point) container wraps Open Policy Agent (OPA) with a higher-level API intended for fine grained " - + "application-level authorization. The PDP automatically handles pulling policy updates in real-time " - + "from a centrally managed cloud-service (api.permit.io)." - ) - app.version = "0.2.0" - app.openapi_tags = sidecar_config.OPENAPI_TAGS_METADATA - return app - - def _configure_api_routes(self, app: FastAPI): - """ - mounts the api routes on the app object - """ - # Init api routers with required dependencies - enforcer_router = init_enforcer_api_router(policy_store=self._opal.policy_store) - local_router = init_local_cache_api_router(policy_store=self._opal.policy_store) - - # include the api routes - app.include_router( - enforcer_router, - tags=["Authorization API"], - dependencies=[Depends(enforce_pdp_token)], - ) - app.include_router( - local_router, - prefix="/local", - tags=["Local Queries"], - dependencies=[Depends(enforce_pdp_token)], - ) - app.include_router( - proxy_router, - tags=["Cloud API Proxy"], - dependencies=[Depends(enforce_pdp_token)], - ) - - # TODO: remove this when clients update sdk version (legacy routes) - @app.post( - "/update_policy", - status_code=status.HTTP_200_OK, - include_in_schema=False, - dependencies=[Depends(enforce_pdp_token)], - ) - async def legacy_trigger_policy_update(): - response = RedirectResponse(url="/policy-updater/trigger") - return response - - @app.post( - "/update_policy_data", - status_code=status.HTTP_200_OK, - include_in_schema=False, - dependencies=[Depends(enforce_pdp_token)], - ) - async def legacy_trigger_data_update(): - response = RedirectResponse(url="/data-updater/trigger") - return response - - @property - def app(self): - return self._app +from loguru import logger +from horizon.pdp import PermitPDP try: # expose app for Uvicorn sidecar = PermitPDP() app = sidecar.app -except Exception as ex: - logger.critical("Sidecar failed to 
start because of exception: {err}", err=ex) - raise SystemExit(1) +except SystemExit as e: + raise e +except Exception as e: + logger.opt(exception=True).critical("Sidecar failed to start because of exception: {err}", err=e) + raise SystemExit(1) from e diff --git a/horizon/opal_relay_api.py b/horizon/opal_relay_api.py new file mode 100644 index 00000000..ffe3fb17 --- /dev/null +++ b/horizon/opal_relay_api.py @@ -0,0 +1,194 @@ +import asyncio +import json +import time +from base64 import b64decode +from urllib.parse import urljoin +from uuid import UUID + +from aiohttp import ClientSession +from fastapi import status +from fastapi.encoders import jsonable_encoder +from loguru import logger +from opal_client.client import OpalClient +from opal_client.config import opal_client_config +from pydantic import BaseModel + +from horizon.config import sidecar_config +from horizon.startup.api_keys import get_env_api_key +from horizon.state import PersistentStateHandler + + +class RelayAPIError(Exception): + def __init__(self, service: str, status_code: int, message: str): + self.service = service + self.status_code = status_code + self.message = f"Relay API exception from {service} of {status_code}: {message}" + super().__init__(self.message) + + +class RelayJWTResponse(BaseModel): + token: str + + +class PDPPingPlatformPDPState(BaseModel): + version: str + os_name: str + os_machine: str + os_version: str + os_release: str + os_platform: str + python_version: str + python_implementation: str + + +class PDPPingPlatformOPAState(BaseModel): + version: str + go_version: str + platform: str + have_webassembly: bool + + +class PDPPingPlatformState(BaseModel): + pdp: PDPPingPlatformPDPState + opa: PDPPingPlatformOPAState + + +class PDPPingRequest(BaseModel): + pdp_instance_id: UUID + topics: list[str] + timestamp_ns: int + platform: PDPPingPlatformState + + +MAX_JWT_EXPIRY_BUFFER_TIME = 60 * 60 # 1 hour, has to be more than the ping interval + + +def get_jwt_expiry_time(jwt: str) -> int: 
+ # We parse it like this to avoid pulling in a full JWT library + claims = json.loads(b64decode(jwt.split(".")[1])) + return claims["exp"] + + +class OpalRelayAPIClient: + def __init__(self, context: dict[str, str], opal_client: OpalClient): + self._relay_session: ClientSession | None = None + self._api_session: ClientSession | None = None + self._relay_token: str | None = None + self._available = False + self._opal_client = opal_client + self._apply_context(context) + + @property + def available(self) -> bool: + return self._available + + def _apply_context(self, context: dict[str, str]): + if "org_id" in context and "project_id" in context and "env_id" in context: + try: + self._org_id = UUID(context["org_id"]) + self._project_id = UUID(context["project_id"]) + self._env_id = UUID(context["env_id"]) + self._available = True + except TypeError: + logger.warning("Got bad context from backend. Not enabling OPAL relay client.") + + def api_session(self) -> ClientSession: + if self._api_session is None: + env_api_key = get_env_api_key() + self._api_session = ClientSession(headers={"Authorization": f"Bearer {env_api_key}"}) + return self._api_session + + async def relay_session(self) -> ClientSession: + if ( + self._relay_token is None + or get_jwt_expiry_time(self._relay_token) - time.time() < MAX_JWT_EXPIRY_BUFFER_TIME + ): + async with self.api_session().post( + urljoin( + sidecar_config.CONTROL_PLANE_RELAY_JWT_TIER, + f"v2/relay_jwt/{self._org_id.hex}/{self._project_id.hex}/{self._env_id.hex}", + ), + json={ + "service_name": "opal_relay_api", + }, + ) as response: + if response.status != status.HTTP_200_OK: + text = await response.text() + raise RelayAPIError( + "relay-jwt-api", + response.status, + f"Server responded to token request with a bad status: {text}", + ) + try: + obj = RelayJWTResponse.parse_obj(await response.json()) + except TypeError: + try: + text = await response.text() + except Exception as e: + raise RelayAPIError( + "relay-jwt-api", + 
response.status, + f"Server responded to token request with an invalid result: {text}", + ) from e + self._relay_token = obj.token + self._relay_session = ClientSession(headers={"Authorization": f"Bearer {self._relay_token}"}) + return self._relay_session + + async def send_ping(self): + session = await self.relay_session() + # This is ugly but for now this is not exposed publically in OPAL + policy_topics = self._opal_client.policy_updater.topics + data_topics = opal_client_config.DATA_TOPICS + if opal_client_config.SCOPE_ID != "default": + data_topics = [f"{opal_client_config.SCOPE_ID}:data:{topic}" for topic in opal_client_config.DATA_TOPICS] + topics = data_topics + policy_topics + async with session.post( + urljoin(sidecar_config.CONTROL_PLANE_RELAY_API, "v2/pdp/ping"), + json=jsonable_encoder( + PDPPingRequest( + pdp_instance_id=PersistentStateHandler.get().pdp_instance_id, + topics=topics, + timestamp_ns=time.time_ns(), + platform=PDPPingPlatformState.parse_obj( + await asyncio.get_event_loop().run_in_executor(None, PersistentStateHandler.get_runtime_state) + ), + ) + ), + ) as response: + if response.status != status.HTTP_202_ACCEPTED: + try: + text = await response.text() + except Exception as e: + raise RelayAPIError( + "relay-api", + response.status, + f"Server responded to token request with a bad status: {text}", + ) from e + logger.debug("Sent ping.") + + async def _run(self): + while True: + try: + await self.send_ping() + except RelayAPIError as e: + logger.warning( + "Could not report uptime status to server: got status code {} from {}. " + "This does not affect the PDP's operational state or data updates.", + e.status_code, + e.service, + ) + except Exception as e: # noqa: BLE001 + logger.warning( + "Could not report uptime status to server: {}. 
This does not affect the PDP's operational state " + "or data updates.", + str(e), + ) + + await asyncio.sleep(sidecar_config.PING_INTERVAL) + + async def start(self): + self._task = asyncio.create_task(self._run()) + + async def initialize(self): + if self.available: + await self.start() diff --git a/horizon/pdp.py b/horizon/pdp.py new file mode 100644 index 00000000..2ca9eaf1 --- /dev/null +++ b/horizon/pdp.py @@ -0,0 +1,393 @@ +import logging +import sys +from pathlib import Path +from uuid import UUID, uuid4 + +from fastapi import Depends, FastAPI, status +from fastapi.responses import RedirectResponse +from loguru import logger +from logzio.handler import LogzioHandler +from opal_client.config import ( + ConnRetryOptions, + EngineLogFormat, + PolicyStoreAuth, + opal_client_config, + opal_common_config, +) +from opal_client.engine.options import OpaServerOptions +from opal_common.confi import Confi +from opal_common.logging_utils.formatter import Formatter + +from horizon.authentication import enforce_pdp_token +from horizon.config import MOCK_API_KEY, sidecar_config +from horizon.enforcer.api import init_enforcer_api_router, stats_manager +from horizon.enforcer.opa.config_maker import ( + get_opa_authz_policy_file_path, + get_opa_config_file_path, +) +from horizon.factdb.client import FactDBClient +from horizon.facts.router import facts_router +from horizon.local.api import init_local_cache_api_router +from horizon.opal_relay_api import OpalRelayAPIClient +from horizon.proxy.api import router as proxy_router +from horizon.startup.api_keys import get_env_api_key +from horizon.startup.exceptions import InvalidPDPTokenError +from horizon.startup.remote_config import get_remote_config +from horizon.state import PersistentStateHandler +from horizon.system.api import init_system_api_router +from horizon.system.consts import GUNICORN_EXIT_APP + +OPA_LOGGER_MODULE = "opal_client.opa.logger" + + +def apply_config(overrides_dict: dict, config_object: Confi): + """ + 
apply config values from dict into a confi object + """ + for key, value in overrides_dict.items(): + prefixed_key = config_object._prefix_key(key) + if key in config_object.entries: + try: + setattr( + config_object, + key, + config_object.entries[key].cast_from_json(value), + ) + except Exception as e: # noqa: BLE001 + logger.opt(exception=True).warning(f"Unable to set config key {prefixed_key} from overrides: {e}") + continue + logger.info(f"Overridden config key: {prefixed_key}") + continue + logger.warning(f"Ignored non-existing config key: {prefixed_key}") + + +class PermitPDP: + """ + Permit.io PDP (Policy Decision Point) + + This process acts as a policy agent that is automatically configured by Permit.io cloud. + You only need an API key to configure this correctly. + + ----- + Implementation details: + The PDP is a thin wrapper on top of opal client. + + By extending opal client, it runs: + - a subprocess running the OPA agent (with opal client's opa runner) + - policy updater + - data updater + + It also runs directly Permit.io specific apis: + - proxy api (proxies the REST api at api.permit.io to the sdks) + - local api (wrappers on top of opa cache) + - enforcer api (implementation of is_allowed()) + """ + + def __init__(self): + self._setup_temp_logger() + PersistentStateHandler.initialize(get_env_api_key()) + # fetch and apply config override from cloud control plane + try: + remote_config = get_remote_config() + except InvalidPDPTokenError as e: + logger.critical("An invalid API key was specified. Please verify the PDP_API_KEY environment variable.") + raise SystemExit(GUNICORN_EXIT_APP) from e + + if not remote_config: + logger.critical("No cloud configuration found. 
Exiting.") + raise SystemExit(GUNICORN_EXIT_APP) + + logger.info("Applying config overrides from cloud control plane...") + + apply_config(remote_config.opal_common or {}, opal_common_config) + apply_config(remote_config.opal_client or {}, opal_client_config) + apply_config(remote_config.pdp or {}, sidecar_config) + + self._log_environment(remote_config.context) + + if sidecar_config.OPA_BEARER_TOKEN_REQUIRED or sidecar_config.OPA_DECISION_LOG_ENABLED: + # we need to pass to OPAL a custom inline OPA config to enable these features + self._configure_inline_opa_config() + + self._configure_opal_data_updater() + self._configure_opal_offline_mode() + + if sidecar_config.PRINT_CONFIG_ON_STARTUP: + logger.info( + "sidecar is loading with the following config:" + "\n\n{sidecar_config}" + "\n\n{opal_client_config}" + "\n\n{opal_common_config}", + sidecar_config=sidecar_config.debug_repr(), + opal_client_config=opal_client_config.debug_repr(), + opal_common_config=opal_common_config.debug_repr(), + ) + + if sidecar_config.ENABLE_MONITORING: + self._configure_monitoring() + + self._opal = FactDBClient(shard_id=sidecar_config.SHARD_ID, data_topics=self._fix_data_topics()) + self._configure_cloud_logging(remote_config.context) + + self._opal_relay = OpalRelayAPIClient(remote_config.context, self._opal) + self._opal.data_updater.callbacks_reporter.set_user_data_handler( + PersistentStateHandler.get_instance().reporter_user_data_handler + ) + + # use opal client app and add sidecar routes on top + app: FastAPI = self._opal.app + app.state.opal_client = self._opal + self._override_app_metadata(app) + self._configure_api_routes(app) + + self._app: FastAPI = app + + @app.on_event("startup") + async def _initialize_opal_relay(): + await self._opal_relay.initialize() + + def _setup_temp_logger(self): + """ + until final config is set, we need to make sure sane defaults are in place + """ + # Clean slate + logger.remove() + # Logger configuration + logger.add( + sys.stdout, + 
format=sidecar_config.TEMP_LOG_FORMAT, + level="INFO", + backtrace=False, + diagnose=False, + colorize=True, + serialize=False, + ) + + def _log_environment(self, pdp_context: dict[str, str]): + if "org_id" not in pdp_context or "project_id" not in pdp_context or "env_id" not in pdp_context: + logger.warning("Didn't get org_id, project_id, or env_id context from backend.") + return + logger.info("PDP started at: ") + logger.info(" org_id: {}", UUID(pdp_context["org_id"])) + logger.info(" project_id: {}", UUID(pdp_context["project_id"])) + logger.info(" env_id: {}", UUID(pdp_context["env_id"])) + + def _configure_monitoring(self): + """ + patch fastapi to enable tracing and monitoring + """ + from ddtrace import config, patch + + # Datadog APM + patch(fastapi=True) + # Override service name + config.fastapi["service_name"] = "permit-pdp" + config.fastapi["request_span_name"] = "permit-pdp" + + def _configure_cloud_logging(self, remote_context: dict | None = None): + if not sidecar_config.CENTRAL_LOG_ENABLED: + return + + if not sidecar_config.CENTRAL_LOG_TOKEN or len(sidecar_config.CENTRAL_LOG_TOKEN) == 0: + logger.warning("Centralized log is enabled, but token is not valid. Disabling sink.") + return + + logzio_handler = LogzioHandler( + token=sidecar_config.CENTRAL_LOG_TOKEN, + logs_drain_timeout=sidecar_config.CENTRAL_LOG_DRAIN_TIMEOUT, + url=sidecar_config.CENTRAL_LOG_DRAIN_URL, + ) + formatter = Formatter(opal_common_config.LOG_FORMAT) + + # adds extra context to all loggers, helps identify between different sidecars. 
+ extra_context = {} + extra_context["run_id"] = uuid4().hex + extra_context.update(remote_context or {}) + + logger.info(f"Adding the following context to all loggers: {extra_context}") + + logger.configure(extra=extra_context) + logger.add( + logzio_handler, + serialize=True, + level=logging.INFO, + format=formatter.format, + colorize=False, # no colors + enqueue=True, # make sure logging to cloud is done asyncronously and thread-safe + catch=True, # if sink throws exceptions, swallow them as not critical + ) + + def _configure_inline_opa_config(self): + # Start from the existing config + inline_opa_config = opal_client_config.INLINE_OPA_CONFIG.dict() + + logger.debug(f"existing OPAL_INLINE_OPA_CONFIG={inline_opa_config}") + + if sidecar_config.OPA_DECISION_LOG_ENABLED: + # decision logs needs to be configured via the config file + config_file_path = get_opa_config_file_path(sidecar_config) + + # append the config file to inline OPA config + inline_opa_config.update({"config_file": config_file_path}) + + if sidecar_config.OPA_BEARER_TOKEN_REQUIRED: + # overrides OPAL client config so that OPAL passes the bearer token in requests + opal_client_config.POLICY_STORE_AUTH_TOKEN = get_env_api_key() + opal_client_config.POLICY_STORE_AUTH_TYPE = PolicyStoreAuth.TOKEN + + # append the bearer token authz policy to inline OPA config + auth_policy_file_path = get_opa_authz_policy_file_path(sidecar_config) + inline_opa_config.update( + { + "authorization": "basic", + "authentication": "token", + "files": [auth_policy_file_path], + } + ) + + logger.debug(f"setting OPAL_INLINE_OPA_CONFIG={inline_opa_config}") + + # apply inline OPA config to OPAL client config var + opal_client_config.INLINE_OPA_CONFIG = OpaServerOptions(**inline_opa_config) + + # override OPAL client default config to show OPA logs + if sidecar_config.OPA_DECISION_LOG_CONSOLE: + opal_client_config.INLINE_OPA_LOG_FORMAT = EngineLogFormat.FULL + exclude_list: list[str] = 
opal_common_config.LOG_MODULE_EXCLUDE_LIST.copy() + if OPA_LOGGER_MODULE in exclude_list: + exclude_list.remove(OPA_LOGGER_MODULE) + opal_common_config.LOG_MODULE_EXCLUDE_LIST = exclude_list + + def _configure_opal_data_updater(self): + # Retry 14 times with (random) exponential backoff (wait times up to 1, 2, 4, 6, 8, 16, 32, 64, 128, 256 secs), + # and overall timeout of 64 seconds + opal_client_config.DATA_UPDATER_CONN_RETRY = ConnRetryOptions( + wait_strategy="random_exponential", + attempts=14, + wait_time=1, + ) + + def _configure_opal_offline_mode(self): + """ + configure opal to use offline mode when enabled + """ + opal_client_config.OFFLINE_MODE_ENABLED = sidecar_config.ENABLE_OFFLINE_MODE + opal_client_config.STORE_BACKUP_PATH = ( + Path(sidecar_config.OFFLINE_MODE_BACKUP_DIR) / sidecar_config.OFFLINE_MODE_POLICY_BACKUP_FILENAME + ) + + def _fix_data_topics(self) -> list[str]: + """ + This is a workaround for the following issue: + Permit backend services use the topic 'policy_data/{client_id}' to configure PDPs and to publish data updates. + However, opal-server is configured to return DataSourceConfig with the topic 'policy_data' + (without the client_id suffix) from `/scope/{client_id}/data` endpoint. + In the new OPAL client, this is an issue since data updater validates DataSourceConfig's topics against its + configured data topics. + + Simply fixing the backend to use the shorter topic everywhere is problematic since it would require a breaking + change / migration for all clients. + The shorter version logically includes the longer version so it's fine having OPAL listen to the shorter + version when updates are still published to the longer one. + + We don't edit `opal_client_config.DATA_TOPICS` directly because relay's ping reports it - + and reported subscribed topics are expected to match the topics used in publish. 
+ (relay ignores the hierarchical structure of topics - this could be fixed in the future) + """ + if opal_client_config.SCOPE_ID == "default": + return opal_client_config.DATA_TOPICS + + return [ + topic.removesuffix(f"/{opal_client_config.SCOPE_ID}") # Only remove suffix if it's of the expected form + for topic in opal_client_config.DATA_TOPICS + ] + + def _override_app_metadata(self, app: FastAPI): + app.title = "Permit.io PDP" + app.description = ( + "The PDP (Policy decision point) container wraps Open Policy Agent (OPA) with a higher-level API " + "intended for fine grained application-level authorization. The PDP automatically handles pulling policy " + "updates in real-time from a centrally managed cloud-service (api.permit.io)." + ) + app.version = "0.2.0" + app.openapi_tags = sidecar_config.OPENAPI_TAGS_METADATA + return app + + def _configure_api_routes(self, app: FastAPI): + """ + mounts the api routes on the app object + """ + + # Init api routers with required dependencies + app.on_event("startup")(stats_manager.run) + app.on_event("shutdown")(stats_manager.stop_tasks) + + enforcer_router = init_enforcer_api_router(policy_store=self._opal.policy_store) + local_router = init_local_cache_api_router(policy_store=self._opal.policy_store) + # Init system router + system_router = init_system_api_router() + + # include the api routes + app.include_router( + enforcer_router, + tags=["Authorization API"], + ) + + app.include_router( + local_router, + prefix="/local", + tags=["Local Queries"], + dependencies=[Depends(enforce_pdp_token)], + ) + app.include_router( + system_router, + include_in_schema=False, + ) + app.include_router( + proxy_router, + tags=["Cloud API Proxy"], + dependencies=[Depends(enforce_pdp_token)], + ) + app.include_router( + facts_router, + prefix="/facts", + tags=["Local Facts API"], + dependencies=[Depends(enforce_pdp_token)], + ) + app.include_router( + facts_router, + prefix="/v2/facts/{proj_id}/{env_id}", + tags=["Local Facts API 
(compat)"], + include_in_schema=False, + dependencies=[Depends(enforce_pdp_token)], + ) + + # TODO: remove this when clients update sdk version (legacy routes) + @app.post( + "/update_policy", + status_code=status.HTTP_200_OK, + include_in_schema=False, + dependencies=[Depends(enforce_pdp_token)], + ) + async def legacy_trigger_policy_update(): + response = RedirectResponse(url="/policy-updater/trigger") + return response + + @app.post( + "/update_policy_data", + status_code=status.HTTP_200_OK, + include_in_schema=False, + dependencies=[Depends(enforce_pdp_token)], + ) + async def legacy_trigger_data_update(): + response = RedirectResponse(url="/data-updater/trigger") + return response + + @property + def app(self): + return self._app + + def _verify_config(self): + if get_env_api_key() == MOCK_API_KEY: + logger.critical("No API key specified. Please specify one with the PDP_API_KEY environment variable.") + raise SystemExit(GUNICORN_EXIT_APP) diff --git a/horizon/proxy/api.py b/horizon/proxy/api.py index efe20435..37729bba 100644 --- a/horizon/proxy/api.py +++ b/horizon/proxy/api.py @@ -1,6 +1,6 @@ import json import re -from typing import Any, Dict, List, Optional +from typing import Any from urllib.parse import urlparse import aiohttp @@ -19,7 +19,6 @@ HTTP_PUT = "PUT" HTTP_PATCH = "PATCH" - ALL_METHODS = [ HTTP_GET, HTTP_DELETE, @@ -38,9 +37,7 @@ class JSONPatchAction(BaseModel): op: str = Field(..., description="patch action to perform") path: str = Field(..., description="target location in modified json") - value: Optional[Dict[str, Any]] = Field( - None, description="json document, the operand of the action" - ) + value: dict[str, Any] | None = Field(None, description="json document, the operand of the action") router = APIRouter() @@ -63,16 +60,14 @@ async def patch_handler(response: Response) -> Response: try: store = OpalClientConfig.load_policy_store() - patch = parse_obj_as(List[JSONPatchAction], patch_json) + patch = 
parse_obj_as(list[JSONPatchAction], patch_json) await store.patch_data("", patch) - except Exception as ex: - logger.error("Failed to update OPAL store with: {err}", err=ex) + except Exception as ex: # noqa: BLE001 + logger.exception("Failed to update OPAL store with: {err}", err=ex) del response_json["patch"] del response.headers["Content-Length"] - return JSONResponse( - response_json, status_code=response.status_code, headers=dict(response.headers) - ) + return JSONResponse(response_json, status_code=response.status_code, headers=dict(response.headers)) write_routes = { @@ -83,14 +78,18 @@ async def patch_handler(response: Response) -> Response: } -@router.api_route("/cloud/{path:path}", methods=ALL_METHODS, summary="Proxy Endpoint") +@router.api_route( + "/cloud/{path:path}", + methods=ALL_METHODS, + summary="Proxy Endpoint", + include_in_schema=False, +) async def cloud_proxy(request: Request, path: str): """ Proxies the request to the cloud API. Actual API docs are located here: https://api.permit.io/redoc """ write_route = any( - request.method == route[0] and route[1].match(request.path_params["path"]) - for route in write_routes + request.method == route[0] and route[1].match(request.path_params["path"]) for route in write_routes ) headers = {} @@ -110,6 +109,48 @@ async def cloud_proxy(request: Request, path: str): return response +@router.api_route( + "/healthchecks/opa/ready", + methods=[HTTP_GET], + summary="Proxy ready healthcheck - OPAL_OPA_HEALTH_CHECK_POLICY_ENABLED must be set to True", +) +async def ready_opa_healthcheck(request: Request): + return await proxy_request_to_cloud_service( + request, + path="v1/data/system/opal/ready", + cloud_service_url=opal_client_config.POLICY_STORE_URL, + additional_headers={}, + ) + + +@router.api_route( + "/healthchecks/opa/healthy", + methods=[HTTP_GET], + summary="Proxy healthy healthcheck - OPAL_OPA_HEALTH_CHECK_POLICY_ENABLED must be set to True", +) +async def health_opa_healthcheck(request: Request): + 
return await proxy_request_to_cloud_service( + request, + path="v1/data/system/opal/healthy", + cloud_service_url=opal_client_config.POLICY_STORE_URL, + additional_headers={}, + ) + + +@router.api_route( + "/healthchecks/opa/system", + methods=[HTTP_GET], + summary="Proxy system data - OPAL_OPA_HEALTH_CHECK_POLICY_ENABLED must be set to True", +) +async def system_opa_healthcheck(request: Request): + return await proxy_request_to_cloud_service( + request, + path="v1/data/system/opal", + cloud_service_url=opal_client_config.POLICY_STORE_URL, + additional_headers={}, + ) + + # TODO: remove this once we migrate all clients @router.api_route( "/sdk/{path:path}", @@ -130,7 +171,7 @@ async def proxy_request_to_cloud_service( request: Request, path: str, cloud_service_url: str, - additional_headers: Dict[str, str], + additional_headers: dict[str, str], ) -> Response: auth_header = request.headers.get("Authorization") if auth_header is None: @@ -147,52 +188,40 @@ async def proxy_request_to_cloud_service( # copy only required header for header_name in REQUIRED_HTTP_HEADERS: - if header_name in original_headers.keys(): + if header_name in original_headers: headers[header_name] = original_headers[header_name] # override host header (required by k8s ingress) try: headers["host"] = urlparse(cloud_service_url).netloc - except Exception as e: + except Exception as e: # noqa: BLE001 # fallback - logger.error( - f"could not urlparse cloud service url: {cloud_service_url}, exception: {e}" - ) + logger.error(f"could not urlparse cloud service url: {cloud_service_url}, exception: {e}") logger.info(f"Proxying request: {request.method} {path}") async with aiohttp.ClientSession() as session: if request.method == HTTP_GET: - async with session.get( - path, headers=headers, params=params - ) as backend_response: + async with session.get(path, headers=headers, params=params) as backend_response: return await proxy_response(backend_response) if request.method == HTTP_DELETE: - async with 
session.delete( - path, headers=headers, params=params - ) as backend_response: + async with session.delete(path, headers=headers, params=params) as backend_response: return await proxy_response(backend_response) # these methods has data payload data = await request.body() if request.method == HTTP_POST: - async with session.post( - path, headers=headers, data=data, params=params - ) as backend_response: + async with session.post(path, headers=headers, data=data, params=params) as backend_response: return await proxy_response(backend_response) if request.method == HTTP_PUT: - async with session.put( - path, headers=headers, data=data, params=params - ) as backend_response: + async with session.put(path, headers=headers, data=data, params=params) as backend_response: return await proxy_response(backend_response) if request.method == HTTP_PATCH: - async with session.patch( - path, headers=headers, data=data, params=params - ) as backend_response: + async with session.patch(path, headers=headers, data=data, params=params) as backend_response: return await proxy_response(backend_response) raise HTTPException( diff --git a/horizon/startup/__init__.py b/horizon/startup/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/horizon/startup/api_keys.py b/horizon/startup/api_keys.py new file mode 100644 index 00000000..718289dd --- /dev/null +++ b/horizon/startup/api_keys.py @@ -0,0 +1,135 @@ +import requests +from opal_common.logger import logger +from tenacity import retry, retry_if_not_exception_type, stop, wait + +from horizon.config import MOCK_API_KEY, ApiKeyLevel, sidecar_config +from horizon.startup.blocking_request import BlockingRequest +from horizon.startup.exceptions import NoRetryError +from horizon.system.consts import GUNICORN_EXIT_APP + +DEFAULT_RETRY_CONFIG = { + "retry": retry_if_not_exception_type(NoRetryError), + "wait": wait.wait_random_exponential(max=10), + "stop": stop.stop_after_attempt(10), + "reraise": True, +} + + +class 
EnvApiKeyFetcher: + def __init__( + self, + backend_url: str = sidecar_config.CONTROL_PLANE, + retry_config=None, + ): + self._backend_url = backend_url + self._retry_config = retry_config or DEFAULT_RETRY_CONFIG + self.api_key_level = self._get_api_key_level() + + @staticmethod + def _get_api_key_level() -> ApiKeyLevel: + if sidecar_config.API_KEY != MOCK_API_KEY: + if sidecar_config.ORG_API_KEY or sidecar_config.PROJECT_API_KEY: + logger.warning( + "PDP_API_KEY is set, but PDP_ORG_API_KEY or PDP_PROJECT_API_KEY are also set and will be ignored." + ) + return ApiKeyLevel.ENVIRONMENT + + if sidecar_config.PROJECT_API_KEY: + if sidecar_config.ORG_API_KEY: + logger.warning("PDP_PROJECT_API_KEY is set, but PDP_ORG_API_KEY is also set and will be ignored.") + if not sidecar_config.ACTIVE_ENV: + logger.error( + "PDP_PROJECT_API_KEY is set, but PDP_ACTIVE_ENV is not. Please set it with Environment ID or Key." + ) + raise + return ApiKeyLevel.PROJECT + + if sidecar_config.ORG_API_KEY: + if not sidecar_config.ACTIVE_ENV or not sidecar_config.ACTIVE_PROJECT: + logger.error( + "PDP_ORG_API_KEY is set, but PDP_ACTIVE_ENV or PDP_ACTIVE_PROJECT are not. " + "Please set them with Environment ID/Key and Project ID/Key." + ) + raise + return ApiKeyLevel.ORGANIZATION + + logger.critical("No API key specified. Please specify one with the PDP_API_KEY environment variable.") + raise + + def get_env_api_key_by_level(self) -> str: + api_key_level = self.api_key_level + api_key = sidecar_config.ORG_API_KEY + active_project_id = sidecar_config.ACTIVE_PROJECT + active_env_id = sidecar_config.ACTIVE_ENV + + if api_key_level == ApiKeyLevel.ENVIRONMENT: + return sidecar_config.API_KEY + if api_key_level == ApiKeyLevel.PROJECT: + api_key = sidecar_config.PROJECT_API_KEY + active_project_id = get_scope(sidecar_config.ORG_API_KEY).get("project_id") + if not active_project_id: + logger.error( + "PDP_PROJECT_API_KEY is set, but failed to get Project ID from provided Organization API Key." 
+ ) + raise + return self._fetch_env_key(api_key, active_project_id, active_env_id) + + def _fetch_env_key(self, api_key: str, active_project_key: str, active_env_key: str) -> str: + """ + fetches the active environment's API Key by identifying with the provided Project/Organization API Key. + """ + api_key_url = f"{self._backend_url}/v2/api-key/{active_project_key}/{active_env_key}" + logger.info("Fetching Environment API Key from control plane: {url}", url=api_key_url) + fetch_with_retry = retry(**self._retry_config)( + lambda: BlockingRequest( + token=api_key, + ).get(url=api_key_url) + ) + try: + secret = fetch_with_retry().get("secret") + if secret is None: + logger.error("No secret found in response from control plane") + raise + return secret + + except requests.RequestException as e: + logger.warning(f"Failed to get Environment API Key: {e}") + raise + + def fetch_scope(self, api_key: str) -> dict | None: + """ + fetches the provided Project/Organization Scope. + """ + api_key_url = f"{self._backend_url}/v2/api-key/scope" + logger.info("Fetching Scope from control plane: {url}", url=api_key_url) + fetch_with_retry = retry(**self._retry_config)( + lambda: BlockingRequest( + token=api_key, + ).get(url=api_key_url) + ) + try: + return fetch_with_retry() + except requests.RequestException: + logger.warning("Failed to get scope from provided API Key") + return + + +_env_api_key: str | None = None + + +def get_env_api_key() -> str: + global _env_api_key + if not _env_api_key: + try: + _env_api_key = EnvApiKeyFetcher().get_env_api_key_by_level() + except Exception as e: + logger.error(f"Failed to get Environment API Key: {e}") + raise SystemExit(GUNICORN_EXIT_APP) from e + return _env_api_key + + +def get_scope(api_key: str) -> dict: + if scope := EnvApiKeyFetcher().fetch_scope(api_key) is None: + logger.warning("Failed to get scope from provided API Key") + raise + return scope diff --git a/horizon/startup/blocking_request.py b/horizon/startup/blocking_request.py 
new file mode 100644 index 00000000..2ea3af2e --- /dev/null +++ b/horizon/startup/blocking_request.py @@ -0,0 +1,41 @@ +from typing import Any + +import requests + +from horizon.startup.exceptions import InvalidPDPTokenError + + +class BlockingRequest: + def __init__(self, token: str | None, extra_headers: dict[str, Any] | None = None): + self._token = token + self._extra_headers = {k: v for k, v in (extra_headers or {}).items() if v is not None} + + def _headers(self) -> dict[str, str]: + headers = {} + if self._token is not None: + headers["Authorization"] = f"Bearer {self._token}" + + headers.update(self._extra_headers) + return headers + + def get(self, url: str, params=None) -> dict: + """ + utility method to send a *blocking* HTTP GET request and get the response back. + """ + response = requests.get(url, headers=self._headers(), params=params) + + if response.status_code == 401: + raise InvalidPDPTokenError() + + return response.json() + + def post(self, url: str, payload: dict | None = None, params=None) -> dict: + """ + utility method to send a *blocking* HTTP POST request with a JSON payload and get the response back. + """ + response = requests.post(url, json=payload, headers=self._headers(), params=params) + + if response.status_code == 401: + raise InvalidPDPTokenError() + + return response.json() diff --git a/horizon/startup/exceptions.py b/horizon/startup/exceptions.py new file mode 100644 index 00000000..a8de5bfd --- /dev/null +++ b/horizon/startup/exceptions.py @@ -0,0 +1,4 @@ +class NoRetryError(Exception): ... + + +class InvalidPDPTokenError(NoRetryError): ... 
diff --git a/horizon/startup/offline_mode.py b/horizon/startup/offline_mode.py new file mode 100644 index 00000000..5214ee68 --- /dev/null +++ b/horizon/startup/offline_mode.py @@ -0,0 +1,84 @@ +import base64 +import secrets +from pathlib import Path + +from cryptography.fernet import Fernet +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives import hashes +from cryptography.hazmat.primitives.kdf.hkdf import HKDF +from opal_common.logger import logger +from pydantic import ValidationError + +from horizon.startup.schemas import RemoteConfig, RemoteConfigBackup + + +class OfflineModeManager: + """ + A backup for the remote config, in case the sidecar can't fetch the remote config. + """ + + def __init__(self, backup_path: Path, api_key: str): + self._backup_path = backup_path + self._api_key = api_key + + def _derive_backup_key(self, salt: bytes | None = None) -> tuple[bytes, bytes]: + salt = secrets.token_bytes(16) if salt is None else base64.urlsafe_b64decode(salt) + + hkdf = HKDF( + algorithm=hashes.SHA256(), + length=32, + salt=salt, + info=b"Sidecar's local remote-config backup Key", + backend=default_backend(), + ) + # We don't bother extracting the actual cryptographic bytes from the API key + # (which has a urlsafe encoding + a prefix), + # The 512-bit entropy is still there, and HKDF's extract phase handles inputs of non-uniform randomness. 
+ key_bytes = hkdf.derive(self._api_key.encode("utf-8")) + return base64.urlsafe_b64encode(key_bytes), base64.urlsafe_b64encode(salt) + + def backup_config(self, remote_config: RemoteConfig): + logger.info( + "Backing up remote config to {path}", + path=self._backup_path, + ) + + enc_key, salt = self._derive_backup_key() + self._backup_path.parent.mkdir(parents=True, exist_ok=True) + try: + content = RemoteConfigBackup( + enc_remote_config=Fernet(enc_key).encrypt(remote_config.json(ensure_ascii=False).encode()), + key_derivation_salt=salt, + ).json(ensure_ascii=False) + self._backup_path.write_text(content) + except Exception as e: # noqa: BLE001 + logger.exception(f"Failed to backup sidecar config: {e}") + + def restore_config(self) -> RemoteConfig | None: + logger.info( + "Loading config from local backup at {path}", + path=self._backup_path, + ) + remote_config_backup: RemoteConfigBackup + try: + remote_config_backup = RemoteConfigBackup.parse_file(self._backup_path) + except FileNotFoundError: + logger.warning("Local backup file of sidecar config not found") + return None + except ValidationError: + logger.error("Failed to parse sidecar config backup file") + return None + + dec_key, _ = self._derive_backup_key(remote_config_backup.key_derivation_salt) + return RemoteConfig.parse_raw(Fernet(dec_key).decrypt(remote_config_backup.enc_remote_config)) + + def process_remote_config(self, remote_config: RemoteConfig | None) -> RemoteConfig | None: + if remote_config is None: + # Cloud fetch failed, try to restore from backup + remote_config = self.restore_config() + else: + # Cloud fetch succeeded, backup the fetched config + self.backup_config(remote_config) + + # We handle enabling OPAL's offline mode in pdp.py + return remote_config diff --git a/horizon/startup/remote_config.py b/horizon/startup/remote_config.py index 90f89655..d1e2d586 100644 --- a/horizon/startup/remote_config.py +++ b/horizon/startup/remote_config.py @@ -1,4 +1,4 @@ -from typing import Optional 
+from pathlib import Path import requests from opal_common.logger import logger @@ -6,24 +6,19 @@ from tenacity import retry, retry_if_not_exception_type, stop, wait from horizon.config import sidecar_config +from horizon.startup.api_keys import get_env_api_key +from horizon.startup.blocking_request import BlockingRequest +from horizon.startup.exceptions import NoRetryError +from horizon.startup.offline_mode import OfflineModeManager from horizon.startup.schemas import RemoteConfig +from horizon.state import PersistentStateHandler - -class InvalidPDPTokenException(Exception): - pass - - -def blocking_get_request(url: str, token: str, params=None) -> dict: - """ - utility method to send a *blocking* HTTP GET request and get the response back. - """ - headers = {"Authorization": f"Bearer {token}"} if token is not None else {} - response = requests.get(url, headers=headers, params=params) - - if response.status_code == 401: - raise InvalidPDPTokenException() - - return response.json() +DEFAULT_RETRY_CONFIG = { + "retry": retry_if_not_exception_type(NoRetryError), + "wait": wait.wait_random_exponential(max=5), + "stop": stop.stop_after_attempt(sidecar_config.CONFIG_FETCH_MAX_RETRIES), + "reraise": True, +} class RemoteConfigFetcher: @@ -50,18 +45,11 @@ class RemoteConfigFetcher: organizations (which is not secure). 
""" - DEFAULT_RETRY_CONFIG = { - "retry": retry_if_not_exception_type(InvalidPDPTokenException), - "wait": wait.wait_random_exponential(max=10), - "stop": stop.stop_after_attempt(10), - "reraise": True, - } - def __init__( self, - backend_url: str = sidecar_config.BACKEND_SERVICE_URL, - sidecar_access_token: str = sidecar_config.API_KEY, + backend_url: str = sidecar_config.CONTROL_PLANE, remote_config_route: str = sidecar_config.REMOTE_CONFIG_ENDPOINT, + shard_id: str | None = sidecar_config.SHARD_ID, retry_config=None, ): """ @@ -72,13 +60,13 @@ def __init__( sidecar_access_token (string, optional): access token identifying this client (sidecar) to the backend remote_config_route (string, optional): api route to fetch sidecar config """ - self._url = f"{backend_url}/{remote_config_route}" - self._token = sidecar_access_token - self._retry_config = ( - retry_config if retry_config is not None else self.DEFAULT_RETRY_CONFIG - ) + self._url = f"{backend_url}{remote_config_route}" + self._backend_url = backend_url + self._token = get_env_api_key() + self._retry_config = retry_config if retry_config is not None else DEFAULT_RETRY_CONFIG + self._shard_id = shard_id - def fetch_config(self) -> Optional[RemoteConfig]: + def fetch_config(self) -> RemoteConfig | None: """ fetches the sidecar config by identifying with the sidecar access token. if failed to get config from backend, returns None. @@ -88,7 +76,7 @@ def fetch_config(self) -> Optional[RemoteConfig]: try: return fetch_with_retry() except requests.RequestException: - logger.warning("Failed to get PDP config") + logger.warning("Failed to get PDP config from control plane") return None def _fetch_config(self) -> RemoteConfig: @@ -103,22 +91,36 @@ def _fetch_config(self) -> RemoteConfig: However, this is ok because the RemoteConfigFetcher runs *once* when the sidecar starts. 
""" try: - response = blocking_get_request(url=self._url, token=self._token) + response = BlockingRequest(token=self._token, extra_headers={"X-Shard-ID": self._shard_id}).post( + url=self._url, payload=PersistentStateHandler.build_state_payload_sync() + ) try: sidecar_config = RemoteConfig(**response) - config_context = sidecar_config.dict(include={"context"}).get( - "context", {} - ) - logger.info( - f"Received remote config with the following context: {config_context}" - ) + config_context = sidecar_config.dict(include={"context"}).get("context", {}) + logger.info(f"Received remote config with the following context: {config_context}") except ValidationError as exc: - logger.error( - "Got invalid config contents: {exc}", exc=exc, response=response - ) + logger.error("Got invalid config contents: {exc}", exc=exc, response=response) raise return sidecar_config except requests.RequestException as exc: logger.error("Got exception: {exc}", exc=exc) raise + + +_remote_config: RemoteConfig | None = None + + +def get_remote_config(): + global _remote_config + if _remote_config is None: + _remote_config = RemoteConfigFetcher().fetch_config() + + if sidecar_config.ENABLE_OFFLINE_MODE: + offline_mode = OfflineModeManager( + Path(sidecar_config.OFFLINE_MODE_BACKUP_DIR) / sidecar_config.OFFLINE_MODE_BACKUP_FILENAME, + get_env_api_key(), + ) + _remote_config = offline_mode.process_remote_config(_remote_config) + + return _remote_config diff --git a/horizon/startup/schemas.py b/horizon/startup/schemas.py index 9a0952db..e012869a 100644 --- a/horizon/startup/schemas.py +++ b/horizon/startup/schemas.py @@ -1,8 +1,17 @@ -from pydantic import BaseModel +from pydantic import BaseModel, Field class RemoteConfig(BaseModel): - opal_common: dict = {} - opal_client: dict = {} - pdp: dict = {} - context: dict = {} + opal_common: dict = Field(default_factory=dict) + opal_client: dict = Field(default_factory=dict) + pdp: dict = Field(default_factory=dict) + context: dict = 
Field(default_factory=dict) + + +class RemoteConfigBackup(BaseModel): + """ + A backup for the remote config, in case the sidecar can't fetch the remote config. + """ + + enc_remote_config: bytes + key_derivation_salt: bytes diff --git a/horizon/state.py b/horizon/state.py new file mode 100644 index 00000000..9553aefa --- /dev/null +++ b/horizon/state.py @@ -0,0 +1,232 @@ +import asyncio +import platform +import subprocess +import time +from collections.abc import AsyncGenerator +from contextlib import asynccontextmanager +from functools import cache +from pathlib import Path +from typing import Any, Optional +from uuid import UUID, uuid4 + +import aiohttp +from fastapi import status +from opal_common.logger import logger +from opal_common.schemas.data import DataUpdateReport +from pydantic import BaseModel, ValidationError + +from horizon.config import sidecar_config +from horizon.system.consts import API_VERSION + +PERSISTENT_STATE_FILENAME = "/home/permit/persistent_state.json" +MAX_STATE_UPDATE_INTERVAL_SECONDS = 60 + + +class PersistentState(BaseModel): + pdp_instance_id: UUID + seen_sdks: list[str | None] | None = None + + +class StateUpdateThrottledError(Exception): + def __init__(self, next_allowed_update: float): + super().__init__() + self.next_allowed_update = next_allowed_update + + +class PersistentStateHandler: + _instance: Optional["PersistentStateHandler"] = None + + def __init__(self, filename: str, env_api_key: str): + self._filename = filename + self._path = Path(filename) + self._prev_state_update_attempt = 0.0 + self._seen_sdk_update_lock = asyncio.Lock() + self._state_update_lock = asyncio.Lock() + self._env_api_key = env_api_key + self._tasks: list[asyncio.Task] = [] + self._write_lock = asyncio.Lock() + if not self._load(): + self._new() + + def _new(self): + self._state = PersistentState( + pdp_instance_id=uuid4(), + seen_sdks=[], + ) + + def _load(self) -> bool: + if not self._path.exists(): + return False + + try: + self._state = 
PersistentState.parse_file(self._path) + except ValidationError: + logger.warning("Unable to load existing persistent state: Invalid schema.") + return False + else: + return True + + def _save(self): + content = self._state.json() + self._path.write_text(content) + + @classmethod + def initialize(cls, env_api_key: str): + cls._instance = cls(PERSISTENT_STATE_FILENAME, env_api_key) + logger.info("PDP ID is {}", cls.get().pdp_instance_id) + + @classmethod + def get_instance(cls) -> "PersistentStateHandler": + if cls._instance is None: + raise RuntimeError("PersistentStateHandler not initialized.") + return cls._instance + + @classmethod + def get(cls) -> "PersistentState": + return cls.get_instance()._state + + @asynccontextmanager + async def update_state(self) -> AsyncGenerator[PersistentState, None]: + async with self._state_update_lock: + next_allowed_update = MAX_STATE_UPDATE_INTERVAL_SECONDS - (time.time() - self._prev_state_update_attempt) + # Since state updated are (for now) opportunistic and happen + # regularly, we simply refuse to send them if they're too fast. 
+ # TODO: When we actually report information that doesn't repeat, + # queue updates instead and retry if failing to report immediately + if next_allowed_update > 0: + raise StateUpdateThrottledError(next_allowed_update) + prev_state = self._state + try: + async with self._write_lock: + new_state = self._state.copy() + yield new_state + try: + await self._report(new_state) + finally: + # Throttle even if the report failed + self._prev_state_update_attempt = time.time() + self._state = new_state.copy() + self._save() + except Exception as e: # noqa: BLE001 + logger.exception("Failed to update state: {}, reverting...", e) + self._state = prev_state + + @classmethod + @cache + def _get_pdp_version(cls) -> str | None: + path = Path(sidecar_config.VERSION_FILE_PATH) + if not path.exists(): + return None + + return path.read_text().strip() + + @classmethod + def _get_pdp_runtime(cls) -> dict: + return { + "version": cls._get_pdp_version(), + "os_name": platform.system(), + "os_release": platform.release(), + "os_version": platform.version(), + "os_platform": platform.platform(), + "os_machine": platform.machine(), + "python_version": platform.python_version(), + "python_implementation": platform.python_implementation(), + } + + @classmethod + def _get_opa_version_vars(cls) -> dict: + opa_proc = subprocess.run(["opa", "version"], capture_output=True) + if opa_proc.returncode != 0: + logger.warning( + "Unable to get OPA version: {}", + opa_proc.stderr.decode(), + ) + return {} + + result = {} + for line in opa_proc.stdout.decode().splitlines(): + key, value = line.split(": ", 1) + result[key] = value + + return result + + @classmethod + def get_runtime_state(cls) -> dict: + # This is sync and called with run_in_executor because it has to be also + # called from a sync context without using asyncio.run + result = {} + opa_version_vars = cls._get_opa_version_vars() + result["pdp"] = cls._get_pdp_runtime() + result["opa"] = { + "version": opa_version_vars.get("Version"), + 
"go_version": opa_version_vars.get("Go Version"), + "platform": opa_version_vars.get("Platform"), + "have_webassembly": opa_version_vars.get("WebAssembly") == "available", + } + return result + + @classmethod + def _build_state_payload(cls, state: PersistentState | None = None) -> dict: + if state is None: + state = cls.get() + return { + "pdp_instance_id": str(state.pdp_instance_id), + "state": { + "api_version": API_VERSION, + "seen_sdks": state.seen_sdks, + }, + } + + async def reporter_user_data_handler(self, report: DataUpdateReport) -> dict[str, Any]: # noqa: ARG002 + return { + "pdp_instance_id": self.get().pdp_instance_id, + } + + @classmethod + async def build_state_payload(cls) -> dict: + payload = cls._build_state_payload() + payload["state"].update(await asyncio.get_event_loop().run_in_executor(None, cls.get_runtime_state)) + return payload + + @classmethod + def build_state_payload_sync(cls) -> dict: + payload = cls._build_state_payload() + payload["state"].update(cls.get_runtime_state()) + return payload + + async def _report(self, state: PersistentState | None = None): + if state is not None: + self._state = state.copy() + config_url = f"{sidecar_config.CONTROL_PLANE}{sidecar_config.REMOTE_STATE_ENDPOINT}" + async with aiohttp.ClientSession() as session: + logger.info("Reporting status update to server...") + response = await session.post( + url=config_url, + headers={"Authorization": f"Bearer {self._env_api_key}"}, + json=await PersistentStateHandler.build_state_payload(), + ) + if response.status != status.HTTP_204_NO_CONTENT: + logger.warning( + "Unable to post PDP state update to server: {}", + await response.text(), + ) + raise RuntimeError("Unable to post PDP state update to server.") + + async def seen_sdk(self, sdk: str): + if sdk not in self._state.seen_sdks: + await self._report_seen_sdk(sdk) + + async def _report_seen_sdk(self, sdk: str): + async with self._seen_sdk_update_lock: + # We check this again because we might have waited because 
of the lock + if sdk not in self._state.seen_sdks: + try: + async with self.update_state() as new_state: + if new_state.seen_sdks is None: + new_state.seen_sdks = [] + new_state.seen_sdks.append(sdk) + except StateUpdateThrottledError as e: + logger.debug( + "State updated throttled, next update {} seconds from now.", + e.next_allowed_update, + ) diff --git a/horizon/static/templates/authz.rego.template b/horizon/static/templates/authz.rego.template index de434184..fcde1b10 100644 --- a/horizon/static/templates/authz.rego.template +++ b/horizon/static/templates/authz.rego.template @@ -5,3 +5,13 @@ default allow = false # Reject requests by default. allow { # Allow request if... "{{ bearer_token }}" == input.identity # Identity is the secret root key. } + +allow { + input.path[0] == "health" +} + +{% if allow_metrics_unauthenticated %} +allow { + input.path[0] == "metrics" +} +{% endif %} diff --git a/horizon/static/templates/config.yaml.template b/horizon/static/templates/config.yaml.template index 1e468b7a..730c6d9e 100644 --- a/horizon/static/templates/config.yaml.template +++ b/horizon/static/templates/config.yaml.template @@ -16,3 +16,4 @@ decision_logs: reporting: min_delay_seconds: {{ min_delay_seconds }} max_delay_seconds: {{ max_delay_seconds }} + upload_size_limit_bytes: {{ upload_size_limit_bytes }} diff --git a/horizon/system/__init__.py b/horizon/system/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/horizon/system/api.py b/horizon/system/api.py new file mode 100644 index 00000000..1b7aee58 --- /dev/null +++ b/horizon/system/api.py @@ -0,0 +1,38 @@ +import asyncio +import os + +from fastapi import APIRouter, Depends, status +from loguru import logger + +from horizon.authentication import enforce_pdp_control_key, enforce_pdp_token +from horizon.system.consts import API_VERSION, GUNICORN_EXIT_APP +from horizon.system.schemas import VersionResult + + +def init_system_api_router(): + router = APIRouter() + + @router.get( + "/version", 
+ response_model=VersionResult, + status_code=status.HTTP_200_OK, + dependencies=[Depends(enforce_pdp_token)], + ) + async def version() -> VersionResult: + result = VersionResult(api_version=API_VERSION) + return result + + @router.post( + "/_exit", + status_code=status.HTTP_204_NO_CONTENT, + dependencies=[Depends(enforce_pdp_control_key)], + ) + async def exit(): + async def do_exit(): + await asyncio.sleep(0.1) + logger.info("Exiting due to system request.") + os._exit(GUNICORN_EXIT_APP) + + await do_exit() + + return router diff --git a/horizon/system/consts.py b/horizon/system/consts.py new file mode 100644 index 00000000..8c3996b2 --- /dev/null +++ b/horizon/system/consts.py @@ -0,0 +1,9 @@ +# 3 is a magic Gunicorn error code signaling that the application should exit +GUNICORN_EXIT_APP = 3 + +# History: +# 1 - Didn't really exist, we pretend that was v1 +# 2 - Implicitly assumed by the v2 backend for old PDPs that don't report a version +# 3 - Basic data-callback mechanism fully supported +# 4 - Pings and additional data-callback values, for full resilience feature to work +API_VERSION = 4 diff --git a/horizon/system/schemas.py b/horizon/system/schemas.py new file mode 100644 index 00000000..e40af2d5 --- /dev/null +++ b/horizon/system/schemas.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class VersionResult(BaseModel): + api_version: int diff --git a/horizon/tests/test_enforcer_api.py b/horizon/tests/test_enforcer_api.py new file mode 100644 index 00000000..82a558a1 --- /dev/null +++ b/horizon/tests/test_enforcer_api.py @@ -0,0 +1,580 @@ +import asyncio +import random +from contextlib import asynccontextmanager + +import aiohttp +import pytest +from aioresponses import aioresponses +from fastapi import FastAPI +from fastapi.encoders import jsonable_encoder +from fastapi.testclient import TestClient +from horizon.config import sidecar_config +from horizon.enforcer.api import stats_manager +from horizon.enforcer.schemas import ( + AuthorizationQuery, 
+ Resource, + UrlAuthorizationQuery, + User, + UserPermissionsQuery, + UserTenantsQuery, +) +from horizon.pdp import PermitPDP +from loguru import logger +from opal_client.client import OpalClient +from opal_client.config import opal_client_config +from pydantic import BaseModel +from starlette import status + + +class MockPermitPDP(PermitPDP): + def __init__(self): + self._setup_temp_logger() + + self._opal = OpalClient() + + sidecar_config.API_KEY = "mock_api_key" + app: FastAPI = self._opal.app + self._override_app_metadata(app) + self._configure_api_routes(app) + + self._app: FastAPI = app + + +sidecar = MockPermitPDP() + + +@asynccontextmanager +async def pdp_api_client() -> TestClient: + _client = TestClient(sidecar._app) + await stats_manager.run() + yield _client + await stats_manager.stop() + + +ALLOWED_ENDPOINTS = [ + ( + "/allowed", + "permit/root", + AuthorizationQuery( + user=User(key="user1"), + action="read", + resource=Resource(type="resource1"), + ), + {"result": {"allow": True}}, + {"allow": True}, + ), + ( + "/allowed_url", + "mapping_rules", + UrlAuthorizationQuery( + user=User(key="user1"), + http_method="DELETE", + url="https://some.url/important_resource", + tenant="default", + ), + { + "result": { + "all": [ + { + "url": "https://some.url/important_resource", + "http_method": "delete", + "action": "delete", + "resource": "resource1", + } + ] + } + }, + {"allow": True}, + ), + ( + "/user-permissions", + "permit/user_permissions", + UserPermissionsQuery(user=User(key="user1"), resource_types=["resource1", "resource2"]), + { + "result": { + "permissions": { + "user1": { + "resource": { + "key": "resource_x", + "attributes": {}, + "type": "resource1", + }, + "permissions": ["read:read"], + } + } + } + }, + { + "user1": { + "resource": { + "key": "resource_x", + "attributes": {}, + "type": "resource1", + }, + "permissions": ["read:read"], + } + }, + ), + ( + "/allowed/all-tenants", + "permit/any_tenant", + AuthorizationQuery( + 
user=User(key="user1"), + action="read", + resource=Resource(type="resource1"), + ), + { + "result": { + "allowed_tenants": [ + { + "tenant": {"key": "default", "attributes": {}}, + "allow": True, + "result": True, + } + ] + } + }, + { + "allowed_tenants": [ + { + "tenant": {"key": "default", "attributes": {}}, + "allow": True, + "result": True, + } + ] + }, + ), + ( + "/allowed/bulk", + "permit/bulk", + [ + AuthorizationQuery( + user=User(key="user1"), + action="read", + resource=Resource(type="resource1"), + ) + ], + {"result": {"allow": [{"allow": True, "result": True}]}}, + {"allow": [{"allow": True, "result": True}]}, + ), + ( + "/user-tenants", + "permit/user_permissions/tenants", + UserTenantsQuery( + user=User(key="user1"), + ), + {"result": [{"attributes": {}, "key": "tenant-1"}]}, + [{"attributes": {}, "key": "tenant-1"}], + ), + # TODO: Add Kong +] + +ALLOWED_ENDPOINTS_FACTDB = [ + ( + "/allowed", + "/check", + AuthorizationQuery( + user=User(key="user1"), + action="read", + resource=Resource(type="resource1"), + ), + None, + {"allow": True}, + {"allow": True}, + ), + ( + "/allowed_url", + "/check", + UrlAuthorizationQuery( + user=User(key="user1"), + http_method="DELETE", + url="https://some.url/important_resource", + tenant="default", + ), + None, + {"allow": True}, + {"allow": True}, + ), + ( + "/nginx_allowed", + "/check", + None, + { + "permit-user-key": "user1", + "permit-tenant-id": "default", + "permit-action": "read", + "permit-resource-type": "resource1", + }, + {"allow": True}, + {"allow": True}, + ), + ( + "/allowed/all-tenants", + "/check/all-tenants", + AuthorizationQuery( + user=User(key="user1"), + action="read", + resource=Resource(type="resource1"), + ), + None, + { + "allowed_tenants": [ + { + "tenant": {"key": "default", "attributes": {}}, + "allow": True, + "result": True, + } + ] + }, + { + "allowed_tenants": [ + { + "tenant": {"key": "default", "attributes": {}}, + "allow": True, + "result": True, + } + ] + }, + ), + ( + 
"/allowed/bulk", + "/check/bulk", + [ + AuthorizationQuery( + user=User(key="user1"), + action="read", + resource=Resource(type="resource1"), + ) + ], + None, + [{"allow": True, "result": True}], + {"allow": [{"allow": True, "result": True}]}, + ), + ( + "/user-tenants", + "/users/user1/tenants", + UserTenantsQuery( + user=User(key="user1"), + ), + None, + [{"key": "default-2", "attributes": {}}, {"key": "default", "attributes": {}}], + [{"key": "default-2", "attributes": {}}, {"key": "default", "attributes": {}}], + ), + ( + "/user-permissions", + "/user-permissions", + UserPermissionsQuery( + user=User(key="user1"), + ), + None, + { + "user1": { + "resource": { + "key": "resource_x", + "attributes": {}, + "type": "resource1", + }, + "tenant": {"key": "default", "attributes": {}}, + "permissions": ["read:read"], + "roles": ["admin"], + } + }, + { + "user1": { + "resource": { + "key": "resource_x", + "attributes": {}, + "type": "resource1", + }, + "tenant": {"key": "default", "attributes": {}}, + "permissions": ["read:read"], + "roles": ["admin"], + } + }, + ), +] + + +@pytest.mark.parametrize( + "endpoint, opa_endpoint, query, opa_response, expected_response", + list(filter(lambda p: not isinstance(p[2], UrlAuthorizationQuery), ALLOWED_ENDPOINTS)), +) +@pytest.mark.timeout(30) +@pytest.mark.asyncio +async def test_enforce_endpoint_statistics( + endpoint: str, + opa_endpoint: str, + query: AuthorizationQuery | list[AuthorizationQuery], + opa_response: dict, + expected_response: dict, +) -> None: + async with pdp_api_client() as client: + + def post_endpoint(): + return client.post( + endpoint, + headers={"authorization": f"Bearer {sidecar_config.API_KEY}"}, + json=query.dict() if not isinstance(query, list) else [q.dict() for q in query], + ) + + with aioresponses() as m: + opa_url = f"{opal_client_config.POLICY_STORE_URL}/v1/data/{opa_endpoint}" + + # Test valid response from OPA + m.post( + opa_url, + status=200, + payload=opa_response, + ) + + response = 
post_endpoint() + + assert response.status_code == 200 + logger.info(response.json()) + if isinstance(expected_response, list): + assert response.json() == expected_response + elif isinstance(expected_response, dict): + for k, v in expected_response.items(): + assert response.json()[k] == v + else: + raise TypeError( + f"Unexpected expected response type, expected one of list, dict and got {type(expected_response)}" + ) + + # Test bad status from OPA + bad_status = random.choice([401, 404, 400, 500, 503]) + m.post( + opa_url, + status=bad_status, + payload=opa_response, + ) + response = post_endpoint() + assert response.status_code == 502 + assert "OPA request failed" in response.text + assert f"status: {bad_status}" in response.text + + # Test connection error + m.post( + opa_url, + exception=aiohttp.ClientConnectionError("don't want to connect"), + ) + response = post_endpoint() + assert response.status_code == 502 + assert "OPA request failed" in response.text + assert "don't want to connect" in response.text + + # Test timeout - not working yet + m.post( + opa_url, + exception=asyncio.exceptions.TimeoutError(), + ) + response = post_endpoint() + assert response.status_code == 504 + assert "OPA request timed out" in response.text + await asyncio.sleep(2) + current_rate = await stats_manager.current_rate() + assert current_rate == (3.0 / 4.0) + assert client.get("/health").status_code == status.HTTP_503_SERVICE_UNAVAILABLE + await stats_manager.reset_stats() + current_rate = await stats_manager.current_rate() + assert current_rate == 0 + assert client.get("/health").status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + +@pytest.mark.parametrize("endpoint, opa_endpoint, query, opa_response, expected_response", ALLOWED_ENDPOINTS) +def test_enforce_endpoint( + endpoint, + opa_endpoint, + query, + opa_response, + expected_response, +): + _client = TestClient(sidecar._app) + + def post_endpoint(): + return _client.post( + endpoint, + headers={"authorization": 
f"Bearer {sidecar_config.API_KEY}"}, + json=query.dict() if not isinstance(query, list) else [q.dict() for q in query], + ) + + with aioresponses() as m: + opa_url = f"{opal_client_config.POLICY_STORE_URL}/v1/data/{opa_endpoint}" + + if endpoint == "/allowed_url": + # allowed_url gonna first call the mapping rules endpoint then the normal OPA allow endpoint + m.post( + url=f"{opal_client_config.POLICY_STORE_URL}/v1/data/permit/root", + status=200, + payload={"result": {"allow": True}}, + repeat=True, + ) + + # Test valid response from OPA + m.post( + opa_url, + status=200, + payload=opa_response, + ) + + response = post_endpoint() + assert response.status_code == 200 + logger.info(response.json()) + if isinstance(expected_response, list): + assert response.json() == expected_response + elif isinstance(expected_response, dict): + for k, v in expected_response.items(): + assert response.json()[k] == v + else: + raise TypeError( + f"Unexpected expected response type, expected one of list, dict and got {type(expected_response)}" + ) + + # Test bad status from OPA + bad_status = random.choice([401, 404, 400, 500, 503]) + m.post( + opa_url, + status=bad_status, + payload=opa_response, + ) + response = post_endpoint() + assert response.status_code == 502 + assert "OPA request failed" in response.text + assert f"status: {bad_status}" in response.text + + # Test connection error + m.post( + opa_url, + exception=aiohttp.ClientConnectionError("don't want to connect"), + ) + response = post_endpoint() + assert response.status_code == 502 + assert "OPA request failed" in response.text + assert "don't want to connect" in response.text + + # Test timeout - not working yet + m.post( + opa_url, + exception=asyncio.exceptions.TimeoutError(), + ) + response = post_endpoint() + assert response.status_code == 504 + assert "OPA request timed out" in response.text + + +@pytest.mark.parametrize( + ( + "endpoint", + "factdb_endpoint", + "query", + "headers", + "factdb_response", + 
"expected_response", + ), + ALLOWED_ENDPOINTS_FACTDB, +) +def test_enforce_endpoint_factdb( + endpoint: str, + factdb_endpoint: str, + query: list[BaseModel] | BaseModel | None, + headers: dict | None, + factdb_response: dict, + expected_response: dict, +): + sidecar_config.FACTDB_ENABLED = True + _client = TestClient(sidecar._app) + + def post_endpoint(): + return _client.post( + endpoint, + headers={"authorization": f"Bearer {sidecar_config.API_KEY}"} | (headers or {}), + json=jsonable_encoder(query) if query else None, + ) + + with aioresponses() as m: + factdb_url = f"{sidecar_config.FACTDB_SERVICE_URL}/v1/authz{factdb_endpoint}" + + method = "POST" + + match endpoint: + case "/allowed_url": + # allowed_url gonna first call the mapping rules endpoint then the normal OPA allow endpoint + m.post( + url=f"{opal_client_config.POLICY_STORE_URL}/v1/data/mapping_rules", + status=200, + payload={ + "result": { + "all": [ + { + "url": "https://some.url/important_resource", + "http_method": "delete", + "action": "delete", + "resource": "resource1", + } + ] + } + }, + repeat=True, + ) + case "/user-tenants": + method = "GET" + + # Test valid response from OPA + m.add( + factdb_url, + method=method, + status=200, + payload=factdb_response, + ) + + response = post_endpoint() + assert response.status_code == 200 + logger.info(response.json()) + if isinstance(expected_response, list): + assert response.json() == expected_response + elif isinstance(expected_response, dict): + for k, v in expected_response.items(): + try: + assert response.json()[k] == v, f"Expected {k} to be {v} but got {response.json()[k]}" + except KeyError: + pytest.fail(f"response missing key {k} from expected response:\n,{response.json()}") + else: + raise TypeError( + f"Unexpected expected response type, expected one of list, dict and got {type(expected_response)}" + ) + + # Test bad status from OPA + bad_status = random.choice([401, 404, 400, 500, 503]) + m.add( + factdb_url, + method=method, + 
status=bad_status, + payload=factdb_response, + ) + response = post_endpoint() + assert response.status_code == 502 + assert "FactDB request failed" in response.text + assert f"status: {bad_status}" in response.text + + # Test connection error + m.add( + factdb_url, + method=method, + exception=aiohttp.ClientConnectionError("don't want to connect"), + ) + response = post_endpoint() + assert response.status_code == 502 + assert "FactDB request failed" in response.text + assert "don't want to connect" in response.text + + # Test timeout - not working yet + m.add( + factdb_url, + method=method, + exception=asyncio.exceptions.TimeoutError(), + ) + response = post_endpoint() + assert response.status_code == 504 + assert "FactDB request timed out" in response.text + + sidecar_config.FACTDB_ENABLED = False diff --git a/horizon/tests/test_local_api.py b/horizon/tests/test_local_api.py new file mode 100644 index 00000000..400fa6a0 --- /dev/null +++ b/horizon/tests/test_local_api.py @@ -0,0 +1,166 @@ +from pathlib import Path + +import pytest +from aioresponses import aioresponses +from fastapi import FastAPI +from fastapi.testclient import TestClient +from horizon.config import sidecar_config +from horizon.factdb.client import FactDBClient +from horizon.pdp import PermitPDP +from loguru import logger +from opal_client.client import OpalClient +from opal_client.config import opal_client_config + + +class MockPermitPDP(PermitPDP): + def __init__(self, opal: OpalClient | None = None): + self._setup_temp_logger() + + self._opal = opal or OpalClient() + + sidecar_config.API_KEY = "mock_api_key" + app: FastAPI = self._opal.app + self._override_app_metadata(app) + self._configure_api_routes(app) + self._app: FastAPI = app + + +class MockFactDBPermitPDP(MockPermitPDP): + def __init__(self): + super().__init__(opal=FactDBClient(shard_id=sidecar_config.SHARD_ID, data_topics=self._fix_data_topics())) + + +sidecar = MockPermitPDP() + + +@pytest.mark.asyncio +async def 
test_list_role_assignments() -> None: + _client = TestClient(sidecar._app) + with aioresponses() as m: + opa_url = f"{opal_client_config.POLICY_STORE_URL}/v1/data/permit/api/role_assignments/list_role_assignments" + + # Test valid response from OPA + m.post( + opa_url, + status=200, + repeat=True, + payload={ + "result": [ + { + "user": "user1", + "role": "role1", + "tenant": "tenant1", + "resource_instance": "resource_instance1", + } + ] + }, + ) + + response = _client.get( + "/local/role_assignments", + headers={"authorization": f"Bearer {sidecar_config.API_KEY}"}, + ) + + m.assert_called_once() + assert response.status_code == 200 + res_json = response.json() + assert len(res_json) == 1 + assert res_json[0] == { + "user": "user1", + "role": "role1", + "tenant": "tenant1", + "resource_instance": "resource_instance1", + } + + +@pytest.mark.asyncio +async def test_list_role_assignments_wrong_factdb_config() -> None: + _sidecar = MockFactDBPermitPDP() + # the FACTDB_ENABLED is set to True after the PDP was created + # this causes the PDP to be without the FactDBPolicyStoreClient - it is a uniquely rare case + # that will probably never happen as this config is managed either by a remote config or env var + sidecar_config.FACTDB_ENABLED = True + _client = TestClient(_sidecar._app) + with aioresponses() as m: + # Example 'http://localhost:8181/v1/data/permit/api/role_assignments/list_role_assignments' + opa_url = f"{opal_client_config.POLICY_STORE_URL}/v1/data/permit/api/role_assignments/list_role_assignments" + + # Test valid response from OPA + m.post( + opa_url, + status=200, + repeat=True, + payload={ + "result": [ + { + "user": "user1", + "role": "role1", + "tenant": "tenant1", + "resource_instance": "resource_instance1", + } + ] + }, + ) + + response = _client.get( + "/local/role_assignments", + headers={"authorization": f"Bearer {sidecar_config.API_KEY}"}, + ) + + m.assert_called_once() + assert response.status_code == 200 + res_json = response.json() + assert 
len(res_json) == 1 + assert res_json[0] == { + "user": "user1", + "role": "role1", + "tenant": "tenant1", + "resource_instance": "resource_instance1", + } + + +@pytest.mark.asyncio +async def test_list_role_assignments_factdb(tmp_path: Path) -> None: + sidecar_config.FACTDB_ENABLED = True + sidecar_config.OFFLINE_MODE_BACKUP_DIR = tmp_path / "backup" + _sidecar = MockFactDBPermitPDP() + _client = TestClient(_sidecar._app) + with aioresponses() as m: + # The policy store client of the FactDB has base url configured, this means that the url + # we need to mock is '/v1/facts/role_assignments' - without the base url server + factdb_url = "/v1/facts/role_assignments?page=1&per_page=30" + logger.info("mocking FactDB url: {}", factdb_url) + # Test valid response from OPA + m.get( + f"{sidecar_config.FACTDB_SERVICE_URL}{factdb_url}", + status=200, + repeat=True, + payload=[ + { + "type": "role_assignment", + "attributes": { + "actor": "user:user1", + "role": "role1", + "tenant": "tenant1", + "resource": "resource_instance1", + "id": "user:user1-role1-resource_instance1", + }, + } + ], + ) + + response = _client.get( + "/local/role_assignments", + headers={"authorization": f"Bearer {sidecar_config.API_KEY}"}, + ) + + assert response.status_code == 200 + res_json = response.json() + assert len(res_json) == 1 + assert res_json[0] == { + "user": "user1", + "role": "role1", + "tenant": "tenant1", + "resource_instance": "resource_instance1", + } + sidecar_config.FACTDB_ENABLED = False diff --git a/imgs/PDP.png b/imgs/PDP.png new file mode 100644 index 00000000..78713c5c Binary files /dev/null and b/imgs/PDP.png differ diff --git a/kong_routes.json b/kong_routes.json new file mode 100644 index 00000000..6c091f19 --- /dev/null +++ b/kong_routes.json @@ -0,0 +1,5 @@ +[ + ["/v\\d+/([^/]+).*", 0], + ["/([^/]+).*", 0], + ["/", "index"] +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..eb5e57c6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,45 @@ 
+[tool.ruff] +line-length = 120 +src = ["horizon"] +target-version = "py310" + +[tool.ruff.lint] +select = [ + "E", # https://docs.astral.sh/ruff/rules/#error-e + "W", # https://docs.astral.sh/ruff/rules/#warning-w + "F", # https://docs.astral.sh/ruff/rules/#pyflakes-f + "N", # https://docs.astral.sh/ruff/rules/#pep8-naming-n + "I", # https://docs.astral.sh/ruff/rules/#isort-i + "BLE", # https://docs.astral.sh/ruff/rules/#flake8-blind-except-ble + "FBT", # https://docs.astral.sh/ruff/rules/#flake8-boolean-trap-fbt + "B", # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b + "C4", # https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4 + "PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie + "T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20 + "SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim + "ARG", # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg + "PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth + "ASYNC", # https://docs.astral.sh/ruff/rules/#flake8-async-async + "UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up + "ERA", # https://docs.astral.sh/ruff/rules/#eradicate-era + "C90", # https://docs.astral.sh/ruff/rules/#mccabe-c90 + "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf + "FAST", # https://docs.astral.sh/ruff/rules/#fastapi-fast +] +ignore = [ + "FAST001", # https://docs.astral.sh/ruff/rules/fast-api-redundant-response-model/#fast-api-redundant-response-model-fast001 +] + +[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "all" + +[tool.mypy] +python_version = "3.10" +packages = ["horizon"] +plugins = ["pydantic.v1.mypy"] + +check_untyped_defs = true +warn_unused_configs = true +warn_redundant_casts = true +warn_unused_ignores = true +warn_unreachable = true diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..9ad4a92f --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +pytest +pytest-asyncio +ruff 
+aioresponses diff --git a/requirements.txt b/requirements.txt index eb310129..1d814b2d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,17 @@ -typer==0.3.2 -aiohttp==3.7.2 -fastapi==0.65.2 -gunicorn -opal-client>=0.1.18 -pydantic[email]==1.8.2 -starlette==0.14.2 -uvicorn[standard] -requests==2.25.0 -tenacity>=8.0.1 -Jinja2==3.0.3 +aiohttp>=3.9.4,<4 +fastapi>=0.109.1,<1 +gunicorn>=22.0.0,<23 +Jinja2>=3.1.2,<4 +pydantic[email]>=1.9.1,<2 +requests>=2.31.0,<3 +tenacity>=8.0.1,<9 +typer>=0.4.1,<1 +uvicorn[standard]>=0.17.6,<1 logzio-python-handler -rook ddtrace +sqlparse==0.5.0 +scalar-fastapi==1.0.3 +httpx>=0.27.0,<1 +protobuf>=3.20.2 # not directly required, pinned by Snyk to avoid a vulnerability +opal-common==0.8.0rc1 +opal-client==0.8.0rc1 diff --git a/scripts/gunicorn_conf.py b/scripts/gunicorn_conf.py index 51ed5c24..522b4487 100644 --- a/scripts/gunicorn_conf.py +++ b/scripts/gunicorn_conf.py @@ -1,26 +1,4 @@ -import os - -from opal_common.logger import logger - - def post_fork(server, worker): """ this hook takes effect if we are using gunicorn to run OPAL. 
""" - rookout_token = os.getenv("ROOKOUT_TOKEN", None) - if not rookout_token: - logger.info("No rookout token found, skipping.") - return - - service = os.getenv("ROOKOUT_SERVICE", "pdp") - env = os.getenv("ROOKOUT_ENV", "dev") - user = os.getenv("ROOKOUT_USER", None) - - logger.info("Running Rookout...") - labels = {"env": env, "service": service} - if user is not None: - labels.update({"user": user}) - - import rook - - rook.start(token=rookout_token, labels=labels) diff --git a/scripts/run_sidecar_v2.sh b/scripts/run_sidecar_v2.sh new file mode 100755 index 00000000..ea4cf550 --- /dev/null +++ b/scripts/run_sidecar_v2.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +printf '\x1b]0;%s\x07' "Sidecar" +PDP_API_KEY=$1 PDP_REMOTE_CONFIG_ENDPOINT=/v2/pdps/me/config uvicorn horizon.main:app --reload --port=7000 diff --git a/scripts/start.sh b/scripts/start.sh index 798d2a6d..f8235186 100644 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -1,11 +1,19 @@ -#! /usr/bin/env sh -set -e +#!/bin/bash -export GUNICORN_CONF=${GUNICORN_CONF:-/gunicorn_conf.py} +export GUNICORN_CONF=${GUNICORN_CONF:-./gunicorn_conf.py} +export GUNICORN_TIMEOUT=${GUNICORN_TIMEOUT:-600} +ddtrace="" +if [ "${PDP_ENABLE_MONITORING}" == "true" ] +then + ddtrace=ddtrace-run +fi +$ddtrace gunicorn -b 0.0.0.0:${UVICORN_PORT} -k uvicorn.workers.UvicornWorker --workers=${UVICORN_NUM_WORKERS} -c ${GUNICORN_CONF} ${UVICORN_ASGI_APP} --timeout ${GUNICORN_TIMEOUT} +return_code=$? 
-# Start Gunicorn -if [[ -z "${PDP_ENABLE_MONITORING}" && "${PDP_ENABLE_MONITORING}" = "true" ]]; then - exec ddtrace-run gunicorn -b 0.0.0.0:${UVICORN_PORT} -k uvicorn.workers.UvicornWorker --workers=${UVICORN_NUM_WORKERS} -c ${GUNICORN_CONF} ${UVICORN_ASGI_APP} -else - exec gunicorn -b 0.0.0.0:${UVICORN_PORT} -k uvicorn.workers.UvicornWorker --workers=${UVICORN_NUM_WORKERS} -c ${GUNICORN_CONF} ${UVICORN_ASGI_APP} +if [ "$return_code" == 3 ] +then + # The _exit route was used, change the 3 to a 0 + exit 0 fi + +exit $return_code diff --git a/setup.py b/setup.py index de7da89e..0e616812 100644 --- a/setup.py +++ b/setup.py @@ -1,21 +1,18 @@ -import os import pathlib from setuptools import find_packages, setup -def get_requirements(env=""): +def get_requirements(env="") -> list[str]: if env: - env = "-{}".format(env) - with open("requirements{}.txt".format(env)) as fp: - return [x.strip() for x in fp.read().split("\n") if not x.startswith("#")] + env = f"-{env}" + with pathlib.Path(f"requirements{env}.txt").open() as fp: + return [x.strip() for x in fp.readlines() if not x.startswith("#")] def get_data_files(root_directory: str): - all_files = [ - str(f) for f in pathlib.Path(f"{root_directory}/").glob("**/*") if f.is_file() - ] - file_components = [(os.path.dirname(f), f) for f in all_files] + all_files: list[pathlib.Path] = [f for f in pathlib.Path(f"{root_directory}/").glob("**/*") if f.is_file()] + file_components = [(f.parent, f) for f in all_files] grouped_files = {} for directory, fullpath in file_components: grouped_files.setdefault(directory, []).append(fullpath) @@ -33,5 +30,7 @@ def get_data_files(root_directory: str): include_package_data=True, data_files=get_data_files("horizon/static"), install_requires=get_requirements(), - # dev_requires=get_requirements("dev"), + extras_require={ + "dev": get_requirements("dev"), + }, ) diff --git a/test_offline_mode/.env.example b/test_offline_mode/.env.example new file mode 100644 index 00000000..9940b561 --- 
/dev/null +++ b/test_offline_mode/.env.example @@ -0,0 +1,3 @@ +PDP_API_KEY= +PDP_CONTROL_PLANE=https://permitio.api.stg.permit.io +PDP_ENABLE_OFFLINE_MODE=True diff --git a/test_offline_mode/Dockerfile b/test_offline_mode/Dockerfile new file mode 100644 index 00000000..70b12c21 --- /dev/null +++ b/test_offline_mode/Dockerfile @@ -0,0 +1,7 @@ +FROM python:alpine + +RUN pip install --upgrade pip && pip install permit colorlog + +COPY checker.py /checker.py + +CMD ["python", "/checker.py"] diff --git a/test_offline_mode/README.md b/test_offline_mode/README.md new file mode 100644 index 00000000..c1c5089b --- /dev/null +++ b/test_offline_mode/README.md @@ -0,0 +1,33 @@ +# E2E tests for PDP Offline Mode + +### Create Permit Environment + +Log in to Permit and create a new environment with the following objects: + +* Resource 'file' with action 'create' +* Role 'admin' with permission to create 'file' +* User 'user-1' with role 'admin' + +Copy the `.env.example` file to `.env` and update the values with the environment details. + +### Prepare repo for building PDP image + +This downloads the Custom OPA and FactDB source code. + +```bash +VERSION=<version> make run-prepare +``` +Replace `<version>` with the version you want to use for the PDP image. + +### Run the tests + +```bash +docker compose up +``` + + +### What it does + +1. Start an online PDP with `PDP_ENABLE_OFFLINE_MODE=True` and connect the `/app/backup` directory to a volume. +2. Start another offline PDP that is also connected to the same volume. +3. Run a tester that runs `permit.check("user-1", "create", "file")` on the online PDP and the offline PDP. 
diff --git a/test_offline_mode/checker.py b/test_offline_mode/checker.py new file mode 100644 index 00000000..fa07d61c --- /dev/null +++ b/test_offline_mode/checker.py @@ -0,0 +1,65 @@ +import asyncio +import logging.config +import os + +from permit import Permit, PermitConfig + +logging.config.dictConfig( + { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "color": { + "()": "colorlog.ColoredFormatter", + "format": "%(log_color)s[%(asctime)s.%(msecs)03d] %(levelname)s - " "%(name)s:%(lineno)d | %(message)s", + "datefmt": "%H:%M:%S", + "log_colors": { + "DEBUG": "white", + "INFO": "green", + "WARNING": "yellow", + "ERROR": "red", + "CRITICAL": "red,bg_white", + }, + }, + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "color", + }, + }, + "root": { + "handlers": ["console"], + "level": "INFO", + }, + } +) +logger = logging.getLogger(__name__) + + +async def main(): + pdp_url = os.environ["PDP_URL"] + logger.info("Starting PDP checker against: %s", pdp_url) + permit = Permit( + PermitConfig( + token=os.environ["PDP_API_KEY"], + api_url=os.environ["PDP_CONTROL_PLANE"], + pdp=pdp_url, + ) + ) + + while True: + try: + result = await permit.check("user-1", "create", "file") + if result: + logger.info("Passed") + else: + logger.warning("Failed") + except Exception as e: + logger.exception(f"Error: {e}") + + await asyncio.sleep(1) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/test_offline_mode/docker-compose.yaml b/test_offline_mode/docker-compose.yaml new file mode 100644 index 00000000..32cad51e --- /dev/null +++ b/test_offline_mode/docker-compose.yaml @@ -0,0 +1,67 @@ +volumes: + backup: + +networks: + internet: + driver: bridge + no-internet: + driver: bridge + internal: true + +services: + online-pdp: + container_name: online-pdp + build: + context: .. 
+ dockerfile: Dockerfile + volumes: + - backup:/app/backup:rw + networks: + - internet + env_file: .env + healthcheck: + test: "wget --no-verbose --tries=1 --spider http://127.0.0.1:7000/healthy || exit 1" + + offline-pdp: + container_name: offline-pdp + build: + context: .. + dockerfile: Dockerfile + volumes: + - backup:/app/backup:rw + networks: + - no-internet + env_file: .env + healthcheck: + test: "wget --no-verbose --tries=1 --spider http://127.0.0.1:7000/healthy || exit 1" + depends_on: + online-pdp: + condition: service_healthy + + online-tester: + container_name: online-tester + build: + context: . + dockerfile: Dockerfile + networks: + - internet + env_file: .env + environment: + - PDP_URL=http://online-pdp:7000 + depends_on: + online-pdp: + condition: service_healthy + + offline-tester: + container_name: offline-tester + build: + context: . + dockerfile: Dockerfile + networks: + - no-internet + env_file: .env + environment: + - PDP_URL=http://offline-pdp:7000 + depends_on: + offline-pdp: + condition: service_healthy