Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/aro-hcp-cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ on:
- 'frontend/**'
- 'admin/**'
- 'backend/**'
- 'sessiongate/**'
- 'cluster-service/**'
- 'internal/**'
- 'maestro/**'
Expand Down
2 changes: 2 additions & 0 deletions .yamllint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ ignore:
- 'observability/arobit/deploy/templates/forwarder-secretprovider.yaml'
- 'observability/kube-events/deploy/templates/deployment.yaml'
- 'mgmt-fixes/deploy/kubelet-ds/templates/ds-kubelet-parameters.yaml'
- 'sessiongate/deploy/templates/deployment.yaml'
- 'sessiongate/deploy/templates/ext-authz.authorizationpolicy.yaml'
- '**/zz_fixture_TestHelmTemplate*.yaml'
- 'velero/deploy/templates/install-job.yaml'
- 'maestro/server/deploy/templates/allow-cluster-service.authorizationpolicy.yaml'
Expand Down
30 changes: 30 additions & 0 deletions acm/deploy/helm/policies/templates/sre-breakglass-role.policy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,7 @@ spec:
- '*'
verbs:
- get
# groups used by hcpctl - migrate hcpctl to aro-sre-pso and aro-sre-csa once the new groups have been rolled out
- complianceType: MustHave
objectDefinition:
apiVersion: rbac.authorization.k8s.io/v1
Expand Down Expand Up @@ -727,6 +728,35 @@ spec:
- kind: Group
name: aro-sre-cluster-admin
apiGroup: rbac.authorization.k8s.io
# group used by breakglass
- complianceType: MustHave
objectDefinition:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: aro-sre-pso
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:aro-sre
subjects:
- kind: Group
name: aro-sre-pso
apiGroup: rbac.authorization.k8s.io
- complianceType: MustHave
objectDefinition:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: aro-sre-csa
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- kind: Group
name: aro-sre-csa
apiGroup: rbac.authorization.k8s.io
---
apiVersion: policy.open-cluster-management.io/v1
kind: PlacementBinding
Expand Down
30 changes: 30 additions & 0 deletions acm/zz_fixture_TestHelmTemplate_dev_westus3_mgmt_1_policy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,7 @@ spec:
- '*'
verbs:
- get
# groups used by hcpctl - migrate hcpctl to aro-sre-pso and aro-sre-csa once the new groups have been rolled out
- complianceType: MustHave
objectDefinition:
apiVersion: rbac.authorization.k8s.io/v1
Expand Down Expand Up @@ -914,4 +915,33 @@ spec:
- kind: Group
name: aro-sre-cluster-admin
apiGroup: rbac.authorization.k8s.io
# group used by breakglass
- complianceType: MustHave
objectDefinition:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: aro-sre-pso
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:aro-sre
subjects:
- kind: Group
name: aro-sre-pso
apiGroup: rbac.authorization.k8s.io
- complianceType: MustHave
objectDefinition:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: aro-sre-csa
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- kind: Group
name: aro-sre-csa
apiGroup: rbac.authorization.k8s.io

1 change: 1 addition & 0 deletions admin/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ ARG PLATFORM
# Builder stage based on the Microsoft Go toolchain image.
FROM --platform=${PLATFORM} mcr.microsoft.com/oss/go/microsoft/golang:1.25-azurelinux3.0 AS builder
COPY internal/go.mod internal/go.sum internal/
COPY test-integration/go.mod test-integration/
COPY sessiongate/go.mod sessiongate/go.sum sessiongate/
COPY admin/server/go.mod admin/server/go.sum admin/server/
RUN cd admin/server && go mod download
WORKDIR /app
Expand Down
3 changes: 3 additions & 0 deletions admin/Env.mk
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,6 @@ KUSTO_RG ?= {{ .kusto.rg }}
FPA_CLIENT_ID ?= {{ .firstPartyAppClientId }}
FPA_CERT_NAME ?= {{ .firstPartyAppCertificate.name }}
FPA_KEY_VAULT_NAME ?= {{ .serviceKeyVault.name }}

# Sessiongate
SESSIONGATE_NAMESPACE ?= {{ .sessiongate.k8s.namespace }}
5 changes: 3 additions & 2 deletions admin/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ run-with-sc-integration: $(BINARY)
--cosmos-name ${COSMOS_DB_NAME} \
--kusto-endpoint $${KUSTO_ENDPOINT} \
--fpa-client-id ${FPA_CLIENT_ID} \
--fpa-cert-bundle-path $${CRT_BUNDLE}
--fpa-cert-bundle-path $${CRT_BUNDLE} \
--sessiongate-namespace ${SESSIONGATE_NAMESPACE}
.PHONY: run-with-sc-integration

clean:
Expand Down Expand Up @@ -135,4 +136,4 @@ run-hello-world-test-with-port-forward: $(CLI_BINARY) $(HCPCTL)
--host "${ADMIN_API_HOST}" \
--admin-api-endpoint "https://localhost:$(PORT_FORWARD_LOCAL_PORT)" \
--insecure-skip-verify
.PHONY: run-with-sc-integration
.PHONY: run-hello-world-test-with-port-forward
56 changes: 51 additions & 5 deletions admin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,55 @@

## Overview

Dislcaimer: As the Admin API is currently mostly a stub, so is this README.
The ARO HCP Admin API is a REST API deployed on each regional service cluster, offering administrative endpoints for SREs and platform operators invoked via Geneva Actions. Use cases include breakglass access to HCP clusters and cluster diagnostics.

The ARO HCP Admin API is a REST API deployed on each regional service cluster, offering a set of endpoints for administrative tasks for SREs and platform operators invoked via Geneva Actions.
## API Endpoints

All HCP-scoped endpoints include the full Azure resource ID in the path:

```
/admin/v1/hcp/subscriptions/{subscriptionId}/resourcegroups/{resourceGroupName}/providers/Microsoft.RedHatOpenshift/hcpOpenShiftClusters/{clusterName}
```

The Azure resource ID portion of this path (everything from `/subscriptions/` onward) is abbreviated as `{resourceId}` below.

| Method | Path | Description |
|--------|------|-------------|
| `POST` | `/admin/v1/hcp{resourceId}/breakglass` | Create a breakglass session; `group` and `ttl` are passed in the JSON request body ([details](breakglass.md)) |
| `GET` | `/admin/v1/hcp{resourceId}/breakglass/{sessionName}/kubeconfig` | Get kubeconfig for a breakglass session ([details](breakglass.md)) |
| `GET` | `/admin/v1/hcp{resourceId}/cosmosdump` | Cosmos DB dump for a cluster |
| `GET` | `/admin/v1/hcp{resourceId}/helloworld` | HCP hello world (dev/test) |
| `GET` | `/admin/helloworld` | Hello world (dev/test) |
| `GET` | `/healthz/ready` | Readiness probe |
| `GET` | `/healthz/live` | Liveness probe |
| `GET` | `/metrics` | Prometheus metrics (served on the metrics port) |

## Authentication

Authentication and authorization are layered across infrastructure and application:

1. **MISE** (external authorization via Istio): validates the Geneva Actions bearer token, proving the request comes from an authorized Geneva Action. Applied to all paths except `/metrics`.
2. **`WithClientPrincipal` middleware**: requires the `X-Ms-Client-Principal-Name` header on specific routes, returning 401 if missing. This header is set by Geneva Actions to identify the user or service principal who triggered the action. The Admin API trusts this header because MISE has already verified the caller is Geneva Actions.

```mermaid
sequenceDiagram
participant User as User/SRE
participant GA as Geneva Actions
participant Istio as Istio Ingress
participant MISE as MISE (ext-authz)
participant Admin as Admin API

User->>GA: Initiate action
Note over GA: Approval mechanisms<br/>(Lockbox, group membership, oncall)
GA->>Istio: Request with GA bearer token +<br/>X-Ms-Client-Principal-Name header
Istio->>MISE: Validate bearer token
MISE-->>Istio: Token valid (caller is GA)
Istio->>Admin: Forward request
Admin->>Admin: WithClientPrincipal middleware<br/>extracts principal name from header
Admin->>Admin: Process request
Admin-->>GA: Response
GA-->>User: Return result
```

## Development Workflow

Expand All @@ -17,13 +63,13 @@ The `Makefile` has access to a set of environment variables representing configu

### Local Run

Using the `make run` target, the Admin API binary can be run locally. At this point, the Admin API does not integrate with any other service like the RP Fronent, CS or Maestro. Hence there are no dedicated dependencies on infrastructure that need to be met upfront. This will change soon.
Using the `make run` target, the Admin API binary can be run locally. At this point, the Admin API does not integrate with any other service like the RP Frontend, CS or Maestro. Hence there are no dedicated dependencies on infrastructure that need to be met upfront. This will change soon.

### Personal DEV Environment deployment

The local code can also be deployed directly into a personal DEV environment by running `make deplioy`. Understand that this requires such an environment to be created first via `make personal-dev-env` from the root of the repository.
The local code can also be deployed directly into a personal DEV environment by running `make deploy`. Understand that this requires such an environment to be created first via `make personal-dev-env` from the root of the repository.

`make deploy` builds a custom developer image from the local code and uploads it to the DEV service ACR (`arohcpsvcdev`) into a developer specific repository. This way developer images will not conflict with other develooper images or CI built ones. The actual deployment is delegated to the pipeline/AdminAPI target in the root of the repository, providing a configuration override for `adminApi.image.repository` and `adminApi.image.digest` respectively.
`make deploy` builds a custom developer image from the local code and uploads it to the DEV service ACR (`arohcpsvcdev`) into a developer specific repository. This way developer images will not conflict with other developer images or CI built ones. The actual deployment is delegated to the pipeline/AdminAPI target in the root of the repository, providing a configuration override for `adminApi.image.repository` and `adminApi.image.digest` respectively.

## Deployment

Expand Down
90 changes: 90 additions & 0 deletions admin/breakglass.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Breakglass Access

Breakglass provides SREs with temporary, identity-scoped kubectl access to HCP clusters for debugging and emergency operations. The Admin API handles session creation and kubeconfig retrieval, while [sessiongate](../sessiongate/README.md) handles the actual proxying of kubectl traffic to the target cluster.

## Lifecycle

**Session creation** (via Geneva Actions → Admin API):

```
1. SRE triggers Geneva Action
2. Geneva Action → Admin API: POST .../breakglass with {"group":"...","ttl":"..."}
3. Admin API creates a Session CR in Kubernetes → returns 202 + Location header
4. SRE/GA polls: GET .../breakglass/{sessionName}/kubeconfig
- Sessiongate controller mints credentials and ensures a network path to the HCP is established
- Admin API returns 202 with status until ready
5. Session ready → Admin API returns 200 with kubeconfig YAML
- kubeconfig server URL points at sessiongate: https://sessiongate.{region}.{zone}/sessiongate/{sessionId}/kas
```

**Cluster access** (via kubectl → Sessiongate):

```
6. SRE uses kubeconfig with kubectl → traffic flows through sessiongate to HCP
```

## API

`{resourceId}` refers to the full Azure resource ID path as described in the [Admin API README](README.md#api-endpoints).

### Create session

```
POST /admin/v1/hcp{resourceId}/breakglass
```

- Required header: `X-Ms-Client-Principal-Name` (identity of the calling user/SP)
- Request body (JSON):
- `group` (required) - the RBAC group for the session (e.g. `aro-sre-csa`)
- `ttl` (required) - session lifetime (e.g. `1h`, `30m`), bounded by server-configured min/max
- Returns `202 Accepted` with a `Location` header pointing to the kubeconfig endpoint

Example:

```json
{"group": "aro-sre-csa", "ttl": "1h"}
```

### Get kubeconfig

```
GET /admin/v1/hcp{resourceId}/breakglass/{sessionName}/kubeconfig
```

- Required header: `X-Ms-Client-Principal-Name`
- While the session is being set up: returns `202 Accepted` with a `Retry-After` header and a JSON body `{"status": "..."}` describing progress
- When ready: returns `200 OK` with `Content-Type: application/yaml` (the kubeconfig) and an `Expires` header with the session expiration time (RFC 3339)

## Cluster Access via Sessiongate

Once the SRE has a kubeconfig, they access the HCP through sessiongate's proxy endpoint. The kubeconfig points at sessiongate, not the Admin API.

```mermaid
sequenceDiagram
participant SRE as SRE (kubectl)
participant Exec as user.exec (kubeconfig)
participant Istio as Istio Ingress
participant MISE as MISE (ext-authz)
participant SG as Sessiongate
participant HCP as HCP KAS

SRE->>Exec: kubectl command
Exec->>Exec: Generate access token<br/>(with upn/oid claim)
Exec->>Istio: Request to /sessiongate/{sessionId}/kas/...
Istio->>MISE: Validate access token
MISE-->>Istio: Token valid
Istio->>Istio: Extract JWT claims to headers<br/>(RequestAuthentication)
Istio->>SG: Forward with X-JWT-Claim-Upn/Oid headers
SG->>SG: Verify claim header matches<br/>session owner
SG->>HCP: Proxy request with session credentials
HCP-->>SG: Response
SG-->>SRE: Response
```

Key points:

- No Geneva Actions involvement - direct access from the SRE's SAW device to the sessiongate proxy endpoint
- The kubeconfig's `user.exec` generates an access token with identity claims
- MISE validates the JWT; Istio's `RequestAuthentication` extracts claims (`upn`, `oid`) into request headers
- Sessiongate's own middleware checks that the claim header matches the session owner before proxying
- Sessiongate proxies requests to the HCP KAS using session-specific credentials
6 changes: 6 additions & 0 deletions admin/deploy/templates/admin.deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ spec:
value: "{{ .Values.fpa.clientId }}"
- name: AUDIT_CONNECT_SOCKET
value: "{{ .Values.audit.connectSocket }}"
- name: SESSIONGATE_NAMESPACE
value: "{{ .Values.sessiongate.namespace }}"
- name: MIN_SESSION_TTL
value: "{{ .Values.sessiongate.minSessionTTL }}"
- name: MAX_SESSION_TTL
value: "{{ .Values.sessiongate.maxSessionTTL }}"
ports:
- containerPort: 8443
name: http
Expand Down
13 changes: 13 additions & 0 deletions admin/deploy/templates/sessiongate.rolebinding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Binds the chart's service account to the sessiongate-session-management
# ClusterRole inside the sessiongate namespace.
# This is a namespaced RoleBinding (not a ClusterRoleBinding) so that
# metadata.namespace takes effect and the granted permissions are limited to
# that namespace instead of applying cluster-wide.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: admin-api-session-mgmt
  namespace: '{{ .Values.sessiongate.namespace }}'
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: sessiongate-session-management
subjects:
  - kind: ServiceAccount
    # Quoted so an empty or special-character value still renders as a string.
    name: '{{ .Values.serviceAccount.name }}'
    namespace: '{{ .Release.Namespace }}'
23 changes: 20 additions & 3 deletions admin/hack/run-as-mi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,27 @@ ${HCPCTL} sc breakglass "${CLUSTER_NAME}" --output "${KUBECONFIG}" --no-shell

AZURE_TENANT_ID=$(az account show -o json | jq .homeTenantId -r)
AZURE_CLIENT_ID=$(kubectl get sa -n aro-hcp-admin-api admin-api -o yaml | yq '.metadata.annotations."azure.workload.identity/client-id"' -r)
SA_TOKEN=$(kubectl create token "${SA_NAME}" --namespace="${NAMESPACE}" --audience api://AzureADTokenExchange)
AZURE_SA_TOKEN=$(kubectl create token "${SA_NAME}" --namespace="${NAMESPACE}" --audience api://AzureADTokenExchange)
KUBE_SA_TOKEN=$(kubectl create token "${SA_NAME}" --namespace="${NAMESPACE}")

export AZURE_CONFIG_DIR="${HOME}/.azure-profile-admin-api"
rm -rf "${AZURE_CONFIG_DIR}"
az login --federated-token "${SA_TOKEN}" --service-principal -u "${AZURE_CLIENT_ID}" -t "${AZURE_TENANT_ID}"
az login --federated-token "${AZURE_SA_TOKEN}" --service-principal -u "${AZURE_CLIENT_ID}" -t "${AZURE_TENANT_ID}"

"$@"
# Add a new context to the kubeconfig that uses the service account token for Kubernetes auth.
# The cluster entry is looked up from the kubeconfig's current context so the new context
# targets the same cluster.
KUBE_CONTEXT_NAME=$(kubectl config current-context --kubeconfig="${KUBECONFIG}")
KUBE_CLUSTER_NAME=$(kubectl config view --kubeconfig="${KUBECONFIG}" -o jsonpath="{.contexts[?(@.name==\"${KUBE_CONTEXT_NAME}\")].context.cluster}")

# Create a new user entry with the SA token
kubectl config set-credentials "${SA_NAME}" --kubeconfig="${KUBECONFIG}" --token="${KUBE_SA_TOKEN}"

# Create a new context that uses the SA credentials
SA_CONTEXT_NAME="${KUBE_CONTEXT_NAME}-sa"
kubectl config set-context "${SA_CONTEXT_NAME}" --kubeconfig="${KUBECONFIG}" --cluster="${KUBE_CLUSTER_NAME}" --user="${SA_NAME}"

# Use the SA context by default
kubectl config use-context "${SA_CONTEXT_NAME}" --kubeconfig="${KUBECONFIG}"

# Print the path of the kubeconfig in use (quoted to avoid word splitting and globbing)
echo "${KUBECONFIG}"

# Run the wrapped command with KUBECONFIG set in its environment
KUBECONFIG="${KUBECONFIG}" "$@"
4 changes: 4 additions & 0 deletions admin/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ resourceGroups:
dependsOn:
- resourceGroup: global
step: mirror-image
externalDependsOn:
- serviceGroup: Microsoft.Azure.ARO.HCP.SessionGate
resourceGroup: service
step: deploy
identityFrom:
resourceGroup: global
step: output
Expand Down
Loading
Loading