diff --git a/.github/workflows/build-and-publish-beta-chart.yml b/.github/workflows/build-and-publish-beta-chart.yml index 9acf2509..9aacd131 100644 --- a/.github/workflows/build-and-publish-beta-chart.yml +++ b/.github/workflows/build-and-publish-beta-chart.yml @@ -5,9 +5,8 @@ on: workflow_dispatch: inputs: version: - description: 'Version to use for the release (beta suffix will be automatically appended (e.g. "0.0.1" + "-beta"))' - required: false - default: '' + description: 'Version to use for the release. Must contain "beta" (e.g. "0.0.1-beta").' + required: true branch: description: 'Branch to use for the release' required: true @@ -42,37 +41,53 @@ jobs: - name: Install Helm uses: azure/setup-helm@v3 - - name: Build Dependencies + - name: Build Insights Controller Dependencies + run: helm dependency update charts/cloudzero-insights-controller/ + + - name: Package Insights Controller Chart + run: helm package charts/cloudzero-insights-controller/ --destination .deploy + + - name: Build Cloudzero Agent Dependencies run: helm dependency update charts/cloudzero-agent/ - # Step 3: Determine Version - - name: Get Github Tag Version - id: version - uses: flatherskevin/semver-action@v1 - with: - incrementLevel: patch - source: tags + - name: Package Cloudzero Agent Chart + run: helm package charts/cloudzero-agent/ --destination .deploy - - name: Determine Chart Version + # Step 3: Validate and Set Version + - name: Validate Input Version run: | - NEW_VERSION=${{ github.event.inputs.version || steps.version.outputs.nextVersion }} - echo "NEW_VERSION=$NEW_VERSION-beta" >> $GITHUB_ENV + if [[ -z "${{ github.event.inputs.version }}" ]]; then + echo "Version input is required." + exit 1 + fi + if [[ "${{ github.event.inputs.version }}" != *"beta"* ]]; then + echo "Version must contain 'beta'. 
Provided version: ${{ github.event.inputs.version }}" + exit 1 + fi + echo "NEW_VERSION=${{ github.event.inputs.version }}" >> $GITHUB_ENV - name: Update Chart Version run: | VERSION_LINE=$(awk '/version:/ && !done {print NR; done=1}' charts/cloudzero-agent/Chart.yaml) sed -i ''$VERSION_LINE's/.*/version: ${{ env.NEW_VERSION }}/' charts/cloudzero-agent/Chart.yaml + - name: Update Chart Version for insights-controller + run: | + VERSION_LINE=$(awk '/version:/ && !done {print NR; done=1}' charts/cloudzero-insights-controller/Chart.yaml) + sed -i ''$VERSION_LINE's/.*/version: ${{ env.NEW_VERSION }}/' charts/cloudzero-insights-controller/Chart.yaml + - name: Validate Release Notes are Present run: | - if [ ! -f "docs/releases/${{ env.NEW_VERSION }}.md" ]; then + if [ ! -f "charts/cloudzero-agent/docs/releases/${{ env.NEW_VERSION }}.md" ]; then echo "Release notes for ${{ env.NEW_VERSION }} are missing. Please create a release notes file at docs/releases/${{ env.NEW_VERSION }}.md" exit 1 fi # Step 4: Package and Commit Chart - name: Package Chart - run: helm package charts/cloudzero-agent/ --destination .deploy + run: | + helm package charts/cloudzero-agent/ --destination .deploy + helm package charts/cloudzero-insights-controller/ --destination .deploy - name: Commit updated Chart.yaml run: | @@ -84,7 +99,13 @@ jobs: continue-on-error: true # Step 7: Handle Artifacts and Update Pages - - name: Upload Chart as Artifact + - name: Upload Insight Controller Chart as Artifact + uses: actions/upload-artifact@v4 + with: + name: insights-controller-chart + path: .deploy/cloudzero-insights-controller-${{ env.NEW_VERSION }}.tgz + + - name: Upload Cloudzero Agent Chart as Artifact uses: actions/upload-artifact@v4 with: name: agent-chart @@ -97,6 +118,7 @@ jobs: - name: Move release Tarball run: | + cp .deploy/cloudzero-insights-controller-${{ env.NEW_VERSION }}.tgz ./beta/ cp .deploy/cloudzero-agent-${{ env.NEW_VERSION }}.tgz ./beta/ rm -fr .deploy @@ -105,7 +127,6 @@ jobs: - 
name: Save Index in GH Pages run: | - # copy the new chart and index.yaml git add beta git commit -m "Updating ${{ env.NEW_VERSION }} Index" git push origin gh-pages @@ -117,13 +138,12 @@ jobs: rm -fr .deploy charts/cloudzero-agent/charts git reset --hard # now checkout docs from main - git checkout main -- charts/cloudzero-agent/docs charts/cloudzero-agent/README.md README.md + git checkout ${{ github.event.inputs.branch }} -- charts/cloudzero-agent/docs charts/cloudzero-agent/README.md README.md git add README.md charts/cloudzero-agent/docs charts/cloudzero-agent/README.md git commit -m "Update docs for ${{ env.NEW_VERSION }}" git push origin gh-pages continue-on-error: true - # Step 5: Create GitHub Release - name: Create Release uses: softprops/action-gh-release@v2 @@ -133,4 +153,4 @@ jobs: files: .deploy/cloudzero-agent-${{ env.NEW_VERSION }}.tgz make_latest: false target_commitish: ${{ env.COMMIT_HASH }} - body_path: ${{ github.workspace }}/docs/releases/${{ env.NEW_VERSION }}.md + body_path: ${{ github.workspace }}/charts/cloudzero-agent/docs/releases/${{ env.NEW_VERSION }}.md diff --git a/.github/workflows/build-and-publish-chart.yml b/.github/workflows/build-and-publish-chart.yml index 0fcdb838..74b50aa8 100644 --- a/.github/workflows/build-and-publish-chart.yml +++ b/.github/workflows/build-and-publish-chart.yml @@ -66,6 +66,11 @@ jobs: VERSION_LINE=$(awk '/version:/ && !done {print NR; done=1}' charts/cloudzero-agent/Chart.yaml) sed -i ''$VERSION_LINE's/.*/version: ${{ env.NEW_VERSION }}/' charts/cloudzero-agent/Chart.yaml + - name: Update Chart Version for insights-controller + run: | + VERSION_LINE=$(awk '/version:/ && !done {print NR; done=1}' charts/cloudzero-insights-controller/Chart.yaml) + sed -i ''$VERSION_LINE's/.*/version: ${{ env.NEW_VERSION }}/' charts/cloudzero-insights-controller/Chart.yaml + - name: Validate Release Notes are Present run: | if [ ! 
-f "charts/cloudzero-agent/docs/releases/${{ env.NEW_VERSION }}.md" ]; then @@ -73,11 +78,16 @@ jobs: exit 1 fi - - name: Build Dependencies + - name: Build Insights Controller Dependencies + run: helm dependency update charts/cloudzero-insights-controller/ + + - name: Package Insights Controller Chart + run: helm package charts/cloudzero-insights-controller/ --destination .deploy + + - name: Build Cloudzero Agent Dependencies run: helm dependency update charts/cloudzero-agent/ - # Step 5: Package and Commit Chart - - name: Package Chart + - name: Package Cloudzero Agent Chart run: helm package charts/cloudzero-agent/ --destination .deploy - name: Get Main Changelog Beginning Hash @@ -104,18 +114,26 @@ jobs: echo "::set-output name=changes::${CHANGES}" # Step 7: Handle Artifacts and Update Pages - - name: Upload Chart as Artifact + - name: Upload Insight Controller Chart as Artifact + uses: actions/upload-artifact@v4 + with: + name: agent-chart + path: .deploy/cloudzero-insights-controller-${{ env.NEW_VERSION }}.tgz + + - name: Upload Cloudzero Agent Chart as Artifact uses: actions/upload-artifact@v4 with: name: agent-chart path: .deploy/cloudzero-agent-${{ env.NEW_VERSION }}.tgz + - name: Checkout GH Pages run: | git checkout -f gh-pages - name: Move release Tarball run: | + cp .deploy/cloudzero-insights-controller-${{ env.NEW_VERSION }}.tgz ./ cp .deploy/cloudzero-agent-${{ env.NEW_VERSION }}.tgz ./ rm -fr .deploy @@ -125,7 +143,7 @@ jobs: - name: Save Index in GH Pages run: | # copy the new chart and index.yaml - git add cloudzero-agent-${{ env.NEW_VERSION }}.tgz index.yaml + git add cloudzero-insights-controller-${{ env.NEW_VERSION }}.tgz cloudzero-agent-${{ env.NEW_VERSION }}.tgz index.yaml git commit -m "Updating ${{ env.NEW_VERSION }} Index" git push origin gh-pages continue-on-error: true diff --git a/.github/workflows/test-chart.yml b/.github/workflows/test-chart.yml index fab253ee..cff2b8d1 100644 --- a/.github/workflows/test-chart.yml +++ 
b/.github/workflows/test-chart.yml @@ -36,6 +36,9 @@ jobs: env: # Agent Chart settings (prom repo is to work around issue with chart-testing tool) PROM_CHART_REPO: https://prometheus-community.github.io/helm-charts + JETSTACK_CHART_REPO: https://charts.jetstack.io + CZ_CHART_REPO: https://cloudzero.github.io/cloudzero-charts + CZ_CHART_BETA_REPO: https://cloudzero.github.io/cloudzero-charts/beta CLUSTER_NAME: cz-node-agent-ci CLOUD_ACCOUNT_ID: '00000000' CZ_API_TOKEN: 'fake-api-token' @@ -45,7 +48,7 @@ jobs: helm dependency update ct lint --debug --charts . \ --chart-repos=kube-state-metrics=$PROM_CHART_REPO \ - --chart-repos=prometheus-node-exporter=$PROM_CHART_REPO \ + --chart-repos=cert-manager=$JETSTACK_CHART_REPO \ --helm-lint-extra-args "--set=existingSecretName=api-token,clusterName=$CLUSTER_NAME,cloudAccountId=$CLOUD_ACCOUNT_ID,region=$REGION" # This job tests the chart on a KinD cluster @@ -108,23 +111,30 @@ jobs: NAMESPACE: monitoring # Agent Chart settings (prom repo is to work around issue with chart-testing tool) PROM_CHART_REPO: https://prometheus-community.github.io/helm-charts + JETSTACK_CHART_REPO: https://charts.jetstack.io + CZ_CHART_REPO: https://cloudzero.github.io/cloudzero-charts + CZ_CHART_BETA_REPO: https://cloudzero.github.io/cloudzero-charts/beta CLUSTER_NAME: cz-node-agent-ci CLOUD_ACCOUNT_ID: '00000000' CZ_API_TOKEN: ${{ secrets.CZ_API_TOKEN || 'fake-api-token' }} REGION: 'us-east-1' - run: | + run: | + kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.16.1/cert-manager.crds.yaml kubectl create namespace $NAMESPACE kubectl create secret -n $NAMESPACE generic api-token --from-literal=value=$CZ_API_TOKEN cd charts/cloudzero-agent helm dependency update ct install --charts . 
\ --chart-repos=kube-state-metrics=$PROM_CHART_REPO \ - --chart-repos=prometheus-node-exporter=$PROM_CHART_REPO \ + --chart-repos=cert-manager=$JETSTACK_CHART_REPO \ --namespace $NAMESPACE \ --helm-extra-set-args "\ --set=existingSecretName=api-token \ + --set=host=dev-api.cloudzero.com \ --set=clusterName=$CLUSTER_NAME \ --set=cloudAccountId=$CLOUD_ACCOUNT_ID \ --set=region=$REGION \ - --set=kube-state-metrics.enabled=true \ - --set=prometheus-node-exporter.enabled=true" + --set=insightsController.enabled=true \ + --set=insightsController.labels.enabled=true \ + --set=insightsController.labels.patterns[0]='.*' \ + " diff --git a/charts/cloudzero-agent/BETA-INSTALLATION.md b/charts/cloudzero-agent/BETA-INSTALLATION.md index cf4f222a..08a09fc7 100644 --- a/charts/cloudzero-agent/BETA-INSTALLATION.md +++ b/charts/cloudzero-agent/BETA-INSTALLATION.md @@ -39,14 +39,7 @@ There are two ways to install a beta version of the chart: This method installs the latest beta version available. ```sh -helm install cloudzero-beta/cloudzero-agent \ - --devel \ - --set existingSecretName= \ - --set clusterName= \ - --set-string cloudAccountId= \ - --set region= \ - --set kube-state-metrics.enabled= \ - --create-namespace +helm install cloudzero-beta/cloudzero-agent -n --create-namespace -f configuration.example.yaml --devel ``` - The `--devel` flag allows Helm to consider beta versions when resolving the chart version. @@ -57,17 +50,10 @@ helm install cloudzero-beta/cloudzero-agent \ If you want to install a specific beta version, specify it using the `--version` flag: ```sh -helm install cloudzero-beta/cloudzero-agent \ - --version \ - --set existingSecretName= \ - --set clusterName= \ - --set-string cloudAccountId= \ - --set region= \ - --set kube-state-metrics.enabled= \ - --create-namespace +helm install cloudzero-beta/cloudzero-agent -n --create-namespace -f configuration.example.yaml --version ``` -- Replace `` with the specific beta version (e.g., `0.0.29-beta`). 
+- Replace `` with the specific beta version (e.g., `1.0.0-beta`). - This method does not require the `--devel` flag since you are explicitly specifying the version. ## Listing All Available Versions diff --git a/charts/cloudzero-agent/Chart.lock b/charts/cloudzero-agent/Chart.lock index a2094f70..72f52640 100644 --- a/charts/cloudzero-agent/Chart.lock +++ b/charts/cloudzero-agent/Chart.lock @@ -2,8 +2,8 @@ dependencies: - name: kube-state-metrics repository: https://prometheus-community.github.io/helm-charts version: 5.15.3 -- name: prometheus-node-exporter - repository: https://prometheus-community.github.io/helm-charts - version: 4.24.0 -digest: sha256:827a33fa07fde17be0bf808e0beba3ca7b23c9fc1960580b2ba6d0ecc0b57a3f -generated: "2024-03-20T11:42:44.034766-04:00" +- name: cert-manager + repository: https://charts.jetstack.io + version: v1.15.3 +digest: sha256:ef28b222788ce38b63857e7427d8f1dbb7c24e6d01e196049ac5af6209d132f6 +generated: "2024-12-12T16:10:45.087982225Z" diff --git a/charts/cloudzero-agent/Chart.yaml b/charts/cloudzero-agent/Chart.yaml index d110bab5..798cf4e0 100644 --- a/charts/cloudzero-agent/Chart.yaml +++ b/charts/cloudzero-agent/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: cloudzero-agent description: A chart for using Prometheus in agent mode to send cluster metrics to the CloudZero platform. 
type: application -version: 0.0.0-dev +version: 1.0.0-beta-5 maintainers: - name: CloudZero email: support@cloudzero.com @@ -11,8 +11,9 @@ dependencies: - name: kube-state-metrics version: "5.15.*" repository: https://prometheus-community.github.io/helm-charts - condition: kube-state-metrics.enabled - - name: prometheus-node-exporter - version: "4.24.*" - repository: https://prometheus-community.github.io/helm-charts - condition: prometheus-node-exporter.enabled + condition: kubeStateMetrics.enabled + alias: kubeStateMetrics + - name: cert-manager + version: v1.15.3 + repository: https://charts.jetstack.io + condition: cert-manager.enabled diff --git a/charts/cloudzero-agent/README.md b/charts/cloudzero-agent/README.md index ccb295cc..59951610 100644 --- a/charts/cloudzero-agent/README.md +++ b/charts/cloudzero-agent/README.md @@ -14,234 +14,57 @@ For the latest release, see [Releases](https://github.com/Cloudzero/cloudzero-ch - Helm 3+ - A CloudZero API key - Each Kubernetes cluster must have a route to the internet and a rule that allows egress from the agent to the CloudZero collector endpoint at https://api.cloudzero.com on port 443 -- A kube-state-metrics exporter running in the cluster, available via Kubernetes Service (see below for details) ## Installation -### Get Helm Repository Info +### Easy Install (Most teams will use this!) -```console -helm repo add cloudzero https://cloudzero.github.io/cloudzero-charts -helm repo update -``` - -_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._ - -### Install Helm Chart - -The chart can be installed directly with Helm or any other common Kubernetes deployment tools. 
- -If installing with Helm directly, the following command will install the chart: - -```console -helm install cloudzero/cloudzero-agent \ - --set existingSecretName= \ - --set clusterName= \ - --set-string cloudAccountId= \ - --set region= \ - # optionally deploy kube-state-metrics if it doesn't exist in the cluster already - --set kube-state-metrics.enabled= -``` - -### Update Helm Chart -Alternatively, if you are updating an existing installation, pull the latest chart information first: - -```console -helm repo update -``` - -Next, upgrade the installation to the latest chart version: - -```console -helm upgrade cloudzero/cloudzero-agent \ - --set existingSecretName= \ - --set clusterName= \ - --set-string cloudAccountId= \ - --set region= \ - --set kube-state-metrics.enabled= -``` - -### Mandatory Values - -There are several mandatory values that must be specified for the chart to install properly. Below are the required settings along with strategies for providing custom values during installation: - -| Key | Type | Default | Description | -|-------------------|--------|-----------------------|-------------------------------------------------------------------------------------------------------------------------| -| cloudAccountId | string | `nil` | Account ID in AWS or Subscription ID in Azure or Project Number in GCP where the cluster is running. Must be a string due to Helm limitations. | -| clusterName | string | `nil` | Name of the cluster. Must be RFC 1123 compliant. | -| host | string | `"api.cloudzero.com"` | CloudZero host to send metrics to. | -| apiKey | string | `nil` | The CloudZero API key to use for exporting metrics. Only used if `existingSecretName` is not set. | -| existingSecretName| string | `nil` | Name of the secret that contains the CloudZero API key. Required if not providing the API key via `apiKey`. | -| region | string | `nil` | Region where the cluster is running (e.g., `us-east-1`, `eastus`). 
For more information, see AWS or Azure documentation. | - -#### Overriding Default Values - -Default values are specified in the chart's `values.yaml` file. If you need to change any of these values, it is recommended to create a `values-override.yaml` file for your customizations. - -##### Using the `--values` Flag - -You can use the `--values` (or short form `-f`) flag in your Helm commands to override values in the chart with a new file. Specify the name of the file after the `--values` flag: - -```console -helm install cloudzero/cloudzero-agent \ - --set existingSecretName= \ - --set clusterName= \ - --set-string cloudAccountId= \ - --set region= \ - -f values-override.yaml -``` - -Ensure `values-override.yaml` contains only the values you wish to override from `values.yaml`. - -> Note it is possible to save values for different environments, or based on other criteria into seperate values files and multiple files using the `-f` helm parameters. - -##### Using the `--set` Flag +To use the chart or a beta version, you must add the repository to Helm. Refer to the [`helm repo`](https://helm.sh/docs/helm/helm_repo/) documentation for command details. -You can use the `--set` flag in Helm commands to directly set or override specific values from `values.yaml`. Use dot notation to specify nested values: +#### 1. Install the Helm Chart ```console -helm install cloudzero/cloudzero-agent \ - --set existingSecretName= \ - --set clusterName= \ - --set-string cloudAccountId= \ - --set region= \ - --set server.resources.limits.memory=2048Mi \ - -f values-override.yaml -``` - -### Metric Exporters - -This chart depends on metrics from [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics). There are two installation options for providing the `kube-state-metrics` metrics to the cloudzero-agent. If you don't know which option is right for you, use the second option. 
- -#### Option 1 (default): Use existing kube-state-metrics - -Using an existing `kube-state-metrics` exporter may be desirable for minimizing cost. By default, the `cloudzero-agent` will attempt to find an existing `kube-state-metrics` K8s Service by searching for a K8s Service with the annotation `prometheus.io/scrape: "true"`. If an existing `kube-state-metrics` Service exists but does not have that annotation and you do not wish to add it, see the **Custom Scrape Configs** section below. - -In addition to the above, the existing `kube-state-metrics` Service address should be added in `values-override.yaml` as shown below so that the `cloudzero-agent` can validate the connection: - -```yaml -validator: - serviceEndpoints: - kubeStateMetrics: ..svc.cluster.local:8080 +helm repo add cloudzero https://cloudzero.github.io/cloudzero-charts +helm install cloudzero cloudzero/cloudzero-agent \ + --set apiKey=\ + --set clusterName= ``` +### Advanced Install (Usually when you have specific security requirements.) -#### Option 2: Use kube-state-metrics subchart - -Alternatively, deploy the `kube-state-metrics` subchart that comes packaged with this chart. This is done by enabling settings in `values-override.yaml` as shown: +#### 1. Create and Configure a Values File ```yaml -kube-state-metrics: - enabled: true -``` -In this option, no additional configuration is required in the `validator` field. - -### Secret Management - -The chart requires a CloudZero API key to send metric data. Admins can retrieve API keys [here](https://app.cloudzero.com/organization/api-keys). +# -- values.yaml -The API key can be supplied as an existing secret (default) or created by the chart. Ensure the Secret is in the same namespace as the chart and follows this format: +# -- clusterName is required to identify this cluster in the CloudZero dashboard. +clusterName: -**values-override.yaml** -```yaml -data: - value: -``` +# -- apiKey is the CloudZero apiKey generated in the CloudZero platform. 
+apiKey: -Example of creating a secret: -```console -kubectl create secret -n example-namespace generic example-secret-name --from-literal=value= +# -- Other values here... ``` -The secret can then be used with `existingSecretName`. +Default values are specified in the chart's `values.yaml` file. Please reference this file for available override values. ### Memory Sizing Please see the [sizing guide](./docs/sizing-guide.md) in the docs directory. -#### Passing Values to Subcharts - -Values can be passed to subcharts like [kube-state-metrics](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-state-metrics/values.yaml) by adding entries in `values-override.yaml` as per their specifications. - -A common addition may be to pull the container images from custom image registries/repositories: - -`values-override.yaml` -```yaml -kube-state-metrics: - enabled: true - image: - registry: my-custom-registry.io - repository: my-custom-kube-state-metrics/kube-state-metrics -``` - -### Custom Scrape Configs - -If running without the default `kube-state-metrics` exporter subchart and your existing `kube-state-metrics` deployment does not have the required `prometheus.io/scrape: "true"`, adjust the Prometheus scrape configs as shown: +### Update Helm Chart +If you are updating an existing installation, pull the latest chart information: -`values-override.yaml` -```yaml -prometheusConfig: - scrapeJobs: - kubeStateMetrics: - enabled: false # this disables the default kube-state-metrics scrape job, which will be replaced by an entry in additionalScrapeJobs - additionalScrapeJobs: - - job_name: custom-kube-state-metrics - honor_timestamps: true - scrape_interval: 1m - scrape_timeout: 10s - metrics_path: /metrics - static_configs: - - targets: - - 'my-kube-state-metrics-service.default.svc.cluster.local:8080' - relabel_configs: - - separator: ; - regex: __meta_kubernetes_service_label_(.+) - replacement: $1 - action: labelmap - - source_labels: 
[__meta_kubernetes_namespace] - separator: ; - regex: (.*) - target_label: namespace - replacement: $1 - action: replace - - source_labels: [__meta_kubernetes_service_name] - separator: ; - regex: (.*) - target_label: service - replacement: $1 - action: replace - - source_labels: [__meta_kubernetes_pod_node_name] - separator: ; - regex: (.*) - target_label: node - replacement: $1 - action: replace +```console +helm repo update ``` -### Exporting Pod Labels - -Pod labels can be exported as metrics using kube-state-metrics. To customize the labels for export, modify the values-override.yaml file as shown below: - -**Example: Exporting only the pod labels named foo and bar:** +Next, upgrade the installation to the latest chart version: -```yaml -kube-state-metrics: - extraArgs: - - --metric-labels-allowlist=pods=[foo,bar] +```console +helm upgrade --install cloudzero cloudzero/cloudzero-agent ``` -> This is preferable to including all labels with `*` because the performance and memory impact is reduced. Regular expression matching is not currently supported. See the `kube-state-metrics` [documentation](https://github.com/kubernetes/kube-state-metrics/blob/main/docs/developer/cli-arguments.md) for more details. - -⚠️ Important: If you are running an existing `kube-state-metrics` instance, ensure that the labels you want to use are whitelisted. kube-state-metrics version 2.x and above will **_not_** export the `kube_pod_labels` metrics unless they are explicitly allowed. This prevents the use of those labels for cost allocation and other purposes. 
Make sure you have configured the labels at the appropriate level using the --metric-labels-allowlist parameter: - -> eg: `- --metric-labels-allowlist=namespaces=[*],pods=[*],deployments=[app.kubernetes.io/*,k8s.*]` - -## Dependencies - -| Repository | Name | Version | -|----------------------------------------------------|--------------------------|---------| -| https://prometheus-community.github.io/helm-charts | kube-state-metrics | 5.15.* | - ## Enabling Release Notifications To receive a notification when a new version of the chart is [released](https://github.com/Cloudzero/cloudzero-charts/releases), you can [watch the repository](https://docs.github.com/en/account-and-profile/managing-subscriptions-and-notifications-on-github/setting-up-notifications/configuring-notifications#configuring-your-watch-settings-for-an-individual-repository): @@ -251,47 +74,6 @@ To receive a notification when a new version of the chart is [released](https:// 3. Check the **Releases** box. 4. Select **Apply**. - -## Troubleshooting - -### Issue -I've deployed the chart, but I don't see Kubernetes data in CloudZero. - -## Resolution -This can happen for a number of reasons; see below for solutions to the most common problems - -### Ensure kube-state-metrics is deployed correctly - -1. Review the **Metric Exporters** section. -2. If opting for **Option 1** - - Is kube-state-metrics installed? - ```bash - kubectl get services --all-namespaces | grep kube-state-metrics - ``` - If the above command does not return any services, install a `kube-state-metrics` exporter, or use **Option 2** in the **Metric Exporters** section. - -3. If opting for **Option 2**, ensure that `kube-state-metrics.enabled=true` is set as an annotation on the Service. -4. Ensure the cloudzero-agent pod can find the `kube-state-metrics` Service. 
- Run the following command: - ``` - kubectl get services -A -o jsonpath='{range .items[?(@.metadata.annotations.prometheus\.io/scrape=="true")]}{.metadata.name}{" in "}{.metadata.namespace}{"\n"}{end}' - ``` - If this does not return a `kube-state-metrics` Service, then either annotate the existing Service found in Step 2 with `prometheus.io/scrape: "true"`, or following the instructions in the **Custom Scrape Configs** section above. -5. Ensure connectivity between the `cloudzero-agent` pod and the `kube-state-metrics` Service. - ``` - SERVER_POD=$(kubectl get pod -l app.kubernetes.io/name=cloudzero-agent -o jsonpath='{.items[0].metadata.name}') - kubectl exec -it -n $SERVER_POD -- wget -qO- ..svc.cluster.local:8080/metrics - ``` - The request should return a 200 response with a list of metrics prefixed with `kube_`, i.e., `kube_pod_info`. If not, ensure that the `kube-state-metrics` deployment is configured correctly. - -### Issue -I have Kubernetes data in CloudZero, but I don't see Kubernetes labels as Dimensions. - -## Resolution -Note that -1. Only labels on Pods are currently supported, and -2. Labels are "opt-in"; see the **Exporting Pod Labels** section for details. - ## Useful References - [Memory Sizing Guide](./docs/sizing-guide.md) diff --git a/charts/cloudzero-agent/configuration.example.yaml b/charts/cloudzero-agent/configuration.example.yaml new file mode 100644 index 00000000..114cdf5f --- /dev/null +++ b/charts/cloudzero-agent/configuration.example.yaml @@ -0,0 +1,26 @@ +# -- Account ID of the account the cluster is running in. This must be a string - even if it is a number in your system. +cloudAccountId: null +# -- Name of the cluster. +clusterName: null +# -- Region the cluster is running in. +region: null +# -- CloudZero API key. Required if existingSecretName is null. +apiKey: null +# -- If set, the agent will use the API key in this Secret to authenticate with CloudZero. 
+existingSecretName: null + +# label and annotation configuration: +insightsController: + # -- By default, a ValidatingAdmissionWebhook will be deployed that records all created labels and annotations + enabled: true + labels: + # -- This value MUST be set to either true or false. The installation will fail otherwise + enabled: null + # -- This value MUST be set to a list of regular expressions which will be used to gather labels from pods, deployments, statefulsets, daemonsets, cronjobs, jobs, nodes, and namespaces + patterns: + # - '.*' # -- This option enables gathering ALL labels from the above resources. Use with caution, as the number of labels can be large. + annotations: + # -- By default, the gathering of annotations is not enabled. To enable, set this field to true + enabled: false + patterns: + - '.*' diff --git a/charts/cloudzero-agent/docs/releases/1.0.0-beta-4.md b/charts/cloudzero-agent/docs/releases/1.0.0-beta-4.md new file mode 100644 index 00000000..4b8de8c0 --- /dev/null +++ b/charts/cloudzero-agent/docs/releases/1.0.0-beta-4.md @@ -0,0 +1,13 @@ +## [1.0.0-beta-4](https://github.com/Cloudzero/cloudzero-agent/compare/v1.0.1-beta...v1.0.0-beta-4) (2024-12-12) + +The Insights controller now exposes a Prometheus Metrics endpoint, enabling CloudZero to monitor its operations. + +### Upgrade Steps +* Upgrade with: +```sh +helm upgrade --install cloudzero-beta/cloudzero-agent -n --create-namespace -f configuration.example.yaml --version 1.0.0-beta-4 +``` +For more details, see the [beta installation instructions](https://github.com/Cloudzero/cloudzero-charts/blob/develop/charts/cloudzero-agent/BETA-INSTALLATION.md). + +### Improvements +* Added insights controller scrape configuration for operational monitoring. More information is available in the [insights controller documentation](https://github.com/Cloudzero/cloudzero-insights-controller/blob/develop/docs/statistics.md). 
diff --git a/charts/cloudzero-agent/docs/releases/1.0.0-beta-5.md b/charts/cloudzero-agent/docs/releases/1.0.0-beta-5.md new file mode 100644 index 00000000..2b8ca6d5 --- /dev/null +++ b/charts/cloudzero-agent/docs/releases/1.0.0-beta-5.md @@ -0,0 +1,18 @@ +## [1.0.0-beta-5](https://github.com/Cloudzero/cloudzero-agent/compare/v1.0.1-beta...v1.0.0-beta-5) (2024-12-12) + +The following new features are available in this version of the Beta Chart release: + +* **Automatic detection and reconfiguration of secrets rotation.** +* **Automatic detection and reconfiguration of TLS Certificate rotation.** + +### Upgrade Steps +* Upgrade with: +```sh +helm upgrade --install cloudzero-beta/cloudzero-agent -n --create-namespace -f configuration.example.yaml --version 1.0.0-beta-5 +``` +For more details, see the [beta installation instructions](https://github.com/Cloudzero/cloudzero-charts/blob/develop/charts/cloudzero-agent/BETA-INSTALLATION.md). + +### Improvements + +* **Availability Enhancement**: Healthcheck support ensures that requests are only forwarded to replica instances that are ready to accept work. +* **Security Policy Enhancements**: The application can now react to changes in the Cloudzero API Secret or TLS Certificates. In production environments, these secret values will rotate and update periodically. Instead of restarting the service, which can be costly, the application can now react to key changes and reinitialize the related layer. diff --git a/charts/cloudzero-agent/docs/releases/1.0.0-beta.md b/charts/cloudzero-agent/docs/releases/1.0.0-beta.md new file mode 100644 index 00000000..cf4321c5 --- /dev/null +++ b/charts/cloudzero-agent/docs/releases/1.0.0-beta.md @@ -0,0 +1,53 @@ +## [1.0.0](https://github.com/cloudzero/cloudzero-charts/compare/v0.0.29...v1.0.0) (2024-11-12) + +Adds a subchart, `cloudzero-insights-controller`, that allows the chart to gather labels and annotations from a variety of Kubernetes resources. 
+ +### Upgrade Steps +Upgrading to the `1.0.0-beta` version requires the following migration steps of settings from previous versions: +* Note that this is a beta release; follow the instructions in the [beta-installation](https://github.com/Cloudzero/cloudzero-charts/blob/develop/charts/cloudzero-agent/BETA-INSTALLATION.md#adding-the-beta-helm-repository) document. +* Set the argument `tags.labels.enabled=true|false` if exporting labels for k8s resources. See [Install Helm Chart](https://github.com/Cloudzero/cloudzero-charts/tree/develop/charts/cloudzero-agent#install-helm-chart) for details. +* Move `apiKey` or `existingSecretName` arguments to `global.apiKey` or `global.existingSecretName`. + +An example `configuration-example.yaml` file: +```yaml +# unchanged: +cloudAccountId: YOUR_CLOUD_ACCOUNT_ID +clusterName: YOUR_CLUSTER_NAME +region: YOUR_CLOUD_REGION + +# changed +# apiKey: YOUR_CLOUDZERO_API_KEY <-- No longer set! +# existingSecretName: YOUR_EXISTING_API_KEY_K8S_SECRET <-- No longer set! +global: + apiKey: YOUR_CLOUDZERO_API_KEY #<-- API key now set here + existingSecretName: YOUR_EXISTING_API_KEY_K8S_SECRET #<-- existing secret name now set here + +# kube-state-metrics: +# extraArgs: <-- No longer set! +# - --metric-labels-allowlist=pods=[foo,bar] +tags: + enabled: true + labels: + enabled: true + patterns: + - '^foo$' #<-- Setting to export "foo=bar" label now set here +``` +* Upgrade with: +```sh +helm upgrade --install -n cloudzero-agent cloudzero-beta -f configuration-example.yaml +``` + +### Breaking Changes +* Labels export configuration method has changed + * Previously, pod labels were exported using the `kube-state-metrics.extraArgs` field. + * Pod labels must now be configured using the `tags.labels` section. See the [Labels and Annotations](https://github.com/Cloudzero/cloudzero-charts/tree/develop/charts/cloudzero-agent#labels-and-annotations) section for details +* API key management arguments have moved to the `global` section. 
+ * Previously, an `apiKey` or `existingSecretName` argument could be passed to the chart. This is no longer allowed; those arguments should instead be passed as `global.apiKey` and `global.existingSecretName`, respectively. + +### New Features +* **Labels and Annotations:** A subchart `cloudzero-insights-controller` is added, which deploys one or more `ValidatingWebhookConfiguration` resources + * Allows users to export labels/annotations from pods, deployments, daemonsets, statefulsets, jobs, cronjobs, namespaces, and nodes. + * Supports filtering labels/annotations by regular expressions using the `tags.labels.patterns` and/or `tags.annotations.patterns` array. + +### Other Changes +* **CloudZero Metrics:** CloudZero State Metrics is enabled/installed by default. \ No newline at end of file diff --git a/charts/cloudzero-agent/docs/releases/1.0.1-beta.md b/charts/cloudzero-agent/docs/releases/1.0.1-beta.md new file mode 100644 index 00000000..a3fa540e --- /dev/null +++ b/charts/cloudzero-agent/docs/releases/1.0.1-beta.md @@ -0,0 +1,17 @@ +## [1.0.1-beta](https://github.com/Cloudzero/cloudzero-insights-controller/compare/v1.0.0-beta...v1.0.1-beta) (2024-11-17) + +Bug fixes and improvements, including an upgrade to version `0.0.2` of the `cloudzero-insights-controller` image. + +### Upgrade Steps +* Upgrade with: +```sh +helm upgrade --install -n cloudzero-agent cloudzero-beta -f configuration-example.yaml +``` +See the [beta installation instructions](https://github.com/Cloudzero/cloudzero-charts/blob/develop/charts/cloudzero-agent/BETA-INSTALLATION.md) for further detail + +### Bug Fixes +* **Remove Duplicate Service Account Name:** Removes a duplicate entry for Service Account used by the server Deployment. + +### Improvements +* **Increase default replica count for insights-controller server:** Increases the default replica count from 1 to 3 for high availability. 
+* **Initialization Job Uses Chart Service Account:** The `init-job` now uses the Service Account created in this chart instead of the default Service Account, which is required by some security policies. diff --git a/charts/cloudzero-agent/docs/releases/1.0.2-beta.md b/charts/cloudzero-agent/docs/releases/1.0.2-beta.md new file mode 100644 index 00000000..c6402b31 --- /dev/null +++ b/charts/cloudzero-agent/docs/releases/1.0.2-beta.md @@ -0,0 +1,13 @@ +## [1.0.2-beta](https://github.com/Cloudzero/cloudzero-agent/compare/v1.0.1-beta...v1.0.2-beta) (2024-11-20) + +The internal `kube-state-metrics` is now renamed to `cloudzero-state-metrics`. It is enabled by default and set as a static target for the agent. + +### Upgrade Steps +* Upgrade with: +```sh +helm upgrade --install cloudzero-beta/cloudzero-agent -n --create-namespace -f configuration.example.yaml --version 1.0.2-beta +``` +See the [beta installation instructions](https://github.com/Cloudzero/cloudzero-charts/blob/develop/charts/cloudzero-agent/BETA-INSTALLATION.md) for further detail + +### Improvements +* **CloudZero Metrics:** The `cloudzero-state-metrics` deployment is enabled/installed by default and set as a static target, improving reliability and performance. diff --git a/charts/cloudzero-agent/docs/releases/1.0.3-beta.md b/charts/cloudzero-agent/docs/releases/1.0.3-beta.md new file mode 100644 index 00000000..e82381ef --- /dev/null +++ b/charts/cloudzero-agent/docs/releases/1.0.3-beta.md @@ -0,0 +1,13 @@ +## [1.0.3-beta](https://github.com/Cloudzero/cloudzero-agent/compare/v1.0.1-beta...v1.0.3-beta) (2024-11-20) + +The Agent now validates the existence of all required KSM metrics during the `post-start` phase. 
+ +### Upgrade Steps +* Upgrade with: +```sh +helm upgrade --install cloudzero-beta/cloudzero-agent -n --create-namespace -f configuration.example.yaml --version 1.0.3-beta +``` +See the [beta installation instructions](https://github.com/Cloudzero/cloudzero-charts/blob/develop/charts/cloudzero-agent/BETA-INSTALLATION.md) for further detail + +### Improvements +* The Validator check (kube_state_metrics_reachable) now validates the existence of all required KSM metrics. diff --git a/charts/cloudzero-agent/templates/_helpers.tpl b/charts/cloudzero-agent/templates/_helpers.tpl index 0cf200b6..e75d7c64 100644 --- a/charts/cloudzero-agent/templates/_helpers.tpl +++ b/charts/cloudzero-agent/templates/_helpers.tpl @@ -19,11 +19,18 @@ Create chart name and version as used by the chart label. {{/* Define the path and filename on the container filesystem which holds the CloudZero API key */}} {{ define "cloudzero-agent.secretFileFullPath" -}} -{{ printf "%s%s" .Values.server.containerSecretFilePath .Values.server.containerSecretFileName }} +{{ printf "%s%s" .Values.serverConfig.containerSecretFilePath .Values.serverConfig.containerSecretFileName }} {{- end}} +{{/* +Name for the validating webhook +*/}} +{{- define "cloudzero-agent.validatingWebhookName" -}} +{{- printf "%s.%s.svc" (include "cloudzero-agent.validatingWebhookConfigName" .) .Release.Namespace }} +{{- end }} + {{ define "cloudzero-agent.configMapName" -}} -{{ .Values.prometheusConfig.configMapNameOverride | default (printf "%s-configuration" .Release.Name) }} +{{ .Values.configMapNameOverride | default (printf "%s-configuration" .Release.Name) }} {{- end}} {{ define "cloudzero-agent.validatorConfigMapName" -}} @@ -71,7 +78,6 @@ app.kubernetes.io/part-of: {{ include "cloudzero-agent.name" . }} {{ include "cloudzero-agent.common.metaLabels" . 
}} {{- end -}} - {{/* Define the cloudzero-agent.namespace template if set with forceNamespace or .Release.Namespace is set */}} @@ -82,11 +88,11 @@ Define the cloudzero-agent.namespace template if set with forceNamespace or .Rel {{/* Create the name of the service account to use for the server component */}} -{{- define "cloudzero-agent.serviceAccountName.server" -}} -{{- if .Values.serviceAccounts.server.create -}} - {{ default (include "cloudzero-agent.server.fullname" .) .Values.serviceAccounts.server.name }} +{{- define "cloudzero-agent.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "cloudzero-agent.server.fullname" .) .Values.serviceAccount.name }} {{- else -}} - {{ default "default" .Values.serviceAccounts.server.name }} + {{ default "default" .Values.serviceAccount.name }} {{- end -}} {{- end -}} @@ -149,3 +155,122 @@ Required metric labels {{- $result := join "|" $total -}} {{- $result -}} {{- end -}} + + +{{/* +Insights Controller +*/}} + +{{/* +Create common matchLabels for webhook server +*/}} +{{- define "cloudzero-agent.insightsController.common.matchLabels" -}} +app.kubernetes.io/name: {{ include "cloudzero-agent.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "cloudzero-agent.insightsController.server.matchLabels" -}} +app.kubernetes.io/component: {{ .Values.insightsController.server.name }} +{{ include "cloudzero-agent.common.matchLabels" . }} +{{- end -}} + +{{- define "cloudzero-agent.insightsController.initJob.matchLabels" -}} +app.kubernetes.io/component: {{ include "cloudzero-agent.initJobName" . }} +{{ include "cloudzero-agent.common.matchLabels" . }} +{{- end -}} + +{{/* +Service selector labels +*/}} +{{- define "cloudzero-agent.selectorLabels" -}} +{{ include "cloudzero-agent.common.matchLabels" . }} +{{ include "cloudzero-agent.insightsController.server.matchLabels" . 
}} +{{- end }} + +{{- define "cloudzero-agent.insightsController.labels" -}} +{{ include "cloudzero-agent.insightsController.server.matchLabels" . }} +{{ include "cloudzero-agent.common.metaLabels" . }} +{{- end -}} + +{{/* +Create a fully qualified webhook server name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "cloudzero-agent.insightsController.server.webhookFullname" -}} +{{- if .Values.server.fullnameOverride -}} +{{- .Values.server.fullnameOverride | trunc 63 | trimSuffix "-" -}}-webhook +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- printf "%s-%s" .Release.Name .Values.insightsController.server.name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s-%s" .Release.Name $name .Values.insightsController.server.name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Name for the webhook server service +*/}} +{{- define "cloudzero-agent.serviceName" -}} +{{- printf "%s-svc" (include "cloudzero-agent.insightsController.server.webhookFullname" .) }} +{{- end }} + +{{/* +Name for the validating webhook configuration resource +*/}} +{{- define "cloudzero-agent.validatingWebhookConfigName" -}} +{{- printf "%s-webhook" (include "cloudzero-agent.insightsController.server.webhookFullname" .) 
}} +{{- end }} + +{{/* +Name for the certificate secret +*/}} +{{- define "cloudzero-agent.tlsSecretName" -}} +{{- default (printf "%s-tls" (include "cloudzero-agent.insightsController.server.webhookFullname" .)) .Values.insightsController.server.tls.nameOverride }} +{{- end }} + + +{{ define "cloudzero-agent.webhookConfigMapName" -}} +{{ .Values.insightsController.ConfigMapNameOverride | default (printf "%s-webhook-configuration" .Release.Name) }} +{{- end}} + +{{/* +Mount path for the insights server configuration file. Uses insightsController.configurationMountPath when set, otherwise /etc/(chart name)-insights. +*/}} +{{- define "cloudzero-agent.insightsController.configurationMountPath" -}} +{{- default (printf "/etc/%s-insights" .Chart.Name) .Values.insightsController.configurationMountPath }} +{{- end }} + +{{/* +Name for the issuer resource +*/}} +{{- define "cloudzero-agent.issuerName" -}} +{{- printf "%s-issuer" (include "cloudzero-agent.insightsController.server.webhookFullname" .) }} +{{- end }} + +{{/* +Name for the job resource +*/}} +{{- define "cloudzero-agent.initJobName" -}} +{{- printf "%s-init" (include "cloudzero-agent.insightsController.server.webhookFullname" .) }} +{{- end }} + +{{/* +Annotations for the webhooks: the user-supplied insightsController.webhooks.annotations, plus the cert-manager CA injection annotation when both the certificate and the issuer are enabled. +*/}} +{{- define "cloudzero-agent.webhooks.annotations" -}} +{{- if .Values.insightsController.webhooks.annotations }} +{{ toYaml .Values.insightsController.webhooks.annotations }} +{{- end }} +{{- if and .Values.insightsController.certificate.enabled .Values.insightsController.issuer.enabled }} +cert-manager.io/inject-ca-from: {{ .Values.insightsController.webhooks.caInjection | default (printf "%s/%s" .Release.Namespace (include "cloudzero-agent.certificateName" .)) }} +{{- end }} +{{- end }} + +{{/* +Name for the certificate resource +*/}} +{{- define "cloudzero-agent.certificateName" -}} +{{- printf "%s-certificate" (include "cloudzero-agent.insightsController.server.webhookFullname" .) 
}} +{{- end }} diff --git a/charts/cloudzero-agent/templates/certificate.yaml b/charts/cloudzero-agent/templates/certificate.yaml new file mode 100644 index 00000000..6d0e6b4a --- /dev/null +++ b/charts/cloudzero-agent/templates/certificate.yaml @@ -0,0 +1,28 @@ +{{ if and .Values.insightsController.certificate.enabled .Values.insightsController.enabled }} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ include "cloudzero-agent.certificateName" . }} + namespace: {{ .Release.Namespace }} +spec: + secretName: {{ include "cloudzero-agent.tlsSecretName" .}} + secretTemplate: + {{- with .Values.secretAnnotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + labels: + {{- include "cloudzero-agent.insightsController.labels" . | nindent 6 }} + privateKey: + algorithm: RSA + encoding: PKCS1 + size: 2048 + duration: 2160h # 90d + renewBefore: 360h # 15d + dnsNames: + - {{ include "cloudzero-agent.serviceName" . }}.{{ .Release.Namespace }}.svc + issuerRef: + name: {{ include "cloudzero-agent.issuerName" . }} + kind: Issuer +{{ end }} diff --git a/charts/cloudzero-agent/templates/clusterrole.yaml b/charts/cloudzero-agent/templates/clusterrole.yaml index baee8c70..52806d6d 100644 --- a/charts/cloudzero-agent/templates/clusterrole.yaml +++ b/charts/cloudzero-agent/templates/clusterrole.yaml @@ -6,10 +6,28 @@ metadata: {{- include "cloudzero-agent.server.labels" . | nindent 4 }} name: {{ include "cloudzero-agent.clusterRoleName" . 
}} rules: + - apiGroups: + - "apps" + resources: + - "deployments" + - "statefulsets" + - "daemonsets" + verbs: + - "get" + - "list" + - apiGroups: + - "batch" + resources: + - "jobs" + - "cronjobs" + verbs: + - "get" + - "list" - apiGroups: - "" resources: - endpoints + - namespaces - nodes - nodes/proxy - nodes/metrics diff --git a/charts/cloudzero-agent/templates/clusterrolebinding.yaml b/charts/cloudzero-agent/templates/clusterrolebinding.yaml index 081a31c8..330fd930 100644 --- a/charts/cloudzero-agent/templates/clusterrolebinding.yaml +++ b/charts/cloudzero-agent/templates/clusterrolebinding.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.rbac.create (empty .Values.server.namespaces) (empty .Values.server.useExistingClusterRoleName) -}} +{{- if and .Values.rbac.create -}} apiVersion: {{ template "cloudzero-agent.rbac.apiVersion" . }} kind: ClusterRoleBinding metadata: @@ -7,7 +7,7 @@ metadata: name: {{ include "cloudzero-agent.clusterRoleName" . }} subjects: - kind: ServiceAccount - name: {{ template "cloudzero-agent.serviceAccountName.server" . }} + name: {{ template "cloudzero-agent.serviceAccountName" . }} namespace: {{ include "cloudzero-agent.namespace" . 
}} roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/charts/cloudzero-agent/templates/cm.yaml b/charts/cloudzero-agent/templates/cm.yaml index e73e816d..35f93d00 100644 --- a/charts/cloudzero-agent/templates/cm.yaml +++ b/charts/cloudzero-agent/templates/cm.yaml @@ -18,11 +18,10 @@ data: scrape_interval: {{ .Values.prometheusConfig.globalScrapeInterval }} scrape_configs: {{- if .Values.prometheusConfig.scrapeJobs.kubeStateMetrics.enabled }} - - job_name: cloudzero-service-endpoints # kube_*, node_* metrics - honor_labels: true + - job_name: static-kube-state-metrics honor_timestamps: true track_timestamps_staleness: false - scrape_interval: {{ .Values.prometheusConfig.scrapeJobs.kubeStateMetrics.scrapeInterval }} + scrape_interval: 1m scrape_timeout: 10s scrape_protocols: - OpenMetricsText1.0.0 @@ -34,38 +33,6 @@ data: follow_redirects: true enable_http2: true relabel_configs: - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] - separator: ; - regex: "true" - replacement: $1 - action: keep - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow] - separator: ; - regex: "true" - replacement: $1 - action: drop - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] - separator: ; - regex: (https?) 
- target_label: __scheme__ - replacement: $1 - action: replace - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] - separator: ; - regex: (.+) - target_label: __metrics_path__ - replacement: $1 - action: replace - - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] - separator: ; - regex: (.+?)(?::\d+)?;(\d+) - target_label: __address__ - replacement: $1:$2 - action: replace - - separator: ; - regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+) - replacement: __param_$1 - action: labelmap - separator: ; regex: __meta_kubernetes_service_label_(.+) replacement: $1 @@ -92,13 +59,13 @@ data: - source_labels: [__name__] regex: "^({{ join "|" .Values.kubeMetrics }})$" action: keep - - action: labelkeep - regex: "^({{ include "cloudzero-agent.requiredMetricLabels" . }})$" - kubernetes_sd_configs: - - role: endpoints - kubeconfig_file: "" - follow_redirects: true - enable_http2: true + - separator: ; + regex: ^(board_asset_tag|container|created_by_kind|created_by_name|image|instance|name|namespace|node|node_kubernetes_io_instance_type|pod|product_name|provider_id|resource|unit|uid|_.*|label_.*|app.kubernetes.io/*|k8s.*)$ + replacement: $1 + action: labelkeep + static_configs: + - targets: + - {{ printf "%s.%s.svc.cluster.local:%d" .Values.kubeStateMetrics.fullnameOverride .Release.Namespace (int .Values.kubeStateMetrics.service.port) }} {{- end }} {{- if .Values.prometheusConfig.scrapeJobs.cadvisor.enabled }} - job_name: cloudzero-nodes-cadvisor # container_* metrics @@ -151,11 +118,30 @@ data: kubeconfig_file: "" follow_redirects: true enable_http2: true - {{- end }} - {{- if .Values.prometheusConfig.scrapeJobs.additionalScrapeJobs -}} - {{ toYaml .Values.prometheusConfig.scrapeJobs.additionalScrapeJobs | toString | nindent 6 }} - {{- end}} - {{- end}} + {{- end }} + {{- if .Values.insightsController.enabled }} + - job_name: cloudzero-insights-controller-job + metrics_path: /metrics + scheme: 
https + enable_compression: true + tls_config: + insecure_skip_verify: true + follow_redirects: true + enable_http2: true + kubernetes_sd_configs: + - role: endpoints + kubeconfig_file: "" + follow_redirects: true + enable_http2: true + relabel_configs: + - source_labels: [__meta_kubernetes_endpoints_name] + action: keep + regex: {{ include "cloudzero-agent.insightsController.server.webhookFullname" . }}-svc + {{- end }} + {{- if .Values.prometheusConfig.scrapeJobs.additionalScrapeJobs -}} + {{ toYaml .Values.prometheusConfig.scrapeJobs.additionalScrapeJobs | toString | nindent 6 }} + {{- end}} + {{- end}} remote_write: - url: 'https://{{ include "cloudzero-agent.cleanString" .Values.host }}/v1/container-metrics?cluster_name={{ include "cloudzero-agent.cleanString" .Values.clusterName | urlquery }}&cloud_account_id={{ include "cloudzero-agent.cleanString" .Values.cloudAccountId | urlquery }}&region={{ include "cloudzero-agent.cleanString" .Values.region | urlquery }}' authorization: @@ -166,3 +152,64 @@ data: action: keep metadata_config: send: false +{{- if .Values.insightsController.enabled }} +--- +{{- with .Values.insightsController }} +{{- if not (and .labels.enabled .labels.patterns) }} +{{- $msg := "\n\nThe required field(s) 'insightsController.labels.enabled' and/or 'insightsController.labels.patterns' is not set! See the README.md for more information." }} +{{- $enabledMsg:=""}} +{{- $patternMsg:=""}} +{{- if not .labels.enabled }} +{{- $enabledMsg = "Ensure that 'insightsController.labels.enabled' is a boolean (true or false). Set 'true' to enable exporting labels."}} +{{- end }} +{{- if not .labels.patterns }} +{{- $patternMsg = "The required field 'labels.patterns' is not set or set incorrectly. 
It must be an array of regular expressions that match label keys to be exported."}} +{{- end }} +{{- fail (printf "\n %s \n %s \n %s" $msg $enabledMsg $patternMsg) }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + {{- include "cloudzero-agent.server.labels" . | nindent 4 }} + name: {{ include "cloudzero-agent.webhookConfigMapName" . }} + namespace: {{ include "cloudzero-agent.namespace" . }} + {{- with .Values.prometheusConfig.configMapAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +data: + server-config.yaml: |- + cloud_account_id: {{ .Values.cloudAccountId }} + region: {{ .Values.region }} + cluster_name: {{ .Values.clusterName }} + host: {{ .Values.host }} + remote_write: + send_interval: 1m + max_bytes_per_send: 500000 + send_timeout: 10s + max_retries: 3 + k8s_client: + timeout: 30s + database: + retention_time: 24h + cleanup_interval: 3h + batch_update_size: 500 + api_key_path: {{ include "cloudzero-agent.secretFileFullPath" . }} + {{- with .Values.insightsController.server }} + certificate: + key: {{ .tls.mountPath }}/tls.key + cert: {{ .tls.mountPath }}/tls.crt + server: + port: {{ .port }} + read_timeout: {{ .read_timeout }} + write_timeout: {{ .write_timeout }} + idle_timeout: {{ .idle_timeout }} + {{- end }} + filters: + labels: + {{- .Values.insightsController.labels | toYaml | nindent 8 }} + annotations: + {{- .Values.insightsController.annotations | toYaml | nindent 8 }} +{{- end }} diff --git a/charts/cloudzero-agent/templates/deploy.yaml b/charts/cloudzero-agent/templates/deploy.yaml index 688305a0..60064b8e 100644 --- a/charts/cloudzero-agent/templates/deploy.yaml +++ b/charts/cloudzero-agent/templates/deploy.yaml @@ -29,7 +29,7 @@ spec: {{- if .Values.server.priorityClassName }} priorityClassName: "{{ .Values.server.priorityClassName }}" {{- end }} - serviceAccountName: {{ template "cloudzero-agent.serviceAccountName.server" . 
}} + serviceAccountName: {{ template "cloudzero-agent.serviceAccountName" . }} initContainers: - name: {{ .Values.validator.name }} image: "{{ .Values.validator.image.repository }}:{{ .Values.validator.image.tag }}" @@ -41,7 +41,7 @@ spec: volumeMounts: {{- if or .Values.existingSecretName .Values.apiKey }} - name: cloudzero-api-key - mountPath: {{ .Values.server.containerSecretFilePath }} + mountPath: {{ .Values.serverConfig.containerSecretFilePath }} subPath: "" readOnly: true {{- end }} @@ -134,7 +134,7 @@ spec: mountPath: /check/app/config/ {{- if or .Values.existingSecretName .Values.apiKey }} - name: cloudzero-api-key - mountPath: {{ .Values.server.containerSecretFilePath }} + mountPath: {{ .Values.serverConfig.containerSecretFilePath }} subPath: "" readOnly: true {{- end }} diff --git a/charts/cloudzero-agent/templates/init-job.yaml b/charts/cloudzero-agent/templates/init-job.yaml new file mode 100644 index 00000000..fe51882c --- /dev/null +++ b/charts/cloudzero-agent/templates/init-job.yaml @@ -0,0 +1,35 @@ +{{- if and .Values.initJob.enabled .Values.insightsController.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "cloudzero-agent.initJobName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "cloudzero-agent.insightsController.labels" . | nindent 4 }} +spec: + template: + metadata: + name: {{ include "cloudzero-agent.initJobName" . }} + labels: + {{- include "cloudzero-agent.insightsController.initJob.matchLabels" . | nindent 8 }} + spec: + serviceAccountName: {{ include "cloudzero-agent.serviceAccountName" . }} + restartPolicy: Never + containers: + - name: start-scrape + image: {{ .Values.initJob.image.repository }}:{{ .Values.initJob.image.tag }} + command: ["sh", "-c"] + args: + - | + while true; do + echo "Waiting for the insightsController server to be ready..."; + if curl -s -o /dev/null -w "%{http_code}" -k https://{{ include "cloudzero-agent.serviceName" . 
}}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.insightsController.service.port }}/healthz | grep -q 200; then + echo "Server is ready, starting scrape job..."; + curl -X POST -k https://{{ include "cloudzero-agent.serviceName" . }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.insightsController.service.port }}/scrape; + echo "Scrape process started."; + break; + fi; + echo "No 200 response from health endpoint. Retrying in 30 seconds..."; + sleep 30; + done +{{- end }} diff --git a/charts/cloudzero-agent/templates/insights-deploy.yaml b/charts/cloudzero-agent/templates/insights-deploy.yaml new file mode 100644 index 00000000..bd6b5d11 --- /dev/null +++ b/charts/cloudzero-agent/templates/insights-deploy.yaml @@ -0,0 +1,125 @@ +{{- if .Values.insightsController.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: +{{- if .Values.insightsController.server.deploymentAnnotations }} + annotations: + {{- toYaml .Values.insightsController.server.deploymentAnnotations | nindent 4 }} +{{- end }} + name: {{ include "cloudzero-agent.insightsController.server.webhookFullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "cloudzero-agent.insightsController.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.insightsController.server.replicaCount }} + selector: + matchLabels: + {{- include "cloudzero-agent.insightsController.server.matchLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.insightsController.server.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "cloudzero-agent.insightsController.labels" . | nindent 8 }} + {{- with .Values.insightsController.server.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "cloudzero-agent.serviceAccountName" . }} + {{- with .Values.insightsController.server.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + runAsUser: 65534 + runAsNonRoot: true + runAsGroup: 65534 + fsGroup: 65534 + containers: + - name: webhook-server + image: "{{ .Values.insightsController.server.image.repository }}:{{ .Values.insightsController.server.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.insightsController.server.image.pullPolicy }} + command: + - /app/controller + args: + - -config + - "{{ include "cloudzero-agent.insightsController.configurationMountPath" . }}/server-config.yaml" + ports: + - containerPort: 8443 + resources: + {{- toYaml .Values.insightsController.resources | nindent 12 }} + volumeMounts: + - name: insights-server-config + mountPath: {{ include "cloudzero-agent.insightsController.configurationMountPath" . }} + {{- if or .Values.insightsController.volumeMounts .Values.insightsController.server.tls.useManagedCertificate }} + {{- if .Values.insightsController.server.tls.useManagedCertificate }} + - name: tls-certs + mountPath: {{ .Values.insightsController.server.tls.mountPath }} + readOnly: true + {{- end }} + {{- if or .Values.existingSecretName .Values.apiKey }} + - name: cloudzero-api-key + mountPath: {{ .Values.serverConfig.containerSecretFilePath }} + subPath: "" + readOnly: true + {{- end }} + {{- with .Values.insightsController.volumeMounts }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- end }} + {{- if and .Values.insightsController.server.healthCheck.enabled }} + livenessProbe: + httpGet: + scheme: HTTPS + path: {{ .Values.insightsController.server.healthCheck.path }} + port: {{ .Values.insightsController.server.healthCheck.port }} + initialDelaySeconds: {{ .Values.insightsController.server.healthCheck.initialDelaySeconds }} + periodSeconds: {{ .Values.insightsController.server.healthCheck.periodSeconds }} + timeoutSeconds: {{ .Values.insightsController.server.healthCheck.timeoutSeconds }} + successThreshold: {{ .Values.insightsController.server.healthCheck.successThreshold }} + failureThreshold: {{ .Values.insightsController.server.healthCheck.failureThreshold }} + readinessProbe: + httpGet: + scheme: HTTPS + path: {{ .Values.insightsController.server.healthCheck.path }} + port: {{ .Values.insightsController.server.healthCheck.port }} + initialDelaySeconds: {{ .Values.insightsController.server.healthCheck.initialDelaySeconds }} + periodSeconds: {{ .Values.insightsController.server.healthCheck.periodSeconds }} + timeoutSeconds: {{ .Values.insightsController.server.healthCheck.timeoutSeconds }} + successThreshold: {{ .Values.insightsController.server.healthCheck.successThreshold }} + failureThreshold: {{ .Values.insightsController.server.healthCheck.failureThreshold }} + {{- end }} + {{- if or .Values.insightsController.volumes .Values.insightsController.server.tls.useManagedCertificate }} + volumes: + - name: insights-server-config + configMap: + name: {{ include "cloudzero-agent.webhookConfigMapName" . }} + {{- if .Values.insightsController.server.tls.useManagedCertificate }} + - name: tls-certs + secret: + secretName: {{ include "cloudzero-agent.tlsSecretName" . }} + {{- end }} + {{- if or .Values.existingSecretName .Values.apiKey }} + - name: cloudzero-api-key + secret: + secretName: {{ include "cloudzero-agent.secretName" . }} + {{- end }} + {{- with .Values.insightsController.volumes }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + {{- with .Values.insightsController.server.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.insightsController.server.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.insightsController.server.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/charts/cloudzero-agent/templates/issuer.yaml b/charts/cloudzero-agent/templates/issuer.yaml new file mode 100644 index 00000000..a66c1c26 --- /dev/null +++ b/charts/cloudzero-agent/templates/issuer.yaml @@ -0,0 +1,10 @@ +{{ if and .Values.insightsController.issuer.enabled .Values.insightsController.enabled }} +--- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ include "cloudzero-agent.issuerName" . }} + namespace: {{ .Release.Namespace }} +spec: + {{- toYaml .Values.insightsController.issuer.spec | nindent 2 }} +{{- end }} diff --git a/charts/cloudzero-agent/templates/secret.yaml b/charts/cloudzero-agent/templates/secret.yaml index e01029db..21862f72 100644 --- a/charts/cloudzero-agent/templates/secret.yaml +++ b/charts/cloudzero-agent/templates/secret.yaml @@ -11,7 +11,7 @@ metadata: {{- toYaml . | nindent 4 }} {{- end }} data: - {{ .Values.server.containerSecretFileName }}: {{- $apiKey := .Values.apiKey | toString }} + {{ .Values.serverConfig.containerSecretFileName }}: {{- $apiKey := .Values.apiKey | toString }} {{- if not (regexMatch "^[a-zA-Z0-9-_.~!*'();]+$" $apiKey) }} {{- fail "The provided apiKey is invalid. Check that the provided value from apiKey matches exactly what is found in the CloudZero admin page." 
}} {{- end }} diff --git a/charts/cloudzero-agent/templates/service.yaml b/charts/cloudzero-agent/templates/service.yaml new file mode 100644 index 00000000..d8289f1d --- /dev/null +++ b/charts/cloudzero-agent/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "cloudzero-agent.serviceName" . }} + labels: + {{- include "cloudzero-agent.insightsController.labels" . | nindent 4 }} + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + ports: + - port: {{ .Values.insightsController.service.port }} + targetPort: 8443 + name: http + selector: + {{- include "cloudzero-agent.insightsController.server.matchLabels" . | nindent 4 }} diff --git a/charts/cloudzero-agent/templates/serviceaccount.yaml b/charts/cloudzero-agent/templates/serviceaccount.yaml index 206919d9..7bff4768 100644 --- a/charts/cloudzero-agent/templates/serviceaccount.yaml +++ b/charts/cloudzero-agent/templates/serviceaccount.yaml @@ -1,17 +1,17 @@ -{{- if .Values.serviceAccounts.server.create }} +{{- if .Values.serviceAccount.create }} apiVersion: v1 kind: ServiceAccount metadata: labels: {{- include "cloudzero-agent.server.labels" . | nindent 4 }} - name: {{ template "cloudzero-agent.serviceAccountName.server" . }} + name: {{ template "cloudzero-agent.serviceAccountName" . }} namespace: {{ include "cloudzero-agent.namespace" . 
}} annotations: -{{ toYaml .Values.serviceAccounts.server.annotations | indent 4 }} +{{ toYaml .Values.serviceAccount.annotations | indent 4 }} {{- if kindIs "bool" .Values.server.automountServiceAccountToken }} automountServiceAccountToken: {{ .Values.server.automountServiceAccountToken }} -{{- else if kindIs "bool" .Values.serviceAccounts.server.automountServiceAccountToken }} -automountServiceAccountToken: {{ .Values.serviceAccounts.server.automountServiceAccountToken }} +{{- else if kindIs "bool" .Values.serviceAccount.automountServiceAccountToken }} +automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }} {{- end }} {{- end }} diff --git a/charts/cloudzero-agent/templates/validatorcm.yaml b/charts/cloudzero-agent/templates/validatorcm.yaml index 213f5df1..cd9b8259 100644 --- a/charts/cloudzero-agent/templates/validatorcm.yaml +++ b/charts/cloudzero-agent/templates/validatorcm.yaml @@ -33,7 +33,7 @@ data: {{- if .Values.validator.serviceEndpoints.kubeStateMetrics }} kube_state_metrics_service_endpoint: http://{{ .Values.validator.serviceEndpoints.kubeStateMetrics }}/ {{- else }} - kube_state_metrics_service_endpoint: http://{{- if .Release.Name }}{{.Release.Name}}-{{- end }}kube-state-metrics:8080/ + kube_state_metrics_service_endpoint: http://{{ .Values.kubeStateMetrics.nameOverride }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.kubeStateMetrics.service.port }} {{- end }} {{- if .Values.validator.serviceEndpoints.prometheusNodeExporter }} prometheus_node_exporter_service_endpoint: http://{{ .Values.validator.serviceEndpoints.prometheusNodeExporter }}/ @@ -41,6 +41,8 @@ data: prometheus_node_exporter_service_endpoint: http://{{- if .Release.Name }}{{.Release.Name}}-{{- end }}prometheus-node-exporter:9100/ {{- end }} executable: /bin/prometheus + kube_metrics: + {{- toYaml .Values.kubeMetrics | nindent 8 }} configurations: - /etc/prometheus/prometheus.yml - /etc/config/prometheus/configmaps/prometheus.yml @@ -57,7 +59,7 
@@ data: checks: - k8s_version - kube_state_metrics_reachable - - node_exporter_reachable + #- node_exporter_reachable - prometheus_version - scrape_cfg - name: pre-stop diff --git a/charts/cloudzero-agent/templates/webhooks.yaml b/charts/cloudzero-agent/templates/webhooks.yaml new file mode 100644 index 00000000..eef7369c --- /dev/null +++ b/charts/cloudzero-agent/templates/webhooks.yaml @@ -0,0 +1,38 @@ +{{- if .Values.insightsController.enabled }} +{{- range $configType, $configs := .Values.insightsController.webhooks.configurations }} +{{- if or (index $.Values.insightsController.labels.resources $configType) (index $.Values.insightsController.annotations.resources $configType) }} +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: {{ include "cloudzero-agent.validatingWebhookConfigName" $ }}-{{ $configType }} + namespace: {{ $.Release.Namespace }} + labels: + {{- include "cloudzero-agent.insightsController.labels" $ | nindent 4 }} + annotations: + {{- include "cloudzero-agent.webhooks.annotations" $ | nindent 4 }} +webhooks: + - name: {{ include "cloudzero-agent.validatingWebhookName" $ }} + namespaceSelector: {{ toYaml $.Values.insightsController.webhooks.namespaceSelector }} + failurePolicy: Ignore + rules: + - operations: [ "CREATE", "UPDATE" ] + apiGroups: {{ $configs.apiGroups }} + apiVersions: [ "v1" ] + resources: [ {{ $configType }} ] + scope: "*" + clientConfig: + service: + namespace: {{ $.Release.Namespace }} + name: {{ include "cloudzero-agent.serviceName" $ }} + path: "{{ $configs.path }}" + port: {{ $.Values.insightsController.service.port }} + {{- if or (and $.Values.insightsController.certificate.enabled $.Values.insightsController.issuer.enabled) (gt (len $.Values.insightsController.webhooks.caBundle) 1 ) }} + caBundle: {{ $.Values.insightsController.webhooks.caBundle }} + {{- end }} + admissionReviewVersions: ["v1"] + sideEffects: None + timeoutSeconds: 5 +{{- end }} +{{- end }} +{{- end 
}} \ No newline at end of file diff --git a/charts/cloudzero-agent/values.yaml b/charts/cloudzero-agent/values.yaml index 4b92db4a..14076419 100644 --- a/charts/cloudzero-agent/values.yaml +++ b/charts/cloudzero-agent/values.yaml @@ -1,13 +1,23 @@ +# ############### # +# Required Values # +# ############### # + +# -- Name of the clusters. +clusterName: null +# -- CloudZero API key. Required if existingSecretName is null. +apiKey: null + + +# Default Values # -- CloudZero host to send metrics to. host: api.cloudzero.com + + # -- Account ID of the account the cluster is running in. This must be a string - even if it is a number in your system. cloudAccountId: null -# -- Name of the clusters. -clusterName: null # -- Region the cluster is running in. region: null -# -- CloudZero API key. Required if useExistingSecret is false. -apiKey: null + # -- If set, the agent will use the API key in this Secret to authenticate with CloudZero. existingSecretName: null @@ -20,7 +30,6 @@ kubeMetrics: - kube_pod_container_resource_requests - kube_pod_labels - kube_pod_info - - node_dmi_info containerMetrics: - container_cpu_usage_seconds_total - container_memory_working_set_bytes @@ -47,12 +56,30 @@ prometheusConfig: # -- Any items added to this list will be added to the Prometheus scrape configuration. additionalScrapeJobs: [] -kube-state-metrics: - enabled: false - extraArgs: - - --metric-labels-allowlist=pods=[app.kubernetes.io/component] -prometheus-node-exporter: - enabled: false +# General server settings that apply to both the prometheus agent server and the webhook server +serverConfig: + # -- The agent will use this file path on the container filesystem to get the CZ API key. + containerSecretFilePath: /etc/config/secrets/ + # -- The agent will look for a file with this name to get the CZ API key. 
+ containerSecretFileName: value + +initJob: + enabled: true + image: + repository: curlimages/curl + pullPolicy: Always + tag: "8.10.1" + +kubeStateMetrics: + enabled: true + fullnameOverride: "cloudzero-state-metrics" + nameOverride: "cloudzero-state-metrics" + # Disable CloudZero KSM as a Scrape Target since the service endpoint is explicity defined + # by the Validators config file. + prometheusScrape: false + # Set a default port other than 8080 to avoid collisions with any existing KSM services. + service: + port: 8080 # -- Annotations to be added to the Secret, if the chart is configured to create one secretAnnotations: {} @@ -67,7 +94,7 @@ validator: name: env-validator image: repository: ghcr.io/cloudzero/cloudzero-agent-validator/cloudzero-agent-validator - tag: 0.4.1 + tag: 0.9.0 digest: pullPolicy: Always @@ -89,17 +116,12 @@ server: memory: 1024Mi deploymentAnnotations: {} podAnnotations: {} - configMapOverrideName: configuration args: - --config.file=/etc/config/prometheus/configmaps/prometheus.yml - --web.enable-lifecycle - --web.console.libraries=/etc/prometheus/console_libraries - --web.console.templates=/etc/prometheus/consoles - --enable-feature=agent - # -- The agent will use this file path on the container filesystem to get the CZ API key. - containerSecretFilePath: /etc/config/prometheus/secrets/ - # -- The agent will look for a file with this name to get the CZ API key. 
- containerSecretFileName: value persistentVolume: existingClaim: "" enabled: false @@ -113,11 +135,113 @@ server: emptyDir: sizeLimit: 8Gi -serviceAccounts: +insightsController: + enabled: true + labels: + enabled: + patterns: + # - '.*' + resources: + pods: true + namespaces: true + deployments: false + statefulsets: false + nodes: false + jobs: false + cronjobs: false + daemonsets: false + annotations: + enabled: false + patterns: + - '.*' + resources: + pods: true + namespaces: true + deployments: false + statefulsets: false + nodes: false + jobs: false + cronjobs: false + daemonsets: false server: - create: true - name: "" + name: webhook-server + replicaCount: 3 + image: + repository: ghcr.io/cloudzero/cloudzero-insights-controller/cloudzero-insights-controller + tag: 0.0.4 + pullPolicy: Always + tls: + enabled: true + useManagedCertificate: true + nameOverride: "" + mountPath: /etc/certs + port: 8443 + read_timeout: 10s + write_timeout: 10s + idle_timeout: 120s + healthCheck: + enabled: true + path: /healthz + port: 8443 + initialDelaySeconds: 15 + periodSeconds: 20 + timeoutSeconds: 3 + successThreshold: 1 + failureThreshold: 5 + volumeMounts: [] + volumes: [] + resources: {} + nodeSelector: {} + tolerations: [] + affinity: {} + imagePullSecrets: [] + podAnnotations: {} + podLabels: {} + service: + port: 443 + issuer: + enabled: true + spec: + selfSigned: {} + certificate: + enabled: true + webhooks: annotations: {} + namespaceSelector: {} # This denotes no specific selection, applies to all namespaces + caBundle: '' # by default, this is empty, and the value is populated by cert-manager's ca-injector if cert-manager is used + configurations: + pods: + path: /validate/pod + apiGroups: ['""'] + namespaces: + path: /validate/namespace + apiGroups: ['""'] + deployments: + path: /validate/deployment + apiGroups: ["apps"] + statefulsets: + path: /validate/statefulset + apiGroups: ["apps"] + nodes: + path: /validate/node + apiGroups: ['""'] + jobs: + path: 
/validate/job + apiGroups: ["batch"] + cronjobs: + path: /validate/cronjob + apiGroups: ["batch"] + daemonsets: + path: /validate/daemonset + apiGroups: ["apps"] + +serviceAccount: + create: true + name: "" + annotations: {} + +cert-manager: + enabled: true rbac: create: true diff --git a/charts/cloudzero-certificate/.helmignore b/charts/cloudzero-certificate/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/charts/cloudzero-certificate/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/cloudzero-certificate/Chart.yaml b/charts/cloudzero-certificate/Chart.yaml new file mode 100644 index 00000000..3fdc0b18 --- /dev/null +++ b/charts/cloudzero-certificate/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: cloudzero-certificate +description: Creates a TLS certificate to be used by the CloudZero Insights Controller +type: application +version: 0.1.0 diff --git a/charts/cloudzero-certificate/templates/NOTES.txt b/charts/cloudzero-certificate/templates/NOTES.txt new file mode 100644 index 00000000..2496c3ee --- /dev/null +++ b/charts/cloudzero-certificate/templates/NOTES.txt @@ -0,0 +1,11 @@ +Get the caBundle value by running: + +CA_BUNDLE=$(kubectl get secret -n {{ .Release.Namespace }} {{ include "cloudzero-certificate.secretName" . 
}} -o jsonpath='{.data.ca\.crt}') + +This value should be used in the cloudzero-agent helm chart as shown: + +``` +insightsController: + webhooks: + caBundle: $CA_BUNDLE +``` diff --git a/charts/cloudzero-certificate/templates/_helpers.tpl b/charts/cloudzero-certificate/templates/_helpers.tpl new file mode 100644 index 00000000..7600ddc0 --- /dev/null +++ b/charts/cloudzero-certificate/templates/_helpers.tpl @@ -0,0 +1,71 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "cloudzero-certificate.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "cloudzero-certificate.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "cloudzero-certificate.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "cloudzero-certificate.labels" -}} +helm.sh/chart: {{ include "cloudzero-certificate.chart" . }} +{{ include "cloudzero-certificate.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "cloudzero-certificate.selectorLabels" -}} +app.kubernetes.io/name: {{ include "cloudzero-certificate.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the secret to use +*/}} +{{- define "cloudzero-certificate.secretName" -}} +{{- default (include "cloudzero-certificate.fullname" .) .Values.secret.name }} +{{- end }} + +{{/* +Generate a CA and a CA-signed serving certificate for the webhook server. SANs are the in-cluster DNS names of the "<release>-svc" Service (<svc>.<ns>.svc and <svc>.<ns>.svc.cluster.local); ca.crt holds the CA certificate so it can be used as the webhook caBundle. Validity is 36500 days for both, which stays within the range time.Duration can represent. +*/}} +{{- define "cloudzero-certificate.genCerts" -}} +{{- $releaseName := required "`cloudzeroAgentReleaseName` must be supplied. This value should be the name of the cloudzero-agent helm release that will be created" .Values.cloudzeroAgentReleaseName -}} +{{- $dnsName := printf "%s-svc.%s.svc" $releaseName $.Release.Namespace -}} +{{- $ca := genCA "cloudzero-agent-ca" 36500 -}} +{{- $cert := genSignedCert $dnsName nil (list $dnsName (printf "%s.cluster.local" $dnsName)) 36500 $ca -}} +ca.crt: {{ $ca.Cert | b64enc }} +tls.crt: {{ $cert.Cert | b64enc }} +tls.key: {{ $cert.Key | b64enc }} +{{- end -}} diff --git a/charts/cloudzero-certificate/templates/secret.yaml b/charts/cloudzero-certificate/templates/secret.yaml new file mode 100644 index 00000000..461dfca4 --- /dev/null +++ b/charts/cloudzero-certificate/templates/secret.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + labels: + {{- include "cloudzero-certificate.labels" . | nindent 4 }} + name: {{ include "cloudzero-certificate.secretName" . }} + namespace: {{ .Release.Namespace }} +data: +{{- include "cloudzero-certificate.genCerts" . 
| nindent 2 }} diff --git a/charts/cloudzero-certificate/values.yaml b/charts/cloudzero-certificate/values.yaml new file mode 100644 index 00000000..ae181171 --- /dev/null +++ b/charts/cloudzero-certificate/values.yaml @@ -0,0 +1,13 @@ +cloudzeroAgentReleaseName: null + +nameOverride: "" +fullnameOverride: "" + +secret: + name: "" + +serviceAccount: + name: "" + +role: + name: "" diff --git a/charts/cloudzero-insights-controller/Chart.lock b/charts/cloudzero-insights-controller/Chart.lock new file mode 100644 index 00000000..346e17dc --- /dev/null +++ b/charts/cloudzero-insights-controller/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: cert-manager + repository: https://charts.jetstack.io + version: v1.15.3 +digest: sha256:9027951628db45ef674f00e5baeca157f95755de9818a9d1e78396b86971f527 +generated: "2024-08-29T11:00:51.842705-04:00" diff --git a/charts/cloudzero-insights-controller/Chart.yaml b/charts/cloudzero-insights-controller/Chart.yaml new file mode 100644 index 00000000..d021a331 --- /dev/null +++ b/charts/cloudzero-insights-controller/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +name: cloudzero-insights-controller +description: Provides telemetry to the CloudZero platform to enable complex cost allocation and analysis. 
+type: application +version: 1.0.0-beta-5 +appVersion: "0.0.2" +dependencies: + - name: cert-manager + version: v1.15.3 + repository: https://charts.jetstack.io + alias: cert-manager + condition: cert-manager.enabled diff --git a/charts/cloudzero-insights-controller/README.md b/charts/cloudzero-insights-controller/README.md new file mode 100644 index 00000000..dd083dfe --- /dev/null +++ b/charts/cloudzero-insights-controller/README.md @@ -0,0 +1,145 @@ +# Cloudzero Insights Controller Helm Chart + +[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](CODE-OF-CONDUCT.md) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) +![GitHub release](https://img.shields.io/github/release/Cloudzero/cloudzero-charts.svg) + +A Helm chart for a validating admission webhook to send cluster metrics to the CloudZero platform. + +## Overview + +This Validating Admission Webhook monitors and intercepts `CREATE` and `UPDATE` operations on the following Kubernetes resources: + +- `Pod` +- `Deployment` +- `StatefulSet` +- `Daemonset` +- `Job` +- `CronJob` +- `Node` +- `Namespace` + +The webhook captures the labels from these resources and uploads them to the CloudZero API endpoint. For both `CREATE` and `UPDATE` operations, the full set of labels is sent to the API, ensuring that the most up-to-date labels are always uploaded. For `Deployment` and `Statefulset` resources, annotations are also uploaded. + + +## Prerequisites + +- Kubernetes 1.23+ +- Helm 3+ +- A CloudZero API key + +## Installation + +This helm chart is best used alongside the [cloudzero-agent](https://github.com/Cloudzero/cloudzero-charts/tree/develop/charts/cloudzero-agent) chart. In this case, the same API key can be used for both installations. 
+ +### Get Helm Repository Info + +```console +helm repo add cloudzero https://cloudzero.github.io/cloudzero-charts +helm repo update +``` + +_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +The chart can be installed directly with Helm or any other common Kubernetes deployment tool. See the next section for different deployment configurations. + +### Deployment Configurations and Certificate Management + +This chart contains a `ValidatingWebhookConfiguration` resource, which uses a certificate in order to validate requests to the webhook server. See related Kubernetes documentation [here](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#configure-admission-webhooks-on-the-fly). + + +**There are two ways to install the chart as it relates to certificate management:** + +1. (Default) Manage certificates using [cert-manager](https://github.com/cert-manager/cert-manager/tree/master). +By default, the chart installs [cert-manager](https://github.com/cert-manager/cert-manager/tree/master) as a subchart. `cert-manager` handles the creation of the certificate and injects the CA bundle into the `ValidatingWebhookConfiguration` resource. For details on how cert-manager does this, see [here](https://cert-manager.io/docs/concepts/ca-injector/). + +To install the chart with this configuration, install the chart with the following helm command. 
The default configuration uses cert-manager to create the certificate: + +```console +helm install <RELEASE_NAME> cloudzero/cloudzero-insights-controller \ + --set existingSecretName=<SECRET_NAME> \ + --set clusterName=<CLUSTER_NAME> \ + --set-string cloudAccountId=<CLOUD_ACCOUNT_ID> \ + --set region=<REGION> +``` + +If `cert-manager` CRDs are not already installed, the installation may fail with an error message that contains: +```console +no matches for kind "Certificate" in version "cert-manager.io/v1" +``` + +If this happens, run the following: + +```bash +helm install <RELEASE_NAME> cloudzero/cloudzero-insights-controller \ + --set webhook.issuer.enabled=false \ + --set webhook.certificate.enabled=false \ + --set cert-manager.installCRDs=true +``` +Or, alternatively, [install the cert-manager CRDs yourself](https://cert-manager.io/docs/installation/helm/). +Then rerun the original command: +```console +helm install <RELEASE_NAME> cloudzero/cloudzero-insights-controller \ + --set existingSecretName=<SECRET_NAME> \ + --set clusterName=<CLUSTER_NAME> \ + --set-string cloudAccountId=<CLOUD_ACCOUNT_ID> \ + --set region=<REGION> +``` + +2. The second option is to bring your own certificate. In this case, the TLS information must be mounted to the server Deployment at the `/etc/certs/` path in a file formatted as: +``` +ca.crt: +tls.crt: +tls.key: +``` +An example command would be: +```bash +helm install <RELEASE_NAME> cloudzero/cloudzero-insights-controller \ + --set existingSecretName=<SECRET_NAME> \ + --set clusterName=<CLUSTER_NAME> \ + --set-string cloudAccountId=<CLOUD_ACCOUNT_ID> \ + --set region=<REGION> \ + -f config.yaml +``` +where `config.yaml` is: +``` +server: + tls: + useManagedSecret: false + volumeMounts: + - name: your-tls-volume + mountPath: /etc/certs + readOnly: true + volumes: + - name: your-tls-volume + secret: + secretName: your-tls-secret-name +webhook: + issuer: + enabled: false + certificate: + enabled: false + caBundle: '' + +cert-manager: + enabled: false +``` + +## Troubleshooting + +### `-server` pod stuck in `Pending` state + The server pod, which handles incoming webhook requests, may be stuck in this state if the TLS secret is not available. 
Confirm this is the case by describing the server pod: + ```console + kubectl describe pod -l app.kubernetes.io/name=cloudzero-insights-controller + ``` + If the event log shows that the pod cannot be created due to a missing volume, check that the TLS secret has been created successfully: + ```console + kubectl get secret -l app.kubernetes.io/name=cloudzero-insights-controller + ``` + If no secrets are returned by that command, then cert-manager did not provision a certificate. Consult the `cert-manager` pod logs and/or the cert-manager CRDs for more information: + ```console + kubectl get certificaterequests + kubectl get certificates + kubectl get certificatesigningrequests + kubectl get issuers + ``` diff --git a/charts/cloudzero-insights-controller/templates/_helpers.tpl b/charts/cloudzero-insights-controller/templates/_helpers.tpl new file mode 100644 index 00000000..7b0da7fe --- /dev/null +++ b/charts/cloudzero-insights-controller/templates/_helpers.tpl @@ -0,0 +1,206 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "insights-controller.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "insights-controller.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Generate certificates for the webhook server +*/}} +{{- define "insights-controller.generate-certs" -}} +{{- $altNames := list ( printf "DNS:webhook-server.%s" ( .Release.Namespace )) -}} +{{- $ca := genCA "insights-controller-ca" 365 -}} +{{- $cert := genSignedCert ( include "insights-controller.name" . ) nil $altNames 365 $ca -}} +tls.crt: {{ $cert.Cert | b64enc }} +tls.key: {{ $cert.Key | b64enc }} +{{- end -}} + + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "insights-controller.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "insights-controller.labels" -}} +helm.sh/chart: {{ include "insights-controller.chart" . }} +{{ include "insights-controller.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "insights-controller.selectorLabels" -}} +app.kubernetes.io/name: {{ include "insights-controller.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "insights-controller.annotations" -}} +{{- if .Values.webhook.annotations }} +{{ toYaml .Values.webhook.annotations }} +{{- end }} +{{- if and .Values.webhook.certificate.enabled .Values.webhook.issuer.enabled }} +cert-manager.io/inject-ca-from: {{ .Values.webhook.caInjection | default (printf "%s/%s" .Release.Namespace (include "insights-controller.certificateName" .)) }} +{{- end }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "insights-controller.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "insights-controller.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Name for the webhook server deployment +*/}} +{{- define "insights-controller.deploymentName" -}} +{{- printf "%s-server" (include "insights-controller.fullname" .) }} +{{- end }} + +{{/* +Name for the webhook server service +*/}} +{{- define "insights-controller.serviceName" -}} +{{- printf "%s-svc" (include "insights-controller.fullname" .) }} +{{- end }} + +{{/* +Name for the validating webhook configuration resource +*/}} +{{- define "insights-controller.validatingWebhookConfigName" -}} +{{- printf "%s-webhook" (include "insights-controller.fullname" .) }} +{{- end }} + +{{/* +Name for the validating webhook +*/}} +{{- define "insights-controller.validatingWebhookName" -}} +{{- printf "%s.%s.svc" (include "insights-controller.validatingWebhookConfigName" .) .Release.Namespace }} +{{- end }} + +{{/* +Name for the certificate resource +*/}} +{{- define "insights-controller.certificateName" -}} +{{- printf "%s-certificate" (include "insights-controller.fullname" .) 
}} +{{- end }} + +{{/* +Name for the certificate secret +*/}} +{{- define "insights-controller.tlsSecretName" -}} +{{- printf "%s-tls" (include "insights-controller.fullname" .) }} +{{- end }} + +{{/* +Name for the cloudzero API key secret name +*/}} +{{- define "insights-controller.cloudzeroSecretName" -}} +{{- if and .Values.global .Values.global.existingSecretName }} +{{- .Values.global.existingSecretName }} +{{- else}} +{{- .Values.existingSecretName | default (printf "%s-api-key" .Release.Name) }} +{{- end }} +{{- end }} + +{{/* +Name for the cloudzero specific configuration file +*/}} +{{- define "insights-controller.cloudzeroConfigMapName" -}} +{{ .Values.cloudzeroConfigMapNameOverride | default (printf "%s-cloudzero-configuration" .Release.Name) }} +{{- end }} + +{{/* +Name for the webhook server configuration file +*/}} +{{- define "insights-controller.configMapName" -}} +{{- printf "%s-configuration" (include "insights-controller.fullname" .) }} +{{- end }} + +{{/* +Mount path for the cloudzero configuration file +*/}} +{{- define "insights-controller.cloudzeroConfigurationMountPath" -}} +{{- printf "/etc/%s-cloudzero-config" .Chart.Name }} +{{- end }} + +{{/* +Mount path for the insights server configuration file +*/}} +{{- define "insights-controller.serverConfigurationMountPath" -}} +{{- printf "/etc/%s-insights-server-config" .Chart.Name }} +{{- end }} + +{{/* +Name for the issuer resource +*/}} +{{- define "insights-controller.issuerName" -}} +{{- printf "%s-issuer" (include "insights-controller.fullname" .) }} +{{- end }} + +{{/* +Name for the job resource +*/}} +{{- define "insights-controller.initJobName" -}} +{{- printf "%s-init" (include "insights-controller.fullname" .) }} +{{- end }} + +{{/* +Name for the clusterrole resource +*/}} +{{- define "insights-controller.clusterRoleName" -}} +{{- printf "%s" (include "insights-controller.fullname" .) 
}} +{{- end }} + +{{/* +Name for the clusterrolebinding resource +*/}} +{{- define "insights-controller.clusterRoleBindingName" -}} +{{- printf "%s" (include "insights-controller.fullname" .) }} +{{- end }} + +{{/* +Cloud account ID: the (templated) global.cloudAccountId when set, otherwise the chart-level cloudAccountId +*/}} +{{- define "insights-controller.cloudAccountId" -}} +{{- if and .Values.global .Values.global.cloudAccountId }} +{{- tpl .Values.global.cloudAccountId . }} +{{- else }} +{{- .Values.cloudAccountId }} +{{- end }} +{{- end }} diff --git a/charts/cloudzero-insights-controller/templates/certificate.yaml b/charts/cloudzero-insights-controller/templates/certificate.yaml new file mode 100644 index 00000000..20f47f69 --- /dev/null +++ b/charts/cloudzero-insights-controller/templates/certificate.yaml @@ -0,0 +1,28 @@ +{{ if .Values.webhook.certificate.enabled }} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ include "insights-controller.certificateName" . }} + namespace: {{ .Release.Namespace }} +spec: + secretName: {{ include "insights-controller.tlsSecretName" .}} + secretTemplate: + {{- with .Values.secretAnnotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + labels: + {{- include "insights-controller.labels" . | nindent 6 }} + privateKey: + algorithm: RSA + encoding: PKCS1 + size: 2048 + duration: 2160h # 90d + renewBefore: 360h # 15d + dnsNames: + - {{ include "insights-controller.serviceName" . }}.{{ .Release.Namespace }}.svc + issuerRef: + name: {{ include "insights-controller.issuerName" . }} + kind: Issuer +{{ end }} diff --git a/charts/cloudzero-insights-controller/templates/clusterrole.yaml b/charts/cloudzero-insights-controller/templates/clusterrole.yaml new file mode 100644 index 00000000..c893f76b --- /dev/null +++ b/charts/cloudzero-insights-controller/templates/clusterrole.yaml @@ -0,0 +1,14 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "insights-controller.clusterRoleName" . 
}} +rules: + - apiGroups: [""] + resources: ["namespaces", "nodes", "pods"] + verbs: ["get", "list"] + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets", "daemonsets"] + verbs: ["get", "list"] + - apiGroups: ["batch"] + resources: ["jobs", "cronjobs"] + verbs: ["get", "list"] diff --git a/charts/cloudzero-insights-controller/templates/clusterrolebinding.yaml b/charts/cloudzero-insights-controller/templates/clusterrolebinding.yaml new file mode 100644 index 00000000..85611453 --- /dev/null +++ b/charts/cloudzero-insights-controller/templates/clusterrolebinding.yaml @@ -0,0 +1,14 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "insights-controller.clusterRoleBindingName" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "insights-controller.clusterRoleName" . }} +subjects: + - kind: ServiceAccount + name: {{ include "insights-controller.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} diff --git a/charts/cloudzero-insights-controller/templates/cm.yaml b/charts/cloudzero-insights-controller/templates/cm.yaml new file mode 100644 index 00000000..bf9928d5 --- /dev/null +++ b/charts/cloudzero-insights-controller/templates/cm.yaml @@ -0,0 +1,66 @@ +{{- if not (and .Values.labels.enabled .Values.labels.patterns) }} +{{- $msg := "\n\nThe required field(s) 'labels.enabled' and/or 'labels.patterns' is not set! See the README.md for more information." }} +{{- $enabledMsg:=""}} +{{- $patternMsg:=""}} +{{- if not .Values.labels.enabled }} +{{- $enabledMsg = "Ensure that 'labels.enabled' is a boolean (true or false). Set 'true' to enable exporting labels."}} +{{- end }} +{{- if not .Values.labels.patterns }} +{{- $patternMsg = "The required field 'labels.patterns' is not set or set incorrectly. 
It must be an array of regular expressions that match label keys to be exported."}}
+{{- end }}
+{{- fail (printf "\n %s \n %s \n %s" $msg $enabledMsg $patternMsg) }}
+{{- end }}
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  labels:
+    {{- include "insights-controller.labels" . | nindent 4 }}
+  name: {{ include "insights-controller.configMapName" . }}
+  namespace: {{ .Release.Namespace }}
+  {{- with .Values.server.configMap.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+data:
+  server-config.yaml: |-
+    {{- if .Values.cloudAccountId }}{{/* Scalars below are quoted so numeric- or boolean-looking values stay strings. */}}
+    cloud_account_id: {{ .Values.cloudAccountId | quote }}
+    {{- end }}
+    {{- if .Values.region }}
+    region: {{ .Values.region | quote }}
+    {{- end }}
+    {{- if .Values.clusterName }}
+    cluster_name: {{ .Values.clusterName | quote }}
+    {{- end }}
+    {{- if .Values.host }}
+    host: {{ .Values.host | quote }}
+    {{- end }}
+    remote_write:
+      send_interval: 1m
+      max_bytes_per_send: 500000
+      send_timeout: 10s
+      max_retries: 3
+    k8s_client:
+      timeout: 30s
+    database:
+      retention_time: 24h
+      cleanup_interval: 3h
+      batch_update_size: 500
+    {{- with .Values.server.tls }}
+    certificate:
+      key: {{ .mountPath }}/tls.key
+      cert: {{ .mountPath }}/tls.crt
+    {{- end }}
+    api_key_path: {{ .Values.server.serverConfig.containerSecretFilePath }}/{{ .Values.server.serverConfig.containerSecretFileName }}
+    server:
+      port: {{ .Values.server.serverConfig.port }}
+      read_timeout: {{ .Values.server.serverConfig.read_timeout }}
+      write_timeout: {{ .Values.server.serverConfig.write_timeout }}
+      idle_timeout: {{ .Values.server.serverConfig.idle_timeout }}
+    filters:
+      labels:
+        {{- .Values.labels | toYaml | nindent 8 }}
+      annotations:
+        {{- .Values.annotations | toYaml | nindent 8 }}
+
diff --git a/charts/cloudzero-insights-controller/templates/deploy.yaml b/charts/cloudzero-insights-controller/templates/deploy.yaml
new file mode 100644
index 00000000..164d0d9a
--- /dev/null
+++ b/charts/cloudzero-insights-controller/templates/deploy.yaml
@@ -0,0 +1,110 @@
+{{- /*
+Deployment for the insights-controller webhook server.
+The server-config and API-key volumes are always mounted: the container args
+and the api_key_path in server-config.yaml point at them.
+*/ -}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "insights-controller.deploymentName" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "insights-controller.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.server.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "insights-controller.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.server.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "insights-controller.labels" . | nindent 8 }}
+        {{- with .Values.server.podLabels }}
+          {{- toYaml . | nindent 8 }}
+        {{- end }}
+    spec:
+      serviceAccountName: {{ include "insights-controller.serviceAccountName" . }}
+      {{- with .Values.server.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        runAsUser: 65534
+        runAsNonRoot: true
+        runAsGroup: 65534
+        fsGroup: 65534
+      containers:
+        - name: webhook-server
+          image: "{{ .Values.server.image.repository }}:{{ .Values.server.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.server.image.pullPolicy }}
+          command:
+            - /app/controller
+          args:
+            - -config
+            - "{{ include "insights-controller.serverConfigurationMountPath" . }}/server-config.yaml"
+          {{- if (include "insights-controller.cloudzeroConfigMapName" .) }}
+            - -config
+            - "{{include "insights-controller.cloudzeroConfigurationMountPath" . }}/cloudzero-config.yaml"
+          {{- end }}
+          ports:
+            {{- /* Declared port follows server.port in server-config.yaml instead of a literal. */}}
+            - containerPort: {{ .Values.server.serverConfig.port }}
+          resources:
+            {{- toYaml .Values.server.resources | nindent 12 }}
+          {{- /* Always rendered: the insights-server-config and cloudzero-api-key mounts are mandatory. */}}
+          volumeMounts:
+          {{- if (include "insights-controller.cloudzeroConfigMapName" .) }}
+            - name: cloudzero-server-config
+              mountPath: {{ include "insights-controller.cloudzeroConfigurationMountPath" . }}
+          {{- end }}
+            - name: insights-server-config
+              mountPath: {{ include "insights-controller.serverConfigurationMountPath" . }}
+          {{- if .Values.server.tls.useManagedSecret }}
+            - name: tls-certs
+              mountPath: {{ .Values.server.tls.mountPath }}
+              readOnly: true
+          {{- end }}
+            - name: cloudzero-api-key
+              mountPath: {{ .Values.server.serverConfig.containerSecretFilePath }}
+              subPath: ""
+              readOnly: true
+          {{- with .Values.server.volumeMounts }}
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+      {{- /* Always rendered: every mount above needs its backing volume. */}}
+      volumes:
+      {{- if (include "insights-controller.cloudzeroConfigMapName" .) }}
+        - name: cloudzero-server-config
+          configMap:
+            name: {{ include "insights-controller.cloudzeroConfigMapName" . }}
+      {{- end }}
+        - name: insights-server-config
+          configMap:
+            name: {{ include "insights-controller.configMapName" . }}
+      {{- if .Values.server.tls.useManagedSecret }}
+        - name: tls-certs
+          secret:
+            secretName: {{ include "insights-controller.tlsSecretName" . }}
+      {{- end }}
+        - name: cloudzero-api-key
+          secret:
+            secretName: {{ include "insights-controller.cloudzeroSecretName" . }}
+      {{- with .Values.server.volumes }}
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.server.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.server.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.server.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/charts/cloudzero-insights-controller/templates/init-job.yaml b/charts/cloudzero-insights-controller/templates/init-job.yaml
new file mode 100644
index 00000000..97b2b6ba
--- /dev/null
+++ b/charts/cloudzero-insights-controller/templates/init-job.yaml
@@ -0,0 +1,35 @@
+{{- if .Values.initJob.enabled }}
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ include "insights-controller.initJobName" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "insights-controller.labels" . 
| nindent 4 }}
+spec:
+  template:
+    metadata:
+      name: {{ include "insights-controller.initJobName" . }}
+      labels:
+        {{- include "insights-controller.labels" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ include "insights-controller.serviceAccountName" . }}
+      restartPolicy: Never
+      containers:
+        - name: start-scrape
+          image: {{ .Values.initJob.image.repository }}:{{ .Values.initJob.image.tag }}  # NOTE(review): initJob.image.pullPolicy from values.yaml is not applied here - confirm intent
+          command: ["sh", "-c"]  # the script below is passed as the single -c argument
+          args:
+            - |
+              while true; do  # poll until the health endpoint answers 200
+                echo "Waiting for the webhook server to be ready...";
+                if curl -s -o /dev/null -w "%{http_code}" -k https://{{ include "insights-controller.serviceName" . }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.server.service.port }}/healthz | grep -q 200; then  # -w prints only the status code; -k skips certificate verification
+                  echo "Server is ready, starting scrape job...";
+                  curl -X POST -k https://{{ include "insights-controller.serviceName" . }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.server.service.port }}/scrape;  # single POST to the scrape endpoint; its result is not checked
+                  echo "Scrape process started.";
+                  break;
+                fi;
+                echo "No 200 response from health endpoint. Retrying in 30 seconds...";
+                sleep 30;
+              done
+{{- end }}
diff --git a/charts/cloudzero-insights-controller/templates/issuer.yaml b/charts/cloudzero-insights-controller/templates/issuer.yaml
new file mode 100644
index 00000000..3f2f81de
--- /dev/null
+++ b/charts/cloudzero-insights-controller/templates/issuer.yaml
@@ -0,0 +1,10 @@
+{{ if .Values.webhook.issuer.enabled }}
+---
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  name: {{ include "insights-controller.issuerName" . 
}}
+  namespace: {{ .Release.Namespace }}
+spec:
+  {{- toYaml .Values.webhook.issuer.spec | nindent 2 }}
+{{- end }}
diff --git a/charts/cloudzero-insights-controller/templates/service.yaml b/charts/cloudzero-insights-controller/templates/service.yaml
new file mode 100644
index 00000000..42f69659
--- /dev/null
+++ b/charts/cloudzero-insights-controller/templates/service.yaml
@@ -0,0 +1,16 @@
+{{- /* ClusterIP Service for the webhook server; targetPort follows server.serverConfig.port, the port written to server-config.yaml. */ -}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "insights-controller.serviceName" . }}
+  labels:
+    {{- include "insights-controller.labels" . | nindent 4 }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  type: ClusterIP
+  ports:
+    - port: {{ .Values.server.service.port }}
+      targetPort: {{ .Values.server.serverConfig.port }}
+      name: http
+  selector:
+    {{- include "insights-controller.selectorLabels" . | nindent 4 }}
diff --git a/charts/cloudzero-insights-controller/templates/serviceaccount.yaml b/charts/cloudzero-insights-controller/templates/serviceaccount.yaml
new file mode 100644
index 00000000..9f5647cc
--- /dev/null
+++ b/charts/cloudzero-insights-controller/templates/serviceaccount.yaml
@@ -0,0 +1,15 @@
+{{- if .Values.serviceAccount.create -}}
+{{- /* namespace is set explicitly, like the other namespaced resources in this chart, so rendered output lands in the release namespace. */ -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "insights-controller.serviceAccountName" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "insights-controller.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . 
| nindent 4 }}
+  {{- end }}
+automountServiceAccountToken: true
+{{- end }}
diff --git a/charts/cloudzero-insights-controller/templates/webhooks.yaml b/charts/cloudzero-insights-controller/templates/webhooks.yaml
new file mode 100644
index 00000000..71c19f98
--- /dev/null
+++ b/charts/cloudzero-insights-controller/templates/webhooks.yaml
@@ -0,0 +1,38 @@
+{{- /*
+One ValidatingWebhookConfiguration per entry in .Values.webhook.configurations.
+The map key is used as the resource name; path and apiGroups come from the entry.
+*/ -}}
+{{- range $configType, $configs := .Values.webhook.configurations }}
+---
+apiVersion: admissionregistration.k8s.io/v1
+kind: ValidatingWebhookConfiguration
+metadata:
+  name: {{ include "insights-controller.validatingWebhookConfigName" $ }}-{{ $configType }}
+  namespace: {{ $.Release.Namespace }}
+  annotations:
+    {{- include "insights-controller.annotations" $ | nindent 4 }}
+webhooks:
+  - name: {{ include "insights-controller.validatingWebhookName" $ }}
+    {{- /* toJson keeps a non-empty selector valid on one line (JSON is valid YAML flow syntax). */}}
+    namespaceSelector: {{ toJson $.Values.webhook.namespaceSelector }}
+    failurePolicy: Ignore
+    rules:
+      - operations: [ "CREATE", "UPDATE" ]
+        {{- /* join emits a comma-separated flow sequence, so lists with several groups render correctly. */}}
+        apiGroups: [ {{ join ", " $configs.apiGroups }} ]
+        apiVersions: [ "v1" ]
+        resources: [ {{ $configType }} ]
+        scope: "*"
+    clientConfig:
+      service:
+        namespace: {{ $.Release.Namespace }}
+        name: {{ include "insights-controller.serviceName" $ }}
+        path: "{{ $configs.path }}"
+        port: {{ $.Values.server.service.port }}
+      {{- if and $.Values.webhook.certificate.enabled $.Values.webhook.issuer.enabled }}
+      caBundle: {{ $.Values.webhook.caBundle }}
+      {{- end }}
+    admissionReviewVersions: ["v1"]
+    sideEffects: None
+    timeoutSeconds: 5
+{{- end }}
diff --git a/charts/cloudzero-insights-controller/values.yaml b/charts/cloudzero-insights-controller/values.yaml
new file mode 100644
index 00000000..ba2ac7ec
--- /dev/null
+++ b/charts/cloudzero-insights-controller/values.yaml
@@ -0,0 +1,129 @@
+# -- CloudZero host to send metrics to.
+host: null
+# -- Account ID of the account the cluster is running in. This must be a string - even if it is a number in your system.
+cloudAccountId: null
+# -- Name of the cluster.
+clusterName: null
+# -- Region the cluster is running in.
+region: null
+# -- CloudZero API key. Required if existingSecretName is not set.
+apiKey: null
+# -- If set, the agent will use the API key in this Secret to authenticate with CloudZero.
+existingSecretName: null
+
+nameOverride: ""
+fullnameOverride: ""
+
+labels:
+  enabled: null
+  patterns:  # array of regular expressions matching the label keys to export
+    # - '.*'
+  resources:
+    pods: true
+    namespaces: true
+    deployments: false
+    statefulsets: false
+    nodes: false
+    jobs: false
+    cronjobs: false
+    daemonsets: false
+annotations:
+  enabled: false
+  patterns:
+    - '.*'
+  resources:
+    pods: true
+    namespaces: true
+    deployments: false
+    statefulsets: false
+    nodes: false
+    jobs: false
+    cronjobs: false
+    daemonsets: false
+
+server:
+  replicaCount: 3  # rendered as the Deployment's spec.replicas
+  service:
+    port: 443  # Service port; also used by the webhook clientConfig and the init job URLs
+  image:
+    repository: ghcr.io/cloudzero/cloudzero-insights-controller/cloudzero-insights-controller
+    pullPolicy: Always
+    # Overrides the image tag whose default is the chart appVersion.
+    # tag: "develop"
+  resources: {}
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+  imagePullSecrets: []
+  podAnnotations: {}
+  podLabels: {}
+  configMap:
+    annotations: {}
+  serverConfig:
+    containerSecretFilePath: /etc/config/cloudzero/secrets  # mount path of the API-key Secret volume
+    containerSecretFileName: value  # joined with the path above to form api_key_path
+    port: 8443  # written to server-config.yaml as server.port
+    read_timeout: 10s
+    write_timeout: 10s
+    idle_timeout: 120s
+  tls:
+    enabled: true
+    useManagedSecret: true  # when true, the tls-certs Secret volume is mounted into the server pod
+    mountPath: /etc/certs  # server-config.yaml points at tls.key and tls.crt under this path
+  volumeMounts: []
+  volumes: []
+
+
+webhook:
+  annotations: {}
+  namespaceSelector: {} # This denotes no specific selection, applies to all namespaces
+  issuer:
+    enabled: true
+    spec:
+      selfSigned: {}
+  certificate:
+    enabled: true
+  caBundle: '' # by default, this is empty, and the value is populated by cert-manager's ca-injector if cert-manager is used
+  configurations:
+    pods:
+      path: /validate/pod
+      apiGroups: [ '""' ]
+    namespaces:
+      path: /validate/namespace
+      apiGroups: [ '""' ]
+    # -- Uncomment the following to enable exporting labels/annotations for the following resources
+    # deployments:
+    #   path: /validate/deployment
+    #   apiGroups: [ "apps" ]
+    # statefulsets:
+    #   path: /validate/statefulset
+    #   apiGroups: [ "apps" ]
+    # nodes:
+    #   path: /validate/node
+    #   apiGroups: [ '""' ]
+    # jobs:
+    #   path: /validate/job
+    #   apiGroups: [ "batch" ]
+    # cronjobs:
+    #   path: /validate/cronjob
+    #   apiGroups: [ "batch" ]
+    # daemonsets:
+    #   path: /validate/daemonset
+    #   apiGroups: [ "apps" ]
+
+serviceAccount:
+  create: true
+  annotations: {}
+
+rbac:
+  create: true
+
+cert-manager:
+  enabled: true
+
+initJob:
+  enabled: true
+  image:
+    repository: curlimages/curl
+    pullPolicy: Always  # NOTE(review): not referenced by the init-job.yaml template in this change - confirm
+    tag: "8.10.1"