From cc6dd849b6b457331a8f3b4bcf73f7ae71f8d24b Mon Sep 17 00:00:00 2001 From: Josh Heyer Date: Mon, 5 Jan 2026 21:29:55 +0000 Subject: [PATCH 1/7] klio product definition --- gatsby-config.js | 1 + src/constants/products.js | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/gatsby-config.js b/gatsby-config.js index 088dd8786a..8ebc5d269f 100644 --- a/gatsby-config.js +++ b/gatsby-config.js @@ -69,6 +69,7 @@ const sourceToPluginConfig = { name: "jdbc_connector", path: "product_docs/docs/jdbc_connector", }, + klio: { name: "klio", path: "product_docs/docs/klio" }, language_pack: { name: "language_pack", path: "product_docs/docs/language_pack", diff --git a/src/constants/products.js b/src/constants/products.js index d527b41047..8c818d1d03 100644 --- a/src/constants/products.js +++ b/src/constants/products.js @@ -51,6 +51,11 @@ const products = { alteruser_utility: { name: "alteruser", iconName: IconNames.TOOLS }, edb_sqlpatch: { name: "EDB SQL Patch", iconName: IconNames.TOOLS }, language_pack: { name: "Language Pack", iconName: IconNames.TOOLS }, + klio: { + name: "Enterprise Data Protection for CloudNativePG™", + shortName: "Klio", + iconName: IconNames.BACKUP, + }, lasso: { name: "Lasso" }, livecompare: { name: "LiveCompare", iconName: IconNames.INTEGRATION }, "Migration Handbook": { name: "Migration Handbook" }, From fc0631a4c8677e8f72b86b8425a04a86a94b09ff Mon Sep 17 00:00:00 2001 From: Josh Heyer Date: Mon, 5 Jan 2026 21:30:12 +0000 Subject: [PATCH 2/7] klio 0.0.10 import --- .../docs/klio/0/_helm_chart_values.mdx | 42 ++ product_docs/docs/klio/0/api/_klio_api.mdx | 195 ++++++ .../docs/klio/0/api/_klio_catalog_api.mdx | 103 +++ product_docs/docs/klio/0/api/index.mdx | 10 + product_docs/docs/klio/0/api/klio_api.mdx | 11 + .../docs/klio/0/api/klio_catalog_api.mdx | 11 + product_docs/docs/klio/0/api_service.mdx | 407 +++++++++++ product_docs/docs/klio/0/architectures.mdx | 230 ++++++ .../docs/klio/0/backup_and_restore.mdx | 314 +++++++++ product_docs/docs/klio/0/helm_chart.mdx | 188 +++++ .../klio/0/images/basebackups_walarchive.png | 3 + .../klio/0/images/overview-multi-tiers.png | 3 + .../klio/0/images/tier1-namespace-multi.png | 3 + .../klio/0/images/tier1-namespace-single.png | 3 + .../docs/klio/0/images/tier1-shared-multi.png | 3 + .../klio/0/images/tier1-shared-single.png | 3 + .../docs/klio/0/images/wal-streaming.png | 3 + product_docs/docs/klio/0/index.mdx | 124 ++++ product_docs/docs/klio/0/klio_server.mdx | 662 ++++++++++++++++++ product_docs/docs/klio/0/main_concepts.mdx | 131 ++++ product_docs/docs/klio/0/opentelemetry.mdx | 414 +++++++++++ .../docs/klio/0/plugin_configuration.mdx | 354 ++++++++++ product_docs/docs/klio/0/wal_streaming.mdx | 125 ++++ product_docs/docs/klio/0/walplayer.mdx | 263 +++++++ 24 files changed, 3605 insertions(+) create mode 100644 product_docs/docs/klio/0/_helm_chart_values.mdx create mode 100644 product_docs/docs/klio/0/api/_klio_api.mdx create mode 100644 product_docs/docs/klio/0/api/_klio_catalog_api.mdx create mode 100644 product_docs/docs/klio/0/api/index.mdx create mode 100644 product_docs/docs/klio/0/api/klio_api.mdx create mode 100644 product_docs/docs/klio/0/api/klio_catalog_api.mdx create mode 100644 product_docs/docs/klio/0/api_service.mdx create mode 100644 product_docs/docs/klio/0/architectures.mdx create mode 100644 product_docs/docs/klio/0/backup_and_restore.mdx create mode 100644 product_docs/docs/klio/0/helm_chart.mdx create mode 100644 product_docs/docs/klio/0/images/basebackups_walarchive.png create mode 100644 
product_docs/docs/klio/0/images/overview-multi-tiers.png create mode 100644 product_docs/docs/klio/0/images/tier1-namespace-multi.png create mode 100644 product_docs/docs/klio/0/images/tier1-namespace-single.png create mode 100644 product_docs/docs/klio/0/images/tier1-shared-multi.png create mode 100644 product_docs/docs/klio/0/images/tier1-shared-single.png create mode 100644 product_docs/docs/klio/0/images/wal-streaming.png create mode 100644 product_docs/docs/klio/0/index.mdx create mode 100644 product_docs/docs/klio/0/klio_server.mdx create mode 100644 product_docs/docs/klio/0/main_concepts.mdx create mode 100644 product_docs/docs/klio/0/opentelemetry.mdx create mode 100644 product_docs/docs/klio/0/plugin_configuration.mdx create mode 100644 product_docs/docs/klio/0/wal_streaming.mdx create mode 100644 product_docs/docs/klio/0/walplayer.mdx diff --git a/product_docs/docs/klio/0/_helm_chart_values.mdx b/product_docs/docs/klio/0/_helm_chart_values.mdx new file mode 100644 index 0000000000..114fef14fa --- /dev/null +++ b/product_docs/docs/klio/0/_helm_chart_values.mdx @@ -0,0 +1,42 @@ +| Key | Type | Default | Description | +| -------------------------------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------- | +| certmanager.clusterDomain | string | `"cluster.local"` | The DNS domain of the cluster | +| certmanager.createMetricsCertificate | bool | `true` | Create certificates for the metrics service. | +| certmanager.createPluginClientCertificate | bool | `true` | Create certificates for the plugin client. | +| certmanager.createPluginServerCertificate | bool | `true` | Create certificates for the plugin server. | +| certmanager.duration | string | `"2160h"` | The duration of the certificates. | +| certmanager.enable | bool | `true` | Enable cert-manager integration for certificate creation. | +| certmanager.renewBefore | string | `"360h"` | The renew before time for the certificates. | +| controllerManager.affinity | object | `{}` | Affinity rules for the operator deployment. | +| controllerManager.manager.args | list | `["--metrics-bind-address=:8443","--leader-elect","--health-probe-bind-address=:8081","--plugin-server-cert=/pluginServer/tls.crt","--plugin-server-key=/pluginServer/tls.key","--plugin-client-cert=/pluginClient/tls.crt","--plugin-server-address=:9090","--custom-cnpg-group=postgresql.cnpg.io"]` | List of command line arguments to pass to the controller manager. | +| controllerManager.manager.containerSecurityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]}}` | The security context for the controller manager container. | +| controllerManager.manager.env | object | `{"SIDECAR_IMAGE":"ghcr.io/enterprisedb/klio:v0.0.10"}` | The environment variables to set in the controller manager container. | +| controllerManager.manager.image.pullPolicy | string | `"Always"` | The controller manager container imagePullPolicy. | +| controllerManager.manager.image.pullSecrets | list | `[]` | The list of imagePullSecrets. | +| controllerManager.manager.image.repository | string | `"ghcr.io/enterprisedb/klio-operator"` | The image to use for the controller manager container. 
|
| controllerManager.manager.image.tag | string | `"v0.0.10"` | The tag to use for the controller manager container image. |
| controllerManager.manager.livenessProbe | object | `{"httpGet":{"path":"/healthz","port":8081},"initialDelaySeconds":15,"periodSeconds":20}` | Liveness probe configuration. |
| controllerManager.manager.readinessProbe | object | `{"httpGet":{"path":"/readyz","port":8081},"initialDelaySeconds":5,"periodSeconds":10}` | Readiness probe configuration. |
| controllerManager.manager.resources | object | `{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}}` | The resources to allocate. |
| controllerManager.nodeSelector | object | `{}` | NodeSelector for the operator deployment. |
| controllerManager.podSecurityContext | object | `{"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}}` | The security context for the controller manager pod. |
| controllerManager.priorityClassName | string | `""` | Priority class name for the controller manager pod. |
| controllerManager.serviceAccount.annotations | object | `{}` | The annotations to add to the service account. |
| controllerManager.tolerations | list | `[]` | Tolerations for the operator deployment. |
| controllerManager.topologySpreadConstraints | list | `[]` | Topology Spread Constraints for the operator deployment. |
| fullnameOverride | string | `""` | Override the fully qualified name of the Helm Chart. |
| kubernetesClusterDomain | string | `"cluster.local"` | The domain for the Kubernetes cluster. |
| metricsService.enable | bool | `true` | Enable the metrics service for the controller manager. |
| metricsService.metricsServiceSecret | string | `"klio-metrics-server-cert"` | The name of the secret containing the TLS certificate for the metrics service. |
| metricsService.ports | list | `[{"name":"https","port":8443,"protocol":"TCP","targetPort":8443}]` | The ports the metrics service will listen on. |
| metricsService.type | string | `"ClusterIP"` | Service type for the metrics service. |
| nameOverride | string | `"klio"` | Override the name of the Helm Chart. |
| plugin.clientSecret | string | `"klio-plugin-client-tls"` | The name of the secret containing the client TLS certificate. |
| plugin.name | string | `"klio.enterprisedb.io"` | The name the plugin will use to register itself with the CNPG Operator. |
| plugin.port | int | `9090` | The port the plugin will listen on. It must match the "--plugin-server-address" argument. |
| plugin.serverSecret | string | `"klio-plugin-server-tls"` | The name of the secret containing the server TLS certificate. |
| prometheus.enable | bool | `true` | Set to `true` to enable a ServiceMonitor that exports metrics to Prometheus. |
| serviceAccount.annotations | object | `{}` | The annotations to add to the service account. |
| serviceAccount.automount | bool | `true` | Automount service account token. |
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created. |
| serviceAccount.name | string | `""` | The name of the service account. |

diff --git a/product_docs/docs/klio/0/api/_klio_api.mdx b/product_docs/docs/klio/0/api/_klio_api.mdx
new file mode 100644
index 0000000000..b7d39ca094
--- /dev/null
+++ b/product_docs/docs/klio/0/api/_klio_api.mdx
@@ -0,0 +1,195 @@
# Packages

- [klio.enterprisedb.io/v1alpha1](#klioenterprisedbiov1alpha1)

## klio.enterprisedb.io/v1alpha1

Package v1alpha1 contains API Schema definitions for the klio v1alpha1 API group. 
+ +### Resource Types + +- [PluginConfiguration](#pluginconfiguration) +- [Server](#server) + +#### BaseConfiguration + +BaseConfiguration defines the configuration for the base server. + +*Appears in:* + +- [ServerSpec](#serverspec) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------- | -------- | ------- | ---------- | +| `resources` *[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcerequirements-v1-core)* | Resources defines the resource requirements for the Kopia server | | | | +| `adminUser` *[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#localobjectreference-v1-core)* | AdminUser is a reference to a secret of type 'kubernetes.io/basic-auth' | | | | + +#### CacheConfiguration + +CacheConfiguration defines the configuration for the cache directory. + +*Appears in:* + +- [ServerSpec](#serverspec) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -------- | ------- | ---------- | +| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#persistentvolumeclaimspec-v1-core)* | | True | | | + +#### DataConfiguration + +DataConfiguration defines the configuration for the data directory. + +*Appears in:* + +- [ServerSpec](#serverspec) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#persistentvolumeclaimspec-v1-core)* | Template to be used to generate the Persistent Volume Claim needed for the data folder,
containing base backups and WAL files. | True | | | + +#### PluginConfiguration + +PluginConfiguration is the Schema for the client configuration API. + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ---------- | +| `apiVersion` *string* | `klio.enterprisedb.io/v1alpha1` | True | | | +| `kind` *string* | `PluginConfiguration` | True | | | +| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | +| `spec` *[PluginConfigurationSpec](#pluginconfigurationspec)* | | True | | | +| `status` *[PluginConfigurationStatus](#pluginconfigurationstatus)* | | | | | + +#### PluginConfigurationSpec + +PluginConfigurationSpec defines the desired state of client configuration. + +*Appears in:* + +- [PluginConfiguration](#pluginconfiguration) + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------- | ------- | -------------------------------------- | +| `serverAddress` *string* | ServerAddress is the address of the Klio server | True | | MinLength: 1
Required: {}
| +| `tier2` *boolean* | Tier2 enables backup lookup in tier 2. | True | | | +| `clientSecretName` *string* | ClientSecretName is the name of the secret containing the client credentials | True | | MinLength: 1
Required: {}
| +| `serverSecretName` *string* | ServerSecretName is the name of the secret containing the server TLS certificate | True | | MinLength: 1
Required: {}
| +| `clusterName` *string* | ClusterName is the name of the PostgreSQL cluster we are connecting to | | | | +| `pprof` *boolean* | Pprof enables the pprof endpoint for performance profiling | | | | +| `retention` *[RetentionPolicy](#retentionpolicy)* | RetentionPolicy defines how many backups we should keep | | | | +| `containers` *[Container](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#container-v1-core) array* | Containers allows defining a list of containers that will be merged with the Klio sidecar containers.
This enables users to customize the sidecars with additional environment variables, volume mounts,
resource limits, and other container settings without polluting the PostgreSQL container environment.
Merge behavior:
- Containers are matched by name (klio-plugin, klio-wal, klio-restore)
- User customizations serve as the base
- Klio required values (name, args, CONTAINER_NAME env var) always override user values
- User-defined environment variables and volume mounts are preserved
- Template defaults are applied only for fields not set by the user or Klio | | | MaxItems: 3
|

#### PluginConfigurationStatus

PluginConfigurationStatus defines the observed state of PluginConfiguration.

*Appears in:*

- [PluginConfiguration](#pluginconfiguration)

#### QueueConfiguration

QueueConfiguration defines the configuration for the directory hosting the
task queue.

*Appears in:*

- [ServerSpec](#serverspec)

| Field | Description | Required | Default | Validation |
| ----- | ----------- | -------- | ------- | ---------- |
| `resources` *[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcerequirements-v1-core)* | QueueResources defines the resource requirements for the NATS server | | | |
| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#persistentvolumeclaimspec-v1-core)* | PersistentVolumeClaimTemplate is used to generate the configuration for
the PVC hosting the work queue. | True | | |

#### RetentionPolicy

RetentionPolicy defines how many backups we should keep.

*Appears in:*

- [PluginConfigurationSpec](#pluginconfigurationspec)

| Field | Description | Required | Default | Validation |
| ----- | ----------- | -------- | ------- | ---------- |
| `keepLatest` *integer* | KeepLatest is the number of latest backups to keep (optional) | True | | |
| `keepAnnual` *integer* | KeepAnnual is the number of annual backups to keep (optional) | True | | |
| `keepMonthly` *integer* | KeepMonthly is the number of monthly backups to keep (optional) | True | | |
| `keepWeekly` *integer* | KeepWeekly is the number of weekly backups to keep (optional) | True | | |
| `keepDaily` *integer* | KeepDaily is the number of daily backups to keep (optional) | True | | |
| `keepHourly` *integer* | KeepHourly is the number of hourly backups to keep (optional) | True | | |

#### S3Configuration

S3Configuration is the configuration for an S3-based tier 2.

*Appears in:*

- [Tier2Configuration](#tier2configuration)

| Field | Description | Required | Default | Validation |
| ----- | ----------- | -------- | ------- | ---------- |
| `bucketName` *string* | BucketName is the name of the bucket | True | | |
| `prefix` *string* | Prefix is the prefix to be used for the stored files | | | |
| `endpoint` *string* | Endpoint is the endpoint to be used | | | |
| `region` *string* | Region is the region to be used | | | |
| `walEncryptionPassword` *[SecretKeySelector](#secretkeyselector)* | WALEncryptionPassword is a pointer to the key in a secret containing the encryption password. | True | | |
| `accessKeyId` *[SecretKeySelector](#secretkeyselector)* | The S3 access key ID | | | |
| `secretAccessKey` *[SecretKeySelector](#secretkeyselector)* | The S3 secret access key | | | |
| `sessionToken` *[SecretKeySelector](#secretkeyselector)* | The S3 session token | | | |
| `customCaBundle` *[SecretKeySelector](#secretkeyselector)* | A pointer to a custom CA bundle | | | |

#### Server

Server is the Schema for the servers API.

| Field | Description | Required | Default | Validation |
| ----- | ----------- | -------- | ------- | ---------- |
| `apiVersion` *string* | `klio.enterprisedb.io/v1alpha1` | True | | |
| `kind` *string* | `Server` | True | | |
| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | |
| `spec` *[ServerSpec](#serverspec)* | | True | | |
| `status` *[ServerStatus](#serverstatus)* | | | | |

#### ServerSpec

ServerSpec defines the desired state of Server.

*Appears in:*

- [Server](#server)

| Field | Description | Required | Default | Validation |
| ----- | ----------- | -------- | ------- | ---------- |
| `baseConfiguration` *[BaseConfiguration](#baseconfiguration)* | BaseConfiguration is the configuration of the Kopia server | | | |
| `image` *string* | Image is the image to be used for the Klio server | True | | |
| `imagePullPolicy` *[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#pullpolicy-v1-core)* | ImagePullPolicy defines the policy for pulling the image | | IfNotPresent | |
| `imagePullSecrets` *[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#localobjectreference-v1-core) array* | ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the
images | | | | +| `tlsSecretName` *string* | TLSSecretName is the name of the Kubernetes secret containing the server-side certificate
to be used for the Klio server. | True | | | +| `caSecretName` *string* | ClientCASecretName is the name of the Kubernetes secret containing the CA certificate
to be used by the Klio server to validate the users. | True | | | +| `resources` *[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcerequirements-v1-core)* | Resources defines the resource requirements for the Klio server | | | | +| `cacheConfiguration` *[CacheConfiguration](#cacheconfiguration)* | CacheConfiguration is the configuration of the PVC that should be
used for the cache | True | | | +| `dataConfiguration` *[DataConfiguration](#dataconfiguration)* | DataConfiguration is the configuration of the PVC that should be used
for the base backups | True | | | +| `queueConfiguration` *[QueueConfiguration](#queueconfiguration)* | QueueConfiguration is the configuration of the PVC that should host
the task queue. | | | | +| `password` *[SecretKeySelector](#secretkeyselector)* | Password is a reference to a secret containing the Klio password | True | | | +| `tier2` *[Tier2Configuration](#tier2configuration)* | Tier2 is the Tier 2 configuration | True | | | +| `template` *[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#podtemplatespec-v1-core)* | Template to override the default StatefulSet of the Klio server.
WARNING: Modifying this template may break the server functionality if not done carefully.
This field is primarily intended for advanced configuration such as telemetry setup.
Use at your own risk and ensure thorough testing before applying changes. | | | |

#### ServerStatus

ServerStatus defines the observed state of Server.

*Appears in:*

- [Server](#server)

#### Tier2Configuration

Tier2Configuration is the tier 2 configuration.

*Appears in:*

- [ServerSpec](#serverspec)

| Field | Description | Required | Default | Validation |
| ------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ---------- |
| `s3` *[S3Configuration](#s3configuration)* | S3 contains the configuration parameters for an S3-based tier 2 | True | | |

diff --git a/product_docs/docs/klio/0/api/_klio_catalog_api.mdx b/product_docs/docs/klio/0/api/_klio_catalog_api.mdx
new file mode 100644
index 0000000000..41496c09f3
--- /dev/null
+++ b/product_docs/docs/klio/0/api/_klio_catalog_api.mdx
@@ -0,0 +1,103 @@
# Packages

- [kliocatalog.enterprisedb.io/v1alpha1](#kliocatalogenterprisedbiov1alpha1)

## kliocatalog.enterprisedb.io/v1alpha1

Package v1alpha1 contains the Klio Catalog API.

### Resource Types

- [KlioBackup](#kliobackup)
- [KlioBackupList](#kliobackuplist)

#### KlioBackup

KlioBackup is the Schema for a Klio Backup API.

*Appears in:*

- [KlioBackupList](#kliobackuplist)

| Field | Description | Required | Default | Validation |
| ----- | ----------- | -------- | ------- | ---------- |
| `apiVersion` *string* | `kliocatalog.enterprisedb.io/v1alpha1` | True | | |
| `kind` *string* | `KlioBackup` | True | | |
| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | |
| `spec` *[KlioBackupSpec](#kliobackupspec)* | | True | | |
| `status` *[KlioBackupStatus](#kliobackupstatus)* | | | | |

#### KlioBackupList

KlioBackupList contains a list of KlioBackup.

| Field | Description | Required | Default | Validation |
| ----- | ----------- | -------- | ------- | ---------- |
| `apiVersion` *string* | `kliocatalog.enterprisedb.io/v1alpha1` | True | | |
| `kind` *string* | `KlioBackupList` | True | | |
| `metadata` *[ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#listmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | |
| `items` *[KlioBackup](#kliobackup) array* | | True | | |

#### KlioBackupSpec

KlioBackupSpec defines the desired state of a KlioBackup.

*Appears in:*

- [KlioBackup](#kliobackup)

| Field | Description | Required | Default | Validation |
| ---------------------- | --------------------------------------------------------------- | -------- | ------- | ---------- |
| `clusterName` *string* | ClusterName is the name of the cluster that has been backed up | True | | |
| `backupID` *string* | BackupID is the unique identifier of the backup | True | | |

#### KlioBackupStatus

KlioBackupStatus defines the observed state of a KlioBackup. 
+ +*Appears in:* + +- [KlioBackup](#kliobackup) + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `startLSN` *integer* | StartLSN is the LSN of the backup start | True | | | +| `endLSN` *integer* | EndLSN is the LSN of the backup end | True | | | +| `startWAL` *string* | StartWAL is the current WAL when the backup started | True | | | +| `endWAL` *string* | EndWAL is the current WAL when the backup ends | True | | | +| `tablespaces` *[TablespaceLayoutList](#tablespacelayoutlist)* | Tablespaces are the metadata of the tablespaces | True | | | +| `annotations` *object (keys:string, values:string)* | Annotations is a generic data store where each
backend can put its metadata. | True | | | +| `startedAt` *[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)* | StartedAt is the current time when the backup started. | True | | | +| `stoppedAt` *[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)* | StoppedAt is the current time when the backup ended. | True | | | + +#### TablespaceLayout + +TablespaceLayout is the on-disk structure of a tablespace. + +*Appears in:* + +- [TablespaceLayoutList](#tablespacelayoutlist) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------- | -------------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `name` *string* | Name is the tablespace name | True | | | +| `oid` *string* | Oid is the OID of the tablespace. | True | | | +| `path` *string* | Path is the path where the tablespace can be found. | True | | | +| `annotations` *object (keys:string, values:string)* | Annotations is a generic data store where each backend
can annotate its metadata. | True | | | + +#### TablespaceLayoutList + +*Underlying type:* *[TablespaceLayout](#tablespacelayout)* + +TablespaceLayoutList is a list of TablespaceLayout. + +*Appears in:* + +- [KlioBackupStatus](#kliobackupstatus) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------- | -------------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `name` *string* | Name is the tablespace name | True | | | +| `oid` *string* | Oid is the OID of the tablespace. | True | | | +| `path` *string* | Path is the path where the tablespace can be found. | True | | | +| `annotations` *object (keys:string, values:string)* | Annotations is a generic data store where each backend
can annotate its metadata. | True | | |

diff --git a/product_docs/docs/klio/0/api/index.mdx b/product_docs/docs/klio/0/api/index.mdx
new file mode 100644
index 0000000000..8b545fd4e1
--- /dev/null
+++ b/product_docs/docs/klio/0/api/index.mdx
@@ -0,0 +1,10 @@
---
title: API Reference
navigation:
  - klio_api
  - klio_catalog_api
  - '!_klio_api'
  - '!_klio_catalog_api'
indexCards: extra
---

diff --git a/product_docs/docs/klio/0/api/klio_api.mdx b/product_docs/docs/klio/0/api/klio_api.mdx
new file mode 100644
index 0000000000..1f5f08d6ac
--- /dev/null
+++ b/product_docs/docs/klio/0/api/klio_api.mdx
@@ -0,0 +1,11 @@
---
title: Klio API reference
originalFilePath: >-
  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/api/klio_api.mdx
editTarget: originalFilePath

---

import KlioAPI from "./_klio_api.mdx";

<KlioAPI />

diff --git a/product_docs/docs/klio/0/api/klio_catalog_api.mdx b/product_docs/docs/klio/0/api/klio_catalog_api.mdx
new file mode 100644
index 0000000000..4b27c18870
--- /dev/null
+++ b/product_docs/docs/klio/0/api/klio_catalog_api.mdx
@@ -0,0 +1,11 @@
---
title: Klio Catalog API reference
originalFilePath: >-
  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/api/klio_catalog_api.mdx
editTarget: originalFilePath

---

import KlioCatalogAPI from "./_klio_catalog_api.mdx";

<KlioCatalogAPI />

diff --git a/product_docs/docs/klio/0/api_service.mdx b/product_docs/docs/klio/0/api_service.mdx
new file mode 100644
index 0000000000..d71c33a848
--- /dev/null
+++ b/product_docs/docs/klio/0/api_service.mdx
@@ -0,0 +1,407 @@
---
title: Klio API Service
originalFilePath: >-
  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/api_service.md
sidebar_position: 9
editTarget: originalFilePath

---

The Klio API Service is a Kubernetes API aggregation server that extends the
Kubernetes API to expose Klio backup metadata as native Kubernetes resources.
This allows users to query and inspect backup information using standard
Kubernetes tools like `kubectl`.

## Overview

The API service integrates with the Kubernetes API server through the
[API Aggregation Layer](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/apiserver-aggregation/),
registering a new API group `kliocatalog.enterprisedb.io/v1alpha1` that provides
access to backup catalog information stored in the Klio repository.

### Key Features

- **Native Kubernetes Integration**: Query backups using `kubectl` and other
  Kubernetes tools
- **Secure Communication**: TLS-encrypted connections with certificate-based
  authentication
- **Real-time Catalog Access**: Direct access to the Klio backup repository
  for up-to-date information
- **Standard Kubernetes RBAC**: Control access using native Kubernetes
  role-based access control

### Architecture

The API service acts as a bridge between the Kubernetes API server
and the Klio backup repository:

```mermaid
flowchart LR;
    client[kubectl];
    k8sapi[Kubernetes API Server];
    klioapi[Klio API Service Deployment];
    kliorepo[Klio Server];
    client --> k8sapi;
    k8sapi --> klioapi;
    klioapi --> kliorepo;
```

## What the API Service Provides

### KlioBackup Resource

The API service exposes a `KlioBackup` resource that represents
backup metadata from the Klio repository.

The resource is described in the [Klio Catalog API reference](api/klio_catalog_api.mdx). 
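For orientation, a trimmed sketch of such an object is shown below; the names and IDs are illustrative, and a complete example appears in [Using the API Service](#using-the-api-service):

```yaml
apiVersion: kliocatalog.enterprisedb.io/v1alpha1
kind: KlioBackup
metadata:
  name: cluster-example.backup-20251113093137
spec:
  backupID: backup-20251113093137
  clusterName: cluster-example
```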
### Available Operations

The API service supports the following operations:

1. **List all backups**: Retrieve all backups across all clusters
2. **Get specific backup**: Retrieve details for a specific backup by name

## Setting Up the API Service

### Prerequisites

Before setting up the Klio API service, ensure you have:

- A Kubernetes cluster with the Klio operator installed
- A running Klio server instance
- `kubectl` configured to access your cluster
- [cert-manager](https://cert-manager.io/) installed
  for TLS certificate management (optional)

### Required Components

The API service setup includes:

1. **TLS Certificates**: For secure communication between the Kubernetes API
   server and the Klio API service, and between the Klio API service and the
   Klio server
2. **Service Account**: Dedicated service account with necessary permissions
3. **RBAC Resources**: ClusterRole and Role for authorization
4. **Deployment**: The API service deployment specification
5. **Service**: Kubernetes service to expose the API server
6. **APIService**: Registration with the Kubernetes API aggregation layer

### Step-by-Step Setup

!!!note

The setup procedure will be simplified in future Klio releases.
!!!

#### 1. Create Service Account and RBAC Resources

Create a service account and the necessary RBAC resources.

```yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: klio-api
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: klio-api
  namespace: kube-system
rules:
  - apiGroups:
      - ""
    resourceNames:
      - extension-apiserver-authentication
    resources:
      - configmaps
    verbs:
      - get
      - watch
      - list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: klio-api
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: klio-api
subjects:
  - kind: ServiceAccount
    name: klio-api
    namespace: default
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: klio-api
rules:
  - apiGroups: [""]
    resources: ["namespaces"]
    verbs: ["get", "watch", "list"]
  - apiGroups: ["admissionregistration.k8s.io"]
    resources: ["mutatingwebhookconfigurations", "validatingwebhookconfigurations", "validatingadmissionpolicies", "validatingadmissionpolicybindings"]
    verbs: ["get", "watch", "list"]
  - apiGroups: ["flowcontrol.apiserver.k8s.io"]
    resources: ['prioritylevelconfigurations', 'flowschemas']
    verbs: ['list', 'watch']
  - apiGroups:
      - authorization.k8s.io
    resources:
      - subjectaccessreviews
    verbs:
      - create
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: klio-api
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: klio-api
subjects:
  - kind: ServiceAccount
    name: klio-api
    namespace: default
```

#### 2. Create a client certificate secret

Create a Kubernetes `Secret` containing the client certificate
and key for authenticating to the Klio server.

Its common name must be `snapshot_reader@klio`.

See the [Klio documentation](klio_server.mdx#creating-a-client-side-certificate)
for instructions on generating the certificate.

#### 3. Create a server certificate secret

Create a Kubernetes `Secret` containing the Klio server's TLS certificate
for validating the server's identity. 
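If the server certificate is already available as a local file, you can wrap it in a generic secret directly. A minimal sketch, assuming the certificate is saved as `server-tls.crt` and that the secret name `klio-server-tls` matches the one referenced by the deployment in step 4 (both names are illustrative):

```sh
# Hypothetical example: store an existing Klio server certificate in a
# secret; the API service deployment mounts it and reads the "tls.crt" key.
kubectl create secret generic klio-server-tls \
  --from-file=tls.crt=server-tls.crt
```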
+ +If you're using `cert-manager`, you can create a `Certificate` resource as shown below: + +```yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: klio-api +spec: + secretName: klio-api-tls + commonName: klio-api + dnsNames: + - klio-api + - klio-api.default + - klio-api.default.svc + + duration: 2160h # 90d + renewBefore: 360h # 15d + + isCA: false + usages: + - server auth + - client auth + + issuerRef: + name: selfsigned-issuer + kind: Issuer + group: cert-manager.io +``` + +!!!info + +For production environments, use certificates signed by +your organization's Certificate Authority (CA) +or a trusted public CA instead of self-signed certificates. +!!! + +#### 4. Deploy the API Service + +Create a deployment configuration for the API service. + + + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: klio-api + name: klio-api +spec: + replicas: 1 + selector: + matchLabels: + app: klio-api + strategy: {} + template: + metadata: + labels: + app: klio-api + spec: + serviceAccountName: klio-api + volumes: + - name: klio-server-tls + secret: + defaultMode: 420 + secretName: <$KLIO_SERVER_CERT> # The Klio server cert secret + - name: klio-api-tls + secret: + defaultMode: 420 + secretName: klio-api-tls # The Klio API server certificate secret + - name: klio-client-tls + secret: + defaultMode: 420 + secretName: <$KLIO_CLIENT_CERT> # The client certificate used to authenticate to the Klio server + - emptyDir: {} + name: scratch-data + containers: + - args: + - server + - api-server + env: + - name: CLIENT_BASE_HOSTNAME + value: klio + - name: CLIENT_BASE_URL + value: <$KLIO_SERVER_ADDRESS> # The Klio server address in https://host:port format + - name: CLIENT_BASE_SERVER_CERT_PATH + value: /certs/tls.crt + - name: CLIENT_BASE_API_SERVER_CERTFILE + value: /klio-api/certs/tls.crt + - name: CLIENT_BASE_API_SERVER_KEYFILE + value: /klio-api/certs/tls.key + - name: CLIENT_BASE_CLIENT_CERT_PATH + value: /client-certs/tls.crt + - name: CLIENT_BASE_CLIENT_KEY_PATH + value: /client-certs/tls.key + - name: TMPDIR + value: /tmp + image: ghcr.io/enterprisedb/klio:v0.0.8 + imagePullPolicy: Always + name: api-server + resources: {} + securityContext: + runAsNonRoot: true + volumeMounts: + - mountPath: /certs + name: klio-server-tls + - mountPath: /client-certs + name: klio-client-tls + - mountPath: /klio-api/certs + name: klio-api-tls + - mountPath: /tmp + name: scratch-data +``` + + + +!!!warning + +`CLIENT_BASE_HOSTNAME` will be removed in a future Klio release. +!!! + +#### 5. Create a service + +Now expose the Deployment via a Kubernetes Service: + +```yaml +apiVersion: v1 +kind: Service +metadata: + labels: + app: klio-api + name: klio-api +spec: + ports: + - port: 443 + protocol: TCP + targetPort: 443 + selector: + app: klio-api +``` + +#### 6. 
Register the `APIService`

Finally, register the API service by creating its `APIService` resource:

```yaml
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  name: v1alpha1.kliocatalog.enterprisedb.io

  # If using cert-manager, you can inject the CA bundle specifying the
  # certificate:
  #
  # annotations:
  #   cert-manager.io/inject-ca-from: default/klio-api
spec:
  group: kliocatalog.enterprisedb.io
  groupPriorityMinimum: 1000
  versionPriority: 15
  service:
    name: klio-api
    namespace: default
  version: v1alpha1
  # If not using cert-manager, you will need to manually set the CA bundle
  # kubectl get secret klio-api-tls -o jsonpath='{.data.tls\.crt}'
  #
  # caBundle: put-CA-Bundle-here
```

## Using the API Service

Once the API service is running, you can interact with it using `kubectl`.

### List All Backups

```bash
kubectl get kliobackups
```

Example output:

```
NAME                                    CLUSTER NAME      STARTED AT                      STOPPED AT
cluster-example.backup-20251113093137   cluster-example   2025-11-13 09:31:37 +0000 UTC   2025-11-13 09:31:39 +0000 UTC
```

### Get Specific Backup Details

```bash
kubectl get kliobackups cluster-example.backup-20251113093137 -o yaml
```

Example output:

```yaml
apiVersion: kliocatalog.enterprisedb.io/v1alpha1
kind: KlioBackup
metadata:
  name: cluster-example.backup-20251113093137
spec:
  backupID: backup-20251113093137
  clusterName: cluster-example
status:
  annotations:
    klio.io/controlDataKopiaManifestID: f055f78fb1db512b39e695465fa26ead
    klio.io/kopiaManifestID: 2272afec220f48d0fa0064f85b043ac4
  endLSN: 117440800
  endWAL: "000000010000000000000007"
  startLSN: 117440552
  startWAL: "000000010000000000000007"
  startedAt: "2025-11-13T09:31:37Z"
  stoppedAt: "2025-11-13T09:31:39Z"
```

## Limitations

An API service is tied to a specific Klio server instance. If you have multiple
Klio servers, you will need to deploy a separate API service for each server,
defining a different API group for each to avoid conflicts.

diff --git a/product_docs/docs/klio/0/architectures.mdx b/product_docs/docs/klio/0/architectures.mdx
new file mode 100644
index 0000000000..7f0d3f48ff
--- /dev/null
+++ b/product_docs/docs/klio/0/architectures.mdx
@@ -0,0 +1,230 @@
---
title: Architectures & Tiers
originalFilePath: >-
  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/architectures.md
sidebar_position: 3
editTarget: originalFilePath

---

Klio employs a multi-tiered architecture designed to balance performance,
resilience, and cost. This approach separates immediate, high-speed backup and
recovery operations from long-term archival and disaster recovery (DR) needs.
The architecture is built around three distinct storage tiers, each serving a
specific purpose in the data lifecycle.

![Multi-tiered architecture overview](images/overview-multi-tiers.png)

* * *

## Tier 0: Volume Snapshots

!!!note

Tier 0 is part of our long-term vision and will be introduced in a future
release.
!!!

**Tier 0** leverages Kubernetes Volume Snapshots, if supported by the
underlying storage class. It consists of instantaneous, point-in-time snapshots
of all volumes used by the PostgreSQL cluster, including the `PGDATA` directory
and any tablespaces.

This tier is not intended for long-term storage but acts as the **initial
source** for a base backup. By reading from a static snapshot, Klio avoids
impacting the performance of the running database. 
From a disaster recovery
perspective, these snapshots are often considered "ephemeral," as most local
storage solutions keep them within the same disks, unlike some cloud providers
or storage classes that allow them to be archived to object storage.
Volume snapshot objects reside in the same Kubernetes namespace as the
PostgreSQL cluster.

Klio coordinates the creation of the snapshot as supported by CloudNativePG and
then uses it to **asynchronously offload** the base backup data to Tier 1.
Klio also manages retention policies for volume snapshot objects for a given
PostgreSQL cluster.

* * *

## Tier 1: Primary Storage (The Klio Server)

**Tier 1** is the core operational tier, also referred to as the **Main Tier**
or **Klio Server**. It's designed for speed and provides immediate access to
all necessary backup artifacts for most recovery scenarios.

This tier consists of a **local Persistent Volume (PV)** deployed by the
Klio Server. It can be located in the same namespace as the PostgreSQL cluster
or in a different one within the same Kubernetes cluster (see the ["Tier 1 Architectures" section below](#tier-1-architectures)).

Its purpose is to store the **WAL archive** and the **catalog of physical base
backups**. Its high-throughput, low-latency nature is optimized for several key
tasks:

- Receiving a continuous stream of WAL files directly from the PostgreSQL
  primary.
- Storing base backups created from the primary or offloaded from Tier 0.
- Serving as the source for asynchronously replicating data to Tier 2.
- Managing retention policies for all tiers.

### Tier 1 Architectures

Klio supports several flexible deployment architectures for its Tier 1 storage.

On the physical layer, it is recommended that both compute and, most
importantly, storage be separate from the PostgreSQL clusters.

!!!warning

Placing Tier 1 on the same nodes and storage as the PostgreSQL clusters
severely impacts the business continuity objectives of your organization.
!!!

On the logical layer, a **Klio Server** can reside in the same namespace as the
PostgreSQL cluster(s) it manages or in a separate, dedicated namespace.

When choosing an architecture, it's important to consider
**security and tenancy**.
PostgreSQL clusters managed by a single Klio Server share the same master
encryption key. For this reason, it's recommended to use separate Klio Servers
for clusters that serve different tenants or have distinct security
requirements.

#### Clusters and Klio Server in the Same Namespace

The simplest deployment places the Klio Server in the same namespace as the
PostgreSQL cluster(s).

This can be a **dedicated 1:1 mapping** (one Klio Server per cluster):

![Cluster and Klio server in the same namespace](images/tier1-namespace-single.png)

Or a **shared N:1 mapping** where one server manages all clusters in the
namespace.

![Multiple clusters share a Klio server in the same namespace](images/tier1-namespace-multi.png)

#### Clusters and Klio Server in Different Namespaces

For greater isolation or centralized management, the Klio Server can be
deployed in a namespace separate from the PostgreSQL clusters it protects. 
The following diagram shows a PostgreSQL cluster being backed up by a Klio
Server in another namespace:

![Cluster and Klio server in a different namespace](images/tier1-shared-single.png)

This model also allows a central Klio Server to manage clusters that reside in
different namespaces, as shown below:

![Multiple clusters in different namespaces share a central Klio server](images/tier1-shared-multi.png)

### Reserving Nodes for Klio Workloads

For dedicated performance and resource isolation, you can reserve specific
worker nodes for Klio pods using Kubernetes taints and tolerations.

1. **Taint the Node**: Apply a taint to the desired node. This prevents most
   pods from being scheduled on it.

   ```sh
   kubectl taint node <node-name> node-role.kubernetes.io/klio=:NoSchedule
   ```

2. **Add Toleration to Klio Server**: Add the corresponding toleration to your
   Klio `Server` resource by adding it to `.spec.template`.
   This allows the Klio Server to be scheduled on the tainted node.

   ```yaml
   # In your Server resource definition
   spec:
     template:
       spec:
         containers: []
         tolerations:
           - key: "node-role.kubernetes.io/klio"
             operator: "Exists"
             effect: "NoSchedule"
   ```

* * *

## Tier 2: Secondary Storage (Object Storage)

**Tier 2** provides durable, long-term storage for robust disaster recovery
(DR) strategies. It's physically and logically separate from the primary
Kubernetes cluster and typically consists of an external object storage system,
such as Amazon S3, Google Cloud Storage, or Azure Blob Storage.
Storing backups off-site ensures **geographical redundancy**, protecting data
against a full cluster or site failure.

Klio asynchronously relays both base backups and WAL files from Tier 1 to
Tier 2. This decoupling ensures that primary backup and recovery operations in
Tier 1 are not directly affected by the latency or availability of the remote
object storage.

Additionally, Tier 2 can serve as a read-only fallback source. In a distributed
CloudNativePG topology, this allows a Klio server at a secondary site to use
the shared Tier 2 storage to bootstrap a new cluster, enhancing DR
capabilities.

### Restoring from Tier 2

When a backup is requested for restore, Klio will first look for it in Tier 1.
If the backup is not found in Tier 1, Klio will automatically check Tier 2.
This fallback mechanism ensures that backups that have been migrated to Tier 2
are still accessible for restore operations.

To enable Tier 2 restore capabilities, set the `tier2` field to `true` in your
`PluginConfiguration`:

```yaml
apiVersion: klio.enterprisedb.io/v1alpha1
kind: PluginConfiguration
metadata:
  name: client-config-example-restore
spec:
  serverAddress: server-sample.default
  clientSecretName: cluster-restore-klio-user
  serverSecretName: server-sample-tls
  clusterName: cluster-example
  tier2: true
```

When Tier 2 is enabled and a backup exists in both tiers, Tier 1 takes
precedence, as restoring from it is faster.

### Read-Only WAL Server Mode (currently unavailable)

The Klio WAL server supports a **read-only mode** that allows it to serve WAL
files for download without accepting any write operations. This mode is useful
when implementing Tier 2 functionality, enabling clients to read WAL files from
object storage without the risk of accidental modifications. 
+ +When a WAL server is started in read-only mode: + +- All **read operations** (e.g., `Get`, `GetMetadata`) continue to function normally +- All **write operations** (e.g., `Put`, `SetFirstRequiredWAL`, `RequestWALStart`, `ResetWALStream`) are rejected with a `FailedPrecondition` gRPC error +- The server will return error code `3` (FailedPrecondition) for any write attempt + +This ensures data integrity in distributed backup scenarios where secondary sites +only need read access to the WAL archive for recovery purposes. + +* * * + +## Planning Your Backup Strategy + +When planning your backup strategy with Klio, **Tier 1 is the most critical +layer** to define architecturally. You have several options, ranging from +running Klio servers on any worker node using your cluster's primary storage +solution, to dedicating a single worker node with local storage for a +centralized Klio server. + +**Tier 0** capabilities are determined by the underlying Kubernetes +`StorageClass`. Klio is particularly valuable when using local storage +solutions (such as LVM with TopoLVM or OpenEBS), as it can **offload** volume +snapshot backups to Tier 1, freeing up high-performance local disk space via +retention policies. + +**Tier 2** is often determined by your organization's infrastructure teams, who +have likely already selected one or more standard object storage solutions for +long-term archival. diff --git a/product_docs/docs/klio/0/backup_and_restore.mdx b/product_docs/docs/klio/0/backup_and_restore.mdx new file mode 100644 index 0000000000..71173abb85 --- /dev/null +++ b/product_docs/docs/klio/0/backup_and_restore.mdx @@ -0,0 +1,314 @@ +--- +title: Backup and Restore +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/backup_and_restore.md +sidebar_position: 7 +editTarget: originalFilePath + +--- + +This guide explains how to take backups of PostgreSQL clusters managed by +CloudNativePG and restore them using Klio. + +## Overview + +Klio follows PostgreSQL's native physical backup and recovery mechanisms, +leveraging CloudNativePG's backup and restore capabilities through its +[`Backup` resource](https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-Backup) +and +[`ScheduledBackup` resource](https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-ScheduledBackup). + +A working **online backup** is composed of: + +- A **physical base backup**: A filesystem copy of the PostgreSQL data directory. +- A set of **WAL (Write-Ahead Log) files**: Continuous logs of all changes made + to the database during the entire period of the base backup. + +!!!important + +It is recommended to periodically test backup restores to ensure correct +recovery procedures. +!!! + +!!!warning + +The Klio MVP does not currently verify the presence of all required WAL files +for a given backup. This limitation will be resolved before the GA release. +!!! 
## Prerequisites

Before performing backup and restore operations, ensure you have:

- A running [Klio server](klio_server.mdx) with proper configuration
- A PostgreSQL cluster configured with the [Klio plugin](plugin_configuration.mdx)

## Taking a Backup

With the Klio plugin configured, you can take on-demand backups using
CloudNativePG's [`Backup` resource](https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-Backup)
or the [Kubectl plugin](https://cloudnative-pg.io/documentation/current/kubectl-plugin/#requesting-a-new-physical-backup)
for CNPG.

### Create a Backup

You can trigger a new backup by creating a `Backup` resource.

```yaml
apiVersion: postgresql.cnpg.io/v1
kind: Backup
metadata:
  name: my-cluster-backup-20251027
  namespace: default
spec:
  method: plugin
  target: primary
  cluster:
    name: my-cluster
  pluginConfiguration:
    name: klio.enterprisedb.io
```

Apply the manifest:

```bash
kubectl apply -f backup.yaml
```

Alternatively, you can request a backup directly using the
[`kubectl cnpg` plugin](https://cloudnative-pg.io/documentation/current/kubectl-plugin/#requesting-a-new-physical-backup):

```bash
kubectl cnpg backup my-cluster \
  --method plugin \
  --plugin-name klio.enterprisedb.io \
  --backup-target primary
```

If you don't specify the `--backup-name` option, the `cnpg backup` command
automatically generates one using the format `<cluster-name>-<timestamp>`,
which is suitable in most cases.

For a complete list of available options, run:

```bash
kubectl cnpg backup --help
```

### Monitor Backup Progress

Check the backup status:

```bash
# Watch the backup status
kubectl get backup my-cluster-backup-20251027 -w

# Get detailed backup information
kubectl describe backup my-cluster-backup-20251027
```

A successful backup will show:

```
NAME                         AGE   CLUSTER      METHOD   PHASE       ERROR
my-cluster-backup-20251027   2m    my-cluster   plugin   Completed
```

### Scheduled Backups

You can schedule automatic backups using CloudNativePG's
[`ScheduledBackup` resource](https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-ScheduledBackup).

```yaml
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
  name: my-cluster-daily-backup
  namespace: default
spec:
  # Cron schedule: daily at 2:00 AM
  schedule: "0 0 2 * * *"
  method: plugin
  target: primary
  cluster:
    name: my-cluster
  pluginConfiguration:
    name: klio.enterprisedb.io
```

Apply the scheduled backup:

```bash
kubectl apply -f scheduled-backup.yaml
```

## Backup Retention and Maintenance

Klio automatically manages backup retention based on the
[retention policy](plugin_configuration.mdx#retention-policies) defined in the
`PluginConfiguration` referenced by the `Cluster`.

!!!important

Deleting a `Backup` resource through `kubectl` only removes the Kubernetes
object. The actual backup data in the Klio server may be retained according to
the retention policy.
!!!

## Finding Your backupID for Recovery

To restore a specific backup, you need its backupID; otherwise, Klio
chooses the latest one autonomously. 
You can list all available, completed Backup resources using kubectl:

```bash
kubectl get backups -n <namespace>
```

Once you identify the backup you want to use, you can retrieve its backupID:

```bash
kubectl get backup <backup-name> -n <namespace> -o jsonpath='{.status.backupId}'
```

## Restoring from a Backup

Klio supports restoring PostgreSQL clusters from backups using CloudNativePG's
recovery mechanism. Unlike traditional in-place recovery, Klio follows
CloudNativePG's approach of **bootstrapping a new cluster** from a backup,
which ensures data integrity and allows for flexible recovery scenarios.

### How Recovery Works

Klio integrates with CloudNativePG's recovery process by performing the
following actions during a restore:

1. **Restores the base backup**: Copies the physical backup data to the new
   cluster's data directory. Uses the `klio restore` command under the hood.
2. **Restores WAL files**: Klio is configured to retrieve the WAL files
   required for the PostgreSQL recovery as needed.
   Uses the `klio get-wal` command under the hood.

The execution of these commands is driven by CloudNativePG's recovery
mechanism, which ensures that the PostgreSQL server starts correctly after
the restore.

A restored cluster operates independently of the original cluster. By default,
it will **not** perform backups unless you explicitly configure the Klio plugin
for backup operations in the new cluster's specification.

### Full Restore

To restore from a backup, create a new `Cluster` resource with a
`bootstrap.recovery` section that references the Klio plugin:

```yaml
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: my-restored-cluster
  namespace: default
spec:
  instances: 3

  # Bootstrap from a Klio backup
  bootstrap:
    recovery:
      source: source
      # OPTIONAL: Specify the backup to restore from
      backupID: my-cluster-backup-YYYYMMDDHHMMSS

  # Reference the Klio plugin configuration
  externalClusters:
    - name: source
      plugin:
        name: klio.enterprisedb.io
        parameters:
          pluginConfigurationRef: my-restore-config

  storage:
    size: 10Gi
```

!!!note

Klio will choose the latest backup available in case the `backupID` field is omitted.
!!!

Create a corresponding `PluginConfiguration` that specifies which backup to restore:

```yaml
apiVersion: klio.enterprisedb.io/v1alpha1
kind: PluginConfiguration
metadata:
  name: my-restore-config
  namespace: default
spec:
  # Connection details
  serverAddress: klio-server.default
  clientSecretName: my-client-credentials
  serverSecretName: klio-server-tls

  # Optional: specify the original cluster name if different
  clusterName: my-cluster
```

The client credentials secret (`my-client-credentials`) should contain the
necessary authentication information to access the Klio server, as described
in the [Klio plugin configuration guide](plugin_configuration.mdx#client-credentials-secret).

!!!note

The `clusterName` field in the `PluginConfiguration` and the `commonName`
of the certificate should match the name of the **original cluster** that
was backed up, not the name of the new restored cluster.
!!!

Apply both resources:

```bash
kubectl apply -f restore-config.yaml
kubectl apply -f restored-cluster.yaml
```

### Point-in-Time Recovery (PITR)

Klio supports Point-in-Time Recovery, allowing you to restore your database
to a specific moment in time rather than the latest available state. This is
useful for recovering from accidental data deletion or corruption. 
+
+The process involves specifying a recovery target in the `Cluster` resource.
+The available recovery targets are described in the
+[CloudNativePG documentation](https://cloudnative-pg.io/documentation/current/recovery/#recovery-targets).
+
+#### Example: recover to a `targetTime`
+
+Restore to a specific timestamp:
+
+```yaml
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: my-pitr-cluster
+spec:
+  bootstrap:
+    recovery:
+      source: source
+      # Recover to a specific point in time
+      recoveryTarget:
+        targetTime: "2025-11-06 15:00:00.0000+00"
+  # other cluster spec fields...
+```
+
+!!!important
+
+The target of a point-in-time recovery must fall between the time the base
+backup was completed and the time of the latest transaction recorded in the
+available WAL files.
+!!!
+
+!!!note
+
+During a point-in-time recovery, Klio automatically chooses the right
+backup if one is not specified with the `backupID` field.
+!!!
diff --git a/product_docs/docs/klio/0/helm_chart.mdx b/product_docs/docs/klio/0/helm_chart.mdx
new file mode 100644
index 0000000000..db363213d2
--- /dev/null
+++ b/product_docs/docs/klio/0/helm_chart.mdx
@@ -0,0 +1,188 @@
+---
+title: EDB Klio Operator Helm Chart
+navTitle: ''
+originalFilePath: >-
+  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/helm_chart.mdx
+sidebar_position: 90
+editTarget: originalFilePath
+
+---
+
+
+import PartialValues from "./_helm_chart_values.mdx";
+
+
+The EDB Klio Operator Helm chart allows you to deploy the Klio
+Operator in your Kubernetes cluster. It is distributed as a private OCI artifact.
+
+## Prerequisites
+
+Before installing the Klio Operator, ensure you have:
+
+- **Helm** - see the [Helm installation guide](https://helm.sh/docs/intro/install/)
+- **Kubernetes** cluster with appropriate permissions
+- **Credentials** to access the registry hosting the Helm chart, the Klio operator
+  image, and the Klio operand image.
+- **CloudNativePG Operator** already installed in your Kubernetes cluster.
+  See the [CloudNativePG installation guide](https://cloudnative-pg.io/documentation/current/installation_upgrade/).
+- **cert-manager** (optional, but strongly recommended for managing TLS certificates).
+  See the [cert-manager installation guide](https://cert-manager.io/docs/installation/).
+- **Prometheus Operator** (optional, for operator monitoring).
+  See the [Prometheus Operator installation guide](https://prometheus-operator.dev/docs/getting-started/installation/).
+
+## Installation
+
+### Step 1: Registry Authentication
+
+First, authenticate with the EDB registry where the Helm chart is hosted:
+
+```sh
+helm registry login <registry> -u <username> -p <password>
+```
+
+Replace `<registry>`, `<username>`, and `<password>` with the required credentials.
+
+### Step 2: Create an Image Pull Secret
+
+Create a Kubernetes secret to allow the operator to pull container images from the registry:
+
+```sh
+kubectl create secret docker-registry klio-registry-secret \
+  --docker-server=<registry> \
+  --docker-username=<username> \
+  --docker-password=<password> \
+  --namespace <namespace>
+```
+
+!!!info Namespace Selection
+
+Select the namespace where you want to deploy the Klio Operator. This must be
+the same namespace where CloudNativePG is deployed.
+!!!
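+
+Before moving on, you can optionally verify that the pull secret was created
+with the expected type. This is a quick sanity check using standard `kubectl`
+commands; the secret name matches the one created above:
+
+```sh
+# A docker-registry secret should report the dockerconfigjson type
+kubectl get secret klio-registry-secret -n <namespace> -o jsonpath='{.type}'
+# Expected output: kubernetes.io/dockerconfigjson
+```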
+
+### Step 3: Install the Helm Chart
+
+Deploy the Klio Operator to your cluster:
+
+```sh
+helm install klio-operator oci://ghcr.io/enterprisedb/klio-operator-chart \
+  --version 0.0.6 \
+  --namespace <namespace> \
+  --set controllerManager.manager.image.pullSecrets[0].name=klio-registry-secret
+```
+
+### Step 4: Verify Installation
+
+After installation, verify that the Klio Operator is running:
+
+```sh
+kubectl get pods -n <namespace> -l app.kubernetes.io/name=klio
+```
+
+You should see the operator pod in a `Running` state. Check the logs to ensure
+there are no errors:
+
+```sh
+kubectl logs -n <namespace> deployment/klio-controller-manager -f
+```
+
+Verify that the Custom Resource Definitions (CRDs) were created:
+
+```sh
+kubectl get crds | grep klio.enterprisedb.io
+```
+
+You should see CRDs like `servers.klio.enterprisedb.io` and `pluginconfigurations.klio.enterprisedb.io`.
+
+## Configuration
+
+### Customizing the Installation
+
+The chart is designed to be customizable, allowing you to configure multiple
+aspects of the Klio Operator deployment by passing values through a custom
+`values.yaml` file or using the `--set` flag during installation.
+See the [Helm documentation](https://helm.sh/docs/) for more details on how to customize and
+manage Helm charts.
+
+#### Inspecting the Chart
+
+Before installing, you can download the Helm chart to inspect its contents,
+review the default values, and understand what resources it will create:
+
+```sh
+helm pull oci://ghcr.io/enterprisedb/klio-operator-chart --version 0.0.6
+```
+
+This downloads the chart as a `.tgz` file. Extract it to examine the templates,
+default `values.yaml`, and other chart files:
+
+```sh
+tar -xzf klio-operator-chart-0.0.6.tgz
+cd klio-operator-chart
+cat values.yaml
+```
+
+### Configuration Reference
+
+<PartialValues />
+
+## Upgrading
+
+To upgrade the Klio Operator to a newer version:
+
+```sh
+helm upgrade klio-operator oci://ghcr.io/enterprisedb/klio-operator-chart \
+  --version <version> \
+  --namespace <namespace>
+```
+
+When upgrading, you can control how Helm handles values from the previous
+installation. Please refer to the [Helm upgrade documentation](https://helm.sh/docs/helm/helm_upgrade/)
+to understand the different options.
+
+!!!warning CRD Upgrades
+
+Helm does not automatically upgrade CRDs. If the new version includes CRD
+updates, you may need to apply them manually. Check the release notes for
+specific upgrade instructions.
+!!!
+
+## Uninstalling
+
+To uninstall the Klio Operator:
+
+```sh
+helm uninstall klio-operator --namespace <namespace>
+```
+
+!!!warning Data Preservation
+
+Uninstalling the operator does not automatically remove:
+
+- Custom Resource Definitions (CRDs)
+- Existing Klio resources (Servers, PluginConfigurations)
+- Persistent volumes containing backup data
+
+To completely remove Klio from your cluster, you must manually delete these resources.
+!!!
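+
+Before removing the CRDs, you can optionally check whether any Klio resources
+still exist anywhere in the cluster. This sketch uses the CRD names listed in
+the verification step above:
+
+```sh
+kubectl get servers.klio.enterprisedb.io --all-namespaces
+kubectl get pluginconfigurations.klio.enterprisedb.io --all-namespaces
+```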
+ +To remove the CRDs after uninstalling: + +```sh +kubectl delete crd servers.klio.enterprisedb.io +kubectl delete crd pluginconfigurations.klio.enterprisedb.io +``` diff --git a/product_docs/docs/klio/0/images/basebackups_walarchive.png b/product_docs/docs/klio/0/images/basebackups_walarchive.png new file mode 100644 index 0000000000..8880b8c2ba --- /dev/null +++ b/product_docs/docs/klio/0/images/basebackups_walarchive.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4de2c6fa708c49065b625ee92375ae279077a10dc6aad07ecc178b51f291f7 +size 17516 diff --git a/product_docs/docs/klio/0/images/overview-multi-tiers.png b/product_docs/docs/klio/0/images/overview-multi-tiers.png new file mode 100644 index 0000000000..b5171be827 --- /dev/null +++ b/product_docs/docs/klio/0/images/overview-multi-tiers.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc1bce7c07c22ae7fc114748b522b99d5c51169b1271896539012fa8da49648 +size 73488 diff --git a/product_docs/docs/klio/0/images/tier1-namespace-multi.png b/product_docs/docs/klio/0/images/tier1-namespace-multi.png new file mode 100644 index 0000000000..c0af30615a --- /dev/null +++ b/product_docs/docs/klio/0/images/tier1-namespace-multi.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6921bdaf4b76a8e9b863edfc6614f5a3f6e63e155beff68006bffb6da7429591 +size 95217 diff --git a/product_docs/docs/klio/0/images/tier1-namespace-single.png b/product_docs/docs/klio/0/images/tier1-namespace-single.png new file mode 100644 index 0000000000..879f1b98c0 --- /dev/null +++ b/product_docs/docs/klio/0/images/tier1-namespace-single.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6ae084132f92cec5e0ff7f15b4b5db3f957988af25b3cd224d801fd232d37a +size 38665 diff --git a/product_docs/docs/klio/0/images/tier1-shared-multi.png b/product_docs/docs/klio/0/images/tier1-shared-multi.png new file mode 100644 index 0000000000..6d83dcfc7e --- /dev/null +++ b/product_docs/docs/klio/0/images/tier1-shared-multi.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb75f834d4c850d884ede3fba795c499407e89ff79e252051d95080c30ac8d4 +size 75090 diff --git a/product_docs/docs/klio/0/images/tier1-shared-single.png b/product_docs/docs/klio/0/images/tier1-shared-single.png new file mode 100644 index 0000000000..22106089b3 --- /dev/null +++ b/product_docs/docs/klio/0/images/tier1-shared-single.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:582b2289089c5f0d7648d28646ccd00b0ea47080031596bd040cd2de032ee4cb +size 40722 diff --git a/product_docs/docs/klio/0/images/wal-streaming.png b/product_docs/docs/klio/0/images/wal-streaming.png new file mode 100644 index 0000000000..7ba175eab2 --- /dev/null +++ b/product_docs/docs/klio/0/images/wal-streaming.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc19985d9893e142755ffbc7a838ac101a9b5cbb3fe8e5e35b000c51c612f05 +size 34122 diff --git a/product_docs/docs/klio/0/index.mdx b/product_docs/docs/klio/0/index.mdx new file mode 100644 index 0000000000..1dc15ebc54 --- /dev/null +++ b/product_docs/docs/klio/0/index.mdx @@ -0,0 +1,124 @@ +--- +title: Klio Overview +navigation: + - main_concepts + - architectures + - wal_streaming + - klio_server + - plugin_configuration + - backup_and_restore + - opentelemetry + - api_service + - walplayer + - helm_chart + - api + - images + - '!_helm_chart_values' +originalFilePath: >- + 
https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/index.mdx
+directoryDefaults:
+  displayBanner: >-
+    This is documentation for a Tech Preview of EDB's {{name.ln}}
+    ({{name.short}}) solution. It is made available AS IS for testing and early
+    evaluation purposes ONLY! It is not to be used in production environments.
+    For details, please refer to EULA
+    section 9.4.
+version: 0.0.10
+sidebar_position: 1
+editTarget: originalFilePath
+
+---
+
+**Klio** is a cloud-native solution for enterprise-grade backup and recovery of
+PostgreSQL databases managed by [CloudNativePG](https://cloudnative-pg.io) on
+Kubernetes. It is designed to handle:
+
+- The **Write-Ahead Log (WAL) archive** for a given PostgreSQL `Cluster`
+  resource, within the same Kubernetes namespace as the Klio deployment
+- The **catalog of physical base backups** for that same cluster
+- Optionally, multiple PostgreSQL clusters in the same namespace
+
+These critical backup artifacts are stored across two distinct storage tiers:
+
+- Tier 1 – **Local Volume**: A local Persistent Volume (PV) within the
+  same namespace as the associated `Cluster` resource. It offers immediate,
+  high-throughput access for backup and recovery operations. Also referred to as
+  the **Main Tier** or **Klio Server**.
+
+- Tier 2 – **Secondary Storage**: An external object storage system where data
+  from Tier 1 is asynchronously replicated. This tier typically resides outside
+  the Kubernetes cluster, enabling geographical redundancy and enhancing disaster
+  recovery (DR) resilience.
+
+![Multi-tiered architecture overview](images/overview-multi-tiers.png)
+
+* * *
+
+## Key Features
+
+!!!note
+
+Most of the following features are currently aspirational and under active
+development.
+!!!
+
+### WAL Management
+
+- Native WAL streaming from the primary, eliminating the need for
+  `archive_command`, with support for:
+
+  - Partial WAL file handling
+  - WAL file compression
+  - WAL file encryption using user-provided keys
+  - Controlled replication slot advancement to ensure uninterrupted streaming
+  - Synchronous replication
+
+- WAL archive storage on a local PVC (Tier 1)
+
+- Extension of base backup retention policy enforcement to WAL files
+
+- Asynchronous WAL relay to Tier 2 object storage
+
+!!!important
+
+Klio's WAL management utilizes the `READ_REPLICATION_SLOT` streaming
+replication command, which was introduced in PostgreSQL 15.
+Therefore, Klio requires PostgreSQL version 15 or greater to function properly.
+!!!
+
+### Base Backup Catalog
+
+- Physical online base backups from the primary node to Tier 1, with support
+  for:
+
+  - Data deduplication for efficient remote incremental backups
+  - Compression to optimize storage usage
+  - Encryption using user-provided keys for data confidentiality
+
+- Backup catalog stored on a file system Persistent Volume Claim (PVC) in Tier 1
+
+- Integration with CloudNativePG Kubernetes Volume Snapshots (Tier 0),
+  enabling asynchronous offload to Tier 1 using the same physical backup
+  process
+
+- Retention policy enforcement based on defined recovery windows, including
+  Kubernetes Volume Snapshots
+
+- Asynchronous replication of base backups to Tier 2 object storage for
+  long-term durability and disaster recovery (DR)
+
+!!!important
+
+Kubernetes Volume Snapshot integration (Tier 0) is only available for storage
+classes that support volume snapshots.
+!!!
+
+### General Capabilities
+
+- End-to-end encryption: both in-transit and at-rest
+- Designed for seamless integration with Kubernetes-native data protection
+  tools such as Veeam Kasten, Velero, and others
+- Delivered as a CNPG-I plugin, with an accompanying Kubernetes Operator
+- Available as a Certified Red Hat OpenShift Operator
+- Distributed via a Helm chart for streamlined deployment
diff --git a/product_docs/docs/klio/0/klio_server.mdx b/product_docs/docs/klio/0/klio_server.mdx
new file mode 100644
index 0000000000..9dae47f4cc
--- /dev/null
+++ b/product_docs/docs/klio/0/klio_server.mdx
@@ -0,0 +1,662 @@
+---
+title: The Klio Server
+originalFilePath: >-
+  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/klio_server.md
+sidebar_position: 5
+editTarget: originalFilePath
+
+---
+
+The Klio server is a central component of the Klio backup solution. It is
+defined as the `Server` custom resource in Kubernetes, which creates a
+StatefulSet running the Klio server application.
+
+The Klio server is composed of three main containers:
+
+- `base`: Manages full and incremental backups using Kopia.
+- `wal`: Receives the stream of PostgreSQL Write-Ahead Logs (WAL).
+- `nats`: Provides a work queue using NATS JetStream for asynchronous WAL processing.
+
+An additional init container, `init`, is responsible for initializing the
+Kopia repository and setting up the necessary configuration.
+
+The base backups and WAL files are stored in multiple PersistentVolumes attached
+to the Klio server pod in the `/data/base` and `/data/wal` directories, respectively.
+
+An additional PersistentVolume is used for the Kopia cache. This cache allows Kopia to
+quickly browse repository contents without having to download them from the storage
+location.
+
+The work queue is backed by NATS JetStream with file storage on a separate PersistentVolume mounted at `/queue`.
+When a WAL file is received, the server publishes a notification to the queue, enabling asynchronous processing
+of WAL files by consumers.
+
+## Setting up a new Klio server
+
+Setting up a Klio server involves creating a `Server` resource along with the
+required Kubernetes secrets and certificates.
+
+### Prerequisites
+
+Before setting up a Klio server, ensure you have:
+
+- A Kubernetes cluster with the Klio operator installed
+- `kubectl` configured to access your cluster
+- [cert-manager](https://cert-manager.io/) installed for certificate
+  management (recommended)
+- Enough storage resources for the data and cache PersistentVolumeClaims
+- Enough storage resources for the queue PersistentVolumeClaim
+
+### Required Components
+
+A Klio server setup requires the following components:
+
+1. **Server Resource**: The main `Server` custom resource
+2. **TLS Certificate**: For secure communication
+3. **Encryption Password**: For encrypting backup data at rest
+4. **CA Certificate**: For client authentication via mTLS
+5. **Admin User Credentials**: Optional admin user for Kopia operations
+6. **Storage**: PersistentVolumeClaims for data, cache, and queue
+
+### Step-by-step setup
+
+#### 1.
Create the Encryption Password Secret
+
+The encryption password is used to encrypt backup data at rest:
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: my-server-encryption
+  namespace: default
+type: Opaque
+data:
+  password: "bXktc2VjdXJlLXBhc3N3b3Jk" # my-secure-password
+```
+
+Apply the secret:
+
+```bash
+kubectl apply -f encryption-secret.yaml
+```
+
+!!!tip
+
+Use a strong, randomly generated password. This password is critical for
+data security and recovery.
+!!!
+
+#### 2. Create CA Certificate
+
+Using cert-manager, a CA certificate can be created using the following
+Certificate resource:
+
+```yaml
+---
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  name: selfsigned-issuer
+  namespace: default
+spec:
+  selfSigned: { }
+---
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: server-sample-ca
+spec:
+  commonName: server-sample-ca
+  secretName: server-sample-ca
+
+  duration: 2160h # 90d
+  renewBefore: 360h # 15d
+
+  isCA: true
+  usages:
+    - cert sign
+
+  issuerRef:
+    name: selfsigned-issuer
+    kind: Issuer
+    group: cert-manager.io
+```
+
+Apply the CA configuration with:
+
+```bash
+kubectl apply -f ca-configuration.yaml
+```
+
+In the previous example, the CA to be used for authentication is signed by a
+self-signed issuer. This doesn't pose any security issue, as this CA is only
+used internally and trust is established through configuration.
+
+The primary concern is the relationship between the client and the certificates
+signed by the CA.
+
+!!!info
+
+Using a self-signed CA is not required by the Klio server. If your
+PKI infrastructure already includes a CA for this scope, that CA can be used
+for the Klio server, too.
+!!!
+
+#### 3. (Optional) Create Admin User Credentials
+
+If you need admin access to the underlying Kopia server web interface
+(mostly for debugging purposes), define the secret as follows:
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: my-server-adm
+  namespace: default
+type: kubernetes.io/basic-auth
+data:
+  username: "YWRtaW4=" # admin
+  password: "YWRtaW4tcGFzc3dvcmQ=" # admin-password
+```
+
+Apply the secret:
+
+```bash
+kubectl apply -f admin-credentials.yaml
+```
+
+#### 4. Create TLS Certificate
+
+Using cert-manager, create a self-signed certificate (for development) or use
+your organization's certificate issuer:
+
+```yaml
+---
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  name: selfsigned-issuer
+  namespace: default
+spec:
+  selfSigned: { }
+---
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: my-server-cert
+  namespace: default
+spec:
+  secretName: my-server-tls
+  commonName: my-server
+  dnsNames:
+    - my-server
+    - my-server.default
+    - my-server.default.svc
+    - my-server.default.svc.cluster.local
+  duration: 2160h # 90 days
+  renewBefore: 360h # 15 days
+  isCA: false
+  usages:
+    - server auth
+  issuerRef:
+    name: selfsigned-issuer
+    kind: Issuer
+    group: cert-manager.io
+```
+
+Apply the certificate configuration:
+
+```bash
+kubectl apply -f tls-certificate.yaml
+```
+
+!!!info
+
+For production environments, use certificates signed by your organization's Certificate Authority (CA) or a trusted public CA instead of self-signed certificates.
+!!!
+
+#### 5.
Create the Server Resource + +Now create the main `Server` resource: + + + +```yaml +apiVersion: klio.enterprisedb.io/v1alpha1 +kind: Server +metadata: + name: my-server + namespace: default +spec: + # Container image for the Klio server + image: ghcr.io/enterprisedb/klio:v0.0.10 + imagePullPolicy: IfNotPresent + imagePullSecrets: [] # Add image pull secrets if needed + + # TLS configuration + tlsSecretName: my-server-tls + + # Client authentication configuration + caSecretName: server-sample-ca + + # Encryption password reference + password: + name: my-server-encryption + key: password + + # Optional: Admin user for Kopia operations + baseConfiguration: + adminUser: + name: my-server-adm + + # Cache storage configuration + cacheConfiguration: + pvcTemplate: + storageClassName: standard # Adjust to your storage class (use 'kubectl get storageclass' to see available options) + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi # Adjust based on your needs + + # Data storage pvcTemplate (for backups and WAL) + dataConfiguration: + pvcTemplate: + storageClassName: standard # Adjust to your storage class (use 'kubectl get storageclass' to see available options) + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi # Adjust based on your backup needs + + # Queue storage configuration (for NATS work queue) + queueConfiguration: + pvcTemplate: + storageClassName: standard # Adjust to your storage class + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi # Adjust based on queue volume needs + + # Optional: Resource requirements + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "2000m" +``` + + + +Apply the Server resource: + +```bash +kubectl apply -f klio-server.yaml +``` + +#### 6. Verify the Server is Running + +Check the status of your Klio server: + +```bash +# Check the Server resource status +kubectl get server my-server -n default + +# Check the StatefulSet +kubectl get statefulset my-server-klio -n default + +# Check the Pod +kubectl get pods -l klio.enterprisedb.io/klio-server=my-server -n default + +# View logs +kubectl logs -l klio.enterprisedb.io/klio-server=my-server -n default -f +``` + +The server should create a StatefulSet with a pod named `my-server-klio-0`. + +## Advanced Configuration + +The `.spec.template` field allows you to customize the Klio server's pod +template. You can add additional containers, volumes, or modify existing +settings. + +!!!warning Advanced Users Only + +The `.spec.template` field is primarily designed for advanced configurations. +While powerful, improper modifications can affect server functionality. +Always test changes in a non-production environment first. +!!! + +!!!note + +The `containers` field within `.spec.template.spec` is mandatory but will be +merged with the default Klio server containers `base` and `wal`. If you do not +need to add containers or modify the default ones, you must still include an +empty list. +!!! + +### Node Affinity and Tolerations + +To dedicate specific nodes for Klio workloads (e.g., for performance isolation +or to separate backup workloads from application workloads), you can use the +`template` field to define affinity and toleration rules. 
+ +```yaml +spec: + template: + spec: + # Mandatory field; merged with default containers + containers: [] + tolerations: + # Allow scheduling on nodes tainted for Klio + - key: node-role.kubernetes.io/klio + operator: Exists + effect: NoSchedule + affinity: + # Require nodes labeled for Klio + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/klio + operator: Exists +``` + +See [Reserving Nodes for Klio Workloads](architectures.mdx#reserving-nodes-for-klio-workloads) +for details on node tainting. + +### Monitoring + +Refer to the [OpenTelemetry](opentelemetry.mdx#klio-server-with-opentelemetry) +documentation for setting up monitoring and telemetry for the Klio server. + +## Encryption + +Klio implements encryption at rest for both base backups and WAL files to +ensure data security throughout the backup lifecycle. + +### Base Backups Encryption + +Base backups are encrypted by Kopia using the encryption password provided in +the `password` secret reference. Kopia handles encryption transparently. + +The encryption password is set during repository initialization and is required +for all subsequent backup and restore operations. + +!!!warning Critical + +Store the encryption password securely. Loss of this password means permanent +loss of access to all backup data. There is no password recovery mechanism. +!!! + +### WAL Files Encryption + +WAL files are encrypted using a master key derivation system with authenticated +encryption. The encryption process works as follows: + +1. **Master Key Generation**: A 32-byte master key is derived from the encryption + password using PBKDF2 +2. **Key Enveloping**: The master key itself is encrypted using AES-256-GCM with a + password-derived encryption key to protect the key at rest +3. **Per-File Encryption**: Each WAL file is compressed and then encrypted using + the master key with authenticated encryption before being stored + +WAL files are first compressed using Snappy S2 compression, then encrypted to ensure both space +efficiency and security. + +The same encryption password used for base backups encrypts the WAL files, +ensuring a unified security model across all backup artifacts. + +### Encryption Password Rotation + +Currently, encryption password rotation is not supported. To change the +encryption password, you would need to: + +1. Create a new Klio server with a new encryption password +2. Perform new base backups to the new server +3. Migrate to using the new server + +!!!tip + +Choose a strong encryption password from the start. Use a password manager or +key management system to generate and store a cryptographically secure password +(recommended: 32+ random characters). +!!! + +### Encryption in Transit + +In addition to encryption at rest, Klio protects both base backups and WAL files +during transmission using TLS (Transport Layer Security). + +All communication between a Klio client and the Klio server is secured +with TLS: + +- **Base Backup Traffic**: Kopia client connections to the base backup server + are encrypted using TLS, protecting backup data as it transfers to the Klio + server +- **WAL Streaming**: PostgreSQL instances streaming WAL files to the Klio server + use gRPC over TLS, ensuring WAL data is encrypted during transmission + +The TLS certificate is configured via the `.spec.tlsSecretName` field in the +Server resource, which references a Kubernetes secret containing the TLS +certificate and private key. 
This provides end-to-end encryption, ensuring that
+backup data is protected both at rest and in transit.
+
+## Authentication
+
+Klio uses mTLS authentication to secure access to both the base backup server
+and the WAL streaming server. Authentication is handled by verifying client
+certificates against the CA certificate created when configuring the Klio
+server.
+
+### Creating a client-side certificate
+
+To create a client-side certificate, you need an issuer that will sign all the
+certificates with a CA known by the Klio server. Assuming that such an issuer is
+called `server-sample-ca` and available in the current namespace, you can create
+a client certificate with the following Certificate object:
+
+```yaml
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: client-sample-tls
+spec:
+  secretName: client-sample-tls
+  commonName: klio@cluster-1
+
+  duration: 2160h # 90d
+  renewBefore: 360h # 15d
+
+  isCA: false
+  usages:
+    - client auth
+
+  issuerRef:
+    name: server-sample-ca
+    kind: Issuer
+    group: cert-manager.io
+```
+
+If you used the example proposed in the [server configuration documentation
+page](#2-create-ca-certificate), the issuer can be created with:
+
+```yaml
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  name: server-sample-ca
+spec:
+  ca:
+    secretName: server-sample-ca
+```
+
+### Admin User (Optional)
+
+The optional admin user (`.spec.baseConfiguration.adminUser`) provides access to
+the Kopia web interface for administrative and debugging purposes. This is
+separate from the regular user authentication:
+
+```yaml
+spec:
+  baseConfiguration:
+    adminUser:
+      name: my-server-adm # Reference to kubernetes.io/basic-auth secret
+```
+
+The admin user secret must be of type `kubernetes.io/basic-auth`:
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: my-server-adm
+type: kubernetes.io/basic-auth
+data:
+  username: <base64-encoded-username>
+  password: <base64-encoded-password>
+```
+
+!!!info
+
+The admin user is primarily intended for debugging and should be used sparingly
+in production environments. Regular backup and restore operations use mTLS
+certificates.
+!!!
+
+## Access Control Lists (ACLs)
+
+Klio automatically configures Kopia's Access Control Lists (ACLs) during server
+startup to provide fine-grained access control to backup snapshots. This
+automation eliminates the need for manual ACL configuration.
+
+### Automatic ACL Configuration
+
+When the Klio server starts, it automatically:
+
+1. **Enables ACL support** in the Kopia repository
+2. **Creates a read-only user** (`snapshot_reader@klio`) with READ access to all snapshots
+3. **Configures the API server** to use the read-only user for backup catalog queries
+
+This automation ensures that the Klio API server (used for backup observability
+and catalog browsing) operates with minimal privileges, following the principle
+of least privilege.
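+
+For illustration only, this automated setup is conceptually equivalent to
+running the following Kopia CLI commands by hand. Klio performs the equivalent
+steps itself at startup, so you never need to run them:
+
+```bash
+# Enable ACL enforcement in the Kopia repository
+kopia server acl enable
+
+# Grant the read-only user READ access to all snapshots
+kopia server acl add --user snapshot_reader@klio --access READ --target type=snapshot
+```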
+
+### How ACLs Work
+
+Kopia's ACL system controls access to repository resources based on:
+
+- **User identity**: The authenticated username
+- **Resource type**: What is being accessed (e.g., snapshots, policies)
+- **Access level**: READ, APPEND, or FULL access
+
+The automated ACL configuration creates the following rule:
+
+```
+User: snapshot_reader@klio
+Access: READ
+Target: type=snapshot (all snapshots in the repository)
+```
+
+This allows the API server to:
+
+- List all available backups
+- Read backup metadata and manifests
+- Browse backup catalogs
+- Provide observability into the backup state
+
+However, the read-only user **cannot**:
+
+- Create new snapshots
+- Modify existing snapshots
+- Delete backups
+- Change repository configuration
+- Modify ACL rules
+
+### User Configuration
+
+From the authentication point of view, the `snapshot_reader@klio` user is not
+special: to use it, you need a corresponding Secret containing a certificate
+to be used for authentication.
+
+Cert-manager can create such a secret with the following Certificate definition:
+
+```yaml
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: client-sample-tls
+spec:
+  secretName: client-sample-tls
+  commonName: snapshot_reader@klio
+
+  duration: 2160h # 90d
+  renewBefore: 360h # 15d
+
+  isCA: false
+  usages:
+    - client auth
+
+  issuerRef:
+    name: server-sample-ca
+    kind: Issuer
+    group: cert-manager.io
+```
+
+### API Server Integration
+
+The Klio API server deployment is automatically configured to use the
+`snapshot_reader@klio` user for all Kopia operations. This happens through
+environment variable configuration in the API server deployment:
+
+```yaml
+- name: CLIENT_BASE_CLIENT_CERT_PATH
+  value: /client-certs/tls.crt
+- name: CLIENT_BASE_CLIENT_KEY_PATH
+  value: /client-certs/tls.key
+```
+
+No manual configuration is required: the API server will automatically use the
+restricted read-only access for all backup catalog queries.
+
+### Benefits
+
+The automated ACL configuration provides several benefits:
+
+1. **Security**: API server operates with minimal privileges
+2. **Simplicity**: No manual ACL commands required during setup
+3. **Consistency**: ACL configuration is standardized across all deployments
+4. **Separation of Concerns**: Read operations (API server) are isolated from
+   write operations (backup/restore processes)
+
+### Idempotency
+
+The ACL automation is idempotent: if ACLs are already enabled or the user
+already exists, the startup process will detect this and continue without
+error. This allows for safe server restarts and upgrades.
+
+### Troubleshooting ACLs
+
+If you encounter ACL-related issues, check the Klio server logs:
+
+```bash
+kubectl logs my-server-klio-0 -n default -c base
+```
+
+Look for log entries related to ACL enablement:
+
+- `"ACLs enabled"`: ACLs were successfully enabled
+- `"ACLs already enabled"`: ACLs were previously enabled (normal on restart)
+- `"User snapshot_reader added to ACLs"`: Read-only user was successfully configured
+- `"failed to execute ACLs enablement"`: An error occurred during ACL setup
+
+!!!note
+
+ACL configuration happens during server startup, before the Kopia server process
+begins accepting connections. Any ACL errors will appear early in the container
+logs.
+!!!
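+
+To narrow the output down to the entries listed above, you can filter the logs
+with standard tools, for example:
+
+```bash
+# Show only ACL-related log lines from the base container
+kubectl logs my-server-klio-0 -n default -c base | grep -i acl
+```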
diff --git a/product_docs/docs/klio/0/main_concepts.mdx b/product_docs/docs/klio/0/main_concepts.mdx new file mode 100644 index 0000000000..fa3d053546 --- /dev/null +++ b/product_docs/docs/klio/0/main_concepts.mdx @@ -0,0 +1,131 @@ +--- +title: Main Concepts +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/main_concepts.md +sidebar_position: 2 +editTarget: originalFilePath + +--- + +Klio is built on top of two foundational technologies: + +- PostgreSQL's native physical backup infrastructure +- The CloudNativePG Interface (CNPG-I) for backup and recovery + +PostgreSQL has provided **native continuous backup and point-in-time recovery +(PITR) capabilities since version 8.0, released in 2005**, enabling reliable +disaster recovery and business continuity for mission-critical systems +worldwide. + +!!!info + +PostgreSQL offers logical backups using tools like `pg_dump`, which generate a +logical representation of the database as SQL statements or data files. Logical +backups do not provide continuous protection or point-in-time recovery +capabilities. As a result, they are not suitable for **business continuity +scenarios** in mission-critical environments where minimizing downtime and data +loss is essential. +!!! + +At its core, [PostgreSQL’s continuous backup and recovery](https://www.postgresql.org/docs/current/continuous-archiving.html) +system uses **physical (file system level) copies** combined with **write-ahead +log (WAL) archiving**. +This approach enables consistent, recoverable backups while keeping systems +online, a strategy proven effective in production environments for over two +decades. + +In a PostgreSQL backup solution, the infrastructure typically consists of: + +- **WAL Archive**: A designated location for continuously archived WAL + (write-ahead log) files, preserving all changes made to the database to + support data durability and recovery. +- **Physical Base Backups**: A consistent copy of all data files used by + PostgreSQL (primarily the `PGDATA` directory and any tablespaces), forming + the foundational layer for any recovery operation. + +The diagram below illustrates the relationship between physical base backups +and the WAL archive over time: + +![Physical backups, WAL archive, and time](images/basebackups_walarchive.png) + +* * * + +## WAL Archive + +The WAL archive is central to **continuous backup** in PostgreSQL and is +essential for: + +- **Hot (Online) Backups**: Allowing physical base backups to be taken from any + node (primary or standby) without shutting down PostgreSQL, ensuring backups + can proceed without service disruption. +- **Point-in-Time Recovery (PITR)**: Enabling recovery to any precise moment + after the earliest available base backup, using archived WAL files to replay + transactions up to the desired recovery point. + +!!!important + +WAL archives on their own are insufficient for disaster recovery. +A **physical base backup is required** to restore a PostgreSQL cluster. +!!! + +Using a WAL archive significantly enhances the resilience of a PostgreSQL +system. WAL files can be fetched by any PostgreSQL instance for replication or +recovery, with archives typically retaining WAL segments longer than local +retention policies, ensuring historical data is preserved for PITR and disaster +recovery workflows. + +Klio receives WAL content from a PostgreSQL primary via streaming replication. 
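+
+As a concrete illustration of the WAL mechanism, standard PostgreSQL functions
+(not specific to Klio) let you inspect the current write-ahead log position on
+a primary and the WAL segment file it belongs to:
+
+```bash
+# Report the current WAL location and its corresponding segment file name
+psql -c "SELECT pg_current_wal_lsn(), pg_walfile_name(pg_current_wal_lsn());"
+```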
+ +* * * + +## Physical base backups + +PostgreSQL supports **physical base backups** as the cornerstone of its +disaster recovery and PITR strategies. A base backup is a **consistent, file +system-level copy** of all data files used by a PostgreSQL cluster, including +the `PGDATA` directory and any additional tablespaces. + +Key properties of PostgreSQL base backups: + +- **Online (Hot) Backups**: Base backups can be taken while the database is + online, avoiding downtime. PostgreSQL maintains consistency during an online + backup by coordinating with its write-ahead logging system, ensuring a valid + restore point. +- **Foundation for PITR**: A base backup provides the starting point for + point-in-time recovery. After restoring the base backup, archived WAL files + are replayed to advance the system to a specific recovery target, allowing + precise restoration following accidental data loss or corruption. +- **Efficient Storage and Transport**: Base backups can be compressed and + streamed to external or object storage, supporting offsite and cloud-based + disaster recovery workflows. + +Klio leverages CNPG-I to coordinate the hot backup procedure, using +PostgreSQL’s `pg_backup_start` and `pg_backup_stop` concurrent API to ensure +consistency. It uses [Kopia](https://github.com/kopia/kopia/) to efficiently +transfer backup data across locations, ensuring backups are portable, +secure, and space-efficient. + +* * * + +## Recovery + +In PostgreSQL, **recovery** is the process of restoring a database cluster from +a **physical base backup**, bringing it to a consistent state by replaying +**write-ahead log (WAL)** files, which contain the necessary *redo* information +for all changes made after the backup. + +PostgreSQL’s recovery system supports [Point-in-Time Recovery (PITR)](https://www.postgresql.org/docs/current/continuous-archiving.html#BACKUP-PITR-RECOVERY), +enabling you to restore a cluster to **any precise moment** between your +earliest base backup and the latest available WAL segment. To perform recovery, +a **valid WAL archive is required alongside the physical base backup**. + +Klio follows the approach of CloudNativePG and implements the recovery part of +CNPG-I. It **does not perform in-place recovery on an existing cluster**; +instead, recovery is used to **bootstrap a new cluster** from a base backup and +replay WAL files to reach a desired state. + +Recovery can operate in two primary modes: full recovery (replaying WAL files +to the latest available segment) or **Point-in-Time Recovery (PITR)**, allowing +restoration to a chosen state before an incident such as accidental data +deletion. Klio supports all PITR targets provided by CloudNativePG, including +time, restore point, and transaction. diff --git a/product_docs/docs/klio/0/opentelemetry.mdx b/product_docs/docs/klio/0/opentelemetry.mdx new file mode 100644 index 0000000000..9712e50b2d --- /dev/null +++ b/product_docs/docs/klio/0/opentelemetry.mdx @@ -0,0 +1,414 @@ +--- +title: OpenTelemetry Observability +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/opentelemetry.md +sidebar_position: 8 +editTarget: originalFilePath + +--- + +Klio provides built-in support for [OpenTelemetry](https://opentelemetry.io/), +enabling comprehensive observability through distributed tracing and metrics +collection. This allows you to monitor backup operations, performance +characteristics, and system health across your Klio deployment. 
+ +## Available Telemetry + +Klio automatically collects the following: + +- Traces + - Distributed WAL streaming and processing +- Metrics + - Server + - Backup operation metrics + - Number of snapshots + - Number of files in the latest snapshot + - Number of directories in the latest snapshot + - Size of the latest snapshot + - Age of the latest snapshot + - Age of the oldest snapshot + - WAL processing metrics + - Number of WAL files written + - Bytes written + - [GRPC metrics](https://opentelemetry.io/docs/specs/semconv/rpc/rpc-metrics/) + - [Go runtime statistics](https://pkg.go.dev/go.opentelemetry.io/contrib/instrumentation/runtime) + - [Host metrics](https://pkg.go.dev/go.opentelemetry.io/contrib/instrumentation/host) + - [Controller runtime metrics](https://book.kubebuilder.io/reference/metrics-reference) + - Client + - [GRPC metrics](https://opentelemetry.io/docs/specs/semconv/rpc/rpc-metrics/) + - [Go runtime statistics](https://pkg.go.dev/go.opentelemetry.io/contrib/instrumentation/runtime) + - [Host metrics](https://pkg.go.dev/go.opentelemetry.io/contrib/instrumentation/host) + - [Controller runtime metrics](https://book.kubebuilder.io/reference/metrics-reference) + +!!!note + +Log exporters are not currently supported. +!!! + +## Configuration + +Klio automatically detects OpenTelemetry configuration through standard +environment variables. If no OpenTelemetry environment variables are present, +Klio will use no-op providers that don't collect any telemetry data. + +Traces and metrics exporters can be configured independently through the +[`autoexport`](https://go.opentelemetry.io/contrib/exporters/autoexport) package. + +### General Settings + +The following environment variables are used to configure OpenTelemetry: + +- `OTEL_SERVICE_NAME`: (required) Name of the service, e.g., `klio-server` +- `OTEL_RESOURCE_ATTRIBUTES`: Comma-separated list of resource attributes + (e.g., `deployment.environment=production,service.namespace=klio-system`) +- `OTEL_RESOURCE_DETECTORS`: Comma-separated list of resource detectors + from the [`autodetect`](https://pkg.go.dev/go.opentelemetry.io/contrib/detectors/autodetect) + package, used to automatically populate resource attributes + +### Traces exporter + +To enable the traces exporter, set the `OTEL_TRACES_EXPORTER` environment +variable to one of the supported exporters: + +- `otlp`: OpenTelemetry Protocol (OTLP) exporter +- `console`: Console exporter (useful for debugging) +- `none`: No-op exporter (disables tracing) + +You can define the OTLP protocol using the `OTEL_EXPORTER_OTLP_TRACES_PROTOCOL` +variable, or the general `OTEL_EXPORTER_OTLP_PROTOCOL`. 
Supported protocols include:
+
+- `http/protobuf` (default)
+- `grpc`
+
+Additional configuration options for trace exporters can be found in the documentation
+of the respective exporters:
+
+- [OTLP Trace gRPC Exporter](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc)
+- [OTLP Trace HTTP Exporter](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp)
+
+### Metrics Exporter
+
+To enable the metrics exporter, set the `OTEL_METRICS_EXPORTER` environment
+variable to one of the supported exporters:
+
+- `otlp`: OpenTelemetry Protocol (OTLP) exporter
+- `prometheus`: Prometheus exporter + HTTP server
+- `console`: Console exporter (useful for debugging)
+- `none`: No-op exporter (disables metrics)
+
+You can define the OTLP protocol using the `OTEL_EXPORTER_OTLP_METRICS_PROTOCOL`
+variable, or the general `OTEL_EXPORTER_OTLP_PROTOCOL`. Supported protocols include:
+
+- `http/protobuf` (default)
+- `grpc`
+
+Additional configuration options for metrics exporters can be found in the documentation
+of the respective exporters:
+
+- [OTLP Metric gRPC Exporter](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc)
+- [OTLP Metric HTTP Exporter](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp)
+
+For the Prometheus exporter, you can configure the host and port of the HTTP
+server using the following environment variables:
+
+- `OTEL_EXPORTER_PROMETHEUS_HOST` (default: `localhost`)
+- `OTEL_EXPORTER_PROMETHEUS_PORT` (default: `9464`)
+
+## Configuring Klio with OpenTelemetry in Kubernetes
+
+When running in a Kubernetes environment, Klio will automatically define the
+`CONTAINER_NAME`, `POD_NAME`, and `NAMESPACE_NAME` environment variables.
+When any of these environment variables are set, Klio will automatically add
+the corresponding resource attributes (`k8s.container.name`, `k8s.pod.name`,
+`k8s.namespace.name`) to all telemetry data. Each attribute is added
+independently; you don't need all three environment variables to be present.
+
+!!!important
+
+If you have already defined any of these attributes in `OTEL_RESOURCE_ATTRIBUTES`,
+Klio will **not override** them. Only missing attributes will be added from the
+environment variables. This allows you to customize the values while still
+benefiting from automatic defaults for any attributes you don't explicitly set.
+!!!
+
+### Klio server with OpenTelemetry
+
+When deploying a Klio `Server`, you can configure OpenTelemetry by specifying the
+necessary environment variables in the `template` section of the `Server` spec,
+overriding the generated pod.
+
+```yaml
+apiVersion: klio.enterprisedb.io/v1alpha1
+kind: Server
+metadata:
+  name: server-sample
+spec:
+  # ... other configuration ...
+ template: + spec: + containers: + - name: base + env: + - name: OTEL_SERVICE_NAME + value: "klio-base" + - name: OTEL_RESOURCE_DETECTORS + value: "telemetry.sdk,host,os.type,process.executable.name" + - name: OTEL_TRACES_EXPORTER + value: "otlp" + - name: OTEL_EXPORTER_OTLP_TRACES_PROTOCOL + value: "grpc" + - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + value: "https://otel-collector:4317" + - name: OTEL_EXPORTER_OTLP_TRACES_COMPRESSION + value: "gzip" + - name: OTEL_EXPORTER_OTLP_TRACES_TIMEOUT + value: "10000" + - name: OTEL_EXPORTER_OTLP_TRACES_INSECURE + value: "false" + - name: OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE + value: "/otel/ca.crt" + - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_CERTIFICATE + value: "/otel/tls.crt" + - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_KEY + value: "/otel/tls.key" + - name: OTEL_METRICS_EXPORTER + value: "otlp" + - name: OTEL_METRIC_EXPORT_INTERVAL + value: "60000" + - name: OTEL_EXPORTER_OTLP_METRICS_PROTOCOL + value: "grpc" + - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT + value: "https://otel-collector:4317" + - name: OTEL_EXPORTER_OTLP_METRICS_TIMEOUT + value: "60000" + - name: OTEL_EXPORTER_OTLP_METRICS_INSECURE + value: "false" + - name: OTEL_EXPORTER_OTLP_METRICS_CERTIFICATE + value: "/otel/ca.crt" + - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_CERTIFICATE + value: "/otel/tls.crt" + - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_KEY + value: "/otel/tls.key" + volumeMounts: + - mountPath: /otel + name: otel + - name: wal + env: + - name: OTEL_SERVICE_NAME + value: "klio-wal" + - name: OTEL_RESOURCE_DETECTORS + value: "telemetry.sdk,host,os.type,process.executable.name" + - name: OTEL_TRACES_EXPORTER + value: "otlp" + - name: OTEL_EXPORTER_OTLP_TRACES_PROTOCOL + value: "grpc" + - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + value: "https://otel-collector:4317" + - name: OTEL_EXPORTER_OTLP_TRACES_COMPRESSION + value: "gzip" + - name: OTEL_EXPORTER_OTLP_TRACES_TIMEOUT + value: "10000" + - name: OTEL_EXPORTER_OTLP_TRACES_INSECURE + value: "false" + - name: OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE + value: "/otel/ca.crt" + - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_CERTIFICATE + value: "/otel/tls.crt" + - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_KEY + value: "/otel/tls.key" + - name: OTEL_METRICS_EXPORTER + value: "otlp" + - name: OTEL_METRIC_EXPORT_INTERVAL + value: "60000" + - name: OTEL_EXPORTER_OTLP_METRICS_PROTOCOL + value: "grpc" + - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT + value: "https://otel-collector:4317" + - name: OTEL_EXPORTER_OTLP_METRICS_TIMEOUT + value: "60000" + - name: OTEL_EXPORTER_OTLP_METRICS_INSECURE + value: "false" + - name: OTEL_EXPORTER_OTLP_METRICS_CERTIFICATE + value: "/otel/ca.crt" + - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_CERTIFICATE + value: "/otel/tls.crt" + - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_KEY + value: "/otel/tls.key" + volumeMounts: + - mountPath: /otel + name: otel + # Projected volume for OTEL certificates + volumes: + - name: otel + projected: + sources: + - secret: + name: otel-collector-tls + items: + - key: ca.crt + path: ca.crt + - secret: + name: otel-client-cert + items: + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key +``` + +For simpler management, you can achieve the same results using a `ConfigMap`: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: klio-otel-config +data: + OTEL_RESOURCE_DETECTORS: "telemetry.sdk,host,os.type,process.executable.name" + OTEL_TRACES_EXPORTER: "otlp" + OTEL_METRICS_EXPORTER: "otlp" + # Use the same endpoint configuration for both traces and metrics + # to 
keep it DRY, if no substantial differences are needed.
+  OTEL_EXPORTER_OTLP_PROTOCOL: "grpc"
+  OTEL_EXPORTER_OTLP_ENDPOINT: "https://otel-collector:4317"
+  OTEL_EXPORTER_OTLP_COMPRESSION: "gzip"
+  OTEL_EXPORTER_OTLP_TIMEOUT: "10000"
+  OTEL_EXPORTER_OTLP_INSECURE: "false"
+  OTEL_EXPORTER_OTLP_CERTIFICATE: "/otel/ca.crt"
+  OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE: "/otel/tls.crt"
+  OTEL_EXPORTER_OTLP_CLIENT_KEY: "/otel/tls.key"
+---
+apiVersion: klio.enterprisedb.io/v1alpha1
+kind: Server
+metadata:
+  name: my-klio-server
+spec:
+  # ... other configuration ...
+  template:
+    spec:
+      containers:
+        - name: base
+          env:
+            - name: OTEL_SERVICE_NAME
+              value: "klio-base"
+          envFrom:
+            - configMapRef:
+                name: klio-otel-config
+          volumeMounts:
+            - mountPath: /otel
+              name: otel
+        - name: wal
+          env:
+            - name: OTEL_SERVICE_NAME
+              value: "klio-wal"
+          envFrom:
+            - configMapRef:
+                name: klio-otel-config
+          volumeMounts:
+            - mountPath: /otel
+              name: otel
+      # Projected volume for OTEL certificates
+      volumes:
+        - name: otel
+          projected:
+            sources:
+              - secret:
+                  name: otel-collector-tls
+                  items:
+                    - key: ca.crt
+                      path: ca.crt
+              - secret:
+                  name: otel-client-cert
+                  items:
+                    - key: tls.crt
+                      path: tls.crt
+                    - key: tls.key
+                      path: tls.key
+```
+
+### Klio plugins with OpenTelemetry
+
+When deploying Klio as a CNPG Cluster plugin, you can configure OpenTelemetry
+by specifying the necessary environment variables in the `env` section of the
+`Cluster` spec.
+
+```yaml
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: cluster-example
+spec:
+  # ... other configuration ...
+  env:
+    - name: OTEL_TRACES_EXPORTER
+      value: "otlp"
+    - name: OTEL_EXPORTER_OTLP_TRACES_PROTOCOL
+      value: "grpc"
+    - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+      value: "https://otel-collector:4317"
+    - name: OTEL_EXPORTER_OTLP_TRACES_INSECURE
+      value: "false"
+    - name: OTEL_EXPORTER_OTLP_TRACES_TIMEOUT
+      value: "10000"
+    - name: OTEL_EXPORTER_OTLP_TRACES_COMPRESSION
+      value: "gzip"
+    - name: OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE
+      value: "/projected/ca.crt"
+    - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_CERTIFICATE
+      value: "/projected/tls.crt"
+    - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_KEY
+      value: "/projected/tls.key"
+    - name: OTEL_METRIC_EXPORT_INTERVAL
+      value: "60000"
+    - name: OTEL_RESOURCE_DETECTORS
+      value: "telemetry.sdk,host,os.type,process.executable.name"
+    - name: OTEL_SERVICE_NAME
+      value: "klio-walsender"
+    - name: OTEL_METRICS_EXPORTER
+      value: "otlp"
+    - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
+      value: "https://otel-collector:4317"
+    - name: OTEL_EXPORTER_OTLP_METRICS_PROTOCOL
+      value: "grpc"
+    - name: OTEL_EXPORTER_OTLP_METRICS_INSECURE
+      value: "false"
+    - name: OTEL_EXPORTER_OTLP_METRICS_TIMEOUT
+      value: "60000"
+    - name: OTEL_EXPORTER_OTLP_METRICS_CERTIFICATE
+      value: "/projected/ca.crt"
+    - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_CERTIFICATE
+      value: "/projected/tls.crt"
+    - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_KEY
+      value: "/projected/tls.key"
+
+  projectedVolumeTemplate:
+    sources:
+      - secret:
+          name: otel-collector-tls
+          items:
+            - key: ca.crt
+              path: ca.crt
+      - secret:
+          name: otel-walsender-client-cert
+          items:
+            - key: tls.crt
+              path: tls.crt
+            - key: tls.key
+              path: tls.key
+
+  plugins:
+    - name: klio.enterprisedb.io
+      enabled: true
+      parameters:
+        pluginConfigurationRef: client-config-cluster-example
+---
+apiVersion: klio.enterprisedb.io/v1alpha1
+kind: PluginConfiguration
+metadata:
+  name: client-config-cluster-example
+spec:
+  serverAddress: klio.default
+  clientSecretName: klio-client
+  serverSecretName: klio-server-tls
+```
diff --git a/product_docs/docs/klio/0/plugin_configuration.mdx b/product_docs/docs/klio/0/plugin_configuration.mdx
new file mode 100644
index 0000000000..3581803578
--- /dev/null
+++ b/product_docs/docs/klio/0/plugin_configuration.mdx
@@ -0,0 +1,354 @@
+---
+title: The Klio Plugin
+originalFilePath: >-
+  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/plugin_configuration.md
+sidebar_position: 6
+editTarget: originalFilePath
+
+---
+
+The Klio plugin for CloudNativePG allows you to leverage the backup and WAL
+streaming capabilities of Klio for your PostgreSQL clusters managed by
+CloudNativePG. It will add two containers to each PostgreSQL instance pod:
+
+- A `klio-plugin` container that handles backup creation and management
+- A `klio-wal` container that streams WAL files to the Klio server in real-time
+
+## Configuration
+
+The Klio plugin integrates with CloudNativePG through the CNPG-I (CloudNativePG
+Interface) specification, enabling Klio to manage backups and WAL streaming for
+your PostgreSQL clusters. To use Klio with a CloudNativePG cluster, you need to:
+
+1. Create a `PluginConfiguration` resource that defines how to connect to the
+   Klio server
+2. Reference the plugin in your `Cluster` resource specification
+
+## Prerequisites
+
+Before configuring a cluster to use the Klio plugin, ensure you have:
+
+- A running Klio `Server` resource deployed in your namespace
+- Client credentials (a TLS client certificate) stored in a Kubernetes Secret
+- The server's TLS certificate available in a Secret
+
+## Creating a PluginConfiguration resource
+
+The `PluginConfiguration` custom resource defines how the Klio plugin connects
+to and communicates with the Klio server. This resource contains connection
+details, authentication credentials, and optional configuration for metrics,
+profiling, and backup retention policies.
+
+### Basic example
+
+Here's a minimal `PluginConfiguration` example:
+
+```yaml
+apiVersion: klio.enterprisedb.io/v1alpha1
+kind: PluginConfiguration
+metadata:
+  name: klio-plugin-config
+  namespace: default
+spec:
+  serverAddress: klio-server.default
+  clientSecretName: client-sample-tls
+  serverSecretName: klio-server-tls
+```
+
+### Client credentials secret
+
+The client credentials must be stored in a Kubernetes Secret of type
+`kubernetes.io/tls`, containing a client certificate to be presented to the
+Klio server.
+
+This secret can be generated with cert-manager by following the [documentation
+in the Klio server page](klio_server.mdx#creating-a-client-side-certificate).
+
+### Server Address
+
+The `serverAddress` field specifies where the Klio server can be reached. This
+can be:
+
+- A Kubernetes service name: `klio-server.default` (within the same namespace)
+- A fully qualified domain name: `klio-server.default.svc.cluster.local`
+- An external address: `klio.example.com`
+
+Connections are made using the default ports of the Klio base and WAL
+servers: 51515 and 52000, respectively.
+
+### TLS configuration
+
+The `serverSecretName` field references a Secret containing the TLS certificate
+used to secure communication with the Klio server. This is the same
+certificate configured on the `Server` resource.
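+
+Before referencing the `PluginConfiguration` from a cluster, you can optionally
+confirm that both referenced secrets exist in the target namespace. The secret
+names below match the basic example above:
+
+```bash
+kubectl get secret client-sample-tls klio-server-tls -n default
+```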
+
+## Configuring a Cluster to use the Klio plugin
+
+Once you have created a `PluginConfiguration`, reference it in your CloudNativePG
+`Cluster` resource:
+
+```yaml
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: my-postgres-cluster
+  namespace: default
+spec:
+  instances: 3
+
+  postgresql:
+    pg_hba:
+      - local replication all peer # Allow replication connections locally
+
+  plugins:
+    - name: klio.enterprisedb.io
+      enabled: true # Activate the Klio plugin (default)
+      parameters:
+        pluginConfigurationRef: klio-plugin-config
+
+  storage:
+    size: 10Gi
+```
+
+To be able to stream WAL files, ensure that your PostgreSQL configuration
+allows local replication connections. You can do this by adding an entry to the
+`pg_hba` section, as shown in the example above.
+
+### Plugin parameters
+
+The `plugins` section in the `Cluster` specification requires:
+
+- **name**: Must be set to `klio.enterprisedb.io` to identify the Klio plugin
+- **enabled**: Set to `true` to activate the plugin. This is the default value.
+- **parameters.pluginConfigurationRef**: The name of your `PluginConfiguration` resource
+
+!!!note
+
+Even though the Klio plugin is used to archive WAL files on the Klio server,
+it does not use the `archiveCommand` parameter in the PostgreSQL configuration,
+as the WAL files are streamed directly to the Klio server. Thus, you must not set
+`isWALArchiver: true` in the plugin configuration.
+!!!
+
+## Advanced configuration options
+
+The `PluginConfiguration` resource supports several advanced options to
+customize the plugin's behavior.
+
+### Retention policies
+
+Define how long backups should be retained by configuring the retention policy:
+
+```yaml
+apiVersion: klio.enterprisedb.io/v1alpha1
+kind: PluginConfiguration
+metadata:
+  name: klio-plugin-config
+spec:
+  serverAddress: klio-server.default
+  clientSecretName: klio-client-credentials
+  serverSecretName: klio-server-tls
+  retention:
+    keepLatest: 5
+    keepHourly: 12
+    keepDaily: 7
+    keepWeekly: 4
+    keepMonthly: 6
+    keepAnnual: 2
+```
+
+Except for `keepLatest`, each option defines how many backups to retain
+for the specified time period. For example, `keepDaily: 7` means that at most
+one backup is retained for each of the past 7 days.
+
+If multiple backups exist within the same time bucket, the most recent one is
+kept, unless preserved by a different *keep* rule. Backups that are not
+retained by any rule are deleted. Rules are evaluated when a new backup is
+taken.
+
+The Klio server will automatically delete WAL files that are no longer needed
+for recovery by any retained backup.
+
+All retention settings are optional. For each unspecified retention level,
+the default Kopia value is applied:
+
+```yaml
+keepLatest: 10
+keepHourly: 48
+keepDaily: 7
+keepWeekly: 4
+keepMonthly: 24
+keepAnnual: 1
+```
+
+Set a rule to `0` to disable that retention level.
+
+### Cluster name override
+
+By default, the plugin uses the name of the CloudNativePG `Cluster` resource.
+You can override this if needed:
+
+```yaml
+spec:
+  clusterName: my-custom-cluster-name
+```
+
+This can be useful when working with backups from different clusters, for example
+when restoring clusters or configuring replica clusters.
+
+### Tier 2 restore
+
+To enable restore from Tier 2 storage, set the `tier2` field to `true`:
+
+```yaml
+spec:
+  tier2: true
+```
+
+When enabled, Klio will look for backups in both Tier 1 and Tier 2. If a backup
+is available in both tiers, Tier 1 takes precedence, as restoring from it is
+faster.
+

See the [Architecture documentation](architectures.mdx#tier-2-secondary-storage-object-storage)
for more details on Tier 2 storage.

### Restore configuration

When performing a restore, you can specify which backup to use:

```yaml
spec:
  backupId: backup-YYYYMMDDHHMMSS
```

You can find the backup ID in the `Backup` resource's status, or through the
Klio API server.

### Observability

See the [OpenTelemetry observability](opentelemetry.mdx) section for more
details on how to monitor the Klio plugin using OpenTelemetry.

### Performance profiling

Enable the pprof HTTP endpoint for performance profiling and troubleshooting:

```yaml
spec:
  pprof: true
```

When enabled, the pprof endpoint is exposed and can be used with Go's profiling
tools to analyze CPU usage, memory allocation, goroutines, and other runtime
metrics.

!!!warning

Enable pprof only in development or testing environments, or when actively
troubleshooting a performance issue. Do not leave it enabled in production
unless strictly necessary.
!!!

## Container customization

The `PluginConfiguration` resource allows you to customize the Klio sidecar
containers by providing base container specifications that are used as the
foundation for the sidecars. This enables you to add custom environment
variables, volume mounts, resource limits, and other container settings without
modifying the PostgreSQL container environment.

### Basic example

```yaml
apiVersion: klio.enterprisedb.io/v1alpha1
kind: PluginConfiguration
metadata:
  name: klio-plugin-config
spec:
  serverAddress: klio-server.default
  clientSecretName: klio-client-credentials
  serverSecretName: klio-server-tls
  containers:
    - name: klio-plugin
      env:
        - name: CUSTOM_ENV_VAR
          value: "my-value"
        - name: DEBUG_LEVEL
          value: "info"
    - name: klio-wal
      env:
        - name: WAL_BUFFER_SIZE
          value: "8192"
```

### How container merging works

The containers you define serve as the base for the Klio sidecars, with the
following merge behavior:

1. **Your container is the base**: When you define a container (e.g., `klio-plugin`),
   your specification serves as the starting point
2. **Klio enforces required values**: Klio sets its essential configuration:
   - Container `name` (klio-plugin, klio-wal, or klio-restore)
   - Container `args` (the command arguments needed for operation)
   - `CONTAINER_NAME` environment variable
3. **Your customizations are preserved**: All other fields you define remain intact
4. **Template defaults fill gaps**: For fields you don't specify, Klio applies
   sensible defaults (image, security context, standard volume mounts, etc.)

**Important**: Klio's required values (name, args, CONTAINER_NAME env var) will
always override any conflicting values you set. All other customizations are
respected.
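To make the merge behavior concrete, here is a sketch of a user-supplied base
and the container the operator might derive from it. The `args` and `image`
values below are placeholders, not the actual values Klio applies:

```yaml
# User-supplied base (from spec.containers):
- name: klio-plugin
  env:
    - name: LOG_LEVEL         # user-defined; preserved by the merge
      value: "debug"
  resources:
    requests:
      memory: "256Mi"         # user-defined; preserved by the merge

# Hypothetical merged result (values marked "enforced" or "default" come from Klio):
- name: klio-plugin           # enforced by Klio
  args: ["<plugin args>"]     # enforced by Klio; placeholder shown here
  image: <sidecar image>      # template default, since the user did not set it
  env:
    - name: CONTAINER_NAME    # enforced by Klio
      value: klio-plugin
    - name: LOG_LEVEL         # preserved from the user base
      value: "debug"
  resources:
    requests:
      memory: "256Mi"         # preserved from the user base
```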
+

### Available sidecar containers

The following containers can be customized:

- **`klio-plugin`**: Handles backup creation and management in PostgreSQL pods
- **`klio-wal`**: Streams WAL files to the Klio server in PostgreSQL pods
- **`klio-restore`**: Restores backups during recovery jobs

### Example: Resource limits and environment variables

```yaml
apiVersion: klio.enterprisedb.io/v1alpha1
kind: PluginConfiguration
metadata:
  name: klio-plugin-config
spec:
  serverAddress: klio-server.default
  clientSecretName: klio-client-credentials
  serverSecretName: klio-server-tls
  containers:
    - name: klio-plugin
      env:
        - name: LOG_LEVEL
          value: "debug"
        - name: OTEL_EXPORTER_OTLP_ENDPOINT
          value: "http://otel-collector:4317"
      resources:
        limits:
          memory: "512Mi"
          cpu: "1"
        requests:
          memory: "256Mi"
          cpu: "500m"
    - name: klio-wal
      env:
        - name: WAL_STREAM_TIMEOUT
          value: "30s"
      resources:
        limits:
          memory: "256Mi"
          cpu: "500m"
        requests:
          memory: "128Mi"
          cpu: "250m"
```

!!!warning

Be careful when customizing containers. While your customizations serve as the
base, Klio overrides certain critical values (name, args, the CONTAINER_NAME
env var) that are required for proper operation. Avoid setting these fields,
as they will be replaced. Always test changes in a non-production environment
first.
!!!
diff --git a/product_docs/docs/klio/0/wal_streaming.mdx b/product_docs/docs/klio/0/wal_streaming.mdx
new file mode 100644
index 0000000000..4a6495aa0b
--- /dev/null
+++ b/product_docs/docs/klio/0/wal_streaming.mdx
@@ -0,0 +1,125 @@
---
title: WAL Streaming
originalFilePath: >-
  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/wal_streaming.md
sidebar_position: 4
editTarget: originalFilePath

---

A standout feature of Klio is its native, cloud-first implementation of WAL
streaming for PostgreSQL. This architecture enables:

- Partial WAL segment streaming, ensuring real-time data transfer
- Built-in compression and encryption using user-provided keys
- Controlled replication slot advancement, protecting against WAL loss
- Optional synchronous replication, offering zero RPO when enabled

## Architecture

WAL streaming in Klio is built around two components: a client and a server.

- The client, invoked using the `klio send-wal` command, typically runs
  alongside PostgreSQL but does not have to.
- The server, started with the `klio server start-wal` command, runs as a
  dedicated process on the Klio server.

In Kubernetes environments, as illustrated in the diagram below, Klio streams
WAL records directly from the PostgreSQL primary over a local Unix domain
socket. The WAL streamer runs as a lightweight sidecar container within the
same pod as the primary instance and is managed by the CNPG-I–compliant plugin.
It continuously pushes data to a remote Klio WAL server (Tier 1), which handles
partial WAL file synchronization and archives completed segments into the
central WAL archive for the PostgreSQL cluster.

![WAL streaming architectural overview](images/wal-streaming.png)

## Moving Beyond `archive_command`

Klio replaces the traditional PostgreSQL `archive_command` method for WAL
handling in CloudNativePG clusters, providing improved reliability, efficiency,
security, and observability.
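For reference, the file-based approach being replaced is typically configured
along these lines in `postgresql.conf`; the copy command and archive path are
the canonical PostgreSQL illustration, not Klio settings:

```ini
# Traditional segment-based WAL archiving (illustrative values)
archive_mode = on
archive_command = 'cp %p /mnt/archive/%f'  # spawned once per completed segment
archive_timeout = 300                      # force a segment switch every 5 minutes
```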
+

PostgreSQL's `archive_command` is a shell command executed when a WAL segment
is complete, either because the segment reached its size limit (typically 16MB)
or because the `archive_timeout` elapsed (5 minutes by default in CloudNativePG).

The streaming model provided by Klio offers several key advantages over this
approach:

- **Near-zero RPO:** WAL changes are streamed incrementally in near real-time,
  reducing the worst-case recovery point objective (RPO) from 5 minutes to
  near-zero, or even zero in synchronous mode.

- **Improved efficiency and scalability:** A single, continuously running WAL
  streamer process replaces the need to spawn a new process for each WAL
  segment, resulting in lower CPU and I/O usage and better scalability during
  periods of high WAL volume.

- **Enhanced security:** WAL data is encrypted end-to-end, both in transit and
  at rest, providing protection not available with the traditional
  `archive_command`.

- **Comprehensive observability:** Native metrics and structured logging
  provide full visibility into WAL streaming operations, simplifying
  monitoring, anomaly detection, and troubleshooting compared to the opaque
  nature of `archive_command`.

## Monitoring Klio WAL Streamer in PostgreSQL

The Klio WAL streamer is a PostgreSQL streaming replication client and,
as such, can be monitored using the standard `pg_stat_replication`
system view in the PostgreSQL catalog.

The WAL streamer identifies itself with `application_name` set to `klio`.

To verify whether any Klio WAL streamer is connected to an instance (in
Kubernetes deployments, this will always be the primary), run the following
query:

```sql
SELECT * FROM pg_stat_replication WHERE application_name = 'klio';
```

An example output might look like this:

```console
-[ RECORD 1 ]----+------------------------------
pid              | 1070
usesysid         | 10
usename          | postgres
application_name | klio
client_addr      |
client_hostname  |
client_port      | -1
backend_start    | 2025-08-07 01:14:39.619662+00
backend_xmin     |
state            | streaming
sent_lsn         | 2/C765A000
write_lsn        | 2/C75FA000
flush_lsn        | 2/C741A000
replay_lsn       | 2/C741A000
write_lag        | 00:00:00.919907
flush_lag        | 00:00:00.923556
replay_lag       | 00:00:00.923556
sync_priority    | 0
sync_state       | async
reply_time       | 2025-08-07 01:54:44.756306+00
```

As you can see, Klio provides relevant feedback to PostgreSQL. Here is a brief
explanation of the key fields:

- `state`: The replication connection status (`streaming` indicates active
  streaming).
- `sent_lsn`, `write_lsn`, `flush_lsn`, `replay_lsn`: Positions in the WAL
  indicating how far data has been sent, written, flushed, and replayed on the
  Klio server (replayed and flushed are always identical).
- `write_lag`, `flush_lag`, `replay_lag`: Delays between WAL positions
  indicating replication latency.
- `sync_state`: The synchronization state of this standby (e.g., `async`,
  `sync`, `potential`, `quorum`).
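If you want a single figure for how far the streamer lags behind the primary,
you can compute the byte lag from the same view using standard PostgreSQL
functions (nothing Klio-specific); a minimal sketch:

```sql
SELECT application_name,
       state,
       -- bytes between the primary's current WAL position and what Klio has flushed
       pg_wal_lsn_diff(pg_current_wal_lsn(), flush_lsn) AS flush_lag_bytes,
       flush_lag
FROM pg_stat_replication
WHERE application_name = 'klio';
```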
diff --git a/product_docs/docs/klio/0/walplayer.mdx b/product_docs/docs/klio/0/walplayer.mdx
new file mode 100644
index 0000000000..b74cd02ca0
--- /dev/null
+++ b/product_docs/docs/klio/0/walplayer.mdx
@@ -0,0 +1,263 @@
---
title: WAL Player
originalFilePath: >-
  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/walplayer.md
sidebar_position: 80
editTarget: originalFilePath

---

The WAL Player is a command-line tool designed to benchmark the performance of
your Klio servers by simulating PostgreSQL Write-Ahead Log (WAL) file streaming
workloads. It helps ensure your Klio servers can handle your production
workloads efficiently. Use it regularly to validate performance and capacity
planning decisions.

## Overview

WAL Player provides two main commands:

- **`generate`** - Creates synthetic WAL files for testing
- **`play`** - Sends WAL files to a Klio server and measures performance

This tool is essential for:

- Performance testing and benchmarking Klio servers
- Validating server capacity under different workloads
- Measuring throughput and latency characteristics
- Load testing before production deployment

## Prerequisites

- Klio binary installed and accessible
- A running Klio server to test against
- Sufficient disk space for generating test WAL files

## Commands

### `klio wal-player generate`

Generates synthetic WAL files for testing purposes.

#### Usage

```bash
klio wal-player generate [output-directory] [flags]
```

#### Parameters

- `output-directory` - Directory where WAL files will be created (defaults to
  current directory)

#### Flags

- `--wal-size` - Size of each WAL file in MB (default: 16)
- `--length` - Number of WAL files to generate (required)

#### Examples

```bash
# Generate 10 WAL files of 16MB each in the current directory
klio wal-player generate --length 10

# Generate 50 WAL files of 32MB each in a specific directory
klio wal-player generate /tmp/test-wals --wal-size 32 --length 50
```

### `klio wal-player play`

Sends WAL files to a Klio server and measures performance metrics.

#### Usage

```bash
klio wal-player play [directory] [flags]
```

#### Parameters

- `directory` - Directory containing WAL files to send (required). This
  directory should contain PostgreSQL WAL files in the standard format (e.g.,
  `000000010000000000000001`). It also supports files compressed with gzip,
  provided they have the `.gz` extension.

#### Flags

- `--jobs, -j` - Number of parallel jobs for concurrent uploads (default: 1).
  Can be used to simulate multiple Klio clients sending data simultaneously.
- `--block-size` - Block size in KB for streaming (default: 2048). This controls
  how much data is sent in each request.

#### Configuration

The play command requires client configuration to connect to your Klio server.
+
This can be provided via:

- Configuration file
- Environment variables
- Command-line flags

Example configuration:

```yaml
# klio-config.yaml
client:
  wal:
    address: localhost:52000
    cluster_name: walplayer
    server_cert_path: "/path/to/server.crt"
    username: klio
    password: password
```

#### Examples

```bash
# Send WAL files using a single connection
klio wal-player play ./test-wals

# Send WAL files using 4 workers for parallel uploads
klio wal-player play ./test-wals --jobs 4

# Benchmark with different block sizes
klio wal-player play ./test-wals --jobs 2 --block-size 1024
```

## Performance Metrics

The `play` command outputs detailed performance metrics in JSON format for each
WAL file:

```json
{
  "walFullPath": "/path/to/000000010000000000000001",
  "startTime": "2025-01-15T10:30:00Z",
  "endTime": "2025-01-15T10:30:02Z",
  "elapsedTime": "7651680",
  "error": ""
}
```

### Metrics Explained

- **`walFullPath`** - Full path to the WAL file that was sent
- **`startTime`** - When the upload started
- **`endTime`** - When the upload completed
- **`elapsedTime`** - Total time taken for the upload in nanoseconds
- **`error`** - Error message if the upload failed (empty on success)

## Benchmarking Workflow

### 1. Generate Test Data

First, create WAL files that represent your expected workload:

```bash
# For high-throughput testing (many small files)
klio wal-player generate ./benchmark-high-throughput --wal-size 16 --length 1000

# For high-bandwidth testing (fewer large files)
klio wal-player generate ./benchmark-high-bandwidth --wal-size 256 --length 100
```

### 2. Single Connection Baseline

Test with a single connection to establish baseline performance:

```bash
klio wal-player play ./benchmark-high-throughput --jobs 1 > baseline-results.json
```

### 3. Scale Testing

Test with increasing concurrency to find optimal parallelism:

```bash
# Test with different job counts
for jobs in 1 2 4 8 16; do
  echo "Testing with $jobs jobs..."
  klio wal-player play ./benchmark-high-throughput --jobs $jobs > results-$jobs-jobs.json
done
```

### 4. Analyze Results

Parse the JSON output to calculate performance metrics:

```bash
# Count successful uploads
jq -s '[.[] | select(.error == "")] | length' results.json

# Calculate the average upload time in nanoseconds
jq -s '[.[] | select(.error == "") | .elapsedTime | tonumber] | add / length' results.json

# Find failed uploads
jq -s '.[] | select(.error != "")' results.json
```

## Performance Optimization Tips

### Client-Side Optimization

1. **Parallel Jobs**: Start with 2-4 jobs and increase until performance
   plateaus
2. **Block Size**: Adjust based on network characteristics:
   - Higher latency networks: Use larger block sizes (4096KB+)
   - Lower latency networks: Use smaller block sizes (512-1024KB)
3. **WAL File Size**: Match your production WAL segment size

### Server-Side Considerations

1. **Resource Monitoring**: Monitor CPU, memory, and disk I/O on the Klio server
2. **Network Bandwidth**: Ensure sufficient bandwidth between client and server
3. 
**Storage Performance**: Verify storage can handle the write throughput

## Example Benchmark Script

Here's a complete benchmarking script:

```bash
#!/bin/bash
set -e

# Configuration
WAL_DIR="./benchmark-wals"
RESULTS_DIR="./benchmark-results"
WAL_SIZE=16
WAL_COUNT=500
BLOCK_SIZE=2048

# Clean up previous runs
rm -rf "$WAL_DIR" "$RESULTS_DIR"
mkdir -p "$WAL_DIR" "$RESULTS_DIR"

# Generate test WAL files
echo "Generating $WAL_COUNT WAL files of ${WAL_SIZE}MB each..."
klio wal-player generate "$WAL_DIR" --wal-size "$WAL_SIZE" --length "$WAL_COUNT"

# Test different concurrency levels
for jobs in 1 2 4 8 16; do
  echo "Testing with $jobs parallel jobs..."

  start_time=$(date +%s)
  klio wal-player play "$WAL_DIR" --config ./klio-config.yaml --jobs ${jobs} --block-size ${BLOCK_SIZE} > "$RESULTS_DIR/results-$jobs-jobs.json" 2> "$RESULTS_DIR/error-$jobs-jobs.log"
  end_time=$(date +%s)

  # Calculate summary statistics
  total_time=$((end_time - start_time))
  successful_uploads=$(jq -s '[.[] | select(.error == "")] | length' "$RESULTS_DIR/results-$jobs-jobs.json")
  failed_uploads=$(jq -s '[.[] | select(.error != "")] | length' "$RESULTS_DIR/results-$jobs-jobs.json")
  avg_upload_time=$(jq -s '[.[] | select(.error == "") | .elapsedTime | tonumber] | add / length' "$RESULTS_DIR/results-$jobs-jobs.json")

  echo "  Total time: ${total_time}s"
  echo "  Successful uploads: $successful_uploads"
  echo "  Failed uploads: $failed_uploads"
  echo "  Throughput: $(echo "scale=2; $successful_uploads / $total_time" | bc) WAL/s"
  echo "  Avg WAL upload time: $(echo "scale=6; $avg_upload_time / 1000000" | bc) ms"
  echo
done

echo "Benchmark complete! 
Results saved in $RESULTS_DIR" +``` From 9a5449ef434c5a8dd5ebcf296a79e54668087252 Mon Sep 17 00:00:00 2001 From: Josh Heyer Date: Mon, 19 Jan 2026 17:12:09 +0000 Subject: [PATCH 3/7] add 'caution' admonition (maps to warning) --- gatsby-config.js | 1 + src/styles/_admonitions.scss | 4 ++-- src/styles/_dark.scss | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gatsby-config.js b/gatsby-config.js index 8ebc5d269f..13f9fbd984 100644 --- a/gatsby-config.js +++ b/gatsby-config.js @@ -431,6 +431,7 @@ module.exports = { seealso: "note", hint: "tip", interactive: "interactive", + caution: "warning", }, }, ], diff --git a/src/styles/_admonitions.scss b/src/styles/_admonitions.scss index 4c6e9a391f..762cad6e7c 100644 --- a/src/styles/_admonitions.scss +++ b/src/styles/_admonitions.scss @@ -10,7 +10,7 @@ .admonition-info { @extend .alert-secondary; } -.admonition-warning { +.admonition-warning, .admonition-caution { @extend .alert-warning; } .admonition-danger { @@ -82,7 +82,7 @@ } } -.admonition-warning { +.admonition-warning, .admonition-caution { > .admonition-heading h5:before { content: url('data:image/svg+xml;utf8,'); } diff --git a/src/styles/_dark.scss b/src/styles/_dark.scss index bfab15fa0b..9506c3754c 100644 --- a/src/styles/_dark.scss +++ b/src/styles/_dark.scss @@ -166,7 +166,7 @@ html.dark { background-color: darken(#e4eef5, 70%) !important; color: darken($light, 10%) !important; } - .admonition-warning { + .admonition-warning, .admonition-caution { background-color: darken(#ffedd1, 70%) !important; color: darken($light, 10%) !important; } From 32377498763783ebacbf3462af971430178f1895 Mon Sep 17 00:00:00 2001 From: Josh Heyer Date: Mon, 19 Jan 2026 17:12:43 +0000 Subject: [PATCH 4/7] klio 0.0.11 import --- .../docs/klio/0/_helm_chart_values.mdx | 6 +- product_docs/docs/klio/0/api/_klio_api.mdx | 196 +++++++---- .../docs/klio/0/api/_klio_catalog_api.mdx | 20 +- product_docs/docs/klio/0/api/klio_api.mdx | 2 +- .../docs/klio/0/api/klio_catalog_api.mdx | 2 +- product_docs/docs/klio/0/api_service.mdx | 21 +- product_docs/docs/klio/0/architectures.mdx | 30 +- .../docs/klio/0/backup_and_restore.mdx | 26 +- product_docs/docs/klio/0/helm_chart.mdx | 32 +- product_docs/docs/klio/0/index.mdx | 30 +- product_docs/docs/klio/0/klio_server.mdx | 327 +++++++++--------- product_docs/docs/klio/0/main_concepts.mdx | 2 +- product_docs/docs/klio/0/opentelemetry.mdx | 2 +- .../docs/klio/0/plugin_configuration.mdx | 92 ++--- product_docs/docs/klio/0/wal_streaming.mdx | 4 +- product_docs/docs/klio/0/walplayer.mdx | 258 +++++++++----- 16 files changed, 594 insertions(+), 456 deletions(-) diff --git a/product_docs/docs/klio/0/_helm_chart_values.mdx b/product_docs/docs/klio/0/_helm_chart_values.mdx index 114fef14fa..d77cdfa098 100644 --- a/product_docs/docs/klio/0/_helm_chart_values.mdx +++ b/product_docs/docs/klio/0/_helm_chart_values.mdx @@ -10,11 +10,11 @@ | controllerManager.affinity | object | `{}` | Affinity rules for the operator deployment. | | controllerManager.manager.args | list | `["--metrics-bind-address=:8443","--leader-elect","--health-probe-bind-address=:8081","--plugin-server-cert=/pluginServer/tls.crt","--plugin-server-key=/pluginServer/tls.key","--plugin-client-cert=/pluginClient/tls.crt","--plugin-server-address=:9090","--custom-cnpg-group=postgresql.cnpg.io"]` | List of command line arguments to pass to the controller manager. 
| | controllerManager.manager.containerSecurityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]}}` | The security context for the controller manager container. | -| controllerManager.manager.env | object | `{"SIDECAR_IMAGE":"ghcr.io/enterprisedb/klio:v0.0.10"}` | The environment variables to set in the controller manager container. | +| controllerManager.manager.env | object | `{"SIDECAR_IMAGE":"docker.enterprisedb.com/k8s/klio:v0.0.11"}` | The environment variables to set in the controller manager container. | | controllerManager.manager.image.pullPolicy | string | `"Always"` | The controller manager container imagePullPolicy. | | controllerManager.manager.image.pullSecrets | list | `[]` | The list of imagePullSecrets. | -| controllerManager.manager.image.repository | string | `"ghcr.io/enterprisedb/klio-operator"` | The image to use for the controller manager container. | -| controllerManager.manager.image.tag | string | `"v0.0.10"` | The tag to use for the controller manager container image. | +| controllerManager.manager.image.repository | string | `"docker.enterprisedb.com/k8s/klio-operator"` | The image to use for the controller manager container. | +| controllerManager.manager.image.tag | string | `"v0.0.11"` | The tag to use for the controller manager container image. | | controllerManager.manager.livenessProbe | object | `{"httpGet":{"path":"/healthz","port":8081},"initialDelaySeconds":15,"periodSeconds":20}` | Liveness probe configuration. | | controllerManager.manager.readinessProbe | object | `{"httpGet":{"path":"/readyz","port":8081},"initialDelaySeconds":5,"periodSeconds":10}` | Readiness probe configuration. | | controllerManager.manager.resources | object | `{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}}` | The resources to allocate. | diff --git a/product_docs/docs/klio/0/api/_klio_api.mdx b/product_docs/docs/klio/0/api/_klio_api.mdx index b7d39ca094..bc13e7d324 100644 --- a/product_docs/docs/klio/0/api/_klio_api.mdx +++ b/product_docs/docs/klio/0/api/_klio_api.mdx @@ -11,54 +11,57 @@ Package v1alpha1 contains API Schema definitions for the klio v1alpha1 API group - [PluginConfiguration](#pluginconfiguration) - [Server](#server) -#### BaseConfiguration +#### Cache -BaseConfiguration defines the configuration for the base server. +Cache defines the configuration for the cache directory. 
*Appears in:* -- [ServerSpec](#serverspec) +- [Tier1Configuration](#tier1configuration) +- [Tier2Configuration](#tier2configuration) -| Field | Description | Required | Default | Validation | -| --------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------- | -------- | ------- | ---------- | -| `resources` *[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcerequirements-v1-core)* | Resources defines the resource requirements for the Kopia server | | | | -| `adminUser` *[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#localobjectreference-v1-core)* | AdminUser is a reference to a secret of type 'kubernetes.io/basic-auth' | | | | +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -------- | ------- | ---------- | +| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#persistentvolumeclaimspec-v1-core)* | | True | | | -#### CacheConfiguration +#### Data -CacheConfiguration defines the configuration for the cache directory. +Data defines the configuration for the data directory. *Appears in:* -- [ServerSpec](#serverspec) +- [Tier1Configuration](#tier1configuration) -| Field | Description | Required | Default | Validation | -| --------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -------- | ------- | ---------- | -| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#persistentvolumeclaimspec-v1-core)* | | True | | | +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#persistentvolumeclaimspec-v1-core)* | Template to be used to generate the Persistent Volume Claim needed for the data folder,
containing base backups and WAL files. | True | | | -#### DataConfiguration +#### ImageConfiguration -DataConfiguration defines the configuration for the data directory. +ImageConfiguration contains the information needed to download +the Klio image. *Appears in:* - [ServerSpec](#serverspec) -| Field | Description | Required | Default | Validation | -| --------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | ---------- | -| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#persistentvolumeclaimspec-v1-core)* | Template to be used to generate the Persistent Volume Claim needed for the data folder,
containing base backups and WAL files. | True | | | +| Field | Description | Required | Default | Validation | +| ---------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | -------- | ------------ | ------------------- | +| `image` *string* | Image is the image to be used for the Klio server | True | | | +| `imagePullPolicy` *[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#pullpolicy-v1-core)* | ImagePullPolicy defines the policy for pulling the image | | IfNotPresent | Optional: {}
| +| `imagePullSecrets` *[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#localobjectreference-v1-core) array* | ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the
images | | | Optional: {}
| #### PluginConfiguration PluginConfiguration is the Schema for the client configuration API. -| Field | Description | Required | Default | Validation | -| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ---------- | -| `apiVersion` *string* | `klio.enterprisedb.io/v1alpha1` | True | | | -| `kind` *string* | `PluginConfiguration` | True | | | -| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | -| `spec` *[PluginConfigurationSpec](#pluginconfigurationspec)* | | True | | | -| `status` *[PluginConfigurationStatus](#pluginconfigurationstatus)* | | | | | +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ------------------- | +| `apiVersion` *string* | `klio.enterprisedb.io/v1alpha1` | True | | | +| `kind` *string* | `PluginConfiguration` | True | | | +| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | +| `spec` *[PluginConfigurationSpec](#pluginconfigurationspec)* | | True | | | +| `status` *[PluginConfigurationStatus](#pluginconfigurationstatus)* | | | | Optional: {}
| #### PluginConfigurationSpec @@ -71,13 +74,13 @@ PluginConfigurationSpec defines the desired state of client configuration. | Field | Description | Required | Default | Validation | | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------- | ------- | -------------------------------------- | | `serverAddress` *string* | ServerAddress is the address of the Klio server | True | | MinLength: 1
Required: {}
| -| `tier2` *boolean* | Tier2 enables backup lookup in tier 2. | True | | | +| `tier1` *[Tier1PluginConfiguration](#tier1pluginconfiguration)* | Tier1 is the Tier 1 configuration | | | Optional: {}
| +| `tier2` *[Tier2PluginConfiguration](#tier2pluginconfiguration)* | Tier2 is the Tier 2 configuration | | | Optional: {}
| | `clientSecretName` *string* | ClientSecretName is the name of the secret containing the client credentials | True | | MinLength: 1
Required: {}
| | `serverSecretName` *string* | ServerSecretName is the name of the secret containing the server TLS certificate | True | | MinLength: 1
Required: {}
| -| `clusterName` *string* | ClusterName is the name of the PostgreSQL cluster we are connecting to | | | | -| `pprof` *boolean* | Pprof enables the pprof endpoint for performance profiling | | | | -| `retention` *[RetentionPolicy](#retentionpolicy)* | RetentionPolicy defines how many backups we should keep | | | | -| `containers` *[Container](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#container-v1-core) array* | Containers allows defining a list of containers that will be merged with the Klio sidecar containers.
This enables users to customize the sidecars with additional environment variables, volume mounts,
resource limits, and other container settings without polluting the PostgreSQL container environment.
Merge behavior:
- Containers are matched by name (klio-plugin, klio-wal, klio-restore)
- User customizations serve as the base
- Klio required values (name, args, CONTAINER_NAME env var) always override user values
- User-defined environment variables and volume mounts are preserved
- Template defaults are applied only for fields not set by the user or Klio | | | MaxItems: 3
| +| `clusterName` *string* | ClusterName is the name of the PostgreSQL cluster we are connecting to | | | Optional: {}
| +| `pprof` *boolean* | Pprof enables the pprof endpoint for performance profiling | | | Optional: {}
| +| `containers` *[Container](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#container-v1-core) array* | Containers allows defining a list of containers that will be merged with the Klio sidecar containers.
This enables users to customize the sidecars with additional environment variables, volume mounts,
resource limits, and other container settings without polluting the PostgreSQL container environment.
Merge behavior:
- Containers are matched by name (klio-plugin, klio-wal, klio-restore)
- User customizations serve as the base
- Klio required values (name, args, CONTAINER_NAME env var) always override user values
- User-defined environment variables and volume mounts are preserved
- Template defaults are applied only for fields not set by the user or Klio | | | MaxItems: 3
Optional: {}
| #### PluginConfigurationStatus @@ -87,9 +90,9 @@ PluginConfigurationStatus defines the observed state of ClientConfig. - [PluginConfiguration](#pluginconfiguration) -#### QueueConfiguration +#### Queue -QueueConfiguration defines the configuration for the directory hosting the +Queue defines the configuration for the directory hosting the task queue. *Appears in:* @@ -98,8 +101,7 @@ task queue. | Field | Description | Required | Default | Validation | | --------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | -------- | ------- | ---------- | -| `resources` *[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcerequirements-v1-core)* | QueueResources defines the resource requirements for the NATS server | | | | -| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#persistentvolumeclaimspec-v1-core)* | PersistentVolumeClaimTemplate is used to generate the configuration for
the PVC hosting the work queue. | True | | | +| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#persistentvolumeclaimspec-v1-core)* | PersistentVolumeClaimTemplate is used to generate the configuration for
the PVC hosting the work queue. | True | | | #### RetentionPolicy @@ -107,7 +109,8 @@ RetentionPolicy defines how many backups we should keep. *Appears in:* -- [PluginConfigurationSpec](#pluginconfigurationspec) +- [Tier1PluginConfiguration](#tier1pluginconfiguration) +- [Tier2PluginConfiguration](#tier2pluginconfiguration) | Field | Description | Required | Default | Validation | | ----------------------- | ------------------------------------------------------------------ | -------- | ------- | ---------- | @@ -126,29 +129,28 @@ S3Configuration is the configuration to a S3 defined tier 2. - [Tier2Configuration](#tier2configuration) -| Field | Description | Required | Default | Validation | -| ----------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | -------- | ------- | ---------- | -| `bucketName` *string* | BucketName is the name of the bucket | True | | | -| `prefix` *string* | Prefix is the prefix to be used for the stored files | | | | -| `endpoint` *string* | Endpoint is the endpoint to be used | | | | -| `region` *string* | Region is the region to be used | | | | -| `walEncryptionPassword` *[SecretKeySelector](#secretkeyselector)* | WALEncryptionPassword is a pointer to the key in a secret containing the encryption password. | True | | | -| `accessKeyId` *[SecretKeySelector](#secretkeyselector)* | The S3 access key ID | | | | -| `secretAccessKey` *[SecretKeySelector](#secretkeyselector)* | The S3 access key | | | | -| `sessionToken` *[SecretKeySelector](#secretkeyselector)* | The S3 session token | | | | -| `customCaBundle` *[SecretKeySelector](#secretkeyselector)* | A pointer to a custom CA bundle | | | | +| Field | Description | Required | Default | Validation | +| ----------------------------------------------------------- | ---------------------------------------------------- | -------- | ------- | ------------------- | +| `bucketName` *string* | BucketName is the name of the bucket | True | | | +| `prefix` *string* | Prefix is the prefix to be used for the stored files | | | Optional: {}
| +| `endpoint` *string* | Endpoint is the endpoint to be used | | | Optional: {}
| +| `region` *string* | Region is the region to be used | | | Optional: {}
| +| `accessKeyId` *[SecretKeySelector](#secretkeyselector)* | The S3 access key ID | | | Optional: {}
| +| `secretAccessKey` *[SecretKeySelector](#secretkeyselector)* | The S3 access key | | | Optional: {}
| +| `sessionToken` *[SecretKeySelector](#secretkeyselector)* | The S3 session token | | | Optional: {}
| +| `customCaBundle` *[SecretKeySelector](#secretkeyselector)* | A pointer to a custom CA bundle | | | Optional: {}
| #### Server Server is the Schema for the servers API. -| Field | Description | Required | Default | Validation | -| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ---------- | -| `apiVersion` *string* | `klio.enterprisedb.io/v1alpha1` | True | | | -| `kind` *string* | `Server` | True | | | -| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | -| `spec` *[ServerSpec](#serverspec)* | | True | | | -| `status` *[ServerStatus](#serverstatus)* | | | | | +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ------------------- | +| `apiVersion` *string* | `klio.enterprisedb.io/v1alpha1` | True | | | +| `kind` *string* | `Server` | True | | | +| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | +| `spec` *[ServerSpec](#serverspec)* | | True | | | +| `status` *[ServerStatus](#serverstatus)* | | | | Optional: {}
| #### ServerSpec @@ -158,21 +160,17 @@ ServerSpec defines the desired state of Server. - [Server](#server) -| Field | Description | Required | Default | Validation | -| ---------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------------ | ---------- | -| `baseConfiguration` *[BaseConfiguration](#baseconfiguration)* | BaseConfiguration is the configuration of the Kopia server | | | | -| `image` *string* | Image is the image to be used for the Klio server | True | | | -| `imagePullPolicy` *[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#pullpolicy-v1-core)* | ImagePullPolicy defines the policy for pulling the image | | IfNotPresent | | -| `imagePullSecrets` *[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#localobjectreference-v1-core) array* | ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the
images | | | | -| `tlsSecretName` *string* | TLSSecretName is the name of the Kubernetes secret containing the server-side certificate
to be used for the Klio server. | True | | | -| `caSecretName` *string* | ClientCASecretName is the name of the Kubernetes secret containing the CA certificate
to be used by the Klio server to validate the users. | True | | | -| `resources` *[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcerequirements-v1-core)* | Resources defines the resource requirements for the Klio server | | | | -| `cacheConfiguration` *[CacheConfiguration](#cacheconfiguration)* | CacheConfiguration is the configuration of the PVC that should be
used for the cache | True | | | -| `dataConfiguration` *[DataConfiguration](#dataconfiguration)* | DataConfiguration is the configuration of the PVC that should be used
for the base backups | True | | | -| `queueConfiguration` *[QueueConfiguration](#queueconfiguration)* | QueueConfiguration is the configuration of the PVC that should host
the task queue. | | | | -| `password` *[SecretKeySelector](#secretkeyselector)* | Password is a reference to a secret containing the Klio password | True | | | -| `tier2` *[Tier2Configuration](#tier2configuration)* | Tier2 is the Tier 2 configuration | True | | | -| `template` *[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#podtemplatespec-v1-core)* | Template to override the default StatefulSet of the Klio server.
WARNING: Modifying this template may break the server functionality if not done carefully.
This field is primarily intended for advanced configuration such as telemetry setup.
Use at your own risk and ensure thorough testing before applying changes. | | | | +| Field | Description | Required | Default | Validation | +| ---------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------------ | ------------------- | +| `image` *string* | Image is the image to be used for the Klio server | True | | | +| `imagePullPolicy` *[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#pullpolicy-v1-core)* | ImagePullPolicy defines the policy for pulling the image | | IfNotPresent | Optional: {}
| +| `imagePullSecrets` *[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#localobjectreference-v1-core) array* | ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the
images | | | Optional: {}
| +| `tlsSecretName` *string* | TLSSecretName is the name of the Kubernetes secret containing the server-side certificate
to be used for the Klio server. | True | | | +| `caSecretName` *string* | ClientCASecretName is the name of the Kubernetes secret containing the CA certificate
to be used by the Klio server to validate the users. | True | | | +| `tier1` *[Tier1Configuration](#tier1configuration)* | Tier1 is the Tier 1 configuration | True | | | +| `tier2` *[Tier2Configuration](#tier2configuration)* | Tier2 is the Tier 2 configuration | True | | | +| `queue` *[Queue](#queue)* | Queue is the configuration of the PVC that should host
the task queue. | | | Optional: {}
| +| `template` *[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#podtemplatespec-v1-core)* | Template to override the default StatefulSet of the Klio server.
WARNING: Modifying this template may break the server functionality if not done carefully.
This field is primarily intended for advanced configuration such as telemetry setup.
Use at your own risk and ensure thorough testing before applying changes. | | | Optional: {}
| #### ServerStatus @@ -182,6 +180,46 @@ ServerStatus defines the observed state of Server. - [Server](#server) +#### TLSConfiguration + +TLSConfiguration contains the information needed to configure +the PKI infrastructure of the Klio server. + +*Appears in:* + +- [ServerSpec](#serverspec) + +| Field | Description | Required | Default | Validation | +| ------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `tlsSecretName` *string* | TLSSecretName is the name of the Kubernetes secret containing the server-side certificate
to be used for the Klio server. | True | | | +| `caSecretName` *string* | ClientCASecretName is the name of the Kubernetes secret containing the CA certificate
to be used by the Klio server to validate the users. | True | | | + +#### Tier1Configuration + +Tier1Configuration is the tier 1 configuration. + +*Appears in:* + +- [ServerSpec](#serverspec) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------- | ---------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `cache` *[Cache](#cache)* | Cache is the configuration of the PVC that should be
used for the cache | True | | | +| `data` *[Data](#data)* | Data is the configuration of the PVC that should be used
for the base backups | True | | | +| `encryptionKey` *[SecretKeySelector](#secretkeyselector)* | EncryptionKey is a reference to a secret containing the Klio password | True | | | + +#### Tier1PluginConfiguration + +Tier1PluginConfiguration configures tier1 backup and recovery settings. + +*Appears in:* + +- [PluginConfigurationSpec](#pluginconfigurationspec) + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------- | ------------------------------------------------------- | -------- | ------- | ------------------- | +| `retention` *[RetentionPolicy](#retentionpolicy)* | RetentionPolicy defines how many backups we should keep | | | Optional: {}
| + #### Tier2Configuration Tier2Configuration is the tier 2 configuration. @@ -190,6 +228,22 @@ Tier2Configuration is the tier 2 configuration. - [ServerSpec](#serverspec) -| Field | Description | Required | Default | Validation | -| ------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ---------- | -| `s3` *[S3Configuration](#s3configuration)* | S3 contains the configuration parameters for an S3-based tier 2 | True | | | +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------- | ---------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `cache` *[Cache](#cache)* | Cache is the configuration of the PVC that should be
used for the cache | True | | | +| `s3` *[S3Configuration](#s3configuration)* | S3 contains the configuration parameters for an S3-based tier 2 | True | | | +| `encryptionKey` *[SecretKeySelector](#secretkeyselector)* | EncryptionKey is a reference to a secret containing the Klio password | True | | | + +#### Tier2PluginConfiguration + +Tier2PluginConfiguration configures tier2 backup and recovery settings. + +*Appears in:* + +- [PluginConfigurationSpec](#pluginconfigurationspec) + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------- | ------------------------------------------------------------------------------------ | -------- | ------- | ------------------- | +| `enableBackup` *boolean* | EnableBackup controls whether WAL and base backups should be stored in tier2 | | | Optional: {}
| +| `enableRecovery` *boolean* | EnableRecovery controls whether tier2 should be included in the recovery source list | | | Optional: {}
| +| `retention` *[RetentionPolicy](#retentionpolicy)* | RetentionPolicy defines how many backups we should keep | | | Optional: {}
| diff --git a/product_docs/docs/klio/0/api/_klio_catalog_api.mdx b/product_docs/docs/klio/0/api/_klio_catalog_api.mdx index 41496c09f3..ef2021b22c 100644 --- a/product_docs/docs/klio/0/api/_klio_catalog_api.mdx +++ b/product_docs/docs/klio/0/api/_klio_catalog_api.mdx @@ -19,13 +19,13 @@ KlioBackup is the Schema for a Klio Backup API. - [KlioBackupList](#kliobackuplist) -| Field | Description | Required | Default | Validation | -| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ---------- | -| `apiVersion` *string* | `kliocatalog.enterprisedb.io/v1alpha1` | True | | | -| `kind` *string* | `KlioBackup` | True | | | -| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | -| `spec` *[KlioBackupSpec](#kliobackupspec)* | | True | | | -| `status` *[KlioBackupStatus](#kliobackupstatus)* | | | | | +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ------------------- | +| `apiVersion` *string* | `kliocatalog.enterprisedb.io/v1alpha1` | True | | | +| `kind` *string* | `KlioBackup` | True | | | +| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | +| `spec` *[KlioBackupSpec](#kliobackupspec)* | | True | | | +| `status` *[KlioBackupStatus](#kliobackupstatus)* | | | | Optional: {}
| #### KlioBackupList @@ -35,7 +35,7 @@ KlioBackupList contains a list of KlioBackup. | -------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | -------- | ------- | ---------- | | `apiVersion` *string* | `kliocatalog.enterprisedb.io/v1alpha1` | True | | | | `kind` *string* | `KlioBackupList` | True | | | -| `metadata` *[ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#listmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | +| `metadata` *[ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#listmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | | `items` *[KlioBackup](#kliobackup) array* | | True | | | #### KlioBackupSpec @@ -67,8 +67,8 @@ KlioBackupStatus defines the observed state of a KlioBackup. | `endWAL` *string* | EndWAL is the current WAL when the backup ends | True | | | | `tablespaces` *[TablespaceLayoutList](#tablespacelayoutlist)* | Tablespaces are the metadata of the tablespaces | True | | | | `annotations` *object (keys:string, values:string)* | Annotations is a generic data store where each
backend can put its metadata. | True | | | -| `startedAt` *[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)* | StartedAt is the current time when the backup started. | True | | | -| `stoppedAt` *[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)* | StoppedAt is the current time when the backup ended. | True | | | +| `startedAt` *[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#time-v1-meta)* | StartedAt is the current time when the backup started. | True | | | +| `stoppedAt` *[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#time-v1-meta)* | StoppedAt is the current time when the backup ended. | True | | | #### TablespaceLayout diff --git a/product_docs/docs/klio/0/api/klio_api.mdx b/product_docs/docs/klio/0/api/klio_api.mdx index 1f5f08d6ac..725ba6dfda 100644 --- a/product_docs/docs/klio/0/api/klio_api.mdx +++ b/product_docs/docs/klio/0/api/klio_api.mdx @@ -1,7 +1,7 @@ --- title: Klio API reference originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/api/klio_api.mdx + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/api/klio_api.mdx editTarget: originalFilePath --- diff --git a/product_docs/docs/klio/0/api/klio_catalog_api.mdx b/product_docs/docs/klio/0/api/klio_catalog_api.mdx index 4b27c18870..4265b9535f 100644 --- a/product_docs/docs/klio/0/api/klio_catalog_api.mdx +++ b/product_docs/docs/klio/0/api/klio_catalog_api.mdx @@ -1,7 +1,7 @@ --- title: Klio Catalog API reference originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/api/klio_catalog_api.mdx + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/api/klio_catalog_api.mdx editTarget: originalFilePath --- diff --git a/product_docs/docs/klio/0/api_service.mdx b/product_docs/docs/klio/0/api_service.mdx index d71c33a848..203de48f7d 100644 --- a/product_docs/docs/klio/0/api_service.mdx +++ b/product_docs/docs/klio/0/api_service.mdx @@ -1,7 +1,7 @@ --- title: Klio API Service originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/api_service.md + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/api_service.md sidebar_position: 9 editTarget: originalFilePath @@ -12,6 +12,13 @@ Kubernetes API to expose Klio backup metadata as native Kubernetes resources. This allows users to query and inspect backup information using standard Kubernetes tools like `kubectl`. +!!!caution Experimental - Subject to Change + +The Klio API Service is **experimental** and under evaluation. Future releases +may provide an alternative method for listing backups, which +may result in changes or removal of the API service. +!!! + ## Overview The API service integrates with the Kubernetes API server through the @@ -94,7 +101,7 @@ The API service setup includes: Setup procedure will be simplified in future Klio releases. !!! -#### 1. Create Service Account and RBAC Resources +#### 1. Create ServiceAccount and RBAC Resources Create a service account and the necessary RBAC resources. @@ -186,7 +193,8 @@ for instructions on generating the certificate. Create a Kubernetes `Secret` containing the Klio server's TLS certificate for validating the server's identity. 
-If you're using `cert-manager`, you can create a `Certificate` resource as shown below: +If you're using `cert-manager`, you can create a `Certificate` resource +as shown below: ```yaml apiVersion: cert-manager.io/v1 @@ -283,7 +291,7 @@ spec: value: /client-certs/tls.key - name: TMPDIR value: /tmp - image: ghcr.io/enterprisedb/klio:v0.0.8 + image: docker.enterprisedb.com/k8s/klio:v0.0.11 imagePullPolicy: Always name: api-server resources: {} @@ -302,11 +310,6 @@ spec: -!!!warning - -`CLIENT_BASE_HOSTNAME` will be removed in a future Klio release. -!!! - #### 5. Create a service Now expose the Deployment via a Kubernetes Service: diff --git a/product_docs/docs/klio/0/architectures.mdx b/product_docs/docs/klio/0/architectures.mdx index 7f0d3f48ff..1a4199038b 100644 --- a/product_docs/docs/klio/0/architectures.mdx +++ b/product_docs/docs/klio/0/architectures.mdx @@ -1,7 +1,7 @@ --- title: Architectures & Tiers originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/architectures.md + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/architectures.md sidebar_position: 3 editTarget: originalFilePath @@ -54,7 +54,8 @@ all necessary backup artifacts for most recovery scenarios. This tier consists of a **local Persistent Volume (PV)** deployed by the Klio Server. It can be located in the same namespace as the PostgreSQL cluster -or in a different one within the same Kubernetes cluster (see the ["Tier 1 Architectures" section below](#tier-1-architectures)). +or in a different one within the same Kubernetes cluster +(see the ["Tier 1 Architectures" section below](#tier-1-architectures)). Its purpose is to store the **WAL archive** and the **catalog of physical base backups**. Its high-throughput, low-latency nature is optimized for several key @@ -150,6 +151,12 @@ worker nodes for Klio pods using Kubernetes taints and tolerations. ## Tier 2: Secondary Storage (Object Storage) +!!!warning Work in Progress + +Tier 2 functionality is currently under heavy development and should be +considered experimental. The features described below are subject to change. +!!! + **Tier 2** provides durable, long-term storage for robust disaster recovery (DR) strategies. It's physically and logically separate from the primary Kubernetes cluster and typically consists of an external object storage system, @@ -174,22 +181,6 @@ If the backup is not found in Tier 1, Klio will automatically check Tier 2. This fallback mechanism ensures that backups that have been migrated to Tier 2 are still accessible for restore operations. -To enable Tier 2 restore capabilities, set the `tier2` field to `true` in your -`PluginConfiguration`: - -```yaml -apiVersion: klio.enterprisedb.io/v1alpha1 -kind: PluginConfiguration -metadata: - name: client-config-example-restore -spec: - serverAddress: server-sample.default - clientSecretName: cluster-restore-klio-user - serverSecretName: server-sample-tls - clusterName: cluster-example - tier2: true -``` - When Tier 2 is enabled and a backup exists in both tiers, Tier 1 takes precedence as restore from it will be faster. @@ -203,7 +194,8 @@ object storage without the risk of accidental modifications. 
 When a WAL server is started in read-only mode:
 
 - All **read operations** (e.g., `Get`, `GetMetadata`) continue to function normally
-- All **write operations** (e.g., `Put`, `SetFirstRequiredWAL`, `RequestWALStart`, `ResetWALStream`) are rejected with a `FailedPrecondition` gRPC error
+- All **write operations** (e.g., `Put`, `SetFirstRequiredWAL`, `RequestWALStart`,
+  `ResetWALStream`) are rejected with a `FailedPrecondition` gRPC error
 - The server will return error code `3` (FailedPrecondition) for any write attempt
 
 This ensures data integrity in distributed backup scenarios where secondary sites
diff --git a/product_docs/docs/klio/0/backup_and_restore.mdx b/product_docs/docs/klio/0/backup_and_restore.mdx
index 71173abb85..0da8167a61 100644
--- a/product_docs/docs/klio/0/backup_and_restore.mdx
+++ b/product_docs/docs/klio/0/backup_and_restore.mdx
@@ -1,7 +1,7 @@
 ---
 title: Backup and Restore
 originalFilePath: >-
-  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/backup_and_restore.md
+  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/backup_and_restore.md
 
 sidebar_position: 7
 editTarget: originalFilePath
@@ -30,12 +30,6 @@ It is recommended to periodically test backup restores to ensure correct
 recovery procedures.
 !!!
 
-!!!warning
-
-The Klio MVP does not currently verify the presence of all required WAL files
-for a given backup. This limitation will be resolved before the GA release.
-!!!
-
 ## Prerequisites
 
 Before performing backup and restore operations, ensure you have:
@@ -145,13 +139,13 @@ kubectl apply -f scheduled-backup.yaml
 ## Backup Retention and Maintenance
 
 Klio automatically manages backup retention based on the
-[retention policy](plugin_configuration.mdx#retention-policies) defined in the
+[retention policies](plugin_configuration.mdx#retention-policies) defined in the
 `PluginConfiguration` referred by the `Cluster`.
 
 !!!important
 
 Deleting a `Backup` resource through `kubectl` only removes the Kubernetes
-object. The actual backup data in the Klio server may be retained according to
+object. The actual backup data in the Klio server will be retained according to
 the retention policy.
 !!!
 
@@ -171,6 +165,9 @@ Once you identify the backup you want to use, you can identify its backupID
 
 kubectl get backup -n -o jsonpath='{.status.backupId}'
 ```
 
+Alternatively, you can use the [API service](api_service.mdx) to get the backup
+list.
+
 ## Restoring from a Backup
 
 Klio supports restoring PostgreSQL clusters from backups using CloudNativePG's
@@ -232,10 +229,12 @@ spec:
 
 !!!note
 
-Klio will choose the latest backup available in case the `backupID` field is omitted.
+Klio will choose the latest available backup if the `backupID` field is
+omitted.
 !!!
 
-Create a corresponding `PluginConfiguration` that specifies which backup to restore:
+Create a corresponding `PluginConfiguration` that specifies which backup to
+restore:
 
 ```yaml
 apiVersion: klio.enterprisedb.io/v1alpha1
@@ -309,6 +308,7 @@ available WAL files.
 
 !!!note
 
-During the Point in Time Recovery, Klio will automatically choose the right
-backup if not specified with the `backupID` field.
+During Point-in-Time Recovery, if `targetTime` or `targetLSN` is specified and
+no `backupID` is set, Klio automatically chooses the closest suitable backup
+for the recovery.
 !!!
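+
+As an illustrative sketch (the cluster source name and the timestamp are
+placeholders), a recovery target can be expressed in the restored `Cluster`
+using CloudNativePG's `recoveryTarget` stanza:
+
+```yaml
+spec:
+  bootstrap:
+    recovery:
+      source: origin
+      recoveryTarget:
+        # Recover up to this point in time; a targetLSN can be used instead
+        targetTime: "2026-01-05 12:00:00+00"
+```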
diff --git a/product_docs/docs/klio/0/helm_chart.mdx b/product_docs/docs/klio/0/helm_chart.mdx index db363213d2..28fb9c7986 100644 --- a/product_docs/docs/klio/0/helm_chart.mdx +++ b/product_docs/docs/klio/0/helm_chart.mdx @@ -2,7 +2,7 @@ title: EDB Klio Operator Helm Chart navTitle: '' originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/helm_chart.mdx + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/helm_chart.mdx sidebar_position: 90 editTarget: originalFilePath @@ -21,7 +21,7 @@ Operator in your Kubernetes cluster. It is distributed as a private OCI image. Before installing the Klio Operator, ensure you have: -- **Helm** - see the [Helm installation guide](https://helm.sh/docs/intro/install/) +- **Helm** – see the [Helm installation guide](https://helm.sh/docs/intro/install/) - **Kubernetes** cluster with appropriate permissions - **Credentials** to access the registry hosting the Helm chart, the Klio operator image, and the Klio operand image. @@ -39,18 +39,19 @@ Before installing the Klio Operator, ensure you have: First, authenticate with the EDB registry where the Helm chart is hosted: ```sh -helm registry login -u -p +helm registry login docker.enterprisedb.com -u -p ``` -Replace ``, ``, and `` with the required credentials. +Replace ``, and `` with the required credentials. ### Step 2: Create an Image Pull Secret -Create a Kubernetes secret to allow the operator to pull container images from the registry: +Create a Kubernetes secret to allow the operator to pull container images from +the registry: ```sh kubectl create secret docker-registry klio-registry-secret \ - --docker-server= \ + --docker-server=docker.enterprisedb.com \ --docker-username= \ --docker-password= \ --namespace @@ -69,8 +70,8 @@ Deploy the Klio Operator to your cluster: ```sh -helm install klio-operator oci://ghcr.io/enterprisedb/klio-operator-chart \ - --version 0.0.6 \ +helm install klio-operator oci://docker.enterprisedb.com/k8s/klio-operator-chart \ + --version 0.0.11 \ --namespace \ --set controllerManager.manager.image.pullSecrets[0].name=klio-registry-secret ``` @@ -107,8 +108,8 @@ You should see CRDs like `servers.klio.enterprisedb.io` and `pluginconfiguration The chart is designed to be customizable, allowing you to configure multiple aspects of the Klio Operator deployment, passing in values through a custom `values.yaml` file or using the `--set` flag during installation. -See the [Helm documentation](https://helm.sh/docs/) for more details on how to customize and -manage Helm charts. +See the [Helm documentation](https://helm.sh/docs/) for more details +on how to customize and manage Helm charts. 
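+
+For example, a sketch of overriding defaults at install time with a custom
+values file (the file name `my-values.yaml` is illustrative):
+
+```sh
+# Install (or upgrade) the operator with custom values
+helm upgrade --install klio-operator \
+  oci://docker.enterprisedb.com/k8s/klio-operator-chart \
+  --version 0.0.11 \
+  -f my-values.yaml
+```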
#### Inspecting the Chart
 
@@ -118,7 +119,7 @@ review the default values, and understand what resources it will create:
 
 ```sh
-helm pull oci://ghcr.io/enterprisedb/klio-operator-chart --version 0.0.6
+helm pull oci://docker.enterprisedb.com/k8s/klio-operator-chart --version 0.0.11
 ```
 
@@ -129,7 +130,7 @@ default `values.yaml`, and other chart files:
 
 ```sh
-tar -xzf klio-operator-chart-0.0.6.tgz
+tar -xzf klio-operator-chart-0.0.11.tgz
 cd klio-operator-chart
 cat values.yaml
 ```
 
@@ -145,7 +146,7 @@ cat values.yaml
 To upgrade the Klio Operator to a newer version:
 
 ```sh
-helm upgrade klio-operator oci://ghcr.io/enterprisedb/klio-operator-chart \
+helm upgrade klio-operator oci://docker.enterprisedb.com/k8s/klio-operator-chart \
   --version \
   --namespace
 ```
@@ -176,8 +177,9 @@ Uninstalling the operator does not automatically remove:
 
 - Custom Resource Definitions (CRDs)
 - Existing Klio resources (Servers, PluginConfigurations)
 - Persistent volumes containing backup data
-  To completely remove Klio from your cluster, you must manually delete these resources.
-  If you want to completely remove Klio, you must manually delete these resources.
+
+To completely remove Klio from your cluster, you must manually delete these resources.
 !!!
 
 To remove the CRDs after uninstalling:
diff --git a/product_docs/docs/klio/0/index.mdx b/product_docs/docs/klio/0/index.mdx
index 1dc15ebc54..ba3eb7855b 100644
--- a/product_docs/docs/klio/0/index.mdx
+++ b/product_docs/docs/klio/0/index.mdx
@@ -15,8 +15,9 @@ navigation:
   - images
   - '!_helm_chart_values'
 originalFilePath: >-
-  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/index.mdx
+  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/index.mdx
 directoryDefaults:
+  version: 0.0.11
   displayBanner: >-
     This is documentation for a Tech Preview of EDB's {{name.ln}}
     ({{name.short}}) solution. It is made available AS IS for testing and early
 
     For details, please refer to EULA section 9.4.
 
-version: 0.0.10
 sidebar_position: 1
 editTarget: originalFilePath
 
@@ -37,7 +37,7 @@ Kubernetes. It is designed to handle:
 
 - The **Write-Ahead Log (WAL) archive** for a given PostgreSQL `Cluster`
   resource, within the same Kubernetes namespace as the Klio deployment
 - The **catalog of physical base backups** for that same cluster
-- Optionally, multiple PostgreSQL clusters in the same namespace
+- Optionally, multiple PostgreSQL clusters
 
 These critical backup artifacts are stored across two distinct storage tiers:
 
@@ -59,7 +59,7 @@
 
 !!!note
 
-Most of the following features are currently aspirational and under active
+Some of the following features are currently aspirational and under active
 development.
 !!!
- Native WAL streaming from the primary, eliminating the need for `archive_command`, with support for: - - Partial WAL file handling - WAL file compression - WAL file encryption using user-provided keys - Controlled replication slot advancement to ensure uninterrupted streaming - Synchronous replication - - WAL archive storage on a local PVC (Tier 1) - - Extension of base backup retention policy enforcement to WAL files - - Asynchronous WAL relay to Tier 2 object storage !!!important @@ -91,22 +87,16 @@ Therefore, Klio requires PostgreSQL version 15 or greater to function properly. - Physical online base backups from the primary node to Tier 1, with support for: - - Data deduplication for efficient remote incremental backups - Compression to optimize storage usage - Encryption using user-provided keys for data confidentiality - - Backup catalog stored on a file system Persistent Volume Claim (PVC) in Tier 1 - - Integration with CloudNativePG Kubernetes Volume Snapshots (Tier 0), enabling asynchronous offload to Tier 1 using the same physical backup - process - -- Retention policy enforcement based on defined recovery windows, including - Kubernetes Volume Snapshots - + process[^1] +- Retention policy enforcement - Asynchronous replication of base backups to Tier 2 object storage for - long-term durability and disaster recovery (DR) + long-term durability and disaster recovery !!!important @@ -118,7 +108,9 @@ classes that support volume snapshots. - End-to-end encryption: both in-transit and at-rest - Designed for seamless integration with Kubernetes-native data protection - tools such as Veeam Kasten, Velero, and others + tools such as Veeam Kasten, Velero, and others[^1] - Delivered as a CNPG-I plugin, with an accompanying Kubernetes Operator -- Available as a Certified Red Hat OpenShift Operator +- Available as a Certified Red Hat OpenShift Operator[^1] - Distributed via a Helm chart for streamlined deployment + +[^1]\: Not yet available; planned for a future release. diff --git a/product_docs/docs/klio/0/klio_server.mdx b/product_docs/docs/klio/0/klio_server.mdx index 9dae47f4cc..ea81ad935a 100644 --- a/product_docs/docs/klio/0/klio_server.mdx +++ b/product_docs/docs/klio/0/klio_server.mdx @@ -1,7 +1,7 @@ --- title: The Klio Server originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/klio_server.md + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/klio_server.md sidebar_position: 5 editTarget: originalFilePath @@ -11,11 +11,10 @@ The Klio server is a central component of the Klio backup solution. It is defined as the `Server` custom resource in Kubernetes, which creates a StatefulSet running the Klio server application. -The Klio server is composed of three main containers: +The Klio server is composed of two main containers: - `base`: Manages full and incremental backups using Kopia. - `wal`: Receives the stream of PostgreSQL Write-Ahead Logs (WAL). -- `nats`: Provides a work queue using NATS JetStream for async WAL processing. An additional init container, `init`, is responsible for initializing the Kopia repository and setting up the necessary configuration. @@ -23,13 +22,80 @@ Kopia repository and setting up the necessary configuration. The base backups and WAL files are stored in multiple PersistentVolume attached to the Klio server pod in the `/data/base` and `/data/wal` directories, respectively. 
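+
+As a quick sanity check, you can list the volumes backing these directories.
+This sketch assumes a `Server` named `my-server` in the `default` namespace
+(both names are illustrative):
+
+```sh
+# Show the PersistentVolumeClaims attached to the Klio server
+kubectl get pvc -n default | grep my-server
+```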
-An additional cache defined by a PersistentVolume is used for the Kopia cache. This cache allows Kopia to
-quickly browse repository contents without having to download from the storage
-location.
+An additional cache defined by a PersistentVolume is used for the Kopia cache.
+This cache allows Kopia to quickly browse repository contents without
+having to download from the storage location.
 
-The work queue is backed by NATS JetStream with file storage on a separate PersistentVolume mounted at `/queue`.
-When a WAL file is received, the server publishes a notification to the queue, enabling asynchronous processing
-of WAL files by consumers.
+## Storage Tiers
+
+### Tier 1: Local Storage
+
+Tier 1 uses local `PersistentVolumes` for immediate data access.
+This is the primary landing zone for backups and WAL files,
+providing the fastest recovery times.
+
+### Tier 2: Remote Object Storage
+
+!!!warning Work in Progress
+
+Tier 2 functionality is currently under heavy development and should be
+considered experimental. The features described below are subject to change.
+!!!
+
+Tier 2 offloads data to S3-compatible object storage.
+This is used for long-term retention and disaster recovery.
+When Tier 2 is enabled, the server uses a work queue to manage
+the asynchronous transfer of data from the local environment to the cloud.
+
+### The Work Queue
+
+If both Tier 1 and Tier 2 are configured, it is mandatory to configure
+a work queue in the Klio `Server` resource.
+The work queue is backed by NATS JetStream with file storage on a separate
+`PersistentVolume` mounted at `/queue`.
+When a WAL file is received, the server publishes a notification to the queue,
+enabling asynchronous processing. This ensures that the primary backup flow
+is not slowed down by network latency to remote object storage.
+
+## Storage Requirements
+
+The Klio Server uses three distinct PersistentVolumeClaims (PVCs), each
+serving a different purpose. Understanding what each PVC contains helps you
+size them appropriately for your environment.
+
+### Data PVC
+
+The data PVC stores all backup data and WAL archives for Tier 1 storage.
+
+It holds the base backups and the WAL archive of every PostgreSQL cluster that
+is backed up.
+
+The following factors should be considered when defining the PVC size:
+
+1. WAL file production rate
+2. Base backup size
+3. Retention policies
+
+### Cache PVCs
+
+The cache PVCs (one each for Tier 1 and Tier 2) are used by Kopia for its
+[caching operations](https://kopia.io/docs/advanced/caching/).
+They are used to speed up snapshot operations.
+
+!!!warning
+
+Klio is currently limited to using the default cache sizes when creating a Kopia
+repository: 5GB for content and 5GB for metadata.
+The cache sizes are not hard limits, as the cache is swept periodically,
+so users should allow a buffer of additional space.
+This limitation will be removed in a future version.
+!!!
+
+### Queue PVC
+
+The queue PVC is only required when both Tier 1 and Tier 2 are configured.
+It stores the NATS JetStream work queue used for asynchronous Tier 2
+replication.
 
 ## Setting up a new Klio server
 
@@ -55,14 +121,13 @@ A Klio server setup requires the following components:
 
 2. **TLS Certificate**: For secure communication
 3. **Encryption Password**: For encrypting backup data at rest
 4. **CA Certificate**: For client authentication via mTLS
-5. **Admin User Credentials**: Optional admin user for Kopia operations
-6. **Storage**: PersistentVolumeClaims for data, cache, and queue
+5. 
**Storage**: PersistentVolumeClaims for data, cache, and queue ### Step-by-step setup -#### 1. Create the Encryption Password Secret +#### 1. Create the Encryption Key Secret -The encryption password is used to encrypt backup data at rest: +The encryption key is used to encrypt backup data at rest: ```yaml apiVersion: v1 @@ -72,7 +137,7 @@ metadata: namespace: default type: Opaque data: - password: "bXktc2VjdXJlLXBhc3N3b3Jk" # my-secure-password + encryptionKey: "bXktc2VjdXJlLWtleQ==" # my-secure-key ``` Apply the secret: @@ -83,7 +148,7 @@ kubectl apply -f encryption-secret.yaml !!!tip -Use a strong, randomly generated password. This password is critical for +Use a strong, randomly generated key. This key is critical for data security and recovery. !!! @@ -143,30 +208,7 @@ PKI infrastructure already includes a CA for this scope, that CA can be used for the Klio server, too. !!! -#### 3. (Optional) Create Admin User Credentials - -If you need admin access to the underlying Kopia server web interface -(mostly for debugging purposes), define the secret as follows: - -```yaml -apiVersion: v1 -kind: Secret -metadata: - name: my-server-adm - namespace: default -type: kubernetes.io/basic-auth -data: - username: "YWRtaW4=" # admin - password: "YWRtaW4tcGFzc3dvcmQ=" # admin-password -``` - -Apply the secret: - -```bash -kubectl apply -f admin-credentials.yaml -``` - -#### 4. Create TLS Certificate +#### 3. Create TLS Certificate Using cert-manager, create a self-signed certificate (for development) or use your organization's certificate issuer: @@ -213,10 +255,12 @@ kubectl apply -f tls-certificate.yaml !!!info -For production environments, use certificates signed by your organization's Certificate Authority (CA) or a trusted public CA instead of self-signed certificates. +For production environments, use certificates signed by your organization's +Certificate Authority (CA) or a trusted public CA instead of self-signed +certificates. !!! -#### 5. Create the Server Resource +#### 4. 
Create the Server Resource Now create the main `Server` resource: @@ -230,7 +274,7 @@ metadata: namespace: default spec: # Container image for the Klio server - image: ghcr.io/enterprisedb/klio:v0.0.10 + image: docker.enterprisedb.com/k8s/klio:v0.0.11 imagePullPolicy: IfNotPresent imagePullSecrets: [] # Add image pull secrets if needed @@ -240,38 +284,34 @@ spec: # Client authentication configuration caSecretName: server-sample-ca - # Encryption password reference - password: - name: my-server-encryption - key: password - - # Optional: Admin user for Kopia operations - baseConfiguration: - adminUser: - name: my-server-adm - - # Cache storage configuration - cacheConfiguration: - pvcTemplate: - storageClassName: standard # Adjust to your storage class (use 'kubectl get storageclass' to see available options) - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10Gi # Adjust based on your needs - - # Data storage pvcTemplate (for backups and WAL) - dataConfiguration: - pvcTemplate: - storageClassName: standard # Adjust to your storage class (use 'kubectl get storageclass' to see available options) - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 100Gi # Adjust based on your backup needs + # tier 1 configuration + tier1: + # Cache storage configuration + cache: + pvcTemplate: + storageClassName: standard # Adjust to your storage class (use 'kubectl get storageclass' to see available options) + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi # Adjust based on your needs + # Data storage pvcTemplate (for backups and WAL) + data: + pvcTemplate: + storageClassName: standard # Adjust to your storage class (use 'kubectl get storageclass' to see available options) + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi # Adjust based on your backup needs + # Encryption key reference + encryptionKey: + name: my-server-encryption + key: encryptionKey # Queue storage configuration (for NATS work queue) - queueConfiguration: + # It can be added only if both tier1 and tier2 are configured + queue: pvcTemplate: storageClassName: standard # Adjust to your storage class accessModes: @@ -280,14 +320,35 @@ spec: requests: storage: 10Gi # Adjust based on queue volume needs - # Optional: Resource requirements - resources: - requests: - memory: "1Gi" - cpu: "500m" - limits: - memory: "2Gi" - cpu: "2000m" + # tier 2 configuration + tier2: + # Cache storage configuration + cache: + pvcTemplate: + resources: + requests: + storage: 1Gi + accessModes: + - ReadWriteOnce + # Encryption key reference. Can differ from tier1 encryption key. + encryptionKey: + name: my-server-encryption + key: encryptionKey + # S3 access configuration + s3: + prefix: klio + bucketName: klio-bucket + endpoint: https://minio:9000 + region: us-east-1 + accessKeyId: + name: minio + key: ACCESS_KEY_ID + secretAccessKey: + name: minio + key: ACCESS_SECRET_KEY + customCaBundle: + name: minio-server-tls + key: tls.crt ``` @@ -298,7 +359,7 @@ Apply the Server resource: kubectl apply -f klio-server.yaml ``` -#### 6. Verify the Server is Running +#### 5. Verify the Server is Running Check the status of your Klio server: @@ -382,15 +443,15 @@ ensure data security throughout the backup lifecycle. ### Base Backups Encryption Base backups are encrypted by Kopia using the encryption password provided in -the `password` secret reference. Kopia handles encryption transparently. +the `encryptionKey` secret references. Kopia handles encryption transparently. 
-The encryption password is set during repository initialization and is required +The encryption key is set during repository initialization and is required for all subsequent backup and restore operations. !!!warning Critical -Store the encryption password securely. Loss of this password means permanent -loss of access to all backup data. There is no password recovery mechanism. +Store the encryption key securely. Loss of this key means permanent +loss of access to all backup data. There is no key recovery mechanism. !!! ### WAL Files Encryption @@ -399,31 +460,31 @@ WAL files are encrypted using a master key derivation system with authenticated encryption. The encryption process works as follows: 1. **Master Key Generation**: A 32-byte master key is derived from the encryption - password using PBKDF2 -2. **Key Enveloping**: The master key itself is encrypted using AES-256-GCM with a - password-derived encryption key to protect the key at rest + key using PBKDF2 +2. **Key Enveloping**: The master key itself is encrypted using AES-256-GCM + with a password-derived encryption key to protect the key at rest 3. **Per-File Encryption**: Each WAL file is compressed and then encrypted using the master key with authenticated encryption before being stored -WAL files are first compressed using Snappy S2 compression, then encrypted to ensure both space -efficiency and security. +WAL files are first compressed using Snappy S2 compression, +then encrypted to ensure both space efficiency and security. -The same encryption password used for base backups encrypts the WAL files, +The same encryption key used for base backups encrypts the WAL files, ensuring a unified security model across all backup artifacts. ### Encryption Password Rotation -Currently, encryption password rotation is not supported. To change the -encryption password, you would need to: +Currently, encryption key rotation is not supported. To change the +encryption key, you would need to: -1. Create a new Klio server with a new encryption password +1. Create a new Klio server with a new encryption key 2. Perform new base backups to the new server 3. Migrate to using the new server !!!tip -Choose a strong encryption password from the start. Use a password manager or -key management system to generate and store a cryptographically secure password +Choose a strong encryption key from the start. Use a password manager or +key management system to generate and store a cryptographically secure key (recommended: 32+ random characters). !!! @@ -449,7 +510,7 @@ backup data is protected both at rest and in transit. ## Authentication Klio uses mTLS Authentication for securing access to both the base backup server -and the WAL streaming server. Authentication is handled by verify the client +and the WAL streaming server. Authentication is handled by verifying the client certificates against the CA certificate which has been created when configuring the Klio server. @@ -495,43 +556,10 @@ spec: secretName: server-sample-ca ``` -### Admin User (Optional) - -The optional admin user (`.spec.baseConfiguration.adminUser`) provides access to -the Kopia web interface for administrative and debugging purposes. 
This is
-separate from the regular user authentication:
-
-```yaml
-spec:
-  baseConfiguration:
-    adminUser:
-      name: my-server-adm # Reference to kubernetes.io/basic-auth secret
-```
-
-The admin user secret must be of type `kubernetes.io/basic-auth`:
-
-```yaml
-apiVersion: v1
-kind: Secret
-metadata:
-  name: my-server-adm
-type: kubernetes.io/basic-auth
-data:
-  username: 
-  password: 
-```
-
-!!!info
-
-The admin user is primarily intended for debugging and should be used sparingly
-in production environments. Regular backup and restore operations use mTLS
-certificates.
-!!!
-
 ## Access Control Lists (ACLs)
 
 Klio automatically configures Kopia's Access Control Lists (ACLs) during server
 startup to provide fine-grained access control to backup snapshots. This
 automation eliminates the need for manual ACL configuration.
 
 ### Automatic ACL Configuration
 
 When the Klio server starts, it automatically:
 
 1. **Enables ACL support** in the Kopia repository
-2. **Creates a read-only user** (`snapshot_reader@klio`) with READ access to all snapshots
+2. **Creates a read-only user** (`snapshot_reader@klio`)
+   with READ access to all snapshots
 3. **Configures the API server** to use the read-only user for backup catalog queries
 
-This automation ensures that the Klio API server (used for backup observability
-and catalog browsing) operates with minimal privileges, following the principle
-of least privilege.
+This automation ensures that the Klio [API service](api_service.mdx) (used for
+backup observability and catalog browsing) operates with minimal privileges,
+following the principle of least privilege.
 
 ### How ACLs Work
 
@@ -609,7 +638,7 @@ spec:
 
 ### API Server Integration
 
-The Klio API server deployment is automatically configured to use the
+The Klio API service deployment is automatically configured to use the
 `snapshot_reader@klio` user for all Kopia operations. This happens through
 environment variable configuration in the API server deployment:
 
@@ -620,7 +649,7 @@ environment variable configuration in the API server deployment:
   value: /client-certs/tls.key
 ```
 
-No manual configuration is required - the API server will automatically use the
+No manual configuration is required. The API server will automatically use the
 restricted read-only access for all backup catalog queries.
 
 ### Benefits
 
@@ -635,28 +664,6 @@ The automated ACL configuration provides several benefits:
 
 ### Idempotency
 
-The ACL automation is idempotent - if ACLs are already enabled or the user
+The ACL automation is idempotent. If ACLs are already enabled or the user
 already exists, the startup process will detect this and continue without
 error. This allows for safe server restarts and upgrades.
-
-### Troubleshooting ACLs
-
-If you encounter ACL-related issues, check the Klio server logs:
-
-```bash
-kubectl logs my-server-klio-0 -n default -c base
-```
-
-Look for log entries related to ACL enablement:
-
-- `"ACLs enabled"`: ACLs were successfully enabled
-- `"ACLs already enabled"`: ACLs were previously enabled (normal on restart)
-- `"User snapshot_reader added to ACLs"`: Read-only user was successfully configured
-- `"failed to execute ACLs enablement"`: An error occurred during ACL setup
-
-!!!note
-
-ACL configuration happens during server startup, before the Kopia server process
-begins accepting connections. Any ACL errors will appear early in the container
-logs.
-!!! diff --git a/product_docs/docs/klio/0/main_concepts.mdx b/product_docs/docs/klio/0/main_concepts.mdx index fa3d053546..5f3df1a7d1 100644 --- a/product_docs/docs/klio/0/main_concepts.mdx +++ b/product_docs/docs/klio/0/main_concepts.mdx @@ -1,7 +1,7 @@ --- title: Main Concepts originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/main_concepts.md + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/main_concepts.md sidebar_position: 2 editTarget: originalFilePath diff --git a/product_docs/docs/klio/0/opentelemetry.mdx b/product_docs/docs/klio/0/opentelemetry.mdx index 9712e50b2d..d7dcf707d9 100644 --- a/product_docs/docs/klio/0/opentelemetry.mdx +++ b/product_docs/docs/klio/0/opentelemetry.mdx @@ -1,7 +1,7 @@ --- title: OpenTelemetry Observability originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/opentelemetry.md + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/opentelemetry.md sidebar_position: 8 editTarget: originalFilePath diff --git a/product_docs/docs/klio/0/plugin_configuration.mdx b/product_docs/docs/klio/0/plugin_configuration.mdx index 3581803578..69a509625d 100644 --- a/product_docs/docs/klio/0/plugin_configuration.mdx +++ b/product_docs/docs/klio/0/plugin_configuration.mdx @@ -1,7 +1,7 @@ --- title: The Klio Plugin originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/plugin_configuration.md + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/plugin_configuration.md sidebar_position: 6 editTarget: originalFilePath @@ -61,7 +61,8 @@ The client credentials must be stored in a Kubernetes Secret of type `kubernetes.io/tls`, containing a secret to be presented to the Klio server. This secret can be generated with cert-manager by following the [documentation -in the Klio server page](klio_server.mdx#creating-a-client-side-certificate). +in the Klio server page](klio_server.mdx#creating-a-client-side-certificate), +or be provided by the user. ### Server Address @@ -136,7 +137,9 @@ customize the plugin's behavior. ### Retention policies -Define how long backups should be retained by configuring the retention policy: +Define how long backups should be retained by configuring retention policies +for Tier 1 and Tier 2 storage. Retention policies can be configured +independently for each tier: ```yaml apiVersion: klio.enterprisedb.io/v1alpha1 @@ -147,13 +150,22 @@ spec: serverAddress: klio-server.default clientSecretName: klio-client-credentials serverSecretName: klio-server-tls - retention: - keepLatest: 5 - keepHourly: 12 - keepDaily: 7 - keepWeekly: 4 - keepMonthly: 6 - keepAnnual: 2 + tier1: + retention: + keepLatest: 5 + keepHourly: 12 + keepDaily: 7 + keepWeekly: 4 + keepMonthly: 6 + keepAnnual: 2 + tier2: + enableBackup: true + enableRecovery: true + retention: + keepLatest: 10 + keepDaily: 30 + keepMonthly: 12 + keepAnnual: 5 ``` Except for `keepLatest`, each option defines how many backups to retain @@ -195,33 +207,39 @@ spec: This can be useful working with backups from different clusters, for example when restoring clusters or configuring replica clusters. 
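+
+For example, a minimal sketch (resource names are illustrative): a
+`PluginConfiguration` used for a restore can point at the backups taken by
+the original cluster:
+
+```yaml
+spec:
+  # Read the backup catalog that was written by "cluster-example"
+  clusterName: cluster-example
+```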
-### Tier 2 restore +### Tier 2 configuration -To enable restore from Tier 2 storage, set the `tier2` field to `true`: +Tier 2 provides secondary storage (typically object storage like S3) for +long-term backup retention and disaster recovery. Configure Tier 2 using the +`tier2` section: ```yaml spec: - tier2: true + tier2: + enableBackup: true + enableRecovery: true + retention: + keepDaily: 30 + keepMonthly: 12 ``` -When enabled, Klio will look for backups in both Tier 1 and Tier 2. If a backup -is available in both tiers, Tier 1 takes precedence as restore from it will be -faster. +#### Options -See the [Architecture documentation](architectures.mdx#tier-2-secondary-storage-object-storage) -for more details on Tier 2 storage. +- **`enableBackup`**: When set to `true`, backups and WAL files are + automatically synchronized to Tier 2 storage after being stored in Tier 1. + This ensures your backups are available in long-term storage. -### Restore configuration +- **`enableRecovery`**: When set to `true`, Klio will look for backups and + WAL files in both Tier 1 and Tier 2 during restore operations. If a backup + is available in both tiers, Tier 1 takes precedence as restore from it will + be faster. -When performing a restore, you can specify which backup to use: +- **`retention`**: Configure a separate retention policy for Tier 2. + Typically, you would configure longer retention periods for Tier 2 since + object storage is more cost-effective for long-term storage. -```yaml -spec: - backupId: backup-YYYYMMDDHHMMSS -``` - -You can find the backup ID in the `Backup` resources status, or through the -Klio API server. +See the [Architecture documentation](architectures.mdx#tier-2-secondary-storage-object-storage) +for more details on Tier 2 storage. ### Observability @@ -285,19 +303,23 @@ spec: The containers you define serve as the base for the Klio sidecars, with the following merge behavior: -1. **Your container is the base**: When you define a container (e.g., `klio-plugin`), - your specification serves as the starting point +1. **Your container is the base**: When you define a container + (e.g., `klio-plugin`), your specification serves as the starting point 2. **Klio enforces required values**: Klio sets its essential configuration: - Container `name` (klio-plugin, klio-wal, or klio-restore) - Container `args` (the command arguments needed for operation) - `CONTAINER_NAME` environment variable -3. **Your customizations are preserved**: All other fields you define remain intact +3. **Your customizations are preserved**: All other fields you define remain + intact 4. **Template defaults fill gaps**: For fields you don't specify, Klio applies sensible defaults (image, security context, standard volume mounts, etc.) -**Important**: Klio's required values (name, args, CONTAINER_NAME env var) will +!!!important + +Klio's required values (name, args, `CONTAINER_NAME` env var) will always override any conflicting values you set. All other customizations are respected. +!!! ### Available sidecar containers @@ -344,11 +366,3 @@ spec: memory: "128Mi" cpu: "250m" ``` - -!!!warning - -Be careful when customizing containers. While your customizations serve as the -base, Klio will override certain critical values (name, args, CONTAINER_NAME env var) -that are required for proper operation. Avoid setting these fields as they will be -replaced. Always test changes in a non-production environment first. -!!! 
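+
+To confirm that your overrides were merged as expected, you can inspect the
+resulting sidecar on a running cluster pod. A sketch, with an illustrative
+pod name:
+
+```sh
+# Print the resources of the klio-plugin sidecar on pod cluster-example-1
+kubectl get pod cluster-example-1 \
+  -o jsonpath='{.spec.containers[?(@.name=="klio-plugin")].resources}'
+```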
diff --git a/product_docs/docs/klio/0/wal_streaming.mdx b/product_docs/docs/klio/0/wal_streaming.mdx index 4a6495aa0b..a9f2b4dc94 100644 --- a/product_docs/docs/klio/0/wal_streaming.mdx +++ b/product_docs/docs/klio/0/wal_streaming.mdx @@ -1,7 +1,7 @@ --- title: WAL Streaming originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/wal_streaming.md + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/wal_streaming.md sidebar_position: 4 editTarget: originalFilePath @@ -60,8 +60,6 @@ approach: at rest, providing protection not available with the traditional `archive_command`. - - - **Comprehensive observability:** Native metrics and structured logging provide full visibility into WAL streaming operations, simplifying monitoring, anomaly detection, and troubleshooting compared to the opaque diff --git a/product_docs/docs/klio/0/walplayer.mdx b/product_docs/docs/klio/0/walplayer.mdx index b74cd02ca0..97d51bc067 100644 --- a/product_docs/docs/klio/0/walplayer.mdx +++ b/product_docs/docs/klio/0/walplayer.mdx @@ -1,7 +1,7 @@ --- title: WAL Player originalFilePath: >- - https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.10/walplayer.md + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/walplayer.md sidebar_position: 80 editTarget: originalFilePath @@ -107,8 +107,8 @@ client: address: localhost:52000 cluster_name: walplayer server_cert_path: "/path/to/server.crt" - username: klio - password: password + client_cert_path: "/path/to/client/tls.crt" + client_key_path: "/path/to/client/tls.key" ``` #### Examples @@ -147,117 +147,193 @@ WAL file: - **`elapsedTime`** - Total time taken for the upload in nanoseconds - **`error`** - Error message if the upload failed (empty on success) -## Benchmarking Workflow +## Benchmarking Example -### 1. Generate Test Data +The following Kubernetes Job definition demonstrates how to use +the WAL Player to benchmark a Klio server. This example covers generating +WAL files and then playing them back to the server. 
-First, create WAL files that represent your expected workload:
-
-```bash
-# For high-throughput testing (many small files)
-klio wal-player generate ./benchmark-high-throughput --wal-size 16 --length 1000
-
-# For high-bandwidth testing (fewer large files)
-klio wal-player generate ./benchmark-high-bandwidth --wal-size 256 --length 100
+```yaml
+---
+# PVC for storing generated WAL files
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: walplayer-data
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 2Gi # Enough space to hold the generated WAL files
+---
+# Client certificate for authenticating with the Klio server
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: walplayer-client-cert
+spec:
+  commonName: klio@walplayer
+  secretName: walplayer-client-cert
+  duration: 2160h # 90d
+  renewBefore: 360h # 15d
+  isCA: false
+  usages:
+    - client auth
+  issuerRef:
+    name: server-sample-ca
+    kind: Issuer
+    group: cert-manager.io
+---
+# ConfigMap with klio client configuration
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: walplayer-config
+data:
+  # The address of your Klio server
+  klio-config.yaml: |
+    client:
+      wal:
+        address: server-sample.default:52000
+        cluster_name: walplayer
+        server_cert_path: /certs/server/ca.crt
+        client_cert_path: /certs/client/tls.crt
+        client_key_path: /certs/client/tls.key
+---
+# Job to generate and play WAL files
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: walplayer-benchmark
+spec:
+  template:
+    metadata:
+      labels:
+        app: walplayer
+    spec:
+      restartPolicy: Never
+      initContainers:
+        # Generate synthetic WAL files
+        - name: generate-wals
+          image: docker.enterprisedb.com/k8s/klio:v0.0.11
+          imagePullPolicy: Always
+          command:
+            - /usr/bin/klio
+            - wal-player
+            - generate
+            - /data
+            - --wal-size=16
+            - --length=100
+          volumeMounts:
+            - name: data
+              mountPath: /data
+      containers:
+        # Play WAL files to the Klio server
+        - name: play-wals
+          image: docker.enterprisedb.com/k8s/klio:v0.0.11
+          imagePullPolicy: Always
+          command:
+            - /usr/bin/klio
+            - wal-player
+            - play
+            - /data
+            - --config=/config/klio-config.yaml
+            - --jobs=4
+            - --block-size=2048
+          volumeMounts:
+            - name: data
+              mountPath: /data
+            - name: config
+              mountPath: /config
+              readOnly: true
+            - name: server-cert
+              mountPath: /certs/server
+              readOnly: true
+            - name: client-cert
+              mountPath: /certs/client
+              readOnly: true
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: walplayer-data
+        - name: config
+          configMap:
+            name: walplayer-config
+        - name: server-cert
+          secret:
+            secretName: server-sample-tls
+        - name: client-cert
+          secret:
+            secretName: walplayer-client-cert
 ```
 
-### 2. Single Connection Baseline
+### Customizing the Benchmark
 
-Test with a single connection to establish baseline performance:
+You can adjust the following parameters to simulate different workload scenarios:
 
-```bash
-klio wal-player play ./benchmark-high-throughput --jobs 1 > baseline-results.json
-```
+#### WAL Generation Parameters
+
+Modify the `generate-wals` init container to create different test workloads:
+
+- Many small files:
+
+  ```yaml
+  - --wal-size=16
+  - --length=1000
+  ```
+
+- Fewer, larger files:
+
+  ```yaml
+  - --wal-size=256
+  - --length=100
+  ```
 
-### 3. Scale Testing
+Match your actual production WAL size and volume for more representative results.
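+
+To estimate the WAL volume of an existing cluster, you can sample the WAL
+insert position twice and diff the two LSNs. A rough sketch, assuming `psql`
+access to the primary:
+
+```sh
+# Bytes of WAL produced in 60 seconds (difference between two LSNs)
+START_LSN=$(psql -Atc "SELECT pg_current_wal_lsn()")
+sleep 60
+psql -Atc "SELECT pg_wal_lsn_diff(pg_current_wal_lsn(), '$START_LSN')"
+```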
-Test with increasing concurrency to find optimal parallelism: +#### WAL Playback Parameters + +Modify the `play-wals` container to test different upload patterns: + +- **Jobs** (`--jobs`): Number of parallel upload workers + - Start with `--jobs=1` to establish baseline performance + - Increase to `--jobs=2`, `--jobs=4`, `--jobs=8` to find optimal concurrency + - Performance typically plateaus at some point +- **Block Size** (`--block-size`): Size of each streaming chunk in KB + - Default is `--block-size=2048` + - Maximum is 8192 + +### Analyzing Results + +View the job logs to see the JSON performance metrics: ```bash -# Test with different job counts -for jobs in 1 2 4 8 16; do - echo "Testing with $jobs jobs..." - klio wal-player play ./benchmark-high-throughput --jobs $jobs > results-$jobs-jobs.json -done +kubectl logs job/walplayer-benchmark -c play-wals ``` -### 4. Analyze Results - -Parse the JSON output to calculate performance metrics: +You can analyze the output using `jq`: ```bash -# Calculate total throughput -jq -s '[.[] | select(.error == "")] | length' results.json +# Get all results +kubectl logs job/walplayer-benchmark -c play-wals > results.json +# Calculate total successful uploads +jq -s '[.[] | select(.error == "")] | length' results.json -# Calculate average upload time +# Calculate average upload time (in nanoseconds) jq -s '[.[] | select(.error == "") | .elapsedTime | tonumber] | add / length' results.json -# Find failed uploads -jq -s '.[] | select(.error != "")' results.json +# Find any failed uploads +jq -s '.[] | select(has("error") and .error != "")' results.json ``` ## Performance Optimization Tips -### Client-Side Optimization - -1. **Parallel Jobs**: Start with 2-4 jobs and increase until performance - plateaus -2. **Block Size**: Adjust based on network characteristics: - - Higher latency networks: Use larger block sizes (4096KB+) - - Lower latency networks: Use smaller block sizes (512-1024KB) -3. **WAL File Size**: Match your production WAL segment size - -### Server-Side Considerations - 1. **Resource Monitoring**: Monitor CPU, memory, and disk I/O on the Klio server 2. **Network Bandwidth**: Ensure sufficient bandwidth between client and server 3. **Storage Performance**: Verify storage can handle the write throughput - -## Example Benchmark Script - -Here's a complete benchmarking script: - -```bash -#!/bin/bash -set -e - -# Configuration -WAL_DIR="./benchmark-wals" -RESULTS_DIR="./benchmark-results" -WAL_SIZE=16 -WAL_COUNT=500 -BLOCK_SIZE=2048 - -# Clean up previous runs -rm -rf "$WAL_DIR" "$RESULTS_DIR" -mkdir -p "$WAL_DIR" "$RESULTS_DIR" - -# Generate test WAL files -echo "Generating $WAL_COUNT WAL files of ${WAL_SIZE}MB each..." -klio wal-player generate "$WAL_DIR" --wal-size "$WAL_SIZE" --length "$WAL_COUNT" - -# Test different concurrency levels -for jobs in 1 2 4 8 16; do - echo "Testing with $jobs parallel jobs..." 
- - start_time=$(date +%s) - /home/fcanovai/prj/cloud-native/klio/core/dist/klio_linux_amd64_v1/klio wal-player play "$WAL_DIR" --config /home/fcanovai/.klio-client.yaml --jobs ${jobs} --block-size ${BLOCK_SIZE} > "$RESULTS_DIR/results-$jobs-jobs.json" 2> "$RESULTS_DIR/error-$jobs-jobs.log" - end_time=$(date +%s) - - # Calculate summary statistics - total_time=$((end_time - start_time)) - successful_uploads=$(jq -s '[.[] | select(.error == "")] | length' "$RESULTS_DIR/results-$jobs-jobs.json") - failed_uploads=$(jq -s '[.[] | select(.error != "")] | length' "$RESULTS_DIR/results-$jobs-jobs.json") - avg_upload_time=$(jq -s '[.[] | select(.error == "") | .elapsedTime | tonumber] | add / length' "$RESULTS_DIR/results-$jobs-jobs.json") - - echo " Total time: ${total_time}s" - echo " Successful uploads: $successful_uploads" - echo " Failed uploads: $failed_uploads" - echo " Throughput: $(echo "scale=2; $successful_uploads / $total_time" | bc) WAL/s" - echo " Avg WAL upload time: $(echo "scale=6; $avg_upload_time" / 1000000 | bc) millis" - echo -done - -echo "Benchmark complete! Results saved in $RESULTS_DIR" -``` From 3bd07db4b8c0b9a55bb921c31581677f28f9adf0 Mon Sep 17 00:00:00 2001 From: Josh Heyer Date: Wed, 21 Jan 2026 21:47:42 +0000 Subject: [PATCH 5/7] Address Stephen's concerns --- product_docs/docs/klio/0/helm_chart.mdx | 39 +++++++++++++++++-------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/product_docs/docs/klio/0/helm_chart.mdx b/product_docs/docs/klio/0/helm_chart.mdx index 28fb9c7986..c94defb78b 100644 --- a/product_docs/docs/klio/0/helm_chart.mdx +++ b/product_docs/docs/klio/0/helm_chart.mdx @@ -24,7 +24,8 @@ Before installing the Klio Operator, ensure you have: - **Helm** – see the [Helm installation guide](https://helm.sh/docs/intro/install/) - **Kubernetes** cluster with appropriate permissions - **Credentials** to access the registry hosting the Helm chart, the Klio operator - image, and the Klio operand image. + image, and the Klio operand image. For the EDB registry, your username will be `k8s` + and your password will be [your EDB Repos 2.0 token](https://www.enterprisedb.com/docs/repos/getting_started/with_web/get_your_token/). - **CloudNativePG Operator** already installed in your Kubernetes cluster. See the [CloudNativePG installation guide](https://cloudnative-pg.io/documentation/current/installation_upgrade/). - **cert-manager** (optional, but strongly recommended for managing TLS certificates). @@ -34,16 +35,28 @@ Before installing the Klio Operator, ensure you have: ## Installation +For simplicity, the instructions below assume you've put the following values into environment variables: + +- `$USERNAME` - will be `k8s` for the EDB registry. +- `$TOKEN` - will be your [EDB Repos 2.0 token](https://www.enterprisedb.com/docs/repos/getting_started/with_web/get_your_token/). +- `$NAMESPACE` - will be the namespace where you installed CNPG (`cnpg-system` is the default namespace for CNPG installations). + +Example: + +```sh +export USERNAME=k8s +export TOKEN=your-edb-token +export NAMESPACE=cnpg-system +``` + ### Step 1: Registry Authentication First, authenticate with the EDB registry where the Helm chart is hosted: ```sh -helm registry login docker.enterprisedb.com -u -p +helm registry login docker.enterprisedb.com -u "${USERNAME}" -p "${TOKEN}" ``` -Replace ``, and `` with the required credentials. 
-
 ### Step 2: Create an Image Pull Secret
 
 Create a Kubernetes secret to allow the operator to pull container images from
@@ -52,9 +65,9 @@ the registry:
 
 ```sh
 kubectl create secret docker-registry klio-registry-secret \
   --docker-server=docker.enterprisedb.com \
-  --docker-username= \
-  --docker-password= \
-  --namespace
+  "--docker-username=${USERNAME}" \
+  "--docker-password=${TOKEN}" \
+  --namespace "${NAMESPACE}"
 ```
 
 !!!info Namespace Selection
@@ -72,7 +85,7 @@ Deploy the Klio Operator to your cluster:
 
 ```sh
 helm install klio-operator oci://docker.enterprisedb.com/k8s/klio-operator-chart \
   --version 0.0.11 \
-  --namespace \
+  --namespace "${NAMESPACE}" \
   --set controllerManager.manager.image.pullSecrets[0].name=klio-registry-secret
 ```
 
@@ -83,14 +96,14 @@ helm install klio-operator oci://docker.enterprisedb.com/k8s/klio-operator-chart
 After installation, verify that the Klio Operator is running:
 
 ```sh
-kubectl get pods -n -l app.kubernetes.io/name=klio
+kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=klio
 ```
 
 You should see the operator pod in a `Running` state. Check the logs to ensure
 there are no errors:
 
 ```sh
-kubectl logs -n deployment/klio-controller-manager -f
+kubectl logs -n "${NAMESPACE}" deployment/klio-controller-manager -f
 ```
 
 Verify that the Custom Resource Definitions (CRDs) were created:
@@ -148,9 +161,11 @@ To upgrade the Klio Operator to a newer version:
 
 ```sh
 helm upgrade klio-operator oci://docker.enterprisedb.com/k8s/klio-operator-chart \
   --version \
-  --namespace
+  --namespace "${NAMESPACE}"
 ```
 
+...where `` is the version you're upgrading to.
+
 When upgrading, you can control how Helm handles values from the previous
 installation. Please refer to the
 [Helm upgrade documentation](https://helm.sh/docs/helm/helm_upgrade/)
 to understand the different options.
@@ -167,7 +182,7 @@ specific upgrade instructions.
 To uninstall the Klio Operator:
 
 ```sh
-helm uninstall klio-operator --namespace
+helm uninstall klio-operator --namespace "${NAMESPACE}"
 ```

From c41e60b9e10ae0e9e66adb7ece5d1615a0dfaf1f Mon Sep 17 00:00:00 2001
From: Josh Heyer
Date: Thu, 22 Jan 2026 20:53:37 +0000
Subject: [PATCH 6/7] zsh compatibility

---
 product_docs/docs/klio/0/helm_chart.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/product_docs/docs/klio/0/helm_chart.mdx b/product_docs/docs/klio/0/helm_chart.mdx
index c94defb78b..279057f59d 100644
--- a/product_docs/docs/klio/0/helm_chart.mdx
+++ b/product_docs/docs/klio/0/helm_chart.mdx
@@ -86,7 +86,7 @@ Deploy the Klio Operator to your cluster:
 helm install klio-operator oci://docker.enterprisedb.com/k8s/klio-operator-chart \
   --version 0.0.11 \
   --namespace "${NAMESPACE}" \
-  --set controllerManager.manager.image.pullSecrets[0].name=klio-registry-secret
+  --set "controllerManager.manager.image.pullSecrets[0].name=klio-registry-secret"

From 3cb62e210e95ad1ed1c4d09c23d79758340c48db Mon Sep 17 00:00:00 2001
From: Josh Heyer
Date: Thu, 29 Jan 2026 14:33:33 +0000
Subject: [PATCH 7/7] fix typo (it->is)

---
 product_docs/docs/klio/0/api_service.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/product_docs/docs/klio/0/api_service.mdx b/product_docs/docs/klio/0/api_service.mdx
index 203de48f7d..6c85141ae9 100644
--- a/product_docs/docs/klio/0/api_service.mdx
+++ b/product_docs/docs/klio/0/api_service.mdx
@@ -405,6 +405,6 @@ status:
 
 ## Limitations
 
-An API service it tied to a specific Klio server instance. 
If you have multiple +An API service is tied to a specific Klio server instance. If you have multiple Klio servers, you will need to deploy a separate API service for each server, defining a different API group for each to avoid conflicts.
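+
+As a sketch of what this looks like at the aggregation layer (the group and
+service names below are purely illustrative, not Klio defaults), each
+deployment registers its own `APIService` with a distinct group:
+
+```yaml
+apiVersion: apiregistration.k8s.io/v1
+kind: APIService
+metadata:
+  # The name must be <version>.<group>; one registration per Klio server
+  name: v1alpha1.server-a.klio.example.com
+spec:
+  group: server-a.klio.example.com # unique per server
+  version: v1alpha1
+  service:
+    name: klio-api-server-a
+    namespace: default
+  groupPriorityMinimum: 1000
+  versionPriority: 15
+```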