diff --git a/gatsby-config.js b/gatsby-config.js index 088dd8786a..13f9fbd984 100644 --- a/gatsby-config.js +++ b/gatsby-config.js @@ -69,6 +69,7 @@ const sourceToPluginConfig = { name: "jdbc_connector", path: "product_docs/docs/jdbc_connector", }, + klio: { name: "klio", path: "product_docs/docs/klio" }, language_pack: { name: "language_pack", path: "product_docs/docs/language_pack", @@ -430,6 +431,7 @@ module.exports = { seealso: "note", hint: "tip", interactive: "interactive", + caution: "warning", }, }, ], diff --git a/product_docs/docs/klio/0/_helm_chart_values.mdx b/product_docs/docs/klio/0/_helm_chart_values.mdx new file mode 100644 index 0000000000..d77cdfa098 --- /dev/null +++ b/product_docs/docs/klio/0/_helm_chart_values.mdx @@ -0,0 +1,42 @@ +| Key | Type | Default | Description | +| -------------------------------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------- | +| certmanager.clusterDomain | string | `"cluster.local"` | The DNS domain of the cluster | +| certmanager.createMetricsCertificate | bool | `true` | Create certificates for the metrics service. | +| certmanager.createPluginClientCertificate | bool | `true` | Create certificates for the plugin client. | +| certmanager.createPluginServerCertificate | bool | `true` | Create certificates for the plugin server. | +| certmanager.duration | string | `"2160h"` | The duration of the certificates. | +| certmanager.enable | bool | `true` | Enable cert-manager integration for certificate creation. | +| certmanager.renewBefore | string | `"360h"` | The renew before time for the certificates. | +| controllerManager.affinity | object | `{}` | Affinity rules for the operator deployment. | +| controllerManager.manager.args | list | `["--metrics-bind-address=:8443","--leader-elect","--health-probe-bind-address=:8081","--plugin-server-cert=/pluginServer/tls.crt","--plugin-server-key=/pluginServer/tls.key","--plugin-client-cert=/pluginClient/tls.crt","--plugin-server-address=:9090","--custom-cnpg-group=postgresql.cnpg.io"]` | List of command line arguments to pass to the controller manager. | +| controllerManager.manager.containerSecurityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]}}` | The security context for the controller manager container. | +| controllerManager.manager.env | object | `{"SIDECAR_IMAGE":"docker.enterprisedb.com/k8s/klio:v0.0.11"}` | The environment variables to set in the controller manager container. | +| controllerManager.manager.image.pullPolicy | string | `"Always"` | The controller manager container imagePullPolicy. | +| controllerManager.manager.image.pullSecrets | list | `[]` | The list of imagePullSecrets. | +| controllerManager.manager.image.repository | string | `"docker.enterprisedb.com/k8s/klio-operator"` | The image to use for the controller manager container. | +| controllerManager.manager.image.tag | string | `"v0.0.11"` | The tag to use for the controller manager container image. | +| controllerManager.manager.livenessProbe | object | `{"httpGet":{"path":"/healthz","port":8081},"initialDelaySeconds":15,"periodSeconds":20}` | Liveness probe configuration. 
| +| controllerManager.manager.readinessProbe | object | `{"httpGet":{"path":"/readyz","port":8081},"initialDelaySeconds":5,"periodSeconds":10}` | Readiness probe configuration. | +| controllerManager.manager.resources | object | `{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}}` | The resources to allocate. | +| controllerManager.nodeSelector | object | `{}` | NodeSelector for the operator deployment. | +| controllerManager.podSecurityContext | object | `{"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}}` | The security context for the controller manager pod. | +| controllerManager.priorityClassName | string | `""` | Priority class name for the controller manager pod. | +| controllerManager.serviceAccount.annotations | object | `{}` | The annotations to add to the service account. | +| controllerManager.tolerations | list | `[]` | Tolerations for the operator deployment. | +| controllerManager.topologySpreadConstraints | list | `[]` | Topology Spread Constraints for the operator deployment. | +| fullnameOverride | string | `""` | Override the fully qualified name of the Helm Chart. | +| kubernetesClusterDomain | string | `"cluster.local"` | The domain for the Kubernetes cluster. | +| metricsService.enable | bool | `true` | Enable the metrics service for the controller manager. | +| metricsService.metricsServiceSecret | string | `"klio-metrics-server-cert"` | The name of the secret containing the TLS certificate for the metrics service. | +| metricsService.ports | list | `[{"name":"https","port":8443,"protocol":"TCP","targetPort":8443}]` | The port the metrics service will listen on. | +| metricsService.type | string | `"ClusterIP"` | Service type for the metrics service. | +| nameOverride | string | `"klio"` | Override the name of the Helm Chart. | +| plugin.clientSecret | string | `"klio-plugin-client-tls"` | The Client TLS certificate. | +| plugin.name | string | `"klio.enterprisedb.io"` | The name the plugin will use to register itself with the CNPG Operator. | +| plugin.port | int | `9090` | The port the plugin will listen on. It must match the "--plugin-server-address" argument. | +| plugin.serverSecret | string | `"klio-plugin-server-tls"` | The Server TLS certificate. | +| prometheus.enable | bool | `true` | To enable a ServiceMonitor to export metrics to Prometheus set true. | +| serviceAccount.annotations | object | `{}` | The annotations to add to the service account. | +| serviceAccount.automount | bool | `true` | Automount service account token. | +| serviceAccount.create | bool | `true` | Specifies whether a service account should be created. | +| serviceAccount.name | string | `""` | The name of the service account | diff --git a/product_docs/docs/klio/0/api/_klio_api.mdx b/product_docs/docs/klio/0/api/_klio_api.mdx new file mode 100644 index 0000000000..bc13e7d324 --- /dev/null +++ b/product_docs/docs/klio/0/api/_klio_api.mdx @@ -0,0 +1,249 @@ +# Packages + +- [klio.enterprisedb.io/v1alpha1](#klioenterprisedbiov1alpha1) + +## klio.enterprisedb.io/v1alpha1 + +Package v1alpha1 contains API Schema definitions for the klio v1alpha1 API group. + +### Resource Types + +- [PluginConfiguration](#pluginconfiguration) +- [Server](#server) + +#### Cache + +Cache defines the configuration for the cache directory. 
+ +*Appears in:* + +- [Tier1Configuration](#tier1configuration) +- [Tier2Configuration](#tier2configuration) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -------- | ------- | ---------- | +| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#persistentvolumeclaimspec-v1-core)* | | True | | | + +#### Data + +Data defines the configuration for the data directory. + +*Appears in:* + +- [Tier1Configuration](#tier1configuration) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#persistentvolumeclaimspec-v1-core)* | Template to be used to generate the Persistent Volume Claim needed for the data folder,
containing base backups and WAL files. | True | | | + +#### ImageConfiguration + +ImageConfiguration contains the information needed to download +the Klio image. + +*Appears in:* + +- [ServerSpec](#serverspec) + +| Field | Description | Required | Default | Validation | +| ---------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | -------- | ------------ | ------------------- | +| `image` *string* | Image is the image to be used for the Klio server | True | | | +| `imagePullPolicy` *[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#pullpolicy-v1-core)* | ImagePullPolicy defines the policy for pulling the image | | IfNotPresent | Optional: {}
| +| `imagePullSecrets` *[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#localobjectreference-v1-core) array* | ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the
images | | | Optional: {}
| + +#### PluginConfiguration + +PluginConfiguration is the Schema for the client configuration API. + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ------------------- | +| `apiVersion` *string* | `klio.enterprisedb.io/v1alpha1` | True | | | +| `kind` *string* | `PluginConfiguration` | True | | | +| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | +| `spec` *[PluginConfigurationSpec](#pluginconfigurationspec)* | | True | | | +| `status` *[PluginConfigurationStatus](#pluginconfigurationstatus)* | | | | Optional: {}
| + +#### PluginConfigurationSpec + +PluginConfigurationSpec defines the desired state of client configuration. + +*Appears in:* + +- [PluginConfiguration](#pluginconfiguration) + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------- | ------- | -------------------------------------- | +| `serverAddress` *string* | ServerAddress is the address of the Klio server | True | | MinLength: 1
Required: {}
| +| `tier1` *[Tier1PluginConfiguration](#tier1pluginconfiguration)* | Tier1 is the Tier 1 configuration | | | Optional: {}
| +| `tier2` *[Tier2PluginConfiguration](#tier2pluginconfiguration)* | Tier2 is the Tier 2 configuration | | | Optional: {}
| +| `clientSecretName` *string* | ClientSecretName is the name of the secret containing the client credentials | True | | MinLength: 1
Required: {}
| +| `serverSecretName` *string* | ServerSecretName is the name of the secret containing the server TLS certificate | True | | MinLength: 1
Required: {}
| +| `clusterName` *string* | ClusterName is the name of the PostgreSQL cluster we are connecting to | | | Optional: {}
| +| `pprof` *boolean* | Pprof enables the pprof endpoint for performance profiling | | | Optional: {}
| +| `containers` *[Container](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#container-v1-core) array* | Containers allows defining a list of containers that will be merged with the Klio sidecar containers.
This enables users to customize the sidecars with additional environment variables, volume mounts,
resource limits, and other container settings without polluting the PostgreSQL container environment.
Merge behavior:
- Containers are matched by name (klio-plugin, klio-wal, klio-restore)
- User customizations serve as the base
- Klio required values (name, args, CONTAINER_NAME env var) always override user values
- User-defined environment variables and volume mounts are preserved
- Template defaults are applied only for fields not set by the user or Klio | | | MaxItems: 3
Optional: {}
| + +#### PluginConfigurationStatus + +PluginConfigurationStatus defines the observed state of ClientConfig. + +*Appears in:* + +- [PluginConfiguration](#pluginconfiguration) + +#### Queue + +Queue defines the configuration for the directory hosting the +task queue. + +*Appears in:* + +- [ServerSpec](#serverspec) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | -------- | ------- | ---------- | +| `pvcTemplate` *[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#persistentvolumeclaimspec-v1-core)* | PersistentVolumeClaimTemplate is used to generate the configuration for
the PVC hosting the work queue. | True | | | + +#### RetentionPolicy + +RetentionPolicy defines how many backups we should keep. + +*Appears in:* + +- [Tier1PluginConfiguration](#tier1pluginconfiguration) +- [Tier2PluginConfiguration](#tier2pluginconfiguration) + +| Field | Description | Required | Default | Validation | +| ----------------------- | ------------------------------------------------------------------ | -------- | ------- | ---------- | +| `keepLatest` *integer* | KeepLatest is the number of latest backups to keep
optional | True | | | +| `keepAnnual` *integer* | KeepAnnual is the number of annual backups to keep
optional | True | | | +| `keepMonthly` *integer* | KeepMonthly is the number of monthly backups to keep
optional | True | | | +| `keepWeekly` *integer* | KeepWeekly is the number of weekly backups to keep
optional | True | | | +| `keepDaily` *integer* | KeepDaily is the number of daily backups to keep
optional | True | | | +| `keepHourly` *integer* | KeepHourly is the number of hourly backups to keep
optional | True | | | + +#### S3Configuration + +S3Configuration is the configuration to a S3 defined tier 2. + +*Appears in:* + +- [Tier2Configuration](#tier2configuration) + +| Field | Description | Required | Default | Validation | +| ----------------------------------------------------------- | ---------------------------------------------------- | -------- | ------- | ------------------- | +| `bucketName` *string* | BucketName is the name of the bucket | True | | | +| `prefix` *string* | Prefix is the prefix to be used for the stored files | | | Optional: {}
| +| `endpoint` *string* | Endpoint is the endpoint to be used | | | Optional: {}
| +| `region` *string* | Region is the region to be used | | | Optional: {}
| +| `accessKeyId` *[SecretKeySelector](#secretkeyselector)* | The S3 access key ID | | | Optional: {}
| +| `secretAccessKey` *[SecretKeySelector](#secretkeyselector)* | The S3 access key | | | Optional: {}
| +| `sessionToken` *[SecretKeySelector](#secretkeyselector)* | The S3 session token | | | Optional: {}
| +| `customCaBundle` *[SecretKeySelector](#secretkeyselector)* | A pointer to a custom CA bundle | | | Optional: {}
| + +#### Server + +Server is the Schema for the servers API. + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ------------------- | +| `apiVersion` *string* | `klio.enterprisedb.io/v1alpha1` | True | | | +| `kind` *string* | `Server` | True | | | +| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | +| `spec` *[ServerSpec](#serverspec)* | | True | | | +| `status` *[ServerStatus](#serverstatus)* | | | | Optional: {}
| + +#### ServerSpec + +ServerSpec defines the desired state of Server. + +*Appears in:* + +- [Server](#server) + +| Field | Description | Required | Default | Validation | +| ---------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------------ | ------------------- | +| `image` *string* | Image is the image to be used for the Klio server | True | | | +| `imagePullPolicy` *[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#pullpolicy-v1-core)* | ImagePullPolicy defines the policy for pulling the image | | IfNotPresent | Optional: {}
| +| `imagePullSecrets` *[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#localobjectreference-v1-core) array* | ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the
images | | | Optional: {}
| +| `tlsSecretName` *string* | TLSSecretName is the name of the Kubernetes secret containing the server-side certificate
to be used for the Klio server. | True | | | +| `caSecretName` *string* | ClientCASecretName is the name of the Kubernetes secret containing the CA certificate
to be used by the Klio server to validate the users. | True | | | +| `tier1` *[Tier1Configuration](#tier1configuration)* | Tier1 is the Tier 1 configuration | True | | | +| `tier2` *[Tier2Configuration](#tier2configuration)* | Tier2 is the Tier 2 configuration | True | | | +| `queue` *[Queue](#queue)* | Queue is the configuration of the PVC that should host
the task queue. | | | Optional: {}
| +| `template` *[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#podtemplatespec-v1-core)* | Template to override the default StatefulSet of the Klio server.
WARNING: Modifying this template may break the server functionality if not done carefully.
This field is primarily intended for advanced configuration such as telemetry setup.
Use at your own risk and ensure thorough testing before applying changes. | | | Optional: {}
| + +#### ServerStatus + +ServerStatus defines the observed state of Server. + +*Appears in:* + +- [Server](#server) + +#### TLSConfiguration + +TLSConfiguration contains the information needed to configure +the PKI infrastructure of the Klio server. + +*Appears in:* + +- [ServerSpec](#serverspec) + +| Field | Description | Required | Default | Validation | +| ------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `tlsSecretName` *string* | TLSSecretName is the name of the Kubernetes secret containing the server-side certificate
to be used for the Klio server. | True | | | +| `caSecretName` *string* | ClientCASecretName is the name of the Kubernetes secret containing the CA certificate
to be used by the Klio server to validate the users. | True | | | + +#### Tier1Configuration + +Tier1Configuration is the tier 1 configuration. + +*Appears in:* + +- [ServerSpec](#serverspec) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------- | ---------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `cache` *[Cache](#cache)* | Cache is the configuration of the PVC that should be
used for the cache | True | | | +| `data` *[Data](#data)* | Data is the configuration of the PVC that should be used
for the base backups | True | | | +| `encryptionKey` *[SecretKeySelector](#secretkeyselector)* | EncryptionKey is a reference to a secret containing the Klio password | True | | | + +#### Tier1PluginConfiguration + +Tier1PluginConfiguration configures tier1 backup and recovery settings. + +*Appears in:* + +- [PluginConfigurationSpec](#pluginconfigurationspec) + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------- | ------------------------------------------------------- | -------- | ------- | ------------------- | +| `retention` *[RetentionPolicy](#retentionpolicy)* | RetentionPolicy defines how many backups we should keep | | | Optional: {}
| + +#### Tier2Configuration + +Tier2Configuration is the tier 2 configuration. + +*Appears in:* + +- [ServerSpec](#serverspec) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------------- | ---------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `cache` *[Cache](#cache)* | Cache is the configuration of the PVC that should be
used for the cache | True | | | +| `s3` *[S3Configuration](#s3configuration)* | S3 contains the configuration parameters for an S3-based tier 2 | True | | | +| `encryptionKey` *[SecretKeySelector](#secretkeyselector)* | EncryptionKey is a reference to a secret containing the Klio password | True | | | + +#### Tier2PluginConfiguration + +Tier2PluginConfiguration configures tier2 backup and recovery settings. + +*Appears in:* + +- [PluginConfigurationSpec](#pluginconfigurationspec) + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------- | ------------------------------------------------------------------------------------ | -------- | ------- | ------------------- | +| `enableBackup` *boolean* | EnableBackup controls whether WAL and base backups should be stored in tier2 | | | Optional: {}
| +| `enableRecovery` *boolean* | EnableRecovery controls whether tier2 should be included in the recovery source list | | | Optional: {}
| +| `retention` *[RetentionPolicy](#retentionpolicy)* | RetentionPolicy defines how many backups we should keep | | | Optional: {}
| diff --git a/product_docs/docs/klio/0/api/_klio_catalog_api.mdx b/product_docs/docs/klio/0/api/_klio_catalog_api.mdx new file mode 100644 index 0000000000..ef2021b22c --- /dev/null +++ b/product_docs/docs/klio/0/api/_klio_catalog_api.mdx @@ -0,0 +1,103 @@ +# Packages + +- [kliocatalog.enterprisedb.io/v1alpha1](#kliocatalogenterprisedbiov1alpha1) + +## kliocatalog.enterprisedb.io/v1alpha1 + +Package v1alpha1 the Klio Catalog API + +### Resource Types + +- [KlioBackup](#kliobackup) +- [KlioBackupList](#kliobackuplist) + +#### KlioBackup + +KlioBackup is the Schema for a Klio Backup API. + +*Appears in:* + +- [KlioBackupList](#kliobackuplist) + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------- | -------- | ------- | ------------------- | +| `apiVersion` *string* | `kliocatalog.enterprisedb.io/v1alpha1` | True | | | +| `kind` *string* | `KlioBackup` | True | | | +| `metadata` *[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#objectmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | +| `spec` *[KlioBackupSpec](#kliobackupspec)* | | True | | | +| `status` *[KlioBackupStatus](#kliobackupstatus)* | | | | Optional: {}
| + +#### KlioBackupList + +KlioBackupList contains a list of KlioBackup. + +| Field | Description | Required | Default | Validation | +| -------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | -------- | ------- | ---------- | +| `apiVersion` *string* | `kliocatalog.enterprisedb.io/v1alpha1` | True | | | +| `kind` *string* | `KlioBackupList` | True | | | +| `metadata` *[ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#listmeta-v1-meta)* | Refer to Kubernetes API documentation for fields of `metadata`. | True | | | +| `items` *[KlioBackup](#kliobackup) array* | | True | | | + +#### KlioBackupSpec + +KlioBackupSpec defines the desired state of a KlioBackup. + +*Appears in:* + +- [KlioBackup](#kliobackup) + +| Field | Description | Required | Default | Validation | +| ---------------------- | -------------------------------------------------------------- | -------- | ------- | ---------- | +| `clusterName` *string* | ClusterName is the name of the cluster that has been backed up | True | | | +| `backupID` *string* | BackupID is the unique identifier of the backup | True | | | + +#### KlioBackupStatus + +KlioBackupStatus defines the observed state of a KlioBackup. + +*Appears in:* + +- [KlioBackup](#kliobackup) + +| Field | Description | Required | Default | Validation | +| ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `startLSN` *integer* | StartLSN is the LSN of the backup start | True | | | +| `endLSN` *integer* | EndLSN is the LSN of the backup end | True | | | +| `startWAL` *string* | StartWAL is the current WAL when the backup started | True | | | +| `endWAL` *string* | EndWAL is the current WAL when the backup ends | True | | | +| `tablespaces` *[TablespaceLayoutList](#tablespacelayoutlist)* | Tablespaces are the metadata of the tablespaces | True | | | +| `annotations` *object (keys:string, values:string)* | Annotations is a generic data store where each
backend can put its metadata. | True | | | +| `startedAt` *[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#time-v1-meta)* | StartedAt is the current time when the backup started. | True | | | +| `stoppedAt` *[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#time-v1-meta)* | StoppedAt is the current time when the backup ended. | True | | | + +#### TablespaceLayout + +TablespaceLayout is the on-disk structure of a tablespace. + +*Appears in:* + +- [TablespaceLayoutList](#tablespacelayoutlist) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------- | -------------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `name` *string* | Name is the tablespace name | True | | | +| `oid` *string* | Oid is the OID of the tablespace. | True | | | +| `path` *string* | Path is the path where the tablespace can be found. | True | | | +| `annotations` *object (keys:string, values:string)* | Annotations is a generic data store where each backend
can annotate its metadata. | True | | | + +#### TablespaceLayoutList + +*Underlying type:* *[TablespaceLayout](#tablespacelayout)* + +TablespaceLayoutList is a list of TablespaceLayout. + +*Appears in:* + +- [KlioBackupStatus](#kliobackupstatus) + +| Field | Description | Required | Default | Validation | +| --------------------------------------------------- | -------------------------------------------------------------------------------------- | -------- | ------- | ---------- | +| `name` *string* | Name is the tablespace name | True | | | +| `oid` *string* | Oid is the OID of the tablespace. | True | | | +| `path` *string* | Path is the path where the tablespace can be found. | True | | | +| `annotations` *object (keys:string, values:string)* | Annotations is a generic data store where each backend
can annotate its metadata. | True | | | diff --git a/product_docs/docs/klio/0/api/index.mdx b/product_docs/docs/klio/0/api/index.mdx new file mode 100644 index 0000000000..8b545fd4e1 --- /dev/null +++ b/product_docs/docs/klio/0/api/index.mdx @@ -0,0 +1,10 @@ +--- +title: API Reference +navigation: + - klio_api + - klio_catalog_api + - '!_klio_api' + - '!_klio_catalog_api' +indexCards: extra +--- + diff --git a/product_docs/docs/klio/0/api/klio_api.mdx b/product_docs/docs/klio/0/api/klio_api.mdx new file mode 100644 index 0000000000..725ba6dfda --- /dev/null +++ b/product_docs/docs/klio/0/api/klio_api.mdx @@ -0,0 +1,11 @@ +--- +title: Klio API reference +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/api/klio_api.mdx +editTarget: originalFilePath + +--- + +import KlioAPI from "./_klio_api.mdx"; + + diff --git a/product_docs/docs/klio/0/api/klio_catalog_api.mdx b/product_docs/docs/klio/0/api/klio_catalog_api.mdx new file mode 100644 index 0000000000..4265b9535f --- /dev/null +++ b/product_docs/docs/klio/0/api/klio_catalog_api.mdx @@ -0,0 +1,11 @@ +--- +title: Klio Catalog API reference +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/api/klio_catalog_api.mdx +editTarget: originalFilePath + +--- + +import KlioCatalogAPI from "./_klio_catalog_api.mdx"; + + diff --git a/product_docs/docs/klio/0/api_service.mdx b/product_docs/docs/klio/0/api_service.mdx new file mode 100644 index 0000000000..6c85141ae9 --- /dev/null +++ b/product_docs/docs/klio/0/api_service.mdx @@ -0,0 +1,410 @@ +--- +title: Klio API Service +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/api_service.md +sidebar_position: 9 +editTarget: originalFilePath + +--- + +The Klio API Service is a Kubernetes API aggregation server that extends the +Kubernetes API to expose Klio backup metadata as native Kubernetes resources. +This allows users to query and inspect backup information using standard +Kubernetes tools like `kubectl`. + +!!!caution Experimental - Subject to Change + +The Klio API Service is **experimental** and under evaluation. Future releases +may provide an alternative method for listing backups, which +may result in changes or removal of the API service. +!!! + +## Overview + +The API service integrates with the Kubernetes API server through the +[API Aggregation Layer](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/apiserver-aggregation/), +registering a new API group `kliocatalog.enterprisedb.io/v1alpha1` that provides +access to backup catalog information stored in the Klio repository. 
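+
+For example, once the service is set up as described below, the new group is
+visible through standard Kubernetes API discovery (this is only an
+illustration and assumes the `APIService` registration has completed):
+
+```bash
+# List the resources served by the aggregated Klio catalog API group
+kubectl api-resources --api-group=kliocatalog.enterprisedb.io
+```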
+ +### Key Features + +- **Native Kubernetes Integration**: Query backups using `kubectl` and other + Kubernetes tools +- **Secure Communication**: TLS-encrypted connections with certificate-based + authentication +- **Real-time Catalog Access**: Direct access to the Klio backup repository + for up-to-date information +- **Standard Kubernetes RBAC**: Control access using native Kubernetes + role-based access control + +### Architecture + +The API service acts as a bridge between the Kubernetes API server +and the Klio backup repository: + +```mermaid +flowchart LR; + client[kubectl]; + k8sapi[Kubernetes API Server]; + klioapi[Klio API Service Deployment]; + kliorepo[Klio Server]; + client --> k8sapi; + k8sapi --> klioapi; + klioapi --> kliorepo; +``` + +## What the API Service Provides + +### KlioBackup Resource + +The API service exposes a `KlioBackup` resource that represents +backup metadata from the Klio repository. + +The resource is described in the [Klio Catalog API reference](api/klio_catalog_api.mdx). + +### Available Operations + +The API service supports the following operations: + +1. **List all backups**: Retrieve all backups across all clusters +2. **Get specific backup**: Retrieve details for a specific backup by name + +## Setting Up the API Service + +### Prerequisites + +Before setting up the Klio API service, ensure you have: + +- A Kubernetes cluster with the Klio operator installed +- A running Klio server instance +- `kubectl` configured to access your cluster +- [cert-manager](https://cert-manager.io/) installed + for TLS certificate management (optional) + +### Required Components + +The API service setup includes: + +1. **TLS Certificates**: For secure communication between the Kubernetes API + server and the Klio API service, and between the Klio API service and the + Klio server +2. **Service Account**: Dedicated service account with necessary permissions +3. **RBAC Resources**: ClusterRole and Role for authorization +4. **Deployment**: The API service deployment specification +5. **Service**: Kubernetes service to expose the API server +6. **APIService**: Registration with the Kubernetes API aggregation layer + +### Step-by-Step Setup + +!!!note + +Setup procedure will be simplified in future Klio releases. +!!! + +#### 1. Create ServiceAccount and RBAC Resources + +Create a service account and the necessary RBAC resources. 
+ +```yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: klio-api +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: klio-api + namespace: kube-system +rules: + - apiGroups: + - "" + resourceNames: + - extension-apiserver-authentication + resources: + - configmaps + verbs: + - get + - watch + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: klio-api + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: klio-api +subjects: + - kind: ServiceAccount + name: klio-api + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: klio-api + namespace: kube-system +rules: + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "watch", "list"] + - apiGroups: ["admissionregistration.k8s.io"] + resources: ["mutatingwebhookconfigurations", "validatingwebhookconfigurations", "validatingadmissionpolicies", "validatingadmissionpolicybindings"] + verbs: ["get", "watch", "list"] + - apiGroups: ["flowcontrol.apiserver.k8s.io"] + resources: ['prioritylevelconfigurations', 'flowschemas'] + verbs: ['list', 'watch'] + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: sample-apiserver-clusterrolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: klio-api +subjects: + - kind: ServiceAccount + name: klio-api + namespace: default +``` + +#### 2. Create a client certificate secret + +Create a Kubernetes `Secret` containing the client certificate +and key for authenticating to the Klio server. + +Its common name must be `snapshot_reader@klio`. + +See the [Klio documentation](klio_server.mdx#creating-a-client-side-certificate) +for instructions on generating the certificate. + +#### 3. Create a server certificate secret + +Create a Kubernetes `Secret` containing the Klio server's TLS certificate +for validating the server's identity. + +If you're using `cert-manager`, you can create a `Certificate` resource +as shown below: + +```yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: klio-api +spec: + secretName: klio-api-tls + commonName: klio-api + dnsNames: + - klio-api + - klio-api.default + - klio-api.default.svc + + duration: 2160h # 90d + renewBefore: 360h # 15d + + isCA: false + usages: + - server auth + - client auth + + issuerRef: + name: selfsigned-issuer + kind: Issuer + group: cert-manager.io +``` + +!!!info + +For production environments, use certificates signed by +your organization's Certificate Authority (CA) +or a trusted public CA instead of self-signed certificates. +!!! + +#### 4. Deploy the API Service + +Create a deployment configuration for the API service. 
+ + + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: klio-api + name: klio-api +spec: + replicas: 1 + selector: + matchLabels: + app: klio-api + strategy: {} + template: + metadata: + labels: + app: klio-api + spec: + serviceAccountName: klio-api + volumes: + - name: klio-server-tls + secret: + defaultMode: 420 + secretName: <$KLIO_SERVER_CERT> # The Klio server cert secret + - name: klio-api-tls + secret: + defaultMode: 420 + secretName: klio-api-tls # The Klio API server certificate secret + - name: klio-client-tls + secret: + defaultMode: 420 + secretName: <$KLIO_CLIENT_CERT> # The client certificate used to authenticate to the Klio server + - emptyDir: {} + name: scratch-data + containers: + - args: + - server + - api-server + env: + - name: CLIENT_BASE_HOSTNAME + value: klio + - name: CLIENT_BASE_URL + value: <$KLIO_SERVER_ADDRESS> # The Klio server address in https://host:port format + - name: CLIENT_BASE_SERVER_CERT_PATH + value: /certs/tls.crt + - name: CLIENT_BASE_API_SERVER_CERTFILE + value: /klio-api/certs/tls.crt + - name: CLIENT_BASE_API_SERVER_KEYFILE + value: /klio-api/certs/tls.key + - name: CLIENT_BASE_CLIENT_CERT_PATH + value: /client-certs/tls.crt + - name: CLIENT_BASE_CLIENT_KEY_PATH + value: /client-certs/tls.key + - name: TMPDIR + value: /tmp + image: docker.enterprisedb.com/k8s/klio:v0.0.11 + imagePullPolicy: Always + name: api-server + resources: {} + securityContext: + runAsNonRoot: true + volumeMounts: + - mountPath: /certs + name: klio-server-tls + - mountPath: /client-certs + name: klio-client-tls + - mountPath: /klio-api/certs + name: klio-api-tls + - mountPath: /tmp + name: scratch-data +``` + + + +#### 5. Create a service + +Now expose the Deployment via a Kubernetes Service: + +```yaml +apiVersion: v1 +kind: Service +metadata: + labels: + app: klio-api + name: klio-api +spec: + ports: + - port: 443 + protocol: TCP + targetPort: 443 + selector: + app: klio-api +``` + +#### 6. Register the `APIService` + +Finally, register the API service creating its resource: + +```yaml +apiVersion: apiregistration.k8s.io/v1 +kind: APIService +metadata: + name: v1alpha1.kliocatalog.enterprisedb.io + + # If using cert-manager, you can inject the CA bundle specifying the + # certificate: + # + # annotations: + # cert-manager.io/inject-ca-from: default/klio-api +spec: + group: kliocatalog.enterprisedb.io + groupPriorityMinimum: 1000 + versionPriority: 15 + service: + name: klio-api + version: v1alpha1 + # If not using cert-manager, you will need to manually set the CA bundle + # kubectl get secret klio-api-tls -o jsonpath='{.data.tls\.crt}' + # + # caBundle: put-CA-Bundle-here +``` + +## Using the API Service + +Once the API service is running, you can interact with it using `kubectl`. 
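+
+Before querying backups, you can confirm that the aggregation layer reports the
+service as available. The `APIService` name below matches the registration
+created in the setup steps above:
+
+```bash
+kubectl get apiservice v1alpha1.kliocatalog.enterprisedb.io
+```
+
+The `AVAILABLE` column should report `True` before you proceed.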
+ +### List All Backups + +```bash +kubectl get kliobackups +``` + +Example output: + +``` +NAME CLUSTER NAME STARTED AT STOPPED AT +cluster-example.backup-20251113093137 cluster-example 2025-11-13 09:31:37 +0000 UTC 2025-11-13 09:31:39 +0000 UTC +``` + +### Get Specific Backup Details + +```bash +kubectl get kliobackups cluster-example.backup-20251113093137 -o yaml +``` + +Example output: + +```yaml +apiVersion: kliocatalog.enterprisedb.io/v1alpha1 +kind: KlioBackup +metadata: + name: cluster-example.backup-20251113093137 +spec: + backupID: backup-20251113093137 + clusterName: cluster-example +status: + annotations: + klio.io/controlDataKopiaManifestID: f055f78fb1db512b39e695465fa26ead + klio.io/kopiaManifestID: 2272afec220f48d0fa0064f85b043ac4 + endLSN: 117440800 + endWAL: "000000010000000000000007" + startLSN: 117440552 + startWAL: "000000010000000000000007" + startedAt: "2025-11-13T09:31:37Z" + stoppedAt: "2025-11-13T09:31:39Z" +``` + +## Limitations + +An API service is tied to a specific Klio server instance. If you have multiple +Klio servers, you will need to deploy a separate API service for each server, +defining a different API group for each to avoid conflicts. diff --git a/product_docs/docs/klio/0/architectures.mdx b/product_docs/docs/klio/0/architectures.mdx new file mode 100644 index 0000000000..1a4199038b --- /dev/null +++ b/product_docs/docs/klio/0/architectures.mdx @@ -0,0 +1,222 @@ +--- +title: Architectures & Tiers +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/architectures.md +sidebar_position: 3 +editTarget: originalFilePath + +--- + +Klio employs a multi-tiered architecture designed to balance performance, +resilience, and cost. This approach separates immediate, high-speed backup and +recovery operations from long-term archival and disaster recovery (DR) needs. +The architecture is built around three distinct storage tiers, each serving a +specific purpose in the data lifecycle. + +![Multi-tiered architecture overview](images/overview-multi-tiers.png) + +* * * + +## Tier 0: Volume Snapshots + +!!!note + +Tier 0 is part of our long-term vision and will be introduced in a future +release. +!!! + +**Tier 0** leverages Kubernetes Volume Snapshots, if supported by the +underlying storage class. It consists of instantaneous, point-in-time snapshots +of all volumes used by the PostgreSQL cluster, including the `PGDATA` directory +and any tablespaces. + +This tier is not intended for long-term storage but acts as the **initial +source** for a base backup. By reading from a static snapshot, Klio avoids +impacting the performance of the running database. From a disaster recovery +perspective, these snapshots are often considered "ephemeral," as most local +storage solutions keep them within the same disks, unlike some cloud providers +or storage classes that allow them to be archived to object storage. +Volume snapshot objects reside in the same Kubernetes namespace of a PostgreSQL +cluster. + +Klio coordinates the creation of the snapshot as supported by CloudNativePG and +then uses it to **asynchronously offload** the base backup data to Tier 1. +Klio also manages retention policies for volume snapshots objects for a given +PostgreSQL cluster. + +* * * + +## Tier 1: Primary Storage (The Klio Server) + +**Tier 1** is the core operational tier, also referred to as the **Main Tier** +or **Klio Server**. 
It's designed for speed and provides immediate access to +all necessary backup artifacts for most recovery scenarios. + +This tier consists of a **local Persistent Volume (PV)** deployed by the +Klio Server. It can be located in the same namespace as the PostgreSQL cluster +or in a different one within the same Kubernetes cluster +(see the ["Tier 1 Architectures" section below](#tier-1-architectures)). + +Its purpose is to store the **WAL archive** and the **catalog of physical base +backups**. Its high-throughput, low-latency nature is optimized for several key +tasks: + +- Receiving a continuous stream of WAL files directly from the PostgreSQL + primary. +- Storing base backups created from the primary or offloaded from Tier 0. +- Serving as the source for asynchronously replicating data to Tier 2. +- Managing retention policies for all tiers. + +### Tier 1 Architectures + +Klio supports several flexible deployment architectures for its Tier 1 storage. + +On the physical layer, it is recommended that both compute and, most +importantly, storage are separate from the PostgreSQL clusters. + +!!!warning + +Placing Tier 1 on the same nodes and storage as the PostgreSQL clusters +severely impacts the business continuity objectives of your organization. +!!! + +On the logical layer, a **Klio Server** can reside in the same namespace as the +PostgreSQL cluster(s) it manages or in a separate, dedicated namespace. + +When choosing an architecture, it's important to consider +**security and tenancy**. +PostgreSQL clusters managed by a single Klio Server share the same master +encryption key. For this reason, it's recommended to use separate Klio Servers +for clusters that serve different tenants or have distinct security +requirements. + +#### Clusters and Klio Server in the Same Namespace + +The simplest deployment places the Klio Server in the same namespace as the +PostgreSQL cluster(s). + +This can be a **dedicated 1:1 mapping** (one Klio Server per cluster): + +![Cluster and Klio server in the same namespace](images/tier1-namespace-single.png) + +Or a **shared N:1 mapping** where one server manages all clusters in the +namespace. + +![Multiple clusters share a Klio server in the same namespace](images/tier1-namespace-multi.png) + +#### Clusters and Klio Server in Different Namespaces + +For greater isolation or centralized management, the Klio Server can be +deployed in a namespace separate from the PostgreSQL clusters it protects. + +The following diagram shows a PostgreSQL cluster being backed up by a Klio +Server in another namespace: + +![Cluster and Klio server in a different namespace](images/tier1-shared-single.png) + +This model also allows a central Klio Server to manage clusters that reside in +different namespaces, as shown below: + +![Multiple clusters share a Klio server in the same namespace](images/tier1-shared-multi.png) + +### Reserving Nodes for Klio Workloads + +For dedicated performance and resource isolation, you can reserve specific +worker nodes for Klio pods using Kubernetes taints and tolerations. + +1. **Taint the Node**: Apply a taint to the desired node. This prevents most + pods from being scheduled on it. + + ```sh + kubectl taint node node-role.kubernetes.io/klio=:NoSchedule + ``` + +2. **Add Toleration to Klio Server**: Add the corresponding toleration to your + Klio `Server` resource, adding it to `.spec.template`. + This allows the Klio Server to be scheduled on the tainted node. 
+ + ```yaml + # In your Server resource definition + spec: + template: + spec: + containers: [] + tolerations: + - key: "node-role.kubernetes.io/klio" + operator: "Exists" + effect: "NoSchedule" + ``` + +* * * + +## Tier 2: Secondary Storage (Object Storage) + +!!!warning Work in Progress + +Tier 2 functionality is currently under heavy development and should be +considered experimental. The features described below are subject to change. +!!! + +**Tier 2** provides durable, long-term storage for robust disaster recovery +(DR) strategies. It's physically and logically separate from the primary +Kubernetes cluster and typically consists of an external object storage system, +such as Amazon S3, Google Cloud Storage, or Azure Blob Storage. +Storing backups off-site ensures **geographical redundancy**, protecting data +against a full cluster or site failure. + +Klio asynchronously relays both base backups and WAL files from Tier 1 to +Tier 2. This decoupling ensures that primary backup and recovery operations in +Tier 1 are not directly affected by the latency or availability of the remote +object storage. + +Additionally, Tier 2 can serve as a read-only fallback source. In a distributed +CloudNativePG topology, this allows a Klio server at a secondary site to use +the shared Tier 2 storage to bootstrap a new cluster, enhancing DR +capabilities. + +### Restoring from Tier 2 + +When a backup is requested for restore, Klio will first look for it in Tier 1. +If the backup is not found in Tier 1, Klio will automatically check Tier 2. +This fallback mechanism ensures that backups that have been migrated to Tier 2 +are still accessible for restore operations. + +When Tier 2 is enabled and a backup exists in both tiers, Tier 1 takes +precedence as restore from it will be faster. + +### Read-Only WAL Server Mode (currently unavailable) + +The Klio WAL server supports a **read-only mode** that allows it to serve WAL +files for download without accepting any write operations. This mode is useful +when implementing Tier 2 functionality, enabling clients to read WAL files from +object storage without the risk of accidental modifications. + +When a WAL server is started in read-only mode: + +- All **read operations** (e.g., `Get`, `GetMetadata`) continue to function normally +- All **write operations** (e.g., `Put`, `SetFirstRequiredWAL`, `RequestWALStart`, + `ResetWALStream`) are rejected with a `FailedPrecondition` gRPC error +- The server will return error code `3` (FailedPrecondition) for any write attempt + +This ensures data integrity in distributed backup scenarios where secondary sites +only need read access to the WAL archive for recovery purposes. + +* * * + +## Planning Your Backup Strategy + +When planning your backup strategy with Klio, **Tier 1 is the most critical +layer** to define architecturally. You have several options, ranging from +running Klio servers on any worker node using your cluster's primary storage +solution, to dedicating a single worker node with local storage for a +centralized Klio server. + +**Tier 0** capabilities are determined by the underlying Kubernetes +`StorageClass`. Klio is particularly valuable when using local storage +solutions (such as LVM with TopoLVM or OpenEBS), as it can **offload** volume +snapshot backups to Tier 1, freeing up high-performance local disk space via +retention policies. 
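+
+As a quick way to assess what Tier 0 could rely on, you can list the storage
+and snapshot classes available in your cluster (the second command assumes the
+external snapshotter CRDs are installed):
+
+```sh
+kubectl get storageclass
+kubectl get volumesnapshotclass
+```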
+ +**Tier 2** is often determined by your organization's infrastructure teams, who +have likely already selected one or more standard object storage solutions for +long-term archival. diff --git a/product_docs/docs/klio/0/backup_and_restore.mdx b/product_docs/docs/klio/0/backup_and_restore.mdx new file mode 100644 index 0000000000..0da8167a61 --- /dev/null +++ b/product_docs/docs/klio/0/backup_and_restore.mdx @@ -0,0 +1,314 @@ +--- +title: Backup and Restore +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/backup_and_restore.md +sidebar_position: 7 +editTarget: originalFilePath + +--- + +This guide explains how to take backups of PostgreSQL clusters managed by +CloudNativePG and restore them using Klio. + +## Overview + +Klio follows PostgreSQL's native physical backup and recovery mechanisms, +leveraging CloudNativePG's backup and restore capabilities through its +[`Backup` resource](https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-Backup) +and +[`ScheduledBackup` resource](https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-ScheduledBackup). + +A working **online backup** is composed of: + +- A **physical base backup**: A filesystem copy of the PostgreSQL data directory. +- A set of **WAL (Write-Ahead Log) files**: Continuous logs of all changes made + to the database during the entire period of the base backup. + +!!!important + +It is recommended to periodically test backup restores to ensure correct +recovery procedures. +!!! + +## Prerequisites + +Before performing backup and restore operations, ensure you have: + +- A running [Klio server](klio_server.mdx) with proper configuration +- A PostgreSQL cluster configured with the [Klio plugin](plugin_configuration.mdx) + +## Taking a Backup + +With the Klio plugin configured, you can take on-demand backups using +CloudNativePG's [`Backup` resource](https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-Backup) +or the [Kubectl plugin](https://cloudnative-pg.io/documentation/current/kubectl-plugin/#requesting-a-new-physical-backup) +for CNPG. + +### Create a Backup + +You can trigger a new backup by creating a `Backup` resource. + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: Backup +metadata: + name: my-cluster-backup-20251027 + namespace: default +spec: + method: plugin + target: primary + cluster: + name: my-cluster + pluginConfiguration: + name: klio.enterprisedb.io +``` + +Apply the manifest: + +```bash +kubectl apply -f backup.yaml +``` + +Alternatively, you can request a backup directly using the + [`kubectl cnpg` plugin](https://cloudnative-pg.io/documentation/current/kubectl-plugin/#requesting-a-new-physical-backup): + +```bash +kubectl cnpg backup my-cluster \ + --method plugin \ + --plugin-name klio.enterprisedb.io \ + --backup-target primary +``` + +If you don’t specify the `--backup-name` option, the `cnpg backup` command +automatically generates one using the format `-`, +which is suitable in most cases. 
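+
+If you prefer a predictable name, you can pass one explicitly, for example:
+
+```bash
+kubectl cnpg backup my-cluster \
+  --backup-name my-cluster-backup-20251027 \
+  --method plugin \
+  --plugin-name klio.enterprisedb.io \
+  --backup-target primary
+```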
+ +For a complete list of available options, run: + +```bash +kubectl cnpg backup --help +``` + +### Monitor Backup Progress + +Check the backup status: + +```bash +# Watch the backup status +kubectl get backup my-cluster-backup-20251027 -w + +# Get detailed backup information +kubectl describe backup my-cluster-backup-20251027 +``` + +A successful backup will show: + +``` +NAME AGE CLUSTER METHOD PHASE ERROR +my-cluster-backup-20251027 2m my-cluster plugin Completed +``` + +### Scheduled Backups + +You can schedule automatic backups using CloudNativePG's +[`ScheduledBackup` resource](https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-ScheduledBackup). + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: my-cluster-daily-backup + namespace: default +spec: + # Cron schedule: daily at 2:00 AM + schedule: "0 0 2 * * *" + method: plugin + target: primary + cluster: + name: my-cluster + pluginConfiguration: + name: klio.enterprisedb.io +``` + +Apply the scheduled backup: + +```bash +kubectl apply -f scheduled-backup.yaml +``` + +## Backup Retention and Maintenance + +Klio automatically manages backup retention based on the +[retention policies](plugin_configuration.mdx#retention-policies) defined in the +`PluginConfiguration` referred by the `Cluster`. + +!!!important + +Deleting a `Backup` resource through `kubectl` only removes the Kubernetes +object. The actual backup data in the Klio server will be retained according to +the retention policy. +!!! + +## Finding Your backupID for Recovery + +To restore a specific backup, you need its backupID, otherwise Klio will +choose the latest one autonomously. +You can list all available, completed Backup resources using kubectl: + +```bash +kubectl get backups -n +``` + +Once you identify the backup you want to use, you can identify its backupID + +```bash +kubectl get backup -n -o jsonpath='{.status.backupId}' +``` + +Alternatively, you can use the [API service](api_service.mdx) to get the backup +list. + +## Restoring from a Backup + +Klio supports restoring PostgreSQL clusters from backups using CloudNativePG's +recovery mechanism. Unlike traditional in-place recovery, Klio follows +CloudNativePG's approach of **bootstrapping a new cluster** from a backup, +which ensures data integrity and allows for flexible recovery scenarios. + +### How Recovery Works + +Klio integrates with CloudNativePG's recovery process by performing the +following actions during a restore: + +1. **Restores the base backup**: Copies the physical backup data to the new + cluster's data directory. Uses `klio restore` command under the hood. +2. **Restores WAL files**: Klio is configured to retrieve the WAL files from + required for the PostgreSQL recovery as needed. + Uses `klio get-wal` command under the hood. + +The execution of these commands is driven by CloudNativePG's recovery +mechanism, which ensures that the PostgreSQL server starts correctly after +the restore. + +A restored cluster operates independently of the original cluster. By default, +it will **not** perform backups unless you explicitly configure the Klio plugin +for backup operations in the new cluster's specification. 
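+
+As a minimal sketch of one way this could look, assuming a separate
+`PluginConfiguration` named `my-restored-cluster-config` (a hypothetical name)
+has been created for the new cluster as described in the plugin configuration
+guide:
+
+```yaml
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: my-restored-cluster
+spec:
+  # Enable the Klio plugin for backup operations on the restored cluster.
+  # Check the plugin configuration guide for the authoritative field names.
+  plugins:
+    - name: klio.enterprisedb.io
+      parameters:
+        pluginConfigurationRef: my-restored-cluster-config
+  # ... bootstrap, storage, and the other fields shown in the next section
+```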
+ +### Full Restore + +To restore from a backup, create a new `Cluster` resource with a +`bootstrap.recovery` section that references the Klio plugin: + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: my-restored-cluster + namespace: default +spec: + instances: 3 + + # Bootstrap from a Klio backup + bootstrap: + recovery: + source: source + # OPTIONAL: Specify the backup to restore from + backupID: my-cluster-backup-YYYYMMDDHHMMSS + + # Reference the Klio plugin configuration + externalClusters: + - name: source + plugin: + name: klio.enterprisedb.io + parameters: + pluginConfigurationRef: my-restore-config + + storage: + size: 10Gi +``` + +!!!note + +Klio will choose the latest backup available in case the `backupID` field is +omitted. +!!! + +Create a corresponding `PluginConfiguration` that specifies which backup to +restore: + +```yaml +apiVersion: klio.enterprisedb.io/v1alpha1 +kind: PluginConfiguration +metadata: + name: my-restore-config + namespace: default +spec: + # Connection details + serverAddress: klio-server.default + clientSecretName: my-client-credentials + serverSecretName: klio-server-tls + + # Optional: specify the original cluster name if different + clusterName: my-cluster +``` + +The client credentials secret (`my-client-credentials`) should contain the +necessary authentication information to access the Klio server, as described +in the [Klio plugin configuration guide](plugin_configuration.mdx#client-credentials-secret). + +!!!note + +The `clusterName` field in the `PluginConfiguration` and the `commonName` +of the certificate should match the name of the **original cluster** that +was backed up, not the name of the new restored cluster. +!!! + +Apply both resources: + +```bash +kubectl apply -f restore-config.yaml +kubectl apply -f restored-cluster.yaml +``` + +### Point-in-Time Recovery (PITR) + +Klio supports Point-in-Time Recovery, allowing you to restore your database +to a specific moment in time rather than the latest available state. This is +useful for recovering from accidental data deletion or corruption. + +The process involves specifying a recovery target in the `Cluster` resource. +The available recovery targets are described in the +[CloudNativePG documentation](https://cloudnative-pg.io/documentation/current/recovery/#recovery-targets). + +#### Example: recover to a `targetTime` + +Restore to a specific timestamp: + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: my-pitr-cluster +spec: + bootstrap: + recovery: + source: source + # Recover to a specific point in time + recoveryTarget: + targetTime: "2025-11-06 15:00:00.0000+00" + # other cluster spec fields... +``` + +!!!important + +The target of a point in time recovery must fall between the time the base +backup was completed and the time of the latest transaction recorded in the +available WAL files. +!!! + +!!!note + +During the Point in Time Recovery, if `targetTime` or `targetLSN` are specified, +Klio will automatically choose the closest backup for the PITR, if not defined +with the `backupID` field. +!!! 
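+
+After recovery completes and the new cluster reports a healthy status, you can
+verify it with standard CloudNativePG tooling, for example:
+
+```bash
+kubectl get cluster my-pitr-cluster
+kubectl cnpg status my-pitr-cluster
+```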
diff --git a/product_docs/docs/klio/0/helm_chart.mdx b/product_docs/docs/klio/0/helm_chart.mdx new file mode 100644 index 0000000000..279057f59d --- /dev/null +++ b/product_docs/docs/klio/0/helm_chart.mdx @@ -0,0 +1,205 @@ +--- +title: EDB Klio Operator Helm Chart +navTitle: '' +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/helm_chart.mdx +sidebar_position: 90 +editTarget: originalFilePath + +--- + + + +import PartialValues from "./_helm_chart_values.mdx"; + + + +The EDB Klio Operator Helm chart from EDB allows you to deploy the Klio +Operator in your Kubernetes cluster. It is distributed as a private OCI image. + +## Prerequisites + +Before installing the Klio Operator, ensure you have: + +- **Helm** – see the [Helm installation guide](https://helm.sh/docs/intro/install/) +- **Kubernetes** cluster with appropriate permissions +- **Credentials** to access the registry hosting the Helm chart, the Klio operator + image, and the Klio operand image. For the EDB registry, your username will be `k8s` + and your password will be [your EDB Repos 2.0 token](https://www.enterprisedb.com/docs/repos/getting_started/with_web/get_your_token/). +- **CloudNativePG Operator** already installed in your Kubernetes cluster. + See the [CloudNativePG installation guide](https://cloudnative-pg.io/documentation/current/installation_upgrade/). +- **cert-manager** (optional, but strongly recommended for managing TLS certificates). + See the [cert-manager installation guide](https://cert-manager.io/docs/installation/). +- **Prometheus Operator** (optional, for operator monitoring). + See the [Prometheus Operator installation guide](https://prometheus-operator.dev/docs/getting-started/installation/). + +## Installation + +For simplicity, the instructions below assume you've put the following values into environment variables: + +- `$USERNAME` - will be `k8s` for the EDB registry. +- `$TOKEN` - will be your [EDB Repos 2.0 token](https://www.enterprisedb.com/docs/repos/getting_started/with_web/get_your_token/). +- `$NAMESPACE` - will be the namespace where you installed CNPG (`cnpg-system` is the default namespace for CNPG installations). + +Example: + +```sh +export USERNAME=k8s +export TOKEN=your-edb-token +export NAMESPACE=cnpg-system +``` + +### Step 1: Registry Authentication + +First, authenticate with the EDB registry where the Helm chart is hosted: + +```sh +helm registry login docker.enterprisedb.com -u "${USERNAME}" -p "${TOKEN}" +``` + +### Step 2: Create an Image Pull Secret + +Create a Kubernetes secret to allow the operator to pull container images from +the registry: + +```sh +kubectl create secret docker-registry klio-registry-secret \ + --docker-server=docker.enterprisedb.com \ + "--docker-username=${USERNAME}" \ + "--docker-password=${TOKEN}" \ + "--namespace "${NAMESPACE}" +``` + +!!!info Namespace Selection + +Select the namespace where you want to deploy the Klio Operator. This must be +the same namespace where CloudNativePG is deployed. +!!! 
+ +### Step 3: Install the Helm Chart + +Deploy the Klio Operator to your cluster: + + + +```sh +helm install klio-operator oci://docker.enterprisedb.com/k8s/klio-operator-chart \ + --version 0.0.11 \ + --namespace "${NAMESPACE}" \ + --set "controllerManager.manager.image.pullSecrets[0].name=klio-registry-secret" +``` + + + +### Step 4: Verify Installation + +After installation, verify that the Klio Operator is running: + +```sh +kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=klio +``` + +You should see the operator pod in a `Running` state. Check the logs to ensure +there are no errors: + +```sh +kubectl logs -n "${NAMESPACE}" deployment/klio-controller-manager -f +``` + +Verify that the Custom Resource Definitions (CRDs) were created: + +```sh +kubectl get crds | grep klio.enterprisedb.io +``` + +You should see CRDs like `servers.klio.enterprisedb.io` and `pluginconfigurations.klio.enterprisedb.io`. + +## Configuration + +### Customizing the Installation + +The chart is designed to be customizable, allowing you to configure multiple +aspects of the Klio Operator deployment, passing in values through a custom +`values.yaml` file or using the `--set` flag during installation. +See the [Helm documentation](https://helm.sh/docs/) for more details +on how to customize and manage Helm charts. + +#### Inspecting the Chart + +Before installing, you can download the Helm chart to inspect its contents, +review the default values, and understand what resources it will create: + + + +```sh +helm pull oci://docker.enterprisedb.com/k8s/klio-operator-chart --version 0.0.11 +``` + + + +This downloads the chart as a `.tgz` file. Extract it to examine the templates, +default `values.yaml`, and other chart files: + + + +```sh +tar -xzf klio-operator-chart-0.0.11.tgz +cd klio-operator-chart +cat values.yaml +``` + + + +### Configuration Reference + + + +## Upgrading + +To upgrade the Klio Operator to a newer version: + +```sh +helm upgrade klio-operator oci://docker.enterprisedb.com/k8s/klio-operator-chart \ + --version \ + --namespace "${NAMESPACE}" +``` + +...where `` is the version you're upgrading to. + +When upgrading, you can control how Helm handles values from the previous +installation. Please refer to the [Helm upgrade documentation](https://helm.sh/docs/helm/helm_upgrade/) +to understand the different options. + +!!!warning CRD Upgrades + +Helm does not automatically upgrade CRDs. If the new version includes CRD +updates, you may need to apply them manually. Check the release notes for +specific upgrade instructions. +!!! + +## Uninstalling + +To uninstall the Klio Operator: + +```sh +helm uninstall klio-operator --namespace "${NAMESPACE}" +``` + +!!!warning Data Preservation + +Uninstalling the operator does not automatically remove: + +- Custom Resource Definitions (CRDs) +- Existing Klio resources (Servers, PluginConfigurations) +- Persistent volumes containing backup data + +To completely remove Klio from your cluster, you must manually delete these resources. +If you want to completely remove Klio, you must manually delete these resources. +!!! 
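Before removing the CRDs, you may also want to delete any remaining Klio resources they define. A sketch that removes them across all namespaces (scope it to specific namespaces if preferred):

```sh
kubectl delete servers.klio.enterprisedb.io --all --all-namespaces
kubectl delete pluginconfigurations.klio.enterprisedb.io --all --all-namespaces
```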
+ +To remove the CRDs after uninstalling: + +```sh +kubectl delete crd servers.klio.enterprisedb.io +kubectl delete crd pluginconfigurations.klio.enterprisedb.io +``` diff --git a/product_docs/docs/klio/0/images/basebackups_walarchive.png b/product_docs/docs/klio/0/images/basebackups_walarchive.png new file mode 100644 index 0000000000..8880b8c2ba --- /dev/null +++ b/product_docs/docs/klio/0/images/basebackups_walarchive.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4de2c6fa708c49065b625ee92375ae279077a10dc6aad07ecc178b51f291f7 +size 17516 diff --git a/product_docs/docs/klio/0/images/overview-multi-tiers.png b/product_docs/docs/klio/0/images/overview-multi-tiers.png new file mode 100644 index 0000000000..b5171be827 --- /dev/null +++ b/product_docs/docs/klio/0/images/overview-multi-tiers.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc1bce7c07c22ae7fc114748b522b99d5c51169b1271896539012fa8da49648 +size 73488 diff --git a/product_docs/docs/klio/0/images/tier1-namespace-multi.png b/product_docs/docs/klio/0/images/tier1-namespace-multi.png new file mode 100644 index 0000000000..c0af30615a --- /dev/null +++ b/product_docs/docs/klio/0/images/tier1-namespace-multi.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6921bdaf4b76a8e9b863edfc6614f5a3f6e63e155beff68006bffb6da7429591 +size 95217 diff --git a/product_docs/docs/klio/0/images/tier1-namespace-single.png b/product_docs/docs/klio/0/images/tier1-namespace-single.png new file mode 100644 index 0000000000..879f1b98c0 --- /dev/null +++ b/product_docs/docs/klio/0/images/tier1-namespace-single.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6ae084132f92cec5e0ff7f15b4b5db3f957988af25b3cd224d801fd232d37a +size 38665 diff --git a/product_docs/docs/klio/0/images/tier1-shared-multi.png b/product_docs/docs/klio/0/images/tier1-shared-multi.png new file mode 100644 index 0000000000..6d83dcfc7e --- /dev/null +++ b/product_docs/docs/klio/0/images/tier1-shared-multi.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb75f834d4c850d884ede3fba795c499407e89ff79e252051d95080c30ac8d4 +size 75090 diff --git a/product_docs/docs/klio/0/images/tier1-shared-single.png b/product_docs/docs/klio/0/images/tier1-shared-single.png new file mode 100644 index 0000000000..22106089b3 --- /dev/null +++ b/product_docs/docs/klio/0/images/tier1-shared-single.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:582b2289089c5f0d7648d28646ccd00b0ea47080031596bd040cd2de032ee4cb +size 40722 diff --git a/product_docs/docs/klio/0/images/wal-streaming.png b/product_docs/docs/klio/0/images/wal-streaming.png new file mode 100644 index 0000000000..7ba175eab2 --- /dev/null +++ b/product_docs/docs/klio/0/images/wal-streaming.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc19985d9893e142755ffbc7a838ac101a9b5cbb3fe8e5e35b000c51c612f05 +size 34122 diff --git a/product_docs/docs/klio/0/index.mdx b/product_docs/docs/klio/0/index.mdx new file mode 100644 index 0000000000..ba3eb7855b --- /dev/null +++ b/product_docs/docs/klio/0/index.mdx @@ -0,0 +1,116 @@ +--- +title: Klio Overview +navigation: + - main_concepts + - architectures + - wal_streaming + - klio_server + - plugin_configuration + - backup_and_restore + - opentelemetry + - api_service + - walplayer + - helm_chart + - api + - images + - '!_helm_chart_values' +originalFilePath: >- + 
https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/index.mdx +directoryDefaults: + version: 0.0.11 + displayBanner: >- + This is documentation for a Tech Preview of EDB's {{name.ln}} + ({{name.short}}) solution. It is made available AS IS for testing and early + evaluation purposes ONLY! Is is not to be used in production environments. + For details, please refer to EULA + section 9.4. +sidebar_position: 1 +editTarget: originalFilePath + +--- + +**Klio** is a cloud-native solution for enterprise-grade backup and recovery of +PostgreSQL databases managed by [CloudNativePG](https://cloudnative-pg.io) on +Kubernetes. It is designed to handle: + +- The **Write-Ahead Log (WAL) archive** for a given PostgreSQL `Cluster` + resource, within the same Kubernetes namespace as the Klio deployment +- The **catalog of physical base backups** for that same cluster +- Optionally, multiple PostgreSQL clusters + +These critical backup artifacts are stored across two distinct storage tiers: + +- Tier 1 – **Local Volume**: A local Persistent Volume (PV) within the + same namespace as the associated `Cluster` resource. It offers immediate, + high-throughput access for backup and recovery operations. Also referred to as + the **Main Tier** or **Klio Server**. + +- Tier 2 – **Secondary Storage**: An external object storage system where data + from Tier 1 is asynchronously replicated. This tier typically resides outside + the Kubernetes cluster, enabling geographical redundancy and enhancing disaster + recovery (DR) resilience. + +![Multi-tiered architecture overview](images/overview-multi-tiers.png) + +* * * + +## Key Features + +!!!note + +Some of the following features are currently aspirational and under active +development. +!!! + +### WAL Management + +- Native WAL streaming from the primary, eliminating the need for + `archive_command`, with support for: + - Partial WAL file handling + - WAL file compression + - WAL file encryption using user-provided keys + - Controlled replication slot advancement to ensure uninterrupted streaming + - Synchronous replication +- WAL archive storage on a local PVC (Tier 1) +- Extension of base backup retention policy enforcement to WAL files +- Asynchronous WAL relay to Tier 2 object storage + +!!!important + +Klio's WAL management utilizes the `READ_REPLICATION_SLOT` streaming +replication command, which was introduced in PostgreSQL 15. +Therefore, Klio requires PostgreSQL version 15 or greater to function properly. +!!! + +### Base Backup Catalog + +- Physical online base backups from the primary node to Tier 1, with support + for: + - Data deduplication for efficient remote incremental backups + - Compression to optimize storage usage + - Encryption using user-provided keys for data confidentiality +- Backup catalog stored on a file system Persistent Volume Claim (PVC) in Tier 1 +- Integration with CloudNativePG Kubernetes Volume Snapshots (Tier 0), + enabling asynchronous offload to Tier 1 using the same physical backup + process[^1] +- Retention policy enforcement +- Asynchronous replication of base backups to Tier 2 object storage for + long-term durability and disaster recovery + +!!!important + +Kubernetes Volume Snapshot integration (Tier 0) is only available for storage +classes that support volume snapshots. +!!! 
+ +### General Capabilities + +- End-to-end encryption: both in-transit and at-rest +- Designed for seamless integration with Kubernetes-native data protection + tools such as Veeam Kasten, Velero, and others[^1] +- Delivered as a CNPG-I plugin, with an accompanying Kubernetes Operator +- Available as a Certified Red Hat OpenShift Operator[^1] +- Distributed via a Helm chart for streamlined deployment + +[^1]\: Not yet available; planned for a future release. diff --git a/product_docs/docs/klio/0/klio_server.mdx b/product_docs/docs/klio/0/klio_server.mdx new file mode 100644 index 0000000000..ea81ad935a --- /dev/null +++ b/product_docs/docs/klio/0/klio_server.mdx @@ -0,0 +1,669 @@ +--- +title: The Klio Server +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/klio_server.md +sidebar_position: 5 +editTarget: originalFilePath + +--- + +The Klio server is a central component of the Klio backup solution. It is +defined as the `Server` custom resource in Kubernetes, which creates a +StatefulSet running the Klio server application. + +The Klio server is composed of two main containers: + +- `base`: Manages full and incremental backups using Kopia. +- `wal`: Receives the stream of PostgreSQL Write-Ahead Logs (WAL). + +An additional init container, `init`, is responsible for initializing the +Kopia repository and setting up the necessary configuration. + +The base backups and WAL files are stored in multiple PersistentVolume attached +to the Klio server pod in the `/data/base` and `/data/wal` directories, respectively. + +An additional cache defined by a PersistentVolume is used for the Kopia cache. +This cache allows Kopia to quickly browse repository contents without +having to download from the storage location. + +## Storage Tiers + +### Tier 1: Local Storage + +Tier 1 uses local `PersistentVolumes` for immediate data access. +This is the primary landing zone for backups and WAL files, +providing the fastest recovery times. + +### Tier 2: Remote Object Storage + +!!!warning Work in Progress + +Tier 2 functionality is currently under heavy development and should be +considered experimental. The features described below are subject to change. +!!! + +Tier 2 offloads data to S3-compatible object storage. +This is used for long-term retention and disaster recovery. +When Tier 2 is enabled, the server uses a work queue to manage +the asynchronous transfer of data from the local environment to the cloud. + +### The Work Queue + +If both Tier 1 and Tier 2 are configured, it is mandatory to configure +a work queue in the klio Server resource. +The work queue is backed by NATS JetStream with file storage on a separate +`PersistentVolume mounted` at `/queue`. +When a WAL file is received, the server publishes a notification to the queue, +enabling asynchronous processing. This ensures that the primary backup flow +is not slowed down by network latency to remote object storage. + +## Storage Requirements + +The Klio Server uses three distinct PersistentVolumeClaims (PVCs), each +serving a different purpose. Understanding what each PVC contains helps you +size them appropriately for your environment. + +### Data PVC + +The data PVC stores all backup data and WAL archives for Tier 1 storage. + +It holds the base backups and the WAL archive of all the servers that are backed +up. + +The following factors should be considered when defining the PVC size: + +1. WAL file production rate +2. Base backup size +3. 
Retention policies + +### Cache PVCs + +The cache PVCs (one for Tier 1 and Tier 2 each) are used by Kopia for its +[caching operations](https://kopia.io/docs/advanced/caching/). +They are used to speed up snapshot operations. + +!!!warning + +Klio is currently limited to use the default cache size when creating a Kopia +repository, 5GB for content and 5GB for metadata. +The cache sizes are not hard limits, as the cache is swept periodically, +so users should have a space buffer to account for this additional space. +This limitation will be removed in a future version. +!!! + +### Queue PVC + +The queue PVC is only required when both Tier 1 and Tier 2 are configured. +It stores the NATS JetStream work queue used for asynchronous Tier 2 +replication. + +## Setting up a new Klio server + +Setting up a Klio server involves creating a `Server` resource along with the +required Kubernetes secrets and certificates. + +### Prerequisites + +Before setting up a Klio server, ensure you have: + +- A Kubernetes cluster with the Klio operator installed +- `kubectl` configured to access your cluster +- [cert-manager](https://cert-manager.io/) installed for certificate + management (recommended) +- Enough storage resources for the data and cache PersistentVolumeClaims +- Enough storage resources for the queue PersistentVolumeClaim + +### Required Components + +A Klio server setup requires the following components: + +1. **Server Resource**: The main `Server` custom resource +2. **TLS Certificate**: For secure communication +3. **Encryption Password**: For encrypting backup data at rest +4. **CA Certificate**: For client authentication via mTLS +5. **Storage**: PersistentVolumeClaims for data, cache, and queue + +### Step-by-step setup + +#### 1. Create the Encryption Key Secret + +The encryption key is used to encrypt backup data at rest: + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: my-server-encryption + namespace: default +type: Opaque +data: + encryptionKey: "bXktc2VjdXJlLWtleQ==" # my-secure-key +``` + +Apply the secret: + +```bash +kubectl apply -f encryption-secret.yaml +``` + +!!!tip + +Use a strong, randomly generated key. This key is critical for +data security and recovery. +!!! + +#### 2. Create CA Certificate + +Using cert-manager, a CA certificate can be created by using the following +Certificate resource: + +```yaml +--- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: selfsigned-issuer + namespace: default +spec: + selfSigned: { } +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: server-sample-ca +spec: + commonName: server-sample-ca + secretName: server-sample-ca + + duration: 2160h # 90d + renewBefore: 360h # 15d + + isCA: true + usages: + - cert sign + + issuerRef: + name: selfsigned-issuer + kind: Issuer + group: cert-manager.io +``` + +Apply the CA configuration with: + +``` +kubectl apply -f ca-configuration.yaml +``` + +In the previous example, the CA to be used for authentication is signed by a +self-signed issuer. This doesn't pose any security issue as this CA is only +used internally and trust is established through configuration. + +The primary concern is the relationship between the client and the certificates +signed by the CA. + +!!!info + +The usage of a self-signed CA is not required by the Klio server. If your +PKI infrastructure already includes a CA for this scope, that CA can be used +for the Klio server, too. +!!! + +#### 3. 
Create TLS Certificate + +Using cert-manager, create a self-signed certificate (for development) or use +your organization's certificate issuer: + +```yaml +--- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: selfsigned-issuer + namespace: default +spec: + selfSigned: { } +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: my-server-cert + namespace: default +spec: + secretName: my-server-tls + commonName: my-server + dnsNames: + - my-server + - my-server.default + - my-server.default.svc + - my-server.default.svc.cluster.local + duration: 2160h # 90 days + renewBefore: 360h # 15 days + isCA: false + usages: + - server auth + issuerRef: + name: selfsigned-issuer + kind: Issuer + group: cert-manager.io +``` + +Apply the certificate configuration: + +```bash +kubectl apply -f tls-certificate.yaml +``` + +!!!info + +For production environments, use certificates signed by your organization's +Certificate Authority (CA) or a trusted public CA instead of self-signed +certificates. +!!! + +#### 4. Create the Server Resource + +Now create the main `Server` resource: + + + +```yaml +apiVersion: klio.enterprisedb.io/v1alpha1 +kind: Server +metadata: + name: my-server + namespace: default +spec: + # Container image for the Klio server + image: docker.enterprisedb.com/k8s/klio:v0.0.11 + imagePullPolicy: IfNotPresent + imagePullSecrets: [] # Add image pull secrets if needed + + # TLS configuration + tlsSecretName: my-server-tls + + # Client authentication configuration + caSecretName: server-sample-ca + + # tier 1 configuration + tier1: + # Cache storage configuration + cache: + pvcTemplate: + storageClassName: standard # Adjust to your storage class (use 'kubectl get storageclass' to see available options) + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi # Adjust based on your needs + # Data storage pvcTemplate (for backups and WAL) + data: + pvcTemplate: + storageClassName: standard # Adjust to your storage class (use 'kubectl get storageclass' to see available options) + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi # Adjust based on your backup needs + # Encryption key reference + encryptionKey: + name: my-server-encryption + key: encryptionKey + + # Queue storage configuration (for NATS work queue) + # It can be added only if both tier1 and tier2 are configured + queue: + pvcTemplate: + storageClassName: standard # Adjust to your storage class + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi # Adjust based on queue volume needs + + # tier 2 configuration + tier2: + # Cache storage configuration + cache: + pvcTemplate: + resources: + requests: + storage: 1Gi + accessModes: + - ReadWriteOnce + # Encryption key reference. Can differ from tier1 encryption key. + encryptionKey: + name: my-server-encryption + key: encryptionKey + # S3 access configuration + s3: + prefix: klio + bucketName: klio-bucket + endpoint: https://minio:9000 + region: us-east-1 + accessKeyId: + name: minio + key: ACCESS_KEY_ID + secretAccessKey: + name: minio + key: ACCESS_SECRET_KEY + customCaBundle: + name: minio-server-tls + key: tls.crt +``` + + + +Apply the Server resource: + +```bash +kubectl apply -f klio-server.yaml +``` + +#### 5. 
Verify the Server is Running + +Check the status of your Klio server: + +```bash +# Check the Server resource status +kubectl get server my-server -n default + +# Check the StatefulSet +kubectl get statefulset my-server-klio -n default + +# Check the Pod +kubectl get pods -l klio.enterprisedb.io/klio-server=my-server -n default + +# View logs +kubectl logs -l klio.enterprisedb.io/klio-server=my-server -n default -f +``` + +The server should create a StatefulSet with a pod named `my-server-klio-0`. + +## Advanced Configuration + +The `.spec.template` field allows you to customize the Klio server's pod +template. You can add additional containers, volumes, or modify existing +settings. + +!!!warning Advanced Users Only + +The `.spec.template` field is primarily designed for advanced configurations. +While powerful, improper modifications can affect server functionality. +Always test changes in a non-production environment first. +!!! + +!!!note + +The `containers` field within `.spec.template.spec` is mandatory but will be +merged with the default Klio server containers `base` and `wal`. If you do not +need to add containers or modify the default ones, you must still include an +empty list. +!!! + +### Node Affinity and Tolerations + +To dedicate specific nodes for Klio workloads (e.g., for performance isolation +or to separate backup workloads from application workloads), you can use the +`template` field to define affinity and toleration rules. + +```yaml +spec: + template: + spec: + # Mandatory field; merged with default containers + containers: [] + tolerations: + # Allow scheduling on nodes tainted for Klio + - key: node-role.kubernetes.io/klio + operator: Exists + effect: NoSchedule + affinity: + # Require nodes labeled for Klio + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/klio + operator: Exists +``` + +See [Reserving Nodes for Klio Workloads](architectures.mdx#reserving-nodes-for-klio-workloads) +for details on node tainting. + +### Monitoring + +Refer to the [OpenTelemetry](opentelemetry.mdx#klio-server-with-opentelemetry) +documentation for setting up monitoring and telemetry for the Klio server. + +## Encryption + +Klio implements encryption at rest for both base backups and WAL files to +ensure data security throughout the backup lifecycle. + +### Base Backups Encryption + +Base backups are encrypted by Kopia using the encryption password provided in +the `encryptionKey` secret references. Kopia handles encryption transparently. + +The encryption key is set during repository initialization and is required +for all subsequent backup and restore operations. + +!!!warning Critical + +Store the encryption key securely. Loss of this key means permanent +loss of access to all backup data. There is no key recovery mechanism. +!!! + +### WAL Files Encryption + +WAL files are encrypted using a master key derivation system with authenticated +encryption. The encryption process works as follows: + +1. **Master Key Generation**: A 32-byte master key is derived from the encryption + key using PBKDF2 +2. **Key Enveloping**: The master key itself is encrypted using AES-256-GCM + with a password-derived encryption key to protect the key at rest +3. 
**Per-File Encryption**: Each WAL file is compressed and then encrypted using + the master key with authenticated encryption before being stored + +WAL files are first compressed using Snappy S2 compression, +then encrypted to ensure both space efficiency and security. + +The same encryption key used for base backups encrypts the WAL files, +ensuring a unified security model across all backup artifacts. + +### Encryption Password Rotation + +Currently, encryption key rotation is not supported. To change the +encryption key, you would need to: + +1. Create a new Klio server with a new encryption key +2. Perform new base backups to the new server +3. Migrate to using the new server + +!!!tip + +Choose a strong encryption key from the start. Use a password manager or +key management system to generate and store a cryptographically secure key +(recommended: 32+ random characters). +!!! + +### Encryption in Transit + +In addition to encryption at rest, Klio protects both base backups and WAL files +during transmission using TLS (Transport Layer Security). + +All communication between a Klio client and the Klio server is secured +with TLS: + +- **Base Backup Traffic**: Kopia client connections to the base backup server + are encrypted using TLS, protecting backup data as it transfers to the Klio + server +- **WAL Streaming**: PostgreSQL instances streaming WAL files to the Klio server + use gRPC over TLS, ensuring WAL data is encrypted during transmission + +The TLS certificate is configured via the `.spec.tlsSecretName` field in the +Server resource, which references a Kubernetes secret containing the TLS +certificate and private key. This provides end-to-end encryption, ensuring that +backup data is protected both at rest and in transit. + +## Authentication + +Klio uses mTLS Authentication for securing access to both the base backup server +and the WAL streaming server. Authentication is handled by verifying the client +certificates against the CA certificate which has been created when configuring +the Klio server. + +### Creating a client-side certificate + +To create a client-side certificate, you need a issuer that will sign all the +certificates with a CA known by the Klio server. Supposing that such a issuer is +called `server-sample-ca` and available in the current namespace, you can create +a client certificate with the following Certificate object: + +```yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: client-sample-tls +spec: + secretName: client-sample-tls + commonName: klio@cluster-1 + + duration: 2160h # 90d + renewBefore: 360h # 15d + + isCA: false + usages: + - client auth + + issuerRef: + name: server-sample-ca + kind: Issuer + group: cert-manager.io +``` + +If used the example proposed in the [server configuration documentation +page](#2-create-ca-certificate), the issuer can be created with: + +```yaml +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: server-sample-ca +spec: + ca: + secretName: server-sample-ca +``` + +## Access Control Lists (ACLs) + +Klio automatically configures Kopia's Access Control Lists (ACLs) during server +startup to provide fine-grained access control to back up snapshots. This +automation eliminates the need for manual ACL configuration. + +### Automatic ACL Configuration + +When the Klio server starts, it automatically: + +1. **Enables ACL support** in the Kopia repository +2. **Creates a read-only user** (`snapshot_reader@klio`) + with READ access to all snapshots +3. 
**Configures the API server** to use the read-only user for backup catalog queries + +This automation ensures that the Klio [API service](api_service.mdx) (used for +backup observability and catalog browsing) operates with minimal privileges, +following the principle of least privilege. + +### How ACLs Work + +Kopia's ACL system controls access to repository resources based on: + +- **User identity**: The authenticated username +- **Resource type**: What is being accessed (e.g., snapshots, policies) +- **Access level**: READ, APPEND, or FULL access + +The automated ACL configuration creates the following rule: + +``` +User: snapshot_reader@klio +Access: READ +Target: type=snapshot (all snapshots in the repository) +``` + +This allows the API server to: + +- List all available backups +- Read backup metadata and manifests +- Browse backup catalogs +- Provide observability into the backup state + +However, the read-only user **cannot**: + +- Create new snapshots +- Modify existing snapshots +- Delete backups +- Change repository configuration +- Modify ACL rules + +### User Configuration + +From the authentication point-of-view, the `snapshot_reader@klio` user is not +special, and to use it you need a corresponding Secret containing a certificate +to be used for authentication. + +Cert-manager can create such a secret with the following Certificate definition: + +```yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: client-sample-tls +spec: + secretName: client-sample-tls + commonName: snapshot_reader@klio + + duration: 2160h # 90d + renewBefore: 360h # 15d + + isCA: false + usages: + - client auth + + issuerRef: + name: server-sample-ca + kind: Issuer + group: cert-manager.io +``` + +### API Server Integration + +The Klio API service deployment is automatically configured to use the +`snapshot_reader@klio` user for all Kopia operations. This happens through +environment variable configuration in the API server deployment: + +```yaml +- name: CLIENT_BASE_CLIENT_CERT_PATH + value: /client-certs/tls.crt +- name: CLIENT_BASE_CLIENT_KEY_PATH + value: /client-certs/tls.key +``` + +No manual configuration is required. The API server will automatically use the +restricted read-only access for all backup catalog queries. + +### Benefits + +The automated ACL configuration provides several benefits: + +1. **Security**: API server operates with minimal privileges +2. **Simplicity**: No manual ACL commands required during setup +3. **Consistency**: ACL configuration is standardized across all deployments +4. **Separation of Concerns**: Read operations (API server) are isolated from + write operations (backup/restore processes) + +### Idempotency + +The ACL automation is idempotent. If ACLs are already enabled or the user +already exists, the startup process will detect this and continue without +error. This allows for safe server restarts and upgrades. 
diff --git a/product_docs/docs/klio/0/main_concepts.mdx b/product_docs/docs/klio/0/main_concepts.mdx new file mode 100644 index 0000000000..5f3df1a7d1 --- /dev/null +++ b/product_docs/docs/klio/0/main_concepts.mdx @@ -0,0 +1,131 @@ +--- +title: Main Concepts +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/main_concepts.md +sidebar_position: 2 +editTarget: originalFilePath + +--- + +Klio is built on top of two foundational technologies: + +- PostgreSQL's native physical backup infrastructure +- The CloudNativePG Interface (CNPG-I) for backup and recovery + +PostgreSQL has provided **native continuous backup and point-in-time recovery +(PITR) capabilities since version 8.0, released in 2005**, enabling reliable +disaster recovery and business continuity for mission-critical systems +worldwide. + +!!!info + +PostgreSQL offers logical backups using tools like `pg_dump`, which generate a +logical representation of the database as SQL statements or data files. Logical +backups do not provide continuous protection or point-in-time recovery +capabilities. As a result, they are not suitable for **business continuity +scenarios** in mission-critical environments where minimizing downtime and data +loss is essential. +!!! + +At its core, [PostgreSQL’s continuous backup and recovery](https://www.postgresql.org/docs/current/continuous-archiving.html) +system uses **physical (file system level) copies** combined with **write-ahead +log (WAL) archiving**. +This approach enables consistent, recoverable backups while keeping systems +online, a strategy proven effective in production environments for over two +decades. + +In a PostgreSQL backup solution, the infrastructure typically consists of: + +- **WAL Archive**: A designated location for continuously archived WAL + (write-ahead log) files, preserving all changes made to the database to + support data durability and recovery. +- **Physical Base Backups**: A consistent copy of all data files used by + PostgreSQL (primarily the `PGDATA` directory and any tablespaces), forming + the foundational layer for any recovery operation. + +The diagram below illustrates the relationship between physical base backups +and the WAL archive over time: + +![Physical backups, WAL archive, and time](images/basebackups_walarchive.png) + +* * * + +## WAL Archive + +The WAL archive is central to **continuous backup** in PostgreSQL and is +essential for: + +- **Hot (Online) Backups**: Allowing physical base backups to be taken from any + node (primary or standby) without shutting down PostgreSQL, ensuring backups + can proceed without service disruption. +- **Point-in-Time Recovery (PITR)**: Enabling recovery to any precise moment + after the earliest available base backup, using archived WAL files to replay + transactions up to the desired recovery point. + +!!!important + +WAL archives on their own are insufficient for disaster recovery. +A **physical base backup is required** to restore a PostgreSQL cluster. +!!! + +Using a WAL archive significantly enhances the resilience of a PostgreSQL +system. WAL files can be fetched by any PostgreSQL instance for replication or +recovery, with archives typically retaining WAL segments longer than local +retention policies, ensuring historical data is preserved for PITR and disaster +recovery workflows. + +Klio receives WAL content from a PostgreSQL primary via streaming replication. 
+ +* * * + +## Physical base backups + +PostgreSQL supports **physical base backups** as the cornerstone of its +disaster recovery and PITR strategies. A base backup is a **consistent, file +system-level copy** of all data files used by a PostgreSQL cluster, including +the `PGDATA` directory and any additional tablespaces. + +Key properties of PostgreSQL base backups: + +- **Online (Hot) Backups**: Base backups can be taken while the database is + online, avoiding downtime. PostgreSQL maintains consistency during an online + backup by coordinating with its write-ahead logging system, ensuring a valid + restore point. +- **Foundation for PITR**: A base backup provides the starting point for + point-in-time recovery. After restoring the base backup, archived WAL files + are replayed to advance the system to a specific recovery target, allowing + precise restoration following accidental data loss or corruption. +- **Efficient Storage and Transport**: Base backups can be compressed and + streamed to external or object storage, supporting offsite and cloud-based + disaster recovery workflows. + +Klio leverages CNPG-I to coordinate the hot backup procedure, using +PostgreSQL’s `pg_backup_start` and `pg_backup_stop` concurrent API to ensure +consistency. It uses [Kopia](https://github.com/kopia/kopia/) to efficiently +transfer backup data across locations, ensuring backups are portable, +secure, and space-efficient. + +* * * + +## Recovery + +In PostgreSQL, **recovery** is the process of restoring a database cluster from +a **physical base backup**, bringing it to a consistent state by replaying +**write-ahead log (WAL)** files, which contain the necessary *redo* information +for all changes made after the backup. + +PostgreSQL’s recovery system supports [Point-in-Time Recovery (PITR)](https://www.postgresql.org/docs/current/continuous-archiving.html#BACKUP-PITR-RECOVERY), +enabling you to restore a cluster to **any precise moment** between your +earliest base backup and the latest available WAL segment. To perform recovery, +a **valid WAL archive is required alongside the physical base backup**. + +Klio follows the approach of CloudNativePG and implements the recovery part of +CNPG-I. It **does not perform in-place recovery on an existing cluster**; +instead, recovery is used to **bootstrap a new cluster** from a base backup and +replay WAL files to reach a desired state. + +Recovery can operate in two primary modes: full recovery (replaying WAL files +to the latest available segment) or **Point-in-Time Recovery (PITR)**, allowing +restoration to a chosen state before an incident such as accidental data +deletion. Klio supports all PITR targets provided by CloudNativePG, including +time, restore point, and transaction. diff --git a/product_docs/docs/klio/0/opentelemetry.mdx b/product_docs/docs/klio/0/opentelemetry.mdx new file mode 100644 index 0000000000..d7dcf707d9 --- /dev/null +++ b/product_docs/docs/klio/0/opentelemetry.mdx @@ -0,0 +1,414 @@ +--- +title: OpenTelemetry Observability +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/opentelemetry.md +sidebar_position: 8 +editTarget: originalFilePath + +--- + +Klio provides built-in support for [OpenTelemetry](https://opentelemetry.io/), +enabling comprehensive observability through distributed tracing and metrics +collection. This allows you to monitor backup operations, performance +characteristics, and system health across your Klio deployment. 
+ +## Available Telemetry + +Klio automatically collects the following: + +- Traces + - Distributed WAL streaming and processing +- Metrics + - Server + - Backup operation metrics + - Number of snapshots + - Number of files in the latest snapshot + - Number of directories in the latest snapshot + - Size of the latest snapshot + - Age of the latest snapshot + - Age of the oldest snapshot + - WAL processing metrics + - Number of WAL files written + - Bytes written + - [GRPC metrics](https://opentelemetry.io/docs/specs/semconv/rpc/rpc-metrics/) + - [Go runtime statistics](https://pkg.go.dev/go.opentelemetry.io/contrib/instrumentation/runtime) + - [Host metrics](https://pkg.go.dev/go.opentelemetry.io/contrib/instrumentation/host) + - [Controller runtime metrics](https://book.kubebuilder.io/reference/metrics-reference) + - Client + - [GRPC metrics](https://opentelemetry.io/docs/specs/semconv/rpc/rpc-metrics/) + - [Go runtime statistics](https://pkg.go.dev/go.opentelemetry.io/contrib/instrumentation/runtime) + - [Host metrics](https://pkg.go.dev/go.opentelemetry.io/contrib/instrumentation/host) + - [Controller runtime metrics](https://book.kubebuilder.io/reference/metrics-reference) + +!!!note + +Log exporters are not currently supported. +!!! + +## Configuration + +Klio automatically detects OpenTelemetry configuration through standard +environment variables. If no OpenTelemetry environment variables are present, +Klio will use no-op providers that don't collect any telemetry data. + +Traces and metrics exporters can be configured independently through the +[`autoexport`](https://go.opentelemetry.io/contrib/exporters/autoexport) package. + +### General Settings + +The following environment variables are used to configure OpenTelemetry: + +- `OTEL_SERVICE_NAME`: (required) Name of the service, e.g., `klio-server` +- `OTEL_RESOURCE_ATTRIBUTES`: Comma-separated list of resource attributes + (e.g., `deployment.environment=production,service.namespace=klio-system`) +- `OTEL_RESOURCE_DETECTORS`: Comma-separated list of resource detectors + from the [`autodetect`](https://pkg.go.dev/go.opentelemetry.io/contrib/detectors/autodetect) + package, used to automatically populate resource attributes + +### Traces exporter + +To enable the traces exporter, set the `OTEL_TRACES_EXPORTER` environment +variable to one of the supported exporters: + +- `otlp`: OpenTelemetry Protocol (OTLP) exporter +- `console`: Console exporter (useful for debugging) +- `none`: No-op exporter (disables tracing) + +You can define the OTLP protocol using the `OTEL_EXPORTER_OTLP_TRACES_PROTOCOL` +variable, or the general `OTEL_EXPORTER_OTLP_PROTOCOL`. 
Supported protocols include: + +- `http/protobuf` (default) +- `grpc` + +Additional configuration options for trace exporters can be found in the documentation +of the respective exporters: + +- [OTLP Trace gRPC Exporter](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc) +- [OTLP Trace HTTP Exporter](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp) + +### Metrics Exporter + +To enable the metrics exporter, set the `OTEL_METRICS_EXPORTER` environment +variable to one of the supported exporters: + +- `otlp`: OpenTelemetry Protocol (OTLP) exporter +- `prometheus`: Prometheus exporter + HTTP server +- `console`: Console exporter (useful for debugging) +- `none`: No-op exporter (disables metrics) + +You can define the OTLP protocol using the `OTEL_EXPORTER_OTLP_METRICS_PROTOCOL` +variable, or the general `OTEL_EXPORTER_OTLP_PROTOCOL`. Supported protocols include: + +- `http/protobuf` (default) +- `grpc` + +Additional configuration options for metrics exporters can be found in the documentation +of the respective exporters: + +- [OTLP Metric gRPC Exporter](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc) +- [OTLP Metric HTTP Exporter](https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp) + +For the Prometheus exporter, you can configure the host and port of the HTTP +server using the following environment variables: + +- `OTEL_EXPORTER_PROMETHEUS_HOST` (default: `localhost`) +- `OTEL_EXPORTER_PROMETHEUS_PORT` (default: `9464`) + +## Configuring Klio with OpenTelemetry in Kubernetes + +When running in a Kubernetes environment, Klio will automatically define +`CONTAINER_NAME`, `POD_NAME` and `NAMESPACE_NAME` environment variables. +When any of these environment variables are set, Klio will automatically add +the corresponding resource attributes (`k8s.container.name`, `k8s.pod.name`, +`k8s.namespace.name`) to all telemetry data. Each attribute is added +independently - you don't need all three environment variables to be present. + +!!!important + +If you have already defined any of these attributes in `OTEL_RESOURCE_ATTRIBUTES`, +Klio will **not override** them. Only missing attributes will be added from the +environment variables. This allows you to customize the values while still +benefiting from automatic defaults for any attributes you don't explicitly set. +!!! + +### Klio server with OpenTelemetry + +When deploying Klio `Server`, you can configure OpenTelemetry specifying the +necessary environment variables in the `template` section of the `Server` spec, +overriding the generated pod. + +```yaml +apiVersion: klio.enterprisedb.io/v1alpha1 +kind: Server +metadata: + name: server-sample +spec: + # ... other configuration ... 
+ template: + spec: + containers: + - name: base + env: + - name: OTEL_SERVICE_NAME + value: "klio-base" + - name: OTEL_RESOURCE_DETECTORS + value: "telemetry.sdk,host,os.type,process.executable.name" + - name: OTEL_TRACES_EXPORTER + value: "otlp" + - name: OTEL_EXPORTER_OTLP_TRACES_PROTOCOL + value: "grpc" + - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + value: "https://otel-collector:4317" + - name: OTEL_EXPORTER_OTLP_TRACES_COMPRESSION + value: "gzip" + - name: OTEL_EXPORTER_OTLP_TRACES_TIMEOUT + value: "10000" + - name: OTEL_EXPORTER_OTLP_TRACES_INSECURE + value: "false" + - name: OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE + value: "/otel/ca.crt" + - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_CERTIFICATE + value: "/otel/tls.crt" + - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_KEY + value: "/otel/tls.key" + - name: OTEL_METRICS_EXPORTER + value: "otlp" + - name: OTEL_METRIC_EXPORT_INTERVAL + value: "60000" + - name: OTEL_EXPORTER_OTLP_METRICS_PROTOCOL + value: "grpc" + - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT + value: "https://otel-collector:4317" + - name: OTEL_EXPORTER_OTLP_METRICS_TIMEOUT + value: "60000" + - name: OTEL_EXPORTER_OTLP_METRICS_INSECURE + value: "false" + - name: OTEL_EXPORTER_OTLP_METRICS_CERTIFICATE + value: "/otel/ca.crt" + - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_CERTIFICATE + value: "/otel/tls.crt" + - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_KEY + value: "/otel/tls.key" + volumeMounts: + - mountPath: /otel + name: otel + - name: wal + env: + - name: OTEL_SERVICE_NAME + value: "klio-wal" + - name: OTEL_RESOURCE_DETECTORS + value: "telemetry.sdk,host,os.type,process.executable.name" + - name: OTEL_TRACES_EXPORTER + value: "otlp" + - name: OTEL_EXPORTER_OTLP_TRACES_PROTOCOL + value: "grpc" + - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + value: "https://otel-collector:4317" + - name: OTEL_EXPORTER_OTLP_TRACES_COMPRESSION + value: "gzip" + - name: OTEL_EXPORTER_OTLP_TRACES_TIMEOUT + value: "10000" + - name: OTEL_EXPORTER_OTLP_TRACES_INSECURE + value: "false" + - name: OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE + value: "/otel/ca.crt" + - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_CERTIFICATE + value: "/otel/tls.crt" + - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_KEY + value: "/otel/tls.key" + - name: OTEL_METRICS_EXPORTER + value: "otlp" + - name: OTEL_METRIC_EXPORT_INTERVAL + value: "60000" + - name: OTEL_EXPORTER_OTLP_METRICS_PROTOCOL + value: "grpc" + - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT + value: "https://otel-collector:4317" + - name: OTEL_EXPORTER_OTLP_METRICS_TIMEOUT + value: "60000" + - name: OTEL_EXPORTER_OTLP_METRICS_INSECURE + value: "false" + - name: OTEL_EXPORTER_OTLP_METRICS_CERTIFICATE + value: "/otel/ca.crt" + - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_CERTIFICATE + value: "/otel/tls.crt" + - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_KEY + value: "/otel/tls.key" + volumeMounts: + - mountPath: /otel + name: otel + # Projected volume for OTEL certificates + volumes: + - name: otel + projected: + sources: + - secret: + name: otel-collector-tls + items: + - key: ca.crt + path: ca.crt + - secret: + name: otel-client-cert + items: + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key +``` + +For simpler management, you can achieve the same results using a `ConfigMap`: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: klio-otel-config +data: + OTEL_RESOURCE_DETECTORS: "telemetry.sdk,host,os.type,process.executable.name" + OTEL_TRACES_EXPORTER: "otlp" + OTEL_METRICS_EXPORTER: "otlp" + # Use the same endpoint configuration for both traces and metrics + # to 
keep it DRY, if no substantial differences are needed. + OTEL_EXPORTER_OTLP_PROTOCOL: "grpc" + OTEL_EXPORTER_OTLP_ENDPOINT: "https://otel-collector:4317" + OTEL_EXPORTER_OTLP_COMPRESSION: "gzip" + OTEL_EXPORTER_OTLP_TIMEOUT: "10000" + OTEL_EXPORTER_OTLP_INSECURE: "false" + OTEL_EXPORTER_OTLP_CERTIFICATE: "/otel/ca.crt" + OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE: "/otel/tls.crt" + OTEL_EXPORTER_OTLP_CLIENT_KEY: "/otel/tls.key" +--- +apiVersion: klio.edb.io/v1alpha1 +kind: Server +metadata: + name: my-klio-server +spec: + # ... other configuration ... + template: + spec: + containers: + - name: base + env: + - name: OTEL_SERVICE_NAME + value: "klio-base" + envFrom: + - configMapRef: + name: klio-otel-config + volumeMounts: + - mountPath: /otel + name: otel + - name: wal + env: + - name: OTEL_SERVICE_NAME + value: "klio-wal" + envFrom: + - configMapRef: + name: klio-otel-config + volumeMounts: + - mountPath: /otel + name: otel + # Projected volume for OTEL certificates + volumes: + - name: otel + projected: + sources: + - secret: + name: otel-collector-tls + items: + - key: ca.crt + path: ca.crt + - secret: + name: otel-client-cert + items: + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key +``` + +### Klio plugins with OpenTelemetry + +When deploying Klio as a CNPG Cluster plugin, you can configure OpenTelemetry +by specifying the necessary environment variables in the `env` section of the +`Cluster` spec. + +```yaml + +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: cluster-example +spec: + # ... other configuration ... + env: + - name: OTEL_TRACES_EXPORTER + value: "otlp" + - name: OTEL_EXPORTER_OTLP_TRACES_PROTOCOL + value: "grpc" + - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + value: "https://otel-collector:4317" + - name: OTEL_EXPORTER_OTLP_TRACES_INSECURE + value: "false" + - name: OTEL_EXPORTER_OTLP_TRACES_TIMEOUT + value: "10000" + - name: OTEL_EXPORTER_OTLP_TRACES_COMPRESSION + value: "gzip" + - name: OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE + value: "/projected/ca.crt" + - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_CERTIFICATE + value: "/projected/tls.crt" + - name: OTEL_EXPORTER_OTLP_TRACES_CLIENT_KEY + value: "/projected/tls.key" + - name: OTEL_METRIC_EXPORT_INTERVAL + value: "60000" + - name: OTEL_RESOURCE_DETECTORS + value: "telemetry.sdk,host,os.type,process.executable.name" + - name: OTEL_SERVICE_NAME + value: "klio-walsender" + - name: OTEL_METRICS_EXPORTER + value: "otlp" + - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT + value: "https://otel-collector:4317" + - name: OTEL_EXPORTER_OTLP_METRICS_PROTOCOL + value: "grpc" + - name: OTEL_EXPORTER_OTLP_METRICS_INSECURE + value: "false" + - name: OTEL_EXPORTER_OTLP_METRICS_TIMEOUT + value: "60000" + - name: OTEL_EXPORTER_OTLP_METRICS_CERTIFICATE + value: "/projected/ca.crt" + - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_CERTIFICATE + value: "/projected/tls.crt" + - name: OTEL_EXPORTER_OTLP_METRICS_CLIENT_KEY + value: "/projected/tls.key" + + projectedVolumeTemplate: + sources: + - secret: + name: otel-collector-tls + items: + - key: ca.crt + path: ca.crt + - secret: + name: otel-walsender-client-cert + items: + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key + + plugins: + - name: klio.enterprisedb.io + enabled: true + parameters: + pluginConfigurationRef: client-config-cluster-example +--- +apiVersion: klio.enterprisedb.io/v1alpha1 +kind: PluginConfiguration +metadata: + name: client-config-cluster-example +spec: + serverAddress: klio.default + clientSecretName: klio-client + serverSecretName: 
klio-server-tls +``` diff --git a/product_docs/docs/klio/0/plugin_configuration.mdx b/product_docs/docs/klio/0/plugin_configuration.mdx new file mode 100644 index 0000000000..69a509625d --- /dev/null +++ b/product_docs/docs/klio/0/plugin_configuration.mdx @@ -0,0 +1,368 @@ +--- +title: The Klio Plugin +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/plugin_configuration.md +sidebar_position: 6 +editTarget: originalFilePath + +--- + +The Klio plugin for CloudNativePG allows you to leverage the backup and WAL +streaming capabilities of Klio for your PostgreSQL clusters managed by +CloudNativePG. It will add two containers to each PostgreSQL instance pod: + +- A `klio-plugin` container that handles backup creation and management +- A `klio-wal` container that streams WAL files to the Klio server in real-time + +## Configuration + +The Klio plugin integrates with CloudNativePG through the CNPG-I (CloudNativePG +Interface) specification, enabling Klio to manage backups and WAL streaming for +your PostgreSQL clusters. To use Klio with a CloudNativePG cluster, you need to: + +1. Create a `PluginConfiguration` resource that defines how to connect to the + Klio server +2. Reference the plugin in your `Cluster` resource specification + +## Prerequisites + +Before configuring a cluster to use the Klio plugin, ensure you have: + +- A running Klio `Server` resource deployed in your namespace +- Client credentials (username and password) stored in a Kubernetes Secret +- The server's TLS certificate available in a Secret + +## Creating a PluginConfiguration resource + +The `PluginConfiguration` custom resource defines how the Klio plugin connects +to and communicates with the Klio server. This resource contains connection +details, authentication credentials, and optional configuration for metrics, +profiling, and backup retention policies. + +### Basic example + +Here's a minimal `PluginConfiguration` example: + +```yaml +apiVersion: klio.enterprisedb.io/v1alpha1 +kind: PluginConfiguration +metadata: + name: klio-plugin-config + namespace: default +spec: + serverAddress: klio-server.default + clientSecretName: client-sample-tls + serverSecretName: klio-server-tls +``` + +### Client credentials secret + +The client credentials must be stored in a Kubernetes Secret of type +`kubernetes.io/tls`, containing a secret to be presented to the Klio server. + +This secret can be generated with cert-manager by following the [documentation +in the Klio server page](klio_server.mdx#creating-a-client-side-certificate), +or be provided by the user. + +### Server Address + +The `serverAddress` field specifies where the Klio server can be reached. This +can be: + +- A Kubernetes service name: `klio-server.default` (within the same namespace) +- A fully qualified domain name: `klio-server.default.svc.cluster.local` +- An external address: `klio.example.com` + +Connections will be done using the default ports of the Klio base and WAL +servers, respectively 51515 and 52000. + +### TLS configuration + +The `serverSecretName` field references a Secret containing the TLS certificate +used to secure communication with the Klio server. This is the same +certificate configured on the `Server` resource. 
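Before referencing the `PluginConfiguration` from a `Cluster`, it can be useful to check that both secrets exist in the namespace and that the client certificate carries the expected identity. A quick sketch, assuming the secret names from the basic example above:

```bash
# Both secrets must exist in the same namespace as the PluginConfiguration
kubectl get secret client-sample-tls klio-server-tls -n default

# Inspect the subject of the client certificate presented to the Klio server
kubectl get secret client-sample-tls -n default -o jsonpath='{.data.tls\.crt}' \
  | base64 -d | openssl x509 -noout -subject
```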
+ +## Configuring a Cluster to use the Klio plugin + +Once you have created a `PluginConfiguration`, reference it in your CloudNativePG +`Cluster` resource: + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: my-postgres-cluster + namespace: default +spec: + instances: 3 + + postgresql: + pg_hba: + - local replication all peer # Allow replication connections locally + + plugins: + - name: klio.enterprisedb.io + enabled: true # Activate the Klio plugin (default) + parameters: + pluginConfigurationRef: klio-plugin-config + + storage: + size: 10Gi +``` + +To be able to stream WAL files, ensure that your PostgreSQL configuration +allows local replication connections. You can do this by adding an entry to the +`pg_hba` section, as shown in the example above. + +### Plugin parameters + +The `plugins` section in the `Cluster` specification requires: + +- **name**: Must be set to `klio.enterprisedb.io` to identify the Klio plugin +- **enabled**: Set to `true` to activate the plugin. This is the default value. +- **parameters.pluginConfigurationRef**: The name of your `PluginConfiguration` resource + +!!!note + +Even though the Klio plugin is used to archive WAL files on the Klio server, +it does not use the `archiveCommand` parameter in the PostgreSQL configuration, +as the WAL are streamed directly to the Klio server. Thus, you must not set +`isWALArchiver: true` in the plugin configuration. +!!! + +## Advanced configuration options + +The `PluginConfiguration` resource supports several advanced options to +customize the plugin's behavior. + +### Retention policies + +Define how long backups should be retained by configuring retention policies +for Tier 1 and Tier 2 storage. Retention policies can be configured +independently for each tier: + +```yaml +apiVersion: klio.enterprisedb.io/v1alpha1 +kind: PluginConfiguration +metadata: + name: klio-plugin-config +spec: + serverAddress: klio-server.default + clientSecretName: klio-client-credentials + serverSecretName: klio-server-tls + tier1: + retention: + keepLatest: 5 + keepHourly: 12 + keepDaily: 7 + keepWeekly: 4 + keepMonthly: 6 + keepAnnual: 2 + tier2: + enableBackup: true + enableRecovery: true + retention: + keepLatest: 10 + keepDaily: 30 + keepMonthly: 12 + keepAnnual: 5 +``` + +Except for `keepLatest`, each option defines how many backups to retain +for the specified time period. For example, `keepDaily: 7` means that we should +retain at most one backup for each of the past 7 days. + +If multiple backups exist within the same time bucket, the most recent one is +kept, unless preserved by a different *keep* rule. Backups that are not +retained by any rule are deleted. Rule evaluation is done when a new backup is +taken. + +The Klio server will automatically delete WAL files that are no longer needed +for recovery by any retained backup. + +All retention settings are optional. For each unspecified retention level, +the default Kopia value is applied: + +```yaml +keepLatest: 10 +keepHourly: 48 +keepDaily: 7 +keepWeekly: 4 +keepMonthly: 24 +keepAnnual: 1 +``` + +Set a rule to `0` to disable that retention level. + +### Cluster name override + +By default, the plugin uses the name of the CloudNativePG `Cluster` resource. +You can override this if needed: + +```yaml +spec: + clusterName: my-custom-cluster-name +``` + +This can be useful working with backups from different clusters, for example +when restoring clusters or configuring replica clusters. 
+ +### Tier 2 configuration + +Tier 2 provides secondary storage (typically object storage like S3) for +long-term backup retention and disaster recovery. Configure Tier 2 using the +`tier2` section: + +```yaml +spec: + tier2: + enableBackup: true + enableRecovery: true + retention: + keepDaily: 30 + keepMonthly: 12 +``` + +#### Options + +- **`enableBackup`**: When set to `true`, backups and WAL files are + automatically synchronized to Tier 2 storage after being stored in Tier 1. + This ensures your backups are available in long-term storage. + +- **`enableRecovery`**: When set to `true`, Klio will look for backups and + WAL files in both Tier 1 and Tier 2 during restore operations. If a backup + is available in both tiers, Tier 1 takes precedence as restore from it will + be faster. + +- **`retention`**: Configure a separate retention policy for Tier 2. + Typically, you would configure longer retention periods for Tier 2 since + object storage is more cost-effective for long-term storage. + +See the [Architecture documentation](architectures.mdx#tier-2-secondary-storage-object-storage) +for more details on Tier 2 storage. + +### Observability + +See the [OpenTelemetry observability](opentelemetry.mdx) section for more +details on how to monitor the Klio plugin using OpenTelemetry. + +### Performance profiling + +Enable the pprof HTTP endpoint for performance profiling and troubleshooting: + +```yaml +spec: + pprof: true +``` + +When enabled, the pprof endpoint is exposed and can be used with Go's profiling +tools to analyze CPU usage, memory allocation, goroutines, and other runtime +metrics. + +!!!warning + +Only enable pprof in development or testing environments, or when actively +troubleshooting performance issues. It should not be enabled in production +unless necessary. +!!! + +## Container customization + +The `PluginConfiguration` resource allows you to customize the Klio sidecar +containers by providing base container specifications that are used as the +foundation for the sidecars. This feature enables you to add custom environment +variables, volume mounts, resource limits, and other container settings without +modifying the PostgreSQL container environment. + +### Basic example + +```yaml +apiVersion: klio.enterprisedb.io/v1alpha1 +kind: PluginConfiguration +metadata: + name: klio-plugin-config +spec: + serverAddress: klio-server.default + clientSecretName: klio-client-credentials + serverSecretName: klio-server-tls + containers: + - name: klio-plugin + env: + - name: CUSTOM_ENV_VAR + value: "my-value" + - name: DEBUG_LEVEL + value: "info" + - name: klio-wal + env: + - name: WAL_BUFFER_SIZE + value: "8192" +``` + +### How container merging works + +The containers you define serve as the base for the Klio sidecars, with the +following merge behavior: + +1. **Your container is the base**: When you define a container + (e.g., `klio-plugin`), your specification serves as the starting point +2. **Klio enforces required values**: Klio sets its essential configuration: + - Container `name` (klio-plugin, klio-wal, or klio-restore) + - Container `args` (the command arguments needed for operation) + - `CONTAINER_NAME` environment variable +3. **Your customizations are preserved**: All other fields you define remain + intact +4. **Template defaults fill gaps**: For fields you don't specify, Klio applies + sensible defaults (image, security context, standard volume mounts, etc.) 
+ +!!!important + +Klio's required values (name, args, `CONTAINER_NAME` env var) will +always override any conflicting values you set. All other customizations are +respected. +!!! + +### Available sidecar containers + +The following containers can be customized: + +- **`klio-plugin`**: Handles backup creation and management in PostgreSQL pods +- **`klio-wal`**: Streams WAL files to the Klio server in PostgreSQL pods +- **`klio-restore`**: Restores backups during recovery jobs + +### Example: Resource limits and environment variables + +```yaml +apiVersion: klio.enterprisedb.io/v1alpha1 +kind: PluginConfiguration +metadata: + name: klio-plugin-config +spec: + serverAddress: klio-server.default + clientSecretName: klio-client-credentials + serverSecretName: klio-server-tls + containers: + - name: klio-plugin + env: + - name: LOG_LEVEL + value: "debug" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://otel-collector:4317" + resources: + limits: + memory: "512Mi" + cpu: "1" + requests: + memory: "256Mi" + cpu: "500m" + - name: klio-wal + env: + - name: WAL_STREAM_TIMEOUT + value: "30s" + resources: + limits: + memory: "256Mi" + cpu: "500m" + requests: + memory: "128Mi" + cpu: "250m" +``` diff --git a/product_docs/docs/klio/0/wal_streaming.mdx b/product_docs/docs/klio/0/wal_streaming.mdx new file mode 100644 index 0000000000..a9f2b4dc94 --- /dev/null +++ b/product_docs/docs/klio/0/wal_streaming.mdx @@ -0,0 +1,123 @@ +--- +title: WAL Streaming +originalFilePath: >- + https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/wal_streaming.md +sidebar_position: 4 +editTarget: originalFilePath + +--- + +A standout feature of Klio is its native, cloud-first implementation of WAL +streaming for PostgreSQL. This architecture enables: + +- Partial WAL segment streaming, ensuring real-time data transfer +- Built-in compression and encryption using user-provided keys +- Controlled replication slot advancement, protecting against WAL loss +- Optional synchronous replication, offering zero RPO when enabled + +## Architecture + +WAL streaming in Klio is built around two components: a client and a server. + +- The client, invoked using the `klio send-wal` command, typically runs + alongside PostgreSQL but does not have to. +- The server, started with the `klio server start-wal` command, runs as a + dedicated process on the Klio server. + +In Kubernetes environments, as illustrated in the diagram above, Klio streams +WAL records directly from the PostgreSQL primary over a local Unix domain +socket. The WAL streamer runs as a lightweight sidecar container within the +same pod as the primary instance and is managed by the CNPG-I–compliant plugin. +It continuously pushes data to a remote Klio WAL server (Tier 1), which handles +partial WAL file synchronization and archives completed segments into the +central WAL archive for the PostgreSQL cluster. + +![WAL streaming architectural overview](images/wal-streaming.png) + +## Moving Beyond `archive_command` + +Klio replaces the traditional PostgreSQL `archive_command` method for WAL +handling in CloudNativePG clusters, providing improved reliability, efficiency, +security, and observability. + +PostgreSQL’s `archive_command` is a shell command executed when a WAL segment +is complete—either because the segment reached its size limit (typically 16MB) +or the `archive_timeout` elapsed (5 minutes by default in CloudNativePG). 
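
For reference, the file-based mechanism that Klio replaces looks roughly like
the following `postgresql.conf` excerpt. This is a generic PostgreSQL
illustration only; the copy command is a placeholder, not what CloudNativePG
or Klio configures:

```ini
# Classic file-based WAL archiving (illustrative only)
archive_mode = on
# Runs once for every completed 16MB WAL segment; %p is the file path, %f its name
archive_command = 'cp %p /mnt/wal-archive/%f'
# Force a segment switch (and archival) at least every 5 minutes
archive_timeout = 300
```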

The streaming model provided by Klio offers several key advantages over this
approach:

- **Near-zero RPO:** WAL changes are streamed incrementally in near real-time,
  reducing the worst-case recovery point objective (RPO) from 5 minutes to
  near-zero, or even zero in synchronous mode.

- **Improved efficiency and scalability:** A single, continuously running WAL
  streamer process replaces the need to spawn a new process for each WAL
  segment, resulting in lower CPU and I/O usage and better scalability during
  periods of high WAL volume.

- **Enhanced security:** WAL data is encrypted end-to-end, both in transit and
  at rest, providing protection not available with the traditional
  `archive_command`.

- **Comprehensive observability:** Native metrics and structured logging
  provide full visibility into WAL streaming operations, simplifying
  monitoring, anomaly detection, and troubleshooting compared to the opaque
  nature of `archive_command`.

## Monitoring Klio WAL Streamer in PostgreSQL

The Klio WAL streamer is a PostgreSQL streaming replication client and,
as such, can be monitored using the standard `pg_stat_replication`
system view in the PostgreSQL catalog.

The WAL streamer identifies itself with `application_name` set to `klio`.

To verify whether any Klio WAL streamer is connected to an instance (in
Kubernetes deployments, this will always be the primary), run the following
query:

```sql
SELECT * FROM pg_stat_replication WHERE application_name = 'klio';
```

An example output might look like this:

```console
-[ RECORD 1 ]----+------------------------------
pid              | 1070
usesysid         | 10
usename          | postgres
application_name | klio
client_addr      |
client_hostname  |
client_port      | -1
backend_start    | 2025-08-07 01:14:39.619662+00
backend_xmin     |
state            | streaming
sent_lsn         | 2/C765A000
write_lsn        | 2/C75FA000
flush_lsn        | 2/C741A000
replay_lsn       | 2/C741A000
write_lag        | 00:00:00.919907
flush_lag        | 00:00:00.923556
replay_lag       | 00:00:00.923556
sync_priority    | 0
sync_state       | async
reply_time       | 2025-08-07 01:54:44.756306+00
```

As you can see, Klio provides relevant feedback to PostgreSQL. Here is a brief
explanation of the key fields:

- `state`: The replication connection status (`streaming` indicates active
  streaming).
- `sent_lsn`, `write_lsn`, `flush_lsn`, `replay_lsn`: Positions in the WAL
  indicating how far data has been sent, written, flushed, and replayed on the
  Klio server (replayed and flushed are always identical).
- `write_lag`, `flush_lag`, `replay_lag`: Delays between WAL positions
  indicating replication latency.
- `sync_state`: The synchronization state of this standby (e.g., `async`,
  `sync`, `potential`, `quorum`).
diff --git a/product_docs/docs/klio/0/walplayer.mdx b/product_docs/docs/klio/0/walplayer.mdx new file mode 100644 index 0000000000..97d51bc067 --- /dev/null +++ b/product_docs/docs/klio/0/walplayer.mdx @@ -0,0 +1,339 @@
---
title: WAL Player
originalFilePath: >-
  https://github.com/EnterpriseDB/klio/blob/main/docs/documentation/web//versioned_docs/version-0.0.11/walplayer.md
sidebar_position: 80
editTarget: originalFilePath

---

The WAL Player is a command-line tool designed to benchmark the performance of
your Klio servers by simulating PostgreSQL Write-Ahead Log (WAL) file streaming
workloads. It helps ensure your Klio servers can handle
your production workloads efficiently.
Use it regularly to validate performance +and capacity planning decisions. + +## Overview + +WAL Player provides two main commands: + +- **`generate`** - Creates synthetic WAL files for testing +- **`play`** - Sends WAL files to a Klio server and measures performance + +This tool is essential for: + +- Performance testing and benchmarking Klio servers +- Validating server capacity under different workloads +- Measuring throughput and latency characteristics +- Load testing before production deployment + +## Prerequisites + +- Klio binary installed and accessible +- A running Klio server to test against +- Sufficient disk space for generating test WAL files + +## Commands + +### `klio wal-player generate` + +Generates synthetic WAL files for testing purposes. + +#### Usage + +```bash +klio wal-player generate [output-directory] [flags] +``` + +#### Parameters + +- `output-directory` - Directory where WAL files will be created (defaults to + current directory) + +#### Flags + +- `--wal-size` - Size of each WAL file in MB (default: 16) +- `--length` - Number of WAL files to generate (required) + +#### Examples + +```bash +# Generate 10 WAL files of 16MB each in the current directory +klio wal-player generate --length 10 + +# Generate 50 WAL files of 32MB each in a specific directory +klio wal-player generate /tmp/test-wals --wal-size 32 --length 50 +``` + +### `klio wal-player play` + +Sends WAL files to a Klio server and measures performance metrics. + +#### Usage + +```bash +klio wal-player play [directory] [flags] +``` + +#### Parameters + +- `directory` - Directory containing WAL files to send (required). This + directory should contain PostgreSQL WAL files in the standard format (e.g., + `000000010000000000000001`). It also supports files compressed with gzip, + provided they have the `.gz` extension. + +#### Flags + +- `--jobs, -j` - Number of parallel jobs for concurrent uploads (default: 1). + Can be used to simulate multiple Klio clients sending data simultaneously. +- `--block-size` - Block size in KB for streaming (default: 2048). This controls + how much data is sent in each request. + +#### Configuration + +The play command requires client configuration to connect to your Klio server. 
+This can be provided via: + +- Configuration file +- Environment variables +- Command-line flags + +Example configuration: + +```yaml +# klio-config.yaml +client: + wal: + address: localhost:52000 + cluster_name: walplayer + server_cert_path: "/path/to/server.crt" + client_cert_path: "/path/to/client/tls.crt" + client_key_path: "/path/to/client/tls.key" +``` + +#### Examples + +```bash +# Send WAL files using single connection +klio wal-player play ./test-wals + +# Send WAL files using 4 workers for parallel uploads +klio wal-player play ./test-wals --jobs 4 + +# Benchmark with different block sizes +klio wal-player play ./test-wals --jobs 2 --block-size 1024 +``` + +## Performance Metrics + +The `play` command outputs detailed performance metrics in JSON format for each +WAL file: + +```json +{ + "walFullPath": "/path/to/000000010000000000000001", + "startTime": "2025-01-15T10:30:00Z", + "endTime": "2025-01-15T10:30:02Z", + "elapsedTime": "7651680", + "error": "" +} +``` + +### Metrics Explained + +- **`walFullPath`** - Full path to the WAL file that was sent +- **`startTime`** - When the upload started +- **`endTime`** - When the upload completed +- **`elapsedTime`** - Total time taken for the upload in nanoseconds +- **`error`** - Error message if the upload failed (empty on success) + +## Benchmarking Example + +The following Kubernetes Job definition demonstrates how to use +the WAL Player to benchmark a Klio server. This example covers generating +WAL files and then playing them back to the server. + + + +```yaml +--- +# PVC for storing generated WAL files +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: walplayer-data +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi # Enough space to hold the generated amount of WAL files +--- +# Client certificate for authenticating with the Klio server +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: walplayer-client-cert +spec: + commonName: klio@walplayer + secretName: walplayer-client-cert + duration: 2160h # 90d + renewBefore: 360h # 15d + isCA: false + usages: + - client auth + issuerRef: + name: server-sample-ca + kind: Issuer + group: cert-manager.io +--- +# ConfigMap with klio client configuration +apiVersion: v1 +kind: ConfigMap +metadata: + name: walplayer-config +data: + # Address your klio server + klio-config.yaml: | + client: + wal: + address: server-sample.default:52000 + cluster_name: walplayer + server_cert_path: /certs/server/ca.crt + client_cert_path: /certs/client/tls.crt + client_key_path: /certs/client/tls.key +--- +# Job to generate and play WAL files +apiVersion: batch/v1 +kind: Job +metadata: + name: walplayer-benchmark +spec: + template: + metadata: + labels: + app: walplayer + spec: + restartPolicy: Never + initContainers: + # Generate synthetic WAL files + - name: generate-wals + image: docker.enterprisedb.com/k8s/klio:v0.0.11 + imagePullPolicy: Always + command: + - /usr/bin/klio + - wal-player + - generate + - /data + - --wal-size=16 + - --length=100 + volumeMounts: + - name: data + mountPath: /data + containers: + # Play WAL files to the Klio server + - name: play-wals + image: docker.enterprisedb.com/k8s/klio:v0.0.11 + imagePullPolicy: Always + command: + - /usr/bin/klio + - wal-player + - play + - /data + - --config=/config/klio-config.yaml + - --jobs=4 + - --block-size=2048 + volumeMounts: + - name: data + mountPath: /data + - name: config + mountPath: /config + readOnly: true + - name: server-cert + mountPath: /certs/server + readOnly: true + - 
name: client-cert
              mountPath: /certs/client
              readOnly: true
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: walplayer-data
        - name: config
          configMap:
            name: walplayer-config
        - name: server-cert
          secret:
            secretName: server-sample-tls
        - name: client-cert
          secret:
            secretName: walplayer-client-cert
```

### Customizing the Benchmark

You can adjust the following parameters to simulate different workload scenarios:

#### WAL Generation Parameters

Modify the `generate-wals` init container to create different test workloads:

- Many small files:

  ```yaml
  - --wal-size=16
  - --length=1000
  ```

- Fewer, larger files:

  ```yaml
  - --wal-size=256
  - --length=100
  ```

For the most representative results, match the size and number of WAL files to
your actual production workload.

#### WAL Playback Parameters

Modify the `play-wals` container to test different upload patterns:

- **Jobs** (`--jobs`): Number of parallel upload workers
  - Start with `--jobs=1` to establish baseline performance
  - Increase to `--jobs=2`, `--jobs=4`, `--jobs=8` to find optimal concurrency
  - Throughput typically plateaus beyond a certain level of concurrency
- **Block Size** (`--block-size`): Size of each streaming chunk in KB
  - Default is `--block-size=2048`
  - Maximum is 8192

### Analyzing Results

View the job logs to see the JSON performance metrics:

```bash
kubectl logs job/walplayer-benchmark -c play-wals
```

You can analyze the output using `jq`:

```bash
# Get all results
kubectl logs job/walplayer-benchmark -c play-wals > results.json

# Calculate total successful uploads
jq -s '[.[] | select(.error == "")] | length' results.json

# Calculate average upload time (in nanoseconds)
jq -s '[.[] | select(.error == "") | .elapsedTime | tonumber] | add / length' results.json

# Find any failed uploads
jq -s '.[] | select(has("error") and .error != "")' results.json
```

## Performance Optimization Tips

1. **Resource Monitoring**: Monitor CPU, memory, and disk I/O on the Klio server
2. **Network Bandwidth**: Ensure sufficient bandwidth between client and server
3. 
**Storage Performance**: Verify storage can handle the write throughput diff --git a/src/constants/products.js b/src/constants/products.js index d527b41047..8c818d1d03 100644 --- a/src/constants/products.js +++ b/src/constants/products.js @@ -51,6 +51,11 @@ const products = { alteruser_utility: { name: "alteruser", iconName: IconNames.TOOLS }, edb_sqlpatch: { name: "EDB SQL Patch", iconName: IconNames.TOOLS }, language_pack: { name: "Language Pack", iconName: IconNames.TOOLS }, + klio: { + name: "Enterprise Data Protection for CloudNativePG™", + shortName: "Klio", + iconName: IconNames.BACKUP, + }, lasso: { name: "Lasso" }, livecompare: { name: "LiveCompare", iconName: IconNames.INTEGRATION }, "Migration Handbook": { name: "Migration Handbook" }, diff --git a/src/styles/_admonitions.scss b/src/styles/_admonitions.scss index 4c6e9a391f..762cad6e7c 100644 --- a/src/styles/_admonitions.scss +++ b/src/styles/_admonitions.scss @@ -10,7 +10,7 @@ .admonition-info { @extend .alert-secondary; } -.admonition-warning { +.admonition-warning, .admonition-caution { @extend .alert-warning; } .admonition-danger { @@ -82,7 +82,7 @@ } } -.admonition-warning { +.admonition-warning, .admonition-caution { > .admonition-heading h5:before { content: url('data:image/svg+xml;utf8,'); } diff --git a/src/styles/_dark.scss b/src/styles/_dark.scss index bfab15fa0b..9506c3754c 100644 --- a/src/styles/_dark.scss +++ b/src/styles/_dark.scss @@ -166,7 +166,7 @@ html.dark { background-color: darken(#e4eef5, 70%) !important; color: darken($light, 10%) !important; } - .admonition-warning { + .admonition-warning, .admonition-caution { background-color: darken(#ffedd1, 70%) !important; color: darken($light, 10%) !important; }