From 886c2ca58976e7344a6762a3c17b9cdd8b6af185 Mon Sep 17 00:00:00 2001 From: Miguel Sacristan Date: Fri, 3 Apr 2026 19:55:51 +0200 Subject: [PATCH 1/2] feat(cubestore): Add Azure Blob Storage support for remote filesystem Implement AzureBlobRemoteFs with support for multiple authentication methods: account key, SAS token, and federated token credentials (workload identity, managed identity). Environment variables: - CUBESTORE_AZURE_CONTAINER (required) - CUBESTORE_AZURE_ACCOUNT (required) - CUBESTORE_AZURE_ACCESS_KEY (optional) - CUBESTORE_AZURE_SAS_TOKEN (optional) - CUBESTORE_AZURE_ENDPOINT (optional, for Azurite/sovereign clouds) - CUBESTORE_AZURE_SUB_PATH (optional) Closes #10332, #8288 --- rust/cubestore/Cargo.lock | 35 +- rust/cubestore/cubestore/Cargo.toml | 2 +- rust/cubestore/cubestore/src/config/mod.rs | 33 ++ .../cubestore/src/remotefs/azure_blob.rs | 398 ++++++++++++++++++ rust/cubestore/cubestore/src/remotefs/mod.rs | 1 + 5 files changed, 467 insertions(+), 2 deletions(-) create mode 100644 rust/cubestore/cubestore/src/remotefs/azure_blob.rs diff --git a/rust/cubestore/Cargo.lock b/rust/cubestore/Cargo.lock index 1d453c829cb56..e2e7078c52140 100644 --- a/rust/cubestore/Cargo.lock +++ b/rust/cubestore/Cargo.lock @@ -605,7 +605,7 @@ dependencies = [ "attohttpc", "home", "log", - "quick-xml", + "quick-xml 0.32.0", "rust-ini", "serde", "thiserror", @@ -3100,6 +3100,7 @@ dependencies = [ "hyper 1.2.0", "hyper-util", "rustls", + "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls", @@ -4299,13 +4300,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" dependencies = [ "async-trait", + "base64 0.22.1", "bytes 1.10.1", "chrono", "futures", "humantime", + "hyper 1.2.0", "itertools 0.13.0", "parking_lot", "percent-encoding", + "quick-xml 0.36.2", + "rand 0.8.5", + "reqwest 0.12.5", + "ring 0.17.8", + "serde", + "serde_json", "snafu", "tokio", "tracing", 
@@ -4954,6 +4963,16 @@ dependencies = [ "serde", ] +[[package]] +name = "quick-xml" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7649a7b4df05aed9ea7ec6f628c67c9953a43869b8bc50929569b2999d443fe" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quinn" version = "0.11.3" @@ -5412,6 +5431,7 @@ dependencies = [ "pin-project-lite 0.2.14", "quinn", "rustls", + "rustls-native-certs", "rustls-pemfile 2.1.2", "rustls-pki-types", "serde", @@ -5575,6 +5595,19 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" +dependencies = [ + "openssl-probe", + "rustls-pemfile 2.1.2", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" diff --git a/rust/cubestore/cubestore/Cargo.toml b/rust/cubestore/cubestore/Cargo.toml index 07f7acef28441..0a22b3af7c8a6 100644 --- a/rust/cubestore/cubestore/Cargo.toml +++ b/rust/cubestore/cubestore/Cargo.toml @@ -107,7 +107,7 @@ humansize = "2.1.3" deepsize = "0.2.0" anyhow = "1.0" arc-swap = "1.7.1" -object_store = "0.11.1" +object_store = { version = "0.11.1", features = ["azure"] } prost = "0.13.1" [target.'cfg(target_os = "linux")'.dependencies] diff --git a/rust/cubestore/cubestore/src/config/mod.rs b/rust/cubestore/cubestore/src/config/mod.rs index 4058effdd12e6..60db342bf696f 100644 --- a/rust/cubestore/cubestore/src/config/mod.rs +++ b/rust/cubestore/cubestore/src/config/mod.rs @@ -24,6 +24,7 @@ use crate::mysql::{MySqlServer, SqlAuthDefaultImpl, SqlAuthService}; use crate::queryplanner::metadata_cache::BasicMetadataCacheFactory; use crate::queryplanner::query_executor::{QueryExecutor, QueryExecutorImpl}; use crate::queryplanner::{QueryPlanner, QueryPlannerImpl}; +use crate::remotefs::azure_blob::AzureBlobRemoteFs; use 
crate::remotefs::cleanup::RemoteFsCleanup; use crate::remotefs::gcs::GCSRemoteFs; use crate::remotefs::minio::MINIORemoteFs; @@ -319,6 +320,7 @@ pub fn validate_config(c: &dyn ConfigObj) -> ValidationMessages { "CUBESTORE_MINIO_BUCKET", "CUBESTORE_S3_BUCKET", "CUBESTORE_GCS_BUCKET", + "CUBESTORE_AZURE_CONTAINER", "CUBESTORE_REMOTE_DIR", ]; remote_vars.retain(|v| env::var(v).is_ok()); @@ -352,6 +354,11 @@ pub enum FileStoreProvider { bucket_name: String, sub_path: Option, }, + AzureBlob { + account: String, + container: String, + sub_path: Option, + }, } #[derive(Clone)] @@ -1349,6 +1356,14 @@ impl Config { bucket_name, sub_path: env::var("CUBESTORE_GCS_SUB_PATH").ok(), } + } else if let Ok(container) = env::var("CUBESTORE_AZURE_CONTAINER") { + FileStoreProvider::AzureBlob { + container, + account: env::var("CUBESTORE_AZURE_ACCOUNT").expect( + "CUBESTORE_AZURE_ACCOUNT required when CUBESTORE_AZURE_CONTAINER is set", + ), + sub_path: env::var("CUBESTORE_AZURE_SUB_PATH").ok(), + } } else if let Ok(remote_dir) = env::var("CUBESTORE_REMOTE_DIR") { FileStoreProvider::Filesystem { remote_dir: Some(PathBuf::from(remote_dir)), @@ -2034,6 +2049,24 @@ impl Config { }) .await; } + FileStoreProvider::AzureBlob { + account, + container, + sub_path, + } => { + let data_dir = self.config_obj.data_dir.clone(); + let account = account.to_string(); + let container = container.to_string(); + let sub_path = sub_path.clone(); + self.injector + .register("original_remote_fs", async move |_| { + let arc: Arc = + AzureBlobRemoteFs::new(data_dir, account, container, sub_path) + .unwrap(); + arc + }) + .await; + } FileStoreProvider::Local => unimplemented!(), // TODO }; } diff --git a/rust/cubestore/cubestore/src/remotefs/azure_blob.rs b/rust/cubestore/cubestore/src/remotefs/azure_blob.rs new file mode 100644 index 0000000000000..b8858eed44d4c --- /dev/null +++ b/rust/cubestore/cubestore/src/remotefs/azure_blob.rs @@ -0,0 +1,398 @@ +use crate::app_metrics; +use crate::di_service; +use 
crate::remotefs::ExtendedRemoteFs; +use crate::remotefs::{CommonRemoteFsUtils, LocalDirRemoteFs, RemoteFile, RemoteFs}; +use crate::util::lock::acquire_lock; +use crate::CubeError; +use async_trait::async_trait; +use datafusion::cube_ext; +use futures::stream::BoxStream; +use futures::StreamExt; +use log::{debug, info}; +use object_store::azure::{MicrosoftAzure, MicrosoftAzureBuilder}; +use object_store::{ObjectStore, PutPayload}; +use std::env; +use std::fmt; +use std::fmt::Formatter; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::SystemTime; +use tempfile::{NamedTempFile, PathPersistError}; +use tokio::fs; +use tokio::fs::File; +use tokio::io::AsyncWriteExt; +use tokio::sync::Mutex; + +pub struct AzureBlobRemoteFs { + dir: PathBuf, + store: Arc, + container: String, + sub_path: Option, + delete_mut: Mutex<()>, +} + +impl fmt::Debug for AzureBlobRemoteFs { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let mut s = f.debug_struct("AzureBlobRemoteFs"); + s.field("dir", &self.dir) + .field("container", &self.container) + .field("sub_path", &self.sub_path); + s.finish_non_exhaustive() + } +} + +impl AzureBlobRemoteFs { + pub fn new( + dir: PathBuf, + account: String, + container: String, + sub_path: Option, + ) -> Result, CubeError> { + let mut builder = MicrosoftAzureBuilder::new() + .with_account(&account) + .with_container_name(&container); + + if let Ok(endpoint) = env::var("CUBESTORE_AZURE_ENDPOINT") { + builder = builder.with_endpoint(endpoint); + } + + if let Ok(access_key) = env::var("CUBESTORE_AZURE_ACCESS_KEY") { + builder = builder.with_access_key(access_key); + } else if let Ok(sas_token) = env::var("CUBESTORE_AZURE_SAS_TOKEN") { + // Parse SAS token query string into key-value pairs + let query_pairs: Vec<(String, String)> = sas_token + .trim_start_matches('?') + .split('&') + .filter_map(|pair| { + let mut parts = pair.splitn(2, '='); + match (parts.next(), parts.next()) { + (Some(k), Some(v)) => Some((k.to_string(), 
v.to_string())), + _ => None, + } + }) + .collect(); + builder = builder.with_sas_authorization(query_pairs); + } else if let (Ok(client_id), Ok(tenant_id), Ok(token_file)) = ( + env::var("AZURE_CLIENT_ID"), + env::var("AZURE_TENANT_ID"), + env::var("AZURE_FEDERATED_TOKEN_FILE"), + ) { + // Workload identity: explicitly pass federated token credentials + builder = builder + .with_client_id(client_id) + .with_tenant_id(tenant_id) + .with_federated_token_file(token_file); + } + + let store = builder.build().map_err(|e| { + CubeError::internal(format!("Failed to create Azure Blob Storage client: {}", e)) + })?; + + Ok(Arc::new(Self { + dir, + store: Arc::new(store), + container, + sub_path, + delete_mut: Mutex::new(()), + })) + } + + fn azure_path(&self, remote_path: &str) -> object_store::path::Path { + let full = match &self.sub_path { + Some(p) => format!("{}/{}", p, remote_path), + None => remote_path.to_string(), + }; + object_store::path::Path::from(full) + } + + fn strip_sub_path(&self, key: &str) -> String { + match &self.sub_path { + Some(p) => { + let prefix = format!("{}/", p); + key.strip_prefix(&prefix).unwrap_or(key).to_string() + } + None => key.to_string(), + } + } +} + +di_service!(AzureBlobRemoteFs, [RemoteFs, ExtendedRemoteFs]); + +#[async_trait] +impl RemoteFs for AzureBlobRemoteFs { + async fn temp_upload_path(&self, remote_path: String) -> Result { + CommonRemoteFsUtils::temp_upload_path(self, remote_path).await + } + + async fn uploads_dir(&self) -> Result { + CommonRemoteFsUtils::uploads_dir(self).await + } + + async fn check_upload_file( + &self, + remote_path: String, + expected_size: u64, + ) -> Result<(), CubeError> { + // Use head() instead of list() because the object_store crate's + // list(Some(&exact_path)) returns empty on Azure Blob Storage. 
+ let path = self.azure_path(&remote_path); + let meta = self.store.head(&path).await.map_err(|e| { + CubeError::internal(format!( + "File {} can't be found after upload: {}", + remote_path, e + )) + })?; + if meta.size as u64 != expected_size { + return Err(CubeError::internal(format!( + "File sizes for {} don't match after upload. Expected {} but got {}", + remote_path, expected_size, meta.size + ))); + } + Ok(()) + } + + async fn upload_file( + &self, + temp_upload_path: String, + remote_path: String, + ) -> Result { + app_metrics::REMOTE_FS_OPERATION_CORE.add_with_tags( + 1, + Some(&vec![ + "operation:upload_file".to_string(), + "driver:azure_blob".to_string(), + ]), + ); + let time = SystemTime::now(); + debug!("Uploading {}", remote_path); + + let path = self.azure_path(&remote_path); + let data = fs::read(&temp_upload_path).await?; + let payload = PutPayload::from(data); + + self.store.put(&path, payload).await.map_err(|e| { + CubeError::internal(format!("Azure Blob upload error for {}: {}", remote_path, e)) + })?; + + let size = fs::metadata(&temp_upload_path).await?.len(); + self.check_upload_file(remote_path.clone(), size).await?; + + let local_path = self.dir.as_path().join(&remote_path); + if Path::new(&temp_upload_path) != local_path { + fs::create_dir_all(local_path.parent().unwrap()) + .await + .map_err(|e| { + CubeError::internal(format!( + "Create dir {}: {}", + local_path.parent().as_ref().unwrap().to_string_lossy(), + e + )) + })?; + fs::rename(&temp_upload_path, local_path.clone()).await?; + } + info!("Uploaded {} ({:?})", remote_path, time.elapsed()?); + Ok(fs::metadata(local_path).await?.len()) + } + + async fn download_file( + &self, + remote_path: String, + _expected_file_size: Option, + ) -> Result { + let local_file = self.dir.as_path().join(&remote_path); + let local_dir = local_file.parent().unwrap(); + let downloads_dir = local_dir.join("downloads"); + + let local_file_str = local_file.to_str().unwrap().to_string(); + + 
fs::create_dir_all(&downloads_dir).await?; + if !local_file.exists() { + app_metrics::REMOTE_FS_OPERATION_CORE.add_with_tags( + 1, + Some(&vec![ + "operation:download_file".to_string(), + "driver:azure_blob".to_string(), + ]), + ); + let time = SystemTime::now(); + debug!("Downloading {}", remote_path); + + let path = self.azure_path(&remote_path); + let result = self.store.get(&path).await.map_err(|e| { + CubeError::internal(format!( + "Azure Blob download error for {}: {}", + remote_path, e + )) + })?; + let bytes = result.bytes().await.map_err(|e| { + CubeError::internal(format!( + "Azure Blob read bytes error for {}: {}", + remote_path, e + )) + })?; + + let (temp_file, temp_path) = + cube_ext::spawn_blocking(move || NamedTempFile::new_in(&downloads_dir)) + .await?? + .into_parts(); + + let mut writer = File::from_std(temp_file); + writer.write_all(&bytes).await?; + writer.flush().await?; + + cube_ext::spawn_blocking(move || -> Result<(), PathPersistError> { + temp_path.persist(&local_file) + }) + .await??; + + info!("Downloaded {} ({:?})", remote_path, time.elapsed()?); + } + + Ok(local_file_str) + } + + async fn delete_file(&self, remote_path: String) -> Result<(), CubeError> { + app_metrics::REMOTE_FS_OPERATION_CORE.add_with_tags( + 1, + Some(&vec![ + "operation:delete_file".to_string(), + "driver:azure_blob".to_string(), + ]), + ); + let time = SystemTime::now(); + debug!("Deleting {}", remote_path); + + let path = self.azure_path(&remote_path); + self.store.delete(&path).await.map_err(|e| { + CubeError::internal(format!("Azure Blob delete error for {}: {}", remote_path, e)) + })?; + + let _guard = acquire_lock("delete file", self.delete_mut.lock()).await?; + let local = self.dir.as_path().join(&remote_path); + if fs::metadata(local.clone()).await.is_ok() { + fs::remove_file(local.clone()).await?; + LocalDirRemoteFs::remove_empty_paths(self.dir.as_path().to_path_buf(), local.clone()) + .await?; + } + + info!("Deleted {} ({:?})", remote_path, 
time.elapsed()?); + Ok(()) + } + + async fn list(&self, remote_prefix: String) -> Result, CubeError> { + let prefix = self.azure_path(&remote_prefix); + let mut result = Vec::new(); + let mut pages_count: i64 = 0; + let mut stream = self.store.list(Some(&prefix)); + let mut page_items = 0; + + while let Some(item) = stream.next().await { + let meta = item.map_err(|e| { + CubeError::internal(format!("Azure Blob list error: {}", e)) + })?; + result.push(self.strip_sub_path(&meta.location.to_string())); + page_items += 1; + if page_items >= 1000 { + pages_count += 1; + page_items = 0; + } + } + if page_items > 0 { + pages_count += 1; + } + + if pages_count > 100 { + log::warn!( + "Azure Blob list returned more than 100 pages: {}", + pages_count + ); + } + app_metrics::REMOTE_FS_OPERATION_CORE.add_with_tags( + pages_count, + Some(&vec![ + "operation:list".to_string(), + "driver:azure_blob".to_string(), + ]), + ); + + Ok(result) + } + + async fn list_with_metadata( + &self, + remote_prefix: String, + ) -> Result, CubeError> { + let prefix = self.azure_path(&remote_prefix); + let mut result = Vec::new(); + let mut stream = self.store.list(Some(&prefix)); + + while let Some(item) = stream.next().await { + let meta = item.map_err(|e| { + CubeError::internal(format!("Azure Blob list error: {}", e)) + })?; + result.push(RemoteFile { + remote_path: self.strip_sub_path(&meta.location.to_string()), + updated: meta.last_modified, + file_size: meta.size as u64, + }); + } + + Ok(result) + } + + async fn local_path(&self) -> Result { + Ok(self.dir.to_str().unwrap().to_owned()) + } + + async fn local_file(&self, remote_path: String) -> Result { + let buf = self.dir.join(remote_path); + fs::create_dir_all(buf.parent().unwrap()).await?; + Ok(buf.to_str().unwrap().to_string()) + } +} + +#[async_trait] +impl ExtendedRemoteFs for AzureBlobRemoteFs { + async fn list_by_page( + &self, + remote_prefix: String, + ) -> Result, CubeError>>, CubeError> { + let prefix = 
self.azure_path(&remote_prefix); + let store = self.store.clone(); + let sub_path = self.sub_path.clone(); + + let stream = async_stream::stream! { + let mut object_stream = store.list(Some(&prefix)); + let mut page = Vec::new(); + let page_size = 1000; + + while let Some(result) = object_stream.next().await { + match result { + Ok(meta) => { + let key = meta.location.to_string(); + let remote_path = match &sub_path { + Some(p) => { + let pfx = format!("{}/", p); + key.strip_prefix(&pfx).unwrap_or(&key).to_string() + } + None => key, + }; + page.push(remote_path); + if page.len() >= page_size { + yield Ok(std::mem::take(&mut page)); + } + } + Err(e) => { + yield Err(CubeError::internal(format!("Azure Blob list error: {}", e))); + return; + } + } + } + if !page.is_empty() { + yield Ok(page); + } + }; + + Ok(Box::pin(stream)) + } +} diff --git a/rust/cubestore/cubestore/src/remotefs/mod.rs b/rust/cubestore/cubestore/src/remotefs/mod.rs index c7092b0c11e6c..0190508a90c82 100644 --- a/rust/cubestore/cubestore/src/remotefs/mod.rs +++ b/rust/cubestore/cubestore/src/remotefs/mod.rs @@ -1,3 +1,4 @@ +pub mod azure_blob; pub mod cleanup; pub mod gcs; pub mod minio; From d47b5c28b347721c1d3c561c5450d15f45be6782 Mon Sep 17 00:00:00 2001 From: Miguel Sacristan Date: Fri, 3 Apr 2026 19:56:18 +0200 Subject: [PATCH 2/2] docs: Add Azure Blob Storage environment variables and usage examples --- .../cube-core/running-in-production.mdx | 11 ++++ .../running-in-production.mdx | 13 ++++- .../configuration/environment-variables.mdx | 57 +++++++++++++++++++ .../product/caching/running-in-production.mdx | 41 ++++++++++++- .../reference/environment-variables.mdx | 57 +++++++++++++++++++ 5 files changed, 177 insertions(+), 2 deletions(-) diff --git a/docs-mintlify/cube-core/running-in-production.mdx b/docs-mintlify/cube-core/running-in-production.mdx index 93ff2014ed2ba..df61eabfdef0c 100644 --- a/docs-mintlify/cube-core/running-in-production.mdx +++ 
b/docs-mintlify/cube-core/running-in-production.mdx @@ -310,6 +310,17 @@ default. +### Azure + +Cube Store supports Azure Blob Storage for persistent storage using account +keys, SAS tokens, or federated token credentials (workload identity, managed +identity). + +When `CUBESTORE_AZURE_ACCESS_KEY` and `CUBESTORE_AZURE_SAS_TOKEN` are not set, +Cube Store automatically uses federated token credentials via the standard +`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, and `AZURE_FEDERATED_TOKEN_FILE` +environment variables. + ### Garbage collection Cleanup isn’t done in export buckets; however, it's done in the persistent diff --git a/docs-mintlify/docs/pre-aggregations/running-in-production.mdx b/docs-mintlify/docs/pre-aggregations/running-in-production.mdx index 93ff2014ed2ba..93b60b71dc5c9 100644 --- a/docs-mintlify/docs/pre-aggregations/running-in-production.mdx +++ b/docs-mintlify/docs/pre-aggregations/running-in-production.mdx @@ -310,9 +310,20 @@ default. +### Azure + +Cube Store supports Azure Blob Storage for persistent storage using account +keys, SAS tokens, or federated token credentials (workload identity, managed +identity). + +When `CUBESTORE_AZURE_ACCESS_KEY` and `CUBESTORE_AZURE_SAS_TOKEN` are not set, +Cube Store automatically uses federated token credentials via the standard +`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, and `AZURE_FEDERATED_TOKEN_FILE` +environment variables. + ### Garbage collection -Cleanup isn’t done in export buckets; however, it's done in the persistent +Cleanup isn’t done in export buckets; however, it’s done in the persistent storage of Cube Store. The default time-to-live (TTL) for orphaned pre-aggregation tables is one day. 
diff --git a/docs-mintlify/reference/configuration/environment-variables.mdx b/docs-mintlify/reference/configuration/environment-variables.mdx index 788ce8492cefd..7e3bfb7f6924b 100644 --- a/docs-mintlify/reference/configuration/environment-variables.mdx +++ b/docs-mintlify/reference/configuration/environment-variables.mdx @@ -1532,6 +1532,63 @@ Required when using an AWS instance role. | ------------------------- | ---------------------- | --------------------- | | A valid number in minutes | `180` | `180` | +## `CUBESTORE_AZURE_ACCESS_KEY` + +The access key for the Azure Storage account. Optional when using Azure Blob +Storage. If not set, falls back to SAS token authentication or +federated token credentials. + +| Possible Values | Default in Development | Default in Production | +| --------------------------------- | ---------------------- | --------------------- | +| A valid Azure Storage account key | N/A | N/A | + +## `CUBESTORE_AZURE_ACCOUNT` + +The name of the Azure Storage account. Required when +`CUBESTORE_AZURE_CONTAINER` is set. + +| Possible Values | Default in Development | Default in Production | +| ---------------------------------- | ---------------------- | --------------------- | +| A valid Azure Storage account name | N/A | N/A | + +## `CUBESTORE_AZURE_CONTAINER` + +The name of the Azure Blob Storage container. Required when using Azure Blob +Storage. + +| Possible Values | Default in Development | Default in Production | +| --------------------------- | ---------------------- | --------------------- | +| A valid blob container name | N/A | N/A | + +## `CUBESTORE_AZURE_ENDPOINT` + +A custom endpoint URL for Azure Blob Storage. Optional. Use this for local +development with [Azurite](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite) +or for sovereign cloud endpoints. 
+ +| Possible Values | Default in Development | Default in Production | +| --------------- | ---------------------- | --------------------- | +| A valid URL | N/A | N/A | + +## `CUBESTORE_AZURE_SAS_TOKEN` + +A shared access signature (SAS) token for Azure Blob Storage. Optional. Used +when `CUBESTORE_AZURE_ACCESS_KEY` is not set. If neither the access key nor +SAS token is set, federated token credentials are used. + +| Possible Values | Default in Development | Default in Production | +| ----------------- | ---------------------- | --------------------- | +| A valid SAS token | N/A | N/A | + +## `CUBESTORE_AZURE_SUB_PATH` + +The path prefix within the Azure Blob Storage container to store +pre-aggregations. Optional. + +| Possible Values | Default in Development | Default in Production | +| ------------------- | ---------------------- | --------------------- | +| A valid path prefix | N/A | N/A | + ## `CUBESTORE_BIND_ADDR` The address/port pair for Cube Store's MySQL-compatible interface. 
diff --git a/docs/content/product/caching/running-in-production.mdx b/docs/content/product/caching/running-in-production.mdx index 1a645be0e2acc..eb544e7b7b50b 100644 --- a/docs/content/product/caching/running-in-production.mdx +++ b/docs/content/product/caching/running-in-production.mdx @@ -280,8 +280,36 @@ services: - cubestore_router ``` +A simplified example using Azure Blob Storage might look like: + +```yaml +services: + cubestore_router: + image: cubejs/cubestore:latest + environment: + - CUBESTORE_SERVER_NAME=cubestore_router:9999 + - CUBESTORE_META_PORT=9999 + - CUBESTORE_WORKERS=cubestore_worker_1:9001 + - CUBESTORE_AZURE_CONTAINER= + - CUBESTORE_AZURE_ACCOUNT= + - CUBESTORE_AZURE_ACCESS_KEY= + + cubestore_worker_1: + image: cubejs/cubestore:latest + environment: + - CUBESTORE_SERVER_NAME=cubestore_worker_1:9001 + - CUBESTORE_WORKER_PORT=9001 + - CUBESTORE_META_ADDR=cubestore_router:9999 + - CUBESTORE_WORKERS=cubestore_worker_1:9001 + - CUBESTORE_AZURE_CONTAINER= + - CUBESTORE_AZURE_ACCOUNT= + - CUBESTORE_AZURE_ACCESS_KEY= + depends_on: + - cubestore_router +``` + Note that you can’t use the same bucket as an export bucket and persistent -storage for Cube Store. It's recommended to use two separate buckets. +storage for Cube Store. It’s recommended to use two separate buckets. ### Scratch storage @@ -309,6 +337,17 @@ default. +### Azure + +Cube Store supports Azure Blob Storage for persistent storage using account +keys, SAS tokens, or federated token credentials (workload identity, managed +identity). + +When `CUBESTORE_AZURE_ACCESS_KEY` and `CUBESTORE_AZURE_SAS_TOKEN` are not set, +Cube Store automatically uses federated token credentials via the standard +`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, and `AZURE_FEDERATED_TOKEN_FILE` +environment variables.
+ ### Garbage collection Cleanup isn’t done in export buckets; however, it's done in the persistent diff --git a/docs/content/product/configuration/reference/environment-variables.mdx b/docs/content/product/configuration/reference/environment-variables.mdx index 0c7f42140b8e2..06508db078bb7 100644 --- a/docs/content/product/configuration/reference/environment-variables.mdx +++ b/docs/content/product/configuration/reference/environment-variables.mdx @@ -1529,6 +1529,63 @@ Required when using an AWS instance role. | ------------------------- | ---------------------- | --------------------- | | A valid number in minutes | `180` | `180` | +## `CUBESTORE_AZURE_ACCESS_KEY` + +The access key for the Azure Storage account. Optional when using Azure Blob +Storage. If not set, falls back to SAS token authentication or +federated token credentials. + +| Possible Values | Default in Development | Default in Production | +| --------------------------------- | ---------------------- | --------------------- | +| A valid Azure Storage account key | N/A | N/A | + +## `CUBESTORE_AZURE_ACCOUNT` + +The name of the Azure Storage account. Required when +`CUBESTORE_AZURE_CONTAINER` is set. + +| Possible Values | Default in Development | Default in Production | +| ---------------------------------- | ---------------------- | --------------------- | +| A valid Azure Storage account name | N/A | N/A | + +## `CUBESTORE_AZURE_CONTAINER` + +The name of the Azure Blob Storage container. Required when using Azure Blob +Storage. + +| Possible Values | Default in Development | Default in Production | +| --------------------------- | ---------------------- | --------------------- | +| A valid blob container name | N/A | N/A | + +## `CUBESTORE_AZURE_ENDPOINT` + +A custom endpoint URL for Azure Blob Storage. Optional. Use this for local +development with [Azurite](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite) +or for sovereign cloud endpoints.
+ +| Possible Values | Default in Development | Default in Production | +| --------------- | ---------------------- | --------------------- | +| A valid URL | N/A | N/A | + +## `CUBESTORE_AZURE_SAS_TOKEN` + +A shared access signature (SAS) token for Azure Blob Storage. Optional. Used +when `CUBESTORE_AZURE_ACCESS_KEY` is not set. If neither the access key nor +SAS token is set, federated token credentials are used. + +| Possible Values | Default in Development | Default in Production | +| ----------------- | ---------------------- | --------------------- | +| A valid SAS token | N/A | N/A | + +## `CUBESTORE_AZURE_SUB_PATH` + +The path prefix within the Azure Blob Storage container to store +pre-aggregations. Optional. + +| Possible Values | Default in Development | Default in Production | +| ------------------- | ---------------------- | --------------------- | +| A valid path prefix | N/A | N/A | + ## `CUBESTORE_BIND_ADDR` The address/port pair for Cube Store's MySQL-compatible interface.