From 4a7467a219e6592e00d23498ad1a41ae876a42fe Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 24 Feb 2026 18:42:03 +0800 Subject: [PATCH 01/10] docs: add ROWS BETWEEN documentation - Add comprehensive ROWS BETWEEN documentation with examples - Update window functions index to include window frame section - Include common patterns: running totals, moving averages, centered windows - Add best practices and limitations sections Addresses missing documentation for window frame specification in Databend. --- .../08-window-functions/index.md | 30 +++ .../08-window-functions/rows-between.md | 253 ++++++++++++++++++ 2 files changed, 283 insertions(+) create mode 100644 docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md index 6c161f1035..3af83050e8 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md @@ -95,6 +95,36 @@ FUNCTION() OVER ( - **ORDER BY**: Sorts rows within each partition - **window_frame**: Defines which rows to include (optional) +## Window Frame Specification + +The window frame defines which rows are included in the calculation for each row. Databend supports two types of window frames: + +### 1. ROWS BETWEEN +Defines a window frame using physical row counts. + +**Syntax:** +```sql +ROWS BETWEEN frame_start AND frame_end +``` + +**Examples:** +- `ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW` - Running total +- `ROWS BETWEEN 2 PRECEDING AND CURRENT ROW` - 3-day moving average +- `ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING` - Centered window + +For detailed examples and usage, see [ROWS BETWEEN](rows-between.md). + +### 2. RANGE BETWEEN +Defines a window frame using logical value ranges. 
+ +**Syntax:** +```sql +RANGE BETWEEN frame_start AND frame_end +``` + +**Examples:** +- `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW` - Cumulative by value +- `RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW` - 7-day window ## Common Use Cases diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md new file mode 100644 index 0000000000..b43f08296f --- /dev/null +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md @@ -0,0 +1,253 @@ +--- +title: ROWS BETWEEN +--- + +Defines a window frame using row-based boundaries for window functions. + +## Overview + +The `ROWS BETWEEN` clause specifies which rows to include in the window frame for window function calculations. It allows you to define sliding windows, cumulative calculations, and other row-based aggregations. + +## Syntax + +```sql +FUNCTION() OVER ( + [ PARTITION BY partition_expression ] + [ ORDER BY sort_expression ] + ROWS BETWEEN frame_start AND frame_end +) +``` + +### Frame Boundaries + +| Boundary | Description | Example | +|----------|-------------|---------| +| `UNBOUNDED PRECEDING` | Start of partition | `ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW` | +| `n PRECEDING` | n rows before current row | `ROWS BETWEEN 2 PRECEDING AND CURRENT ROW` | +| `CURRENT ROW` | Current row | `ROWS BETWEEN CURRENT ROW AND CURRENT ROW` | +| `n FOLLOWING` | n rows after current row | `ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING` | +| `UNBOUNDED FOLLOWING` | End of partition | `ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING` | + +## ROWS vs RANGE + +| Aspect | ROWS | RANGE | +|--------|------|-------| +| **Definition** | Physical row count | Logical value range | +| **Boundaries** | Row positions | Value-based positions | +| **Ties** | Each row independent | Tied values share same frame | +| **Performance** | Generally faster | May be slower with duplicates | +| **Use 
Case** | Moving averages, running totals | Value-based windows, percentile calculations | + +## Examples + +### Sample Data + +```sql +CREATE TABLE sales ( + sale_date DATE, + product VARCHAR(20), + amount DECIMAL(10,2) +); + +INSERT INTO sales VALUES + ('2024-01-01', 'A', 100.00), + ('2024-01-02', 'A', 150.00), + ('2024-01-03', 'A', 200.00), + ('2024-01-04', 'A', 250.00), + ('2024-01-05', 'A', 300.00), + ('2024-01-01', 'B', 50.00), + ('2024-01-02', 'B', 75.00), + ('2024-01-03', 'B', 100.00), + ('2024-01-04', 'B', 125.00), + ('2024-01-05', 'B', 150.00); +``` + +### 1. Running Total (Cumulative Sum) + +```sql +SELECT sale_date, product, amount, + SUM(amount) OVER ( + PARTITION BY product + ORDER BY sale_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS running_total +FROM sales +ORDER BY product, sale_date; +``` + +Result: +``` +sale_date | product | amount | running_total +------------+---------+--------+-------------- +2024-01-01 | A | 100.00 | 100.00 +2024-01-02 | A | 150.00 | 250.00 +2024-01-03 | A | 200.00 | 450.00 +2024-01-04 | A | 250.00 | 700.00 +2024-01-05 | A | 300.00 | 1000.00 +2024-01-01 | B | 50.00 | 50.00 +2024-01-02 | B | 75.00 | 125.00 +2024-01-03 | B | 100.00 | 225.00 +2024-01-04 | B | 125.00 | 350.00 +2024-01-05 | B | 150.00 | 500.00 +``` + +### 2. Moving Average (3-Day Window) + +```sql +SELECT sale_date, product, amount, + AVG(amount) OVER ( + PARTITION BY product + ORDER BY sale_date + ROWS BETWEEN 2 PRECEDING AND CURRENT ROW + ) AS moving_avg_3day +FROM sales +ORDER BY product, sale_date; +``` + +Result: +``` +sale_date | product | amount | moving_avg_3day +------------+---------+--------+---------------- +2024-01-01 | A | 100.00 | 100.00 +2024-01-02 | A | 150.00 | 125.00 -- (100+150)/2 +2024-01-03 | A | 200.00 | 150.00 -- (100+150+200)/3 +2024-01-04 | A | 250.00 | 200.00 -- (150+200+250)/3 +2024-01-05 | A | 300.00 | 250.00 -- (200+250+300)/3 +``` + +### 3. 
Centered Window (Current + 1 Before + 1 After) + +```sql +SELECT sale_date, product, amount, + SUM(amount) OVER ( + PARTITION BY product + ORDER BY sale_date + ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING + ) AS centered_sum +FROM sales +ORDER BY product, sale_date; +``` + +Result: +``` +sale_date | product | amount | centered_sum +------------+---------+--------+------------- +2024-01-01 | A | 100.00 | 250.00 -- (100+150) +2024-01-02 | A | 150.00 | 450.00 -- (100+150+200) +2024-01-03 | A | 200.00 | 600.00 -- (150+200+250) +2024-01-04 | A | 250.00 | 750.00 -- (200+250+300) +2024-01-05 | A | 300.00 | 550.00 -- (250+300) +``` + +### 4. Future Looking Window + +```sql +SELECT sale_date, product, amount, + MIN(amount) OVER ( + PARTITION BY product + ORDER BY sale_date + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + ) AS min_next_3days +FROM sales +ORDER BY product, sale_date; +``` + +Result: +``` +sale_date | product | amount | min_next_3days +------------+---------+--------+--------------- +2024-01-01 | A | 100.00 | 100.00 -- min(100,150,200) +2024-01-02 | A | 150.00 | 150.00 -- min(150,200,250) +2024-01-03 | A | 200.00 | 200.00 -- min(200,250,300) +2024-01-04 | A | 250.00 | 250.00 -- min(250,300) +2024-01-05 | A | 300.00 | 300.00 -- min(300) +``` + +### 5. 
Full Partition Window + +```sql +SELECT sale_date, product, amount, + MAX(amount) OVER ( + PARTITION BY product + ORDER BY sale_date + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS max_in_partition, + MIN(amount) OVER ( + PARTITION BY product + ORDER BY sale_date + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS min_in_partition +FROM sales +ORDER BY product, sale_date; +``` + +Result: +``` +sale_date | product | amount | max_in_partition | min_in_partition +------------+---------+--------+------------------+----------------- +2024-01-01 | A | 100.00 | 300.00 | 100.00 +2024-01-02 | A | 150.00 | 300.00 | 100.00 +2024-01-03 | A | 200.00 | 300.00 | 100.00 +2024-01-04 | A | 250.00 | 300.00 | 100.00 +2024-01-05 | A | 300.00 | 300.00 | 100.00 +``` + +## Common Patterns + +### Running Calculations +```sql +-- Running total +SUM(column) OVER (ORDER BY sort_col ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + +-- Running average +AVG(column) OVER (ORDER BY sort_col ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + +-- Running count +COUNT(*) OVER (ORDER BY sort_col ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +``` + +### Moving Windows +```sql +-- 3-period moving average +AVG(column) OVER (ORDER BY sort_col ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) + +-- 5-period moving sum +SUM(column) OVER (ORDER BY sort_col ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) + +-- Centered 3-period window +AVG(column) OVER (ORDER BY sort_col ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) +``` + +### Bounded Windows +```sql +-- First 3 rows of partition +SUM(column) OVER (ORDER BY sort_col ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) + +-- Last 3 rows of partition +SUM(column) OVER (ORDER BY sort_col ROWS BETWEEN 2 PRECEDING AND UNBOUNDED FOLLOWING) + +-- Fixed window of 5 rows +AVG(column) OVER (ORDER BY sort_col ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) +``` + +## Best Practices + +1. 
**Use ROWS for physical row counts** when you need exact row-based windows +2. **Always include ORDER BY** when using ROWS BETWEEN (except for UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +3. **Consider performance** with large windows - smaller windows are more efficient +4. **Handle edge cases** - windows may be smaller at partition boundaries +5. **Combine with PARTITION BY** for per-group calculations + +## Limitations + +1. **n must be non-negative integer** - cannot use negative values or expressions +2. **ORDER BY required** for most window frames (except full partition) +3. **Frame boundaries must be ordered** - start_bound <= end_bound +4. **Cannot mix PRECEDING and FOLLOWING arbitrarily** - must form valid window + +## See Also + +- [Window Functions Overview](index.md) +- [RANGE BETWEEN](range-between.md) - Value-based window frames +- [Aggregate Functions](../07-aggregate-functions/) - Functions that can use window frames +- [FIRST_VALUE](first-value.md) - Window function examples with frames \ No newline at end of file From 8c1e942c597203adbc2723cd21c6a11c185c0e36 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 24 Feb 2026 18:59:04 +0800 Subject: [PATCH 02/10] docs: clarify boundary behavior in ROWS BETWEEN - Add detailed explanation of window frame behavior at partition boundaries - Include examples showing how windows shrink at edges - Address common confusion about window size at start/end of partitions --- .../08-window-functions/rows-between.md | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md index b43f08296f..66badf7f72 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md @@ -237,6 +237,29 @@ AVG(column) OVER (ORDER BY sort_col ROWS BETWEEN 
2 PRECEDING AND 2 FOLLOWING) 3. **Consider performance** with large windows - smaller windows are more efficient 4. **Handle edge cases** - windows may be smaller at partition boundaries 5. **Combine with PARTITION BY** for per-group calculations +6. **Understand boundary behavior** - windows shrink at partition edges + +### Boundary Behavior Examples + +**Centered window at partition edges:** +```sql +-- For row 1: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING +-- Actual window: CURRENT ROW AND 1 FOLLOWING (no preceding row exists) + +-- For last row: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING +-- Actual window: 1 PRECEDING AND CURRENT ROW (no following row exists) +``` + +**Moving average at start:** +```sql +-- For row 1: ROWS BETWEEN 2 PRECEDING AND CURRENT ROW +-- Actual window: CURRENT ROW only (no preceding rows) + +-- For row 2: ROWS BETWEEN 2 PRECEDING AND CURRENT ROW +-- Actual window: 1 PRECEDING AND CURRENT ROW (only 1 preceding row exists) +``` + +This is normal behavior - the window frame adapts to available rows at partition boundaries. 
## Limitations From 44ee123c5d900d5caaa8567c2b67cca125abf27e Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 24 Feb 2026 19:02:18 +0800 Subject: [PATCH 03/10] fix: address codex review comments - Add missing RANGE BETWEEN documentation (range-between.md) - Fix broken cross-reference in rows-between.md - Update index.md with proper links to both ROWS and RANGE documentation - Resolve P2 issue: Remove broken RANGE BETWEEN cross-reference by adding the missing file --- .../08-window-functions/index.md | 2 + .../08-window-functions/range-between.md | 230 ++++++++++++++++++ 2 files changed, 232 insertions(+) create mode 100644 docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md index 3af83050e8..9845fed333 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md @@ -126,6 +126,8 @@ RANGE BETWEEN frame_start AND frame_end - `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW` - Cumulative by value - `RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW` - 7-day window +For detailed examples and usage, see [RANGE BETWEEN](range-between.md). + ## Common Use Cases - **Ranking**: Create leaderboards and top-N lists diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md new file mode 100644 index 0000000000..3a1e621425 --- /dev/null +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md @@ -0,0 +1,230 @@ +--- +title: RANGE BETWEEN +--- + +Defines a window frame using value-based boundaries for window functions. 
+ +## Overview + +The `RANGE BETWEEN` clause specifies which rows to include in the window frame based on logical value ranges rather than physical row counts. It's particularly useful for time-based windows, value-based groupings, and handling duplicate values. + +## Syntax + +```sql +FUNCTION() OVER ( + [ PARTITION BY partition_expression ] + [ ORDER BY sort_expression ] + RANGE BETWEEN frame_start AND frame_end +) +``` + +### Frame Boundaries + +| Boundary | Description | Example | +|----------|-------------|---------| +| `UNBOUNDED PRECEDING` | Start of partition | `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW` | +| `value PRECEDING` | Value range before current row | `RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW` | +| `CURRENT ROW` | Current row value | `RANGE BETWEEN CURRENT ROW AND CURRENT ROW` | +| `value FOLLOWING` | Value range after current row | `RANGE BETWEEN CURRENT ROW AND INTERVAL '7' DAY FOLLOWING` | +| `UNBOUNDED FOLLOWING` | End of partition | `RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING` | + +## RANGE vs ROWS + +| Aspect | RANGE | ROWS | +|--------|-------|------| +| **Definition** | Logical value range | Physical row count | +| **Boundaries** | Value-based positions | Row positions | +| **Ties** | Tied values share same frame | Each row independent | +| **Performance** | May be slower with duplicates | Generally faster | +| **Use Case** | Time-based windows, percentile calculations | Moving averages, running totals | + +## Value Types for RANGE + +### 1. Numeric Values +```sql +-- Include rows within ±10 units +RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING + +-- Include rows with values up to 50 less than current +RANGE BETWEEN 50 PRECEDING AND CURRENT ROW +``` + +### 2. 
Interval Values (for DATE/TIMESTAMP) +```sql +-- 7-day window +RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW + +-- 1-hour window +RANGE BETWEEN INTERVAL '1' HOUR PRECEDING AND CURRENT ROW + +-- 30-minute centered window +RANGE BETWEEN INTERVAL '15' MINUTE PRECEDING AND INTERVAL '15' MINUTE FOLLOWING +``` + +### 3. No Value Specified (Default) +When a window has an `ORDER BY` but no explicit frame clause, the frame defaults to `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`, i.e. every row from the start of the partition through the current row and its peers: +```sql +RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -- Default behavior +``` + +## Examples + +### Sample Data + +```sql +CREATE TABLE temperature_readings ( + reading_time TIMESTAMP, + sensor_id VARCHAR(10), + temperature DECIMAL(5,2) +); + +INSERT INTO temperature_readings VALUES + ('2024-01-01 00:00:00', 'S1', 20.5), + ('2024-01-01 01:00:00', 'S1', 21.0), + ('2024-01-01 02:00:00', 'S1', 20.8), + ('2024-01-01 03:00:00', 'S1', 22.1), + ('2024-01-01 04:00:00', 'S1', 21.5), + ('2024-01-01 00:00:00', 'S2', 19.8), + ('2024-01-01 01:00:00', 'S2', 20.2), + ('2024-01-01 02:00:00', 'S2', 19.9), + ('2024-01-01 03:00:00', 'S2', 21.0), + ('2024-01-01 04:00:00', 'S2', 20.5); +``` + +### 1. 24-Hour Rolling Average + +```sql +SELECT reading_time, sensor_id, temperature, + AVG(temperature) OVER ( + PARTITION BY sensor_id + ORDER BY reading_time + RANGE BETWEEN INTERVAL '24' HOUR PRECEDING AND CURRENT ROW + ) AS avg_24h +FROM temperature_readings +ORDER BY sensor_id, reading_time; +``` + +### 2. Value-Based Window (Within ±0.5 degrees) + +```sql +SELECT reading_time, sensor_id, temperature, + COUNT(*) OVER ( + PARTITION BY sensor_id + ORDER BY temperature + RANGE BETWEEN 0.5 PRECEDING AND 0.5 FOLLOWING + ) AS similar_readings_count +FROM temperature_readings +ORDER BY sensor_id, temperature; +``` + +### 3.
Handling Duplicate Values + +```sql +CREATE TABLE sales_duplicates ( + sale_date DATE, + amount DECIMAL(10,2) +); + +INSERT INTO sales_duplicates VALUES + ('2024-01-01', 100.00), + ('2024-01-01', 100.00), -- Duplicate date + ('2024-01-02', 150.00), + ('2024-01-03', 200.00), + ('2024-01-03', 200.00); -- Duplicate date + +-- RANGE treats rows with the same date as peers: the frame includes every row sharing the current row's date +SELECT sale_date, amount, + SUM(amount) OVER ( + ORDER BY sale_date + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS running_total_range, + SUM(amount) OVER ( + ORDER BY sale_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS running_total_rows +FROM sales_duplicates +ORDER BY sale_date; +``` + +**Result comparison:** +``` +sale_date | amount | running_total_range | running_total_rows +------------+--------+---------------------+-------------------- +2024-01-01 | 100.00 | 200.00 | 100.00 -- ROWS differs: only the first of the tied rows is counted +2024-01-01 | 100.00 | 200.00 | 200.00 +2024-01-02 | 150.00 | 350.00 | 350.00 +2024-01-03 | 200.00 | 750.00 | 550.00 -- ROWS differs: only the first of the tied rows is counted +2024-01-03 | 200.00 | 750.00 | 750.00 +``` + +### 4.
Time-Based Centered Window + +```sql +SELECT reading_time, sensor_id, temperature, + AVG(temperature) OVER ( + PARTITION BY sensor_id + ORDER BY reading_time + RANGE BETWEEN INTERVAL '30' MINUTE PRECEDING + AND INTERVAL '30' MINUTE FOLLOWING + ) AS avg_hour_centered +FROM temperature_readings +ORDER BY sensor_id, reading_time; +``` + +## Common Patterns + +### Time-Based Windows +```sql +-- 7-day rolling window +RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW + +-- 1-hour centered window +RANGE BETWEEN INTERVAL '30' MINUTE PRECEDING AND INTERVAL '30' MINUTE FOLLOWING + +-- Month-to-date +RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -- When ORDER BY is date +``` + +### Value-Based Windows +```sql +-- Within ±10 units +RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING + +-- Values up to 100 less than current +RANGE BETWEEN 100 PRECEDING AND CURRENT ROW + +-- Values within current ±5% +RANGE BETWEEN (current * 0.05) PRECEDING AND (current * 0.05) FOLLOWING +``` + +### Handling Duplicates +```sql +-- Include all duplicate values in same window +RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + +-- Value-based grouping +RANGE BETWEEN 0 PRECEDING AND 0 FOLLOWING -- Groups identical values +``` + +## Best Practices + +1. **Use RANGE for value-based windows** - When you care about logical value ranges rather than row counts +2. **Use with DATE/TIMESTAMP** - Perfect for time-based calculations +3. **Handle duplicates intentionally** - RANGE groups duplicate ORDER BY values +4. **Consider performance** - RANGE can be slower than ROWS with many duplicates +5. **Specify intervals clearly** - Use explicit INTERVAL syntax for date/time windows + +## Limitations + +1. **ORDER BY must be numeric or temporal** - RANGE requires sortable values +2. **Only one ORDER BY column** - RANGE works with single column ordering +3. **Value expressions limited** - Simple numeric/interval values, not complex expressions +4. 
**Performance considerations** - May be slower than ROWS with many duplicate values +5. **Frame boundaries must be compatible** - Same unit type for PRECEDING/FOLLOWING + +## See Also + +- [Window Functions Overview](index.md) +- [ROWS BETWEEN](rows-between.md) - Row-based window frames +- [Aggregate Functions](../07-aggregate-functions/) - Functions that can use window frames +- [Date and Time Functions](../10-date-time-functions/) - Useful with RANGE intervals \ No newline at end of file From 1afec4ff77ee70e5440340804cb80d4508ece97a Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 24 Feb 2026 19:28:55 +0800 Subject: [PATCH 04/10] docs: validate and improve SQL examples in window frame documentation - Clarify distinction between syntax examples and complete SQL statements - Add 'Complete example' sections with executable SQL - Mark syntax examples clearly as patterns (not complete statements) - Update index.md with validation note - All complete SQL examples verified with Databend MCP - Improve documentation clarity and usability --- .../08-window-functions/index.md | 6 +++ .../08-window-functions/range-between.md | 51 ++++++++++++++++--- .../08-window-functions/rows-between.md | 39 ++++++++++++++ 3 files changed, 90 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md index 9845fed333..8e7abf3f7f 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/index.md @@ -12,6 +12,12 @@ Window functions perform calculations across a set of related rows while returni - Can access values from other rows in the window - Support partitioning and ordering for flexible calculations +**Note on SQL examples in this documentation:** +- ✅ **Complete SQL statements** have been validated against Databend +- ⚠️ **Syntax examples** show window frame 
patterns (not complete statements) +- 📋 All examples use standard SQL syntax supported by Databend +- 🔍 Examples marked as "Complete example" are fully executable + ## Window Function Categories Databend supports two main categories of window functions: diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md index 3a1e621425..d9683e0ea1 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md @@ -174,6 +174,7 @@ ORDER BY sensor_id, reading_time; ## Common Patterns ### Time-Based Windows +**Syntax examples:** ```sql -- 7-day rolling window RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW @@ -181,11 +182,24 @@ RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW -- 1-hour centered window RANGE BETWEEN INTERVAL '30' MINUTE PRECEDING AND INTERVAL '30' MINUTE FOLLOWING --- Month-to-date -RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -- When ORDER BY is date +-- Month-to-date (when ORDER BY is date) +RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +``` + +**Complete example:** +```sql +-- 7-day rolling average +SELECT sale_date, amount, + AVG(amount) OVER ( + ORDER BY sale_date + RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW + ) AS avg_7day +FROM sales +ORDER BY sale_date; ``` ### Value-Based Windows +**Syntax examples:** ```sql -- Within ±10 units RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING @@ -193,17 +207,42 @@ RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING -- Values up to 100 less than current RANGE BETWEEN 100 PRECEDING AND CURRENT ROW --- Values within current ±5% -RANGE BETWEEN (current * 0.05) PRECEDING AND (current * 0.05) FOLLOWING +-- Note: Complex expressions like (current * 0.05) may not be supported +-- Use fixed values or simple expressions +``` + +**Complete example:** +```sql +-- Include rows within ±0.5 
units +SELECT temperature, reading_time, + COUNT(*) OVER ( + ORDER BY temperature + RANGE BETWEEN 0.5 PRECEDING AND 0.5 FOLLOWING + ) AS similar_readings +FROM temperature_readings +ORDER BY temperature; ``` ### Handling Duplicates +**Syntax examples:** ```sql -- Include all duplicate values in same window RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW --- Value-based grouping -RANGE BETWEEN 0 PRECEDING AND 0 FOLLOWING -- Groups identical values +-- Value-based grouping (groups identical values) +RANGE BETWEEN 0 PRECEDING AND 0 FOLLOWING +``` + +**Complete example:** +```sql +-- RANGE treats duplicate dates as same window +SELECT sale_date, amount, + SUM(amount) OVER ( + ORDER BY sale_date + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS running_total_range +FROM sales_duplicates +ORDER BY sale_date; ``` ## Best Practices diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md index 66badf7f72..ac29056a99 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md @@ -195,6 +195,7 @@ sale_date | product | amount | max_in_partition | min_in_partition ## Common Patterns ### Running Calculations +**Syntax examples (not complete statements):** ```sql -- Running total SUM(column) OVER (ORDER BY sort_col ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) @@ -206,7 +207,20 @@ AVG(column) OVER (ORDER BY sort_col ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT COUNT(*) OVER (ORDER BY sort_col ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ``` +**Complete example:** +```sql +-- Running total with actual table +SELECT sale_date, product, amount, + SUM(amount) OVER ( + ORDER BY sale_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS running_total +FROM sales +ORDER BY sale_date; +``` + ### Moving Windows +**Syntax examples:** ```sql -- 
3-period moving average AVG(column) OVER (ORDER BY sort_col ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) @@ -218,7 +232,20 @@ SUM(column) OVER (ORDER BY sort_col ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AVG(column) OVER (ORDER BY sort_col ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) ``` +**Complete example:** +```sql +-- 3-day moving average +SELECT sale_date, amount, + AVG(amount) OVER ( + ORDER BY sale_date + ROWS BETWEEN 2 PRECEDING AND CURRENT ROW + ) AS moving_avg_3day +FROM sales +ORDER BY sale_date; +``` + ### Bounded Windows +**Syntax examples:** ```sql -- First 3 rows of partition SUM(column) OVER (ORDER BY sort_col ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) @@ -230,6 +257,18 @@ SUM(column) OVER (ORDER BY sort_col ROWS BETWEEN 2 PRECEDING AND UNBOUNDED FOLLO AVG(column) OVER (ORDER BY sort_col ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) ``` +**Complete example:** +```sql +-- Fixed 5-row window average +SELECT sale_date, amount, + AVG(amount) OVER ( + ORDER BY sale_date + ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING + ) AS avg_5row_window +FROM sales +ORDER BY sale_date; +``` + ## Best Practices 1. 
**Use ROWS for physical row counts** when you need exact row-based windows From 11b5e2201518e03b90a43c4cfa7808313a681ed0 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 24 Feb 2026 19:40:30 +0800 Subject: [PATCH 05/10] docs: update SQL examples for Databend Cloud compatibility - Use CREATE OR REPLACE TABLE for self-hosted deployments - Add WITH clause examples for Databend Cloud compatibility - Ensure all SQL examples can run in restricted environments - Provide both options with clear explanations - Maintain result consistency with MCP validation --- .../08-window-functions/rows-between.md | 55 ++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md index ac29056a99..3d95ce88c8 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md @@ -42,8 +42,9 @@ FUNCTION() OVER ( ### Sample Data +**Option 1: Using CREATE OR REPLACE TABLE (for self-hosted Databend)** ```sql -CREATE TABLE sales ( +CREATE OR REPLACE TABLE sales ( sale_date DATE, product VARCHAR(20), amount DECIMAL(10,2) @@ -62,9 +63,43 @@ INSERT INTO sales VALUES ('2024-01-05', 'B', 150.00); ``` +**Option 2: Using WITH clause (for Databend Cloud or temporary data)** +```sql +WITH sales AS ( + SELECT * FROM (VALUES + ('2024-01-01', 'A', 100.00), + ('2024-01-02', 'A', 150.00), + ('2024-01-03', 'A', 200.00), + ('2024-01-04', 'A', 250.00), + ('2024-01-05', 'A', 300.00), + ('2024-01-01', 'B', 50.00), + ('2024-01-02', 'B', 75.00), + ('2024-01-03', 'B', 100.00), + ('2024-01-04', 'B', 125.00), + ('2024-01-05', 'B', 150.00) + ) AS t(sale_date, product, amount) +) +-- Your SELECT queries go here, using "sales" as the table +SELECT * FROM sales ORDER BY product, sale_date; +``` + ### 1. 
Running Total (Cumulative Sum) ```sql +WITH sales AS ( + SELECT * FROM (VALUES + ('2024-01-01', 'A', 100.00), + ('2024-01-02', 'A', 150.00), + ('2024-01-03', 'A', 200.00), + ('2024-01-04', 'A', 250.00), + ('2024-01-05', 'A', 300.00), + ('2024-01-01', 'B', 50.00), + ('2024-01-02', 'B', 75.00), + ('2024-01-03', 'B', 100.00), + ('2024-01-04', 'B', 125.00), + ('2024-01-05', 'B', 150.00) + ) AS t(sale_date, product, amount) +) SELECT sale_date, product, amount, SUM(amount) OVER ( PARTITION BY product @@ -94,6 +129,15 @@ sale_date | product | amount | running_total ### 2. Moving Average (3-Day Window) ```sql +WITH sales AS ( + SELECT * FROM (VALUES + ('2024-01-01', 'A', 100.00), + ('2024-01-02', 'A', 150.00), + ('2024-01-03', 'A', 200.00), + ('2024-01-04', 'A', 250.00), + ('2024-01-05', 'A', 300.00) + ) AS t(sale_date, product, amount) +) SELECT sale_date, product, amount, AVG(amount) OVER ( PARTITION BY product @@ -118,6 +162,15 @@ sale_date | product | amount | moving_avg_3day ### 3. Centered Window (Current + 1 Before + 1 After) ```sql +WITH sales AS ( + SELECT * FROM (VALUES + ('2024-01-01', 'A', 100.00), + ('2024-01-02', 'A', 150.00), + ('2024-01-03', 'A', 200.00), + ('2024-01-04', 'A', 250.00), + ('2024-01-05', 'A', 300.00) + ) AS t(sale_date, product, amount) +) SELECT sale_date, product, amount, SUM(amount) OVER ( PARTITION BY product From 6ae5d2d16629ae4e4341421494c05cf57f6b0220 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 24 Feb 2026 19:44:33 +0800 Subject: [PATCH 06/10] docs: use CREATE OR REPLACE TABLE for repeatable execution --- .../08-window-functions/rows-between.md | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md index 3d95ce88c8..63ce267b2e 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md +++ 
b/docs/en/sql-reference/20-sql-functions/08-window-functions/rows-between.md @@ -42,7 +42,6 @@ FUNCTION() OVER ( ### Sample Data -**Option 1: Using CREATE OR REPLACE TABLE (for self-hosted Databend)** ```sql CREATE OR REPLACE TABLE sales ( sale_date DATE, @@ -63,43 +62,9 @@ INSERT INTO sales VALUES ('2024-01-05', 'B', 150.00); ``` -**Option 2: Using WITH clause (for Databend Cloud or temporary data)** -```sql -WITH sales AS ( - SELECT * FROM (VALUES - ('2024-01-01', 'A', 100.00), - ('2024-01-02', 'A', 150.00), - ('2024-01-03', 'A', 200.00), - ('2024-01-04', 'A', 250.00), - ('2024-01-05', 'A', 300.00), - ('2024-01-01', 'B', 50.00), - ('2024-01-02', 'B', 75.00), - ('2024-01-03', 'B', 100.00), - ('2024-01-04', 'B', 125.00), - ('2024-01-05', 'B', 150.00) - ) AS t(sale_date, product, amount) -) --- Your SELECT queries go here, using "sales" as the table -SELECT * FROM sales ORDER BY product, sale_date; -``` - ### 1. Running Total (Cumulative Sum) ```sql -WITH sales AS ( - SELECT * FROM (VALUES - ('2024-01-01', 'A', 100.00), - ('2024-01-02', 'A', 150.00), - ('2024-01-03', 'A', 200.00), - ('2024-01-04', 'A', 250.00), - ('2024-01-05', 'A', 300.00), - ('2024-01-01', 'B', 50.00), - ('2024-01-02', 'B', 75.00), - ('2024-01-03', 'B', 100.00), - ('2024-01-04', 'B', 125.00), - ('2024-01-05', 'B', 150.00) - ) AS t(sale_date, product, amount) -) SELECT sale_date, product, amount, SUM(amount) OVER ( PARTITION BY product @@ -129,15 +94,6 @@ sale_date | product | amount | running_total ### 2. Moving Average (3-Day Window) ```sql -WITH sales AS ( - SELECT * FROM (VALUES - ('2024-01-01', 'A', 100.00), - ('2024-01-02', 'A', 150.00), - ('2024-01-03', 'A', 200.00), - ('2024-01-04', 'A', 250.00), - ('2024-01-05', 'A', 300.00) - ) AS t(sale_date, product, amount) -) SELECT sale_date, product, amount, AVG(amount) OVER ( PARTITION BY product @@ -162,15 +118,6 @@ sale_date | product | amount | moving_avg_3day ### 3. 
Centered Window (Current + 1 Before + 1 After) ```sql -WITH sales AS ( - SELECT * FROM (VALUES - ('2024-01-01', 'A', 100.00), - ('2024-01-02', 'A', 150.00), - ('2024-01-03', 'A', 200.00), - ('2024-01-04', 'A', 250.00), - ('2024-01-05', 'A', 300.00) - ) AS t(sale_date, product, amount) -) SELECT sale_date, product, amount, SUM(amount) OVER ( PARTITION BY product From 74dae23b300ec7012b666e50b00c7ab19d4fa9a8 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 24 Feb 2026 21:11:18 +0800 Subject: [PATCH 07/10] chore: retrigger vercel deployment From 9114c849e8cbc1e076326c6dfe9c269e2a479d5f Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 26 Feb 2026 13:33:09 +0800 Subject: [PATCH 08/10] feat: add WORKER documentation for PR #19383 Add comprehensive documentation for the new WORKER feature introduced in PR #19383: - Created new directory: docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/ - Added index.md with overview of WORKER functionality - Added create-worker.md with CREATE WORKER syntax and examples - Added show-workers.md with SHOW WORKERS syntax and output format - Added alter-worker.md with ALTER WORKER syntax for SET/UNSET options, tags, and state management - Added drop-worker.md with DROP WORKER syntax and behavior - Added examples.md with comprehensive usage examples and best practices - Updated DDL index page to include WORKER in Resource Management section - Updated UDF documentation to reference WORKER management - Added _category_.json for sidebar navigation The WORKER feature manages UDF execution environments in Databend Cloud sandbox, providing full lifecycle control over compute resources for user-defined functions. 
--- .../00-ddl/10-udf/ddl-create-function.md | 49 +++ .../00-ddl/20-worker/_category_.json | 8 + .../00-ddl/20-worker/alter-worker.md | 103 +++++++ .../00-ddl/20-worker/create-worker.md | 75 +++++ .../00-ddl/20-worker/drop-worker.md | 61 ++++ .../00-ddl/20-worker/examples.md | 283 ++++++++++++++++++ .../10-sql-commands/00-ddl/20-worker/index.md | 60 ++++ .../00-ddl/20-worker/show-workers.md | 63 ++++ .../10-sql-commands/00-ddl/index.md | 1 + 9 files changed, 703 insertions(+) create mode 100644 docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/_category_.json create mode 100644 docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/alter-worker.md create mode 100644 docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/create-worker.md create mode 100644 docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/drop-worker.md create mode 100644 docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/examples.md create mode 100644 docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/index.md create mode 100644 docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/show-workers.md diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/10-udf/ddl-create-function.md b/docs/en/sql-reference/10-sql-commands/00-ddl/10-udf/ddl-create-function.md index 3af4e9a17c..62e6079893 100644 --- a/docs/en/sql-reference/10-sql-commands/00-ddl/10-udf/ddl-create-function.md +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/10-udf/ddl-create-function.md @@ -186,4 +186,53 @@ export function calculateAge(birthDateStr) { return age; } $$; + +## Worker Management for UDFs + +In Databend Cloud, each UDF has an associated **Worker** that manages its execution environment in the sandbox. After creating a UDF, you may need to manage its worker for optimal performance and resource utilization. 
+ +### Creating a Worker for Your UDF + +```sql +-- Create a worker for your UDF (worker name should match UDF name) +CREATE WORKER calculate_age_js WITH + size='small', + auto_suspend='300', + auto_resume='true'; +``` + +### Managing Worker Resources + +```sql +-- View all workers +SHOW WORKERS; + +-- Adjust worker settings +ALTER WORKER calculate_age_js SET size='medium', auto_suspend='600'; + +-- Add tags for organization +ALTER WORKER calculate_age_js SET TAG + environment='production', + team='analytics', + purpose='age-calculation'; +``` + +### Worker Lifecycle + +```sql +-- Suspend worker when not in use +ALTER WORKER calculate_age_js SUSPEND; + +-- Resume worker when needed +ALTER WORKER calculate_age_js RESUME; + +-- Remove worker when UDF is no longer needed +DROP WORKER calculate_age_js; +``` + +### Environment Variables + +For security reasons, environment variables for UDFs are managed separately in the cloud console. After creating a UDF and its worker, configure any required environment variables through the Databend Cloud interface. + +For more information, see [Worker Management](../20-worker/index.md). ``` diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/_category_.json b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/_category_.json new file mode 100644 index 0000000000..364daa8bd6 --- /dev/null +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Worker", + "position": 20, + "link": { + "type": "generated-index", + "description": "Worker management commands for UDF execution environments in Databend Cloud sandbox." 
+ } +} \ No newline at end of file diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/alter-worker.md b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/alter-worker.md new file mode 100644 index 0000000000..94b94be1c1 --- /dev/null +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/alter-worker.md @@ -0,0 +1,103 @@ +--- +title: ALTER WORKER +sidebar_position: 3 +--- + +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +Modifies the settings, tags, or state of an existing worker. + +## Syntax + +```sql +-- Modify worker options +ALTER WORKER <worker_name> SET <option_name> = '<value>' [, <option_name> = '<value>' ...] +ALTER WORKER <worker_name> UNSET <option_name> [, <option_name> ...] + +-- Modify worker tags +ALTER WORKER <worker_name> SET TAG <tag_name> = '<tag_value>' [, <tag_name> = '<tag_value>' ...] +ALTER WORKER <worker_name> UNSET TAG <tag_name> [, <tag_name> ...] + +-- Change worker state +ALTER WORKER <worker_name> SUSPEND +ALTER WORKER <worker_name> RESUME +``` + +| Parameter | Description | +| ------------ | --------------------------------------------------------------------------- | +| worker_name | The name of the worker to modify | +| option_name | One of: `size`, `auto_suspend`, `auto_resume`, `max_cluster_count`, `min_cluster_count` | +| value | The new value for the option (as a string) | +| tag_name | The name of the tag to set or unset | +| tag_value | The value for the tag | + +## Options + +The same options available in `CREATE WORKER` can be modified using `ALTER WORKER`: + +| Option | Description | +| --------------------- | --------------------------------------------------------------------------- | +| `size` | Compute size of the worker (e.g., 'small', 'medium') | +| `auto_suspend` | Idle timeout before automatic suspend (seconds) | +| `auto_resume` | Whether auto-resume is enabled ('true' or 'false') | +| `max_cluster_count` | Upper bound for auto-scaling clusters | +| `min_cluster_count` | Lower bound for auto-scaling clusters | + +## Examples + +### Modify Worker Options + +Change the size and auto-suspend settings of a worker: + +```sql +ALTER WORKER read_env SET size='medium',
auto_suspend='600'; +``` + +Reset specific options to their default values: + +```sql +ALTER WORKER read_env UNSET size, auto_suspend; +``` + +### Manage Worker Tags + +Add or update tags on a worker: + +```sql +ALTER WORKER read_env SET TAG purpose='sandbox', owner='ci'; +``` + +Remove tags from a worker: + +```sql +ALTER WORKER read_env UNSET TAG purpose, owner; +``` + +### Control Worker State + +Suspend a worker (stop its execution environment): + +```sql +ALTER WORKER read_env SUSPEND; +``` + +Resume a suspended worker: + +```sql +ALTER WORKER read_env RESUME; +``` + +## Notes + +1. **Atomic Operations**: Multiple options can be modified in a single `ALTER WORKER` statement. +2. **State Changes**: `SUSPEND` and `RESUME` are mutually exclusive with option modifications. +3. **Tag Management**: Tags are useful for categorizing and organizing workers. They can be used for cost allocation, environment identification, or team ownership. +4. **Validation**: Option values are validated according to the same rules as `CREATE WORKER`. + +## Related Topics + +- [CREATE WORKER](create-worker.md) - Create a new worker +- [SHOW WORKERS](show-workers.md) - List workers and their current settings +- [DROP WORKER](drop-worker.md) - Remove a worker \ No newline at end of file diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/create-worker.md b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/create-worker.md new file mode 100644 index 0000000000..67dda991f9 --- /dev/null +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/create-worker.md @@ -0,0 +1,75 @@ +--- +title: CREATE WORKER +sidebar_position: 1 +--- + +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +Creates a new worker for UDF execution in the sandbox environment. 
+ +## Syntax + +```sql +CREATE WORKER [ IF NOT EXISTS ] <worker_name> + [ WITH size = '<value>'] + [ WITH auto_suspend = '<value>'] + [ WITH auto_resume = '<value>'] + [ WITH max_cluster_count = '<value>'] + [ WITH min_cluster_count = '<value>'] +``` + +| Parameter | Description | +| --------------- | --------------------------------------------------------------------------------------------- | +| `IF NOT EXISTS` | Optional. If specified, the command succeeds without changes if the worker already exists. | +| worker_name | The name of the worker to create. Corresponds to a UDF name. | + +## Options + +| Option | Type / Values | Default | Description | +| --------------------- | -------------------------------------- | ------------- | --------------------------------------------------------------------------- | +| `size` | String (e.g., 'small', 'medium') | Platform default | Controls the compute size of the worker | +| `auto_suspend` | String (seconds) | Platform default | Idle timeout before automatic suspend | +| `auto_resume` | String ('true' or 'false') | Platform default | Controls whether incoming requests wake the worker automatically | +| `max_cluster_count` | String (count) | Platform default | Upper bound for auto-scaling clusters | +| `min_cluster_count` | String (count) | Platform default | Lower bound for auto-scaling clusters | + +- Options are specified as key-value pairs using the `WITH` keyword +- All option values are passed as strings and must be enclosed in single quotes +- Options may appear in any order +- Option names are case-insensitive but shown here in lowercase for consistency + +## Examples + +Create a basic worker for a UDF named `read_env`: + +```sql +CREATE WORKER read_env; +``` + +Create a worker with `IF NOT EXISTS` to avoid errors if it already exists: + +```sql +CREATE WORKER IF NOT EXISTS read_env; +``` + +Create a worker with custom configuration: + +```sql +CREATE WORKER read_env WITH size='small', auto_suspend='300', auto_resume='true', max_cluster_count='3',
min_cluster_count='1'; +``` + +## Notes + +1. **UDF Association**: Each worker corresponds to a single UDF. The worker name should match the UDF name. +2. **Environment Variables**: Environment variables for UDFs are managed separately in the cloud console for security reasons. +3. **Resource Management**: Workers manage the execution environment and resources for their associated UDFs. +4. **Cloud Integration**: Workers are integrated with Databend Cloud's control plane for lifecycle management. + +## Related Topics + +- [ALTER WORKER](alter-worker.md) - Modify worker settings +- [SHOW WORKERS](show-workers.md) - List available workers +- [DROP WORKER](drop-worker.md) - Remove a worker +- [User-Defined Functions](../../../../guides/60-udf/index.md) - Learn about UDFs in Databend \ No newline at end of file diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/drop-worker.md b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/drop-worker.md new file mode 100644 index 0000000000..ee06b6e776 --- /dev/null +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/drop-worker.md @@ -0,0 +1,61 @@ +--- +title: DROP WORKER +sidebar_position: 4 +--- + +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +Removes a worker from the system. + +## Syntax + +```sql +DROP WORKER [ IF EXISTS ] <worker_name> +``` + +| Parameter | Description | +| --------------- | --------------------------------------------------------------------------------------------- | +| `IF EXISTS` | Optional. If specified, the command succeeds without errors if the worker does not exist. | +| worker_name | The name of the worker to remove. | + +## Examples + +Remove a worker: + +```sql +DROP WORKER read_env; +``` + +Remove a worker only if it exists (avoids errors if the worker doesn't exist): + +```sql +DROP WORKER IF EXISTS read_env; +``` + +## Behavior + +1. **Resource Cleanup**: When a worker is dropped, all associated resources are released. +2.
**UDF Association**: Dropping a worker does not automatically drop the associated UDF. The UDF can still exist but will not have an execution environment. +3. **Irreversible Operation**: Dropping a worker cannot be undone. The worker must be recreated if needed. +4. **Dependencies**: Ensure no active executions are using the worker before dropping it. + +## Error Conditions + +- `UnknownWorker`: If the worker does not exist and `IF EXISTS` is not specified. +- `WorkerInUse`: If the worker is currently executing UDFs or has pending operations. + +## Notes + +1. **Safety First**: Use `IF EXISTS` to make your scripts idempotent and avoid unnecessary errors. +2. **Check State**: Consider checking the worker's state with `SHOW WORKERS` before dropping it. +3. **Cleanup Order**: If you're cleaning up a UDF and its worker, drop the worker first, then the UDF if desired. +4. **Cloud Integration**: Worker deletion is coordinated with Databend Cloud's control plane to ensure proper resource cleanup. + +## Related Topics + +- [CREATE WORKER](create-worker.md) - Create a new worker +- [SHOW WORKERS](show-workers.md) - List available workers +- [ALTER WORKER](alter-worker.md) - Modify worker settings +- [User-Defined Functions](../../../../guides/60-udf/index.md) - Learn about UDFs in Databend \ No newline at end of file diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/examples.md b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/examples.md new file mode 100644 index 0000000000..626fc437f1 --- /dev/null +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/examples.md @@ -0,0 +1,283 @@ +--- +title: Worker Examples +sidebar_position: 5 +--- + +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +This page provides comprehensive examples of using WORKER commands to manage UDF execution environments in Databend Cloud. + +## Basic Worker Lifecycle + +### 1. 
Create a Worker + +Create a basic worker for a UDF named `read_env`: + +```sql +CREATE WORKER read_env; +``` + +Create a worker with `IF NOT EXISTS` to avoid errors: + +```sql +CREATE WORKER IF NOT EXISTS read_env; +``` + +Create a worker with custom configuration: + +```sql +CREATE WORKER read_env WITH + size='small', + auto_suspend='300', + auto_resume='true', + max_cluster_count='3', + min_cluster_count='1'; +``` + +### 2. List Workers + +View all workers in the current tenant: + +```sql +SHOW WORKERS; +``` + +### 3. Modify Worker Settings + +Change worker size and auto-suspend settings: + +```sql +ALTER WORKER read_env SET size='medium', auto_suspend='600'; +``` + +Reset specific options to defaults: + +```sql +ALTER WORKER read_env UNSET size, auto_suspend; +``` + +### 4. Manage Worker Tags + +Add tags to categorize workers: + +```sql +ALTER WORKER read_env SET TAG purpose='sandbox', owner='ci'; +``` + +Remove tags when no longer needed: + +```sql +ALTER WORKER read_env UNSET TAG purpose, owner; +``` + +### 5. Control Worker State + +Suspend a worker (stop its execution environment): + +```sql +ALTER WORKER read_env SUSPEND; +``` + +Resume a suspended worker: + +```sql +ALTER WORKER read_env RESUME; +``` + +### 6. 
Remove a Worker + +Remove a worker when no longer needed: + +```sql +DROP WORKER read_env; +``` + +Safely remove a worker (no error if it doesn't exist): + +```sql +DROP WORKER IF EXISTS read_env; +``` + +## Advanced Examples + +### Worker for Different Environments + +Create workers with environment-specific configurations: + +```sql +-- Development worker +CREATE WORKER dev_processor WITH + size='small', + auto_suspend='60', + auto_resume='true', + max_cluster_count='1', + min_cluster_count='1'; + +-- Production worker +CREATE WORKER prod_processor WITH + size='large', + auto_suspend='1800', + auto_resume='true', + max_cluster_count='5', + min_cluster_count='2'; +``` + +### Worker with Comprehensive Tagging + +Create a worker with detailed tags for organization: + +```sql +CREATE WORKER data_processor WITH + size='medium', + auto_suspend='900', + auto_resume='true', + max_cluster_count='3', + min_cluster_count='1' + TAG ( + environment='production', + team='data-engineering', + project='etl-pipeline', + cost_center='analytics', + created_by='ci-system' + ); +``` + +### Dynamic Worker Management + +Script to ensure a worker exists with specific configuration: + +```sql +-- Create worker if it doesn't exist +CREATE WORKER IF NOT EXISTS my_worker WITH + size='small', + auto_suspend='300'; + +-- Update tags +ALTER WORKER my_worker SET TAG + environment='staging', + last_updated=CAST(CURRENT_TIMESTAMP() AS STRING); + +-- Show current configuration +SHOW WORKERS; +``` + +## Best Practices + +### 1. Naming Conventions + +- Use descriptive names that indicate the UDF's purpose +- Include environment suffix (e.g., `_dev`, `_prod`, `_staging`) +- Consider team/project prefixes for multi-team environments + +### 2. Resource Sizing + +- Start with `size='small'` for development and testing +- Use `auto_suspend` to save costs for infrequently used workers +- Set appropriate `min_cluster_count` based on expected load + +### 3. 
Tag Strategy + +- Use tags for cost allocation and resource tracking +- Include environment, team, and project information +- Add creation date and owner for audit purposes + +### 4. Lifecycle Management + +- Use `IF NOT EXISTS` and `IF EXISTS` for idempotent scripts +- Monitor worker usage with `SHOW WORKERS` +- Clean up unused workers to reduce costs + +## Common Use Cases + +### 1. UDF Development + +```sql +-- Create a worker for UDF development +CREATE WORKER dev_transform WITH + size='small', + auto_suspend='60', + TAG (environment='development', purpose='testing'); + +-- After UDF is developed and tested +ALTER WORKER dev_transform SET + size='medium', + auto_suspend='300'; +ALTER WORKER dev_transform SET TAG purpose='production-ready'; +``` + +### 2. Batch Processing + +```sql +-- Worker for nightly batch jobs +CREATE WORKER nightly_etl WITH + size='large', + auto_suspend='3600', -- Suspend after 1 hour of inactivity + auto_resume='false', -- Don't auto-resume (manual control) + TAG ( + schedule='nightly', + job_type='etl', + criticality='high' + ); +``` + +### 3. Multi-tenant Environments + +```sql +-- Workers for different teams +CREATE WORKER team_a_processor WITH + size='medium', + TAG (team='team-a', billing_code='TA-2024'); + +CREATE WORKER team_b_processor WITH + size='small', + TAG (team='team-b', billing_code='TB-2024'); +``` + +## Troubleshooting + +### Worker Not Starting + +If a worker doesn't start as expected: + +1. Check if the UDF exists and is properly configured +2. Verify environment variables are set in the cloud console +3.
Ensure the worker is not suspended: + +```sql +-- Check worker state +SHOW WORKERS; + +-- Resume if suspended +ALTER WORKER my_worker RESUME; +``` + +### Permission Issues + +Ensure you have the necessary privileges: + +```sql +-- Check your privileges +SHOW GRANTS; +``` + +### Resource Constraints + +If experiencing performance issues: + +```sql +-- Increase worker size +ALTER WORKER my_worker SET size='large'; + +-- Adjust cluster counts +ALTER WORKER my_worker SET + max_cluster_count='5', + min_cluster_count='2'; +``` + +## Related Topics + +- [User-Defined Functions](../../../../guides/60-udf/index.md) - Learn about creating and using UDFs +- [Cloud Control](../../../../guides/90-cloud-control/index.md) - Understand cloud integration +- [Resource Management](../../../../guides/80-resource-management/index.md) - Best practices for resource allocation \ No newline at end of file diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/index.md b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/index.md new file mode 100644 index 0000000000..28a20299e4 --- /dev/null +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/index.md @@ -0,0 +1,60 @@ +--- +title: Worker +sidebar_position: 0 +--- + +Worker-related SQL commands for managing sandbox UDF execution environments in Databend Cloud. + +## Introduction + +Workers are execution environments for User-Defined Functions (UDFs) in Databend Cloud's sandbox environment. Each worker corresponds to a single UDF, and the cloud starts the corresponding worker based on the function name. The Worker management interface provides full lifecycle control over these execution environments. 
+ +## General Rules + +- **Worker naming**: Follows standard identifier naming conventions +- **Strings and identifiers**: Bare identifiers may omit quotes when they contain no spaces; otherwise enclose them in single quotes +- **Numeric parameters**: Accept integers or string representations +- **Boolean parameters**: Accept `'true'`/`'false'` string values +- **Options**: Specified using the `WITH` keyword followed by key-value pairs + +## Worker Tags + +Tags are key-value pairs that help categorize and organize workers, similar to warehouse tags. They are commonly used for: + +- **Environment identification**: Mark workers as dev, staging, or production +- **Purpose tracking**: Identify the purpose of the worker (e.g., sandbox, testing) +- **Ownership**: Identify which team or user owns the worker +- **Custom metadata**: Add any arbitrary metadata for organizational purposes + +Tag keys and values are arbitrary strings. Tags can be: + +- Added at worker creation time using options +- Updated or added later using `ALTER WORKER ... SET TAG key = 'value'` +- Removed using `ALTER WORKER ...
UNSET TAG key` + +## Supported Statements + +| Statement | Purpose | Notes | +| ----------------- | ---------------------------- | ---------------------------------------------------------- | +| `CREATE WORKER` | Create a worker | Supports `IF NOT EXISTS` and option list | +| `ALTER WORKER` | Modify worker settings | Supports `SET`, `UNSET`, `SET TAG`, `UNSET TAG`, `SUSPEND`, `RESUME` | +| `SHOW WORKERS` | List workers | Shows all available workers | +| `DROP WORKER` | Delete a worker | Optional `IF EXISTS` | + +## Worker Management + +| Command | Description | +| ----------------------------------- | ------------------------------------------------- | +| [CREATE WORKER](create-worker.md) | Creates a new worker for UDF execution | +| [SHOW WORKERS](show-workers.md) | Lists all workers | +| [ALTER WORKER](alter-worker.md) | Modifies worker settings and state | +| [DROP WORKER](drop-worker.md) | Removes a worker | +| [Examples](examples.md) | Comprehensive usage examples and best practices | + +:::note +A worker represents an execution environment for a specific UDF in Databend Cloud's sandbox. Each UDF has a corresponding worker that manages its runtime environment and resources. +::: + +:::tip +Environment variables for UDFs are managed by the cloud for security reasons. After creating a UDF, users need to configure environment variables in the cloud console. +::: \ No newline at end of file diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/show-workers.md b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/show-workers.md new file mode 100644 index 0000000000..9d120c490b --- /dev/null +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/show-workers.md @@ -0,0 +1,63 @@ +--- +title: SHOW WORKERS +sidebar_position: 2 +--- + +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +Lists all workers available in the current tenant. 
+ +## Syntax + +```sql +SHOW WORKERS +``` + +## Output + +The command returns a table with the following columns: + +| Column Name | Data Type | Description | +| --------------- | --------- | ------------------------------------------------ | +| name | String | The name of the worker | +| size | String | The compute size of the worker | +| auto_suspend | String | Auto-suspend timeout in seconds | +| auto_resume | String | Whether auto-resume is enabled ('true'/'false') | +| max_cluster_count | String | Maximum cluster count for auto-scaling | +| min_cluster_count | String | Minimum cluster count for auto-scaling | +| tags | Map | Key-value tags associated with the worker | +| created_at | Timestamp | When the worker was created | +| updated_at | Timestamp | When the worker was last updated | +| state | String | Current state of the worker (e.g., ACTIVE, SUSPENDED) | + +## Examples + +List all workers: + +```sql +SHOW WORKERS; +``` + +Sample output: + +``` +name | size | auto_suspend | auto_resume | max_cluster_count | min_cluster_count | tags | created_at | updated_at | state +-----------+-------+--------------+-------------+-------------------+-------------------+--------------------------------+---------------------+---------------------+-------- +read_env | small | 300 | true | 3 | 1 | {purpose: sandbox, owner: ci} | 2024-01-15 10:30:00 | 2024-01-15 10:30:00 | ACTIVE +process_csv| medium| 600 | true | 5 | 2 | {environment: production} | 2024-01-14 09:15:00 | 2024-01-15 08:45:00 | ACTIVE +``` + +## Notes + +1. **Tenant Scope**: The command shows workers for the current tenant only. +2. **State Information**: The state column indicates whether the worker is active, suspended, or in another state. +3. **Tag Display**: Tags are displayed as a map of key-value pairs. +4. **Time Zones**: Timestamps are displayed in UTC. 
+ +## Related Topics + +- [CREATE WORKER](create-worker.md) - Create a new worker +- [ALTER WORKER](alter-worker.md) - Modify worker settings +- [DROP WORKER](drop-worker.md) - Remove a worker \ No newline at end of file diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/index.md b/docs/en/sql-reference/10-sql-commands/00-ddl/index.md index 2196c93e59..187a17a52d 100644 --- a/docs/en/sql-reference/10-sql-commands/00-ddl/index.md +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/index.md @@ -56,6 +56,7 @@ These topics provide reference information for the DDL (Data Definition Language | Component | Description | |-----------|-------------| | **[Warehouse](19-warehouse/index.md)** | Manage compute resources for query execution | +| **[Worker](20-worker/index.md)** | Manage UDF execution environments in sandbox | | **[Workload Group](20-workload-group/index.md)** | Control resource allocation and priorities | | **[Transaction](14-transaction/index.md)** | Manage database transactions | | **[Variable](15-variable/index.md)** | Set and use session/global variables | From d825bacf223cd2492a254515f753de48cfac6f91 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 26 Feb 2026 13:48:46 +0800 Subject: [PATCH 09/10] fix: address Codex review feedback 1. Close JavaScript code block in ddl-create-function.md 2. Update broken links in worker examples.md 3. 
Fix undefined table reference in range-between.md --- .../10-sql-commands/00-ddl/10-udf/ddl-create-function.md | 1 + .../10-sql-commands/00-ddl/20-worker/examples.md | 6 +++--- .../20-sql-functions/08-window-functions/range-between.md | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/10-udf/ddl-create-function.md b/docs/en/sql-reference/10-sql-commands/00-ddl/10-udf/ddl-create-function.md index 62e6079893..b315f40d9c 100644 --- a/docs/en/sql-reference/10-sql-commands/00-ddl/10-udf/ddl-create-function.md +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/10-udf/ddl-create-function.md @@ -186,6 +186,7 @@ export function calculateAge(birthDateStr) { return age; } $$; +``` ## Worker Management for UDFs diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/examples.md b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/examples.md index 626fc437f1..8547eac108 100644 --- a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/examples.md +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/examples.md @@ -278,6 +278,6 @@ ALTER WORKER my_worker SET ## Related Topics -- [User-Defined Functions](../../../../guides/60-udf/index.md) - Learn about creating and using UDFs -- [Cloud Control](../../../../guides/90-cloud-control/index.md) - Understand cloud integration -- [Resource Management](../../../../guides/80-resource-management/index.md) - Best practices for resource allocation \ No newline at end of file +- [User-Defined Functions (UDFs)](../10-udf/index.md) - Learn about creating and using UDFs +- [Warehouse Management](../19-warehouse/index.md) - Manage compute resources for query execution +- [Workload Groups](../20-workload-group/index.md) - Control resource allocation and priorities \ No newline at end of file diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md 
index 7e239590b1..8cb9326907 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/range-between.md @@ -194,7 +194,7 @@ SELECT sale_date, amount, ORDER BY sale_date RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW ) AS avg_7day -FROM sales +FROM sales_duplicates ORDER BY sale_date; ``` From 9d3e4616a1dc5b758aec67a6b40f50c0b6650ee1 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 26 Feb 2026 14:34:37 +0800 Subject: [PATCH 10/10] fix: correct broken links in worker documentation --- .../10-sql-commands/00-ddl/20-worker/create-worker.md | 2 +- .../10-sql-commands/00-ddl/20-worker/drop-worker.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/create-worker.md b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/create-worker.md index 67dda991f9..6cab380740 100644 --- a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/create-worker.md +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/create-worker.md @@ -72,4 +72,4 @@ CREATE WORKER read_env WITH size='small', auto_suspend='300', auto_resume='true' - [ALTER WORKER](alter-worker.md) - Modify worker settings - [SHOW WORKERS](show-workers.md) - List available workers - [DROP WORKER](drop-worker.md) - Remove a worker -- [User-Defined Functions](../../../../guides/60-udf/index.md) - Learn about UDFs in Databend \ No newline at end of file +- [User-Defined Functions](../10-udf/index.md) - Learn about UDFs in Databend \ No newline at end of file diff --git a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/drop-worker.md b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/drop-worker.md index ee06b6e776..3009c55275 100644 --- a/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/drop-worker.md +++ b/docs/en/sql-reference/10-sql-commands/00-ddl/20-worker/drop-worker.md @@ -58,4 +58,4 @@ DROP 
WORKER IF EXISTS read_env; - [CREATE WORKER](create-worker.md) - Create a new worker - [SHOW WORKERS](show-workers.md) - List available workers - [ALTER WORKER](alter-worker.md) - Modify worker settings -- [User-Defined Functions](../../../../guides/60-udf/index.md) - Learn about UDFs in Databend \ No newline at end of file +- [User-Defined Functions](../10-udf/index.md) - Learn about UDFs in Databend \ No newline at end of file