From a236bc4707f5061fb338974b0ea121f70f79ad49 Mon Sep 17 00:00:00 2001 From: james_teo Date: Thu, 2 Apr 2026 18:26:42 +0800 Subject: [PATCH 1/5] chore: Add documentation links --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index d05d995..6801b6e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@
vowl logo + +
+ + [![Documentation](https://img.shields.io/badge/docs-GitHub%20Pages-blue)](https://govtech-data-practice.github.io/Vowl/)
# Vowl From d44689971eb8a7c09219853f2b8a42079167d941 Mon Sep 17 00:00:00 2001 From: james_teo Date: Thu, 2 Apr 2026 18:28:00 +0800 Subject: [PATCH 2/5] notebook usage fixes --- examples/vowl_usage_patterns_demo.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/vowl_usage_patterns_demo.ipynb b/examples/vowl_usage_patterns_demo.ipynb index 48f566c..cf3c7ca 100644 --- a/examples/vowl_usage_patterns_demo.ipynb +++ b/examples/vowl_usage_patterns_demo.ipynb @@ -33,13 +33,13 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "20da0e86", "metadata": {}, "outputs": [], "source": [ - "# Install the package\n", - "#!pip install vowl[all]" + "# Install the package and additional packages used in the notebook\n", + "#!pip install \"vowl[all]\" polars" ] }, { From e181c85deeb7955f00664949f8a6f205a7515b86 Mon Sep 17 00:00:00 2001 From: james_teo Date: Thu, 2 Apr 2026 18:34:09 +0800 Subject: [PATCH 3/5] Add upload artifacts workflow stage for github ci --- .github/workflows/ci.yml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 044e806..fe6f85c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,5 +64,32 @@ jobs: - name: Build and validate run: make release-check + - name: Upload dist artifacts + uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 + + release: + needs: publish + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/v') + permissions: + contents: write + steps: + - uses: actions/checkout@v6 + + - name: Download dist artifacts + uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + files: dist/* + generate_release_notes: true From 3a1af8fca904c941f0a83fb4551a87dccd5f3467 Mon Sep 17 00:00:00 
2001 From: james_teo Date: Thu, 2 Apr 2026 18:46:09 +0800 Subject: [PATCH 4/5] Rename references of Vowl to lowercase vowl --- CHANGELOG.md | 4 ++-- CONTRIBUTING.md | 2 +- README.md | 8 ++++---- docs/architecture.md | 2 +- docs/contracts.md | 8 ++++---- docs/getting-started.md | 2 +- docs/index.md | 6 +++--- docs/known-issues.md | 14 +++++++------- docs/usage-patterns.md | 4 ++-- examples/vowl_usage_patterns_demo.ipynb | 2 +- mkdocs.yml | 8 ++++---- pyproject.toml | 6 +++--- 12 files changed, 33 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83e5d46..99c0117 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,5 +34,5 @@ Initial public release of **vowl**. - `THIRD_PARTY_NOTICES` and `LICENSE_AUDIT_REPORT.md`. - `CONTRIBUTING.md` with development setup and release workflow. -[Unreleased]: https://github.com/govtech-data-practice/Vowl/compare/v0.0.1...HEAD -[0.0.1]: https://github.com/govtech-data-practice/Vowl/releases/tag/v0.0.1 +[Unreleased]: https://github.com/govtech-data-practice/vowl/compare/v0.0.1...HEAD +[0.0.1]: https://github.com/govtech-data-practice/vowl/releases/tag/v0.0.1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2e9bd6e..367aa93 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,7 +56,7 @@ The Makefile is the canonical source for local development commands. If a README ```bash git clone https://github.com//vowl.git cd vowl -git remote add upstream https://github.com/govtech-data-practice/Vowl.git +git remote add upstream https://github.com/govtech-data-practice/vowl.git ``` ### Install Development Dependencies diff --git a/README.md b/README.md index 6801b6e..c77b6d5 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@
- vowl logo + vowl logo
- [![Documentation](https://img.shields.io/badge/docs-GitHub%20Pages-blue)](https://govtech-data-practice.github.io/Vowl/) + [![Documentation](https://img.shields.io/badge/docs-GitHub%20Pages-blue)](https://govtech-data-practice.github.io/vowl/)
-# Vowl +# vowl -Vowl (vee-owl 🦉) is a validation engine for [Open Data Contract Standard (ODCS)](https://github.com/bitol-io/open-data-contract-standard) data contracts. Define your validation rules once in a declarative YAML contract and get rich, actionable reports on your data's quality. +vowl (vee-owl 🦉) is a validation engine for [Open Data Contract Standard (ODCS)](https://github.com/bitol-io/open-data-contract-standard) data contracts. Define your validation rules once in a declarative YAML contract and get rich, actionable reports on your data's quality. ## Table of Contents diff --git a/docs/architecture.md b/docs/architecture.md index 5e34133..f579a2e 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -1,6 +1,6 @@ # Architecture -Vowl has a modular architecture built around **Ibis** as the universal query layer. +vowl has a modular architecture built around **Ibis** as the universal query layer. ``` ┌─────────────────────────────────────────────────────────────────────────────┐ diff --git a/docs/contracts.md b/docs/contracts.md index 6492dde..c907499 100644 --- a/docs/contracts.md +++ b/docs/contracts.md @@ -85,7 +85,7 @@ schema: ## Automatic Check References -When a contract is loaded, Vowl automatically builds `CheckReference` objects for every executable check in the contract via `Contract.get_check_references_by_schema()`. +When a contract is loaded, vowl automatically builds `CheckReference` objects for every executable check in the contract via `Contract.get_check_references_by_schema()`. This includes both user-authored checks in `quality` blocks and synthetic checks derived from column metadata. The generated references are grouped by schema, and the auto-generated ones run before explicit `quality` checks. 
@@ -104,7 +104,7 @@ This includes both user-authored checks in `quality` blocks and synthetic checks ## Auto-Generated Checks -| Generated from | What Vowl validates | +| Generated from | What vowl validates | |----------------|----------------------| | `name` | Column declared in the contract exists in the source table | | `logicalType` | Values can be cast to the declared SQL type for `integer`, `number`, `boolean`, `date`, `timestamp`, and `time` | @@ -139,11 +139,11 @@ produces three generated check references: | `$.schema[0].properties[...].required` | `RequiredCheckReference` | !!! note - Because `string` does not currently generate a SQL cast-based type check, the `logicalType` entry above contributes metadata for option checks rather than a standalone type-validation query. If you use `integer`, `number`, `boolean`, `date`, `timestamp`, or `time`, Vowl also generates a `logicalType` SQL check automatically. + Because `string` does not currently generate a SQL cast-based type check, the `logicalType` entry above contributes metadata for option checks rather than a standalone type-validation query. If you use `integer`, `number`, `boolean`, `date`, `timestamp`, or `time`, vowl also generates a `logicalType` SQL check automatically. ## Library Metrics (`type: library`) -Instead of writing SQL by hand, you can declare common data quality metrics using `type: library` in your `quality` blocks. Vowl auto-generates the appropriate SQL at runtime. +Instead of writing SQL by hand, you can declare common data quality metrics using `type: library` in your `quality` blocks. vowl auto-generates the appropriate SQL at runtime. 
### Column-Level Metrics diff --git a/docs/getting-started.md b/docs/getting-started.md index 0bb13bb..95a1925 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -13,7 +13,7 @@ Optional extras are available: | `vowl[spark]` | PySpark support | | `vowl[all]` | Everything (Spark + AWS) | -For local development, testing, and release workflow, see [CONTRIBUTING.md](https://github.com/govtech-data-practice/Vowl/blob/main/CONTRIBUTING.md). +For local development, testing, and release workflow, see [CONTRIBUTING.md](https://github.com/govtech-data-practice/vowl/blob/main/CONTRIBUTING.md). ## Validate in 3 Lines diff --git a/docs/index.md b/docs/index.md index fc0454c..82d915f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,9 +7,9 @@ hide: vowl logo -# Vowl +# vowl -Vowl (vee-owl 🦉): a validation engine for [Open Data Contract Standard (ODCS)](https://github.com/bitol-io/open-data-contract-standard) data contracts. Define your validation rules once in a declarative YAML contract and get rich, actionable reports on your data's quality. +vowl (vee-owl 🦉): a validation engine for [Open Data Contract Standard (ODCS)](https://github.com/bitol-io/open-data-contract-standard) data contracts. Define your validation rules once in a declarative YAML contract and get rich, actionable reports on your data's quality. ## Key Features @@ -42,4 +42,4 @@ Optional extras: `vowl[spark]`, `vowl[all]`. ## License -This project is licensed under the [MIT License](https://github.com/govtech-data-practice/Vowl/blob/main/LICENSE). +This project is licensed under the [MIT License](https://github.com/govtech-data-practice/vowl/blob/main/LICENSE). 
diff --git a/docs/known-issues.md b/docs/known-issues.md index 139c728..fa8adea 100644 --- a/docs/known-issues.md +++ b/docs/known-issues.md @@ -29,13 +29,13 @@ result = validate_data("contract.yaml", adapter=IbisAdapter(con)) Oracle has several SQL dialect differences that can cause some generated checks to `ERROR`: - **No `LIMIT` clause:** Ibis handles this via `FETCH FIRST N ROWS ONLY`, but edge cases may arise. -- **No `!~` regex operator:** Vowl applies an AST transform to use `REGEXP_LIKE` on Oracle, but complex patterns may not translate cleanly. -- **Case-sensitive identifiers:** Oracle folds unquoted identifiers to uppercase. If your source tables were created with quoted lowercase column names, auto-generated checks may fail to match. Vowl applies quoting transforms to mitigate this, but mismatches can still occur. -- **`TEXT`/`CLOB` types cannot be used in `REGEXP_LIKE`:** Vowl automatically casts these to `VARCHAR(4000)`, which truncates values longer than 4000 characters. +- **No `!~` regex operator:** vowl applies an AST transform to use `REGEXP_LIKE` on Oracle, but complex patterns may not translate cleanly. +- **Case-sensitive identifiers:** Oracle folds unquoted identifiers to uppercase. If your source tables were created with quoted lowercase column names, auto-generated checks may fail to match. vowl applies quoting transforms to mitigate this, but mismatches can still occur. +- **`TEXT`/`CLOB` types cannot be used in `REGEXP_LIKE`:** vowl automatically casts these to `VARCHAR(4000)`, which truncates values longer than 4000 characters. ## SQLite: Regex via User-Defined Function -SQLite does not natively support `REGEXP_LIKE`. Vowl rewrites regex checks to use Ibis's `_IBIS_REGEX_SEARCH` user-defined function, which Ibis registers automatically when using a DuckDB or SQLite backend. This generally works but may behave differently from server-side regex implementations in edge cases. +SQLite does not natively support `REGEXP_LIKE`. 
vowl rewrites regex checks to use Ibis's `_IBIS_REGEX_SEARCH` user-defined function, which Ibis registers automatically when using a DuckDB or SQLite backend. This generally works but may behave differently from server-side regex implementations in edge cases. ## Multi-Source Adapters: Data Materialisation @@ -53,9 +53,9 @@ The `MultiSourceSQLExecutor` materialises tables via Arrow instead of using Duck 1. **Table namespace mismatch.** DuckDB ATTACH places tables under a qualified namespace (e.g. `pg_db.public.my_table`). User-authored contract queries use bare table names (`my_table`). `USE` or `SET search_path` resolves this for a single attached database, but cross-database joins (the core multi-source use case) require every table reference to be fully qualified. Rewriting arbitrary user SQL to inject per-table namespace prefixes is fragile and error-prone. -2. **No access to connection credentials.** DuckDB ATTACH requires a connection string (`host=... port=... dbname=... user=... password=...`), not a live connection object. Vowl receives an Ibis connection from the user. Reconstructing credentials would require accessing private Ibis internals (`_con_kwargs`), would not work for connections created via `from_connection()` or environment-based auth, and would surface passwords in SQL strings. +2. **No access to connection credentials.** DuckDB ATTACH requires a connection string (`host=... port=... dbname=... user=... password=...`), not a live connection object. vowl receives an Ibis connection from the user. Reconstructing credentials would require accessing private Ibis internals (`_con_kwargs`), would not work for connections created via `from_connection()` or environment-based auth, and would surface passwords in SQL strings. -3. **Limited backend coverage.** DuckDB ATTACH only supports PostgreSQL, MySQL, and SQLite. Vowl supports 12+ Ibis backends (Snowflake, Spark, BigQuery, Trino, ClickHouse, Oracle, MSSQL, DataFusion, etc.). 
The materialisation path would still be needed for every unsupported backend, so ATTACH would only serve as a partial optimisation. +3. **Limited backend coverage.** DuckDB ATTACH only supports PostgreSQL, MySQL, and SQLite. vowl supports 12+ Ibis backends (Snowflake, Spark, BigQuery, Trino, ClickHouse, Oracle, MSSQL, DataFusion, etc.). The materialisation path would still be needed for every unsupported backend, so ATTACH would only serve as a partial optimisation. 4. **Filter conditions cannot be pushed through ATTACH.** When an adapter has filter conditions, `export_table_as_arrow()` applies them at the source before export. With ATTACH, the remote table is exposed raw, and pushing per-adapter filters into cross-database joins would require deep query rewriting. @@ -97,7 +97,7 @@ quality: mustBe: 0 ``` -will happily query `audit_log` even if it is not declared anywhere in the contract. Vowl extracts and reports the tables involved via `tables_in_query` in the results, but it does **not** block execution against undeclared tables. +will happily query `audit_log` even if it is not declared anywhere in the contract. vowl extracts and reports the tables involved via `tables_in_query` in the results, but it does **not** block execution against undeclared tables. **Why this matters:** diff --git a/docs/usage-patterns.md b/docs/usage-patterns.md index 4c42a9e..bf80062 100644 --- a/docs/usage-patterns.md +++ b/docs/usage-patterns.md @@ -1,7 +1,7 @@ # Usage Patterns !!! tip "Interactive Demo" - Try the [usage patterns notebook](https://github.com/govtech-data-practice/Vowl/blob/main/examples/vowl_usage_patterns_demo.ipynb) for a hands-on walkthrough of the examples below. + Try the [usage patterns notebook](https://github.com/govtech-data-practice/vowl/blob/main/examples/vowl_usage_patterns_demo.ipynb) for a hands-on walkthrough of the examples below. 
## Local DataFrame (Pandas/Polars) @@ -49,7 +49,7 @@ result.display_full_report() Ibis supports: Amazon Athena, BigQuery, ClickHouse, Dask, Databricks, DataFusion, Druid, DuckDB, Exasol, Flink, Impala, MSSQL, MySQL, Oracle, pandas, Polars, PostgreSQL, PySpark, RisingWave, SingleStoreDB, Snowflake, SQLite, Trino, and more. See [ibis-project/ibis](https://github.com/ibis-project/ibis). !!! info "MySQL" - Select the database when you create the connection, for example via `ibis.mysql.connect(..., database="my_db")` or a connection URI that already includes the database name. Vowl does not issue `USE database` during validation; it runs read-only `SELECT` queries against the active database on the existing connection. + Select the database when you create the connection, for example via `ibis.mysql.connect(..., database="my_db")` or a connection URI that already includes the database name. vowl does not issue `USE database` during validation; it runs read-only `SELECT` queries against the active database on the existing connection. ## Compatibility Mode (DuckDB ATTACH) diff --git a/examples/vowl_usage_patterns_demo.ipynb b/examples/vowl_usage_patterns_demo.ipynb index cf3c7ca..a002f59 100644 --- a/examples/vowl_usage_patterns_demo.ipynb +++ b/examples/vowl_usage_patterns_demo.ipynb @@ -5,7 +5,7 @@ "id": "5179d529", "metadata": {}, "source": [ - "# Vowl Usage Patterns Demo\n", + "# vowl Usage Patterns Demo\n", "\n", "This notebook demonstrates the different methods of using the **vowl** library for data quality validation using [ODCS](https://github.com/bitol-io/open-data-contract-standard) data contracts.\n", "\n", diff --git a/mkdocs.yml b/mkdocs.yml index 3dc1342..1fc71e0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,8 +1,8 @@ -site_name: Vowl +site_name: vowl site_description: A SQL-powered data quality validation library for pandas and spark DataFrames. 
-site_url: https://govtech-data-practice.github.io/Vowl/ -repo_url: https://github.com/govtech-data-practice/Vowl -repo_name: govtech-data-practice/Vowl +site_url: https://govtech-data-practice.github.io/vowl/ +repo_url: https://github.com/govtech-data-practice/vowl +repo_name: govtech-data-practice/vowl theme: name: material diff --git a/pyproject.toml b/pyproject.toml index bbb0086..71690ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,9 +51,9 @@ all = [ ] [project.urls] -Homepage = "https://github.com/govtech-data-practice/Vowl" -Repository = "https://github.com/govtech-data-practice/Vowl" -Issues = "https://github.com/govtech-data-practice/Vowl/issues" +Homepage = "https://github.com/govtech-data-practice/vowl" +Repository = "https://github.com/govtech-data-practice/vowl" +Issues = "https://github.com/govtech-data-practice/vowl/issues" Documentation = "https://pypi.org/project/vowl/" [tool.setuptools.packages.find] From 16cae539f6a2fa3f76f11e027796947768feba5a Mon Sep 17 00:00:00 2001 From: james_teo Date: Thu, 2 Apr 2026 18:49:11 +0800 Subject: [PATCH 5/5] Add contributing section --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index c77b6d5..91e216e 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ vowl (vee-owl 🦉) is a validation engine for [Open Data Contract Standard (ODC - [Loading Contracts from Git (GitHub/GitLab)](#loading-contracts-from-git-githubgitlab) - [Loading Contracts from S3](#loading-contracts-from-s3) - [Roadmap](#roadmap) +- [Contributing](#contributing) - [License](#license) ## Features @@ -795,6 +796,12 @@ result.display_full_report() --- +## Contributing + +We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on how to get started. + +--- + ## License This project is licensed under the [MIT License](LICENSE).