Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/workflows/main_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,12 @@ jobs:
spark_matrix: ${{ steps.set-matrix-values.outputs.spark_dataproc_matrix }}
hive_matrix: ${{ steps.set-matrix-values.outputs.hive_dataproc_matrix }}
dbt_matrix: ${{ steps.set-matrix-values.outputs.dbt_matrix }}
execution_time: ${{ steps.get-execution-time.outputs.execution_time }}
steps:
- name: Get execution time
id: get-execution-time
run: echo "execution_time=$(date +'%Y%d%m%H%M')" >> $GITHUB_OUTPUT

- name: Checkout code
uses: actions/checkout@v4

Expand Down Expand Up @@ -175,6 +180,17 @@ jobs:
with:
fail-for-new-failures: true

# Notify maintainers when the PR validation pipeline fails.
# Delegates to the reusable notify_maintainers.yml workflow.
notify-maintainers:
  needs:
    - initialize_workflow
    - collect-and-compare-reports
  # Fire only when an upstream job failed AND the matrix actually ran
  # something (any_run guards against notifying on empty/skipped runs —
  # NOTE(review): any_run is produced outside this hunk; confirm semantics).
  if: ${{ failure() && needs.initialize_workflow.outputs.any_run == 'true' }}
  uses: ./.github/workflows/notify_maintainers.yml
  with:
    workflow-type: 'pr'
    trigger-type: 'pr'
    # Timestamp captured by the get-execution-time step at workflow start.
    # NOTE(review): that step formats it as %Y%d%m%H%M (day before month) —
    # confirm this ordering is intended and not a typo for %Y%m%d.
    execution-time: ${{ needs.initialize_workflow.outputs.execution_time }}

generate-compatibility-tables:
needs:
- collect-and-compare-reports
Expand Down
22 changes: 17 additions & 5 deletions .github/workflows/notify_maintainers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:
workflow-type:
type: string
required: true
description: "type of workflow calling, allowed values: release, spec_change"
description: "type of workflow calling, allowed values: release, spec_change, pr"
execution-time:
type: string
required: true
Expand All @@ -33,19 +33,31 @@ jobs:
id: check-report-empty
run: |
result=$(jq '. == []' reports/retention-failures-report.json)
echo "report-empty=${result}" >> $GITHUB_OUTPUT
echo "report_empty=${result}" >> $GITHUB_OUTPUT


- name: Generate PR summary
id: generate-pr-summary
if: ${{ inputs.workflow-type == 'pr' && steps.check-report-empty.outputs.report_empty == 'false'}}
run: |
python scripts/generate_issue.py \
--failure_path=reports/retention-failures-report.json \
--issue_path=generated-files/summary.md \
--skip-maintainers

cat generated-files/summary.md >> $GITHUB_STEP_SUMMARY

- name: Run task for Collect Reports
id: collect-and-merge-reports
if: ${{ steps.check-report-empty.outputs.report-empty == 'false'}}
id: generate-issue-report
if: ${{ inputs.workflow-type != 'pr' && steps.check-report-empty.outputs.report_empty == 'false'}}
run: |
python scripts/generate_issue.py \
--failure_path=reports/retention-failures-report.json \
--issue_path=generated-files/issue.md

- name: Create Issue From File
uses: peter-evans/create-issue-from-file@v5
if: ${{ steps.check-report-empty.outputs.report-empty == 'false'}}
if: ${{ inputs.workflow-type != 'pr' && steps.check-report-empty.outputs.report_empty == 'false'}}
with:
title: new failures in report from run ${{ github.run_id }}
content-filepath: generated-files/issue.md
Expand Down
20 changes: 11 additions & 9 deletions producer/dbt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,13 @@ The GitHub Actions workflow:

### Local Debugging (Optional)

**For development debugging, you may optionally run PostgreSQL locally. The standard test environment is GitHub Actions.**
**For development debugging, local runs should use the same PostgreSQL 15 Docker setup as CI.**

If you need to debug event generation locally:

1. **Start PostgreSQL (Optional)**:
```bash
cd producer/dbt/scenarions/csv_to_postgres/test
docker compose up
```
1. **Ensure Docker is running**:
- The scenario runner uses `producer/dbt/scenarios/csv_to_postgres/test/compose.yml`.
- If PostgreSQL is not already available on `localhost:5432`, the scenario script starts the local Docker Compose service automatically and waits until it is ready.

2. **Install Python Dependencies**:
```bash
Expand All @@ -120,8 +118,12 @@ If you need to debug event generation locally:
```

3. **Run Test Scenario**:
- The example below assumes you run the command from the repository root, so relative paths such as `./producer/dbt/output` and `./dbt_producer_report.json` resolve from that location.
```bash
./producer/dbt/run_dbt_tests.sh --openlineage-directory <open_lineage_directory>
./producer/dbt/run_dbt_tests.sh \
--openlineage-directory <open_lineage_directory> \
--producer-output-events-dir ./producer/dbt/output \
--openlineage-release 1.45.0
```

4. **Inspect Generated Events**:
Expand All @@ -130,10 +132,10 @@ If you need to debug event generation locally:
cat ./producer/dbt/output/csv_to_postgres/event-{id}.json | jq '.'

# check report
cat ./producer/dbt/dbt_producer_report.json | jq '.'
cat ./dbt_producer_report.json | jq '.'
```

**Note**: Local debugging is entirely optional. All official validation happens in GitHub Actions with PostgreSQL service containers. The test runner (`test/run.sh`) is the same code used by CI/CD, ensuring consistency.
**Note**: Local debugging is entirely optional. All official validation happens in GitHub Actions with PostgreSQL service containers. Local runs now reuse the same PostgreSQL 15 image and readiness check as CI to reduce drift between local debugging and workflow execution.

## Important dbt Integration Notes

Expand Down
73 changes: 50 additions & 23 deletions producer/dbt/run_dbt_tests.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
#!/bin/bash

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DBT_DIR="$SCRIPT_DIR"
REPO_ROOT="$(cd "$DBT_DIR/../.." && pwd)"
SCENARIOS_DIR="$DBT_DIR/scenarios"
RUNNER_REQUIREMENTS="$DBT_DIR/runner/requirements.txt"
SPECS_BASE_DIR="$DBT_DIR/specs"
SCRIPTS_DIR="$REPO_ROOT/scripts"

# resolve_path RAW BASE
#
# Normalize a user-supplied path to an absolute one:
#   * a leading "~" is expanded to the current user's $HOME;
#   * an already-absolute path is echoed unchanged;
#   * anything else is anchored under BASE ("$BASE/$RAW").
# The result is written to stdout; no filesystem checks are performed.
resolve_path() {
  local raw="$1"
  local base="$2"

  # Expand a leading tilde (e.g. "~/repo" -> "$HOME/repo").
  case "$raw" in
    "~"*) raw="$HOME${raw#\~}" ;;
  esac

  # Absolute paths pass through; relative ones are joined to the base dir.
  case "$raw" in
    /*) echo "$raw" ;;
    *)  echo "$base/$raw" ;;
  esac
}

################################################################################
############ dbt Producer Compatibility Test Execution Script ################
################################################################################
Expand All @@ -10,23 +31,23 @@ usage() {
echo ""
echo "Options:"
echo " --openlineage-directory PATH Path to openlineage repository directory (required)"
echo " --producer-output-events-dir PATH Path to producer output events directory (default: output)"
echo " --openlineage-release VERSION OpenLineage release version (default: 2-0-2)"
echo " --report-path PATH Path to report directory (default: ../dbt_producer_report.json)"
echo " --producer-output-events-dir PATH Path to producer output events directory (default: <script_dir>/output)"
echo " --openlineage-release VERSION OpenLineage release version (default: 1.40.1)"
echo " --report-path PATH Path to report file (default: <repo_root>/dbt_producer_report.json)"
echo " -h, --help Show this help message and exit"
echo ""
echo "Example:"
echo " $0 --openlineage-directory /path/to/specs --producer-output-events-dir output --openlineage-release 2-0-2"
echo " $0 --openlineage-directory /path/to/OpenLineage --producer-output-events-dir /tmp/dbt-output --openlineage-release 1.45.0"
exit 0
}

# Required variables (no defaults)
OPENLINEAGE_DIRECTORY=""

# Variables with default values
PRODUCER_OUTPUT_EVENTS_DIR=output
PRODUCER_OUTPUT_EVENTS_DIR="$DBT_DIR/output"
OPENLINEAGE_RELEASE=1.40.1
REPORT_PATH="./dbt_producer_report.json"
REPORT_PATH="$REPO_ROOT/dbt_producer_report.json"

# If -h or --help is passed, print usage and exit
if [[ "$1" == "-h" || "$1" == "--help" ]]; then
Expand All @@ -45,14 +66,19 @@ while [[ "$#" -gt 0 ]]; do
shift
done

OPENLINEAGE_DIRECTORY="$(resolve_path "$OPENLINEAGE_DIRECTORY" "$PWD")"
PRODUCER_OUTPUT_EVENTS_DIR="$(resolve_path "$PRODUCER_OUTPUT_EVENTS_DIR" "$PWD")"
REPORT_PATH="$(resolve_path "$REPORT_PATH" "$PWD")"

# Check required arguments
if [[ -z "$OPENLINEAGE_DIRECTORY" ]]; then
echo "Error: Missing required arguments."
usage
fi

# fail if scenarios are not defined in scenario directory
[[ $(find scenarios | wc -l) -gt 0 ]] || { echo >&2 "NO SCENARIOS DEFINED IN scenarios"; exit 1; }
[[ -d "$SCENARIOS_DIR" ]] || { echo >&2 "Error: scenarios directory not found at $SCENARIOS_DIR"; exit 1; }
[[ $(find "$SCENARIOS_DIR" | wc -l) -gt 0 ]] || { echo >&2 "NO SCENARIOS DEFINED IN $SCENARIOS_DIR"; exit 1; }

mkdir -p "$PRODUCER_OUTPUT_EVENTS_DIR"

Expand All @@ -72,16 +98,16 @@ echo "==========================================================================
################################################################################

# Check if scenario directory exists
if [[ ! -d "scenarios" ]]; then
echo "Error: scenarios directory not found"
if [[ ! -d "$SCENARIOS_DIR" ]]; then
echo "Error: scenarios directory not found at $SCENARIOS_DIR"
exit 1
fi

#install python dependencies
python -m pip install --upgrade pip

if [ -f ./runner/requirements.txt ]; then
pip install -r ./runner/requirements.txt
if [ -f "$RUNNER_REQUIREMENTS" ]; then
pip install -r "$RUNNER_REQUIREMENTS"
fi

################################################################################
Expand All @@ -91,18 +117,16 @@ fi
################################################################################

echo "Running dbt producer tests..."
POSIX_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BASE_DIR="$(cygpath -m "$POSIX_DIR")"

# Run tests for each scenario
echo "Discovering test scenarios..."
for scenario_dir in scenarios/*/; do
for scenario_dir in "$SCENARIOS_DIR"/*/; do
if [[ -d "$scenario_dir" && -f "${scenario_dir}config.json" ]]; then
SCENARIO_NAME=$(basename "$scenario_dir")
echo "Found scenario: $SCENARIO_NAME"

mkdir -p "$PRODUCER_OUTPUT_EVENTS_DIR/$SCENARIO_NAME"
"$scenario_dir"test/run.sh "$BASE_DIR/$PRODUCER_OUTPUT_EVENTS_DIR/$SCENARIO_NAME"
"$scenario_dir"test/run.sh "$PRODUCER_OUTPUT_EVENTS_DIR/$SCENARIO_NAME"

echo "Scenario $SCENARIO_NAME completed"
fi
Expand All @@ -114,32 +138,35 @@ echo "EVENT VALIDATION FOR SPEC VERSION $OPENLINEAGE_RELEASE"
REPORT_DIR=$(dirname "$REPORT_PATH")
mkdir -p "$REPORT_DIR"

SPECS_BASE_DIR="./specs"
DEST_DIR="$SPECS_BASE_DIR/$OPENLINEAGE_RELEASE"

mkdir -p "$DEST_DIR"

if [ -d "$OPENLINEAGE_DIRECTORY"/spec ]; then
find "$OPENLINEAGE_DIRECTORY"/spec -type f \( -name '*Facet.json' -o -name 'OpenLineage.json' \) -exec cp -t "$DEST_DIR" {} +
if [ -d "$OPENLINEAGE_DIRECTORY/spec" ]; then
while IFS= read -r spec_file; do
cp "$spec_file" "$DEST_DIR/"
done < <(find "$OPENLINEAGE_DIRECTORY/spec" -type f \( -name '*Facet.json' -o -name 'OpenLineage.json' \))
fi
if [ -d "$OPENLINEAGE_DIRECTORY"/integration/common/src/openlineage ]; then
find "$OPENLINEAGE_DIRECTORY"/integration/common/src/openlineage -type f -iname '*facet.json' -exec cp -t "$DEST_DIR" {} +
if [ -d "$OPENLINEAGE_DIRECTORY/integration/common/src/openlineage" ]; then
while IFS= read -r spec_file; do
cp "$spec_file" "$DEST_DIR/"
done < <(find "$OPENLINEAGE_DIRECTORY/integration/common/src/openlineage" -type f -iname '*facet.json')
fi

if [ -z "$(ls -A "$DEST_DIR")" ]; then
echo "Cannot collect OpenLineage specs"
exit 1
fi

pip install -r ../../scripts/requirements.txt
pip install -r "$SCRIPTS_DIR/requirements.txt"

python ../../scripts/validate_ol_events.py \
python "$SCRIPTS_DIR/validate_ol_events.py" \
--event_base_dir="$PRODUCER_OUTPUT_EVENTS_DIR" \
--spec_base_dir="$SPECS_BASE_DIR" \
--target="$REPORT_PATH" \
--component="dbt" \
--component_version="1.8.0" \
--producer_dir=.. \
--producer_dir="$REPO_ROOT/producer" \
--openlineage_version="$OPENLINEAGE_RELEASE"

echo "EVENT VALIDATION FINISHED"
Expand Down
7 changes: 5 additions & 2 deletions producer/dbt/scenarios/csv_to_postgres/test/compose.yml
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
version: "3.9"

name: csv_to_postgres

services:
postgres:
image: postgres:15-alpine
container_name: postgres15
restart: always
environment:
POSTGRES_USER: testuser
POSTGRES_PASSWORD: testpass
POSTGRES_DB: dbt_test
ports:
- "5432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U testuser -d dbt_test"]
interval: 10s
timeout: 5s
retries: 5
volumes:
- postgres_data:/var/lib/postgresql/data

Expand Down
Loading
Loading