Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 0 additions & 64 deletions .github/workflows/ci.yaml

This file was deleted.

110 changes: 69 additions & 41 deletions .github/workflows/cd.yaml → .github/workflows/pipeline.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
name: CD Pipeline Automation
name: Pipeline CI/CD automation

on:
workflow_run:
workflows: [ CI Quality Checks ]
types: [ completed ]
# push:
# branches: [ main ]
pull_request:
branches: [ main ]
types: [ assigned, opened, edited, reopened, synchronize ]

jobs:
execute-pipeline:
test-remote-runnable:
runs-on: ubuntu-latest
if: ${{ github.event.workflow_run.conclusion == 'success' }}

env: # ✅ env: Global environment variables for all steps
CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }}
CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }}
CLEARML_API_SECRET_KEY: ${{ secrets.CLEARML_API_SECRET_KEY }}
QUEUE: Remote_CPU

steps:
- name: 👉 Step 0 - Check the event type
Expand All @@ -26,7 +28,7 @@ jobs:
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}

- name: 👉 Step 2 - Set up Python
uses: actions/setup-python@v4
with:
Expand All @@ -35,14 +37,58 @@ jobs:
- name: 👉 Step 3 - Install ClearML
run: |
python -m pip install --upgrade pip clearml
sudo apt-get update && sudo apt-get install -y ripgrep

- name: 👉 Step 4 - Make sure the task can run remotely
id: launch_task
run: |
TASK_ID=$(clearml-task \
--project SyntaxSquad \
--name 'CI: Test remote runnable' \
--branch ${{ github.head_ref || github.ref_name }} \
--script cicd/example_task.py \
--requirements cicd/requirements.txt \
--skip-task-init --queue "$QUEUE" \
| rg -o 'Task id=(.*) sent' -r '$1')
echo "TASK_ID=${TASK_ID}" >> $GITHUB_OUTPUT

- name: 👉 Step 5 - Poll for task progress
run: |
python cicd/check_remotely_runnable.py "${{ steps.launch_task.outputs.TASK_ID }}"
echo "🍏 This job's status is ${{ job.status }}."


execute-pipeline:
runs-on: ubuntu-latest
needs: test-remote-runnable

env: # ✅ env: Global environment variables for all steps
CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }}
CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }}
CLEARML_API_SECRET_KEY: ${{ secrets.CLEARML_API_SECRET_KEY }}

steps:
- name: 👉 Step 0 - Check the event type
run: |
echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}."

- name: 👉 Step 1 - Checkout the repository
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}

- name: 👉 Step 2 - Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11.12'

- name: 👉 Step 4 - Debug ENV
- name: 👉 Step 3 - Install ClearML
run: |
echo "CLEARML_API_HOST=$CLEARML_API_HOST"
if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi
curl -I $CLEARML_API_HOST
python -m pip install --upgrade pip clearml

- name: 👉 Step 5 - Run the pipeline
- name: 👉 Step 4 - Run the pipeline
run: |
python pipeline_from_tasks.py
echo "🍏 This job's status is ${{ job.status }}."
Expand Down Expand Up @@ -70,7 +116,7 @@ jobs:
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}

- name: 👉 Step 2 - Set up Python
uses: actions/setup-python@v4
with:
Expand All @@ -81,13 +127,7 @@ jobs:
python -m pip install --upgrade pip
pip install clearml pandas tabulate github3.py Jinja2

- name: 👉 Step 4 - Debug ENV
run: |
echo "CLEARML_API_HOST=$CLEARML_API_HOST"
if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi
curl -I $CLEARML_API_HOST

- name: 👉 Step 5 - Assign the latest Pipeline as production if it's better than the current one
- name: 👉 Step 4 - Comment metrics from step7_model_evaluation of the pipeline
run: |
python cicd/pipeline_reports.py
echo "🍏 This job's status is ${{ job.status }}."
Expand All @@ -114,7 +154,7 @@ jobs:
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}

- name: 👉 Step 2 - Set up Python
uses: actions/setup-python@v4
with:
Expand All @@ -125,13 +165,7 @@ jobs:
python -m pip install --upgrade pip
pip install clearml pandas tabulate github3.py Jinja2

- name: 👉 Step 4 - Debug ENV
run: |
echo "CLEARML_API_HOST=$CLEARML_API_HOST"
if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi
curl -I $CLEARML_API_HOST

- name: 👉 Step 5 - Comment metrics from step7_model_evaluation of the pipeline
- name: 👉 Step 4 - Assign the latest Pipeline as production if it's better than the current one
run: |
python cicd/production_tagging.py
echo "🍏 This job's status is ${{ job.status }}."
Expand All @@ -158,36 +192,30 @@ jobs:
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}

- name: 👉 Step 2 - Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11.12'

- name: 👉 Step 3 - Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r serving/requirements.txt

- name: 👉 Step 4 - Debug ENV
run: |
echo "CLEARML_API_HOST=$CLEARML_API_HOST"
if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi
curl -I $CLEARML_API_HOST

- name: 👉 Step 5 - Deploy FastAPI model serving
- name: 👉 Step 4 - Deploy FastAPI model serving
run: |
nohup fastapi dev serving/pose2gloss.py & # Start the FastAPI server in the background
fastapi dev serving/pose2gloss.py & # Start the FastAPI server in the background
echo $! > fastapi.pid # Save the PID of the FastAPI server
sleep 10 # Wait for the server to start
sleep 240 # Wait for 4 minutes to start the server and download artifacts
curl -f http://localhost:$FASTAPI_PORT/health || exit 1 # Verify the server is running
Comment on lines +210 to 211
Copy link

Copilot AI May 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of a fixed long sleep duration, consider implementing a polling mechanism to check for FastAPI server readiness to optimize pipeline execution time.

Suggested change
sleep 240 # Wait for 4 minutes to start the server and download artifacts
curl -f http://localhost:$FASTAPI_PORT/health || exit 1 # Verify the server is running
# Poll the FastAPI health endpoint until it answers or the time budget runs out.
# The budget (120 attempts x 2 s = 240 s) matches the previous fixed 4-minute wait,
# because startup also has to download artifacts before /health can respond.
server_ready=false
for attempt in {1..120}; do
  # -f: fail on HTTP errors, -s: no progress meter, -o /dev/null: keep the log clean
  if curl -fs -o /dev/null "http://localhost:${FASTAPI_PORT}/health"; then
    echo "FastAPI server is ready! (attempt $attempt)"
    server_ready=true
    break
  fi
  echo "Waiting for FastAPI server to be ready... (attempt $attempt)"
  sleep 2
done
# Decide from the loop result instead of issuing one more request after the loop.
if [ "$server_ready" != true ]; then
  echo "FastAPI server failed to start within the timeout period."
  exit 1
fi

Copilot uses AI. Check for mistakes.

- name: 👉 Step 6 - Notify deployment status
- name: 👉 Step 5 - Notify deployment status
if: success()
run: |
echo "FastAPI model serving deployed successfully on port $FASTAPI_PORT"

- name: 👉 Step 7 - Stop FastAPI server (cleanup)
- name: 👉 Step 6 - Stop FastAPI server (cleanup)
if: always() # This step will always run, even if the previous steps fail
run: |
kill $(cat fastapi.pid) # Stop the FastAPI server using the saved PID
Expand Down
36 changes: 12 additions & 24 deletions cicd/pipeline_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,34 +20,22 @@ def get_pipeline_of_current_commit(commit_id): # Find the ClearML task that corr
return PipelineController.get(pipeline_id=task['id'])


def create_output_tables(retrieve_scalars_dict): # Extract data from ClearML into format for tabulation
data = []
for graph_title, graph_values in retrieve_scalars_dict.items():
graph_data = []
for series, series_values in graph_values.items():
graph_data.append((graph_title, series, *series_values.values()))
data += graph_data
return sorted(data, key=lambda output: (output[0], output[1]))
def get_reported_table(task, title):  # Get the table from the task's reported plots
    """Render the first reported plot whose metric name contains ``title`` as a Markdown table.

    Args:
        task: Object exposing ``get_reported_plots()``, which yields dict-like plot
            entries with ``'metric'`` and ``'plot_str'`` keys.
        title: Text to look for in each plot's metric name (case-insensitive substring).

    Returns:
        A GitHub-flavoured Markdown table string produced by ``tabulate``, or ``None``
        when no reported plot matches ``title``.

    NOTE(review): assumes ``plot_str`` is the JSON of a table figure whose first trace
    carries ``cells.values`` (column-major) and ``header.values`` — confirm against the
    code that reports the table.
    """
    wanted = title.lower()
    reported_plots = task.get_reported_plots() or []  # Tolerate a task with no plots at all

    # A plot entry may lack a metric name (missing key or None); treat it as non-matching
    # instead of crashing on ``None.lower()``.
    metrics_report = next(
        (plot for plot in reported_plots if wanted in (plot.get('metric') or '').lower()),
        None,
    )
    if not metrics_report:
        return None

    table_trace = json.loads(metrics_report['plot_str'])['data'][0]

    # Header entries may be one-element lists or plain strings; only unwrap the former,
    # so a plain-string header is not truncated to its first character.
    columns = [
        col[0] if isinstance(col, (list, tuple)) else col
        for col in table_trace['header']['values']
    ]
    # ``cells.values`` is stored per column; transpose it into rows for the DataFrame.
    rows = list(zip(*table_trace['cells']['values']))

    df = pd.DataFrame(rows, columns=columns)
    return tabulate(df, tablefmt='github', headers='keys', showindex=False)


def create_comment_output(task, status): # Create a markdown table from a ClearML task's output scalars
retrieve_scalars_dict = task.get_last_scalar_metrics()
if retrieve_scalars_dict:
scalars_tables = create_output_tables(retrieve_scalars_dict)
df = pd.DataFrame(data=scalars_tables, columns=['Title', 'Series', 'Last', 'Min', 'Max'])
df.style.set_caption(f'Last scalars metrics for task {task.task_id}, task status {status}')
table = tabulate(df, tablefmt='github', headers='keys', showindex=False)
return table


def get_task_stats(task): # Get the comment markdown for a stats table based on the task object
def get_task_stats(task, title): # Get the comment markdown for a stats table based on the task object
task_status = task.get_status() # Try to get the task stats
output_log_page = task.get_output_log_web_page()

if task_status == 'completed':
table = create_comment_output(task, task_status)
if table: return f'Model performance on test set:\n\n{table}\n\n' \
f'You can view full task results [here]({output_log_page})'
table = get_reported_table(task, title)
if table: return f'{title}:\n\n{table}\n\nYou can view full task results [here]({output_log_page})'
return (f'Something went wrong when creating the task table. '
f'Check full task [here]({output_log_page})')
return f'Task is in {task_status} status, this should not happen!'
Expand All @@ -70,10 +58,10 @@ def create_stats_comment(task_stats): # Create a comment on the current PR conta


if __name__ == '__main__': # Main check: Does a ClearML task exist for this specific commit?
print(f"Running on commit hash: {os.getenv('COMMIT_ID')}")
print('Running on commit hash:', os.getenv('COMMIT_ID'))
ml_pipeline_task = get_pipeline_of_current_commit(os.getenv('COMMIT_ID'))
ml_pipeline_task.add_tags(['main_branch']) # If the task exists, tag it as such, so we know in the interface which one it is
ml_pipeline_nodes = ml_pipeline_task.get_processed_nodes()
model_evaluation_task = Task.get_task(task_id=ml_pipeline_nodes['step7_model_evaluation'].executed)
task_stats = get_task_stats(model_evaluation_task)
task_stats = get_task_stats(model_evaluation_task, 'Model evaluation metrics on 3 subsets')
create_stats_comment(task_stats) # Get the metrics from the task and create a comment on the PR
4 changes: 2 additions & 2 deletions serving/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
clearml==1.18.0
fastapi==0.115.12
numpy==2.2.6
fastapi[standard]
numpy == 1.26.4
Copy link

Copilot AI May 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

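Consider removing the spaces around '==' in the numpy dependency so it matches the other pinned requirements in this file; it should be 'numpy==1.26.4'. Requirement specifiers allow the whitespace, so this is a consistency fix rather than an installation problem.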

Suggested change
numpy == 1.26.4
numpy==1.26.4

Copilot uses AI. Check for mistakes.
pydantic==2.11.4
tensorflow==2.18.0