From 940f14ac066b5cb9fcfd2a89c07743f65a54cfc3 Mon Sep 17 00:00:00 2001 From: Hoang-Quan Dang Date: Tue, 20 May 2025 15:54:31 +1000 Subject: [PATCH 1/6] SCRUM-135 Fix deploy-fastapi job error due to numpy conflict --- serving/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/serving/requirements.txt b/serving/requirements.txt index ca0df64..387d4a8 100644 --- a/serving/requirements.txt +++ b/serving/requirements.txt @@ -1,5 +1,5 @@ clearml==1.18.0 fastapi==0.115.12 -numpy==2.2.6 +numpy == 1.26.4 pydantic==2.11.4 tensorflow==2.18.0 \ No newline at end of file From 60f777af32a422e149bfd62fb0aaa2d738f41bd6 Mon Sep 17 00:00:00 2001 From: Hoang-Quan Dang Date: Tue, 20 May 2025 15:55:38 +1000 Subject: [PATCH 2/6] SCRUM-135 Combine CI/CD into 1 workflow --- .github/workflows/ci.yaml | 64 -------- .github/workflows/{cd.yaml => pipeline.yaml} | 145 ++++++++----------- 2 files changed, 62 insertions(+), 147 deletions(-) delete mode 100644 .github/workflows/ci.yaml rename .github/workflows/{cd.yaml => pipeline.yaml} (54%) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml deleted file mode 100644 index a5cfd8a..0000000 --- a/.github/workflows/ci.yaml +++ /dev/null @@ -1,64 +0,0 @@ -name: CI Quality Checks - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - types: [ assigned, opened, edited, reopened, synchronize ] - -jobs: - test-remote-runnable: - runs-on: ubuntu-latest - - env: # ✅ env: Global environment variables for all steps - CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }} - CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }} - CLEARML_API_SECRET_KEY: ${{ secrets.CLEARML_API_SECRET_KEY }} - QUEUE: Remote_CPU - - steps: - - name: 👉 Step 0 - Check the event type - run: | - echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" - echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}." - - - name: 👉 Step 1 - Checkout the repository - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: 👉 Step 2 - Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11.12' - - - name: 👉 Step 3 - Install ClearML - run: | - python -m pip install --upgrade pip clearml - sudo apt-get update && sudo apt-get install -y ripgrep - - - name: 👉 Step 4 - Debug ENV - run: | - echo "CLEARML_API_HOST=$CLEARML_API_HOST" - if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi - curl -I $CLEARML_API_HOST - - - name: 👉 Step 5 - Make sure the task can run remotely - id: launch_task - run: | - TASK_ID=$(clearml-task \ - --project SyntaxSquad \ - --name 'CI: Test remote runnable' \ - --branch ${{ github.head_ref || github.ref_name }} \ - --script cicd/example_task.py \ - --requirements cicd/requirements.txt \ - --skip-task-init --queue "$QUEUE" \ - | rg -o 'Task id=(.*) sent' -r '$1') - echo "TASK_ID=${TASK_ID}" >> $GITHUB_OUTPUT - - - name: 👉 Step 6 - Poll for task progress - run: | - python cicd/check_remotely_runnable.py "${{ steps.launch_task.outputs.TASK_ID }}" - echo "🍏 This job's status is ${{ job.status }}." \ No newline at end of file diff --git a/.github/workflows/cd.yaml b/.github/workflows/pipeline.yaml similarity index 54% rename from .github/workflows/cd.yaml rename to .github/workflows/pipeline.yaml index 21875f2..cd98135 100644 --- a/.github/workflows/cd.yaml +++ b/.github/workflows/pipeline.yaml @@ -1,32 +1,37 @@ -name: CD Pipeline Automation +name: Pipeline CI/CD automation on: - workflow_run: - workflows: [ CI Quality Checks ] - types: [ completed ] + # push: + # branches: [ main ] + pull_request: + branches: [ main ] + types: [ assigned, opened, edited, reopened, synchronize ] jobs: - execute-pipeline: + test-remote-runnable: runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'success' }} env: # ✅ env: Global environment variables for all steps CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }} CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }} CLEARML_API_SECRET_KEY: ${{ secrets.CLEARML_API_SECRET_KEY }} + QUEUE: Remote_CPU steps: + - &step0 - name: 👉 Step 0 - Check the event type run: | echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}." + - &step1 - name: 👉 Step 1 - Checkout the repository uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - + + - &step2 - name: 👉 Step 2 - Set up Python uses: actions/setup-python@v4 with: @@ -35,14 +40,45 @@ jobs: - name: 👉 Step 3 - Install ClearML run: | python -m pip install --upgrade pip clearml + sudo apt-get update && sudo apt-get install -y ripgrep + + - name: 👉 Step 4 - Make sure the task can run remotely + id: launch_task + run: | + TASK_ID=$(clearml-task \ + --project SyntaxSquad \ + --name 'CI: Test remote runnable' \ + --branch ${{ github.head_ref || github.ref_name }} \ + --script cicd/example_task.py \ + --requirements cicd/requirements.txt \ + --skip-task-init --queue "$QUEUE" \ + | rg -o 'Task id=(.*) sent' -r '$1') + echo "TASK_ID=${TASK_ID}" >> $GITHUB_OUTPUT + + - name: 👉 Step 5 - Poll for task progress + run: | + python cicd/check_remotely_runnable.py "${{ steps.launch_task.outputs.TASK_ID }}" + echo "🍏 This job's status is ${{ job.status }}." + + + execute-pipeline: + runs-on: ubuntu-latest + needs: test-remote-runnable - - name: 👉 Step 4 - Debug ENV + env: # ✅ env: Global environment variables for all steps + CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }} + CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }} + CLEARML_API_SECRET_KEY: ${{ secrets.CLEARML_API_SECRET_KEY }} + + steps: + - *step0 + - *step1 + - *step2 + - name: 👉 Step 3 - Install ClearML run: | - echo "CLEARML_API_HOST=$CLEARML_API_HOST" - if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi - curl -I $CLEARML_API_HOST + python -m pip install --upgrade pip clearml - - name: 👉 Step 5 - Run the pipeline + - name: 👉 Step 4 - Run the pipeline run: | python pipeline_from_tasks.py echo "🍏 This job's status is ${{ job.status }}." @@ -60,34 +96,15 @@ jobs: COMMIT_ID: ${{ github.event.pull_request.head.sha }} steps: - - name: 👉 Step 0 - Check the event type - run: | - echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" - echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}." - - - name: 👉 Step 1 - Checkout the repository - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: 👉 Step 2 - Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11.12' - + - *step0 + - *step1 + - *step2 - name: 👉 Step 3 - Install dependencies run: | python -m pip install --upgrade pip pip install clearml pandas tabulate github3.py Jinja2 - - name: 👉 Step 4 - Debug ENV - run: | - echo "CLEARML_API_HOST=$CLEARML_API_HOST" - if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi - curl -I $CLEARML_API_HOST - - - name: 👉 Step 5 - Assign the latest Pipeline as production if it's better than the current one + - name: 👉 Step 4 - Assign the latest Pipeline as production if it's better than the current one run: | python cicd/pipeline_reports.py echo "🍏 This job's status is ${{ job.status }}." @@ -104,34 +121,15 @@ jobs: COMMIT_ID: ${{ github.event.pull_request.head.sha }} steps: - - name: 👉 Step 0 - Check the event type - run: | - echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" - echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}." - - - name: 👉 Step 1 - Checkout the repository - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: 👉 Step 2 - Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11.12' - + - *step0 + - *step1 + - *step2 - name: 👉 Step 3 - Install dependencies run: | python -m pip install --upgrade pip pip install clearml pandas tabulate github3.py Jinja2 - - name: 👉 Step 4 - Debug ENV - run: | - echo "CLEARML_API_HOST=$CLEARML_API_HOST" - if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi - curl -I $CLEARML_API_HOST - - - name: 👉 Step 5 - Comment metrics from step7_model_evaluation of the pipeline + - name: 👉 Step 4 - Comment metrics from step7_model_evaluation of the pipeline run: | python cicd/production_tagging.py echo "🍏 This job's status is ${{ job.status }}." @@ -148,46 +146,27 @@ jobs: FASTAPI_PORT: 8000 steps: - - name: 👉 Step 0 - Check the event type - run: | - echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" - echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}." - - - name: 👉 Step 1 - Checkout the repository - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: 👉 Step 2 - Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11.12' - + - *step0 + - *step1 + - *step2 - name: 👉 Step 3 - Install dependencies run: | python -m pip install --upgrade pip pip install -r serving/requirements.txt - - name: 👉 Step 4 - Debug ENV - run: | - echo "CLEARML_API_HOST=$CLEARML_API_HOST" - if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi - curl -I $CLEARML_API_HOST - - - name: 👉 Step 5 - Deploy FastAPI model serving + - name: 👉 Step 4 - Deploy FastAPI model serving run: | nohup fastapi dev serving/pose2gloss.py & # Start the FastAPI server in the background echo $! > fastapi.pid # Save the PID of the FastAPI server sleep 10 # Wait for the server to start curl -f http://localhost:$FASTAPI_PORT/health || exit 1 # Verify the server is running - - name: 👉 Step 6 - Notify deployment status + - name: 👉 Step 5 - Notify deployment status if: success() run: | echo "FastAPI model serving deployed successfully on port $FASTAPI_PORT" - - name: 👉 Step 7 - Stop FastAPI server (cleanup) + - name: 👉 Step 6 - Stop FastAPI server (cleanup) if: always() # This step will always run, even if the previous steps fail run: | kill $(cat fastapi.pid) # Stop the FastAPI server using the saved PID From 7d3eac7932f50133b1fc2223402741134d4c20fd Mon Sep 17 00:00:00 2001 From: Hoang-Quan Dang Date: Tue, 20 May 2025 16:25:25 +1000 Subject: [PATCH 3/6] SCRUM-135 Remove anchors in `pipeline.yaml` as GitHub not support --- .github/workflows/pipeline.yaml | 79 ++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml index cd98135..075225d 100644 --- a/.github/workflows/pipeline.yaml +++ b/.github/workflows/pipeline.yaml @@ -18,20 +18,17 @@ jobs: QUEUE: Remote_CPU steps: - - &step0 - name: 👉 Step 0 - Check the event type run: | echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}." - - &step1 - name: 👉 Step 1 - Checkout the repository uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - - &step2 - name: 👉 Step 2 - Set up Python uses: actions/setup-python@v4 with: @@ -71,9 +68,22 @@ jobs: CLEARML_API_SECRET_KEY: ${{ secrets.CLEARML_API_SECRET_KEY }} steps: - - *step0 - - *step1 - - *step2 + - name: 👉 Step 0 - Check the event type + run: | + echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." + echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" + echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}." + + - name: 👉 Step 1 - Checkout the repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: 👉 Step 2 - Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11.12' + - name: 👉 Step 3 - Install ClearML run: | python -m pip install --upgrade pip clearml @@ -96,9 +106,22 @@ jobs: COMMIT_ID: ${{ github.event.pull_request.head.sha }} steps: - - *step0 - - *step1 - - *step2 + - name: 👉 Step 0 - Check the event type + run: | + echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." + echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" + echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}." + + - name: 👉 Step 1 - Checkout the repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: 👉 Step 2 - Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11.12' + - name: 👉 Step 3 - Install dependencies run: | python -m pip install --upgrade pip @@ -121,9 +144,22 @@ jobs: COMMIT_ID: ${{ github.event.pull_request.head.sha }} steps: - - *step0 - - *step1 - - *step2 + - name: 👉 Step 0 - Check the event type + run: | + echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." + echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" + echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}." + + - name: 👉 Step 1 - Checkout the repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: 👉 Step 2 - Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11.12' + - name: 👉 Step 3 - Install dependencies run: | python -m pip install --upgrade pip @@ -146,9 +182,22 @@ jobs: FASTAPI_PORT: 8000 steps: - - *step0 - - *step1 - - *step2 + - name: 👉 Step 0 - Check the event type + run: | + echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." + echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" + echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}." + + - name: 👉 Step 1 - Checkout the repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: 👉 Step 2 - Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11.12' + - name: 👉 Step 3 - Install dependencies run: | python -m pip install --upgrade pip From 4fd9540e9afa5574cb3f2ba1c90054ca4eae2d74 Mon Sep 17 00:00:00 2001 From: Hoang-Quan Dang Date: Tue, 20 May 2025 18:11:53 +1000 Subject: [PATCH 4/6] SCRUM-135 Report last table of step7 for report-pipeline-metrics job --- cicd/pipeline_reports.py | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/cicd/pipeline_reports.py b/cicd/pipeline_reports.py index 2f863e5..5854463 100644 --- a/cicd/pipeline_reports.py +++ b/cicd/pipeline_reports.py @@ -20,34 +20,22 @@ def get_pipeline_of_current_commit(commit_id): # Find the ClearML task that corr return PipelineController.get(pipeline_id=task['id']) -def create_output_tables(retrieve_scalars_dict): # Extract data from ClearML into format for tabulation - data = [] - for graph_title, graph_values in retrieve_scalars_dict.items(): - graph_data = [] - for series, series_values in graph_values.items(): - graph_data.append((graph_title, series, *series_values.values())) - data += graph_data - return sorted(data, key=lambda output: (output[0], output[1])) +def get_reported_table(task, title): # Get the table from the task's reported plots + reported_plots = task.get_reported_plots() + metrics_report = next((d for d in reported_plots if title.lower() in d.get('metric').lower()), None) + if not metrics_report: return None + metrics_report = json.loads(metrics_report['plot_str'])['data'][0] + df = pd.DataFrame(list(zip(*metrics_report['cells']['values'])), columns=[col[0] for col in metrics_report['header']['values']]) + return tabulate(df, tablefmt='github', headers='keys', showindex=False) -def create_comment_output(task, status): # Create a markdown table from a ClearML task's output scalars - retrieve_scalars_dict = task.get_last_scalar_metrics() - if retrieve_scalars_dict: - scalars_tables = create_output_tables(retrieve_scalars_dict) - df = pd.DataFrame(data=scalars_tables, columns=['Title', 'Series', 'Last', 'Min', 'Max']) - df.style.set_caption(f'Last scalars metrics for task {task.task_id}, task status {status}') - table = tabulate(df, tablefmt='github', headers='keys', showindex=False) - return table - - -def get_task_stats(task): # Get the comment markdown for a stats table based on the task object +def get_task_stats(task, title): # Get the comment markdown for a stats table based on the task object task_status = task.get_status() # Try to get the task stats output_log_page = task.get_output_log_web_page() if task_status == 'completed': - table = create_comment_output(task, task_status) - if table: return f'Model performance on test set:\n\n{table}\n\n' \ - f'You can view full task results [here]({output_log_page})' + table = get_reported_table(task, title) + if table: return f'{title}:\n\n{table}\n\nYou can view full task results [here]({output_log_page})' return (f'Something went wrong when creating the task table. ' f'Check full task [here]({output_log_page})') return f'Task is in {task_status} status, this should not happen!' @@ -70,10 +58,10 @@ def create_stats_comment(task_stats): # Create a comment on the current PR conta if __name__ == '__main__': # Main check: Does a ClearML task exist for this specific commit? - print(f"Running on commit hash: {os.getenv('COMMIT_ID')}") + print('Running on commit hash:', os.getenv('COMMIT_ID')) ml_pipeline_task = get_pipeline_of_current_commit(os.getenv('COMMIT_ID')) ml_pipeline_task.add_tags(['main_branch']) # If the task exists, tag it as such, so we know in the interface which one it is ml_pipeline_nodes = ml_pipeline_task.get_processed_nodes() model_evaluation_task = Task.get_task(task_id=ml_pipeline_nodes['step7_model_evaluation'].executed) - task_stats = get_task_stats(model_evaluation_task) + task_stats = get_task_stats(model_evaluation_task, 'Model evaluation metrics on 3 subsets') create_stats_comment(task_stats) # Get the metrics from the task and create a comment on the PR \ No newline at end of file From e3a4c8f694c65e2e36446d93e71c575366fbcb2e Mon Sep 17 00:00:00 2001 From: Hoang-Quan Dang Date: Tue, 20 May 2025 18:12:59 +1000 Subject: [PATCH 5/6] SCRUM-135 Update pipeline step names for clarity and fix FastAPI requirement in `requirements.txt` --- .github/workflows/pipeline.yaml | 4 ++-- serving/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml index 075225d..7c47a4c 100644 --- a/.github/workflows/pipeline.yaml +++ b/.github/workflows/pipeline.yaml @@ -127,7 +127,7 @@ jobs: python -m pip install --upgrade pip pip install clearml pandas tabulate github3.py Jinja2 - - name: 👉 Step 4 - Assign the latest Pipeline as production if it's better than the current one + - name: 👉 Step 4 - Comment metrics from step7_model_evaluation of the pipeline run: | python cicd/pipeline_reports.py echo "🍏 This job's status is ${{ job.status }}." @@ -165,7 +165,7 @@ jobs: python -m pip install --upgrade pip pip install clearml pandas tabulate github3.py Jinja2 - - name: 👉 Step 4 - Comment metrics from step7_model_evaluation of the pipeline + - name: 👉 Step 4 - Assign the latest Pipeline as production if it's better than the current one run: | python cicd/production_tagging.py echo "🍏 This job's status is ${{ job.status }}." diff --git a/serving/requirements.txt b/serving/requirements.txt index 387d4a8..d5a5e23 100644 --- a/serving/requirements.txt +++ b/serving/requirements.txt @@ -1,5 +1,5 @@ clearml==1.18.0 -fastapi==0.115.12 +fastapi[standard] numpy == 1.26.4 pydantic==2.11.4 tensorflow==2.18.0 \ No newline at end of file From 44b94a28eb960ba1f1418cffc44270aee79a7a41 Mon Sep 17 00:00:00 2001 From: Hoang-Quan Dang Date: Tue, 20 May 2025 19:49:12 +1000 Subject: [PATCH 6/6] SCRUM-135 Update FastAPI deployment step to increase wait time for server startup --- .github/workflows/pipeline.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml index 7c47a4c..706caaf 100644 --- a/.github/workflows/pipeline.yaml +++ b/.github/workflows/pipeline.yaml @@ -205,9 +205,9 @@ jobs: - name: 👉 Step 4 - Deploy FastAPI model serving run: | - nohup fastapi dev serving/pose2gloss.py & # Start the FastAPI server in the background + fastapi dev serving/pose2gloss.py & # Start the FastAPI server in the background echo $! > fastapi.pid # Save the PID of the FastAPI server - sleep 10 # Wait for the server to start + sleep 240 # Wait for 4 minutes to start the server and download artifacts curl -f http://localhost:$FASTAPI_PORT/health || exit 1 # Verify the server is running - name: 👉 Step 5 - Notify deployment status