streamsl · QuanUTS · May 20, 2025 · May 20, 2025 · May 20, 2025 · May 20, 2025
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
diff --git a/.github/workflows/cd.yaml → .github/workflows/pipeline.yaml b/.github/workflows/cd.yaml → .github/workflows/pipeline.yaml
@@ -1,19 +1,21 @@
-name: CD Pipeline Automation
+name: Pipeline CI/CD automation
 
 on:
-  workflow_run:
-    workflows: [ CI Quality Checks ]
-    types: [ completed ]
+  # push:
+  #   branches: [ main ]
+  pull_request:
+    branches: [ main ]
+    types: [ assigned, opened, edited, reopened, synchronize ]
 
 jobs:
-  execute-pipeline:
+  test-remote-runnable:
     runs-on: ubuntu-latest
-    if: ${{ github.event.workflow_run.conclusion == 'success' }}
 
     env: # ✅ env: Global environment variables for all steps
       CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }}
       CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }}
       CLEARML_API_SECRET_KEY: ${{ secrets.CLEARML_API_SECRET_KEY }}
+      QUEUE: Remote_CPU
 
     steps:
       - name: 👉 Step 0 - Check the event type
@@ -26,7 +28,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           ref: ${{ github.event.pull_request.head.sha }}
-        
+
       - name: 👉 Step 2 - Set up Python
         uses: actions/setup-python@v4
         with:
@@ -35,14 +37,58 @@ jobs:
       - name: 👉 Step 3 - Install ClearML
         run: |
           python -m pip install --upgrade pip clearml
+          sudo apt-get update && sudo apt-get install -y ripgrep
+
+      - name: 👉 Step 4 - Make sure the task can run remotely
+        id: launch_task # Later steps read this step's TASK_ID output through this id
+        env: # Branch name is passed as an env var so it is never expanded into the script text (untrusted PR input)
+          HEAD_BRANCH: ${{ github.head_ref || github.ref_name }}
+        run: |
+          TASK_ID=$(clearml-task \
+            --project SyntaxSquad --name 'CI: Test remote runnable' \
+            --branch "$HEAD_BRANCH" \
+            --script cicd/example_task.py --requirements cicd/requirements.txt \
+            --skip-task-init --queue "$QUEUE" \
+          | rg -o 'Task id=(.*) sent' -r '$1')
+          echo "TASK_ID=${TASK_ID}" >> "$GITHUB_OUTPUT"
+
+      - name: 👉 Step 5 - Poll for task progress # Runs cicd/check_remotely_runnable.py with the TASK_ID output of the launch_task step
+        run: |
+          python cicd/check_remotely_runnable.py "${{ steps.launch_task.outputs.TASK_ID }}"
+          echo "🍏 This job's status is ${{ job.status }}."
+
+
+  execute-pipeline:
+    runs-on: ubuntu-latest
+    needs: test-remote-runnable
+
+    env: # ✅ env: Global environment variables for all steps
+      CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }}
+      CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }}
+      CLEARML_API_SECRET_KEY: ${{ secrets.CLEARML_API_SECRET_KEY }}
+
+    steps:
+      - name: 👉 Step 0 - Check the event type
+        run: |
+          echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
+          echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
+          echo "🔎 The name of current branch is ${{ github.ref }} and the repository is ${{ github.repository }}."
+
+      - name: 👉 Step 1 - Checkout the repository
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: 👉 Step 2 - Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11.12'
 
-      - name: 👉 Step 4 - Debug ENV
+      - name: 👉 Step 3 - Install ClearML
         run: |
-          echo "CLEARML_API_HOST=$CLEARML_API_HOST"
-          if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi
-          curl -I $CLEARML_API_HOST
+          python -m pip install --upgrade pip clearml
 
-      - name: 👉 Step 5 - Run the pipeline
+      - name: 👉 Step 4 - Run the pipeline
         run: |
           python pipeline_from_tasks.py
           echo "🍏 This job's status is ${{ job.status }}."
@@ -70,7 +116,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           ref: ${{ github.event.pull_request.head.sha }}
-        
+
       - name: 👉 Step 2 - Set up Python
         uses: actions/setup-python@v4
         with:
@@ -81,13 +127,7 @@ jobs:
           python -m pip install --upgrade pip 
           pip install clearml pandas tabulate github3.py Jinja2
 
-      - name: 👉 Step 4 - Debug ENV
-        run: |
-          echo "CLEARML_API_HOST=$CLEARML_API_HOST"
-          if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi
-          curl -I $CLEARML_API_HOST
-
-      - name: 👉 Step 5 - Assign the latest Pipeline as production if it's better than the current one
+      - name: 👉 Step 4 - Comment metrics from step7_model_evaluation of the pipeline
         run: |
           python cicd/pipeline_reports.py
           echo "🍏 This job's status is ${{ job.status }}."
@@ -114,7 +154,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           ref: ${{ github.event.pull_request.head.sha }}
-        
+
       - name: 👉 Step 2 - Set up Python
         uses: actions/setup-python@v4
         with:
@@ -125,13 +165,7 @@ jobs:
           python -m pip install --upgrade pip 
           pip install clearml pandas tabulate github3.py Jinja2
 
-      - name: 👉 Step 4 - Debug ENV
-        run: |
-          echo "CLEARML_API_HOST=$CLEARML_API_HOST"
-          if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi
-          curl -I $CLEARML_API_HOST
-
-      - name: 👉 Step 5 - Comment metrics from step7_model_evaluation of the pipeline
+      - name: 👉 Step 4 - Assign the latest Pipeline as production if it's better than the current one
         run: |
           python cicd/production_tagging.py
           echo "🍏 This job's status is ${{ job.status }}."
@@ -158,36 +192,30 @@ jobs:
         uses: actions/checkout@v4
         with:
           ref: ${{ github.event.pull_request.head.sha }}
-        
+
       - name: 👉 Step 2 - Set up Python
         uses: actions/setup-python@v4
         with:
           python-version: '3.11.12'
-
+          
       - name: 👉 Step 3 - Install dependencies
         run: |
           python -m pip install --upgrade pip 
           pip install -r serving/requirements.txt
 
-      - name: 👉 Step 4 - Debug ENV
-        run: |
-          echo "CLEARML_API_HOST=$CLEARML_API_HOST"
-          if [ -z "$CLEARML_API_HOST" ]; then echo "❌ HOST is empty!"; exit 1; fi
-          curl -I $CLEARML_API_HOST
-
-      - name: 👉 Step 5 - Deploy FastAPI model serving
+      - name: 👉 Step 4 - Deploy FastAPI model serving
         run: |
-          nohup fastapi dev serving/pose2gloss.py & # Start the FastAPI server in the background
+          fastapi dev serving/pose2gloss.py & # Start the FastAPI server in the background
           echo $! > fastapi.pid # Save the PID of the FastAPI server
-          sleep 10 # Wait for the server to start
+          sleep 240 # Wait for 4 minutes to start the server and download artifacts
           curl -f http://localhost:$FASTAPI_PORT/health || exit 1 # Verify the server is running
-          sleep 240 # Wait for 4 minutes to start the server and download artifacts
-          curl -f http://localhost:$FASTAPI_PORT/health || exit 1 # Verify the server is running
+          # Poll the FastAPI server's health endpoint until it is ready or timeout is reached
+          for i in {1..60}; do # Retry up to 60 times (1 minute timeout with 1-second intervals)
+            if curl -f http://localhost:$FASTAPI_PORT/health; then
+              echo "FastAPI server is ready!"
+              break
+            fi
+            echo "Waiting for FastAPI server to be ready... (attempt $i)"
+            sleep 1
+          done
+          if ! curl -f http://localhost:$FASTAPI_PORT/health; then
+            echo "FastAPI server failed to start within the timeout period."
+            exit 1
+          fi
-          sleep 240 # Wait for 4 minutes to start the server and download artifacts
-          curl -f http://localhost:$FASTAPI_PORT/health || exit 1 # Verify the server is running
+          # Poll the FastAPI server's health endpoint until it is ready or timeout is reached
+          for i in {1..60}; do # Retry up to 60 times (1 minute timeout with 1-second intervals)
+            if curl -f http://localhost:$FASTAPI_PORT/health; then
+              echo "FastAPI server is ready!"
+              break
+            fi
+            echo "Waiting for FastAPI server to be ready... (attempt $i)"
+            sleep 1
+          done
+          if ! curl -f http://localhost:$FASTAPI_PORT/health; then
+            echo "FastAPI server failed to start within the timeout period."
+            exit 1
+          fi
 
-      - name: 👉 Step 6 - Notify deployment status
+      - name: 👉 Step 5 - Notify deployment status
         if: success()
         run: |
           echo "FastAPI model serving deployed successfully on port $FASTAPI_PORT"
 
-      - name: 👉 Step 7 - Stop FastAPI server (cleanup)
+      - name: 👉 Step 6 - Stop FastAPI server (cleanup)
         if: always() # This step will always run, even if the previous steps fail
         run: |
           kill $(cat fastapi.pid) # Stop the FastAPI server using the saved PID

diff --git a/cicd/pipeline_reports.py b/cicd/pipeline_reports.py
@@ -20,34 +20,22 @@ def get_pipeline_of_current_commit(commit_id): # Find the ClearML task that corr
             return PipelineController.get(pipeline_id=task['id'])
 
 
-def create_output_tables(retrieve_scalars_dict): # Extract data from ClearML into format for tabulation
-    data = []
-    for graph_title, graph_values in retrieve_scalars_dict.items():
-        graph_data = []
-        for series, series_values in graph_values.items():
-            graph_data.append((graph_title, series, *series_values.values()))
-        data += graph_data
-    return sorted(data, key=lambda output: (output[0], output[1]))
+def get_reported_table(task, title): # Render the first reported plot whose metric name contains `title` (case-insensitive) as a GitHub-markdown table; None if no plot matches
+    wanted = title.lower() # Lower-cased once instead of once per reported plot
+    metrics_report = next((d for d in task.get_reported_plots() if wanted in (d.get('metric') or '').lower()), None) # `or ''` keeps an entry with a missing/None metric from raising AttributeError
+    if not metrics_report: return None
+    metrics_report = json.loads(metrics_report['plot_str'])['data'][0] # Only the first entry of the plot's 'data' list is used
+    df = pd.DataFrame(list(zip(*metrics_report['cells']['values'])), columns=[col[0] for col in metrics_report['header']['values']]) # zip(*) transposes the cell values into row tuples; the first item of each header entry is the column name
+    return tabulate(df, tablefmt='github', headers='keys', showindex=False)
 
 
-def create_comment_output(task, status): # Create a markdown table from a ClearML task's output scalars
-    retrieve_scalars_dict = task.get_last_scalar_metrics()
-    if retrieve_scalars_dict:
-        scalars_tables = create_output_tables(retrieve_scalars_dict)
-        df = pd.DataFrame(data=scalars_tables, columns=['Title', 'Series', 'Last', 'Min', 'Max'])
-        df.style.set_caption(f'Last scalars metrics for task {task.task_id}, task status {status}')
-        table = tabulate(df, tablefmt='github', headers='keys', showindex=False)
-        return table
-
-
-def get_task_stats(task): # Get the comment markdown for a stats table based on the task object
+def get_task_stats(task, title): # Get the comment markdown for a stats table based on the task object
     task_status = task.get_status() # Try to get the task stats
     output_log_page = task.get_output_log_web_page()
 
     if task_status == 'completed':
-        table = create_comment_output(task, task_status)
-        if table: return f'Model performance on test set:\n\n{table}\n\n' \
-                         f'You can view full task results [here]({output_log_page})'
+        table = get_reported_table(task, title)
+        if table: return f'{title}:\n\n{table}\n\nYou can view full task results [here]({output_log_page})'
         return (f'Something went wrong when creating the task table. '
                 f'Check full task [here]({output_log_page})')
     return f'Task is in {task_status} status, this should not happen!'
@@ -70,10 +58,10 @@ def create_stats_comment(task_stats): # Create a comment on the current PR conta
 
 
 if __name__ == '__main__': # Main check: Does a ClearML task exist for this specific commit?
-    print(f"Running on commit hash: {os.getenv('COMMIT_ID')}")
+    print('Running on commit hash:', os.getenv('COMMIT_ID'))
     ml_pipeline_task = get_pipeline_of_current_commit(os.getenv('COMMIT_ID'))
     ml_pipeline_task.add_tags(['main_branch']) # If the task exists, tag it as such, so we know in the interface which one it is
     ml_pipeline_nodes = ml_pipeline_task.get_processed_nodes()
     model_evaluation_task = Task.get_task(task_id=ml_pipeline_nodes['step7_model_evaluation'].executed)
-    task_stats = get_task_stats(model_evaluation_task)
+    task_stats = get_task_stats(model_evaluation_task, 'Model evaluation metrics on 3 subsets')
     create_stats_comment(task_stats) # Get the metrics from the task and create a comment on the PR
diff --git a/serving/requirements.txt b/serving/requirements.txt
@@ -1,5 +1,5 @@
 clearml==1.18.0
-fastapi==0.115.12
-numpy==2.2.6
+fastapi[standard]==0.115.12
+numpy == 1.26.4
-numpy == 1.26.4
+numpy==1.26.4
-numpy == 1.26.4
+numpy==1.26.4
 pydantic==2.11.4
 tensorflow==2.18.0