[Chore] Add CI workflow and test requirements

cennn · cennn · commit fdaeb73b70cc · 2026-03-28T15:41:46.000+08:00
diff --git a/.github/workflows/check_chinese_chars.py b/.github/workflows/check_chinese_chars.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2026 SandAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Check for Chinese characters in source code.
+
+Two modes:
+  - CI mode   : set env vars BASE_REF and HEAD_REF to only check the PR diff.
+  - Local mode: run without those env vars to scan every tracked file in the repo.
+
+Usage:
+  python3 .github/workflows/check_chinese_chars.py          # scan entire repo
+  BASE_REF=main HEAD_REF=HEAD python3 ...                   # scan diff only
+"""
+
+import os
+import re
+import subprocess
+import sys
+from typing import List, Tuple
+
+# Single-character class covering the Unicode ranges listed below. Besides
+# ideographs it also includes CJK punctuation and fullwidth ASCII variants, so
+# a line is flagged as soon as ``.search`` finds any one of these characters.
+CHINESE_CHAR_PATTERN = re.compile(
+    "["
+    "\u4e00-\u9fff"  # CJK Unified Ideographs
+    "\u3400-\u4dbf"  # CJK Unified Ideographs Extension A
+    "\uf900-\ufaff"  # CJK Compatibility Ideographs
+    "\u3000-\u303f"  # CJK Symbols and Punctuation
+    "\uff01-\uff5e"  # Fullwidth ASCII variants
+    "]"
+)
+
+# File extensions (lower-case, leading dot included) whose files are skipped by
+# the scan. Membership is tested by ``_is_binary`` against the final extension
+# of the lower-cased path.
+BINARY_EXTENSIONS = frozenset(
+    {
+        ".png",
+        ".jpg",
+        ".jpeg",
+        ".gif",
+        ".bmp",
+        ".ico",
+        ".svg",
+        ".webp",
+        ".mp3",
+        ".mp4",
+        ".wav",
+        ".avi",
+        ".mov",
+        ".mkv",
+        ".zip",
+        ".tar",
+        ".gz",
+        ".bz2",
+        ".xz",
+        ".7z",
+        ".bin",
+        ".exe",
+        ".dll",
+        ".so",
+        ".dylib",
+        ".pt",
+        ".pth",
+        ".onnx",
+        ".safetensors",
+        ".pickle",
+        ".pkl",
+        ".pdf",
+        ".woff",
+        ".woff2",
+        ".ttf",
+        ".otf",
+        ".eot",
+        ".pyc",
+        ".o",
+        ".a",
+        ".nsys-rep",
+        ".npz",
+        ".npy",
+    }
+)
+
+
+def _is_binary(path: str) -> bool:
+    """Return True when the final extension of *path* is in BINARY_EXTENSIONS.
+
+    The path is lower-cased before splitting, so the comparison is
+    case-insensitive.
+    """
+    extension = os.path.splitext(path.lower())[1]
+    return extension in BINARY_EXTENSIONS
+
+
+# ---------------------------------------------------------------------------
+# CI mode: only check newly added / modified lines in the PR diff
+# ---------------------------------------------------------------------------
+
+
+def _check_diff(base_sha: str, head_sha: str) -> List[Tuple[str, int, str]]:
+    """Find lines added between two revisions that contain Chinese characters.
+
+    Args:
+        base_sha: Revision for the old side of the diff; resolved with
+            ``git rev-parse``.
+        head_sha: Revision for the new side of the diff; resolved with
+            ``git rev-parse``.
+
+    Returns:
+        A list of ``(path, line_number, content)`` tuples. ``line_number`` is
+        the 1-based position of the added line in the new version of ``path``
+        and ``content`` is the line without its leading ``+`` marker.
+
+    Raises:
+        subprocess.CalledProcessError: If a git command exits non-zero.
+    """
+    base_sha = subprocess.check_output(["git", "rev-parse", base_sha], text=True).strip()
+    head_sha = subprocess.check_output(["git", "rev-parse", head_sha], text=True).strip()
+
+    print(f"[CI mode] Checking diff between {base_sha[:8]} and {head_sha[:8]} ...")
+
+    # core.quotePath=false keeps non-ASCII paths unquoted in the "diff --git"
+    # header so the " b/" split below can read them.
+    # The filter includes R because files git reports as renamed would
+    # otherwise be dropped together with any lines added to them.
+    # The output is decoded as UTF-8 with replacement so that a stray
+    # non-UTF-8 byte in a changed file cannot abort the whole check.
+    result = subprocess.run(
+        ["git", "-c", "core.quotePath=false", "diff", "-U0", "--diff-filter=ACMR", base_sha, head_sha],
+        capture_output=True,
+        encoding="utf-8",
+        errors="replace",
+        check=True,
+    )
+
+    findings: List[Tuple[str, int, str]] = []
+    current_file = None
+    line_num = 0
+    # False while reading a file's header lines, True once its first "@@" hunk
+    # header has been seen. Needed because an added line whose content starts
+    # with "++" looks exactly like a "+++ b/<path>" header.
+    in_hunk = False
+
+    # Split on "\n" only: str.splitlines() would also break on "\r", form
+    # feeds, etc. inside a line and throw the line counter off.
+    for line in result.stdout.split("\n"):
+        if line.startswith("diff --git"):
+            parts = line.split(" b/")
+            current_file = parts[-1] if len(parts) >= 2 else None
+            if current_file and _is_binary(current_file):
+                current_file = None
+            in_hunk = False
+            continue
+        if line.startswith("@@"):
+            match = re.search(r"\+(\d+)", line)
+            if match:
+                # The counter is incremented before each use, so start one
+                # below the hunk's first new-file line.
+                line_num = int(match.group(1)) - 1
+            in_hunk = True
+            continue
+        if not in_hunk:
+            # "index", "---", "+++", rename/mode lines: header metadata only.
+            continue
+        if line.startswith("+"):
+            line_num += 1
+            content = line[1:]
+            if current_file and CHINESE_CHAR_PATTERN.search(content):
+                findings.append((current_file, line_num, content))
+        elif line.startswith(" "):
+            # Context line (none are expected with -U0, but stay correct).
+            line_num += 1
+        # Removed lines ("-") and "\ No newline at end of file" markers do not
+        # occupy a line in the new file, so they leave the counter untouched.
+
+    return findings
+
+
+# ---------------------------------------------------------------------------
+# Local mode: scan every tracked file in the repo
+# ---------------------------------------------------------------------------
+
+
+def _check_all_files() -> List[Tuple[str, int, str]]:
+    """Scan every file tracked by git for lines containing Chinese characters.
+
+    Paths in BINARY_EXTENSIONS, paths that are not regular files, and files
+    that cannot be opened are skipped. Files are read as UTF-8 with
+    undecodable bytes dropped.
+
+    Returns:
+        A list of ``(path, line_number, content)`` tuples with 1-based line
+        numbers and the trailing newline removed from ``content``.
+
+    Raises:
+        subprocess.CalledProcessError: If ``git ls-files`` exits non-zero.
+    """
+    print("[Local mode] Scanning all tracked files for Chinese characters ...")
+
+    # -z emits NUL-separated paths verbatim. Without it git C-quotes paths
+    # containing non-ASCII bytes, the quoted form does not exist on disk, and
+    # such files would be silently skipped.
+    raw_listing = subprocess.check_output(["git", "ls-files", "-z"])
+    tracked = [os.fsdecode(entry) for entry in raw_listing.split(b"\0") if entry]
+
+    findings: List[Tuple[str, int, str]] = []
+    for filepath in tracked:
+        if _is_binary(filepath) or not os.path.isfile(filepath):
+            continue
+        try:
+            # errors="ignore" means decoding itself never raises; only I/O
+            # failures need handling here.
+            with open(filepath, encoding="utf-8", errors="ignore") as fh:
+                for line_num, line in enumerate(fh, start=1):
+                    if CHINESE_CHAR_PATTERN.search(line):
+                        findings.append((filepath, line_num, line.rstrip("\n")))
+        except OSError:
+            # Best effort: an unreadable file must not abort the whole scan.
+            continue
+
+    return findings
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+
+def _report(findings: List[Tuple[str, int, str]], is_ci: bool) -> None:
+    """Print the scan result to stdout.
+
+    Args:
+        findings: ``(path, line_number, content)`` tuples to list.
+        is_ci: When true, each finding is additionally printed as a
+            ``::error file=...,line=...::`` line.
+    """
+    if findings:
+        print(f"\nFound {len(findings)} line(s) containing Chinese characters:\n")
+        for filepath, line_no, content in findings:
+            # Surrounding whitespace is dropped for display only.
+            stripped = content.strip()
+            print(f"  {filepath}:{line_no}: {stripped}")
+            if not is_ci:
+                continue
+            print(f"::error file={filepath},line={line_no}::Chinese character detected: {stripped}")
+        print(f"\n{len(findings)} occurrence(s) total. Please remove Chinese characters from your code.")
+    else:
+        print("\nNo Chinese characters found.")
+
+
+def main():
+    """Run the check in diff mode or full-scan mode and exit 1 on any finding.
+
+    Diff mode is used only when both the BASE_REF and HEAD_REF environment
+    variables are set to non-empty values; otherwise all tracked files are
+    scanned.
+    """
+    base = os.environ.get("BASE_REF")
+    head = os.environ.get("HEAD_REF")
+    diff_mode = bool(base and head)
+
+    findings = _check_diff(base, head) if diff_mode else _check_all_files()
+    _report(findings, diff_mode)
+
+    if findings:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml
@@ -0,0 +1,72 @@
+name: Integration Test on PR
+
+on:
+    pull_request:
+        types: [opened, reopened, synchronize, ready_for_review]
+        branches:
+        -   main
+
+concurrency:
+    group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-test
+    cancel-in-progress: true
+
+jobs:
+    integration_test:
+        name: Integration Test
+        runs-on: [self-hosted, magi-compiler]
+        timeout-minutes: 30
+        env:
+            http_proxy: ${{ secrets.HTTP_PROXY }}
+            https_proxy: ${{ secrets.HTTPS_PROXY }}
+            no_proxy: localhost,127.0.0.1,::1
+            PIP_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
+            PIP_TRUSTED_HOST: pypi.tuna.tsinghua.edu.cn
+            GIT_HTTP_CONNECT_TIMEOUT: 10   # Connection timeout in seconds
+            GIT_HTTP_LOW_SPEED_LIMIT: 100  # Minimum speed threshold (bytes/s)
+            GIT_HTTP_LOW_SPEED_TIME: 10    # Abort if below threshold for this many seconds
+        permissions:
+            pull-requests: read
+            contents: read
+        defaults:
+            run:
+                shell: bash
+        steps:
+        -   name: Configure git proxy
+            # The proxy URLs are taken from the job-level `http_proxy` /
+            # `https_proxy` environment variables (set from the same secrets)
+            # rather than expanded into the script text, so a value containing
+            # quotes or `$` cannot change the shell command that runs.
+            # NOTE(review): git-config documents `http.proxy` (also used for
+            # https:// remotes); `https.proxy` is not a documented key, so the
+            # second command probably has no effect -- confirm and drop it.
+            run: |
+                git config --global http.proxy "$http_proxy"
+                git config --global https.proxy "$https_proxy"
+
+        # Two shallow checkouts in sequence: base commit first, then PR head.
+        # Presumably this leaves both commits in the local object store so the
+        # later check step can diff base.sha against head.sha -- confirm.
+        -   name: Checkout base branch
+            uses: actions/checkout@v4
+            timeout-minutes: 5
+            with:
+                ref: ${{ github.event.pull_request.base.sha }}
+                fetch-depth: 1
+                persist-credentials: false
+
+        -   name: Checkout PR head
+            uses: actions/checkout@v4
+            timeout-minutes: 5
+            with:
+                ref: ${{ github.event.pull_request.head.sha }}
+                fetch-depth: 1
+                persist-credentials: false
+                # Keep the existing working tree instead of cleaning it before
+                # this second checkout.
+                clean: false
+
+        -   name: Check Chinese Characters
+            run: python3 .github/workflows/check_chinese_chars.py
+            env:
+                BASE_REF: ${{ github.event.pull_request.base.sha }}
+                HEAD_REF: ${{ github.event.pull_request.head.sha }}
+
+        -   name: Check Code Style
+            run: pre-commit run --show-diff-on-failure --color=always --all-files
+
+        -   name: Install MagiCompiler
+            run: pip install --no-build-isolation --force-reinstall . --break-system-packages
+
+        -   name: Install test dependencies
+            run: pip install -r requirements-test.txt --break-system-packages
+
+        -   name: Run MagiCompiler Unit Tests
+            run: pytest -v tests/
diff --git a/requirements-test.txt b/requirements-test.txt
@@ -0,0 +1,4 @@
+# Test-only dependencies (not required for magi_compiler itself)
+timm==1.0.15
+diffusers==0.32.2
+transformers==4.48.3