#!/usr/bin/env python3

# Copyright (c) 2026 SandAI. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Check for Chinese characters in source code.

Two modes:
  - CI mode   : set env vars BASE_REF and HEAD_REF to only check the PR diff.
  - Local mode: run without those env vars to scan every tracked file in the repo.

Usage:
    python3 .github/workflows/check_chinese_chars.py       # scan entire repo
    BASE_REF=main HEAD_REF=HEAD python3 ...                # scan diff only
"""

import os
import re
import subprocess
import sys
from typing import List, Optional, Tuple

# One finding: (file path, 1-based line number in the new file, line content).
Finding = Tuple[str, int, str]

CHINESE_CHAR_PATTERN = re.compile(
    "["
    "\u4e00-\u9fff"  # CJK Unified Ideographs
    "\u3400-\u4dbf"  # CJK Unified Ideographs Extension A
    "\uf900-\ufaff"  # CJK Compatibility Ideographs
    "\u3000-\u303f"  # CJK Symbols and Punctuation
    "\uff01-\uff5e"  # Fullwidth ASCII variants
    "]"
)

# File extensions that are never scanned (matched case-insensitively).
BINARY_EXTENSIONS = frozenset(
    {
        ".png",
        ".jpg",
        ".jpeg",
        ".gif",
        ".bmp",
        ".ico",
        ".svg",
        ".webp",
        ".mp3",
        ".mp4",
        ".wav",
        ".avi",
        ".mov",
        ".mkv",
        ".zip",
        ".tar",
        ".gz",
        ".bz2",
        ".xz",
        ".7z",
        ".bin",
        ".exe",
        ".dll",
        ".so",
        ".dylib",
        ".pt",
        ".pth",
        ".onnx",
        ".safetensors",
        ".pickle",
        ".pkl",
        ".pdf",
        ".woff",
        ".woff2",
        ".ttf",
        ".otf",
        ".eot",
        ".pyc",
        ".o",
        ".a",
        ".nsys-rep",
        ".npz",
        ".npy",
    }
)

# Unified-diff hunk header: "@@ -old_start[,old_count] +new_start[,new_count] @@".
_HUNK_HEADER = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@")


def _is_binary(path: str) -> bool:
    """Return True when the extension of *path* is listed in BINARY_EXTENSIONS."""
    _, ext = os.path.splitext(path.lower())
    return ext in BINARY_EXTENSIONS


# ---------------------------------------------------------------------------
# CI mode: only check newly added / modified lines in the PR diff
# ---------------------------------------------------------------------------


def _path_from_new_file_header(line: str) -> Optional[str]:
    """Extract the new-side path from a ``+++ b/<path>`` diff header line.

    Returns None for ``+++ /dev/null``.
    """
    # rstrip: git may terminate the name with a TAB when it contains spaces.
    target = line[len("+++ "):].rstrip("\t")
    # Names with special characters are wrapped in double quotes by git; the
    # quotes are dropped, any escape sequences inside are kept verbatim.
    if len(target) >= 2 and target.startswith('"') and target.endswith('"'):
        target = target[1:-1]
    if target == "/dev/null":
        return None
    return target[2:] if target.startswith("b/") else target


def _added_lines_with_chinese(diff_text: str) -> List[Finding]:
    """Scan unified-diff text and return added lines that match CHINESE_CHAR_PATTERN.

    Args:
        diff_text: output of ``git diff`` using the ``a/`` / ``b/`` prefixes.

    Returns:
        ``(path, line_number, content)`` tuples; the line number is the
        1-based position in the new version of the file and the content has
        the leading ``+`` removed.
    """
    findings: List[Finding] = []
    current_file: Optional[str] = None
    new_line_no = 0
    # "+++"/"---" are file headers only before the first hunk of a file.
    # Inside a hunk they are ordinary added/removed lines whose content
    # happens to start with "++"/"--", so the state must be tracked.
    in_hunk = False

    for line in diff_text.split("\n"):
        if line.startswith("diff --git "):
            current_file = None
            in_hunk = False
            continue
        if line.startswith("@@"):
            header = _HUNK_HEADER.match(line)
            if header:
                # Pre-decrement so the first "+" line lands on new_start.
                new_line_no = int(header.group(1)) - 1
                in_hunk = True
            continue
        if not in_hunk:
            if line.startswith("+++ "):
                current_file = _path_from_new_file_header(line)
            continue
        if line.startswith("+"):
            new_line_no += 1
            content = line[1:]
            if current_file and not _is_binary(current_file) and CHINESE_CHAR_PATTERN.search(content):
                findings.append((current_file, new_line_no, content))
        elif not line.startswith(("-", "\\")):
            # Context line. Removed lines and the "\ No newline at end of
            # file" marker do not exist in the new file and are not counted.
            new_line_no += 1

    return findings


def _check_diff(base_sha: str, head_sha: str) -> List[Finding]:
    """Return added/modified lines between two revisions that contain Chinese characters.

    Args:
        base_sha: revision (any name ``git rev-parse`` accepts) to diff from.
        head_sha: revision to diff to.

    Raises:
        subprocess.CalledProcessError: when a git command exits non-zero.
    """
    base_sha = subprocess.check_output(["git", "rev-parse", base_sha], text=True).strip()
    head_sha = subprocess.check_output(["git", "rev-parse", head_sha], text=True).strip()

    print(f"[CI mode] Checking diff between {base_sha[:8]} and {head_sha[:8]} ...")

    result = subprocess.run(
        [
            "git",
            "-c",
            "core.quotepath=off",  # keep non-ASCII file names readable in headers
            "diff",
            "-U0",
            "--no-color",
            "--no-ext-diff",
            "--src-prefix=a/",  # pin prefixes regardless of user git config
            "--dst-prefix=b/",
            "--diff-filter=ACM",
            base_sha,
            head_sha,
        ],
        capture_output=True,
        # Decode explicitly: a diff may contain bytes that are not valid
        # UTF-8, and that must not abort the whole check.
        encoding="utf-8",
        errors="replace",
        check=True,
    )

    return _added_lines_with_chinese(result.stdout)


# ---------------------------------------------------------------------------
# Local mode: scan every tracked file in the repo
# ---------------------------------------------------------------------------


def _check_all_files() -> List[Finding]:
    """Scan every git-tracked, non-binary file for Chinese characters.

    Paths are read relative to the current working directory. Files that
    cannot be opened or read are reported on stderr and skipped.

    Raises:
        subprocess.CalledProcessError: when ``git ls-files`` exits non-zero.
    """
    print("[Local mode] Scanning all tracked files for Chinese characters ...")

    # -z yields NUL-separated, unquoted names; without it git C-quotes
    # non-ASCII paths and they can no longer be opened.
    raw_listing = subprocess.check_output(["git", "ls-files", "-z"])
    tracked = [os.fsdecode(entry) for entry in raw_listing.split(b"\0") if entry]

    findings: List[Finding] = []
    for filepath in tracked:
        if _is_binary(filepath) or not os.path.isfile(filepath):
            continue
        try:
            # errors="ignore" drops undecodable bytes instead of raising.
            with open(filepath, encoding="utf-8", errors="ignore") as fh:
                for line_num, line in enumerate(fh, start=1):
                    if CHINESE_CHAR_PATTERN.search(line):
                        findings.append((filepath, line_num, line.rstrip("\n")))
        except OSError as exc:
            print(f"warning: skipping unreadable file {filepath}: {exc}", file=sys.stderr)

    return findings


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------


def _escape_data(value: str) -> str:
    """Escape the message part of a GitHub Actions workflow command."""
    return value.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")


def _escape_property(value: str) -> str:
    """Escape a property value (e.g. ``file=``) of a GitHub Actions workflow command."""
    return _escape_data(value).replace(":", "%3A").replace(",", "%2C")


def _report(findings: List[Finding], is_ci: bool) -> None:
    """Print the findings to stdout.

    Args:
        findings: ``(path, line_number, content)`` tuples to report.
        is_ci: when True, additionally emit one ``::error`` workflow command
            per finding.
    """
    if not findings:
        print("\nNo Chinese characters found.")
        return

    print(f"\nFound {len(findings)} line(s) containing Chinese characters:\n")
    for filepath, line_no, content in findings:
        stripped = content.strip()
        print(f"  {filepath}:{line_no}: {stripped}")
        if is_ci:
            # Path and message come from repository content, so they are
            # escaped to keep the command well-formed.
            print(
                f"::error file={_escape_property(filepath)},line={line_no}"
                f"::Chinese character detected: {_escape_data(stripped)}"
            )

    print(f"\n{len(findings)} occurrence(s) total. Please remove Chinese characters from your code.")


def main() -> None:
    """Run the check in CI or local mode and exit with status 1 on findings."""
    base_ref = os.environ.get("BASE_REF")
    head_ref = os.environ.get("HEAD_REF")
    is_ci = bool(base_ref and head_ref)

    if not is_ci and (base_ref or head_ref):
        # Make the fallback visible instead of silently scanning everything.
        print(
            "warning: only one of BASE_REF / HEAD_REF is set; falling back to local mode.",
            file=sys.stderr,
        )

    if is_ci:
        findings = _check_diff(base_ref, head_ref)
    else:
        findings = _check_all_files()

    _report(findings, is_ci)

    if findings:
        sys.exit(1)


if __name__ == "__main__":
    main()
with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 1 + persist-credentials: false + clean: false + + - name: Check Chinese Characters + run: python3 .github/workflows/check_chinese_chars.py + env: + BASE_REF: ${{ github.event.pull_request.base.sha }} + HEAD_REF: ${{ github.event.pull_request.head.sha }} + + - name: Check Code Style + run: pre-commit run --show-diff-on-failure --color=always --all-files + + - name: Install MagiCompiler + run: pip install --no-build-isolation --force-reinstall . --break-system-packages + + - name: Install test dependencies + run: pip install -r requirements-test.txt --break-system-packages + + - name: Run MagiCompiler Unit Tests + run: pytest -v tests/ diff --git a/Dockerfile.magi_compiler.base b/Dockerfile.magi_compiler.base index 43cfca3..476ad3f 100644 --- a/Dockerfile.magi_compiler.base +++ b/Dockerfile.magi_compiler.base @@ -55,4 +55,4 @@ RUN --mount=type=secret,id=http_proxy,required=false \ COPY requirements.txt /app/ RUN pip install -r /app/requirements.txt -WORKDIR /app \ No newline at end of file +WORKDIR /app diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..1694081 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,4 @@ +# Test-only dependencies (not required for magi_compiler itself) +diffusers==0.32.2 +timm==1.0.15 +transformers==4.48.3 diff --git a/requirements.txt b/requirements.txt index c394c75..4de0443 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ cuda-python depyf graphviz -seaborn pydantic-settings +seaborn triton==3.5.0