Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 203 additions & 0 deletions .github/workflows/check_chinese_chars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
#!/usr/bin/env python3

# Copyright (c) 2026 SandAI. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Check for Chinese (CJK) characters, CJK punctuation, and fullwidth ASCII variants in source code.

Two modes:
- CI mode : set env vars BASE_REF and HEAD_REF to only check the PR diff.
- Local mode: run without those env vars to scan every tracked file in the repo.

Usage:
python3 .github/workflows/check_chinese_chars.py # scan entire repo
BASE_REF=main HEAD_REF=HEAD python3 ... # scan diff only
"""

import os
import re
import subprocess
import sys
from typing import List, Tuple

# Character-class ranges flagged by the check, one entry per Unicode block.
_FLAGGED_RANGES = (
    "\u4e00-\u9fff",  # CJK Unified Ideographs
    "\u3400-\u4dbf",  # CJK Unified Ideographs Extension A
    "\uf900-\ufaff",  # CJK Compatibility Ideographs
    "\u3000-\u303f",  # CJK Symbols and Punctuation
    "\uff01-\uff5e",  # Fullwidth ASCII variants
)

# Matches any single character that falls inside one of the ranges above.
CHINESE_CHAR_PATTERN = re.compile("[" + "".join(_FLAGGED_RANGES) + "]")

# Lower-case file extensions (including the leading dot) that mark a file as
# binary. The groupings below are only for readability.
BINARY_EXTENSIONS = frozenset(
    (
        # Images
        ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp",
        # Audio / video
        ".mp3", ".mp4", ".wav", ".avi", ".mov", ".mkv",
        # Archives and compressed files
        ".zip", ".tar", ".gz", ".bz2", ".xz", ".7z",
        # Generic binaries, executables, shared libraries
        ".bin", ".exe", ".dll", ".so", ".dylib",
        # Serialized models / objects
        ".pt", ".pth", ".onnx", ".safetensors", ".pickle", ".pkl",
        # Documents
        ".pdf",
        # Fonts
        ".woff", ".woff2", ".ttf", ".otf", ".eot",
        # Compiled artifacts
        ".pyc", ".o", ".a",
        # Profiler reports and NumPy array files
        ".nsys-rep", ".npz", ".npy",
    )
)


def _is_binary(path: str) -> bool:
    """Return True when the extension of *path* is listed in ``BINARY_EXTENSIONS``.

    The path is lower-cased first, so the comparison is case-insensitive.
    The extension is whatever ``os.path.splitext`` reports for the path.
    """
    extension = os.path.splitext(path.lower())[1]
    return extension in BINARY_EXTENSIONS


# ---------------------------------------------------------------------------
# CI mode: only check newly added / modified lines in the PR diff
# ---------------------------------------------------------------------------


def _parse_added_lines(diff_text: str) -> List[Tuple[str, int, str]]:
    """Extract every added line from unified-diff text produced by ``git diff``.

    Args:
        diff_text: Unified diff output, one or more ``diff --git`` sections.

    Returns:
        A list of ``(path, line_number, content)`` tuples, one per added line.
        ``line_number`` is the 1-based position in the new version of the
        file and ``content`` is the line without its leading ``+``. Added
        lines belonging to a section whose path could not be parsed are
        dropped.
    """
    added: List[Tuple[str, int, str]] = []
    current_file = None
    line_num = 0
    # False while reading a section's header lines (index, ---, +++, mode,
    # "Binary files ..."); True once the first "@@" hunk header was seen.
    in_hunk = False

    for line in diff_text.split("\n"):
        # Hunk body lines always start with "+", "-", " " or "\", so these two
        # prefixes can only be real headers.
        if line.startswith("diff --git"):
            parts = line.split(" b/")
            current_file = parts[-1] if len(parts) >= 2 else None
            in_hunk = False
            continue
        if line.startswith("@@"):
            match = re.search(r"\+(\d+)", line)
            if match:
                # The first added/context line will bump this to the start line.
                line_num = int(match.group(1)) - 1
            in_hunk = True
            continue
        if not in_hunk:
            # Header lines such as "--- a/x" / "+++ b/x" carry no content.
            continue

        if line.startswith("+"):
            # Inside a hunk every "+" line is content, even when the content
            # itself begins with "++" (which renders as "+++...").
            line_num += 1
            if current_file:
                added.append((current_file, line_num, line[1:]))
        elif line.startswith(" "):
            line_num += 1
        # "-" lines and "\ No newline at end of file" markers do not exist in
        # the new file, so they must not advance the line counter.

    return added


def _check_diff(base_sha: str, head_sha: str) -> List[Tuple[str, int, str]]:
    """Find flagged characters on lines added between two revisions.

    Args:
        base_sha: Revision to diff from; resolved with ``git rev-parse``.
        head_sha: Revision to diff to; resolved with ``git rev-parse``.

    Returns:
        A list of ``(path, line_number, content)`` tuples for every added
        line that matches ``CHINESE_CHAR_PATTERN`` in a file that
        ``_is_binary`` does not classify as binary.

    Raises:
        subprocess.CalledProcessError: If a git command exits non-zero.
    """
    base_sha = subprocess.check_output(["git", "rev-parse", base_sha], text=True).strip()
    head_sha = subprocess.check_output(["git", "rev-parse", head_sha], text=True).strip()

    print(f"[CI mode] Checking diff between {base_sha[:8]} and {head_sha[:8]} ...")

    result = subprocess.run(
        [
            "git",
            # Print non-ASCII paths verbatim instead of as quoted octal escapes,
            # which the "diff --git" header parser cannot split.
            "-c",
            "core.quotePath=false",
            "diff",
            "-U0",
            # Include R: git detects renames by default, and a renamed file
            # that was also edited would otherwise be filtered out entirely.
            "--diff-filter=ACMR",
            base_sha,
            head_sha,
        ],
        capture_output=True,
        # Decode explicitly so the result does not depend on the runner's
        # locale; undecodable bytes become U+FFFD, which the pattern ignores.
        encoding="utf-8",
        errors="replace",
        check=True,
    )

    return [
        (path, line_no, content)
        for path, line_no, content in _parse_added_lines(result.stdout)
        if not _is_binary(path) and CHINESE_CHAR_PATTERN.search(content)
    ]


# ---------------------------------------------------------------------------
# Local mode: scan every tracked file in the repo
# ---------------------------------------------------------------------------


def _check_all_files() -> List[Tuple[str, int, str]]:
    """Scan the files listed by ``git ls-files`` for flagged characters.

    Paths are used exactly as git prints them, so the scan covers the files
    git lists for the current working directory. Files that ``_is_binary``
    classifies as binary, and paths that are not regular files, are skipped.

    Returns:
        A list of ``(path, line_number, line)`` tuples for every line that
        matches ``CHINESE_CHAR_PATTERN``; ``line`` has its trailing newline
        removed.

    Raises:
        subprocess.CalledProcessError: If ``git ls-files`` exits non-zero.
    """
    print("[Local mode] Scanning all tracked files for Chinese characters ...")

    # -z emits NUL-terminated, unquoted paths. Without it git wraps paths that
    # contain non-ASCII characters in quotes with octal escapes, and those
    # strings do not exist on disk, so such files would be skipped silently.
    tracked = subprocess.check_output(["git", "ls-files", "-z"], text=True).split("\0")

    findings: List[Tuple[str, int, str]] = []
    for filepath in tracked:
        if not filepath or _is_binary(filepath) or not os.path.isfile(filepath):
            continue
        try:
            # errors="ignore" drops undecodable bytes, so decoding never raises.
            with open(filepath, encoding="utf-8", errors="ignore") as fh:
                for line_num, line in enumerate(fh, start=1):
                    if CHINESE_CHAR_PATTERN.search(line):
                        findings.append((filepath, line_num, line.rstrip("\n")))
        except OSError as exc:
            # Best effort: keep scanning, but make the gap in coverage visible.
            print(f"warning: could not read {filepath}: {exc}", file=sys.stderr)

    return findings


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------


def _escape_command_data(value: str) -> str:
    """Escape text for the message part of a GitHub Actions workflow command."""
    # "%" must be replaced first so the escapes added below are not re-escaped.
    return value.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")


def _escape_command_property(value: str) -> str:
    """Escape text for a ``key=value`` property of a workflow command."""
    # Properties are separated by "," and terminated by "::", so both
    # delimiters need escaping in addition to the message escapes.
    return _escape_command_data(value).replace(":", "%3A").replace(",", "%2C")


def _report(findings: List[Tuple[str, int, str]], is_ci: bool) -> None:
    """Print the findings to stdout.

    Args:
        findings: ``(path, line_number, content)`` tuples to report.
        is_ci: When True, additionally print one ``::error`` workflow command
            per finding so GitHub Actions can annotate the offending line.
    """
    if not findings:
        print("\nNo Chinese characters found.")
        return

    print(f"\nFound {len(findings)} line(s) containing Chinese characters:\n")
    for filepath, line_no, content in findings:
        stripped = content.strip()
        print(f" {filepath}:{line_no}: {stripped}")
        if is_ci:
            # Unescaped "%", ",", ":" or line breaks in the path or message
            # would corrupt the workflow command, so escape both parts.
            file_prop = _escape_command_property(filepath)
            message = _escape_command_data(f"Chinese character detected: {stripped}")
            print(f"::error file={file_prop},line={line_no}::{message}")

    print(f"\n{len(findings)} occurrence(s) total. Please remove Chinese characters from your code.")


def main():
    """Run the check and exit with status 1 when anything was found.

    CI mode (diff only) is selected when both the ``BASE_REF`` and
    ``HEAD_REF`` environment variables are set to non-empty values;
    otherwise every tracked file is scanned.
    """
    base = os.environ.get("BASE_REF")
    head = os.environ.get("HEAD_REF")
    # A missing or empty value for either ref selects local mode.
    ci_mode = bool(base) and bool(head)

    findings = _check_diff(base, head) if ci_mode else _check_all_files()
    _report(findings, ci_mode)

    if not findings:
        return
    sys.exit(1)


if __name__ == "__main__":
    main()
72 changes: 72 additions & 0 deletions .github/workflows/integration_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
name: Integration Test on PR

on:
pull_request:
types: [opened, reopened, synchronize, ready_for_review]
branches:
- main

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-test
cancel-in-progress: true

jobs:
integration_test:
name: Integration Test
runs-on: [self-hosted, magi-compiler]
timeout-minutes: 30
env:
http_proxy: ${{ secrets.HTTP_PROXY }}
https_proxy: ${{ secrets.HTTPS_PROXY }}
no_proxy: localhost,127.0.0.1,::1
PIP_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
PIP_TRUSTED_HOST: pypi.tuna.tsinghua.edu.cn
GIT_HTTP_CONNECT_TIMEOUT: 10 # Connection timeout in seconds
GIT_HTTP_LOW_SPEED_LIMIT: 100 # Minimum speed threshold (bytes/s)
GIT_HTTP_LOW_SPEED_TIME: 10 # Abort if below threshold for this many seconds
permissions:
pull-requests: read
contents: read
defaults:
run:
shell: bash
steps:
- name: Configure git proxy
  # Pass the secrets through environment variables instead of expanding them
  # into the script text, so quotes or shell metacharacters inside a proxy
  # URL cannot alter the commands that are executed.
  env:
    PROXY_HTTP_URL: ${{ secrets.HTTP_PROXY }}
    PROXY_HTTPS_URL: ${{ secrets.HTTPS_PROXY }}
  run: |
    git config --global http.proxy "$PROXY_HTTP_URL"
    # NOTE(review): git-config documents http.proxy (also used for https://
    # remotes); confirm whether the https.proxy key below has any effect.
    git config --global https.proxy "$PROXY_HTTPS_URL"

- name: Checkout base branch
uses: actions/checkout@v4
timeout-minutes: 5
with:
ref: ${{ github.event.pull_request.base.sha }}
fetch-depth: 1
persist-credentials: false

- name: Checkout PR head
uses: actions/checkout@v4
timeout-minutes: 5
with:
ref: ${{ github.event.pull_request.head.sha }}
fetch-depth: 1
persist-credentials: false
clean: false

- name: Check Chinese Characters
run: python3 .github/workflows/check_chinese_chars.py
env:
BASE_REF: ${{ github.event.pull_request.base.sha }}
HEAD_REF: ${{ github.event.pull_request.head.sha }}

- name: Check Code Style
run: pre-commit run --show-diff-on-failure --color=always --all-files

- name: Install MagiCompiler
run: pip install --no-build-isolation --force-reinstall . --break-system-packages

- name: Install test dependencies
run: pip install -r requirements-test.txt --break-system-packages

- name: Run MagiCompiler Unit Tests
run: pytest -v tests/
2 changes: 1 addition & 1 deletion Dockerfile.magi_compiler.base
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ RUN --mount=type=secret,id=http_proxy,required=false \
COPY requirements.txt /app/
RUN pip install -r /app/requirements.txt

WORKDIR /app
WORKDIR /app
4 changes: 4 additions & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Test-only dependencies (not required for magi_compiler itself)
diffusers==0.32.2
timm==1.0.15
transformers==4.48.3
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
cuda-python
depyf
graphviz
seaborn
pydantic-settings
seaborn
triton==3.5.0
Loading