Skip to content

Commit fdaeb73

Browse files
committed
[Chore] Add CI workflow and test requirements
1 parent 0dc7cdb commit fdaeb73

3 files changed

Lines changed: 279 additions & 0 deletions

File tree

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (c) 2026 SandAI. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""
18+
Check for Chinese characters in source code.
19+
20+
Two modes:
21+
- CI mode : set env vars BASE_REF and HEAD_REF to only check the PR diff.
22+
- Local mode: run without those env vars to scan every tracked file in the repo.
23+
24+
Usage:
25+
python3 .github/workflows/check_chinese_chars.py # scan entire repo
26+
BASE_REF=main HEAD_REF=HEAD python3 ... # scan diff only
27+
"""
28+
29+
import os
30+
import re
31+
import subprocess
32+
import sys
33+
from typing import List, Tuple
34+
35+
# Unicode ranges (written as regex character-class ranges) that the checker
# treats as "Chinese". Escapes are used so this file never contains a literal
# character from any of these ranges itself.
_CHINESE_CHAR_RANGES = (
    "\u4e00-\u9fff",  # CJK Unified Ideographs
    "\u3400-\u4dbf",  # CJK Unified Ideographs Extension A
    "\uf900-\ufaff",  # CJK Compatibility Ideographs
    "\u3000-\u303f",  # CJK Symbols and Punctuation
    "\uff01-\uff5e",  # Fullwidth ASCII variants
)

# Matches any single character that falls in one of the ranges above.
CHINESE_CHAR_PATTERN = re.compile("[" + "".join(_CHINESE_CHAR_RANGES) + "]")
44+
45+
# Lower-case file extensions (including the leading dot) whose files are
# skipped by the scanner instead of being searched for Chinese characters.
BINARY_EXTENSIONS = frozenset(
    (
        # Images
        ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp",
        # Audio / video
        ".mp3", ".mp4", ".wav", ".avi", ".mov", ".mkv",
        # Archives
        ".zip", ".tar", ".gz", ".bz2", ".xz", ".7z",
        # Executables and native libraries
        ".bin", ".exe", ".dll", ".so", ".dylib",
        # Model weights and serialized objects
        ".pt", ".pth", ".onnx", ".safetensors", ".pickle", ".pkl",
        # Documents
        ".pdf",
        # Fonts
        ".woff", ".woff2", ".ttf", ".otf", ".eot",
        # Build artifacts
        ".pyc", ".o", ".a",
        # Profiler reports and NumPy arrays
        ".nsys-rep", ".npz", ".npy",
    )
)
92+
93+
94+
def _is_binary(path: str) -> bool:
    """Return True if *path* ends in an extension listed in BINARY_EXTENSIONS.

    The path is lower-cased before its extension is extracted, so the
    comparison is case-insensitive. Only the final extension is considered.
    """
    suffix = os.path.splitext(path.lower())[1]
    return suffix in BINARY_EXTENSIONS
97+
98+
99+
# ---------------------------------------------------------------------------
100+
# CI mode: only check newly added / modified lines in the PR diff
101+
# ---------------------------------------------------------------------------
102+
103+
104+
def _path_from_new_file_header(line: str):
    """Extract the repository path from a ``+++ b/<path>`` diff header line.

    Returns None when the header does not name a ``b/`` path (for example
    ``+++ /dev/null``).
    """
    target = line[len("+++ "):]
    # git appends a TAB after names that contain whitespace.
    if target.endswith("\t"):
        target = target[:-1]
    # Names with control characters or quotes are still C-quoted even with
    # core.quotePath=false; drop the surrounding quotes (escapes are left as-is).
    if len(target) >= 2 and target[0] == '"' and target[-1] == '"':
        target = target[1:-1]
    if target.startswith("b/"):
        return target[len("b/"):]
    return None


def _scan_added_lines(diff_text: str) -> List[Tuple[str, int, str]]:
    """Return (file, line number, content) for added diff lines with Chinese characters.

    *diff_text* is unified-diff output that uses the ``a/`` / ``b/`` prefixes.
    Line numbers refer to the new (head) version of each file.
    """
    findings: List[Tuple[str, int, str]] = []
    current_file = None
    line_num = 0
    # File headers ("---"/"+++") only occur before the first hunk of a file.
    # Once inside a hunk, a line starting with "+++" or "---" is real content
    # (an added line beginning with "++", or a removed line beginning with "--").
    in_hunk = False

    for line in diff_text.split("\n"):
        if line.startswith("diff --git "):
            current_file = None
            in_hunk = False
            continue
        if line.startswith("@@"):
            match = re.search(r"\+(\d+)", line)
            if match:
                # Point just before the first new-side line of the hunk.
                line_num = int(match.group(1)) - 1
            in_hunk = True
            continue
        if not in_hunk:
            if line.startswith("+++ "):
                current_file = _path_from_new_file_header(line)
            # Everything else here is header noise (index, mode, "---", ...).
            continue

        if line.startswith("+"):
            line_num += 1
            content = line[1:]
            if current_file and not _is_binary(current_file) and CHINESE_CHAR_PATTERN.search(content):
                findings.append((current_file, line_num, content))
        elif line.startswith(" "):
            # Context line: exists on the new side too (not expected with -U0).
            line_num += 1
        # Removed lines ("-") and "\ No newline at end of file" markers do not
        # exist on the new side, so they never advance the counter.

    return findings


def _check_diff(base_sha: str, head_sha: str) -> List[Tuple[str, int, str]]:
    """Check only the lines added or modified between two revisions.

    Args:
        base_sha: Revision (SHA or ref name) of the comparison base.
        head_sha: Revision (SHA or ref name) of the change being checked.

    Returns:
        A list of ``(file path, line number in head, line content)`` tuples,
        one per added line that contains a Chinese character. Files whose
        extension is in BINARY_EXTENSIONS are ignored.

    Raises:
        subprocess.CalledProcessError: if ``git rev-parse`` or ``git diff`` fails.
    """
    base_sha = subprocess.check_output(["git", "rev-parse", base_sha], text=True).strip()
    head_sha = subprocess.check_output(["git", "rev-parse", head_sha], text=True).strip()

    print(f"[CI mode] Checking diff between {base_sha[:8]} and {head_sha[:8]} ...")

    result = subprocess.run(
        [
            "git",
            # Emit non-ASCII path names verbatim instead of octal-escaped and quoted.
            "-c",
            "core.quotePath=false",
            "diff",
            "-U0",
            # Pin the output format so user/runner git config cannot change it.
            "--no-color",
            "--no-ext-diff",
            "--src-prefix=a/",
            "--dst-prefix=b/",
            "--diff-filter=ACM",
            base_sha,
            head_sha,
        ],
        capture_output=True,
        # Decode explicitly: the diff may contain files that are not valid UTF-8,
        # and the locale codec must not decide whether the check crashes.
        encoding="utf-8",
        errors="replace",
        check=True,
    )

    return _scan_added_lines(result.stdout)
139+
140+
141+
# ---------------------------------------------------------------------------
142+
# Local mode: scan every tracked file in the repo
143+
# ---------------------------------------------------------------------------
144+
145+
146+
def _check_all_files() -> List[Tuple[str, int, str]]:
    """Scan every git-tracked file under the current directory.

    Returns:
        A list of ``(file path, line number, line content)`` tuples, one per
        line that contains a Chinese character. Files whose extension is in
        BINARY_EXTENSIONS, and paths that are not regular files, are skipped.

    Raises:
        subprocess.CalledProcessError: if ``git ls-files`` fails.
    """
    print("[Local mode] Scanning all tracked files for Chinese characters ...")

    # -z gives NUL-separated, verbatim path names. Without it git C-quotes
    # non-ASCII names, the quoted form does not exist on disk, and such files
    # would silently never be scanned.
    raw_listing = subprocess.check_output(["git", "ls-files", "-z"])
    tracked = [os.fsdecode(entry) for entry in raw_listing.split(b"\0") if entry]

    findings: List[Tuple[str, int, str]] = []
    for filepath in tracked:
        if _is_binary(filepath) or not os.path.isfile(filepath):
            continue
        try:
            # Undecodable bytes are dropped so non-UTF-8 files can still be scanned.
            with open(filepath, encoding="utf-8", errors="ignore") as fh:
                for line_num, line in enumerate(fh, start=1):
                    if CHINESE_CHAR_PATTERN.search(line):
                        findings.append((filepath, line_num, line.rstrip("\n")))
        except (OSError, UnicodeDecodeError) as exc:
            # Best effort: keep scanning, but leave a trace of what was skipped.
            print(f"warning: skipping unreadable file {filepath!r}: {exc}", file=sys.stderr)

    return findings
164+
165+
166+
# ---------------------------------------------------------------------------
167+
# Entry point
168+
# ---------------------------------------------------------------------------
169+
170+
171+
def _report(findings: List[Tuple[str, int, str]], is_ci: bool) -> None:
172+
if not findings:
173+
print("\nNo Chinese characters found.")
174+
return
175+
176+
print(f"\nFound {len(findings)} line(s) containing Chinese characters:\n")
177+
for filepath, line_no, content in findings:
178+
stripped = content.strip()
179+
print(f" {filepath}:{line_no}: {stripped}")
180+
if is_ci:
181+
print(f"::error file={filepath},line={line_no}::Chinese character detected: {stripped}")
182+
183+
print(f"\n{len(findings)} occurrence(s) total. Please remove Chinese characters from your code.")
184+
185+
186+
def main():
    """Run the check in CI or local mode and exit non-zero on findings.

    CI mode (diff only) is selected when both the BASE_REF and HEAD_REF
    environment variables are set to non-empty values; otherwise every
    tracked file is scanned. Exits with status 1 if anything was found.
    """
    base_ref = os.environ.get("BASE_REF")
    head_ref = os.environ.get("HEAD_REF")
    is_ci = bool(base_ref and head_ref)

    findings = _check_diff(base_ref, head_ref) if is_ci else _check_all_files()
    _report(findings, is_ci)

    if findings:
        sys.exit(1)


if __name__ == "__main__":
    main()
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Runs the Chinese-character check, code-style check, and unit tests on every
# pull request that targets main.
name: Integration Test on PR

on:
  pull_request:
    types: [opened, reopened, synchronize, ready_for_review]
    branches:
      - main

# A new push to the same PR cancels the run that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-test
  cancel-in-progress: true

jobs:
  integration_test:
    name: Integration Test
    runs-on: [self-hosted, magi-compiler]
    timeout-minutes: 30
    env:
      http_proxy: ${{ secrets.HTTP_PROXY }}
      https_proxy: ${{ secrets.HTTPS_PROXY }}
      no_proxy: localhost,127.0.0.1,::1
      PIP_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
      PIP_TRUSTED_HOST: pypi.tuna.tsinghua.edu.cn
      GIT_HTTP_CONNECT_TIMEOUT: 10 # Connection timeout in seconds
      GIT_HTTP_LOW_SPEED_LIMIT: 100 # Minimum speed threshold (bytes/s)
      GIT_HTTP_LOW_SPEED_TIME: 10 # Abort if below threshold for this many seconds
    permissions:
      pull-requests: read
      contents: read
    defaults:
      run:
        shell: bash
    steps:
      - name: Configure git proxy
        # Read the proxy URLs from the job-level environment instead of
        # interpolating the secrets into the script text, so their values are
        # never parsed as shell syntax.
        run: |
          git config --global http.proxy "$http_proxy"
          git config --global https.proxy "$https_proxy"

      # The base commit is fetched first so that both commits are present
      # locally when the diff-based check runs.
      - name: Checkout base branch
        uses: actions/checkout@v4
        timeout-minutes: 5
        with:
          ref: ${{ github.event.pull_request.base.sha }}
          fetch-depth: 1
          persist-credentials: false

      - name: Checkout PR head
        uses: actions/checkout@v4
        timeout-minutes: 5
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          fetch-depth: 1
          persist-credentials: false
          clean: false

      - name: Check Chinese Characters
        run: python3 .github/workflows/check_chinese_chars.py
        env:
          BASE_REF: ${{ github.event.pull_request.base.sha }}
          HEAD_REF: ${{ github.event.pull_request.head.sha }}

      - name: Check Code Style
        run: pre-commit run --show-diff-on-failure --color=always --all-files

      - name: Install MagiCompiler
        run: pip install --no-build-isolation --force-reinstall . --break-system-packages

      - name: Install test dependencies
        run: pip install -r requirements-test.txt --break-system-packages

      - name: Run MagiCompiler Unit Tests
        run: pytest -v tests/

requirements-test.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Test-only dependencies (not required for magi_compiler itself)
2+
timm==1.0.15
3+
diffusers==0.32.2
4+
transformers==4.48.3

0 commit comments

Comments
 (0)