Skip to content

Commit 5d831fe

Browse files
Internal change
PiperOrigin-RevId: 922850859
1 parent 76b61ae commit 5d831fe

4 files changed

Lines changed: 236 additions & 7 deletions

File tree

.bazelrc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,7 @@ build:windows --google_default_credentials=true
3232
build:macos --remote_cache=https://storage.googleapis.com/macos-cel-python-remote-cache
3333
build:macos --google_default_credentials=true
3434

35+
# Silence deprecation warnings from external dependencies (Linux and macOS)
36+
build:linux --cxxopt=-Wno-deprecated-declarations
37+
build:macos --cxxopt=-Wno-deprecated-declarations
38+

release/kokoro/release_linux.cfg

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,8 @@
33

44
build_file: "cel-python/release/kokoro/release_linux.sh"
55
timeout_mins: 120
6+
7+
container_properties {
8+
docker_image: "us-central1-docker.pkg.dev/kokoro-container-bakery/kokoro/ubuntu/ubuntu2204/ktcb:current"
9+
docker_sibling_containers: true
10+
}

release/kokoro/release_linux.sh

Lines changed: 216 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,113 @@
11
#!/bin/bash
22
set -e
33

4+
5+
# Avoid virtualenv/pip trying to download/upgrade tools from PyPI on host
6+
export VIRTUALENV_NO_DOWNLOAD=1
7+
export PIP_DISABLE_PIP_VERSION_CHECK=1
8+
9+
# Pass these environment variables to the cibuildwheel Docker container
10+
export CIBW_ENVIRONMENT="VIRTUALENV_NO_DOWNLOAD=1 PIP_DISABLE_PIP_VERSION_CHECK=1"
11+
export CIBW_DEPENDENCY_VERSIONS="latest"
12+
413
# If running locally (not on Kokoro), authenticate with gcloud.
514
if [ -z "${KOKORO_BUILD_ID}" ]; then
615
if ! gcloud auth application-default print-access-token --quiet > /dev/null; then
716
gcloud auth application-default login
817
fi
918
fi
1019

11-
pip install -U keyring keyrings.google-artifactregistry-auth twine cibuildwheel
20+
# We use --no-cache-dir to force pip to download packages fresh and bypass the local
21+
# cache. In Kokoro/RBE sandboxed environments, writing to the default cache directory
22+
# (~/.cache/pip) can encounter permission/sandbox restrictions or lead to stale
23+
# dependency resolution. Disabling the cache ensures a reliable, reproducible install.
24+
pip install --no-cache-dir -U keyring keyrings.google-artifactregistry-auth twine cibuildwheel
25+
26+
# Patch cibuildwheel at runtime to bypass the RBE stdout buffering deadlock.
27+
# The RBE proxy buffers the persistent container bash stdout. By appending a 4KB
28+
# padding line to the end of every command output, we force the proxy to flush the
29+
# buffer immediately. We then read and discard this padding to keep the stream clean.
30+
OCI_PATH=$(python3 -c "import cibuildwheel.oci_container; print(cibuildwheel.oci_container.__file__)")
31+
echo "Patching cibuildwheel at $OCI_PATH..."
32+
33+
cat << 'EOF' > patch_oci.py
34+
import sys
35+
import re
36+
37+
path = sys.argv[1]
38+
with open(path, 'r') as f:
39+
content = f.read()
40+
41+
# 1. Force a 4KB flush at the end of every command execution
42+
target_write = 'printf "%04d%s\\n" $? {end_of_message}'
43+
replacement_write = 'printf "%04d%s\\n%4096s\\n" $? {end_of_message} " "'
44+
if target_write in content:
45+
content = content.replace(target_write, replacement_write)
46+
print("Patched write loop.")
47+
48+
# 2. Read and discard the 4KB padding to keep the stream clean
49+
target_read = """ # add the last line to output, without the footer
50+
output_io.write(line[0:footer_offset])
51+
output_io.flush()
52+
break"""
53+
54+
replacement_read = """ # add the last line to output, without the footer
55+
output_io.write(line[0:footer_offset])
56+
output_io.flush()
57+
# Read and discard the 4KB padding line to clear the stream!
58+
self.bash_stdout.readline()
59+
break"""
60+
61+
if target_read in content:
62+
content = content.replace(target_read, replacement_read)
63+
print("Patched read loop.")
64+
65+
# 3. Replace the whole copy_into method: the regex matches from its def line up to the start of copy_out
66+
pattern = re.compile(r' def copy_into\(self,.*?\).*?:.*? def copy_out', re.DOTALL)
67+
68+
replacement_copy = """ def copy_into(self, from_path: Path, to_path: PurePath) -> None:
69+
if from_path.is_dir():
70+
self.call(["mkdir", "-p", to_path])
71+
subprocess.run(
72+
f"tar -c {self.host_tar_format} -f - . | {self.engine.name} exec -i {self.name} tar --no-same-owner -xC {shell_quote(to_path)} -f -",
73+
shell=True,
74+
check=True,
75+
cwd=from_path,
76+
)
77+
else:
78+
self.call(["mkdir", "-p", to_path.parent])
79+
# Use native docker cp to copy the file, avoiding stdin EOF deadlocks in RBE
80+
subprocess.run(
81+
[
82+
self.engine.name,
83+
"cp",
84+
str(from_path),
85+
f"{self.name}:{to_path}",
86+
],
87+
check=True,
88+
)
89+
90+
def copy_out"""
91+
92+
if pattern.search(content):
93+
content = pattern.sub(replacement_copy, content)
94+
print("Patched copy_into method using unique regex.")
95+
else:
96+
print("Error: copy_into method pattern not found!")
97+
sys.exit(1)
98+
99+
with open(path, 'w') as f:
100+
f.write(content)
101+
102+
print("Successfully patched oci_container.py!")
103+
EOF
104+
105+
python3 patch_oci.py "$OCI_PATH"
106+
rm patch_oci.py
107+
108+
# Verify that the patched file is syntactically valid Python
109+
echo "Verifying patched oci_container.py syntax..."
110+
python3 -m py_compile "$OCI_PATH" || { echo "ERROR: Patched oci_container.py is corrupted!"; exit 1; }
12111

13112
REPO_DIR=$(mktemp -d)
14113
echo "Created temporary directory: ${REPO_DIR}"
@@ -43,26 +142,139 @@ echo "Building release for version: ${VERSION}"
43142
TMP_DIR=$(mktemp -d)
44143
echo "Build directory: ${TMP_DIR}"
45144

46-
# Add trap cleanup for TMP_DIR as well
47-
trap 'echo "Cleaning up temporary directories: ${REPO_DIR} ${TMP_DIR}"; rm -rf "${REPO_DIR}" "${TMP_DIR}"' EXIT
145+
# Define a cleanup function that runs on every exit: it prints the tail of the cibuildwheel log when one exists, then removes the temporary directories
146+
cleanup() {
147+
echo "=== CLEANUP TRIGGERED ==="
148+
if [ -f cibuildwheel.log ]; then
149+
echo "=== LAST 200 LINES OF CIBUILDWHEEL LOG ==="
150+
tail -n 200 cibuildwheel.log
151+
fi
152+
echo "Cleaning up temporary directories: ${REPO_DIR} ${TMP_DIR}"
153+
rm -rf "${REPO_DIR}" "${TMP_DIR}"
154+
}
155+
trap cleanup EXIT
48156

49157
pushd "${TMP_DIR}"
50158

51159
cp -r "${SRC_DIR}"/{*,.*} . 2>/dev/null || true
52160
cp -r "${SRC_DIR}"/release/* . 2>/dev/null || true
53161
rm -rf cel_expr_python/*_test.py
54162

163+
echo "Downloading bazelisk on host..."
164+
curl -LO https://github.com/bazelbuild/bazelisk/releases/download/v1.19.0/bazelisk-linux-amd64
165+
chmod +x bazelisk-linux-amd64
166+
55167
# Check if pyproject.toml exists before running sed
56168
if [ -f pyproject.toml ]; then
57169
sed -i "" "s/\$VERSION/${VERSION}/g" pyproject.toml || sed -i "s/\$VERSION/${VERSION}/g" pyproject.toml
58170
fi
59171

172+
export CIBW_CONTAINER_ENGINE_EXTRA_ARGS="--network=host"
173+
60174
# Show the command that will actually run, including the fallback used when CIBWHEEL_BIN is unset.
echo "Running cibuildwheel: ${CIBWHEEL_BIN:-python3 -m cibuildwheel}"
61175
# Default CIBWHEEL_BIN if not set
62176
if [ -z "${CIBWHEEL_BIN}" ]; then
63177
CIBWHEEL_BIN="python3 -m cibuildwheel"
64178
fi
65-
${CIBWHEEL_BIN} --platform linux --output-dir dist
179+
180+
echo "Installing diagnostic tools (psmisc, strace) on host..."
181+
# We try to install them, but don't fail the build if we can't (e.g. if no sudo or apt)
182+
sudo apt-get update && sudo apt-get install -y psmisc strace || echo "Failed to install diagnostic tools, proceeding anyway..."
183+
184+
echo "Running cibuildwheel in background..."
185+
${CIBWHEEL_BIN} --platform linux --output-dir dist > cibuildwheel.log 2>&1 &
186+
CIBW_PID=$!
187+
188+
echo "Started cibuildwheel in background with PID $CIBW_PID"
189+
190+
# Poll the log file waiting for the hang
191+
# We look for the "mkdir -p" line followed by no activity for 60 seconds.
192+
TIMEOUT=900 # 15 minutes of hang polling; once reached the loop exits and the script falls through to a plain wait on the build
193+
ELAPSED=0
194+
LAST_SIZE=0
195+
STUCK_COUNT=0
196+
HANG_DETECTED=false
197+
198+
while kill -0 $CIBW_PID 2>/dev/null; do
199+
if [ -f cibuildwheel.log ]; then
200+
# Check if the log contains the test setup line
201+
if grep -q "mkdir -p" cibuildwheel.log; then
202+
CURRENT_SIZE=$(stat -c%s cibuildwheel.log)
203+
if [ "$CURRENT_SIZE" -eq "$LAST_SIZE" ]; then
204+
# Log size hasn't changed. If this persists for 60 seconds, we assume it is stuck.
205+
STUCK_COUNT=$((STUCK_COUNT + 10))
206+
echo "Log size unchanged for ${STUCK_COUNT}s at mkdir -p..."
207+
if [ $STUCK_COUNT -ge 60 ]; then
208+
HANG_DETECTED=true
209+
break
210+
fi
211+
else
212+
STUCK_COUNT=0
213+
LAST_SIZE=$CURRENT_SIZE
214+
fi
215+
fi
216+
fi
217+
218+
sleep 10
219+
ELAPSED=$((ELAPSED + 10))
220+
if [ $ELAPSED -ge $TIMEOUT ]; then
221+
echo "Timeout waiting for build to complete."
222+
break
223+
fi
224+
done
225+
226+
if [ "$HANG_DETECTED" = "true" ]; then
227+
echo "===================================================="
228+
echo "!!! DETECTED HANG AT mkdir -p !!! STARTING DIAGNOSTICS"
229+
echo "===================================================="
230+
231+
echo "=== HOST PROCESSES ==="
232+
ps aux
233+
234+
echo "=== PROCESS TREE ==="
235+
pstree -p -a || echo "pstree not available"
236+
237+
echo "=== DOCKER CONTAINERS ==="
238+
docker ps -a
239+
240+
CONTAINER_ID=$(docker ps -q | head -n 1)
241+
if [ -n "$CONTAINER_ID" ]; then
242+
echo "=== CONTAINER PROCESSES ($CONTAINER_ID) ==="
243+
# Best-effort diagnostic: the script runs under `set -e`, so tolerate a failure
# here instead of aborting the remaining diagnostics and the final kill.
docker exec "$CONTAINER_ID" ps aux || echo "ps not available in container"
244+
245+
echo "=== CONTAINER LSOF ==="
246+
docker exec "$CONTAINER_ID" lsof || echo "lsof not available"
247+
248+
echo "=== CONTAINER DOCKER INSPECT ==="
249+
docker inspect "$CONTAINER_ID"
250+
251+
echo "=== STRACE DOCKER PROCESSES ==="
252+
DOCKER_PID=$(pgrep -f "docker start|docker exec" | head -n 1)
253+
if [ -n "$DOCKER_PID" ]; then
254+
echo "Stracing host docker process $DOCKER_PID for 15 seconds..."
255+
timeout 15 strace -p "$DOCKER_PID" -f || true
256+
fi
257+
else
258+
echo "No active docker container found!"
259+
fi
260+
261+
echo "=== LAST 100 LINES OF CIBUILDWHEEL LOG ==="
262+
tail -n 100 cibuildwheel.log
263+
264+
echo "Diagnostics complete. Killing cibuildwheel."
265+
kill -9 $CIBW_PID
266+
exit 99
267+
fi
268+
269+
# If it didn't hang, wait for it to finish and print the log
270+
# The script runs under `set -e`: a bare `wait` that returns non-zero would abort
# immediately, before the exit status is recorded and the full log is printed.
# `&& true` leaves the status of `wait` in $? without triggering errexit.
wait "$CIBW_PID" && true
271+
RC=$?
272+
echo "=== CIBUILDWHEEL LOG ==="
273+
cat cibuildwheel.log
274+
if [ $RC -ne 0 ]; then
275+
echo "cibuildwheel failed with exit code $RC"
276+
exit $RC
277+
fi
66278

67279
if [ "${DRY_RUN}" = "true" ]; then
68280
echo "[DRY RUN] Skipping upload to PyPI exit gate."

release/pyproject.toml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,21 @@ where = ["."]
3838
exclude = ["codelab*", "conformance*", "custom_ext*", "release*", "testing*", "wheelhouse*"]
3939

4040
[tool.cibuildwheel]
41-
build = "cp311-* cp312-* cp313-* cp314-*"
42-
skip = "*musllinux* *win32*"
41+
build = "cp311-*"
42+
#build = "cp311-* cp312-* cp313-* cp314-*"
43+
skip = "*musllinux* *win32* *i686*"
4344
test-command = "python {project}/cel_basic_test.py"
4445
build-verbosity = 1
4546

4647
[tool.cibuildwheel.linux]
47-
before-all = "echo 'Installing bazelisk'; curl -LO https://github.com/bazelbuild/bazelisk/releases/download/v1.19.0/bazelisk-linux-amd64 && chmod +x bazelisk-linux-amd64 && mv bazelisk-linux-amd64 /usr/local/bin/bazel"
48+
manylinux-x86_64-image = "manylinux_2_28"
49+
container-engine = "docker; disable_host_mount: True"
50+
# Google's internal Kokoro/RBE network uses a secure MITM proxy that re-signs HTTPS
51+
# traffic with an internal Google CA. Since the public manylinux container does not
52+
# trust this CA, git fetches for external dependencies (like @cel-cpp) will fail
53+
# with SSL certificate errors. We disable http.sslVerify inside the container to
54+
# bypass this and allow Bazel to fetch SCM dependencies through the proxy.
55+
before-all = "git config --global http.sslVerify false && echo 'Installing bazelisk' && cp {project}/bazelisk-linux-amd64 /usr/local/bin/bazel"
4856

4957
[tool.cibuildwheel.macos]
5058
before-all = "echo 'Installing bazelisk'; brew install bazelisk"

0 commit comments

Comments
 (0)