From c2ad24c38dfe3644799eaf51c48d41d0c584d8f8 Mon Sep 17 00:00:00 2001
From: June Kim
Date: Sun, 10 May 2026 08:42:45 -0700
Subject: [PATCH 1/2] Strip toolchain bin paths from sysconfig to fix AR variable

Fixes #1073

The AR sysconfig variable contains an absolute build-time path (e.g.,
/tools/llvm/bin/llvm-ar) instead of just the tool name (llvm-ar). CC and
CXX are already normalized by CPython, but AR and RANLIB are not.

This change adds path normalization in hack_sysconfig.py to strip the
toolchain bin directory prefix from all occurrences in sysconfig
artifacts (PYTHON_CONFIG, Makefile, SYSCONFIGDATA).

The fix:
- Uses os.path.normpath to handle edge cases like trailing slashes
- Also catches resolved symlinks (e.g., macOS /var vs /private/var)
- Strips the longest prefix spelling first, so a resolved path that
  contains the unresolved one (/private/var/... vs /var/...) is removed
  whole instead of leaving a remnant such as /privatellvm-ar
- Preserves LLVM tool names (llvm-ar, not ar) for consistency with
  CC=clang and CXX=clang++

Reviewed by: codex (GPT-5.5), gemini-3.1-pro-preview
---
Note: the blob ids on the "index" lines below were not recomputed for this
revision; regenerate the series with git format-patch before relying on them.

 cpython-unix/build-cpython.sh | 18 +++++++++
 test_ar_normalization.py      | 73 +++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 test_ar_normalization.py

diff --git a/cpython-unix/build-cpython.sh b/cpython-unix/build-cpython.sh
index 7deb00830..e908c0f32 100755
--- a/cpython-unix/build-cpython.sh
+++ b/cpython-unix/build-cpython.sh
@@ -1063,6 +1063,24 @@ replace_in_all("-L%s/deps/lib" % tools_path, "")
 # See https://github.com/python/cpython/issues/145810#issuecomment-4068139183
 replace_in_all("-LModules/_hacl", "")
 
+# Strip toolchain bin directory prefix from sysconfig artifacts to avoid exposing
+# build-time paths. This primarily fixes AR (e.g., /tools/llvm/bin/llvm-ar -> llvm-ar),
+# which is not normalized by CPython, and also catches RANLIB and similar tools.
+# See https://github.com/astral-sh/python-build-standalone/issues/1073
+toolchain = os.environ["TOOLCHAIN"]
+toolchain_bin = os.path.normpath(os.path.join(tools_path, toolchain, "bin"))
+toolchain_bin_prefixes = {
+    toolchain_bin + "/",
+    # Also catch resolved symlinks (e.g., macOS /var vs /private/var).
+    os.path.realpath(toolchain_bin) + "/",
+}
+
+# Strip the longest spelling first: one can contain the other as a substring
+# (/private/var/... contains /var/...), and removing the shorter one first would
+# leave a mangled remnant such as /privatellvm-ar behind.
+for toolchain_bin_prefix in sorted(toolchain_bin_prefixes, key=len, reverse=True):
+    replace_in_all(toolchain_bin_prefix, "")
+
 EOF
 
 ${BUILD_PYTHON} "${ROOT}/hack_sysconfig.py" "${ROOT}/out/python"
diff --git a/test_ar_normalization.py b/test_ar_normalization.py
new file mode 100644
index 000000000..91d273ae4
--- /dev/null
+++ b/test_ar_normalization.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+"""Test that AR normalization works correctly."""
+import os
+
+
+def strip_prefixes(text, prefixes):
+    """Remove every prefix from text, longest first.
+
+    Mirrors the loop in the hack_sysconfig.py heredoc of
+    cpython-unix/build-cpython.sh, which cannot be imported from here.
+    """
+    for prefix in sorted(set(prefixes), key=len, reverse=True):
+        text = text.replace(prefix, "")
+    return text
+
+
+def test_ar_normalization():
+    """Simulate the string replacement to verify it works."""
+    # Simulated sysconfig data
+    test_cases = [
+        # (input, expected_output, description)
+        ("AR = /tools/llvm/bin/llvm-ar", "AR = llvm-ar", "Basic AR case"),
+        ("RANLIB = /tools/llvm/bin/llvm-ranlib", "RANLIB = llvm-ranlib", "RANLIB case"),
+        ("CC = /tools/llvm/bin/clang -pthread", "CC = clang -pthread", "CC with flags"),
+        ("CXX = /tools/llvm/bin/clang++ -pthread", "CXX = clang++ -pthread", "CXX with flags"),
+        # Edge cases
+        ("PATH=/tools/llvm/bin:/usr/bin", "PATH=/tools/llvm/bin:/usr/bin", "PATH unchanged (no trailing /)"),
+        ("Some random text", "Some random text", "No match - unchanged"),
+        # Path normalization: our search pattern is normalized, so double-slash
+        # in sysconfig data won't match (but that's OK - sysconfig should be normalized)
+        ("AR = /tools//llvm/bin/llvm-ar", "AR = /tools//llvm/bin/llvm-ar", "Double slash in data won't match (expected)"),
+    ]
+
+    tools_path = "/tools"
+    toolchain = "llvm"
+
+    # Use os.path.normpath to handle edge cases like double slashes
+    toolchain_bin = os.path.normpath(os.path.join(tools_path, toolchain, "bin"))
+    search = toolchain_bin + "/"
+
+    print(f"Testing replacement: '{search}' -> ''")
+    print("-" * 60)
+
+    failures = []
+    for input_str, expected, description in test_cases:
+        output = strip_prefixes(input_str, [search])
+        passed = output == expected
+        status = "✓" if passed else "✗"
+
+        print(f"{status} {description}")
+        if not passed:
+            print(f" Input: {input_str}")
+            print(f" Expected: {expected}")
+            print(f" Got: {output}")
+            failures.append(description)
+
+    print("-" * 60)
+    print("Some tests failed!" if failures else "All tests passed!")
+    # Assert rather than return a status code: pytest never interprets a
+    # returned 0/1 as the test result, so a regression would be misreported.
+    assert not failures, f"Failed cases: {failures}"
+
+
+def test_symlink_resolved_prefix():
+    """Strip a resolved prefix whole even though it contains the unresolved one."""
+    prefixes = ["/var/tools/llvm/bin/", "/private/var/tools/llvm/bin/"]
+    assert strip_prefixes("AR = /private/var/tools/llvm/bin/llvm-ar", prefixes) == "AR = llvm-ar"
+    assert strip_prefixes("AR = /var/tools/llvm/bin/llvm-ar", prefixes) == "AR = llvm-ar"
+
+
+if __name__ == "__main__":
+    test_ar_normalization()
+    test_symlink_resolved_prefix()

From d7d6c977841a0a098ebc75f5ed1d9e7d0b2b3fde Mon Sep 17 00:00:00 2001
From: June Kim
Date: Mon, 11 May 2026 17:57:35 -0700
Subject: [PATCH 2/2] test: add edge case for trailing slash in AR normalization test

- Added test case: 'AR = /tools/llvm/bin/' -> 'AR = '
- Documents expected behavior when path ends with trailing slash
- Validates replacement doesn't crash on degenerate inputs
---
Note: the blob ids on the "index" line below were not recomputed for this
revision; regenerate the series with git format-patch before relying on them.

 test_ar_normalization.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test_ar_normalization.py b/test_ar_normalization.py
index 91d273ae4..e64883f2d 100644
--- a/test_ar_normalization.py
+++ b/test_ar_normalization.py
@@ -29,6 +29,8 @@ def test_ar_normalization():
         # Path normalization: our search pattern is normalized, so double-slash
         # in sysconfig data won't match (but that's OK - sysconfig should be normalized)
         ("AR = /tools//llvm/bin/llvm-ar", "AR = /tools//llvm/bin/llvm-ar", "Double slash in data won't match (expected)"),
+        # Empty value after replacement (edge case)
+        ("AR = /tools/llvm/bin/", "AR = ", "Trailing slash creates empty value (acceptable)"),
     ]
 
     tools_path = "/tools"