From 9ef212e77f4b8dae7e0d759153a83a8eabaff81e Mon Sep 17 00:00:00 2001 From: Twangboy Date: Tue, 24 Mar 2026 14:28:03 -0600 Subject: [PATCH 1/7] security: patch pip's vendored urllib3 for CVE-2025-66418 and CVE-2026-21441 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pip 25.2 vendors urllib3 1.26.20, which contains two security vulnerabilities that affect all Salt builds: CVE-2025-66418 (GHSA-gm62-xv2j-4w53): An attacker-controlled Content-Encoding header with more than 5 chained encodings could cause unbounded resource consumption during decompression. Fixed by limiting MultiDecoder to 5 links. CVE-2026-21441 (GHSA-38jv-5279-wg99): drain_conn() unnecessarily decompressed the full body of HTTP redirect responses even when preload_content=False, creating a decompression-bomb vector. Fixed by tracking _has_decoded_content and skipping decompression in drain_conn when decoding was never initiated. Both patches are backported from upstream urllib3 2.6.3 and validated against Ubuntu's Jammy 1.26.x security backports. CVE-2025-66471 is intentionally not backported — it requires a full 2.x streaming infrastructure refactor, Ubuntu did not backport it to 1.26.x, and pip's maintainers confirmed pip is not affected since all network calls use decode_content=False. The patched files live in pkg/patches/pip-urllib3/ and are applied at build time by _build_patched_pip_wheel(), which downloads pip==25.2, rewrites the wheel zip with the patched urllib3 files, and updates the wheel's RECORD hashes. The patched wheel is then installed and/or copied into the virtualenv embed directory in all three build pipelines: onedir_dependencies, salt_onedir, and the macOS standalone path. The version is reported as "2.6.3" to reflect the highest upstream release from which fixes were drawn, satisfying SCA scanner requirements. --- pkg/patches/pip-urllib3/_version.py | 26 + pkg/patches/pip-urllib3/response.py | 910 ++++++++++++++++++++++++++++ tools/pkg/build.py | 136 ++++- 3 files changed, 1070 insertions(+), 2 deletions(-) create mode 100644 pkg/patches/pip-urllib3/_version.py create mode 100644 pkg/patches/pip-urllib3/response.py diff --git a/pkg/patches/pip-urllib3/_version.py b/pkg/patches/pip-urllib3/_version.py new file mode 100644 index 000000000000..f6be0db7a32e --- /dev/null +++ b/pkg/patches/pip-urllib3/_version.py @@ -0,0 +1,26 @@ +# This file is a Salt-maintained security patch of pip's vendored urllib3. +# +# The underlying code is urllib3 1.26.20 (the version vendored by pip 25.2) +# with the following CVE fixes backported from upstream urllib3 2.6.3: +# +# CVE-2025-66418 (GHSA-gm62-xv2j-4w53): Unbounded Content-Encoding +# decompression chain — MultiDecoder now enforces a 5-link limit. +# Upstream fix: urllib3 2.6.0 (commit 24d7b67). +# +# CVE-2026-21441 (GHSA-38jv-5279-wg99): drain_conn unnecessarily +# decompressed the full body of HTTP redirect responses, creating a +# decompression-bomb vector. Fixed by adding _has_decoded_content +# tracking and only decoding in drain_conn when decoding was already +# in progress. +# Upstream fix: urllib3 2.6.3 (commit 8864ac4). +# +# CVE-2025-66471 (GHSA-2xpw-w6gg-jr37): Decompression bomb in the +# streaming API via max_length parameter. NOT backported — requires a +# full 2.x streaming infrastructure refactor. Ubuntu did not backport +# this to 1.26.x either. pip's maintainers confirmed pip is not +# affected because all pip network calls use decode_content=False. +# +# The version string "2.6.3" reflects the highest upstream release from +# which fixes have been backported. The underlying API remains urllib3 +# 1.26.x — this is NOT a port to urllib3 2.x. +__version__ = "2.6.3" diff --git a/pkg/patches/pip-urllib3/response.py b/pkg/patches/pip-urllib3/response.py new file mode 100644 index 000000000000..2714d65813e8 --- /dev/null +++ b/pkg/patches/pip-urllib3/response.py @@ -0,0 +1,910 @@ +import io +import logging +import sys +import warnings +import zlib +from contextlib import contextmanager +from socket import error as SocketError +from socket import timeout as SocketTimeout + +try: + try: + import brotlicffi as brotli + except ImportError: + import brotli +except ImportError: + brotli = None + +from . import util +from ._collections import HTTPHeaderDict +from .connection import BaseSSLError, HTTPException +from .exceptions import ( + BodyNotHttplibCompatible, + DecodeError, + HTTPError, + IncompleteRead, + InvalidChunkLength, + InvalidHeader, + ProtocolError, + ReadTimeoutError, + ResponseNotChunked, + SSLError, +) +from .packages import six +from .util.response import is_fp_closed, is_response_to_head + +log = logging.getLogger(__name__) + + +class DeflateDecoder: + def __init__(self): + self._first_try = True + self._data = b"" + self._obj = zlib.decompressobj() + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not data: + return data + + if not self._first_try: + return self._obj.decompress(data) + + self._data += data + try: + decompressed = self._obj.decompress(data) + if decompressed: + self._first_try = False + self._data = None + return decompressed + except zlib.error: + self._first_try = False + self._obj = zlib.decompressobj(-zlib.MAX_WBITS) + try: + return self.decompress(self._data) + finally: + self._data = None + + +class GzipDecoderState: + + FIRST_MEMBER = 0 + OTHER_MEMBERS = 1 + SWALLOW_DATA = 2 + + +class GzipDecoder: + def __init__(self): + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + self._state = GzipDecoderState.FIRST_MEMBER + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + ret = bytearray() + if self._state == GzipDecoderState.SWALLOW_DATA or not data: + return bytes(ret) + while True: + try: + ret += self._obj.decompress(data) + except zlib.error: + previous_state = self._state + # Ignore data after the first error + self._state = GzipDecoderState.SWALLOW_DATA + if previous_state == GzipDecoderState.OTHER_MEMBERS: + # Allow trailing garbage acceptable in other gzip clients + return bytes(ret) + raise + data = self._obj.unused_data + if not data: + return bytes(ret) + self._state = GzipDecoderState.OTHER_MEMBERS + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + + +if brotli is not None: + + class BrotliDecoder: + # Supports both 'brotlipy' and 'Brotli' packages + # since they share an import name. The top branches + # are for 'brotlipy' and bottom branches for 'Brotli' + def __init__(self): + self._obj = brotli.Decompressor() + if hasattr(self._obj, "decompress"): + self.decompress = self._obj.decompress + else: + self.decompress = self._obj.process + + def flush(self): + if hasattr(self._obj, "flush"): + return self._obj.flush() + return b"" + + +# CVE-2025-66418 (GHSA-gm62-xv2j-4w53): Limit the number of chained +# Content-Encoding decoders to prevent denial-of-service via unbounded +# decompression chains. Backported from urllib3 2.6.0 (commit 24d7b67). +class MultiDecoder: + """ + From RFC7231: + If one or more encodings have been applied to a representation, the + sender that applied the encodings MUST generate a Content-Encoding + header field that lists the content codings in the order in which + they were applied. + """ + + # Maximum allowed number of chained HTTP encodings in the + # Content-Encoding header. + max_decode_links = 5 + + def __init__(self, modes): + encodings = [m.strip() for m in modes.split(",")] + if len(encodings) > self.max_decode_links: + raise DecodeError( + "Too many content encodings in the chain: " + "%d > %d" % (len(encodings), self.max_decode_links) + ) + self._decoders = [_get_decoder(e) for e in encodings] + + def flush(self): + return self._decoders[0].flush() + + def decompress(self, data): + for d in reversed(self._decoders): + data = d.decompress(data) + return data + + +def _get_decoder(mode): + if "," in mode: + return MultiDecoder(mode) + + if mode == "gzip": + return GzipDecoder() + + if brotli is not None and mode == "br": + return BrotliDecoder() + + return DeflateDecoder() + + +class HTTPResponse(io.IOBase): + """ + HTTP Response container. + + Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is + loaded and decoded on-demand when the ``data`` property is accessed. This + class is also compatible with the Python standard library's :mod:`io` + module, and can hence be treated as a readable object in the context of that + framework. + + Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: + + :param preload_content: + If True, the response's body will be preloaded during construction. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param original_response: + When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` + object, it's convenient to include the original for debug purposes. It's + otherwise unused. + + :param retries: + The retries contains the last :class:`~urllib3.util.retry.Retry` that + was used during the request. + + :param enforce_content_length: + Enforce content length checking. Body returned by server must match + value of Content-Length header, if present. Otherwise, raise error. + """ + + CONTENT_DECODERS = ["gzip", "deflate"] + if brotli is not None: + CONTENT_DECODERS += ["br"] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] + + def __init__( + self, + body="", + headers=None, + status=0, + version=0, + reason=None, + strict=0, + preload_content=True, + decode_content=True, + original_response=None, + pool=None, + connection=None, + msg=None, + retries=None, + enforce_content_length=False, + request_method=None, + request_url=None, + auto_close=True, + ): + + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) + self.status = status + self.version = version + self.reason = reason + self.strict = strict + self.decode_content = decode_content + # CVE-2026-21441: Track whether decoding has been initiated so that + # drain_conn can avoid decompressing redirect response bodies + # unnecessarily. Backported from urllib3 2.6.3 (commit 8864ac4), + # with _has_decoded_content tracking from commit cefd1db. + self._has_decoded_content = False + self.retries = retries + self.enforce_content_length = enforce_content_length + self.auto_close = auto_close + + self._decoder = None + self._body = None + self._fp = None + self._original_response = original_response + self._fp_bytes_read = 0 + self.msg = msg + self._request_url = request_url + + if body and isinstance(body, ((str,), bytes)): + self._body = body + + self._pool = pool + self._connection = connection + + if hasattr(body, "read"): + self._fp = body + + # Are we using the chunked-style of transfer encoding? + self.chunked = False + self.chunk_left = None + tr_enc = self.headers.get("transfer-encoding", "").lower() + # Don't incur the penalty of creating a list and then discarding it + encodings = (enc.strip() for enc in tr_enc.split(",")) + if "chunked" in encodings: + self.chunked = True + + # Determine length of response + self.length_remaining = self._init_length(request_method) + + # If requested, preload the body. + if preload_content and not self._body: + self._body = self.read(decode_content=decode_content) + + def get_redirect_location(self): + """ + Should we redirect and where to? + + :returns: Truthy redirect location string if we got a redirect status + code and valid location. ``None`` if redirect status and no + location. ``False`` if not a redirect status code. + """ + if self.status in self.REDIRECT_STATUSES: + return self.headers.get("location") + + return False + + def release_conn(self): + if not self._pool or not self._connection: + return + + self._pool._put_conn(self._connection) + self._connection = None + + def drain_conn(self): + """ + Read and discard any remaining HTTP response data in the response connection. + + Unread data in the HTTPResponse connection blocks the connection from being released back to the pool. + """ + try: + self.read( + # CVE-2026-21441: Do not spend resources decoding the content + # unless decoding has already been initiated. This prevents + # decompression-bomb attacks via compressed redirect bodies. + decode_content=self._has_decoded_content, + ) + except (HTTPError, SocketError, BaseSSLError, HTTPException): + pass + + @property + def data(self): + # For backwards-compat with earlier urllib3 0.4 and earlier. + if self._body: + return self._body + + if self._fp: + return self.read(cache_content=True) + + @property + def connection(self): + return self._connection + + def isclosed(self): + return is_fp_closed(self._fp) + + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:``urllib3.response.HTTPResponse.read`` + if bytes are encoded on the wire (e.g, compressed). + """ + return self._fp_bytes_read + + def _init_length(self, request_method): + """ + Set initial length value for Response content if available. + """ + length = self.headers.get("content-length") + + if length is not None: + if self.chunked: + # This Response will fail with an IncompleteRead if it can't be + # received as chunked. This method falls back to attempt reading + # the response before raising an exception. + log.warning( + "Received response with both Content-Length and " + "Transfer-Encoding set. This is expressly forbidden " + "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " + "attempting to process response as Transfer-Encoding: " + "chunked." + ) + return None + + try: + # RFC 7230 section 3.3.2 specifies multiple content lengths can + # be sent in a single Content-Length header + # (e.g. Content-Length: 42, 42). This line ensures the values + # are all valid ints and that as long as the `set` length is 1, + # all values are the same. Otherwise, the header is invalid. + lengths = {int(val) for val in length.split(",")} + if len(lengths) > 1: + raise InvalidHeader( + "Content-Length contained multiple " + "unmatching values (%s)" % length + ) + length = lengths.pop() + except ValueError: + length = None + else: + if length < 0: + length = None + + # Convert status to int for comparison + # In some cases, httplib returns a status of "_UNKNOWN" + try: + status = int(self.status) + except ValueError: + status = 0 + + # Check for responses that shouldn't include a body + if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD": + length = 0 + + return length + + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessary. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get("content-encoding", "").lower() + if self._decoder is None: + if content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + elif "," in content_encoding: + encodings = [ + e.strip() + for e in content_encoding.split(",") + if e.strip() in self.CONTENT_DECODERS + ] + if len(encodings): + self._decoder = _get_decoder(content_encoding) + + DECODER_ERROR_CLASSES = (IOError, zlib.error) + if brotli is not None: + DECODER_ERROR_CLASSES += (brotli.error,) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + if not decode_content: + # CVE-2026-21441: Guard against toggling decode_content after + # decoding has already started; the decoder state would be + # inconsistent. Backported from urllib3 commit cefd1db. + if self._has_decoded_content: + raise RuntimeError( + "Calling read(decode_content=False) is not supported after " + "read(decode_content=True) was called." + ) + return data + + try: + if self._decoder: + data = self._decoder.decompress(data) + self._has_decoded_content = True + except self.DECODER_ERROR_CLASSES as e: + content_encoding = self.headers.get("content-encoding", "").lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, + e, + ) + if flush_decoder: + data += self._flush_decoder() + + return data + + def _flush_decoder(self): + """ + Flushes the decoder. Should only be called if the decoder is actually + being used. + """ + if self._decoder: + buf = self._decoder.decompress(b"") + return buf + self._decoder.flush() + + return b"" + + @contextmanager + def _error_catcher(self): + """ + Catch low-level python exceptions, instead re-raising urllib3 + variants, so that low-level exceptions are not leaked in the + high-level api. + + On exit, release the connection back to the pool. + """ + clean_exit = False + + try: + try: + yield + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, "Read timed out.") + + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if "read operation timed out" not in str(e): + # SSL errors related to framing/MAC get wrapped and reraised here + raise SSLError(e) + + raise ReadTimeoutError(self._pool, None, "Read timed out.") + + except (HTTPException, SocketError) as e: + # This includes IncompleteRead. + raise ProtocolError("Connection broken: %r" % e, e) + + # If no exception is thrown, we should avoid cleaning up + # unnecessarily. + clean_exit = True + finally: + # If we didn't terminate cleanly, we need to throw away our + # connection. + if not clean_exit: + # The response may not be closed but we're not going to use it + # anymore so close it now to ensure that the connection is + # released back to the pool. + if self._original_response: + self._original_response.close() + + # Closing the response may not actually be sufficient to close + # everything, so if we have a hold of the connection close that + # too. + if self._connection: + self._connection.close() + + # If we hold the original response but it's closed now, we should + # return the connection back to the pool. + if self._original_response and self._original_response.isclosed(): + self.release_conn() + + def _fp_read(self, amt): + """ + Read a response with the thought that reading the number of bytes + larger than can fit in a 32-bit int at a time via SSL in some + known cases leads to an overflow error that has to be prevented + if `amt` or `self.length_remaining` indicate that a problem may + happen. + + The known cases: + * 3.8 <= CPython < 3.9.7 because of a bug + https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900. + * urllib3 injected with pyOpenSSL-backed SSL-support. + * CPython < 3.10 only when `amt` does not fit 32-bit int. + """ + assert self._fp + c_int_max = 2**31 - 1 + if ( + ( + (amt and amt > c_int_max) + or (self.length_remaining and self.length_remaining > c_int_max) + ) + and not util.IS_SECURETRANSPORT + and (util.IS_PYOPENSSL or sys.version_info < (3, 10)) + ): + buffer = io.BytesIO() + # Besides `max_chunk_amt` being a maximum chunk size, it + # affects memory overhead of reading a response by this + # method in CPython. + # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum + # chunk size that does not lead to an overflow error, but + # 256 MiB is a compromise. + max_chunk_amt = 2**28 + while amt is None or amt != 0: + if amt is not None: + chunk_amt = min(amt, max_chunk_amt) + amt -= chunk_amt + else: + chunk_amt = max_chunk_amt + data = self._fp.read(chunk_amt) + if not data: + break + buffer.write(data) + del data # to reduce peak memory usage by `max_chunk_amt`. + return buffer.getvalue() + else: + # StringIO doesn't like amt=None + return self._fp.read(amt) if amt is not None else self._fp.read() + + def read(self, amt=None, decode_content=None, cache_content=False): + """ + Similar to :meth:`http.client.HTTPResponse.read`, but with two additional + parameters: ``decode_content`` and ``cache_content``. + + :param amt: + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param cache_content: + If True, will save the returned data such that the same result is + returned despite of the state of the underlying file object. This + is useful if you want the ``.data`` property to continue working + after having ``.read()`` the file object. (Overridden if ``amt`` is + set.) + """ + self._init_decoder() + if decode_content is None: + decode_content = self.decode_content + + if self._fp is None: + return + + flush_decoder = False + fp_closed = getattr(self._fp, "closed", False) + + with self._error_catcher(): + data = self._fp_read(amt) if not fp_closed else b"" + if amt is None: + flush_decoder = True + else: + cache_content = False + if ( + amt != 0 and not data + ): # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + flush_decoder = True + if self.enforce_content_length and self.length_remaining not in ( + 0, + None, + ): + # This is an edge case that httplib failed to cover due + # to concerns of backward compatibility. We're + # addressing it here to make sure IncompleteRead is + # raised during streaming, so all calls with incorrect + # Content-Length are caught. + raise IncompleteRead(self._fp_bytes_read, self.length_remaining) + + if data: + self._fp_bytes_read += len(data) + if self.length_remaining is not None: + self.length_remaining -= len(data) + + data = self._decode(data, decode_content, flush_decoder) + + if cache_content: + self._body = data + + return data + + def stream(self, amt=2**16, decode_content=None): + """ + A generator wrapper for the read() method. A call will block until + ``amt`` bytes have been read from the connection or until the + connection is closed. + + :param amt: + How much of the content to read. The generator will return up to + much data per iteration, but may return less. This is particularly + likely when using compressed data. However, the empty string will + never be returned. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + if self.chunked and self.supports_chunked_reads(): + yield from self.read_chunked(amt, decode_content=decode_content) + else: + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) + + if data: + yield data + + @classmethod + def from_httplib(ResponseCls, r, **response_kw): + """ + Given an :class:`http.client.HTTPResponse` instance ``r``, return a + corresponding :class:`urllib3.response.HTTPResponse` object. + + Remaining parameters are passed to the HTTPResponse constructor, along + with ``original_response=r``. + """ + headers = r.msg + + if not isinstance(headers, HTTPHeaderDict): + headers = HTTPHeaderDict(headers.items()) + + # HTTPResponse objects in Python 3 don't have a .strict attribute + strict = getattr(r, "strict", 0) + resp = ResponseCls( + body=r, + headers=headers, + status=r.status, + version=r.version, + reason=r.reason, + strict=strict, + original_response=r, + **response_kw + ) + return resp + + # Backwards-compatibility methods for http.client.HTTPResponse + def getheaders(self): + warnings.warn( + "HTTPResponse.getheaders() is deprecated and will be removed " + "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.", + category=DeprecationWarning, + stacklevel=2, + ) + return self.headers + + def getheader(self, name, default=None): + warnings.warn( + "HTTPResponse.getheader() is deprecated and will be removed " + "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).", + category=DeprecationWarning, + stacklevel=2, + ) + return self.headers.get(name, default) + + # Backwards compatibility for http.cookiejar + def info(self): + return self.headers + + # Overrides from io.IOBase + def close(self): + if not self.closed: + self._fp.close() + + if self._connection: + self._connection.close() + + if not self.auto_close: + io.IOBase.close(self) + + @property + def closed(self): + if not self.auto_close: + return io.IOBase.closed.__get__(self) + elif self._fp is None: + return True + elif hasattr(self._fp, "isclosed"): + return self._fp.isclosed() + elif hasattr(self._fp, "closed"): + return self._fp.closed + else: + return True + + def fileno(self): + if self._fp is None: + raise OSError("HTTPResponse has no file to get a fileno from") + elif hasattr(self._fp, "fileno"): + return self._fp.fileno() + else: + raise OSError( + "The file-like object this HTTPResponse is wrapped " + "around has no file descriptor" + ) + + def flush(self): + if ( + self._fp is not None + and hasattr(self._fp, "flush") + and not getattr(self._fp, "closed", False) + ): + return self._fp.flush() + + def readable(self): + # This method is required for `io` module compatibility. + return True + + def readinto(self, b): + # This method is required for `io` module compatibility. + temp = self.read(len(b)) + if len(temp) == 0: + return 0 + else: + b[: len(temp)] = temp + return len(temp) + + def supports_chunked_reads(self): + """ + Checks if the underlying file-like object looks like a + :class:`http.client.HTTPResponse` object. We do this by testing for + the fp attribute. If it is present we assume it returns raw chunks as + processed by read_chunked(). + """ + return hasattr(self._fp, "fp") + + def _update_chunk_length(self): + # First, we'll figure out length of a chunk and then + # we'll try to read it from socket. + if self.chunk_left is not None: + return + line = self._fp.fp.readline() + line = line.split(b";", 1)[0] + try: + self.chunk_left = int(line, 16) + except ValueError: + # Invalid chunked protocol response, abort. + self.close() + raise InvalidChunkLength(self, line) + + def _handle_chunk(self, amt): + returned_chunk = None + if amt is None: + chunk = self._fp._safe_read(self.chunk_left) + returned_chunk = chunk + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + elif amt < self.chunk_left: + value = self._fp._safe_read(amt) + self.chunk_left = self.chunk_left - amt + returned_chunk = value + elif amt == self.chunk_left: + value = self._fp._safe_read(amt) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + returned_chunk = value + else: # amt > self.chunk_left + returned_chunk = self._fp._safe_read(self.chunk_left) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + return returned_chunk + + def read_chunked(self, amt=None, decode_content=None): + """ + Similar to :meth:`HTTPResponse.read`, but with an additional + parameter: ``decode_content``. + + :param amt: + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + self._init_decoder() + # FIXME: Rewrite this method and make it a class with a better structured logic. + if not self.chunked: + raise ResponseNotChunked( + "Response is not chunked. " + "Header 'transfer-encoding: chunked' is missing." + ) + if not self.supports_chunked_reads(): + raise BodyNotHttplibCompatible( + "Body should be http.client.HTTPResponse like. " + "It should have have an fp attribute which returns raw chunks." + ) + + with self._error_catcher(): + # Don't bother reading the body of a HEAD request. + if self._original_response and is_response_to_head(self._original_response): + self._original_response.close() + return + + # If a response is already read and closed + # then return immediately. + if self._fp.fp is None: + return + + while True: + self._update_chunk_length() + if self.chunk_left == 0: + break + chunk = self._handle_chunk(amt) + decoded = self._decode( + chunk, decode_content=decode_content, flush_decoder=False + ) + if decoded: + yield decoded + + if decode_content: + # On CPython and PyPy, we should never need to flush the + # decoder. However, on Jython we *might* need to, so + # lets defensively do it anyway. + decoded = self._flush_decoder() + if decoded: # Platform-specific: Jython. + yield decoded + + # Chunk content ends with \r\n: discard it. + while True: + line = self._fp.fp.readline() + if not line: + # Some sites may not end with '\r\n'. + break + if line == b"\r\n": + break + + # We read everything; close the "file". + if self._original_response: + self._original_response.close() + + def geturl(self): + """ + Returns the URL that was the source of this response. + If the request that generated this response redirected, this method + will return the final redirect location. + """ + if self.retries is not None and len(self.retries.history): + return self.retries.history[-1].redirect_location + else: + return self._request_url + + def __iter__(self): + buffer = [] + for chunk in self.stream(decode_content=True): + if b"\n" in chunk: + chunk = chunk.split(b"\n") + yield b"".join(buffer) + chunk[0] + b"\n" + for x in chunk[1:-1]: + yield x + b"\n" + if chunk[-1]: + buffer = [chunk[-1]] + else: + buffer = [] + else: + buffer.append(chunk) + if buffer: + yield b"".join(buffer) diff --git a/tools/pkg/build.py b/tools/pkg/build.py index 886ea70a6667..7f572e86476d 100644 --- a/tools/pkg/build.py +++ b/tools/pkg/build.py @@ -5,13 +5,19 @@ # pylint: disable=resource-leakage,broad-except from __future__ import annotations +import base64 +import csv +import hashlib +import io import json import logging import os import pathlib import re import shutil +import sys import tarfile +import tempfile import zipfile from typing import TYPE_CHECKING @@ -21,6 +27,103 @@ log = logging.getLogger(__name__) +# Cached path to the patched pip wheel built by _build_patched_pip_wheel. +# None until first call; reused across all build steps in the same process. +_PATCHED_PIP_WHEEL: pathlib.Path | None = None + + +def _patch_pip_wheel_urllib3(wheel_path: pathlib.Path) -> None: + """ + Rewrite *wheel_path* in-place so that the urllib3 vendored inside pip + contains the Salt security backports defined in pkg/patches/pip-urllib3/. + + Patched files: + pip/_vendor/urllib3/response.py — CVE-2025-66418, CVE-2026-21441 + pip/_vendor/urllib3/_version.py — version bumped to "2.6.3" + + The wheel's RECORD file is updated with correct sha256 hashes and sizes + for the two replaced files so that the installed dist-info stays valid. + """ + patches_dir = tools.utils.REPO_ROOT / "pkg" / "patches" / "pip-urllib3" + targets = { + "pip/_vendor/urllib3/response.py": (patches_dir / "response.py").read_bytes(), + "pip/_vendor/urllib3/_version.py": (patches_dir / "_version.py").read_bytes(), + } + + def _record_hash(content: bytes) -> str: + digest = hashlib.sha256(content).digest() + return "sha256=" + base64.urlsafe_b64encode(digest).decode().rstrip("=") + + tmp_path = wheel_path.with_suffix(".tmp.whl") + try: + with zipfile.ZipFile(wheel_path, "r") as zin: + with zipfile.ZipFile( + tmp_path, "w", compression=zipfile.ZIP_DEFLATED + ) as zout: + record_name: str | None = None + record_rows: list[list[str]] = [] + + for item in zin.infolist(): + if item.filename.endswith(".dist-info/RECORD"): + record_name = item.filename + raw = zin.read(item.filename).decode("utf-8") + record_rows = list(csv.reader(raw.splitlines())) + continue # written last after we know the new hashes + if item.filename in targets: + zout.writestr(item, targets[item.filename]) + else: + zout.writestr(item, zin.read(item.filename)) + + # Update RECORD rows for patched files and write it back. + if record_name: + new_rows = [] + for row in record_rows: + if len(row) >= 1 and row[0] in targets: + content = targets[row[0]] + new_rows.append( + [row[0], _record_hash(content), str(len(content))] + ) + else: + new_rows.append(row) + buf = io.StringIO() + csv.writer(buf).writerows(new_rows) + zout.writestr(record_name, buf.getvalue()) + + tmp_path.replace(wheel_path) + except Exception: + tmp_path.unlink(missing_ok=True) + raise + + +def _build_patched_pip_wheel(ctx: Context) -> pathlib.Path: + """ + Download pip==25.2 into a temporary directory, patch its vendored urllib3, + and return the path to the patched wheel. The result is cached for the + lifetime of the current process so subsequent calls are free. + """ + global _PATCHED_PIP_WHEEL + if _PATCHED_PIP_WHEEL is not None: + return _PATCHED_PIP_WHEEL + + tmpdir = pathlib.Path(tempfile.mkdtemp(prefix="salt-pip-patch-")) + ctx.info("Downloading pip==25.2 for urllib3 security patching ...") + ctx.run( + sys.executable, + "-m", + "pip", + "download", + "pip==25.2", + "--no-deps", + "--dest", + str(tmpdir), + ) + wheel = next(tmpdir.glob("pip-*.whl")) + ctx.info(f"Patching urllib3 CVEs inside {wheel.name} ...") + _patch_pip_wheel_urllib3(wheel) + _PATCHED_PIP_WHEEL = wheel + return wheel + + # Define the command group build = command_group( name="build", @@ -276,6 +379,20 @@ def macos( ctx.info("Installing salt into the relenv python") ctx.run("./install_salt.sh") + # Patch pip's vendored urllib3 in the standalone macOS build. + # install_salt.sh uses the relenv pip but does not upgrade it, so we + # install the security-patched pip wheel and replace the copy that + # virtualenv embeds so that new environments also get the fixed pip. + build_env = checkout / "pkg" / "macos" / "build" / "opt" / "salt" + python_bin = build_env / "bin" / "python3" + patched_pip = _build_patched_pip_wheel(ctx) + ctx.run(str(python_bin), "-m", "pip", "install", str(patched_pip)) + for old_pip in (build_env / "lib").glob( + "python*/site-packages/virtualenv/seed/wheels/embed/pip-*.whl" + ): + old_pip.unlink() + shutil.copy(str(patched_pip), str(old_pip.parent / patched_pip.name)) + if sign: ctx.info("Signing binaries") with ctx.chdir(checkout / "pkg" / "macos"): @@ -614,10 +731,20 @@ def onedir_dependencies( "install", "-U", "setuptools", - "pip", "wheel", env=env, ) + # Install pip from the security-patched wheel instead of pulling from PyPI, + # so that pip's vendored urllib3 never contains the vulnerable version. + patched_pip = _build_patched_pip_wheel(ctx) + ctx.run( + str(python_bin), + "-m", + "pip", + "install", + str(patched_pip), + env=env, + ) ctx.run( str(python_bin), "-m", @@ -834,17 +961,22 @@ def errfn(fn, path, err): env["PIP_CONSTRAINT"] = str( tools.utils.REPO_ROOT / "requirements" / "constraints.txt" ) + # Download setuptools and wheel normally; pip is handled separately below + # so that the security-patched wheel is used instead of the PyPI version. ctx.run( str(python_executable), "-m", "pip", "download", "setuptools", - "pip", "wheel", "--dest", str(embed_dir), ) + # Copy the security-patched pip wheel into the embed directory so that + # virtualenv seeds new environments with pip that has the urllib3 fixes. + patched_pip = _build_patched_pip_wheel(ctx) + shutil.copy(str(patched_pip), str(embed_dir / patched_pip.name)) # Update __init__.py with the new versions From 97b13288b133477a997c69aa0b10b55d4ffd5661 Mon Sep 17 00:00:00 2001 From: Twangboy Date: Tue, 24 Mar 2026 15:01:04 -0600 Subject: [PATCH 2/7] Use a patch file instead of a patched file --- .pre-commit-config.yaml | 3 +- pkg/patches/pip-urllib3/_version.py | 26 - pkg/patches/pip-urllib3/_version.py.patch | 31 + pkg/patches/pip-urllib3/response.py | 910 ---------------------- pkg/patches/pip-urllib3/response.py.patch | 64 ++ tools/pkg/build.py | 93 ++- 6 files changed, 178 insertions(+), 949 deletions(-) delete mode 100644 pkg/patches/pip-urllib3/_version.py create mode 100644 pkg/patches/pip-urllib3/_version.py.patch delete mode 100644 pkg/patches/pip-urllib3/response.py create mode 100644 pkg/patches/pip-urllib3/response.py.patch diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b1f69ee479ef..420a58ac254f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,8 @@ repos: - --markdown-linebreak-ext=md exclude: > (?x)^( - pkg/macos/pkg-resources/.*\.rtf + pkg/macos/pkg-resources/.*\.rtf| + pkg/patches/.*\.patch )$ - id: mixed-line-ending # Replaces or checks mixed line ending. diff --git a/pkg/patches/pip-urllib3/_version.py b/pkg/patches/pip-urllib3/_version.py deleted file mode 100644 index f6be0db7a32e..000000000000 --- a/pkg/patches/pip-urllib3/_version.py +++ /dev/null @@ -1,26 +0,0 @@ -# This file is a Salt-maintained security patch of pip's vendored urllib3. -# -# The underlying code is urllib3 1.26.20 (the version vendored by pip 25.2) -# with the following CVE fixes backported from upstream urllib3 2.6.3: -# -# CVE-2025-66418 (GHSA-gm62-xv2j-4w53): Unbounded Content-Encoding -# decompression chain — MultiDecoder now enforces a 5-link limit. -# Upstream fix: urllib3 2.6.0 (commit 24d7b67). -# -# CVE-2026-21441 (GHSA-38jv-5279-wg99): drain_conn unnecessarily -# decompressed the full body of HTTP redirect responses, creating a -# decompression-bomb vector. Fixed by adding _has_decoded_content -# tracking and only decoding in drain_conn when decoding was already -# in progress. -# Upstream fix: urllib3 2.6.3 (commit 8864ac4). -# -# CVE-2025-66471 (GHSA-2xpw-w6gg-jr37): Decompression bomb in the -# streaming API via max_length parameter. NOT backported — requires a -# full 2.x streaming infrastructure refactor. Ubuntu did not backport -# this to 1.26.x either. pip's maintainers confirmed pip is not -# affected because all pip network calls use decode_content=False. -# -# The version string "2.6.3" reflects the highest upstream release from -# which fixes have been backported. The underlying API remains urllib3 -# 1.26.x — this is NOT a port to urllib3 2.x. -__version__ = "2.6.3" diff --git a/pkg/patches/pip-urllib3/_version.py.patch b/pkg/patches/pip-urllib3/_version.py.patch new file mode 100644 index 000000000000..6eca20d59475 --- /dev/null +++ b/pkg/patches/pip-urllib3/_version.py.patch @@ -0,0 +1,31 @@ +--- a/pip/_vendor/urllib3/_version.py ++++ b/pip/_vendor/urllib3/_version.py +@@ -1,2 +1,26 @@ +-# This file is protected via CODEOWNERS +-__version__ = "1.26.20" ++# This file is a Salt-maintained security patch of pip's vendored urllib3. ++# ++# The underlying code is urllib3 1.26.20 (the version vendored by pip 25.2) ++# with the following CVE fixes backported from upstream urllib3 2.6.3: ++# ++# CVE-2025-66418 (GHSA-gm62-xv2j-4w53): Unbounded Content-Encoding ++# decompression chain -- MultiDecoder now enforces a 5-link limit. ++# Upstream fix: urllib3 2.6.0 (commit 24d7b67). ++# ++# CVE-2026-21441 (GHSA-38jv-5279-wg99): drain_conn unnecessarily ++# decompressed the full body of HTTP redirect responses, creating a ++# decompression-bomb vector. Fixed by adding _has_decoded_content ++# tracking and only decoding in drain_conn when decoding was already ++# in progress. ++# Upstream fix: urllib3 2.6.3 (commit 8864ac4). ++# ++# CVE-2025-66471 (GHSA-2xpw-w6gg-jr37): Decompression bomb in the ++# streaming API via max_length parameter. NOT backported -- requires a ++# full 2.x streaming infrastructure refactor. Ubuntu did not backport ++# this to 1.26.x either. pip maintainers confirmed pip is not ++# affected because all pip network calls use decode_content=False. ++# ++# The version string "2.6.3" reflects the highest upstream release from ++# which fixes have been backported. The underlying API remains urllib3 ++# 1.26.x -- this is NOT a port to urllib3 2.x. ++__version__ = "2.6.3" diff --git a/pkg/patches/pip-urllib3/response.py b/pkg/patches/pip-urllib3/response.py deleted file mode 100644 index 2714d65813e8..000000000000 --- a/pkg/patches/pip-urllib3/response.py +++ /dev/null @@ -1,910 +0,0 @@ -import io -import logging -import sys -import warnings -import zlib -from contextlib import contextmanager -from socket import error as SocketError -from socket import timeout as SocketTimeout - -try: - try: - import brotlicffi as brotli - except ImportError: - import brotli -except ImportError: - brotli = None - -from . import util -from ._collections import HTTPHeaderDict -from .connection import BaseSSLError, HTTPException -from .exceptions import ( - BodyNotHttplibCompatible, - DecodeError, - HTTPError, - IncompleteRead, - InvalidChunkLength, - InvalidHeader, - ProtocolError, - ReadTimeoutError, - ResponseNotChunked, - SSLError, -) -from .packages import six -from .util.response import is_fp_closed, is_response_to_head - -log = logging.getLogger(__name__) - - -class DeflateDecoder: - def __init__(self): - self._first_try = True - self._data = b"" - self._obj = zlib.decompressobj() - - def __getattr__(self, name): - return getattr(self._obj, name) - - def decompress(self, data): - if not data: - return data - - if not self._first_try: - return self._obj.decompress(data) - - self._data += data - try: - decompressed = self._obj.decompress(data) - if decompressed: - self._first_try = False - self._data = None - return decompressed - except zlib.error: - self._first_try = False - self._obj = zlib.decompressobj(-zlib.MAX_WBITS) - try: - return self.decompress(self._data) - finally: - self._data = None - - -class GzipDecoderState: - - FIRST_MEMBER = 0 - OTHER_MEMBERS = 1 - SWALLOW_DATA = 2 - - -class GzipDecoder: - def __init__(self): - self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) - self._state = GzipDecoderState.FIRST_MEMBER - - def __getattr__(self, name): - return getattr(self._obj, name) - - def decompress(self, data): - ret = bytearray() - if self._state == GzipDecoderState.SWALLOW_DATA or not data: - return bytes(ret) - while True: - try: - ret += self._obj.decompress(data) - except zlib.error: - previous_state = self._state - # Ignore data after the first error - self._state = GzipDecoderState.SWALLOW_DATA - if previous_state == GzipDecoderState.OTHER_MEMBERS: - # Allow trailing garbage acceptable in other gzip clients - return bytes(ret) - raise - data = self._obj.unused_data - if not data: - return bytes(ret) - self._state = GzipDecoderState.OTHER_MEMBERS - self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) - - -if brotli is not None: - - class BrotliDecoder: - # Supports both 'brotlipy' and 'Brotli' packages - # since they share an import name. The top branches - # are for 'brotlipy' and bottom branches for 'Brotli' - def __init__(self): - self._obj = brotli.Decompressor() - if hasattr(self._obj, "decompress"): - self.decompress = self._obj.decompress - else: - self.decompress = self._obj.process - - def flush(self): - if hasattr(self._obj, "flush"): - return self._obj.flush() - return b"" - - -# CVE-2025-66418 (GHSA-gm62-xv2j-4w53): Limit the number of chained -# Content-Encoding decoders to prevent denial-of-service via unbounded -# decompression chains. Backported from urllib3 2.6.0 (commit 24d7b67). -class MultiDecoder: - """ - From RFC7231: - If one or more encodings have been applied to a representation, the - sender that applied the encodings MUST generate a Content-Encoding - header field that lists the content codings in the order in which - they were applied. - """ - - # Maximum allowed number of chained HTTP encodings in the - # Content-Encoding header. - max_decode_links = 5 - - def __init__(self, modes): - encodings = [m.strip() for m in modes.split(",")] - if len(encodings) > self.max_decode_links: - raise DecodeError( - "Too many content encodings in the chain: " - "%d > %d" % (len(encodings), self.max_decode_links) - ) - self._decoders = [_get_decoder(e) for e in encodings] - - def flush(self): - return self._decoders[0].flush() - - def decompress(self, data): - for d in reversed(self._decoders): - data = d.decompress(data) - return data - - -def _get_decoder(mode): - if "," in mode: - return MultiDecoder(mode) - - if mode == "gzip": - return GzipDecoder() - - if brotli is not None and mode == "br": - return BrotliDecoder() - - return DeflateDecoder() - - -class HTTPResponse(io.IOBase): - """ - HTTP Response container. - - Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is - loaded and decoded on-demand when the ``data`` property is accessed. This - class is also compatible with the Python standard library's :mod:`io` - module, and can hence be treated as a readable object in the context of that - framework. - - Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: - - :param preload_content: - If True, the response's body will be preloaded during construction. - - :param decode_content: - If True, will attempt to decode the body based on the - 'content-encoding' header. - - :param original_response: - When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` - object, it's convenient to include the original for debug purposes. It's - otherwise unused. - - :param retries: - The retries contains the last :class:`~urllib3.util.retry.Retry` that - was used during the request. - - :param enforce_content_length: - Enforce content length checking. Body returned by server must match - value of Content-Length header, if present. Otherwise, raise error. - """ - - CONTENT_DECODERS = ["gzip", "deflate"] - if brotli is not None: - CONTENT_DECODERS += ["br"] - REDIRECT_STATUSES = [301, 302, 303, 307, 308] - - def __init__( - self, - body="", - headers=None, - status=0, - version=0, - reason=None, - strict=0, - preload_content=True, - decode_content=True, - original_response=None, - pool=None, - connection=None, - msg=None, - retries=None, - enforce_content_length=False, - request_method=None, - request_url=None, - auto_close=True, - ): - - if isinstance(headers, HTTPHeaderDict): - self.headers = headers - else: - self.headers = HTTPHeaderDict(headers) - self.status = status - self.version = version - self.reason = reason - self.strict = strict - self.decode_content = decode_content - # CVE-2026-21441: Track whether decoding has been initiated so that - # drain_conn can avoid decompressing redirect response bodies - # unnecessarily. Backported from urllib3 2.6.3 (commit 8864ac4), - # with _has_decoded_content tracking from commit cefd1db. - self._has_decoded_content = False - self.retries = retries - self.enforce_content_length = enforce_content_length - self.auto_close = auto_close - - self._decoder = None - self._body = None - self._fp = None - self._original_response = original_response - self._fp_bytes_read = 0 - self.msg = msg - self._request_url = request_url - - if body and isinstance(body, ((str,), bytes)): - self._body = body - - self._pool = pool - self._connection = connection - - if hasattr(body, "read"): - self._fp = body - - # Are we using the chunked-style of transfer encoding? - self.chunked = False - self.chunk_left = None - tr_enc = self.headers.get("transfer-encoding", "").lower() - # Don't incur the penalty of creating a list and then discarding it - encodings = (enc.strip() for enc in tr_enc.split(",")) - if "chunked" in encodings: - self.chunked = True - - # Determine length of response - self.length_remaining = self._init_length(request_method) - - # If requested, preload the body. - if preload_content and not self._body: - self._body = self.read(decode_content=decode_content) - - def get_redirect_location(self): - """ - Should we redirect and where to? - - :returns: Truthy redirect location string if we got a redirect status - code and valid location. ``None`` if redirect status and no - location. ``False`` if not a redirect status code. - """ - if self.status in self.REDIRECT_STATUSES: - return self.headers.get("location") - - return False - - def release_conn(self): - if not self._pool or not self._connection: - return - - self._pool._put_conn(self._connection) - self._connection = None - - def drain_conn(self): - """ - Read and discard any remaining HTTP response data in the response connection. - - Unread data in the HTTPResponse connection blocks the connection from being released back to the pool. - """ - try: - self.read( - # CVE-2026-21441: Do not spend resources decoding the content - # unless decoding has already been initiated. This prevents - # decompression-bomb attacks via compressed redirect bodies. - decode_content=self._has_decoded_content, - ) - except (HTTPError, SocketError, BaseSSLError, HTTPException): - pass - - @property - def data(self): - # For backwards-compat with earlier urllib3 0.4 and earlier. - if self._body: - return self._body - - if self._fp: - return self.read(cache_content=True) - - @property - def connection(self): - return self._connection - - def isclosed(self): - return is_fp_closed(self._fp) - - def tell(self): - """ - Obtain the number of bytes pulled over the wire so far. May differ from - the amount of content returned by :meth:``urllib3.response.HTTPResponse.read`` - if bytes are encoded on the wire (e.g, compressed). - """ - return self._fp_bytes_read - - def _init_length(self, request_method): - """ - Set initial length value for Response content if available. - """ - length = self.headers.get("content-length") - - if length is not None: - if self.chunked: - # This Response will fail with an IncompleteRead if it can't be - # received as chunked. This method falls back to attempt reading - # the response before raising an exception. - log.warning( - "Received response with both Content-Length and " - "Transfer-Encoding set. This is expressly forbidden " - "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " - "attempting to process response as Transfer-Encoding: " - "chunked." - ) - return None - - try: - # RFC 7230 section 3.3.2 specifies multiple content lengths can - # be sent in a single Content-Length header - # (e.g. Content-Length: 42, 42). This line ensures the values - # are all valid ints and that as long as the `set` length is 1, - # all values are the same. Otherwise, the header is invalid. - lengths = {int(val) for val in length.split(",")} - if len(lengths) > 1: - raise InvalidHeader( - "Content-Length contained multiple " - "unmatching values (%s)" % length - ) - length = lengths.pop() - except ValueError: - length = None - else: - if length < 0: - length = None - - # Convert status to int for comparison - # In some cases, httplib returns a status of "_UNKNOWN" - try: - status = int(self.status) - except ValueError: - status = 0 - - # Check for responses that shouldn't include a body - if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD": - length = 0 - - return length - - def _init_decoder(self): - """ - Set-up the _decoder attribute if necessary. - """ - # Note: content-encoding value should be case-insensitive, per RFC 7230 - # Section 3.2 - content_encoding = self.headers.get("content-encoding", "").lower() - if self._decoder is None: - if content_encoding in self.CONTENT_DECODERS: - self._decoder = _get_decoder(content_encoding) - elif "," in content_encoding: - encodings = [ - e.strip() - for e in content_encoding.split(",") - if e.strip() in self.CONTENT_DECODERS - ] - if len(encodings): - self._decoder = _get_decoder(content_encoding) - - DECODER_ERROR_CLASSES = (IOError, zlib.error) - if brotli is not None: - DECODER_ERROR_CLASSES += (brotli.error,) - - def _decode(self, data, decode_content, flush_decoder): - """ - Decode the data passed in and potentially flush the decoder. - """ - if not decode_content: - # CVE-2026-21441: Guard against toggling decode_content after - # decoding has already started; the decoder state would be - # inconsistent. Backported from urllib3 commit cefd1db. - if self._has_decoded_content: - raise RuntimeError( - "Calling read(decode_content=False) is not supported after " - "read(decode_content=True) was called." - ) - return data - - try: - if self._decoder: - data = self._decoder.decompress(data) - self._has_decoded_content = True - except self.DECODER_ERROR_CLASSES as e: - content_encoding = self.headers.get("content-encoding", "").lower() - raise DecodeError( - "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, - e, - ) - if flush_decoder: - data += self._flush_decoder() - - return data - - def _flush_decoder(self): - """ - Flushes the decoder. Should only be called if the decoder is actually - being used. - """ - if self._decoder: - buf = self._decoder.decompress(b"") - return buf + self._decoder.flush() - - return b"" - - @contextmanager - def _error_catcher(self): - """ - Catch low-level python exceptions, instead re-raising urllib3 - variants, so that low-level exceptions are not leaked in the - high-level api. - - On exit, release the connection back to the pool. - """ - clean_exit = False - - try: - try: - yield - - except SocketTimeout: - # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but - # there is yet no clean way to get at it from this context. - raise ReadTimeoutError(self._pool, None, "Read timed out.") - - except BaseSSLError as e: - # FIXME: Is there a better way to differentiate between SSLErrors? - if "read operation timed out" not in str(e): - # SSL errors related to framing/MAC get wrapped and reraised here - raise SSLError(e) - - raise ReadTimeoutError(self._pool, None, "Read timed out.") - - except (HTTPException, SocketError) as e: - # This includes IncompleteRead. - raise ProtocolError("Connection broken: %r" % e, e) - - # If no exception is thrown, we should avoid cleaning up - # unnecessarily. - clean_exit = True - finally: - # If we didn't terminate cleanly, we need to throw away our - # connection. - if not clean_exit: - # The response may not be closed but we're not going to use it - # anymore so close it now to ensure that the connection is - # released back to the pool. - if self._original_response: - self._original_response.close() - - # Closing the response may not actually be sufficient to close - # everything, so if we have a hold of the connection close that - # too. - if self._connection: - self._connection.close() - - # If we hold the original response but it's closed now, we should - # return the connection back to the pool. - if self._original_response and self._original_response.isclosed(): - self.release_conn() - - def _fp_read(self, amt): - """ - Read a response with the thought that reading the number of bytes - larger than can fit in a 32-bit int at a time via SSL in some - known cases leads to an overflow error that has to be prevented - if `amt` or `self.length_remaining` indicate that a problem may - happen. - - The known cases: - * 3.8 <= CPython < 3.9.7 because of a bug - https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900. - * urllib3 injected with pyOpenSSL-backed SSL-support. - * CPython < 3.10 only when `amt` does not fit 32-bit int. - """ - assert self._fp - c_int_max = 2**31 - 1 - if ( - ( - (amt and amt > c_int_max) - or (self.length_remaining and self.length_remaining > c_int_max) - ) - and not util.IS_SECURETRANSPORT - and (util.IS_PYOPENSSL or sys.version_info < (3, 10)) - ): - buffer = io.BytesIO() - # Besides `max_chunk_amt` being a maximum chunk size, it - # affects memory overhead of reading a response by this - # method in CPython. - # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum - # chunk size that does not lead to an overflow error, but - # 256 MiB is a compromise. - max_chunk_amt = 2**28 - while amt is None or amt != 0: - if amt is not None: - chunk_amt = min(amt, max_chunk_amt) - amt -= chunk_amt - else: - chunk_amt = max_chunk_amt - data = self._fp.read(chunk_amt) - if not data: - break - buffer.write(data) - del data # to reduce peak memory usage by `max_chunk_amt`. - return buffer.getvalue() - else: - # StringIO doesn't like amt=None - return self._fp.read(amt) if amt is not None else self._fp.read() - - def read(self, amt=None, decode_content=None, cache_content=False): - """ - Similar to :meth:`http.client.HTTPResponse.read`, but with two additional - parameters: ``decode_content`` and ``cache_content``. - - :param amt: - How much of the content to read. If specified, caching is skipped - because it doesn't make sense to cache partial content as the full - response. - - :param decode_content: - If True, will attempt to decode the body based on the - 'content-encoding' header. - - :param cache_content: - If True, will save the returned data such that the same result is - returned despite of the state of the underlying file object. This - is useful if you want the ``.data`` property to continue working - after having ``.read()`` the file object. (Overridden if ``amt`` is - set.) - """ - self._init_decoder() - if decode_content is None: - decode_content = self.decode_content - - if self._fp is None: - return - - flush_decoder = False - fp_closed = getattr(self._fp, "closed", False) - - with self._error_catcher(): - data = self._fp_read(amt) if not fp_closed else b"" - if amt is None: - flush_decoder = True - else: - cache_content = False - if ( - amt != 0 and not data - ): # Platform-specific: Buggy versions of Python. - # Close the connection when no data is returned - # - # This is redundant to what httplib/http.client _should_ - # already do. However, versions of python released before - # December 15, 2012 (http://bugs.python.org/issue16298) do - # not properly close the connection in all cases. There is - # no harm in redundantly calling close. - self._fp.close() - flush_decoder = True - if self.enforce_content_length and self.length_remaining not in ( - 0, - None, - ): - # This is an edge case that httplib failed to cover due - # to concerns of backward compatibility. We're - # addressing it here to make sure IncompleteRead is - # raised during streaming, so all calls with incorrect - # Content-Length are caught. - raise IncompleteRead(self._fp_bytes_read, self.length_remaining) - - if data: - self._fp_bytes_read += len(data) - if self.length_remaining is not None: - self.length_remaining -= len(data) - - data = self._decode(data, decode_content, flush_decoder) - - if cache_content: - self._body = data - - return data - - def stream(self, amt=2**16, decode_content=None): - """ - A generator wrapper for the read() method. A call will block until - ``amt`` bytes have been read from the connection or until the - connection is closed. - - :param amt: - How much of the content to read. The generator will return up to - much data per iteration, but may return less. This is particularly - likely when using compressed data. However, the empty string will - never be returned. - - :param decode_content: - If True, will attempt to decode the body based on the - 'content-encoding' header. - """ - if self.chunked and self.supports_chunked_reads(): - yield from self.read_chunked(amt, decode_content=decode_content) - else: - while not is_fp_closed(self._fp): - data = self.read(amt=amt, decode_content=decode_content) - - if data: - yield data - - @classmethod - def from_httplib(ResponseCls, r, **response_kw): - """ - Given an :class:`http.client.HTTPResponse` instance ``r``, return a - corresponding :class:`urllib3.response.HTTPResponse` object. - - Remaining parameters are passed to the HTTPResponse constructor, along - with ``original_response=r``. - """ - headers = r.msg - - if not isinstance(headers, HTTPHeaderDict): - headers = HTTPHeaderDict(headers.items()) - - # HTTPResponse objects in Python 3 don't have a .strict attribute - strict = getattr(r, "strict", 0) - resp = ResponseCls( - body=r, - headers=headers, - status=r.status, - version=r.version, - reason=r.reason, - strict=strict, - original_response=r, - **response_kw - ) - return resp - - # Backwards-compatibility methods for http.client.HTTPResponse - def getheaders(self): - warnings.warn( - "HTTPResponse.getheaders() is deprecated and will be removed " - "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.", - category=DeprecationWarning, - stacklevel=2, - ) - return self.headers - - def getheader(self, name, default=None): - warnings.warn( - "HTTPResponse.getheader() is deprecated and will be removed " - "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).", - category=DeprecationWarning, - stacklevel=2, - ) - return self.headers.get(name, default) - - # Backwards compatibility for http.cookiejar - def info(self): - return self.headers - - # Overrides from io.IOBase - def close(self): - if not self.closed: - self._fp.close() - - if self._connection: - self._connection.close() - - if not self.auto_close: - io.IOBase.close(self) - - @property - def closed(self): - if not self.auto_close: - return io.IOBase.closed.__get__(self) - elif self._fp is None: - return True - elif hasattr(self._fp, "isclosed"): - return self._fp.isclosed() - elif hasattr(self._fp, "closed"): - return self._fp.closed - else: - return True - - def fileno(self): - if self._fp is None: - raise OSError("HTTPResponse has no file to get a fileno from") - elif hasattr(self._fp, "fileno"): - return self._fp.fileno() - else: - raise OSError( - "The file-like object this HTTPResponse is wrapped " - "around has no file descriptor" - ) - - def flush(self): - if ( - self._fp is not None - and hasattr(self._fp, "flush") - and not getattr(self._fp, "closed", False) - ): - return self._fp.flush() - - def readable(self): - # This method is required for `io` module compatibility. - return True - - def readinto(self, b): - # This method is required for `io` module compatibility. - temp = self.read(len(b)) - if len(temp) == 0: - return 0 - else: - b[: len(temp)] = temp - return len(temp) - - def supports_chunked_reads(self): - """ - Checks if the underlying file-like object looks like a - :class:`http.client.HTTPResponse` object. We do this by testing for - the fp attribute. If it is present we assume it returns raw chunks as - processed by read_chunked(). - """ - return hasattr(self._fp, "fp") - - def _update_chunk_length(self): - # First, we'll figure out length of a chunk and then - # we'll try to read it from socket. - if self.chunk_left is not None: - return - line = self._fp.fp.readline() - line = line.split(b";", 1)[0] - try: - self.chunk_left = int(line, 16) - except ValueError: - # Invalid chunked protocol response, abort. - self.close() - raise InvalidChunkLength(self, line) - - def _handle_chunk(self, amt): - returned_chunk = None - if amt is None: - chunk = self._fp._safe_read(self.chunk_left) - returned_chunk = chunk - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None - elif amt < self.chunk_left: - value = self._fp._safe_read(amt) - self.chunk_left = self.chunk_left - amt - returned_chunk = value - elif amt == self.chunk_left: - value = self._fp._safe_read(amt) - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None - returned_chunk = value - else: # amt > self.chunk_left - returned_chunk = self._fp._safe_read(self.chunk_left) - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None - return returned_chunk - - def read_chunked(self, amt=None, decode_content=None): - """ - Similar to :meth:`HTTPResponse.read`, but with an additional - parameter: ``decode_content``. - - :param amt: - How much of the content to read. If specified, caching is skipped - because it doesn't make sense to cache partial content as the full - response. - - :param decode_content: - If True, will attempt to decode the body based on the - 'content-encoding' header. - """ - self._init_decoder() - # FIXME: Rewrite this method and make it a class with a better structured logic. - if not self.chunked: - raise ResponseNotChunked( - "Response is not chunked. " - "Header 'transfer-encoding: chunked' is missing." - ) - if not self.supports_chunked_reads(): - raise BodyNotHttplibCompatible( - "Body should be http.client.HTTPResponse like. " - "It should have have an fp attribute which returns raw chunks." - ) - - with self._error_catcher(): - # Don't bother reading the body of a HEAD request. - if self._original_response and is_response_to_head(self._original_response): - self._original_response.close() - return - - # If a response is already read and closed - # then return immediately. - if self._fp.fp is None: - return - - while True: - self._update_chunk_length() - if self.chunk_left == 0: - break - chunk = self._handle_chunk(amt) - decoded = self._decode( - chunk, decode_content=decode_content, flush_decoder=False - ) - if decoded: - yield decoded - - if decode_content: - # On CPython and PyPy, we should never need to flush the - # decoder. However, on Jython we *might* need to, so - # lets defensively do it anyway. - decoded = self._flush_decoder() - if decoded: # Platform-specific: Jython. - yield decoded - - # Chunk content ends with \r\n: discard it. - while True: - line = self._fp.fp.readline() - if not line: - # Some sites may not end with '\r\n'. - break - if line == b"\r\n": - break - - # We read everything; close the "file". - if self._original_response: - self._original_response.close() - - def geturl(self): - """ - Returns the URL that was the source of this response. - If the request that generated this response redirected, this method - will return the final redirect location. - """ - if self.retries is not None and len(self.retries.history): - return self.retries.history[-1].redirect_location - else: - return self._request_url - - def __iter__(self): - buffer = [] - for chunk in self.stream(decode_content=True): - if b"\n" in chunk: - chunk = chunk.split(b"\n") - yield b"".join(buffer) + chunk[0] + b"\n" - for x in chunk[1:-1]: - yield x + b"\n" - if chunk[-1]: - buffer = [chunk[-1]] - else: - buffer = [] - else: - buffer.append(chunk) - if buffer: - yield b"".join(buffer) diff --git a/pkg/patches/pip-urllib3/response.py.patch b/pkg/patches/pip-urllib3/response.py.patch new file mode 100644 index 000000000000..4bd47c69c053 --- /dev/null +++ b/pkg/patches/pip-urllib3/response.py.patch @@ -0,0 +1,64 @@ +--- a/pip/_vendor/urllib3/response.py ++++ b/pip/_vendor/urllib3/response.py +@@ -129,8 +129,18 @@ + they were applied. + """ + ++ # Maximum allowed number of chained HTTP encodings in the ++ # Content-Encoding header. CVE-2025-66418 (GHSA-gm62-xv2j-4w53). ++ max_decode_links = 5 ++ + def __init__(self, modes): +- self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")] ++ encodings = [m.strip() for m in modes.split(",")] ++ if len(encodings) > self.max_decode_links: ++ raise DecodeError( ++ "Too many content encodings in the chain: " ++ "%d > %d" % (len(encodings), self.max_decode_links) ++ ) ++ self._decoders = [_get_decoder(e) for e in encodings] + + def flush(self): + return self._decoders[0].flush() +@@ -222,6 +232,9 @@ + self.reason = reason + self.strict = strict + self.decode_content = decode_content ++ # CVE-2026-21441: tracks whether content decoding has been ++ # initiated so drain_conn can skip decompression on redirects. ++ self._has_decoded_content = False + self.retries = retries + self.enforce_content_length = enforce_content_length + self.auto_close = auto_close +@@ -286,7 +299,11 @@ + Unread data in the HTTPResponse connection blocks the connection from being released back to the pool. + """ + try: +- self.read() ++ self.read( ++ # CVE-2026-21441: Do not spend resources decoding the ++ # content unless decoding has already been initiated. ++ decode_content=self._has_decoded_content, ++ ) + except (HTTPError, SocketError, BaseSSLError, HTTPException): + pass + +@@ -394,11 +411,18 @@ + Decode the data passed in and potentially flush the decoder. + """ + if not decode_content: ++ # CVE-2026-21441: guard against toggling after decoding started. ++ if self._has_decoded_content: ++ raise RuntimeError( ++ "Calling read(decode_content=False) is not supported after " ++ "read(decode_content=True) was called." ++ ) + return data + + try: + if self._decoder: + data = self._decoder.decompress(data) ++ self._has_decoded_content = True + except self.DECODER_ERROR_CLASSES as e: + content_encoding = self.headers.get("content-encoding", "").lower() + raise DecodeError( diff --git a/tools/pkg/build.py b/tools/pkg/build.py index 7f572e86476d..3ca31ad8d7d8 100644 --- a/tools/pkg/build.py +++ b/tools/pkg/build.py @@ -32,22 +32,84 @@ _PATCHED_PIP_WHEEL: pathlib.Path | None = None +def _apply_unified_diff(original_text: str, patch_text: str) -> str: + """ + Apply a unified diff patch to *original_text* and return the result. + + This is a minimal pure-Python applier sufficient for the well-formed, + non-fuzzy patches stored in pkg/patches/pip-urllib3/. It handles the + standard unified diff hunk format produced by difflib.unified_diff and + GNU diff, including the '\\' (no newline at end of file) marker. + """ + orig_lines = original_text.splitlines(True) + result: list[str] = [] + orig_idx = 0 + + patch_lines = patch_text.splitlines(True) + i = 0 + + # Skip the file-header lines (--- / +++) before the first hunk. + while i < len(patch_lines) and not patch_lines[i].startswith("@@"): + i += 1 + + while i < len(patch_lines): + line = patch_lines[i] + if line.startswith("@@"): + m = re.match(r"^@@ -(\d+)(?:,\d+)? \+\d+(?:,\d+)? @@", line) + if not m: + i += 1 + continue + orig_start = int(m.group(1)) - 1 # convert 1-based → 0-based + + # Copy unchanged original lines that precede this hunk. + result.extend(orig_lines[orig_idx:orig_start]) + orig_idx = orig_start + i += 1 + + # Process hunk body lines. + while i < len(patch_lines): + hunk_line = patch_lines[i] + if hunk_line.startswith("@@"): + break # next hunk starts + if hunk_line.startswith("+"): + result.append(hunk_line[1:]) + elif hunk_line.startswith("-"): + orig_idx += 1 + elif hunk_line.startswith(" "): + result.append(orig_lines[orig_idx]) + orig_idx += 1 + # "\\" → "No newline at end of file" marker; skip. + i += 1 + else: + i += 1 + + # Copy any original lines that follow the last hunk. + result.extend(orig_lines[orig_idx:]) + return "".join(result) + + def _patch_pip_wheel_urllib3(wheel_path: pathlib.Path) -> None: """ Rewrite *wheel_path* in-place so that the urllib3 vendored inside pip contains the Salt security backports defined in pkg/patches/pip-urllib3/. - Patched files: - pip/_vendor/urllib3/response.py — CVE-2025-66418, CVE-2026-21441 - pip/_vendor/urllib3/_version.py — version bumped to "2.6.3" + Patches applied (unified diff format): + response.py.patch — CVE-2025-66418, CVE-2026-21441 + _version.py.patch — version bumped to "2.6.3" - The wheel's RECORD file is updated with correct sha256 hashes and sizes - for the two replaced files so that the installed dist-info stays valid. + Each patch is applied to the file as extracted from the wheel, so the + original sources do not need to be stored in the repository. The wheel's + RECORD file is updated with correct sha256 hashes and sizes for the two + patched files so that the installed dist-info stays valid. """ patches_dir = tools.utils.REPO_ROOT / "pkg" / "patches" / "pip-urllib3" - targets = { - "pip/_vendor/urllib3/response.py": (patches_dir / "response.py").read_bytes(), - "pip/_vendor/urllib3/_version.py": (patches_dir / "_version.py").read_bytes(), + patch_map = { + "pip/_vendor/urllib3/response.py": ( + patches_dir / "response.py.patch" + ).read_text(encoding="utf-8"), + "pip/_vendor/urllib3/_version.py": ( + patches_dir / "_version.py.patch" + ).read_text(encoding="utf-8"), } def _record_hash(content: bytes) -> str: @@ -62,6 +124,7 @@ def _record_hash(content: bytes) -> str: ) as zout: record_name: str | None = None record_rows: list[list[str]] = [] + patched: dict[str, bytes] = {} for item in zin.infolist(): if item.filename.endswith(".dist-info/RECORD"): @@ -69,8 +132,14 @@ def _record_hash(content: bytes) -> str: raw = zin.read(item.filename).decode("utf-8") record_rows = list(csv.reader(raw.splitlines())) continue # written last after we know the new hashes - if item.filename in targets: - zout.writestr(item, targets[item.filename]) + if item.filename in patch_map: + original = zin.read(item.filename).decode("utf-8") + patched_text = _apply_unified_diff( + original, patch_map[item.filename] + ) + patched_bytes = patched_text.encode("utf-8") + patched[item.filename] = patched_bytes + zout.writestr(item, patched_bytes) else: zout.writestr(item, zin.read(item.filename)) @@ -78,8 +147,8 @@ def _record_hash(content: bytes) -> str: if record_name: new_rows = [] for row in record_rows: - if len(row) >= 1 and row[0] in targets: - content = targets[row[0]] + if len(row) >= 1 and row[0] in patched: + content = patched[row[0]] new_rows.append( [row[0], _record_hash(content), str(len(content))] ) From 20cf7964aa2bef0507636a153f21f7d50abc390c Mon Sep 17 00:00:00 2001 From: Twangboy Date: Wed, 25 Mar 2026 11:47:48 -0600 Subject: [PATCH 3/7] Make sure pip is using our patched version --- tools/pkg/build.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/pkg/build.py b/tools/pkg/build.py index 3ca31ad8d7d8..b4b557460f24 100644 --- a/tools/pkg/build.py +++ b/tools/pkg/build.py @@ -805,12 +805,17 @@ def onedir_dependencies( ) # Install pip from the security-patched wheel instead of pulling from PyPI, # so that pip's vendored urllib3 never contains the vulnerable version. + # --force-reinstall is required because relenv ships with pip pre-installed + # at the same version (25.2), so without it pip would skip the install as + # "already satisfied" and leave the unpatched copy in site-packages. patched_pip = _build_patched_pip_wheel(ctx) ctx.run( str(python_bin), "-m", "pip", "install", + "--force-reinstall", + "--no-deps", str(patched_pip), env=env, ) From 2b939a6afc440433e92f07a78cc3728b0561bfe6 Mon Sep 17 00:00:00 2001 From: Twangboy Date: Mon, 30 Mar 2026 13:18:14 -0600 Subject: [PATCH 4/7] Fix error handling on no response from http --- salt/utils/http.py | 13 +++++++------ tests/pytests/unit/utils/test_http.py | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/salt/utils/http.py b/salt/utils/http.py index 50f1f3428bbd..0636e5c5222e 100644 --- a/salt/utils/http.py +++ b/salt/utils/http.py @@ -636,12 +636,13 @@ def query( except salt.ext.tornado.httpclient.HTTPError as exc: ret["status"] = exc.code ret["error"] = str(exc) - ret["body"], _ = _decode_result( - exc.response.body, - exc.response.headers, - backend, - decode_body=decode_body, - ) + if exc.response is not None: + ret["body"], _ = _decode_result( + exc.response.body, + exc.response.headers, + backend, + decode_body=decode_body, + ) return ret except (socket.herror, OSError, TimeoutError, socket.gaierror) as exc: if status is True: diff --git a/tests/pytests/unit/utils/test_http.py b/tests/pytests/unit/utils/test_http.py index ba4e0b6a2e2c..62a169ff9a47 100644 --- a/tests/pytests/unit/utils/test_http.py +++ b/tests/pytests/unit/utils/test_http.py @@ -166,6 +166,29 @@ def test_query_error_handling(): assert isinstance(ret.get("error", None), str) +def test_query_tornado_httperror_no_response(): + """ + Tests that http.query handles a Tornado HTTPError where exc.response is None. + This happens on connection-level failures such as a connect timeout (HTTP 599) + where no HTTP response is ever received from the server. + """ + import salt.ext.tornado.httpclient + + http_error = salt.ext.tornado.httpclient.HTTPError(599, "Timeout while connecting") + assert http_error.response is None + + mock_client = MagicMock() + mock_client.fetch.side_effect = http_error + + with patch("salt.utils.http.HTTPClient", return_value=mock_client): + ret = http.query("https://example.com/test", backend="tornado") + + assert isinstance(ret, dict) + assert ret.get("status") == 599 + assert "Timeout while connecting" in ret.get("error", "") + assert "body" not in ret + + def test_parse_cookie_header(): header = "; ".join( [ From fd42442411d4e6d03b8cf444e3f04eb713381bad Mon Sep 17 00:00:00 2001 From: Twangboy Date: Tue, 31 Mar 2026 11:59:25 -0600 Subject: [PATCH 5/7] Add a test to verify the files are patched --- .../pkg/integration/test_pip_urllib3_patch.py | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 tests/pytests/pkg/integration/test_pip_urllib3_patch.py diff --git a/tests/pytests/pkg/integration/test_pip_urllib3_patch.py b/tests/pytests/pkg/integration/test_pip_urllib3_patch.py new file mode 100644 index 000000000000..b9a7fc7831a3 --- /dev/null +++ b/tests/pytests/pkg/integration/test_pip_urllib3_patch.py @@ -0,0 +1,81 @@ +import pathlib +import re +import subprocess +import zipfile + +import pytest + + +PATCHED_URLLIB3_VERSION = "2.6.3" + + +def _site_packages(install_salt) -> pathlib.Path: + """Return the site-packages directory for the installed Salt Python.""" + ret = subprocess.run( + install_salt.binary_paths["python"] + + ["-c", "import pip, pathlib; print(pathlib.Path(pip.__file__).parent.parent)"], + capture_output=True, + text=True, + check=False, + ) + assert ret.returncode == 0, ret.stderr + return pathlib.Path(ret.stdout.strip()) + + +def test_pip_vendored_urllib3_version(install_salt): + """ + Verify that pip's vendored urllib3 in the installed Salt package + reports the security-patched version string. + """ + ret = subprocess.run( + install_salt.binary_paths["python"] + + [ + "-c", + "import pip._vendor.urllib3; print(pip._vendor.urllib3.__version__)", + ], + capture_output=True, + text=True, + check=False, + ) + assert ret.returncode == 0, ret.stderr + version = ret.stdout.strip() + assert version == PATCHED_URLLIB3_VERSION, ( + f"pip's vendored urllib3 is {version!r}; expected {PATCHED_URLLIB3_VERSION!r}" + ) + + +def test_virtualenv_embedded_pip_wheel_urllib3_version(install_salt): + """ + Verify that the pip wheel bundled inside virtualenv's seed/wheels/embed + directory also contains the security-patched urllib3. New virtualenvs + seeded from this wheel will inherit the CVE fixes. + """ + site_packages = _site_packages(install_salt) + embed_dir = site_packages / "virtualenv" / "seed" / "wheels" / "embed" + + if not embed_dir.is_dir(): + pytest.skip(f"virtualenv embed directory not found: {embed_dir}") + + pip_wheels = sorted(embed_dir.glob("pip-*.whl")) + if not pip_wheels: + pytest.skip(f"No pip wheel found in {embed_dir}") + + pip_wheel = pip_wheels[-1] + with zipfile.ZipFile(pip_wheel) as zf: + try: + with zf.open("pip/_vendor/urllib3/_version.py") as f: + content = f.read().decode("utf-8") + except KeyError: + pytest.fail( + f"pip/_vendor/urllib3/_version.py not found inside {pip_wheel.name}" + ) + + match = re.search( + r'^__version__\s*=\s*["\']([^"\']+)["\']', content, re.MULTILINE + ) + assert match, f"Could not parse __version__ from {pip_wheel.name}" + version = match.group(1) + assert version == PATCHED_URLLIB3_VERSION, ( + f"Embedded pip wheel {pip_wheel.name} contains urllib3 {version!r}; " + f"expected {PATCHED_URLLIB3_VERSION!r}" + ) From 83bf60d989bb879b6c949c7e095d405964e2431b Mon Sep 17 00:00:00 2001 From: Twangboy Date: Tue, 31 Mar 2026 17:02:03 -0600 Subject: [PATCH 6/7] Skip urllib3 patch test on downgrade tests --- .../pytests/pkg/integration/test_pip_urllib3_patch.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/pytests/pkg/integration/test_pip_urllib3_patch.py b/tests/pytests/pkg/integration/test_pip_urllib3_patch.py index b9a7fc7831a3..3738391573ff 100644 --- a/tests/pytests/pkg/integration/test_pip_urllib3_patch.py +++ b/tests/pytests/pkg/integration/test_pip_urllib3_patch.py @@ -9,6 +9,16 @@ PATCHED_URLLIB3_VERSION = "2.6.3" +@pytest.fixture(autouse=True) +def skip_on_prev_version(install_salt): + """ + Skip urllib3 patch tests when running against the previous (downgraded) + Salt version, which does not contain the CVE backports. + """ + if install_salt.use_prev_version: + pytest.skip("urllib3 CVE patch is not present in the previous Salt version") + + def _site_packages(install_salt) -> pathlib.Path: """Return the site-packages directory for the installed Salt Python.""" ret = subprocess.run( From 649003634d4f75d9b4f4a3818c7c0512f79d9357 Mon Sep 17 00:00:00 2001 From: Twangboy Date: Tue, 31 Mar 2026 17:23:41 -0600 Subject: [PATCH 7/7] Fix pre-commit --- .../pkg/integration/test_pip_urllib3_patch.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/pytests/pkg/integration/test_pip_urllib3_patch.py b/tests/pytests/pkg/integration/test_pip_urllib3_patch.py index 3738391573ff..13563abe6740 100644 --- a/tests/pytests/pkg/integration/test_pip_urllib3_patch.py +++ b/tests/pytests/pkg/integration/test_pip_urllib3_patch.py @@ -5,7 +5,6 @@ import pytest - PATCHED_URLLIB3_VERSION = "2.6.3" @@ -23,7 +22,10 @@ def _site_packages(install_salt) -> pathlib.Path: """Return the site-packages directory for the installed Salt Python.""" ret = subprocess.run( install_salt.binary_paths["python"] - + ["-c", "import pip, pathlib; print(pathlib.Path(pip.__file__).parent.parent)"], + + [ + "-c", + "import pip, pathlib; print(pathlib.Path(pip.__file__).parent.parent)", + ], capture_output=True, text=True, check=False, @@ -49,9 +51,9 @@ def test_pip_vendored_urllib3_version(install_salt): ) assert ret.returncode == 0, ret.stderr version = ret.stdout.strip() - assert version == PATCHED_URLLIB3_VERSION, ( - f"pip's vendored urllib3 is {version!r}; expected {PATCHED_URLLIB3_VERSION!r}" - ) + assert ( + version == PATCHED_URLLIB3_VERSION + ), f"pip's vendored urllib3 is {version!r}; expected {PATCHED_URLLIB3_VERSION!r}" def test_virtualenv_embedded_pip_wheel_urllib3_version(install_salt): @@ -80,9 +82,7 @@ def test_virtualenv_embedded_pip_wheel_urllib3_version(install_salt): f"pip/_vendor/urllib3/_version.py not found inside {pip_wheel.name}" ) - match = re.search( - r'^__version__\s*=\s*["\']([^"\']+)["\']', content, re.MULTILINE - ) + match = re.search(r'^__version__\s*=\s*["\']([^"\']+)["\']', content, re.MULTILINE) assert match, f"Could not parse __version__ from {pip_wheel.name}" version = match.group(1) assert version == PATCHED_URLLIB3_VERSION, (