Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES/12253.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Normalized parsing of list-style ``Connection`` and ``Transfer-Encoding``
headers so repeated field lines and comma-joined values are handled
consistently in the HTTP parser, without changing ``CIMultiDict``
storage semantics.
-- by :user:`rodrigobnogueira`.
48 changes: 48 additions & 0 deletions aiohttp/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,54 @@ def parse_content_type(raw: str) -> tuple[str, MappingProxyType[str, str]]:
return content_type, MappingProxyType(content_dict)


def parse_http_list_values(header_values: Iterable[str]) -> tuple[str, ...]:
    """Flatten one or more HTTP field lines into their list elements.

    Equivalent wire representations -- repeated field lines such as
    ``Foo: 1`` + ``Foo: 2`` and a single comma-joined line ``Foo: 1, 2`` --
    yield the same tuple of elements.  Commas inside double-quoted
    substrings never act as separators.
    """
    return tuple(
        element
        for field_line in header_values
        for element in _parse_http_list_value(field_line)
    )


def _parse_http_list_value(header_value: str) -> list[str]:
values: list[str] = []
start = 0
in_quotes = False
escaped = False

for idx, ch in enumerate(header_value):
if escaped:
escaped = False
continue

if ch == "\\" and in_quotes:
escaped = True
continue

if ch == '"':
in_quotes = not in_quotes
continue

if ch == "," and not in_quotes:
value = header_value[start:idx].strip(" \t")
if value:
values.append(value)
start = idx + 1

value = header_value[start:].strip(" \t")
if value:
values.append(value)

return values


def guess_filename(obj: Any, default: str | None = None) -> str | None:
name = getattr(obj, "name", None)
if name and isinstance(name, str) and name[0] != "<" and name[-1] != ">":
Expand Down
20 changes: 11 additions & 9 deletions aiohttp/http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
EMPTY_BODY_STATUS_CODES,
NO_EXTENSIONS,
BaseTimerContext,
parse_http_list_values,
set_exception,
)
from .http_exceptions import (
Expand Down Expand Up @@ -522,14 +523,9 @@ def parse_headers(

# keep-alive and protocol switching
# RFC 9110 section 7.6.1 defines Connection as a comma-separated list.
conn_values = headers.getall(hdrs.CONNECTION, ())
conn_values = parse_http_list_values(headers.getall(hdrs.CONNECTION, ()))
if conn_values:
conn_tokens = {
token.lower()
for conn_value in conn_values
for token in (part.strip(" \t") for part in conn_value.split(","))
if token and token.isascii()
}
conn_tokens = {token.lower() for token in conn_values if token.isascii()}

if "close" in conn_tokens:
close_conn = True
Expand Down Expand Up @@ -658,7 +654,9 @@ def _is_chunked_te(self, te: str) -> bool:
# https://www.rfc-editor.org/rfc/rfc9112#section-7.1-3
# "A sender MUST NOT apply the chunked transfer coding more
# than once to a message body"
parts = [p.strip(" \t") for p in te.split(",")]
parts = list(parse_http_list_values((te,)))
if not parts:
raise BadHttpMessage("Request has invalid `Transfer-Encoding`")
chunked_count = sum(1 for p in parts if p.isascii() and p.lower() == "chunked")
if chunked_count > 1:
raise BadHttpMessage("Request has duplicate `chunked` Transfer-Encoding")
Expand Down Expand Up @@ -751,7 +749,11 @@ def parse_message(self, lines: list[bytes]) -> RawResponseMessage:

def _is_chunked_te(self, te: str) -> bool:
    """Return True if ``chunked`` is the final transfer coding in *te*.

    Per RFC 9112 section 6.3, a response body is delimited by chunked
    framing only when ``chunked`` is the LAST coding applied.  The span
    as written contained the superseded one-liner followed by unreachable
    replacement code (a diff-merge artifact); only the list-parsing
    implementation is kept here.
    """
    parts = parse_http_list_values((te,))
    if not parts:
        # Header collapsed to no tokens (e.g. ", ,"): treat as not chunked.
        return False
    last = parts[-1]
    # isascii() guards the case-insensitive comparison against odd bytes.
    return last.isascii() and last.lower() == "chunked"


class HttpPayloadParser:
Expand Down
39 changes: 39 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,45 @@ def test_parse_content_type(
assert result == expected


def test_cimultidict_list_header_representations_differ() -> None:
    """Characterization: CIMultiDict keeps wire representations distinct.

    getall() returns one entry per field line, so repeated lines and a
    comma-joined line look different -- the asymmetry the parser-level
    normalization exists to smooth over.
    """
    as_repeated_lines = CIMultiDict([("Foo", "1"), ("Foo", "2")])
    as_single_line = CIMultiDict([("Foo", "1, 2")])

    assert as_repeated_lines.getall("Foo") == ["1", "2"]
    assert as_single_line.getall("Foo") == ["1, 2"]


@pytest.mark.parametrize(
    ("raw_values", "parsed"),
    [
        # One value per field line.
        (("1", "2"), ("1", "2")),
        # Comma-joined values on a single field line.
        (("1, 2",), ("1", "2")),
        # A comma inside a quoted string must not split the value.
        (
            ('"http://example.com/a.html,foo", apples',),
            ('"http://example.com/a.html,foo"', "apples"),
        ),
        # An escaped quote does not terminate the quoted string.
        (('"foo\\"bar", baz',), ('"foo\\"bar"', "baz")),
        # Surrounding spaces and tabs are stripped.
        ((" spam , eggs ",), ("spam", "eggs")),
        # Elements that strip to nothing are dropped entirely.
        ((", , ",), ()),
    ],
)
def test_parse_http_list_values(
    raw_values: tuple[str, ...], parsed: tuple[str, ...]
) -> None:
    """Field lines parse into the expected flat tuple of list elements."""
    assert helpers.parse_http_list_values(raw_values) == parsed


def test_parse_http_list_values_normalizes_equivalent_field_representations() -> None:
    """Repeated field lines and a comma-joined line parse to the same tokens."""
    expected = ("1", "2")
    equivalent_headers = (
        CIMultiDict([("Foo", "1"), ("Foo", "2")]),
        CIMultiDict([("Foo", "1, 2")]),
    )

    for headers in equivalent_headers:
        assert helpers.parse_http_list_values(headers.getall("Foo")) == expected


# ------------------- guess_filename ----------------------------------


Expand Down
19 changes: 19 additions & 0 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,15 @@ def test_request_te_chunked123(parser: HttpRequestParser) -> None:
parser.feed_data(text)


def test_request_te_empty_list_invalid(parser: HttpRequestParser) -> None:
    """A Transfer-Encoding list that collapses to no tokens is rejected."""
    request = b"GET /test HTTP/1.1\r\nTransfer-Encoding: , \t ,\r\n\r\n"

    with pytest.raises(
        http_exceptions.BadHttpMessage,
        match="Request has invalid `Transfer-Encoding`",
    ):
        parser.feed_data(request)


async def test_request_te_last_chunked(parser: HttpRequestParser) -> None:
text = b"GET /test HTTP/1.1\r\nTransfer-Encoding: not, chunked\r\n\r\n1\r\nT\r\n3\r\nest\r\n0\r\n\r\n"
messages, upgrade, tail = parser.feed_data(text)
Expand Down Expand Up @@ -1413,6 +1422,16 @@ async def test_http_response_parser_notchunked(
assert await messages[0][1].read() == b"1\r\nT\r\n3\r\nest\r\n0\r\n\r\n"


async def test_http_response_parser_empty_list_te_not_chunked(
    response: HttpResponseParser,
) -> None:
    """An all-empty Transfer-Encoding list is treated as not chunked."""
    data = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: , \t ,\r\n\r\nbody"

    messages, _upgraded, _tail = response.feed_data(data)
    response.feed_eof()
    payload = messages[0][1]

    assert await payload.read() == b"body"


async def test_http_response_parser_last_chunked(
response: HttpResponseParser,
) -> None:
Expand Down
Loading