Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES/12253.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Normalized parsing of list-style ``Connection`` and ``Transfer-Encoding``
headers so repeated field lines and comma-joined values are handled
consistently in the HTTP parser, without changing ``CIMultiDict``
storage semantics.
-- by :user:`rodrigobnogueira`.
48 changes: 48 additions & 0 deletions aiohttp/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,54 @@ def parse_content_type(raw: str) -> tuple[str, MappingProxyType[str, str]]:
return content_type, MappingProxyType(content_dict)


def parse_http_list_values(header_values: Iterable[str]) -> tuple[str, ...]:
    """Flatten one or more HTTP field lines into their list elements.

    Equivalent wire representations -- repeated field lines such as
    ``Foo: 1`` + ``Foo: 2`` and a single comma-joined line ``Foo: 1, 2`` --
    yield the same tuple of elements.  Commas inside double-quoted
    substrings never act as separators.
    """
    return tuple(
        element
        for field_line in header_values
        for element in _parse_http_list_value(field_line)
    )


def _parse_http_list_value(header_value: str) -> list[str]:
values: list[str] = []
start = 0
in_quotes = False
escaped = False

for idx, ch in enumerate(header_value):
if escaped:
escaped = False
continue

if ch == "\\" and in_quotes:
escaped = True
continue

if ch == '"':
in_quotes = not in_quotes
continue

if ch == "," and not in_quotes:
value = header_value[start:idx].strip(" \t")
if value:
values.append(value)
start = idx + 1

value = header_value[start:].strip(" \t")
if value:
values.append(value)

return values


def guess_filename(obj: Any, default: str | None = None) -> str | None:
name = getattr(obj, "name", None)
if name and isinstance(name, str) and name[0] != "<" and name[-1] != ">":
Expand Down
20 changes: 11 additions & 9 deletions aiohttp/http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
EMPTY_BODY_STATUS_CODES,
NO_EXTENSIONS,
BaseTimerContext,
parse_http_list_values,
set_exception,
)
from .http_exceptions import (
Expand Down Expand Up @@ -522,14 +523,9 @@ def parse_headers(

# keep-alive and protocol switching
# RFC 9110 section 7.6.1 defines Connection as a comma-separated list.
conn_values = headers.getall(hdrs.CONNECTION, ())
conn_values = parse_http_list_values(headers.getall(hdrs.CONNECTION, ()))
if conn_values:
conn_tokens = {
token.lower()
for conn_value in conn_values
for token in (part.strip(" \t") for part in conn_value.split(","))
if token and token.isascii()
}
conn_tokens = {token.lower() for token in conn_values if token.isascii()}

if "close" in conn_tokens:
close_conn = True
Expand Down Expand Up @@ -658,7 +654,9 @@ def _is_chunked_te(self, te: str) -> bool:
# https://www.rfc-editor.org/rfc/rfc9112#section-7.1-3
# "A sender MUST NOT apply the chunked transfer coding more
# than once to a message body"
parts = [p.strip(" \t") for p in te.split(",")]
parts = list(parse_http_list_values((te,)))
if not parts:
raise BadHttpMessage("Request has invalid `Transfer-Encoding`")
chunked_count = sum(1 for p in parts if p.isascii() and p.lower() == "chunked")
if chunked_count > 1:
raise BadHttpMessage("Request has duplicate `chunked` Transfer-Encoding")
Expand Down Expand Up @@ -751,7 +749,11 @@ def parse_message(self, lines: list[bytes]) -> RawResponseMessage:

def _is_chunked_te(self, te: str) -> bool:
    """Return True if ``chunked`` is the final transfer coding in *te*.

    Per RFC 9112 section 6.3, a response body is delimited by chunked
    framing only when ``chunked`` is the LAST coding applied.  The span
    as written contained the superseded one-liner followed by unreachable
    replacement code (a diff-merge artifact); only the list-parsing
    implementation is kept here.
    """
    parts = parse_http_list_values((te,))
    if not parts:
        # Header collapsed to no tokens (e.g. ", ,"): treat as not chunked.
        return False
    last = parts[-1]
    # isascii() guards the case-insensitive comparison against odd bytes.
    return last.isascii() and last.lower() == "chunked"


class HttpPayloadParser:
Expand Down
39 changes: 39 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,45 @@ def test_parse_content_type(
assert result == expected


def test_cimultidict_list_header_representations_differ() -> None:
    """Characterization: CIMultiDict keeps wire representations distinct.

    getall() returns one entry per field line, so repeated lines and a
    comma-joined line look different -- the asymmetry the parser-level
    normalization exists to smooth over.
    """
    as_repeated_lines = CIMultiDict([("Foo", "1"), ("Foo", "2")])
    as_single_line = CIMultiDict([("Foo", "1, 2")])

    assert as_repeated_lines.getall("Foo") == ["1", "2"]
    assert as_single_line.getall("Foo") == ["1, 2"]


@pytest.mark.parametrize(
    ("raw_values", "parsed"),
    [
        # One value per field line.
        (("1", "2"), ("1", "2")),
        # Comma-joined values on a single field line.
        (("1, 2",), ("1", "2")),
        # A comma inside a quoted string must not split the value.
        (
            ('"http://example.com/a.html,foo", apples',),
            ('"http://example.com/a.html,foo"', "apples"),
        ),
        # An escaped quote does not terminate the quoted string.
        (('"foo\\"bar", baz',), ('"foo\\"bar"', "baz")),
        # Surrounding spaces and tabs are stripped.
        ((" spam , eggs ",), ("spam", "eggs")),
        # Elements that strip to nothing are dropped entirely.
        ((", , ",), ()),
    ],
)
def test_parse_http_list_values(
    raw_values: tuple[str, ...], parsed: tuple[str, ...]
) -> None:
    """Field lines parse into the expected flat tuple of list elements."""
    assert helpers.parse_http_list_values(raw_values) == parsed


def test_parse_http_list_values_normalizes_equivalent_field_representations() -> None:
    """Repeated field lines and a comma-joined line parse to the same tokens."""
    expected = ("1", "2")
    equivalent_headers = (
        CIMultiDict([("Foo", "1"), ("Foo", "2")]),
        CIMultiDict([("Foo", "1, 2")]),
    )

    for headers in equivalent_headers:
        assert helpers.parse_http_list_values(headers.getall("Foo")) == expected


# ------------------- guess_filename ----------------------------------


Expand Down
19 changes: 19 additions & 0 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,15 @@ def test_request_te_chunked123(parser: HttpRequestParser) -> None:
parser.feed_data(text)


def test_request_te_empty_list_invalid(parser: HttpRequestParser) -> None:
    """A Transfer-Encoding list that collapses to no tokens is rejected."""
    request = b"GET /test HTTP/1.1\r\nTransfer-Encoding: , \t ,\r\n\r\n"

    with pytest.raises(
        http_exceptions.BadHttpMessage,
        match="Request has invalid `Transfer-Encoding`",
    ):
        parser.feed_data(request)


async def test_request_te_last_chunked(parser: HttpRequestParser) -> None:
text = b"GET /test HTTP/1.1\r\nTransfer-Encoding: not, chunked\r\n\r\n1\r\nT\r\n3\r\nest\r\n0\r\n\r\n"
messages, upgrade, tail = parser.feed_data(text)
Expand Down Expand Up @@ -1413,6 +1422,16 @@ async def test_http_response_parser_notchunked(
assert await messages[0][1].read() == b"1\r\nT\r\n3\r\nest\r\n0\r\n\r\n"


async def test_http_response_parser_empty_list_te_not_chunked(
    response: HttpResponseParser,
) -> None:
    """An all-empty Transfer-Encoding list is treated as not chunked."""
    data = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: , \t ,\r\n\r\nbody"

    messages, _upgraded, _tail = response.feed_data(data)
    response.feed_eof()
    payload = messages[0][1]

    assert await payload.read() == b"body"


async def test_http_response_parser_last_chunked(
response: HttpResponseParser,
) -> None:
Expand Down
Loading