Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES/12296.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Normalized parsing of list-style ``Connection`` and ``Transfer-Encoding``
headers so repeated field lines and comma-joined values are handled
consistently in the HTTP parser, without changing ``CIMultiDict``
storage semantics.
-- by :user:`rodrigobnogueira`.
3 changes: 3 additions & 0 deletions CHANGES/12493.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fixed :func:`aiohttp.web.run_app` losing inner traceback frames when an
exception is raised during application startup (e.g. inside
``cleanup_ctx`` or ``on_startup``). Regression since 3.10.6.
14 changes: 8 additions & 6 deletions aiohttp/_http_parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@ from cpython.mem cimport PyMem_Free, PyMem_Malloc
from libc.limits cimport ULLONG_MAX
from libc.string cimport memcpy

from multidict import CIMultiDict as _CIMultiDict, CIMultiDictProxy as _CIMultiDictProxy
from multidict import CIMultiDict as _CIMultiDict
from yarl import URL as _URL

from aiohttp import hdrs
from aiohttp.helpers import DEBUG, set_exception

from .helpers import HeadersDictProxy as _HeadersDictProxy
from .http_exceptions import (
BadHttpMessage,
BadHttpMethod,
Expand Down Expand Up @@ -61,7 +62,7 @@ __all__ = ('HttpRequestParser', 'HttpResponseParser',
cdef object URL = _URL
cdef object URL_build = URL.build
cdef object CIMultiDict = _CIMultiDict
cdef object CIMultiDictProxy = _CIMultiDictProxy
cdef object HeadersDictProxy = _HeadersDictProxy
cdef object HttpVersion = _HttpVersion
cdef object HttpVersion10 = _HttpVersion10
cdef object HttpVersion11 = _HttpVersion11
Expand All @@ -76,6 +77,7 @@ cdef tuple EMPTY_FEED_DATA_RESULT = ((), False, b"")
# In lax mode (response parser default), the check is skipped entirely
# since real-world servers (e.g. Google APIs, Werkzeug) commonly send
# duplicate headers like Content-Type or Server.
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
cdef frozenset SINGLETON_HEADERS = frozenset({
hdrs.CONTENT_LENGTH,
hdrs.CONTENT_LOCATION,
Expand Down Expand Up @@ -129,7 +131,7 @@ cdef class RawRequestMessage:
cdef readonly str method
cdef readonly str path
cdef readonly object version # HttpVersion
cdef readonly object headers # CIMultiDict
cdef readonly object headers # HeadersDictProxy
cdef readonly object raw_headers # tuple
cdef readonly object should_close
cdef readonly object compression
Expand Down Expand Up @@ -229,7 +231,7 @@ cdef class RawResponseMessage:
cdef readonly object version # HttpVersion
cdef readonly int code
cdef readonly str reason
cdef readonly object headers # CIMultiDict
cdef readonly object headers # HeadersDictProxy
cdef readonly object raw_headers # tuple
cdef readonly object should_close
cdef readonly object compression
Expand Down Expand Up @@ -316,7 +318,7 @@ cdef class HttpParser:
bytearray _buf
str _path
str _reason
list _headers
object _headers
set _seen_singletons
list _raw_headers
bint _upgraded
Expand Down Expand Up @@ -463,7 +465,7 @@ cdef class HttpParser:
chunked = self._cparser.flags & cparser.F_CHUNKED

raw_headers = tuple(self._raw_headers)
headers = CIMultiDictProxy(CIMultiDict(self._headers))
headers = HeadersDictProxy(CIMultiDict(self._headers))

if self._cparser.type == cparser.HTTP_REQUEST:
if http_version == HttpVersion11 and hdrs.HOST not in headers:
Expand Down
5 changes: 2 additions & 3 deletions aiohttp/client_exceptions.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""HTTP related errors."""

import asyncio
from collections.abc import Mapping
from typing import TYPE_CHECKING, Union

from multidict import MultiMapping

from .typedefs import StrOrURL

try:
Expand Down Expand Up @@ -73,7 +72,7 @@ def __init__(
*,
status: int | None = None,
message: str = "",
headers: MultiMapping[str] | None = None,
headers: Mapping[str, str] | None = None,
) -> None:
self.request_info = request_info
if status is not None:
Expand Down
19 changes: 7 additions & 12 deletions aiohttp/client_reqrep.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
_SENTINEL,
BaseTimerContext,
BasicAuth,
HeadersDictProxy,
HeadersMixin,
TimerNoop,
frozen_dataclass_decorator,
Expand Down Expand Up @@ -193,7 +194,7 @@ class ClientResponse(HeadersMixin):

content: StreamReader = None # type: ignore[assignment] # Payload stream
_body: bytes | None = None
_headers: CIMultiDictProxy[str] = None # type: ignore[assignment]
_headers: HeadersDictProxy = None # type: ignore[assignment]
_history: tuple["ClientResponse", ...] = ()
_raw_headers: RawHeaders = None # type: ignore[assignment]

Expand Down Expand Up @@ -324,7 +325,7 @@ def host(self) -> str:
return self._url.host

@reify
def headers(self) -> "CIMultiDictProxy[str]":
def headers(self) -> HeadersDictProxy:
return self._headers

@reify
Expand Down Expand Up @@ -393,14 +394,8 @@ def history(self) -> tuple["ClientResponse", ...]:

@reify
def links(self) -> "MultiDictProxy[MultiDictProxy[str | URL]]":
links_str = ", ".join(self.headers.getall("link", []))

if not links_str:
return MultiDictProxy(MultiDict())

links: MultiDict[MultiDictProxy[str | URL]] = MultiDict()

for val in re.split(r",(?=\s*<)", links_str):
for val in self.headers.getall("link"):
match = re.match(r"\s*<(.*)>(.*)", val)
if match is None: # Malformed link
continue
Expand Down Expand Up @@ -462,14 +457,14 @@ async def start(self, connection: "Connection") -> "ClientResponse":
self.reason = message.reason

# headers
self._headers = message.headers # type is CIMultiDictProxy
self._raw_headers = message.raw_headers # type is Tuple[bytes, bytes]
self._headers = message.headers
self._raw_headers = message.raw_headers

# payload
self.content = payload

# cookies
if cookie_hdrs := self.headers.getall(hdrs.SET_COOKIE, ()):
if cookie_hdrs := self.headers._md.getall(hdrs.SET_COOKIE, ()):
# Store raw cookie headers for CookieJar
self._raw_cookie_headers = tuple(cookie_hdrs)
return self
Expand Down
79 changes: 77 additions & 2 deletions aiohttp/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
from urllib.parse import quote
from urllib.request import getproxies, proxy_bypass

from multidict import CIMultiDict, MultiDict, MultiDictProxy, MultiMapping
from multidict import CIMultiDict, MultiDict, MultiDictProxy
from propcache.api import under_cached_property as reify
from yarl import URL

Expand All @@ -71,6 +71,37 @@
# https://github.com/python/cpython/blob/1857a40807daeae3a1bf5efb682de9c9ae6df845/Lib/asyncio/selector_events.py#L766
DEFAULT_CHUNK_SIZE = 2**18 # 256 KiB
COOKIE_MAX_LENGTH = 4096
# Regex helpers for parsing comma-separated (list-style) header values
# per RFC 9110 section 5.6.1, used by HeadersDictProxy.getall().
_QUOTED_PAIR_SUB = re.compile(r"\\(.)")  # undoes \X quoted-pair escapes
_QUOTED_STRING = r'"(?:[^"\\]|\\.)*"'  # RFC 9110 quoted-string (with escapes)
_ESCAPED_COMMENT = r"(?:[^()\\]|\\.)*"  # interior of a (comment), escapes allowed
# Matches one element in a comma-separated header list.
# Group 1: content of a top-level quoted-string (quotes stripped).
# Group 2: an unquoted element (may contain parameter quoted-strings / comments,
#          whose embedded commas must NOT split the element).
_LIST_ELEMENT_RE = re.compile(
rf"""
[ \t]*
(?:
"( (?:[^"\\]|\\.)* )" # group 1: top-level quoted-string
| ( # group 2: unquoted element
(?:
(?<=[^\s]=) {_QUOTED_STRING} # parameter quoted value
| (?<=\s) \( {_ESCAPED_COMMENT} \) # comment
| [^,] # any non-comma character
)+?
)
)
[ \t]* (?:,|\Z)
""",
re.VERBOSE,
)
# Finds parameter quoted-strings and comments inside an unquoted element for
# unescaping; these regions were protected from the comma split above.
_PROTECTED_RE = re.compile(
rf"""
(?<=[^\s]=) {_QUOTED_STRING} # parameter quoted-string
| (?<=\s) \( {_ESCAPED_COMMENT} \) # comment
""",
re.VERBOSE,
)

_T = TypeVar("_T")
_S = TypeVar("_S")
Expand Down Expand Up @@ -753,10 +784,54 @@ def ceil_timeout(
return async_timeout.timeout_at(when)


class HeadersDictProxy(Mapping[str, str]):
    """Read-only ``Mapping`` view over a ``CIMultiDict`` of headers.

    Repeated header field lines are exposed as a single comma-joined value
    via ``__getitem__``, and ``getall()`` re-splits such list-style values
    into individual elements (handling quoted-strings and comments so that
    embedded commas do not split an element).
    """

    def __init__(self, md: "CIMultiDict[str]"):
        # Underlying case-insensitive multidict; kept private so the proxy
        # remains a read-only view.
        self._md = md

    def getall(self, key: str) -> tuple[str, ...]:
        """Return the comma-separated list elements for *key*.

        Top-level quoted-strings are unquoted and unescaped; parameter
        quoted-strings and comments inside unquoted elements are unescaped
        in place.  Returns an empty tuple when the header is absent.
        """
        val = self.get(key, "")
        unescape = _QUOTED_PAIR_SUB.sub
        values = []
        for m in _LIST_ELEMENT_RE.finditer(val):
            qs = m.group(1)
            if qs is not None:
                # Top-level quoted-string: quotes already stripped by the
                # regex group; undo backslash escapes.
                values.append(unescape(r"\1", qs))
            else:
                raw = m.group(2).strip()
                if raw:
                    # Unescape only the protected (quoted/comment) regions
                    # of the unquoted element.
                    values.append(
                        _PROTECTED_RE.sub(lambda p: unescape(r"\1", p.group()), raw)
                    )
        return tuple(values)

    def __eq__(self, other: object) -> bool:
        # Unwrap the other side when it is also a proxy: delegating blindly
        # to CIMultiDict.__eq__ would compare the multidict against the
        # proxy object itself and report inequality even for identical
        # headers.
        if isinstance(other, HeadersDictProxy):
            other = other._md
        return self._md.__eq__(other)

    def __getitem__(self, key: str) -> str:
        # All values for a repeated field line, comma-joined.
        # Raises KeyError (via getall) when the header is absent.
        return ", ".join(self._md.getall(key))

    def __iter__(self) -> Iterator[str]:
        # Deduplicate keys from the multidict while retaining the order of
        # first appearance.
        seen = set()
        for k in self._md.__iter__():
            if k in seen:
                continue
            seen.add(k)
            yield k

    def __len__(self) -> int:
        # Number of distinct header names, not the number of field lines.
        return len(set(self._md.keys()))

    def __repr__(self) -> str:
        body = ", ".join(f"'{k}': {v!r}" for k, v in self.items())
        return f"<{self.__class__.__name__}({body})>"


class HeadersMixin:
"""Mixin for handling headers."""

_headers: MultiMapping[str]
_headers: Mapping[str, str]
_content_type: str | None = None
_content_dict: dict[str, str] | None = None
_stored_content_type: str | None | _SENTINEL = sentinel
Expand Down
29 changes: 15 additions & 14 deletions aiohttp/http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
TypeVar,
)

from multidict import CIMultiDict, CIMultiDictProxy, istr
from multidict import CIMultiDict, istr
from yarl import URL

from . import hdrs
Expand All @@ -37,6 +37,7 @@
EMPTY_BODY_STATUS_CODES,
NO_EXTENSIONS,
BaseTimerContext,
HeadersDictProxy,
set_exception,
)
from .http_exceptions import (
Expand Down Expand Up @@ -66,6 +67,8 @@
"RawResponseMessage",
)

_T = TypeVar("_T")

_SEP = Literal[b"\r\n", b"\n"]

ASCIISET: Final[set[str]] = set(string.printable)
Expand Down Expand Up @@ -112,7 +115,7 @@ class RawRequestMessage(NamedTuple):
method: str
path: str
version: HttpVersion
headers: CIMultiDictProxy[str]
headers: HeadersDictProxy
raw_headers: RawHeaders
should_close: bool
compression: str | None
Expand All @@ -125,7 +128,7 @@ class RawResponseMessage(NamedTuple):
version: HttpVersion
code: int
reason: str
headers: CIMultiDictProxy[str]
headers: HeadersDictProxy
raw_headers: RawHeaders
should_close: bool
compression: str | None
Expand Down Expand Up @@ -161,9 +164,7 @@ def __init__(self, max_field_size: int = 8190, lax: bool = False) -> None:
self.max_field_size = max_field_size
self._lax = lax

def parse_headers(
self, lines: list[bytes]
) -> tuple["CIMultiDictProxy[str]", RawHeaders]:
def parse_headers(self, lines: list[bytes]) -> tuple[HeadersDictProxy, RawHeaders]:
headers: CIMultiDict[str] = CIMultiDict()
# note: "raw" does not mean inclusion of OWS before/after the field value
raw_headers = []
Expand Down Expand Up @@ -237,10 +238,10 @@ def parse_headers(
headers.add(name, value)
raw_headers.append((bname, bvalue))

return (CIMultiDictProxy(headers), tuple(raw_headers))
return (HeadersDictProxy(headers), tuple(raw_headers))


def _is_supported_upgrade(headers: CIMultiDictProxy[str]) -> bool:
def _is_supported_upgrade(headers: HeadersDictProxy) -> bool:
"""Check if the upgrade header is supported."""
u = headers.get(hdrs.UPGRADE, "")
# .lower() can transform non-ascii characters.
Expand Down Expand Up @@ -544,9 +545,7 @@ def get_content_length() -> int | None:

def parse_headers(
self, lines: list[bytes]
) -> tuple[
"CIMultiDictProxy[str]", RawHeaders, bool | None, str | None, bool, bool
]:
) -> tuple[HeadersDictProxy, RawHeaders, bool | None, str | None, bool, bool]:
"""Parses RFC 5322 headers from a stream.

Line continuations are supported. Returns list of header name
Expand All @@ -560,12 +559,14 @@ def parse_headers(

# keep-alive and protocol switching
# RFC 9110 section 7.6.1 defines Connection as a comma-separated list.
conn_values = headers.getall(hdrs.CONNECTION, ())
# We use a simple comma split here rather than getall() for performance,
# as the target tokens (close, keep-alive, upgrade) are simple ASCII
# values that never contain commas.
conn_values = headers.get(hdrs.CONNECTION)
if conn_values:
conn_tokens = {
token.lower()
for conn_value in conn_values
for token in (part.strip(" \t") for part in conn_value.split(","))
for token in (part.strip(" \t") for part in conn_values.split(","))
if token and token.isascii()
}

Expand Down
Loading
Loading