From c37149c216e78bf793e8e55009b4a9103f4e3888 Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Mon, 29 Jun 2026 10:13:54 +0200 Subject: [PATCH] Encode disallowed characters around already percent-encoded triples Reserved ("+") and fragment ("#") expansion must preserve already valid percent-encoded triples (RFC 6570 Section 3.2.3). The current implementation does this by returning the value untouched as soon as `urllib.parse.unquote` detects any triple, which also skips encoding every other disallowed character in the value. As a result `URITemplate("{+v}").expand(v="a b%20c")` produced "a b%20c" (space left raw) instead of "a%20b%20c". This is a regression from the fix for issue #99, which addressed the opposite problem of double-encoding. Quote the value segment by segment instead: pass valid percent-encoded triples through verbatim and percent-encode everything between them. Output is unchanged for values with no triples or with only triples, so existing behavior (and all current tests) is preserved. --- HISTORY.rst | 9 +++++++++ tests/test_uritemplate.py | 14 ++++++++++++++ uritemplate/variable.py | 20 +++++++++++++++++--- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index b5aaf87..51cfba1 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,15 @@ Changelog - uritemplate ======================= +Unreleased +---------- + +- Fix bug where reserved (``+``) and fragment (``#``) expansion left + disallowed characters (such as spaces) unencoded whenever the value also + contained an already percent-encoded triple. This was a regression from the + fix for + `issue #99 <https://github.com/python-hyper/uritemplate/issues/99>`_.
+ 4.2.0 - 2025-06-01 ------------------ diff --git a/tests/test_uritemplate.py b/tests/test_uritemplate.py index dfe19f5..20a7660 100644 --- a/tests/test_uritemplate.py +++ b/tests/test_uritemplate.py @@ -589,6 +589,20 @@ def test_no_mutate(self) -> None: t.expand(args, key=1) self.assertEqual(args, {}) + def test_reserved_expansion_encodes_around_pct_triples(self) -> None: + # A value mixing disallowed characters with an already valid + # percent-encoded triple must still encode the disallowed characters + # (the triple is preserved, the rest is quoted) -- RFC 6570 3.2.3. + self.assertEqual(URITemplate("{+v}").expand(v="a b%20c"), "a%20b%20c") + self.assertEqual( + URITemplate("{#v}").expand(v="x y%20z"), "#x%20y%20z" + ) + # Uppercase triple preserved; reserved characters left untouched. + self.assertEqual(URITemplate("{+v}").expand(v="a%2Fb c"), "a%2Fb%20c") + # No triple present -> ordinary quoting; all triples -> left as-is. + self.assertEqual(URITemplate("{+v}").expand(v="a b c"), "a%20b%20c") + self.assertEqual(URITemplate("{+v}").expand(v="%20"), "%20") + class TestVariableModule(unittest.TestCase): def test_is_list_of_tuples(self) -> None: diff --git a/uritemplate/variable.py b/uritemplate/variable.py index 1f7993c..d023b45 100644 --- a/uritemplate/variable.py +++ b/uritemplate/variable.py @@ -17,6 +17,7 @@ import collections.abc import enum +import re import string import typing as t import urllib.parse @@ -38,6 +39,7 @@ _GEN_DELIMS: t.Final[str] = ":/?#[]@" _SUB_DELIMS: t.Final[str] = "!$&'()*+,;=" _RESERVED_CHARACTERS: t.Final[str] = f"{_GEN_DELIMS}{_SUB_DELIMS}" +_PERCENT_ENCODED: t.Final["re.Pattern[str]"] = re.compile("%[0-9A-Fa-f]{2}") class Operator(enum.Enum): @@ -150,9 +152,21 @@ def _always_quote(self, value: str) -> str: return quote(value, "") def _only_quote_unquoted_characters(self, value: str) -> str: - if urllib.parse.unquote(value) == value: - return quote(value, _RESERVED_CHARACTERS) - return value + # For reserved ("+") and fragment 
("#") expansion, already + # percent-encoded triples must be preserved (RFC 6570 Section 3.2.3). + # Quote every other disallowed character while leaving valid triples + # untouched, rather than passing the whole value through unquoted as + # soon as a single triple is present. + result = [] + last = 0 + for match in _PERCENT_ENCODED.finditer(value): + result.append( + quote(value[last : match.start()], _RESERVED_CHARACTERS) + ) + result.append(match.group()) + last = match.end() + result.append(quote(value[last:], _RESERVED_CHARACTERS)) + return "".join(result) def quote(self, value: t.Any) -> str: if not isinstance(value, (str, bytes)):