From b367ef237cc5c4a58dfe916aa3234e944954997c Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 12:21:43 +0100 Subject: [PATCH 01/12] Add WIP fix for handling urls with unicode characters. --- st2common/st2common/router.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/st2common/st2common/router.py b/st2common/st2common/router.py index 47ef009b98..1309037ced 100644 --- a/st2common/st2common/router.py +++ b/st2common/st2common/router.py @@ -29,7 +29,7 @@ from six.moves.urllib import parse as urlparse # pylint: disable=import-error import webob from webob import cookies, exc, Request -from webob.compat import url_unquote +from six.moves import urllib from st2common.exceptions import rbac as rbac_exc from st2common.exceptions import auth as auth_exc @@ -239,7 +239,10 @@ def add_spec(self, spec, transforms): ) def match(self, req): - path = url_unquote(req.path) + # NOTE: webob.url_unquote doesn't work correctly under Python 3 when paths contain non-ascii + # characters. That method supposed to handle Python 2 and Python 3 compatibility, but it + # doesn't work correctly under Python 3. + path = urllib.parse.unquote(req.path) LOG.debug("Match path: %s", path) if len(path) > 1 and path.endswith("/"): From 4e0bb04cae8e97538d8bb05057740b43e95e187b Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 12:49:17 +0100 Subject: [PATCH 02/12] Add WIP fix for the client side and make sure urls with unicode characters are handled correctly. --- st2client/st2client/utils/httpclient.py | 29 +++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/st2client/st2client/utils/httpclient.py b/st2client/st2client/utils/httpclient.py index 6af6595ec5..eca872b1f0 100644 --- a/st2client/st2client/utils/httpclient.py +++ b/st2client/st2client/utils/httpclient.py @@ -15,6 +15,8 @@ from __future__ import absolute_import +from typing import Union + import json import logging from pipes import quote as pquote @@ -78,6 +80,27 @@ def get_url_without_trailing_slash(value): return result +def sanitize_url(url: Union[bytes, str]) -> str: + """ + Sanitize the provided url and ensure it's a valid unicode string. + + By default, sys.argv will contain a unicode string where the actual item values which contain + unicode sequences are escaped using unicode surrogates. + + For example, if "examples.test_rule_utf8_náme" value is specified as a CLI argument, sys.argv + and as such url, would contain "examples.test_rule_utf8_n%ED%B3%83%ED%B2%A1me" which is not + what we want. + + This won't work correctly when sending requests to the API. As such, we correctly escape the + value to the unicode string here and then let the http layer (requests) correctly url encode + this value. + """ + if isinstance(url, str): + url = url.encode("ascii", "surrogateescape").decode("utf-8") + + return url + + class HTTPClient(object): def __init__(self, root, cacert=None, debug=False): self.root = get_url_without_trailing_slash(root) @@ -87,6 +110,7 @@ def __init__(self, root, cacert=None, debug=False): @add_ssl_verify_to_kwargs @add_auth_token_to_headers def get(self, url, **kwargs): + url = sanitize_url(url) response = requests.get(self.root + url, **kwargs) response = self._response_hook(response=response) return response @@ -95,6 +119,7 @@ def get(self, url, **kwargs): @add_auth_token_to_headers @add_json_content_type_to_headers def post(self, url, data, **kwargs): + url = sanitize_url(url) response = requests.post(self.root + url, json.dumps(data), **kwargs) response = self._response_hook(response=response) return response @@ -102,6 +127,7 @@ def post(self, url, data, **kwargs): @add_ssl_verify_to_kwargs @add_auth_token_to_headers def post_raw(self, url, data, **kwargs): + url = sanitize_url(url) response = requests.post(self.root + url, data, **kwargs) response = self._response_hook(response=response) return response @@ -110,6 +136,7 @@ def post_raw(self, url, data, **kwargs): @add_auth_token_to_headers @add_json_content_type_to_headers def put(self, url, data, **kwargs): + url = sanitize_url(url) response = requests.put(self.root + url, json.dumps(data), **kwargs) response = self._response_hook(response=response) return response @@ -118,6 +145,7 @@ def put(self, url, data, **kwargs): @add_auth_token_to_headers @add_json_content_type_to_headers def patch(self, url, data, **kwargs): + url = sanitize_url(url) response = requests.patch(self.root + url, data, **kwargs) response = self._response_hook(response=response) return response @@ -125,6 +153,7 @@ def patch(self, url, data, **kwargs): @add_ssl_verify_to_kwargs @add_auth_token_to_headers def delete(self, url, **kwargs): + url = sanitize_url(url) response = requests.delete(self.root + url, **kwargs) response = self._response_hook(response=response) return response From 20e9b817a29873e5c8ce702fe709c0493d4bc6f8 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 13:16:12 +0100 Subject: [PATCH 03/12] Revert "Add WIP fix for the client side and make sure urls with unicode" This reverts commit 4e0bb04cae8e97538d8bb05057740b43e95e187b. --- st2client/st2client/utils/httpclient.py | 29 ------------------------- 1 file changed, 29 deletions(-) diff --git a/st2client/st2client/utils/httpclient.py b/st2client/st2client/utils/httpclient.py index eca872b1f0..6af6595ec5 100644 --- a/st2client/st2client/utils/httpclient.py +++ b/st2client/st2client/utils/httpclient.py @@ -15,8 +15,6 @@ from __future__ import absolute_import -from typing import Union - import json import logging from pipes import quote as pquote @@ -80,27 +78,6 @@ def get_url_without_trailing_slash(value): return result -def sanitize_url(url: Union[bytes, str]) -> str: - """ - Sanitize the provided url and ensure it's a valid unicode string. - - By default, sys.argv will contain a unicode string where the actual item values which contain - unicode sequences are escaped using unicode surrogates. - - For example, if "examples.test_rule_utf8_náme" value is specified as a CLI argument, sys.argv - and as such url, would contain "examples.test_rule_utf8_n%ED%B3%83%ED%B2%A1me" which is not - what we want. - - This won't work correctly when sending requests to the API. As such, we correctly escape the - value to the unicode string here and then let the http layer (requests) correctly url encode - this value. - """ - if isinstance(url, str): - url = url.encode("ascii", "surrogateescape").decode("utf-8") - - return url - - class HTTPClient(object): def __init__(self, root, cacert=None, debug=False): self.root = get_url_without_trailing_slash(root) @@ -110,7 +87,6 @@ def __init__(self, root, cacert=None, debug=False): @add_ssl_verify_to_kwargs @add_auth_token_to_headers def get(self, url, **kwargs): - url = sanitize_url(url) response = requests.get(self.root + url, **kwargs) response = self._response_hook(response=response) return response @@ -119,7 +95,6 @@ def get(self, url, **kwargs): @add_auth_token_to_headers @add_json_content_type_to_headers def post(self, url, data, **kwargs): - url = sanitize_url(url) response = requests.post(self.root + url, json.dumps(data), **kwargs) response = self._response_hook(response=response) return response @@ -127,7 +102,6 @@ def post(self, url, data, **kwargs): @add_ssl_verify_to_kwargs @add_auth_token_to_headers def post_raw(self, url, data, **kwargs): - url = sanitize_url(url) response = requests.post(self.root + url, data, **kwargs) response = self._response_hook(response=response) return response @@ -136,7 +110,6 @@ def post_raw(self, url, data, **kwargs): @add_auth_token_to_headers @add_json_content_type_to_headers def put(self, url, data, **kwargs): - url = sanitize_url(url) response = requests.put(self.root + url, json.dumps(data), **kwargs) response = self._response_hook(response=response) return response @@ -145,7 +118,6 @@ def put(self, url, data, **kwargs): @add_auth_token_to_headers @add_json_content_type_to_headers def patch(self, url, data, **kwargs): - url = sanitize_url(url) response = requests.patch(self.root + url, data, **kwargs) response = self._response_hook(response=response) return response @@ -153,7 +125,6 @@ def patch(self, url, data, **kwargs): @add_ssl_verify_to_kwargs @add_auth_token_to_headers def delete(self, url, **kwargs): - url = sanitize_url(url) response = requests.delete(self.root + url, **kwargs) response = self._response_hook(response=response) return response From cab8b52829a6367e157ad450f74ce0bde8d1d8da Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 13:16:37 +0100 Subject: [PATCH 04/12] Add a fix for CLI which would result in exceptions in various scenarios where the argument contains unicode character. --- st2client/st2client/shell.py | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/st2client/st2client/shell.py b/st2client/st2client/shell.py index 7d3359c532..31748144ee 100755 --- a/st2client/st2client/shell.py +++ b/st2client/st2client/shell.py @@ -98,6 +98,47 @@ PACKAGE_METADATA_FILE_PATH = "/opt/stackstorm/st2/package.meta" +""" +Here we sanitize the provided args and ensure they contain valid unicode values. + +By default, sys.argv will contain a unicode string where the actual item values which contain +unicode sequences are escaped using unicode surrogates. + +For example, if "examples.test_rule_utf8_náme" value is specified as a CLI argument, sys.argv +and as such also url, would contain "examples.test_rule_utf8_n%ED%B3%83%ED%B2%A1me" which is not +what we want. + +Complete sys.argv example: + +1. Default - ['shell.py', '--debug', 'rule', 'get', 'examples.test_rule_utf8_n\udcc3\udca1me'] +2. What we want - ['shell.py', '--debug', 'rule', 'get', 'examples.test_rule_utf8_náme'] + +This won't work correctly when sending requests to the API. As such, we correctly escape the +value to the unicode string here and then let the http layer (requests) correctly url encode +this value. + +Technically, we could also just try to re-encode it in the HTTPClient and I tried that first, but +it turns out more code in the client results in exceptions if it's not re-encoded as early as +possible. +""" + +REENCODE_ARGV = os.environ.get("ST2_CLI_RENCODE_ARGV", "true").lower() in [ + "true", + "1", + "yes", +] + +if REENCODE_ARGV: + try: + sys.argv = list( + map( + lambda arg: arg.encode("ascii", "surrogateescape").decode("utf-8"), + sys.argv, + ) + ) + except Exception as e: + print("Failed to re-encode sys.argv: %s" % (str(e))) + def get_stackstorm_version(): """ From e08f9f114175b833d4e82a5cb182dce263e16a9f Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 15:07:02 +0100 Subject: [PATCH 05/12] Move functionality for surrogate re-encoding to a uility function for easier testability, etc. Add unit tests for that function. --- st2client/st2client/shell.py | 8 ++------ st2client/st2client/utils/misc.py | 24 +++++++++++++++++++++++- st2client/tests/unit/test_shell.py | 20 ++++++++++++++++++++ 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/st2client/st2client/shell.py b/st2client/st2client/shell.py index 31748144ee..4bca31e753 100755 --- a/st2client/st2client/shell.py +++ b/st2client/st2client/shell.py @@ -63,6 +63,7 @@ from st2client.config import set_config from st2client.exceptions.operations import OperationFailureException from st2client.utils.logging import LogLevelFilter, set_log_level_for_all_loggers +from st2client.utils.misc import reencode_list_with_surrogate_escape_sequences from st2client.commands.auth import TokenCreateCommand from st2client.commands.auth import LoginCommand @@ -130,12 +131,7 @@ if REENCODE_ARGV: try: - sys.argv = list( - map( - lambda arg: arg.encode("ascii", "surrogateescape").decode("utf-8"), - sys.argv, - ) - ) + sys.argv = reencode_list_with_surrogate_escape_sequences(sys.argv) except Exception as e: print("Failed to re-encode sys.argv: %s" % (str(e))) diff --git a/st2client/st2client/utils/misc.py b/st2client/st2client/utils/misc.py index 62c7b1a61f..f8e81a5e99 100644 --- a/st2client/st2client/utils/misc.py +++ b/st2client/st2client/utils/misc.py @@ -14,11 +14,14 @@ # limitations under the License. from __future__ import absolute_import + +from typing import List + import copy import six -__all__ = ["merge_dicts"] +__all__ = ["merge_dicts", "reencode_list_with_surrogate_escape_sequences"] def merge_dicts(d1, d2): @@ -39,3 +42,22 @@ def merge_dicts(d1, d2): result[key] = value return result + + +def reencode_list_with_surrogate_escape_sequences(value: List[str]) -> List[str]: + """ + Function which reencodes each item in the provided list replacing unicode surrogate escape + sequences using actual unicode values. + """ + result = [] + + for item in value: + try: + item = item.encode("ascii", "surrogateescape").decode("utf-8") + except UnicodeEncodeError: + # Already a unicode string, nothing to do + pass + + result.append(item) + + return result diff --git a/st2client/tests/unit/test_shell.py b/st2client/tests/unit/test_shell.py index bce176b4ad..2f3422e03f 100644 --- a/st2client/tests/unit/test_shell.py +++ b/st2client/tests/unit/test_shell.py @@ -32,6 +32,7 @@ from st2client.shell import Shell from st2client.client import Client from st2client.utils import httpclient +from st2client.utils.misc import reencode_list_with_surrogate_escape_sequences from st2common.models.db.auth import TokenDB from tests import base @@ -828,3 +829,22 @@ def test_automatic_auth_skipped_if_token_provided_as_cli_argument(self): args = shell.parser.parse_args(args=argv) shell.get_client(args=args) self.assertEqual(shell._get_auth_token.call_count, 0) + + def test_reencode_list_replace_surrogate_escape(self): + value = ["a", "b", "c", "d"] + expected = value + result = reencode_list_with_surrogate_escape_sequences(value) + + self.assertEqual(result, expected) + + value = ["a", "b", "c", "n\udcc3\udca1me"] + expected = ["a", "b", "c", "náme"] + result = reencode_list_with_surrogate_escape_sequences(value) + + self.assertEqual(result, expected) + + value = ["a", "b", "c", "你好"] + expected = value + result = reencode_list_with_surrogate_escape_sequences(value) + + self.assertEqual(result, expected) From 2f06857f7fb754d806e1e56ea74ff60dfafdc36b Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 15:50:43 +0100 Subject: [PATCH 06/12] Add additional unit tests for the utf-8 bug. Keep in mind that API tests are problematic - we don't exercise exactly the same code paths as we do when running outside tests because the tests itself utilize webtest module which has the same bug as webob. This means I needed to patch the code so it utilizes the same version of the functions as we do in prod. Not ideal and also shows why it's importat we also have actual end to end tests for the api. --- .../tests/unit/controllers/v1/test_actions.py | 59 +++++++++++++++++++ st2client/tests/unit/test_commands.py | 15 +++++ st2client/tests/unit/test_shell.py | 13 ++++ st2tests/st2tests/api.py | 23 ++++++++ 4 files changed, 110 insertions(+) diff --git a/st2api/tests/unit/controllers/v1/test_actions.py b/st2api/tests/unit/controllers/v1/test_actions.py index 40e973c0a1..cd1d8b555c 100644 --- a/st2api/tests/unit/controllers/v1/test_actions.py +++ b/st2api/tests/unit/controllers/v1/test_actions.py @@ -17,6 +17,7 @@ import os import os.path import copy +import urllib try: import simplejson as json @@ -277,6 +278,22 @@ } +ACTION_WITH_UNICODE_NAME = { + "name": "st2.dummy.action_unicode_我爱狗", + "description": "test description", + "enabled": True, + "pack": "dummy_pack_1", + "entry_point": "/tmp/test/action1.sh", + "runner_type": "local-shell-script", + "parameters": { + "a": {"type": "string", "default": "A1"}, + "b": {"type": "string", "default": "B1"}, + "sudo": {"default": True, "immutable": True}, + }, + "notify": {"on-complete": {"message": "Woohoo! I completed!!!"}}, +} + + class ActionsControllerTestCase( FunctionalTest, APIControllerWithIncludeAndExcludeFilterTestCase, CleanFilesTestCase ): @@ -640,6 +657,48 @@ def test_action_with_notify_update(self): self.assertEqual(get_resp.json["notify"], {}) self.__do_delete(action_id) + @mock.patch.object( + action_validator, "validate_action", mock.MagicMock(return_value=True) + ) + def test_action_with_unicode_name_create(self): + post_resp = self.__do_post(ACTION_WITH_UNICODE_NAME) + action_id = self.__get_action_id(post_resp) + get_resp = self.__do_get_one(action_id) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(self.__get_action_id(get_resp), action_id) + self.assertEqual(get_resp.json["name"], "st2.dummy.action_unicode_我爱狗") + self.assertEqual( + get_resp.json["ref"], "dummy_pack_1.st2.dummy.action_unicode_我爱狗" + ) + self.assertEqual( + get_resp.json["uid"], "action:dummy_pack_1:st2.dummy.action_unicode_我爱狗" + ) + + get_resp = self.__do_get_one("dummy_pack_1.st2.dummy.action_unicode_我爱狗") + self.assertEqual(get_resp.json["name"], "st2.dummy.action_unicode_我爱狗") + self.assertEqual( + get_resp.json["ref"], "dummy_pack_1.st2.dummy.action_unicode_我爱狗" + ) + self.assertEqual( + get_resp.json["uid"], "action:dummy_pack_1:st2.dummy.action_unicode_我爱狗" + ) + + # Now retrieve the action using the ref and ensure it works correctly + # NOTE: We need to use urlquoted value when retrieving the item since that's how all the + # http clients work - non ascii characters get escaped / quoted. Passing in unquoted + # value will result in exception (as expected). + ref_quoted = urllib.parse.quote("dummy_pack_1.st2.dummy.action_unicode_我爱狗") + get_resp = self.__do_get_one(ref_quoted) + self.assertEqual(get_resp.json["name"], "st2.dummy.action_unicode_我爱狗") + self.assertEqual( + get_resp.json["ref"], "dummy_pack_1.st2.dummy.action_unicode_我爱狗" + ) + self.assertEqual( + get_resp.json["uid"], "action:dummy_pack_1:st2.dummy.action_unicode_我爱狗" + ) + + self.__do_delete(action_id) + @mock.patch.object( action_validator, "validate_action", mock.MagicMock(return_value=True) ) diff --git a/st2client/tests/unit/test_commands.py b/st2client/tests/unit/test_commands.py index de84f7883f..6dc5803b2c 100644 --- a/st2client/tests/unit/test_commands.py +++ b/st2client/tests/unit/test_commands.py @@ -295,6 +295,21 @@ def test_command_delete_failed(self): args = self.parser.parse_args(["fakeresource", "delete", "cba"]) self.assertRaises(Exception, self.branch.commands["delete"].run, args) + @mock.patch.object( + models.ResourceManager, + "get_by_id", + mock.MagicMock(return_value=base.FakeResource(**base.RESOURCES[0])), + ) + def test_command_get_unicode_primary_key(self): + args = self.parser.parse_args( + ["fakeresource", "get", "examples.test_rule_utf8_náme"] + ) + self.assertEqual(args.func, self.branch.commands["get"].run_and_print) + instance = self.branch.commands["get"].run(args) + actual = instance.serialize() + expected = json.loads(json.dumps(base.RESOURCES[0])) + self.assertEqual(actual, expected) + class ResourceViewCommandTestCase(unittest2.TestCase): def setUp(self): diff --git a/st2client/tests/unit/test_shell.py b/st2client/tests/unit/test_shell.py index 2f3422e03f..32f128a506 100644 --- a/st2client/tests/unit/test_shell.py +++ b/st2client/tests/unit/test_shell.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Copyright 2020 The StackStorm Authors. # Copyright 2019 Extreme Networks, Inc. # @@ -830,6 +831,18 @@ def test_automatic_auth_skipped_if_token_provided_as_cli_argument(self): shell.get_client(args=args) self.assertEqual(shell._get_auth_token.call_count, 0) + def test_get_one_unicode_character_in_name(self): + self._write_mock_config() + + shell = Shell() + shell._get_auth_token = mock.Mock() + + os.environ["ST2_AUTH_TOKEN"] = "fooo" + argv = ["action", "get", "examples.test_rule_utf8_náme"] + args = shell.parser.parse_args(args=argv) + shell.get_client(args=args) + self.assertEqual(args.ref_or_id, "examples.test_rule_utf8_náme") + def test_reencode_list_replace_surrogate_escape(self): value = ["a", "b", "c", "d"] expected = value diff --git a/st2tests/st2tests/api.py b/st2tests/st2tests/api.py index 7000ddd9a1..dae8eb7831 100644 --- a/st2tests/st2tests/api.py +++ b/st2tests/st2tests/api.py @@ -23,8 +23,12 @@ import six import webtest +import webob.compat +import webob.request import mock +from six.moves import urllib + from oslo_config import cfg from st2common.router import Router @@ -57,6 +61,25 @@ class ResponseLeakError(ValueError): pass +# NOTE: This is not ideal, but we need to patch those functions so it works correctly for the +# tests. +# The problem is that for the unit based api tests we utilize webtest which has the same bug as +# webob when handling unicode characters in the path names and the actual unit test API code doesn't +# follow exactly the same code path as actual production code which doesn't utilize webtest +# In short, that's why important we also have end to end tests for API endpoints! +webob.request.url_unquote = urllib.parse.unquote +webob.compat.url_unquote = urllib.parse.unquote + + +def bytes_(s, encoding="utf-8", errors="strict"): + if isinstance(s, six.text_type): + return s.encode("utf-8", errors) + + +webob.compat.bytes_ = bytes_ +webob.request.bytes_ = bytes_ + + class TestApp(webtest.TestApp): def do_request(self, req, **kwargs): self.cookiejar.clear() From 3ef58c9fc41cec98fcb9db5de72507434b7dc431 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 15:58:43 +0100 Subject: [PATCH 07/12] Add changelog entry. --- CHANGELOG.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ac713b8a7c..11b7739f4d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -23,6 +23,11 @@ Fixed * Refactor spec_loader util to use yaml.load with SafeLoader. (security) Contributed by @ashwini-orchestral +* Fix a bug in the API and CLI code which would prevent users from being able to retrieve resources + which contain non-ascii (utf-8) characters in the names / references. (bug fix) #5189 + + Contributed by @Kami. + Changed ~~~~~~~ From db8e85e4d192a646b7fcc130c78d75e5c12d7652 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 16:04:58 +0100 Subject: [PATCH 08/12] Add an example rule fixture for rule with unicode name which can be used by end to end tests. --- ...sample_rule_with_webhook_unicode_name.yaml | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml diff --git a/contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml b/contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml new file mode 100644 index 0000000000..e10467211d --- /dev/null +++ b/contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml @@ -0,0 +1,20 @@ +--- + name: "sample_rule_with_webhook_你好" + pack: "examples" + description: "Sample rule dumping webhook payload to a file. 你好吗" + enabled: true + + trigger: + type: "core.st2.webhook" + parameters: + url: "unicode" + + criteria: + trigger.body.name: + pattern: "st2" + type: "equals" + + action: + ref: "core.local" + parameters: + cmd: "echo \"{{trigger.body}}\" >> ~/st2.webhook_sample.out ; sync" From f3a92b1d8ca1a13ef6c3aba68b299a1d2b1bfbcf Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 17:14:19 +0100 Subject: [PATCH 09/12] Use a different more unique name to avoid breaking other test with a similar name. --- .../examples/rules/sample_rule_with_webhook_unicode_name.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml b/contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml index e10467211d..45707861e0 100644 --- a/contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml +++ b/contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml @@ -1,5 +1,5 @@ --- - name: "sample_rule_with_webhook_你好" + name: "unicode_rule_with_webhook_你好" pack: "examples" description: "Sample rule dumping webhook payload to a file. 你好吗" enabled: true From 04e58b615d366d7a57bf276f9eb78c199d2ab2fa Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 19:02:19 +0100 Subject: [PATCH 10/12] Update API router code to throw a more user-friendly exception in case the request URL contains invalid or incorrectly URL encoded characters. Previously we didn't handle this scenario which means original UnicodeDecodeError error with the stack trace got propagated to the user which is a no go. Related issue - https://github.com/GoogleCloudPlatform/webapp2/issues/152. --- CHANGELOG.rst | 9 ++++++++ .../st2common/middleware/instrumentation.py | 12 ++++++++++ st2common/st2common/router.py | 23 ++++++++++++++++++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 11b7739f4d..5a7113543a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -28,6 +28,15 @@ Fixed Contributed by @Kami. +* Fix a bug in the API router code and make sure we return correct and user-friendly error to the + user in case we fail to parse the request URL / path because it contains invalid or incorrectly + URL encoded data. + + Previously such errors weren't handled correctly which meant original exception with a stack + trace got propagated to the user. (bug fix) #5189 + + Contributed by @Kami. + Changed ~~~~~~~ diff --git a/st2common/st2common/middleware/instrumentation.py b/st2common/st2common/middleware/instrumentation.py index e5d01d2223..537dce4536 100644 --- a/st2common/st2common/middleware/instrumentation.py +++ b/st2common/st2common/middleware/instrumentation.py @@ -20,6 +20,8 @@ from st2common.metrics.base import get_driver from st2common.util.date import get_datetime_utc_now from st2common.router import NotFoundException +from st2common.router import Response +from st2common.util.jsonify import json_encode __all__ = ["RequestInstrumentationMiddleware", "ResponseInstrumentationMiddleware"] @@ -47,6 +49,16 @@ def __call__(self, environ, start_response): endpoint, _ = self.router.match(request) except NotFoundException: endpoint = {} + except Exception as e: + # Special case to make sure we return friendly error to the user. + # If we don't do that and router.match() throws an exception, we will return stack trace + # to the end user which is not good. + status_code = getattr(e, "status_code", 500) + headers = {"Content-Type": "application/json"} + body = {"faultstring": getattr(e, "detail", str(e))} + response_body = json_encode(body) + resp = Response(response_body, status=status_code, headers=headers) + return resp(environ, start_response) # NOTE: We don't track per request and response metrics for /v1/executions/ and some # other endpoints because this would result in a lot of unique metrics which is an diff --git a/st2common/st2common/router.py b/st2common/st2common/router.py index 1309037ced..77e622a1d5 100644 --- a/st2common/st2common/router.py +++ b/st2common/st2common/router.py @@ -242,7 +242,28 @@ def match(self, req): # NOTE: webob.url_unquote doesn't work correctly under Python 3 when paths contain non-ascii # characters. That method supposed to handle Python 2 and Python 3 compatibility, but it # doesn't work correctly under Python 3. - path = urllib.parse.unquote(req.path) + try: + path = urllib.parse.unquote(req.path) + except Exception as e: + # This exception being thrown indicates that the URL / path contains bad or incorrectly + # URL escaped characters. Instead of returning this stack track + 500 error to the + # user we return a friendly and more correct exception + # NOTE: We should not access or log req.path here since it's a property which results + # in exception and if we try to log it, it will fail. + try: + path = req.environ["PATH_INFO"] + except Exception: + path = "unknown" + + LOG.error('Failed to parse request URL / path "%s": %s' % (path, str(e))) + + abort( + 400, + 'Failed to parse request path "%s". URL likely contains invalid or incorrectly ' + "URL encoded values." % (path), + ) + return + LOG.debug("Match path: %s", path) if len(path) > 1 and path.endswith("/"): From 15274eaa743b5c1b251323bbc152e8ca68d44d69 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 13 Mar 2021 19:42:37 +0100 Subject: [PATCH 11/12] Add a test case for it. --- st2api/tests/unit/controllers/v1/test_base.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/st2api/tests/unit/controllers/v1/test_base.py b/st2api/tests/unit/controllers/v1/test_base.py index cbfe3e54c2..c26dc0a692 100644 --- a/st2api/tests/unit/controllers/v1/test_base.py +++ b/st2api/tests/unit/controllers/v1/test_base.py @@ -13,7 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import webob from oslo_config import cfg +from webob.request import Request + +from st2common.router import Router + from st2tests.api import FunctionalTest @@ -95,3 +100,16 @@ def test_valid_status_code_is_returned_on_invalid_path(self): "/v1/executions/577f775b0640fd1451f2030b/re_run/a/b", expect_errors=True ) self.assertEqual(resp.status_int, 404) + + def test_router_invalid_url_path_friendly_error(self): + # NOTE: We intentionally don't use sp.app.get here since that goes through the webtest + # layer which manipulates the path which means we won't be testing what we actually want + # to test (an edge case). To test the edge case correctly, we need to manually call router + # with specifically crafted data. + router = Router() + request = Request(environ={"PATH_INFO": "/v1/rules/好的".encode("utf-8")}) + + expected_msg = "URL likely contains invalid or incorrectly URL encoded values" + self.assertRaisesRegexp( + webob.exc.HTTPBadRequest, expected_msg, router.match, request + ) From c0ed21b996a02a91bd6eb0eed1a4e2b0e523d264 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 5 Apr 2021 12:31:47 +0200 Subject: [PATCH 12/12] For now, remove test fixture which is causing failure in end to end tests on Ubuntu 16.04. Ubuntu 16.04 is EOL and it makes no sense to spend a lot of time working on a test workaround since it won't be needed in the near future anyway once we remove support for 16.04. --- ...sample_rule_with_webhook_unicode_name.yaml | 20 ------------------- 1 file changed, 20 deletions(-) delete mode 100644 contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml diff --git a/contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml b/contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml deleted file mode 100644 index 45707861e0..0000000000 --- a/contrib/examples/rules/sample_rule_with_webhook_unicode_name.yaml +++ /dev/null @@ -1,20 +0,0 @@ ---- - name: "unicode_rule_with_webhook_你好" - pack: "examples" - description: "Sample rule dumping webhook payload to a file. 你好吗" - enabled: true - - trigger: - type: "core.st2.webhook" - parameters: - url: "unicode" - - criteria: - trigger.body.name: - pattern: "st2" - type: "equals" - - action: - ref: "core.local" - parameters: - cmd: "echo \"{{trigger.body}}\" >> ~/st2.webhook_sample.out ; sync"