diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 86cc9f72f0..fbfae13636 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -28,6 +28,20 @@ Added * Added the command line utility `st2-validate-pack` that can be used by pack developers to validate pack contents. (improvement) +* Fix a bug in the API and CLI code which would prevent users from being able to retrieve resources + which contain non-ascii (utf-8) characters in the names / references. (bug fix) #5189 + + Contributed by @Kami. + +* Fix a bug in the API router code and make sure we return correct and user-friendly error to the + user in case we fail to parse the request URL / path because it contains invalid or incorrectly + URL encoded data. + + Previously such errors weren't handled correctly which meant original exception with a stack + trace got propagated to the user. (bug fix) #5189 + + Contributed by @Kami. + Changed ~~~~~~~ diff --git a/st2api/tests/unit/controllers/v1/test_actions.py b/st2api/tests/unit/controllers/v1/test_actions.py index 40e973c0a1..cd1d8b555c 100644 --- a/st2api/tests/unit/controllers/v1/test_actions.py +++ b/st2api/tests/unit/controllers/v1/test_actions.py @@ -17,6 +17,7 @@ import os import os.path import copy +import urllib try: import simplejson as json @@ -277,6 +278,22 @@ } +ACTION_WITH_UNICODE_NAME = { + "name": "st2.dummy.action_unicode_我爱狗", + "description": "test description", + "enabled": True, + "pack": "dummy_pack_1", + "entry_point": "/tmp/test/action1.sh", + "runner_type": "local-shell-script", + "parameters": { + "a": {"type": "string", "default": "A1"}, + "b": {"type": "string", "default": "B1"}, + "sudo": {"default": True, "immutable": True}, + }, + "notify": {"on-complete": {"message": "Woohoo! I completed!!!"}}, +} + + class ActionsControllerTestCase( FunctionalTest, APIControllerWithIncludeAndExcludeFilterTestCase, CleanFilesTestCase ): @@ -640,6 +657,48 @@ def test_action_with_notify_update(self): self.assertEqual(get_resp.json["notify"], {}) self.__do_delete(action_id) + @mock.patch.object( + action_validator, "validate_action", mock.MagicMock(return_value=True) + ) + def test_action_with_unicode_name_create(self): + post_resp = self.__do_post(ACTION_WITH_UNICODE_NAME) + action_id = self.__get_action_id(post_resp) + get_resp = self.__do_get_one(action_id) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(self.__get_action_id(get_resp), action_id) + self.assertEqual(get_resp.json["name"], "st2.dummy.action_unicode_我爱狗") + self.assertEqual( + get_resp.json["ref"], "dummy_pack_1.st2.dummy.action_unicode_我爱狗" + ) + self.assertEqual( + get_resp.json["uid"], "action:dummy_pack_1:st2.dummy.action_unicode_我爱狗" + ) + + get_resp = self.__do_get_one("dummy_pack_1.st2.dummy.action_unicode_我爱狗") + self.assertEqual(get_resp.json["name"], "st2.dummy.action_unicode_我爱狗") + self.assertEqual( + get_resp.json["ref"], "dummy_pack_1.st2.dummy.action_unicode_我爱狗" + ) + self.assertEqual( + get_resp.json["uid"], "action:dummy_pack_1:st2.dummy.action_unicode_我爱狗" + ) + + # Now retrieve the action using the ref and ensure it works correctly + # NOTE: We need to use urlquoted value when retrieving the item since that's how all the + # http clients work - non ascii characters get escaped / quoted. Passing in unquoted + # value will result in exception (as expected). + ref_quoted = urllib.parse.quote("dummy_pack_1.st2.dummy.action_unicode_我爱狗") + get_resp = self.__do_get_one(ref_quoted) + self.assertEqual(get_resp.json["name"], "st2.dummy.action_unicode_我爱狗") + self.assertEqual( + get_resp.json["ref"], "dummy_pack_1.st2.dummy.action_unicode_我爱狗" + ) + self.assertEqual( + get_resp.json["uid"], "action:dummy_pack_1:st2.dummy.action_unicode_我爱狗" + ) + + self.__do_delete(action_id) + @mock.patch.object( action_validator, "validate_action", mock.MagicMock(return_value=True) ) diff --git a/st2api/tests/unit/controllers/v1/test_base.py b/st2api/tests/unit/controllers/v1/test_base.py index cbfe3e54c2..c26dc0a692 100644 --- a/st2api/tests/unit/controllers/v1/test_base.py +++ b/st2api/tests/unit/controllers/v1/test_base.py @@ -13,7 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import webob from oslo_config import cfg +from webob.request import Request + +from st2common.router import Router + from st2tests.api import FunctionalTest @@ -95,3 +100,16 @@ def test_valid_status_code_is_returned_on_invalid_path(self): "/v1/executions/577f775b0640fd1451f2030b/re_run/a/b", expect_errors=True ) self.assertEqual(resp.status_int, 404) + + def test_router_invalid_url_path_friendly_error(self): + # NOTE: We intentionally don't use sp.app.get here since that goes through the webtest + # layer which manipulates the path which means we won't be testing what we actually want + # to test (an edge case). To test the edge case correctly, we need to manually call router + # with specifically crafted data. + router = Router() + request = Request(environ={"PATH_INFO": "/v1/rules/好的".encode("utf-8")}) + + expected_msg = "URL likely contains invalid or incorrectly URL encoded values" + self.assertRaisesRegexp( + webob.exc.HTTPBadRequest, expected_msg, router.match, request + ) diff --git a/st2client/st2client/shell.py b/st2client/st2client/shell.py index 7d3359c532..4bca31e753 100755 --- a/st2client/st2client/shell.py +++ b/st2client/st2client/shell.py @@ -63,6 +63,7 @@ from st2client.config import set_config from st2client.exceptions.operations import OperationFailureException from st2client.utils.logging import LogLevelFilter, set_log_level_for_all_loggers +from st2client.utils.misc import reencode_list_with_surrogate_escape_sequences from st2client.commands.auth import TokenCreateCommand from st2client.commands.auth import LoginCommand @@ -98,6 +99,42 @@ PACKAGE_METADATA_FILE_PATH = "/opt/stackstorm/st2/package.meta" +""" +Here we sanitize the provided args and ensure they contain valid unicode values. + +By default, sys.argv will contain a unicode string where the actual item values which contain +unicode sequences are escaped using unicode surrogates. + +For example, if "examples.test_rule_utf8_náme" value is specified as a CLI argument, sys.argv +and as such also url, would contain "examples.test_rule_utf8_n%ED%B3%83%ED%B2%A1me" which is not +what we want. + +Complete sys.argv example: + +1. Default - ['shell.py', '--debug', 'rule', 'get', 'examples.test_rule_utf8_n\udcc3\udca1me'] +2. What we want - ['shell.py', '--debug', 'rule', 'get', 'examples.test_rule_utf8_náme'] + +This won't work correctly when sending requests to the API. As such, we correctly escape the +value to the unicode string here and then let the http layer (requests) correctly url encode +this value. + +Technically, we could also just try to re-encode it in the HTTPClient and I tried that first, but +it turns out more code in the client results in exceptions if it's not re-encoded as early as +possible. +""" + +REENCODE_ARGV = os.environ.get("ST2_CLI_RENCODE_ARGV", "true").lower() in [ + "true", + "1", + "yes", +] + +if REENCODE_ARGV: + try: + sys.argv = reencode_list_with_surrogate_escape_sequences(sys.argv) + except Exception as e: + print("Failed to re-encode sys.argv: %s" % (str(e))) + def get_stackstorm_version(): """ diff --git a/st2client/st2client/utils/misc.py b/st2client/st2client/utils/misc.py index 62c7b1a61f..f8e81a5e99 100644 --- a/st2client/st2client/utils/misc.py +++ b/st2client/st2client/utils/misc.py @@ -14,11 +14,14 @@ # limitations under the License. from __future__ import absolute_import + +from typing import List + import copy import six -__all__ = ["merge_dicts"] +__all__ = ["merge_dicts", "reencode_list_with_surrogate_escape_sequences"] def merge_dicts(d1, d2): @@ -39,3 +42,22 @@ def merge_dicts(d1, d2): result[key] = value return result + + +def reencode_list_with_surrogate_escape_sequences(value: List[str]) -> List[str]: + """ + Function which reencodes each item in the provided list replacing unicode surrogate escape + sequences using actual unicode values. + """ + result = [] + + for item in value: + try: + item = item.encode("ascii", "surrogateescape").decode("utf-8") + except UnicodeEncodeError: + # Already a unicode string, nothing to do + pass + + result.append(item) + + return result diff --git a/st2client/tests/unit/test_commands.py b/st2client/tests/unit/test_commands.py index de84f7883f..6dc5803b2c 100644 --- a/st2client/tests/unit/test_commands.py +++ b/st2client/tests/unit/test_commands.py @@ -295,6 +295,21 @@ def test_command_delete_failed(self): args = self.parser.parse_args(["fakeresource", "delete", "cba"]) self.assertRaises(Exception, self.branch.commands["delete"].run, args) + @mock.patch.object( + models.ResourceManager, + "get_by_id", + mock.MagicMock(return_value=base.FakeResource(**base.RESOURCES[0])), + ) + def test_command_get_unicode_primary_key(self): + args = self.parser.parse_args( + ["fakeresource", "get", "examples.test_rule_utf8_náme"] + ) + self.assertEqual(args.func, self.branch.commands["get"].run_and_print) + instance = self.branch.commands["get"].run(args) + actual = instance.serialize() + expected = json.loads(json.dumps(base.RESOURCES[0])) + self.assertEqual(actual, expected) + class ResourceViewCommandTestCase(unittest2.TestCase): def setUp(self): diff --git a/st2client/tests/unit/test_shell.py b/st2client/tests/unit/test_shell.py index bce176b4ad..32f128a506 100644 --- a/st2client/tests/unit/test_shell.py +++ b/st2client/tests/unit/test_shell.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Copyright 2020 The StackStorm Authors. # Copyright 2019 Extreme Networks, Inc. # @@ -32,6 +33,7 @@ from st2client.shell import Shell from st2client.client import Client from st2client.utils import httpclient +from st2client.utils.misc import reencode_list_with_surrogate_escape_sequences from st2common.models.db.auth import TokenDB from tests import base @@ -828,3 +830,34 @@ def test_automatic_auth_skipped_if_token_provided_as_cli_argument(self): args = shell.parser.parse_args(args=argv) shell.get_client(args=args) self.assertEqual(shell._get_auth_token.call_count, 0) + + def test_get_one_unicode_character_in_name(self): + self._write_mock_config() + + shell = Shell() + shell._get_auth_token = mock.Mock() + + os.environ["ST2_AUTH_TOKEN"] = "fooo" + argv = ["action", "get", "examples.test_rule_utf8_náme"] + args = shell.parser.parse_args(args=argv) + shell.get_client(args=args) + self.assertEqual(args.ref_or_id, "examples.test_rule_utf8_náme") + + def test_reencode_list_replace_surrogate_escape(self): + value = ["a", "b", "c", "d"] + expected = value + result = reencode_list_with_surrogate_escape_sequences(value) + + self.assertEqual(result, expected) + + value = ["a", "b", "c", "n\udcc3\udca1me"] + expected = ["a", "b", "c", "náme"] + result = reencode_list_with_surrogate_escape_sequences(value) + + self.assertEqual(result, expected) + + value = ["a", "b", "c", "你好"] + expected = value + result = reencode_list_with_surrogate_escape_sequences(value) + + self.assertEqual(result, expected) diff --git a/st2common/st2common/middleware/instrumentation.py b/st2common/st2common/middleware/instrumentation.py index e5d01d2223..537dce4536 100644 --- a/st2common/st2common/middleware/instrumentation.py +++ b/st2common/st2common/middleware/instrumentation.py @@ -20,6 +20,8 @@ from st2common.metrics.base import get_driver from st2common.util.date import get_datetime_utc_now from st2common.router import NotFoundException +from st2common.router import Response +from st2common.util.jsonify import json_encode __all__ = ["RequestInstrumentationMiddleware", "ResponseInstrumentationMiddleware"] @@ -47,6 +49,16 @@ def __call__(self, environ, start_response): endpoint, _ = self.router.match(request) except NotFoundException: endpoint = {} + except Exception as e: + # Special case to make sure we return friendly error to the user. + # If we don't do that and router.match() throws an exception, we will return stack trace + # to the end user which is not good. + status_code = getattr(e, "status_code", 500) + headers = {"Content-Type": "application/json"} + body = {"faultstring": getattr(e, "detail", str(e))} + response_body = json_encode(body) + resp = Response(response_body, status=status_code, headers=headers) + return resp(environ, start_response) # NOTE: We don't track per request and response metrics for /v1/executions/ and some # other endpoints because this would result in a lot of unique metrics which is an diff --git a/st2common/st2common/router.py b/st2common/st2common/router.py index 235db9d67e..fa9c002354 100644 --- a/st2common/st2common/router.py +++ b/st2common/st2common/router.py @@ -29,7 +29,7 @@ from six.moves.urllib import parse as urlparse # pylint: disable=import-error import webob from webob import cookies, exc -from webob.compat import url_unquote +from six.moves import urllib from st2common.exceptions import rbac as rbac_exc from st2common.exceptions import auth as auth_exc @@ -264,7 +264,31 @@ def add_spec(self, spec, transforms): ) def match(self, req): - path = url_unquote(req.path) + # NOTE: webob.url_unquote doesn't work correctly under Python 3 when paths contain non-ascii + # characters. That method supposed to handle Python 2 and Python 3 compatibility, but it + # doesn't work correctly under Python 3. + try: + path = urllib.parse.unquote(req.path) + except Exception as e: + # This exception being thrown indicates that the URL / path contains bad or incorrectly + # URL escaped characters. Instead of returning this stack track + 500 error to the + # user we return a friendly and more correct exception + # NOTE: We should not access or log req.path here since it's a property which results + # in exception and if we try to log it, it will fail. + try: + path = req.environ["PATH_INFO"] + except Exception: + path = "unknown" + + LOG.error('Failed to parse request URL / path "%s": %s' % (path, str(e))) + + abort( + 400, + 'Failed to parse request path "%s". URL likely contains invalid or incorrectly ' + "URL encoded values." % (path), + ) + return + LOG.debug("Match path: %s", path) if len(path) > 1 and path.endswith("/"): diff --git a/st2tests/st2tests/api.py b/st2tests/st2tests/api.py index 7000ddd9a1..dae8eb7831 100644 --- a/st2tests/st2tests/api.py +++ b/st2tests/st2tests/api.py @@ -23,8 +23,12 @@ import six import webtest +import webob.compat +import webob.request import mock +from six.moves import urllib + from oslo_config import cfg from st2common.router import Router @@ -57,6 +61,25 @@ class ResponseLeakError(ValueError): pass +# NOTE: This is not ideal, but we need to patch those functions so it works correctly for the +# tests. +# The problem is that for the unit based api tests we utilize webtest which has the same bug as +# webob when handling unicode characters in the path names and the actual unit test API code doesn't +# follow exactly the same code path as actual production code which doesn't utilize webtest +# In short, that's why important we also have end to end tests for API endpoints! +webob.request.url_unquote = urllib.parse.unquote +webob.compat.url_unquote = urllib.parse.unquote + + +def bytes_(s, encoding="utf-8", errors="strict"): + if isinstance(s, six.text_type): + return s.encode("utf-8", errors) + + +webob.compat.bytes_ = bytes_ +webob.request.bytes_ = bytes_ + + class TestApp(webtest.TestApp): def do_request(self, req, **kwargs): self.cookiejar.clear()