|
| 1 | +import pytest |
| 2 | + |
| 3 | +from redisvl.utils.token_escaper import TokenEscaper |
| 4 | + |
| 5 | + |
@pytest.fixture
def escaper():
    """Build a new ``TokenEscaper`` instance for the requesting test."""
    token_escaper = TokenEscaper()
    return token_escaper
| 9 | + |
| 10 | + |
@pytest.mark.parametrize(
    "test_input,expected",
    [
        pytest.param(r"a [big] test.", r"a\ \[big\]\ test\.", id="brackets"),
        pytest.param(r"hello, world!", r"hello\,\ world\!", id="commas"),
        pytest.param(
            r'special "quotes" (and parentheses)',
            r"special\ \"quotes\"\ \(and\ parentheses\)",
            id="quotes",
        ),
        # TODO: question marks are not caught? The expectation below pins
        # the current output, in which the trailing "?" is left unescaped.
        pytest.param(
            r"& symbols, like * and ?",
            r"\&\ symbols\,\ like\ \*\ and\ ?",
            id="symbols",
        ),
        # Underscores are ignored (left as-is); the dashes are escaped.
        pytest.param(
            r"-dashes_and_underscores-",
            r"\-dashes_and_underscores\-",
            id="underscores",
        ),
    ],
)
def test_escape_text_chars(escaper, test_input, expected):
    """Check that spaces and punctuation in free text are backslash-escaped.

    Each case pairs a raw input with the exact string ``escape`` is expected
    to return for it.
    """
    actual = escaper.escape(test_input)
    assert actual == expected
| 37 | + |
| 38 | + |
@pytest.mark.parametrize(
    "test_input,expected",
    [
        # Simple tags
        pytest.param("user:name", r"user\:name", id=":"),
        pytest.param("123#comment", r"123\#comment", id="#"),
        pytest.param("hyphen-separated", r"hyphen\-separated", id="-"),
        # Tags with special characters
        pytest.param("price$", r"price\$", id="$"),
        pytest.param("super*star", r"super\*star", id="*"),
        pytest.param("tag&value", r"tag\&value", id="&"),
        pytest.param("@username", r"\@username", id="@"),
        # Space-containing tags often used in search scenarios
        pytest.param("San Francisco", r"San\ Francisco", id="space"),
        pytest.param("New Zealand", r"New\ Zealand", id="space-2"),
        # Multi-special-character tags
        pytest.param("complex/tag:value", r"complex\/tag\:value", id="complex"),
        pytest.param("$special$tag$", r"\$special\$tag\$", id="special"),
        pytest.param("tag-with-hyphen", r"tag\-with\-hyphen", id="hyphen"),
        # Tags with less common, but legal characters
        pytest.param("_underscore_", r"_underscore_", id="underscore"),
        pytest.param("dot.tag", r"dot\.tag", id="dot"),
        # ("pipe|tag", r"pipe\|tag"),  # TODO - pipes are not caught?
        # More edge cases with special characters
        pytest.param("(parentheses)", r"\(parentheses\)", id="parentheses"),
        pytest.param("[brackets]", r"\[brackets\]", id="brackets"),
        pytest.param("{braces}", r"\{braces\}", id="braces"),
        # ("question?mark", r"question\?mark"),  # TODO - question marks are not caught?
        # Unicode characters in tags
        # Assuming non-Latin characters don't need escaping
        pytest.param("你好", r"你好", id="non-latin"),
        pytest.param("emoji:😊", r"emoji\:😊", id="emoji"),
        # ...other cases as needed...
    ],
)
def test_escape_tag_like_values(escaper, test_input, expected):
    """Check escaping of short, tag-style values.

    The cases expect separators and symbols to gain a leading backslash,
    while underscores, non-Latin text and emoji are returned unchanged.
    """
    actual = escaper.escape(test_input)
    assert actual == expected
| 96 | + |
| 97 | + |
@pytest.mark.parametrize(
    "test_input",
    [123, 45.67, None, [], {}],
)
def test_escape_non_string_input(escaper, test_input):
    """Check that ``escape`` raises ``TypeError`` for non-string arguments."""
    expected_error = TypeError
    with pytest.raises(expected_error):
        escaper.escape(test_input)
| 102 | + |
| 103 | + |
@pytest.mark.parametrize(
    "test_input,expected",
    [
        # ('你好,世界!', r'你好\,世界\!'),  # TODO - non latin chars?
        pytest.param("😊 ❤️ 👍", r"😊\ ❤️\ 👍", id="emoji"),
        # ...other cases as needed...
    ],
)
def test_escape_unicode_characters(escaper, test_input, expected):
    """Check that emoji pass through while the spaces between them are escaped."""
    actual = escaper.escape(test_input)
    assert actual == expected
| 117 | + |
| 118 | + |
def test_escape_empty_string(escaper):
    """Check that escaping the empty string returns the empty string."""
    result = escaper.escape("")
    assert result == ""
| 121 | + |
| 122 | + |
def test_escape_long_string(escaper):
    """Check that a long, repetitive input is escaped correctly throughout.

    This is a correctness check only: no benchmark fixture is requested, so
    nothing here measures performance.
    """
    repetitions = 1000
    raw = "a," * repetitions  # "a,a,a,...,a," with a trailing comma
    want = r"a\," * repetitions  # every comma gains a leading backslash

    assert escaper.escape(raw) == want
0 commit comments