Skip to content

Commit 56d2e75

Browse files
committed
fix(sandbox): decode + as space in query params and validate glob syntax
Three improvements from PR #617 review: 1. Decode + as space in query string values per the application/x-www-form-urlencoded convention. This matches Python's urllib.parse, JavaScript's URLSearchParams, Go's url.ParseQuery, and most HTTP frameworks. Literal + should be sent as %2B. 2. Add glob pattern syntax validation (warnings) for query matchers. Checks for unclosed brackets and braces in glob/any patterns. These are warnings (not errors) because OPA's glob.match is forgiving, but they surface likely typos during policy loading. 3. Add missing test cases: empty query values, keys without values, unicode after percent-decoding, empty query strings, and literal + via %2B encoding.
1 parent 63a46c6 commit 56d2e75

File tree

2 files changed

+246
-7
lines changed

2 files changed

+246
-7
lines changed

crates/openshell-sandbox/src/l7/mod.rs

Lines changed: 178 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,49 @@ fn get_object_str(val: &regorus::Value, key: &str) -> Option<String> {
146146
}
147147
}
148148

149+
/// Check a glob pattern for obvious syntax issues.
150+
///
151+
/// Returns `Some(warning_message)` if the pattern looks malformed.
152+
/// OPA's `glob.match` is forgiving, so these are warnings (not errors)
153+
/// to surface likely typos without blocking policy loading.
154+
fn check_glob_syntax(pattern: &str) -> Option<String> {
155+
let mut bracket_depth: i32 = 0;
156+
for c in pattern.chars() {
157+
match c {
158+
'[' => bracket_depth += 1,
159+
']' => {
160+
if bracket_depth == 0 {
161+
return Some(format!("glob pattern '{pattern}' has unmatched ']'"));
162+
}
163+
bracket_depth -= 1;
164+
}
165+
_ => {}
166+
}
167+
}
168+
if bracket_depth > 0 {
169+
return Some(format!("glob pattern '{pattern}' has unclosed '['"));
170+
}
171+
172+
let mut brace_depth: i32 = 0;
173+
for c in pattern.chars() {
174+
match c {
175+
'{' => brace_depth += 1,
176+
'}' => {
177+
if brace_depth == 0 {
178+
return Some(format!("glob pattern '{pattern}' has unmatched '}}'"));
179+
}
180+
brace_depth -= 1;
181+
}
182+
_ => {}
183+
}
184+
}
185+
if brace_depth > 0 {
186+
return Some(format!("glob pattern '{pattern}' has unclosed '{{'"));
187+
}
188+
189+
None
190+
}
191+
149192
/// Validate L7 policy configuration in the loaded OPA data.
150193
///
151194
/// Returns a list of errors and warnings. Errors should prevent sandbox startup;
@@ -310,7 +353,12 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec<String>, Vec<
310353
};
311354

312355
for (param, matcher) in query_obj {
313-
if matcher.as_str().is_some() {
356+
if let Some(glob_str) = matcher.as_str() {
357+
if let Some(warning) = check_glob_syntax(glob_str) {
358+
warnings.push(format!(
359+
"{loc}.rules[{rule_idx}].allow.query.{param}: {warning}"
360+
));
361+
}
314362
continue;
315363
}
316364

@@ -346,10 +394,19 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec<String>, Vec<
346394
}
347395

348396
if has_glob {
349-
if matcher_obj.get("glob").and_then(|v| v.as_str()).is_none() {
350-
errors.push(format!(
351-
"{loc}.rules[{rule_idx}].allow.query.{param}.glob: expected glob string"
352-
));
397+
match matcher_obj.get("glob").and_then(|v| v.as_str()) {
398+
None => {
399+
errors.push(format!(
400+
"{loc}.rules[{rule_idx}].allow.query.{param}.glob: expected glob string"
401+
));
402+
}
403+
Some(g) => {
404+
if let Some(warning) = check_glob_syntax(g) {
405+
warnings.push(format!(
406+
"{loc}.rules[{rule_idx}].allow.query.{param}.glob: {warning}"
407+
));
408+
}
409+
}
353410
}
354411
continue;
355412
}
@@ -374,6 +431,14 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec<String>, Vec<
374431
"{loc}.rules[{rule_idx}].allow.query.{param}.any: all values must be strings"
375432
));
376433
}
434+
435+
for item in any.iter().filter_map(|v| v.as_str()) {
436+
if let Some(warning) = check_glob_syntax(item) {
437+
warnings.push(format!(
438+
"{loc}.rules[{rule_idx}].allow.query.{param}.any: {warning}"
439+
));
440+
}
441+
}
377442
}
378443
}
379444
}
@@ -925,6 +990,114 @@ mod tests {
925990
);
926991
}
927992

993+
#[test]
994+
fn validate_query_glob_warns_on_unclosed_bracket() {
995+
let data = serde_json::json!({
996+
"network_policies": {
997+
"test": {
998+
"endpoints": [{
999+
"host": "api.example.com",
1000+
"port": 8080,
1001+
"protocol": "rest",
1002+
"rules": [{
1003+
"allow": {
1004+
"method": "GET",
1005+
"path": "/download",
1006+
"query": {
1007+
"tag": "[unclosed"
1008+
}
1009+
}
1010+
}]
1011+
}],
1012+
"binaries": []
1013+
}
1014+
}
1015+
});
1016+
let (errors, warnings) = validate_l7_policies(&data);
1017+
assert!(
1018+
errors.is_empty(),
1019+
"malformed glob should warn, not error: {errors:?}"
1020+
);
1021+
assert!(
1022+
warnings
1023+
.iter()
1024+
.any(|w| w.contains("unclosed '['") && w.contains("allow.query.tag")),
1025+
"expected glob syntax warning, got: {warnings:?}"
1026+
);
1027+
}
1028+
1029+
#[test]
1030+
fn validate_query_glob_warns_on_unclosed_brace() {
1031+
let data = serde_json::json!({
1032+
"network_policies": {
1033+
"test": {
1034+
"endpoints": [{
1035+
"host": "api.example.com",
1036+
"port": 8080,
1037+
"protocol": "rest",
1038+
"rules": [{
1039+
"allow": {
1040+
"method": "GET",
1041+
"path": "/download",
1042+
"query": {
1043+
"format": { "glob": "{json,xml" }
1044+
}
1045+
}
1046+
}]
1047+
}],
1048+
"binaries": []
1049+
}
1050+
}
1051+
});
1052+
let (errors, warnings) = validate_l7_policies(&data);
1053+
assert!(
1054+
errors.is_empty(),
1055+
"malformed glob should warn, not error: {errors:?}"
1056+
);
1057+
assert!(
1058+
warnings
1059+
.iter()
1060+
.any(|w| w.contains("unclosed '{'") && w.contains("allow.query.format.glob")),
1061+
"expected glob syntax warning, got: {warnings:?}"
1062+
);
1063+
}
1064+
1065+
#[test]
1066+
fn validate_query_any_warns_on_malformed_glob_item() {
1067+
let data = serde_json::json!({
1068+
"network_policies": {
1069+
"test": {
1070+
"endpoints": [{
1071+
"host": "api.example.com",
1072+
"port": 8080,
1073+
"protocol": "rest",
1074+
"rules": [{
1075+
"allow": {
1076+
"method": "GET",
1077+
"path": "/download",
1078+
"query": {
1079+
"tag": { "any": ["valid-*", "[bad"] }
1080+
}
1081+
}
1082+
}]
1083+
}],
1084+
"binaries": []
1085+
}
1086+
}
1087+
});
1088+
let (errors, warnings) = validate_l7_policies(&data);
1089+
assert!(
1090+
errors.is_empty(),
1091+
"malformed glob in any should warn, not error: {errors:?}"
1092+
);
1093+
assert!(
1094+
warnings
1095+
.iter()
1096+
.any(|w| w.contains("unclosed '['") && w.contains("allow.query.tag.any")),
1097+
"expected glob syntax warning for any item, got: {warnings:?}"
1098+
);
1099+
}
1100+
9281101
#[test]
9291102
fn validate_query_string_and_any_matchers_are_accepted() {
9301103
let data = serde_json::json!({

crates/openshell-sandbox/src/l7/rest.rs

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,25 @@ fn parse_query_params(query: &str) -> Result<HashMap<String, Vec<String>>> {
171171
Ok(params)
172172
}
173173

174+
/// Decode a single query string component (key or value).
175+
///
176+
/// Handles both RFC 3986 percent-encoding (`%20` → space) and the
177+
/// `application/x-www-form-urlencoded` convention (`+` → space).
178+
/// Decoding `+` as space matches the behavior of Python's `urllib.parse`,
179+
/// JavaScript's `URLSearchParams`, Go's `url.ParseQuery`, and most HTTP
180+
/// frameworks. Callers that need a literal `+` should send `%2B`.
174181
fn decode_query_component(input: &str) -> Result<String> {
175182
let bytes = input.as_bytes();
176183
let mut decoded = Vec::with_capacity(bytes.len());
177184
let mut i = 0;
178185

179186
while i < bytes.len() {
187+
if bytes[i] == b'+' {
188+
decoded.push(b' ');
189+
i += 1;
190+
continue;
191+
}
192+
180193
if bytes[i] != b'%' {
181194
decoded.push(bytes[i]);
182195
i += 1;
@@ -769,16 +782,69 @@ mod tests {
769782
}
770783

771784
#[test]
772-
fn parse_target_query_decodes_percent_and_preserves_plus() {
785+
fn parse_target_query_decodes_percent_and_plus() {
773786
let (path, query) = parse_target_query("/download?slug=my%2Fskill&name=Foo+Bar").unwrap();
774787
assert_eq!(path, "/download");
775788
assert_eq!(
776789
query.get("slug").cloned(),
777790
Some(vec!["my/skill".to_string()])
778791
);
792+
// `+` is decoded as space per application/x-www-form-urlencoded.
793+
// Literal `+` should be sent as `%2B`.
779794
assert_eq!(
780795
query.get("name").cloned(),
781-
Some(vec!["Foo+Bar".to_string()])
796+
Some(vec!["Foo Bar".to_string()])
797+
);
798+
}
799+
800+
#[test]
801+
fn parse_target_query_literal_plus_via_percent_encoding() {
802+
let (_path, query) = parse_target_query("/search?q=a%2Bb").unwrap();
803+
assert_eq!(
804+
query.get("q").cloned(),
805+
Some(vec!["a+b".to_string()]),
806+
"%2B should decode to literal +"
807+
);
808+
}
809+
810+
#[test]
811+
fn parse_target_query_empty_value() {
812+
let (_path, query) = parse_target_query("/api?tag=").unwrap();
813+
assert_eq!(
814+
query.get("tag").cloned(),
815+
Some(vec!["".to_string()]),
816+
"key with empty value should produce empty string"
817+
);
818+
}
819+
820+
#[test]
821+
fn parse_target_query_key_without_value() {
822+
let (_path, query) = parse_target_query("/api?verbose").unwrap();
823+
assert_eq!(
824+
query.get("verbose").cloned(),
825+
Some(vec!["".to_string()]),
826+
"key without = should produce empty string value"
827+
);
828+
}
829+
830+
#[test]
831+
fn parse_target_query_unicode_after_decoding() {
832+
// "café" = c a f %C3%A9
833+
let (_path, query) = parse_target_query("/search?q=caf%C3%A9").unwrap();
834+
assert_eq!(
835+
query.get("q").cloned(),
836+
Some(vec!["café".to_string()]),
837+
"percent-encoded UTF-8 should decode correctly"
838+
);
839+
}
840+
841+
#[test]
842+
fn parse_target_query_empty_query_string() {
843+
let (path, query) = parse_target_query("/api?").unwrap();
844+
assert_eq!(path, "/api");
845+
assert!(
846+
query.is_empty(),
847+
"empty query after ? should produce empty map"
782848
);
783849
}
784850

0 commit comments

Comments
 (0)