Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 65 additions & 40 deletions src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,16 @@ impl PyDocument {
match self.inner.query_exact(&r) {
Ok(Some(feature)) => {
let span = feature.location.byte_span;
// Note: span.0 <= span.1 is guaranteed by tree-sitter node
// ranges, so we only check bounds and UTF-8 alignment.
if span.1 > source.len()
|| !source.is_char_boundary(span.0)
|| !source.is_char_boundary(span.1)
{
return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
"Feature span is not valid in source",
));
}
Comment thread
nathanjmcdougall marked this conversation as resolved.
let raw = &source[span.0..span.1];
// Calculate the column offset (in bytes) of the value
// start relative to the beginning of its line, so we can
Expand Down Expand Up @@ -285,6 +295,10 @@ fn apply_insert_at(
.ok_or_else(|| format!("insert_at: item at index {resolved} not found"))?;

let item_start = item_feature.location.byte_span.0;
// Note: no reversed-span check needed; tree-sitter nodes guarantee start <= end.
if item_start > source.len() || !source.is_char_boundary(item_start) {
return Err("Feature span is not valid in source".to_string());
}
let line_start = source[..item_start]
.rfind('\n')
.map(|nl| nl + 1)
Expand Down Expand Up @@ -343,6 +357,14 @@ fn apply_complex_replace(
.query_pretty(route)
.map_err(|e| format!("Query failed: {e}"))?;

let span = feature.location.byte_span;
// Note: span.0 <= span.1 is guaranteed by tree-sitter node ranges,
// so we only check bounds and UTF-8 alignment.
if span.1 > source.len() || !source.is_char_boundary(span.0) || !source.is_char_boundary(span.1)
{
return Err("Feature span is not valid in source".to_string());
}
Comment thread
nathanjmcdougall marked this conversation as resolved.

let content_with_ws = doc.extract_with_leading_whitespace(&feature);
let content = doc.extract(&feature);

Expand All @@ -353,39 +375,52 @@ fn apply_complex_replace(
let start_byte = feature.location.byte_span.0 - ws_len;
let end_byte = feature.location.byte_span.1;

// Find the colon separating key from value
let colon_pos = find_key_colon(content_with_ws);
// Use query_exact to locate the value's byte span independently.
// This avoids string-searching for the colon separator, which breaks
// on quoted keys containing colons (e.g. "http://example.com": 8080).
let value_feature = doc
.query_exact(route)
.map_err(|e| format!("Query failed: {e}"))?;

let key_part = match colon_pos {
Some(pos) => {
let key = &content_with_ws[..pos + 1]; // through the colon
key.to_string()
let key_part = match value_feature {
Some(vf) => {
let vf_start = vf.location.byte_span.0;
// Note: no reversed-span check needed; tree-sitter nodes guarantee start <= end.
if vf_start > source.len() || !source.is_char_boundary(vf_start) {
return Err("Value feature span is not valid in source".to_string());
}
let prefix = source[start_byte..vf_start].trim_end();
if prefix.is_empty() {
// Bare value (e.g. sequence item) — no key prefix
let serialized = serde_yaml::to_string(value)
.map_err(|e| format!("Failed to serialize YAML: {e}"))?;
let trimmed = serialized.trim_end_matches('\n');

let line_start = source[..feature.location.byte_span.0]
.rfind('\n')
.map(|nl| nl + 1)
.unwrap_or(0);
let base_indent = feature.location.byte_span.0 - line_start;
let indent_str = " ".repeat(base_indent);

let indented = indent_block(trimmed, &indent_str);

let mut result = source.to_string();
result.replace_range(
feature.location.byte_span.0..feature.location.byte_span.1,
&indented,
);
if !result.ends_with('\n') {
result.push('\n');
}
return yamlpath::Document::new(result)
.map_err(|e| format!("Failed to re-parse YAML: {e}"));
}
prefix.to_string()
}
None => {
// No colon found — bare value (e.g. sequence item)
let serialized = serde_yaml::to_string(value)
.map_err(|e| format!("Failed to serialize YAML: {e}"))?;
let trimmed = serialized.trim_end_matches('\n');

let line_start = source[..feature.location.byte_span.0]
.rfind('\n')
.map(|nl| nl + 1)
.unwrap_or(0);
let base_indent = feature.location.byte_span.0 - line_start;
let indent_str = " ".repeat(base_indent);

let indented = indent_block(trimmed, &indent_str);

let mut result = source.to_string();
result.replace_range(
feature.location.byte_span.0..feature.location.byte_span.1,
&indented,
);
if !result.ends_with('\n') {
result.push('\n');
}
return yamlpath::Document::new(result)
.map_err(|e| format!("Failed to re-parse YAML: {e}"));
// Absent value (e.g. `key:\n`) — content is just key+colon
content_with_ws.trim_end().to_string()
}
};

Expand Down Expand Up @@ -424,16 +459,6 @@ fn apply_complex_replace(
yamlpath::Document::new(result).map_err(|e| format!("Failed to re-parse YAML: {e}"))
}

/// Find the key-value separator colon in a YAML fragment.
///
/// Returns the byte offset of the separator `:` within `content`, or `None`
/// when no colon follows the key.
///
/// When the fragment (after leading whitespace) starts with a single- or
/// double-quoted scalar, colons inside the quotes belong to the key and are
/// skipped, so `"http://example.com": 8080` yields the colon after the
/// closing quote rather than the one inside the URL. For any other fragment,
/// and for a quoted scalar that is never closed, the result is the first `:`
/// in the text.
fn find_key_colon(content: &str) -> Option<usize> {
    // Byte offset of the first non-whitespace character.
    let key_start = content.len() - content.trim_start().len();
    // Start searching after a leading quoted key, if there is one.
    let search_from = quoted_key_end(&content[key_start..]).map_or(0, |end| key_start + end);
    content[search_from..]
        .find(':')
        .map(|pos| search_from + pos)
}

/// Return the byte offset just past the closing quote when `key` starts with
/// a quoted scalar; `None` if it does not start with a quote or the quote is
/// never closed.
fn quoted_key_end(key: &str) -> Option<usize> {
    let mut chars = key.char_indices();
    let (_, quote) = chars.next().filter(|&(_, c)| c == '"' || c == '\'')?;
    while let Some((idx, c)) = chars.next() {
        if quote == '"' && c == '\\' {
            // Backslash escapes the next character inside double quotes.
            chars.next();
        } else if c == quote {
            // Both quote characters are one byte, so `idx + 1` is a char boundary.
            if quote == '\'' && key[idx + 1..].starts_with('\'') {
                // `''` is an escaped quote inside a single-quoted scalar.
                chars.next();
            } else {
                return Some(idx + 1);
            }
        }
    }
    None
}

fn indent_block(content: &str, indent: &str) -> String {
let mut result = String::new();
for (i, line) in content.lines().enumerate() {
Expand Down
12 changes: 12 additions & 0 deletions tests/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,18 @@ def test_replace_flow_mapping_with_dict(self):
doc2 = doc.replace("config", value={"x": 10})
assert doc2["config"] == {"x": 10}

def test_replace_quoted_key_with_colon_complex_value(self):
    """Regression: a colon inside a quoted key must survive a complex-value replace."""
    key = "http://example.com"
    original = Document('"http://example.com": 8080\n')
    # Replacing the scalar with a mapping goes through the complex-replace path.
    updated = original.replace(key, value={"port": 9090})
    assert updated[key] == {"port": 9090}

def test_replace_quoted_key_with_multiple_colons_complex_value(self):
    """Regression: several colons inside a quoted key must survive a complex-value replace."""
    key = "a:b:c"
    original = Document('"a:b:c": val\n')
    # Replacing the scalar with a mapping goes through the complex-replace path.
    updated = original.replace(key, value={"x": 1})
    assert updated[key] == {"x": 1}

def test_replace_key_with_hash_in_value(self):
doc = Document("color: '#ff0000'\n")
doc2 = doc.replace("color", value={"r": 255, "g": 0, "b": 0})
Expand Down
Loading