Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 130 additions & 4 deletions core/src/value/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,22 @@
impl<'js> String<'js> {
/// Convert the JavaScript string to a Rust string.
///
/// # Errors
///
/// Returns an error if the string data cannot be obtained from the engine
/// (see `get_ptr_len`) or if the returned bytes are not valid UTF-8.
pub fn to_string(&self) -> Result<StdString> {
    let (ptr, len) = self.get_ptr_len()?;
    // SAFETY: assumes `get_ptr_len` hands back a non-null buffer that stays valid for
    // `len` bytes until it is released below — TODO confirm against `get_ptr_len`.
    let bytes: &[u8] = unsafe { slice::from_raw_parts(ptr as _, len as _) };
    // Copy into an owned string *before* the buffer is released.
    let result = str::from_utf8(bytes).map(|s| s.into());
    // `c_char` is `i8` on some targets and `u8` on others (e.g. aarch64 Linux), so let
    // `cast()` infer the pointee type the binding expects instead of hard-coding it.
    unsafe { qjs::JS_FreeCString(self.0.ctx.as_ptr(), ptr.cast()) };
    Ok(result?)
}

/// Convert the JavaScript string to a Rust string, substituting U+FFFD for any
/// byte sequence that cannot be decoded.
///
/// Unlike `to_string`, invalid data in the string does not produce an error.
///
/// # Errors
///
/// Returns an error only if the string data cannot be obtained from the engine
/// (see `get_ptr_len`).
pub fn to_string_lossy(&self) -> Result<StdString> {
    let (ptr, len) = self.get_ptr_len()?;
    // SAFETY: assumes `get_ptr_len` hands back a non-null buffer that stays valid for
    // `len` bytes until it is released below — TODO confirm against `get_ptr_len`.
    let bytes: &[u8] = unsafe { slice::from_raw_parts(ptr as _, len as _) };
    // The decoder returns an owned string, so nothing borrows the buffer after this line.
    let string = Self::replace_invalid_utf8_and_utf16(bytes);
    // `c_char` is `i8` on some targets and `u8` on others (e.g. aarch64 Linux), so let
    // `cast()` infer the pointee type the binding expects instead of hard-coding it.
    unsafe { qjs::JS_FreeCString(self.0.ctx.as_ptr(), ptr.cast()) };
    Ok(string)
}

fn get_ptr_len(&self) -> Result<(*const i8, usize)> {
let mut len = mem::MaybeUninit::uninit();
let ptr = unsafe {
qjs::JS_ToCStringLen(self.0.ctx.as_ptr(), len.as_mut_ptr(), self.0.as_js_value())
Expand All @@ -19,10 +35,104 @@
return Err(Error::Unknown);
}
let len = unsafe { len.assume_init() };
let bytes: &[u8] = unsafe { slice::from_raw_parts(ptr as _, len as _) };
let result = str::from_utf8(bytes).map(|s| s.into());
unsafe { qjs::JS_FreeCString(self.0.ctx.as_ptr(), ptr) };
Ok(result?)
Ok((ptr, len))

Check failure on line 38 in core/src/value/string.rs

View workflow job for this annotation

GitHub Actions / test (bindings, ubuntu-latest, stable, aarch64-unknown-linux-gnu, full-async)

mismatched types

Check failure on line 38 in core/src/value/string.rs

View workflow job for this annotation

GitHub Actions / test (bindings, ubuntu-latest, stable, aarch64-unknown-linux-musl, full-async)

mismatched types
}

/// Lossily decode `bytes` as UTF-8, emitting U+FFFD for everything that does not
/// decode to a valid Unicode scalar value.
///
/// Decoding rules:
///
/// * A lead byte followed by the right number of continuation bytes (`10xxxxxx`) is
///   consumed as one unit. If the decoded value is a surrogate (U+D800..=U+DFFF),
///   lies above U+10FFFF, or is an overlong encoding of a smaller code point, the
///   whole unit becomes a single U+FFFD.
/// * A lead byte whose continuation bytes are missing or malformed becomes one
///   U+FFFD and only that single byte is skipped.
/// * Stray continuation bytes and bytes `0xF8..=0xFF` each become one U+FFFD.
fn replace_invalid_utf8_and_utf16(bytes: &[u8]) -> StdString {
    const REPLACEMENT: char = '\u{FFFD}';

    let mut result = StdString::with_capacity(bytes.len());
    let mut rest = bytes;

    while let Some(&lead) = rest.first() {
        // For multi-byte leads: number of continuation bytes, mask selecting the
        // payload bits of the lead byte, and the smallest code point that genuinely
        // needs this sequence length (anything below it is an overlong encoding).
        let (extra, lead_mask, min_code_point): (usize, u8, u32) = match lead {
            // ASCII (1-byte)
            0x00..=0x7F => {
                result.push(char::from(lead));
                rest = &rest[1..];
                continue;
            }
            // 2-byte sequence
            0xC0..=0xDF => (1, 0x1F, 0x80),
            // 3-byte sequence
            0xE0..=0xEF => (2, 0x0F, 0x800),
            // 4-byte sequence
            0xF0..=0xF7 => (3, 0x07, 0x1_0000),
            // Stray continuation byte or invalid lead byte
            _ => {
                result.push(REPLACEMENT);
                rest = &rest[1..];
                continue;
            }
        };

        // `get` yields `None` when the input ends before the sequence is complete.
        let tail = rest
            .get(1..=extra)
            .filter(|tail| tail.iter().all(|&b| (b & 0xC0) == 0x80));

        match tail {
            Some(tail) => {
                let code_point = tail
                    .iter()
                    .fold(u32::from(lead & lead_mask), |acc, &b| {
                        (acc << 6) | u32::from(b & 0x3F)
                    });
                // `char::from_u32` rejects surrogates and values above U+10FFFF;
                // the extra filter rejects overlong encodings.
                let decoded =
                    char::from_u32(code_point).filter(|_| code_point >= min_code_point);
                result.push(decoded.unwrap_or(REPLACEMENT));
                rest = &rest[extra + 1..];
            }
            None => {
                // Truncated or malformed sequence: drop only the lead byte so the
                // following bytes get a chance to be decoded on their own.
                result.push(REPLACEMENT);
                rest = &rest[1..];
            }
        }
    }

    result
}

/// Convert the JavaScript string to a JavaScript C string.
Expand Down Expand Up @@ -142,4 +252,20 @@
assert_eq!(text, "foobar".to_string());
});
}

#[test]
fn utf8_sliced_string() {
    test_with(|ctx| {
        let globes = "🌍🌎🌏";
        let js_string = String::from_str(ctx.clone(), globes).unwrap();

        // An intact string must come out unchanged through both conversions.
        let strict = js_string.to_string().unwrap();
        let lossy = js_string.to_string_lossy().unwrap();
        assert_eq!(strict, globes.to_string());
        assert_eq!(lossy, globes.to_string());

        // After `slice(1)` the lossy conversion is expected to report the damaged
        // first emoji as a single replacement character and keep the rest intact.
        let drop_first: Function = ctx.eval("x => x.slice(1)").unwrap();
        let sliced: String = (js_string,).apply(&drop_first).unwrap();

        assert_eq!(sliced.to_string_lossy().unwrap(), "�🌎🌏".to_string());
    });
}
}
Loading