Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"git.ignoreLimitWarning": true,
"files.eol": "\n",
"editor.formatOnSave": true,
"files.exclude": {
"tmp/**": true
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"[markdown]": {
"editor.formatOnSave": true,
"editor.formatOnPaste": true
},
"markdownlint.config": {
"MD013": false,
"MD024": false
},
"cSpell.diagnosticLevel": "Hint",
}
42 changes: 28 additions & 14 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,18 +1,30 @@
[package]
name = "encoding_rs"
name = "encoding_rs2"
description = "A Gecko-oriented implementation of the Encoding Standard"
version = "0.8.35"
edition = '2018'
version = "0.8.36"
edition = '2024'
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
license = "(Apache-2.0 OR MIT) AND BSD-3-Clause"
include = ["src/**/*.rs", "/data", "Cargo.toml", "COPYRIGHT", "LICENSE*", "README.md"]
include = [
"src/**/*.rs",
"/data",
"Cargo.toml",
"COPYRIGHT",
"LICENSE*",
"README.md",
]
readme = "README.md"
documentation = "https://docs.rs/encoding_rs/"
homepage = "https://docs.rs/encoding_rs/"
repository = "https://github.com/hsivonen/encoding_rs"
repository = "https://github.com/brmmm3/encoding_rs"
keywords = ["encoding", "web", "unicode", "charset"]
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
rust-version = "1.40"
categories = [
"text-processing",
"encoding",
"web-programming",
"internationalization",
]
rust-version = "1.86"

[features]
default = ["alloc"]
Expand All @@ -26,20 +38,22 @@ fast-hanja-encode = []
fast-kanji-encode = []
fast-gb-hanzi-encode = []
fast-big5-hanzi-encode = []
fast-legacy-encode = ["fast-hangul-encode",
"fast-hanja-encode",
"fast-kanji-encode",
"fast-gb-hanzi-encode",
"fast-big5-hanzi-encode"]
fast-legacy-encode = [
"fast-hangul-encode",
"fast-hanja-encode",
"fast-kanji-encode",
"fast-gb-hanzi-encode",
"fast-big5-hanzi-encode",
]

[dependencies]
cfg-if = "1.0"
serde = { version = "1.0", optional = true }
any_all_workaround = { version = "0.1.0" , optional = true }
any_all_workaround = { version = "0.1.0", optional = true }

[dev-dependencies]
serde_derive = "1.0"
bincode = "1.0"
bincode = "2.0"
serde_json = "1.0"

[profile.release]
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
[![crates.io](https://img.shields.io/crates/v/encoding_rs.svg)](https://crates.io/crates/encoding_rs)
[![docs.rs](https://docs.rs/encoding_rs/badge.svg)](https://docs.rs/encoding_rs/)

This is a fork of hsivonen/encoding_rs with some updates.

encoding_rs is an implementation of the (non-JavaScript parts of) the
[Encoding Standard](https://encoding.spec.whatwg.org/) written in Rust.

Expand Down Expand Up @@ -53,7 +55,7 @@ Specifically, encoding_rs does the following:
workloads than the standard library; hopefully will get upstreamed some
day) and ASCII.

Additionally, `encoding_rs::mem` does the following:
Additionally, `encoding_rs2::mem` does the following:

* Checks if a byte buffer contains only ASCII.
* Checks if a potentially-invalid UTF-16 buffer contains only Basic Latin (ASCII).
Expand Down
128 changes: 65 additions & 63 deletions fuzz/fuzzers/fuzz_encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,48 +12,50 @@
extern crate libfuzzer_sys;
extern crate encoding_rs;

use encoding_rs::*;
use encoding_rs2::*;

// Doesn't include ISO-8859-8; only ISO-8859-8-I is listed (39 entries).
// NOTE(review): presumably the two share one decoder, so fuzzing a single
// variant is enough -- confirm before adding ISO_8859_8_INIT.
static ENCODINGS: [&'static Encoding; 39] = [&UTF_8_INIT,
&REPLACEMENT_INIT,
&GBK_INIT,
&BIG5_INIT,
&EUC_JP_INIT,
&GB18030_INIT,
&UTF_16BE_INIT,
&UTF_16LE_INIT,
&SHIFT_JIS_INIT,
&EUC_KR_INIT,
&ISO_2022_JP_INIT,
&X_USER_DEFINED_INIT,
&WINDOWS_1250_INIT,
&WINDOWS_1251_INIT,
&WINDOWS_1252_INIT,
&WINDOWS_1253_INIT,
&WINDOWS_1254_INIT,
&WINDOWS_1255_INIT,
&WINDOWS_1256_INIT,
&WINDOWS_1257_INIT,
&WINDOWS_1258_INIT,
&KOI8_U_INIT,
&MACINTOSH_INIT,
&IBM866_INIT,
&KOI8_R_INIT,
&ISO_8859_2_INIT,
&ISO_8859_3_INIT,
&ISO_8859_4_INIT,
&ISO_8859_5_INIT,
&ISO_8859_6_INIT,
&ISO_8859_7_INIT,
&ISO_8859_10_INIT,
&ISO_8859_13_INIT,
&ISO_8859_14_INIT,
&WINDOWS_874_INIT,
&ISO_8859_15_INIT,
&ISO_8859_16_INIT,
&ISO_8859_8_I_INIT,
&X_MAC_CYRILLIC_INIT];
// Table of encodings the fuzz target can exercise. `fuzz_target!` takes the
// first input byte as an index into this array and returns early when the
// index is out of range, so the position of each entry determines which
// leading byte selects it.
static ENCODINGS: [&'static Encoding; 39] = [
&UTF_8_INIT,
&REPLACEMENT_INIT,
&GBK_INIT,
&BIG5_INIT,
&EUC_JP_INIT,
&GB18030_INIT,
&UTF_16BE_INIT,
&UTF_16LE_INIT,
&SHIFT_JIS_INIT,
&EUC_KR_INIT,
&ISO_2022_JP_INIT,
&X_USER_DEFINED_INIT,
&WINDOWS_1250_INIT,
&WINDOWS_1251_INIT,
&WINDOWS_1252_INIT,
&WINDOWS_1253_INIT,
&WINDOWS_1254_INIT,
&WINDOWS_1255_INIT,
&WINDOWS_1256_INIT,
&WINDOWS_1257_INIT,
&WINDOWS_1258_INIT,
&KOI8_U_INIT,
&MACINTOSH_INIT,
&IBM866_INIT,
&KOI8_R_INIT,
&ISO_8859_2_INIT,
&ISO_8859_3_INIT,
&ISO_8859_4_INIT,
&ISO_8859_5_INIT,
&ISO_8859_6_INIT,
&ISO_8859_7_INIT,
&ISO_8859_10_INIT,
&ISO_8859_13_INIT,
&ISO_8859_14_INIT,
&WINDOWS_874_INIT,
&ISO_8859_15_INIT,
&ISO_8859_16_INIT,
&ISO_8859_8_I_INIT,
&X_MAC_CYRILLIC_INIT,
];

fn check_utf8(data: &[u8]) {
if let Err(_) = ::std::str::from_utf8(data) {
Expand Down Expand Up @@ -146,8 +148,7 @@ fn encode_from_utf8(encoding: &'static Encoding, data: &[u8]) {
} else {
let mut total_read = 0;
loop {
if let Some(needed) = encoder
.max_buffer_length_from_utf8_if_no_unmappables(
if let Some(needed) = encoder.max_buffer_length_from_utf8_if_no_unmappables(
string.len() - total_read,
) {
dst.resize(needed, 0);
Expand All @@ -164,9 +165,9 @@ fn encode_from_utf8(encoding: &'static Encoding, data: &[u8]) {
}
let mut total_read = 0;
loop {
if let Some(needed) = encoder.max_buffer_length_from_utf8_if_no_unmappables(
string.len() - total_read,
) {
if let Some(needed) =
encoder.max_buffer_length_from_utf8_if_no_unmappables(string.len() - total_read)
{
dst.resize(needed, 0);
let (result, read, _, _) =
encoder.encode_from_utf8(&string[total_read..], &mut dst, false);
Expand Down Expand Up @@ -206,7 +207,8 @@ fn encode_from_utf8_without_replacement(encoding: &'static Encoding, data: &[u8]
string.push(c);
} else {
if let Some(needed) =
encoder.max_buffer_length_from_utf8_without_replacement(string.len()) {
encoder.max_buffer_length_from_utf8_without_replacement(string.len())
{
dst.resize(needed, 0);
let (result, _, _) =
encoder.encode_from_utf8_without_replacement(&string, &mut dst, true);
Expand All @@ -216,7 +218,8 @@ fn encode_from_utf8_without_replacement(encoding: &'static Encoding, data: &[u8]
}
}
if let Some(needed) =
encoder.max_buffer_length_from_utf8_without_replacement(string.len()) {
encoder.max_buffer_length_from_utf8_without_replacement(string.len())
{
dst.resize(needed, 0);
let (result, _, _) =
encoder.encode_from_utf8_without_replacement(&string, &mut dst, false);
Expand Down Expand Up @@ -260,7 +263,8 @@ fn encode_from_utf16(encoding: &'static Encoding, data: &[u8]) {
let mut total_read = 0;
loop {
if let Some(needed) =
encoder.max_buffer_length_from_utf16_if_no_unmappables(chunk.len() - total_read) {
encoder.max_buffer_length_from_utf16_if_no_unmappables(chunk.len() - total_read)
{
dst.resize(needed, 0);
let (result, read, _, _) =
encoder.encode_from_utf16(&chunk[total_read..], &mut dst, last);
Expand Down Expand Up @@ -301,11 +305,11 @@ fn encode_from_utf16_without_replacement(encoding: &'static Encoding, data: &[u8
let new_offset = offset + chunk_size;
let chunk = &s[offset..new_offset];
offset = new_offset;
if let Some(needed) = encoder
.max_buffer_length_from_utf16_without_replacement(chunk.len()) {
if let Some(needed) = encoder.max_buffer_length_from_utf16_without_replacement(chunk.len())
{
dst.resize(needed, 0);
let (result, _, _) = encoder
.encode_from_utf16_without_replacement(&chunk, &mut dst, last);
let (result, _, _) =
encoder.encode_from_utf16_without_replacement(&chunk, &mut dst, last);
match result {
EncoderResult::InputEmpty => {
if last {
Expand Down Expand Up @@ -524,16 +528,14 @@ fn dispatch_test(encoding: &'static Encoding, data: &[u8]) {
}
}

fuzz_target!(
|data: &[u8]| {
if let Some(first) = data.first() {
let index = *first as usize;
if index >= ENCODINGS.len() {
return;
}
let encoding = ENCODINGS[index];
dispatch_test(encoding, &data[1..]);
fuzz_target!(|data: &[u8]| {
if let Some(first) = data.first() {
let index = *first as usize;
if index >= ENCODINGS.len() {
return;
}
// Comment to make rustfmt not introduce a compilation error
let encoding = ENCODINGS[index];
dispatch_test(encoding, &data[1..]);
}
);
// Comment to make rustfmt not introduce a compilation error
});
5 changes: 3 additions & 2 deletions fuzz/fuzzers/fuzz_labels.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#![no_main]
#[macro_use] extern crate libfuzzer_sys;
#[macro_use]
extern crate libfuzzer_sys;
extern crate encoding_rs;
use encoding_rs::*;
use encoding_rs2::*;

fuzz_target!(|data: &[u8]| {
Encoding::for_label(data);
Expand Down
Loading