24 commits
11e6ed7
JSON.dump: write directly into the provided IO
byroot Nov 5, 2024
31fa596
Add changes for v2.7.6
y-yagi Nov 6, 2024
74b979a
Categorize deprecated warning
nobu Nov 6, 2024
6b9a694
Fix right shift warnings
nobu Nov 6, 2024
f08790e
parser.rl: parse_string implement a fast path
byroot Nov 6, 2024
56f080b
Implement a fast path for integer parsing
byroot Nov 6, 2024
fa55cf3
Update benchmark annotations
byroot Nov 6, 2024
d91483e
Release 2.8.0
byroot Nov 6, 2024
0c3d4d0
Fix gemspec to include .jar files
byroot Nov 6, 2024
c26c49b
Release 2.8.1
byroot Nov 6, 2024
f1858cf
CI: ci.yml - update to actions/checkout@v4
MSP-Greg Nov 6, 2024
ce00d95
Added automated update workflow for GitHub Actions
hsbt Nov 7, 2024
0ba3b65
Mark parser.c and Parser.java as generated files
byroot Nov 7, 2024
650a5a5
Reduce comparisons when parsing numbers
tenderlove Nov 7, 2024
8b5c94e
Rename parse_float into parse_number
byroot Nov 7, 2024
1f346b5
JSON.load_file: explictly load the file as UTF-8
byroot Nov 7, 2024
0400251
Benchmark `Oj::Parser` in a thread safe way
byroot Nov 8, 2024
972752e
Fix $VERBOSE restore
tompng Nov 11, 2024
4b8fbdb
Only use the key cache if the Hash is in an Array
byroot Nov 13, 2024
566eaac
Fix redundant to_str call
YuheiNakasaka Nov 14, 2024
36913ae
Release 2.8.2
byroot Nov 14, 2024
cd182ae
Fix the BEWARE documentation in `load` and `unsafe_load`.
byroot Nov 16, 2024
4581b1e
Plumb OutputStream through generators
headius Nov 20, 2024
2ebe42c
Connect OutputStream-based generator to Ruby bits
headius Nov 20, 2024
2 changes: 2 additions & 0 deletions .gitattributes
@@ -0,0 +1,2 @@
ext/json/ext/parser/parser.c linguist-generated=true
java/src/json/ext/Parser.java linguist-generated=true
6 changes: 6 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,6 @@
version: 2
updates:
- package-ecosystem: 'github-actions'
directory: '/'
schedule:
interval: 'daily'
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -35,7 +35,7 @@ jobs:
- { os: windows-latest, ruby: jruby-head }

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Ruby
uses: ruby/setup-ruby-pkgs@v1
@@ -64,7 +64,7 @@ jobs:
fail-fast: false

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Ruby
uses: ruby/setup-ruby-pkgs@v1
21 changes: 18 additions & 3 deletions CHANGES.md
@@ -1,12 +1,27 @@
# Changes

### UNRELEASED
### 2024-11-14 (2.8.2)

* `JSON.load_file` explicitly reads the file as UTF-8.

### 2024-11-06 (2.8.1)

* Fix the java packages to include the extension.

### 2024-11-06 (2.8.0)

* Emit a deprecation warning when `JSON.load` creates custom types without the `create_additions` option being explicitly enabled.
* Prefer to use `JSON.unsafe_load(string)` or `JSON.load(string, create_additions: true)`.
* Emit a deprecation warning when serializing valid UTF-8 strings encoded in `ASCII_8BIT` aka `BINARY`.
* Bump required_ruby_version to 2.7.
* More performance improvements to `JSON.dump` and `JSON.generate`.
* Bump required Ruby version to 2.7.
* Add support for optionally parsing trailing commas, via `allow_trailing_comma: true`, which, in conjunction with the
  pre-existing support for comments, makes it suitable for parsing `jsonc` documents.
* Many performance improvements to `JSON.parse` and `JSON.load`, up to `1.7x` faster on real world documents.
* Some minor performance improvements to `JSON.dump` and `JSON.generate`.

### 2024-11-04 (2.7.6)

* Fix a regression in `JSON.generate` when dealing with Hash keys that are string subclasses: call `to_json` on them.

### 2024-10-25 (2.7.5)

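A small usage sketch of the 2.8.0 behaviour described in the changelog above. The option and method names (`allow_trailing_comma`, `create_additions`, `JSON.unsafe_load`) are taken from the changelog; the call sites and sample documents are illustrative assumptions, not code from this PR.

```ruby
require "json"

# Trailing commas, which together with the pre-existing comment support is
# what makes parsing JSONC-style documents feasible (call site assumed).
JSON.parse('{"a": 1, "b": 2,}', allow_trailing_comma: true)
# => {"a"=>1, "b"=>2}

# Round-tripping custom types: rather than relying on JSON.load's implicit
# behaviour (which now emits a deprecation warning), opt in explicitly as
# the changelog suggests.
json_string = '{"name":"ruby"}'
JSON.unsafe_load(json_string)                    # explicit opt-in variant
JSON.parse(json_string, create_additions: true)  # or enable additions per call
```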
4 changes: 2 additions & 2 deletions Rakefile
@@ -161,7 +161,7 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
file JRUBY_PARSER_JAR => :compile do
cd 'java/src' do
parser_classes = FileList[
"json/ext/ByteListTranscoder*.class",
"json/ext/ByteList*.class",
"json/ext/OptionsReader*.class",
"json/ext/Parser*.class",
"json/ext/RuntimeInfo*.class",
@@ -179,7 +179,7 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
file JRUBY_GENERATOR_JAR => :compile do
cd 'java/src' do
generator_classes = FileList[
"json/ext/ByteListTranscoder*.class",
"json/ext/ByteList*.class",
"json/ext/OptionsReader*.class",
"json/ext/Generator*.class",
"json/ext/RuntimeInfo*.class",
26 changes: 11 additions & 15 deletions benchmark/encoder.rb
@@ -17,7 +17,6 @@
def implementations(ruby_obj)
state = JSON::State.new(JSON.dump_default_options)
{
json_state: ["json (reuse)", proc { state.generate(ruby_obj) }],
json: ["json", proc { JSON.generate(ruby_obj) }],
oj: ["oj", proc { Oj.dump(ruby_obj) }],
}
@@ -58,27 +57,24 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [
# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]

# On the first two micro-benchmarks, the limiting factor is the fixed cost of initializing the
# generator state. Since `JSON.generate` now lazily allocates the `State` object we're now ~10% faster
# generator state. Since `JSON.generate` now lazily allocates the `State` object we're now ~10-20% faster
# than `Oj.dump`.
benchmark_encoding "small mixed", [1, "string", { a: 1, b: 2 }, [3, 4, 5]]
benchmark_encoding "small nested array", [[1,2,3,4,5]]*10

# On small hash specifically, we're just on par with `Oj.dump`. It would be worth investigating why
# Hash serialization doesn't perform as well as other types.
benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }

# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~10% slower when dealing
# with mostly multi-byte characters. This is a tradeoff.
benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state)
benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state)
# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~50% slower when dealing
# with mostly multi-byte characters. There are likely some gains left to be had in multi-byte handling.
benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500)
benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500)

# On these benchmarks we perform well; we're on par or better.
benchmark_encoding "integers", (1_000_000..1_001_000).to_a, except: %i(json_state)
benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json"), except: %i(json_state)
benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json"), except: %i(json_state)
benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json")
benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json")

# On twitter.json we're still about 10% slower, this is worth investigating.
benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state)
# On twitter.json we're still about 6% slower; this is worth investigating.
benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json")

# This benchmark spent the overwhelming majority of its time in `ruby_dtoa`. We rely on Ruby's implementation
# which uses a relatively old version of dtoa.c from David M. Gay.
@@ -89,8 +85,8 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [
# but all these are implemented in C++11 or newer, making it hard if not impossible to include them.
# Short of a pure C99 implementation of these newer algorithms, there isn't much that can be done to match
# Oj speed without losing precision.
benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false, except: %i(json_state)
benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false

# We're about 10% faster when `to_json` calls are involved, but this wasn't particularly optimized; there might be
# opportunities here.
benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20, except: %i(json_state)
benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20
21 changes: 8 additions & 13 deletions benchmark/parser.rb
@@ -19,7 +19,7 @@ def benchmark_parsing(name, json_output)
Benchmark.ips do |x|
x.report("json") { JSON.parse(json_output) } if RUN[:json]
x.report("oj") { Oj.load(json_output) } if RUN[:oj]
x.report("Oj::Parser") { Oj::Parser.usual.parse(json_output) } if RUN[:oj]
x.report("Oj::Parser") { Oj::Parser.new(:usual).parse(json_output) } if RUN[:oj]
x.report("rapidjson") { RapidJSON.parse(json_output) } if RUN[:rapidjson]
x.compare!(order: :baseline)
end
@@ -28,27 +28,22 @@ def benchmark_parsing(name, json_output)

# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]

# Oj::Parser is very significantly faster (1.80x) on the nested array benchmark.
benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10)

# Oj::Parser is significantly faster (~1.5x) on the next 4 benchmarks, in large part because its
# cache is persisted across calls. That's not something we can do with the current API; we'd
# need to expose a stateful API as well, but that's not really desirable.
# Other than that we're faster than regular `Oj.load` by a good margin.
benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" })

benchmark_parsing "test from oj", <<JSON
{"a":"Alpha","b":true,"c":12345,"d":[true,[false,[-123456789,null],3.9676,["Something else.",false],null]],"e":{"zero":null,"one":1,"two":2,"three":[3],"four":[0,1,2,3,4]},"f":null,"h":{"a":{"b":{"c":{"d":{"e":{"f":{"g":null}}}}}}},"i":[[[[[[[null]]]]]]]}
{"a":"Alpha","b":true,"c":12345,"d":[true,[false,[-123456789,null],3.9676,["Something else.",false],null]],
"e":{"zero":null,"one":1,"two":2,"three":[3],"four":[0,1,2,3,4]},"f":null,
"h":{"a":{"b":{"c":{"d":{"e":{"f":{"g":null}}}}}}},"i":[[[[[[[null]]]]]]]}
JSON

# On these macro-benchmarks, we're on par with `Oj::Parser` and significantly
# faster than `Oj.load`.
# On these macro-benchmarks, we're on par with `Oj::Parser`, except on `twitter.json` where we're `1.14x` faster,
# and between 1.3x and 1.5x faster than `Oj.load`.
benchmark_parsing "activitypub.json", File.read("#{__dir__}/data/activitypub.json")
benchmark_parsing "twitter.json", File.read("#{__dir__}/data/twitter.json")
benchmark_parsing "citm_catalog.json", File.read("#{__dir__}/data/citm_catalog.json")

# rapidjson is 8x faster thanks to it's much more performant float parser.
# rapidjson is 8x faster thanks to its much more performant float parser.
# Unfortunately, there aren't many existing fast float parsers in pure C,
# and including C++ is problematic.
# Aside from that, we're much faster than other alternatives here.
# Aside from that, we're close to the alternatives here.
benchmark_parsing "float parsing", File.read("#{__dir__}/data/canada.json")
33 changes: 26 additions & 7 deletions ext/json/ext/fbuffer/fbuffer.h
@@ -46,6 +46,7 @@ typedef struct FBufferStruct {
unsigned long len;
unsigned long capa;
char *ptr;
VALUE io;
} FBuffer;

#define FBUFFER_STACK_SIZE 512
@@ -66,7 +67,7 @@ static void fbuffer_append_long(FBuffer *fb, long number);
#endif
static inline void fbuffer_append_char(FBuffer *fb, char newchr);
#ifdef JSON_GENERATOR
static VALUE fbuffer_to_s(FBuffer *fb);
static VALUE fbuffer_finalize(FBuffer *fb);
#endif

static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size)
@@ -86,15 +87,26 @@ static void fbuffer_free(FBuffer *fb)
}
}

#ifndef JSON_GENERATOR
static void fbuffer_clear(FBuffer *fb)
{
fb->len = 0;
}
#endif

static void fbuffer_flush(FBuffer *fb)
{
rb_io_write(fb->io, rb_utf8_str_new(fb->ptr, fb->len));
fbuffer_clear(fb);
}

static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
{
if (RB_UNLIKELY(fb->io)) {
fbuffer_flush(fb);
if (RB_LIKELY(requested < fb->capa)) {
return;
}
}

unsigned long required;

if (RB_UNLIKELY(!fb->ptr)) {
@@ -174,11 +186,18 @@ static void fbuffer_append_long(FBuffer *fb, long number)
fbuffer_append(fb, buffer_end - len, len);
}

static VALUE fbuffer_to_s(FBuffer *fb)
static VALUE fbuffer_finalize(FBuffer *fb)
{
VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb));
fbuffer_free(fb);
return result;
if (fb->io) {
fbuffer_flush(fb);
fbuffer_free(fb);
rb_io_flush(fb->io);
return fb->io;
} else {
VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb));
fbuffer_free(fb);
return result;
}
}
#endif
#endif
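The new `io` field, `fbuffer_flush`, and `fbuffer_finalize` above are the pieces that let the generator write straight into an IO (the "JSON.dump: write directly into the provided IO" commit). A usage-level sketch of what that enables; `StringIO` and the sample payload are illustrative assumptions:

```ruby
require "json"
require "stringio"

payload = { "status" => "ok", "items" => (1..5).to_a }

# JSON.dump has long accepted an IO-like second argument; per the commits in
# this PR, the generated JSON can now be flushed into that IO as the buffer
# fills instead of always being assembled as one big Ruby string first.
io = StringIO.new
JSON.dump(payload, io)
io.string  # => '{"status":"ok","items":[1,2,3,4,5]}'
```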