diff --git a/.github/actions/install/action.yml b/.github/actions/install/action.yml index 17c027593..3b29b7b27 100644 --- a/.github/actions/install/action.yml +++ b/.github/actions/install/action.yml @@ -5,7 +5,7 @@ inputs: zig: description: 'Zig version to install' required: false - default: '0.15.1' + default: '0.15.2' arch: description: 'CPU arch used to select the v8 lib' required: false @@ -17,7 +17,7 @@ inputs: zig-v8: description: 'zig v8 version to install' required: false - default: 'v0.1.33' + default: 'v0.1.35' v8: description: 'v8 version to install' required: false @@ -26,6 +26,10 @@ inputs: description: 'cache dir to use' required: false default: '~/.cache' + mode: + description: 'debug or release' + required: false + default: 'debug' runs: using: "composite" @@ -38,7 +42,7 @@ runs: sudo apt-get update sudo apt-get install -y wget xz-utils python3 ca-certificates git pkg-config libglib2.0-dev gperf libexpat1-dev cmake clang - - uses: mlugg/setup-zig@v2 + - uses: mlugg/setup-zig@v2.0.5 with: version: ${{ inputs.zig }} @@ -58,37 +62,26 @@ runs: wget -O ${{ inputs.cache-dir }}/v8/libc_v8.a https://github.com/lightpanda-io/zig-v8-fork/releases/download/${{ inputs.zig-v8 }}/libc_v8_${{ inputs.v8 }}_${{ inputs.os }}_${{ inputs.arch }}.a - - name: install v8 + - name: install v8 release + if: ${{ inputs.mode == 'release' }} shell: bash run: | - mkdir -p v8/out/${{ inputs.os }}/debug/obj/zig/ - ln -s ${{ inputs.cache-dir }}/v8/libc_v8.a v8/out/${{ inputs.os }}/debug/obj/zig/libc_v8.a - mkdir -p v8/out/${{ inputs.os }}/release/obj/zig/ ln -s ${{ inputs.cache-dir }}/v8/libc_v8.a v8/out/${{ inputs.os }}/release/obj/zig/libc_v8.a - - name: Cache libiconv - id: cache-libiconv - uses: actions/cache@v4 - env: - cache-name: cache-libiconv - with: - path: ${{ inputs.cache-dir }}/libiconv - key: vendor/libiconv/libiconv-1.17 - - - name: download libiconv - if: ${{ steps.cache-libiconv.outputs.cache-hit != 'true' }} - shell: bash - run: make download-libiconv - - - 
name: build libiconv + - name: install v8 debug + if: ${{ inputs.mode == 'debug' }} shell: bash - run: make build-libiconv + run: | + mkdir -p v8/out/${{ inputs.os }}/debug/obj/zig/ + ln -s ${{ inputs.cache-dir }}/v8/libc_v8.a v8/out/${{ inputs.os }}/debug/obj/zig/libc_v8.a - - name: build mimalloc + - name: html5ever release + if: ${{ inputs.mode == 'release' }} shell: bash - run: make install-mimalloc + run: zig build -Doptimize=ReleaseFast html5ever - - name: build netsurf + - name: html5ever debug + if: ${{ inputs.mode == 'debug' }} shell: bash - run: make install-netsurf + run: zig build html5ever diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index df16af4c9..0ab034e84 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,6 +36,7 @@ jobs: with: os: ${{env.OS}} arch: ${{env.ARCH}} + mode: 'release' - name: zig build run: zig build --release=safe -Doptimize=ReleaseSafe -Dcpu=x86_64 -Dgit_commit=$(git rev-parse --short ${{ github.sha }}) @@ -74,6 +75,7 @@ jobs: with: os: ${{env.OS}} arch: ${{env.ARCH}} + mode: 'release' - name: zig build run: zig build --release=safe -Doptimize=ReleaseSafe -Dcpu=generic -Dgit_commit=$(git rev-parse --short ${{ github.sha }}) @@ -114,6 +116,7 @@ jobs: with: os: ${{env.OS}} arch: ${{env.ARCH}} + mode: 'release' - name: zig build run: zig build --release=safe -Doptimize=ReleaseSafe -Dgit_commit=$(git rev-parse --short ${{ github.sha }}) @@ -157,6 +160,7 @@ jobs: with: os: ${{env.OS}} arch: ${{env.ARCH}} + mode: 'release' - name: zig build run: zig build --release=safe -Doptimize=ReleaseSafe -Dgit_commit=$(git rev-parse --short ${{ github.sha }}) diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index fb295246c..1b8b910b7 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -56,6 +56,8 @@ jobs: submodules: recursive - uses: ./.github/actions/install + with: + mode: 'release' - name: zig build release run: zig build 
-Doptimize=ReleaseFast -Dcpu=x86_64 -Dgit_commit=$(git rev-parse --short ${{ github.sha }}) @@ -122,7 +124,7 @@ jobs: needs: zig-build-release env: - MAX_MEMORY: 27000 + MAX_MEMORY: 28000 MAX_AVG_DURATION: 23 LIGHTPANDA_DISABLE_TELEMETRY: true diff --git a/.github/workflows/zig-fmt.yml b/.github/workflows/zig-fmt.yml index 2a1fdd527..106e557a1 100644 --- a/.github/workflows/zig-fmt.yml +++ b/.github/workflows/zig-fmt.yml @@ -1,7 +1,7 @@ name: zig-fmt env: - ZIG_VERSION: 0.15.1 + ZIG_VERSION: 0.15.2 on: pull_request: diff --git a/.gitignore b/.gitignore index ad9ae7b45..59d6886ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ -zig-cache /.zig-cache/ -zig-out -/vendor/netsurf/out -/vendor/libiconv/ +/zig-out/ lightpanda.id /v8/ +/build/ +/src/html5ever/target/ diff --git a/.gitmodules b/.gitmodules index 717d079bb..5462f8f0e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,30 +1,9 @@ -[submodule "vendor/netsurf/libwapcaplet"] - path = vendor/netsurf/libwapcaplet - url = https://github.com/lightpanda-io/libwapcaplet.git/ -[submodule "vendor/netsurf/libparserutils"] - path = vendor/netsurf/libparserutils - url = https://github.com/lightpanda-io/libparserutils.git/ -[submodule "vendor/netsurf/libdom"] - path = vendor/netsurf/libdom - url = https://github.com/lightpanda-io/libdom.git/ -[submodule "vendor/netsurf/share/netsurf-buildsystem"] - path = vendor/netsurf/share/netsurf-buildsystem - url = https://github.com/lightpanda-io/netsurf-buildsystem.git -[submodule "vendor/netsurf/libhubbub"] - path = vendor/netsurf/libhubbub - url = https://github.com/lightpanda-io/libhubbub.git/ [submodule "tests/wpt"] path = tests/wpt url = https://github.com/lightpanda-io/wpt -[submodule "vendor/mimalloc"] - path = vendor/mimalloc - url = https://github.com/microsoft/mimalloc.git/ [submodule "vendor/nghttp2"] path = vendor/nghttp2 url = https://github.com/nghttp2/nghttp2.git -[submodule "vendor/mbedtls"] - path = vendor/mbedtls - url = https://github.com/Mbed-TLS/mbedtls.git 
[submodule "vendor/zlib"] path = vendor/zlib url = https://github.com/madler/zlib.git diff --git a/Dockerfile b/Dockerfile index bcb613f7f..a405a057c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ FROM debian:stable ARG MINISIG=0.12 -ARG ZIG=0.15.1 +ARG ZIG=0.15.2 ARG ZIG_MINISIG=RWSGOq2NVecA2UPNdBUZykf1CCb147pkmdtYxgb3Ti+JO/wCYvhbAb/U ARG V8=14.0.365.4 -ARG ZIG_V8=v0.1.33 +ARG ZIG_V8=v0.1.35 ARG TARGETPLATFORM RUN apt-get update -yq && \ @@ -40,9 +40,7 @@ WORKDIR /browser RUN git submodule init && \ git submodule update --recursive -RUN make install-libiconv && \ - make install-netsurf && \ - make install-mimalloc +RUN zig build -Doptimize=ReleaseFast html5ever # download and install v8 RUN case $TARGETPLATFORM in \ diff --git a/Makefile b/Makefile index b0ae69015..3f79e1fa2 100644 --- a/Makefile +++ b/Makefile @@ -96,9 +96,16 @@ wpt-summary: @printf "\e[36mBuilding wpt...\e[0m\n" @$(ZIG) build wpt -- --summary $(filter-out $@,$(MAKECMDGOALS)) || (printf "\e[33mBuild ERROR\e[0m\n"; exit 1;) -## Test +## Test - `grep` is used to filter out the huge compile command on build +ifeq ($(OS), macos) test: - @TEST_FILTER='${F}' $(ZIG) build test -freference-trace --summary all + @script -q /dev/null sh -c 'TEST_FILTER="${F}" $(ZIG) build test -freference-trace' 2>&1 \ + | grep --line-buffered -v "^/.*zig test -freference-trace" +else +test: + @script -qec 'TEST_FILTER="${F}" $(ZIG) build test -freference-trace' /dev/null 2>&1 \ + | grep --line-buffered -v "^/.*zig test -freference-trace" +endif ## Run demo/runner end to end tests end2end: @@ -120,128 +127,17 @@ build-v8: # Install and build required dependencies commands # ------------ -.PHONY: install-submodule -.PHONY: install-libiconv -.PHONY: _install-netsurf install-netsurf clean-netsurf test-netsurf install-netsurf-dev -.PHONY: install-mimalloc install-mimalloc-dev clean-mimalloc -.PHONY: install-dev install +.PHONY: install install-dev ## Install and build dependencies for release -install: 
install-submodule install-libiconv install-netsurf install-mimalloc +install: install-submodule ## Install and build dependencies for dev -install-dev: install-submodule install-libiconv install-netsurf-dev install-mimalloc-dev - -install-netsurf-dev: _install-netsurf -install-netsurf-dev: OPTCFLAGS := -O0 -g -DNDEBUG - -install-netsurf: _install-netsurf -install-netsurf: OPTCFLAGS := -DNDEBUG - -BC_NS := $(BC)vendor/netsurf/out/$(OS)-$(ARCH) -ICONV := $(BC)vendor/libiconv/out/$(OS)-$(ARCH) -# TODO: add Linux iconv path (I guess it depends on the distro) -# TODO: this way of linking libiconv is not ideal. We should have a more generic way -# and stick to a specif version. Maybe build from source. Anyway not now. -_install-netsurf: clean-netsurf - @printf "\e[36mInstalling NetSurf...\e[0m\n" && \ - ls $(ICONV)/lib/libiconv.a 1> /dev/null || (printf "\e[33mERROR: you need to execute 'make install-libiconv'\e[0m\n"; exit 1;) && \ - mkdir -p $(BC_NS) && \ - cp -R vendor/netsurf/share $(BC_NS) && \ - export PREFIX=$(BC_NS) && \ - export OPTLDFLAGS="-L$(ICONV)/lib" && \ - export OPTCFLAGS="$(OPTCFLAGS) -I$(ICONV)/include" && \ - printf "\e[33mInstalling libwapcaplet...\e[0m\n" && \ - cd vendor/netsurf/libwapcaplet && \ - BUILDDIR=$(BC_NS)/build/libwapcaplet make install && \ - cd ../libparserutils && \ - printf "\e[33mInstalling libparserutils...\e[0m\n" && \ - BUILDDIR=$(BC_NS)/build/libparserutils make install && \ - cd ../libhubbub && \ - printf "\e[33mInstalling libhubbub...\e[0m\n" && \ - BUILDDIR=$(BC_NS)/build/libhubbub make install && \ - rm src/treebuilder/autogenerated-element-type.c && \ - cd ../libdom && \ - printf "\e[33mInstalling libdom...\e[0m\n" && \ - BUILDDIR=$(BC_NS)/build/libdom make install && \ - printf "\e[33mRunning libdom example...\e[0m\n" && \ - cd examples && \ - $(ZIG) cc \ - -I$(ICONV)/include \ - -I$(BC_NS)/include \ - -L$(ICONV)/lib \ - -L$(BC_NS)/lib \ - -liconv \ - -ldom \ - -lhubbub \ - -lparserutils \ - -lwapcaplet \ - -o a.out \ - 
dom-structure-dump.c \ - $(ICONV)/lib/libiconv.a && \ - ./a.out > /dev/null && \ - rm a.out && \ - printf "\e[36mDone NetSurf $(OS)\e[0m\n" - -clean-netsurf: - @printf "\e[36mCleaning NetSurf build...\e[0m\n" && \ - rm -Rf $(BC_NS) - -test-netsurf: - @printf "\e[36mTesting NetSurf...\e[0m\n" && \ - export PREFIX=$(BC_NS) && \ - export LDFLAGS="-L$(ICONV)/lib -L$(BC_NS)/lib" && \ - export CFLAGS="-I$(ICONV)/include -I$(BC_NS)/include" && \ - cd vendor/netsurf/libdom && \ - BUILDDIR=$(BC_NS)/build/libdom make test - -download-libiconv: -ifeq ("$(wildcard vendor/libiconv/libiconv-1.17)","") - @mkdir -p vendor/libiconv - @cd vendor/libiconv && \ - curl -L https://github.com/lightpanda-io/libiconv/releases/download/1.17/libiconv-1.17.tar.gz | tar -xvzf - -endif - -build-libiconv: clean-libiconv - @cd vendor/libiconv/libiconv-1.17 && \ - ./configure --prefix=$(ICONV) --enable-static && \ - make && make install - -install-libiconv: download-libiconv build-libiconv - -clean-libiconv: -ifneq ("$(wildcard vendor/libiconv/libiconv-1.17/Makefile)","") - @cd vendor/libiconv/libiconv-1.17 && \ - make clean -endif +install-dev: install-submodule data: cd src/data && go run public_suffix_list_gen.go > public_suffix_list.zig -.PHONY: _build_mimalloc - -MIMALLOC := $(BC)vendor/mimalloc/out/$(OS)-$(ARCH) -_build_mimalloc: clean-mimalloc - @mkdir -p $(MIMALLOC)/build && \ - cd $(MIMALLOC)/build && \ - cmake -DMI_BUILD_SHARED=OFF -DMI_BUILD_OBJECT=OFF -DMI_BUILD_TESTS=OFF -DMI_OVERRIDE=OFF $(OPTS) ../../.. 
&& \ - make && \ - mkdir -p $(MIMALLOC)/lib - -install-mimalloc-dev: _build_mimalloc -install-mimalloc-dev: OPTS=-DCMAKE_BUILD_TYPE=Debug -install-mimalloc-dev: - @cd $(MIMALLOC) && \ - mv build/libmimalloc-debug.a lib/libmimalloc.a - -install-mimalloc: _build_mimalloc -install-mimalloc: - @cd $(MIMALLOC) && \ - mv build/libmimalloc.a lib/libmimalloc.a - -clean-mimalloc: - @rm -Rf $(MIMALLOC)/build - ## Init and update git submodule install-submodule: @git submodule init && \ diff --git a/README.md b/README.md index a1009e7f1..5e25926ab 100644 --- a/README.md +++ b/README.md @@ -140,13 +140,14 @@ You may still encounter errors or crashes. Please open an issue with specifics i Here are the key features we have implemented: -- [x] HTTP loader (based on Libcurl) -- [x] HTML parser and DOM tree (based on Netsurf libs) -- [x] Javascript support (v8) +- [x] HTTP loader ([Libcurl](https://curl.se/libcurl/)) +- [x] HTML parser ([html5ever](https://github.com/servo/html5ever)) +- [x] DOM tree +- [x] Javascript support ([v8](https://v8.dev/)) - [x] DOM APIs - [x] Ajax - [x] XHR API - - [x] Fetch API (polyfill) + - [x] Fetch API - [x] DOM dump - [x] CDP/websockets server - [x] Click @@ -164,7 +165,7 @@ You can also follow the progress of our Javascript support in our dedicated [zig ### Prerequisites -Lightpanda is written with [Zig](https://ziglang.org/) `0.15.1`. You have to +Lightpanda is written with [Zig](https://ziglang.org/) `0.15.2`. You have to install it with the right version in order to build the project. Lightpanda also depends on @@ -214,37 +215,15 @@ To init or update the submodules in the `vendor/` directory: make install-submodule ``` -**iconv** +**html5ever** -libiconv is an internationalization library used by Netsurf. +[html5ever](https://github.com/servo/html5ever) is a high-performance browser-grade HTML5 parser. ``` -make install-libiconv +zig build html5ever ``` -**Netsurf libs** - -Netsurf libs are used for HTML parsing and DOM tree generation. 
- -``` -make install-netsurf -``` - -For dev env, use `make install-netsurf-dev`. - -**Mimalloc** - -Mimalloc is used as a C memory allocator. - -``` -make install-mimalloc -``` - -For dev env, use `make install-mimalloc-dev`. - -Note: when Mimalloc is built in dev mode, you can dump memory stats with the -env var `MIMALLOC_SHOW_STATS=1`. See -[https://microsoft.github.io/mimalloc/environment.html](https://microsoft.github.io/mimalloc/environment.html). +For a release build, use `zig build -Doptimize=ReleaseFast html5ever`. **v8** diff --git a/build.zig b/build.zig index 3437dfad0..0070e5769 100644 --- a/build.zig +++ b/build.zig @@ -23,7 +23,7 @@ const Build = std.Build; /// Do not rename this constant. It is scanned by some scripts to determine /// which zig version to install. -const recommended_zig_version = "0.15.1"; +const recommended_zig_version = "0.15.2"; pub fn build(b: *Build) !void { switch (comptime builtin.zig_version.order(std.SemanticVersion.parse(recommended_zig_version) catch unreachable)) { @@ -39,6 +39,9 @@ pub fn build(b: *Build) !void { }, } + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + var opts = b.addOptions(); opts.addOption( []const u8, @@ -46,90 +49,147 @@ pub fn build(b: *Build) !void { b.option([]const u8, "git_commit", "Current git commit") orelse "dev", ); - const target = b.standardTargetOptions(.{}); - const optimize = b.standardOptimizeOption(.{}); + // Build step to install html5ever dependency. + const html5ever_argv = blk: { + const argv: []const []const u8 = &.{ + "cargo", + "build", + // Seems cargo can figure out required paths out of Cargo.toml. + "--manifest-path", + "src/html5ever/Cargo.toml", + // TODO: We can prefer `--artifact-dir` once it become stable. + "--target-dir", + b.getInstallPath(.prefix, "html5ever"), + // This must be the last argument. + "--release", + }; - // We're still using llvm because the new x86 backend seems to crash - // with v8. 
This can be reproduced in zig-v8-fork. + break :blk switch (optimize) { + // Prefer dev build on debug option. + .Debug => argv[0 .. argv.len - 1], + else => argv, + }; + }; + const html5ever_exec_cargo = b.addSystemCommand(html5ever_argv); + const html5ever_step = b.step("html5ever", "Install html5ever dependency (requires cargo)"); + html5ever_step.dependOn(&html5ever_exec_cargo.step); - const lightpanda_module = b.addModule("lightpanda", .{ - .root_source_file = b.path("src/main.zig"), - .target = target, - .optimize = optimize, - .link_libc = true, - .link_libcpp = true, - }); - try addDependencies(b, lightpanda_module, opts); + const enable_tsan = b.option(bool, "tsan", "Enable Thread Sanitizer"); + const enable_csan = b.option(std.zig.SanitizeC, "csan", "Enable C Sanitizers"); + + const lightpanda_module = blk: { + const mod = b.addModule("lightpanda", .{ + .root_source_file = b.path("src/lightpanda.zig"), + .target = target, + .optimize = optimize, + .link_libc = true, + .link_libcpp = true, + .sanitize_c = enable_csan, + .sanitize_thread = enable_tsan, + }); + + try addDependencies(b, mod, opts); + + break :blk mod; + }; + + const html5ever_obj = switch (optimize) { + .Debug => b.getInstallPath(.prefix, "html5ever/debug/liblitefetch_html5ever.a"), + // Release builds. 
+ else => b.getInstallPath(.prefix, "html5ever/release/liblitefetch_html5ever.a"), + }; + + lightpanda_module.addObjectFile(.{ .cwd_relative = html5ever_obj }); { // browser - // ------- - - // compile and install const exe = b.addExecutable(.{ .name = "lightpanda", .use_llvm = true, - .root_module = lightpanda_module, + .root_module = b.createModule(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + .sanitize_c = enable_csan, + .sanitize_thread = enable_tsan, + .imports = &.{ + .{ .name = "lightpanda", .module = lightpanda_module }, + }, + }), }); b.installArtifact(exe); - // run const run_cmd = b.addRunArtifact(exe); if (b.args) |args| { run_cmd.addArgs(args); } - - // step const run_step = b.step("run", "Run the app"); run_step.dependOn(&run_cmd.step); } { - // tests - // ---- - - // compile + // test const tests = b.addTest(.{ .root_module = lightpanda_module, .use_llvm = true, .test_runner = .{ .path = b.path("src/test_runner.zig"), .mode = .simple }, }); - const run_tests = b.addRunArtifact(tests); + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_tests.step); + } + + { + // ZIGDOM + // browser + const exe = b.addExecutable(.{ + .name = "legacy_test", + .use_llvm = true, + .root_module = b.createModule(.{ + .root_source_file = b.path("src/main_legacy_test.zig"), + .target = target, + .optimize = optimize, + .sanitize_c = enable_csan, + .sanitize_thread = enable_tsan, + .imports = &.{ + .{ .name = "lightpanda", .module = lightpanda_module }, + }, + }), + }); + b.installArtifact(exe); + + const run_cmd = b.addRunArtifact(exe); if (b.args) |args| { - run_tests.addArgs(args); + run_cmd.addArgs(args); } - - // step - const tests_step = b.step("test", "Run unit tests"); - tests_step.dependOn(&run_tests.step); + const run_step = b.step("legacy_test", "Run the app"); + run_step.dependOn(&run_cmd.step); } { // wpt - // ----- - const wpt_module = b.createModule(.{ - .root_source_file = 
b.path("src/main_wpt.zig"), - .target = target, - .optimize = optimize, - }); - try addDependencies(b, wpt_module, opts); - - // compile and install - const wpt = b.addExecutable(.{ + const exe = b.addExecutable(.{ .name = "lightpanda-wpt", .use_llvm = true, - .root_module = wpt_module, + .root_module = b.createModule(.{ + .root_source_file = b.path("src/main_wpt.zig"), + .target = target, + .optimize = optimize, + .sanitize_c = enable_csan, + .sanitize_thread = enable_tsan, + .imports = &.{ + .{ .name = "lightpanda", .module = lightpanda_module }, + }, + }), }); + b.installArtifact(exe); - // run - const wpt_cmd = b.addRunArtifact(wpt); + const run_cmd = b.addRunArtifact(exe); if (b.args) |args| { - wpt_cmd.addArgs(args); + run_cmd.addArgs(args); } - // step - const wpt_step = b.step("wpt", "WPT tests"); - wpt_step.dependOn(&wpt_cmd.step); + const run_step = b.step("wpt", "Run WPT tests"); + run_step.dependOn(&run_cmd.step); } { @@ -152,7 +212,6 @@ pub fn build(b: *Build) !void { } fn addDependencies(b: *Build, mod: *Build.Module, opts: *Build.Step.Options) !void { - try moduleNetSurf(b, mod); mod.addImport("build_config", opts.createModule()); const target = mod.resolved_target.?; @@ -374,14 +433,27 @@ fn addDependencies(b: *Build, mod: *Build.Module, opts: *Build.Step.Options) !vo mod.addCMacro("STDC_HEADERS", "1"); mod.addCMacro("TIME_WITH_SYS_TIME", "1"); mod.addCMacro("USE_NGHTTP2", "1"); - mod.addCMacro("USE_MBEDTLS", "1"); + mod.addCMacro("USE_OPENSSL", "1"); + mod.addCMacro("OPENSSL_IS_BORINGSSL", "1"); mod.addCMacro("USE_THREADS_POSIX", "1"); mod.addCMacro("USE_UNIX_SOCKETS", "1"); } try buildZlib(b, mod); try buildBrotli(b, mod); - try buildMbedtls(b, mod); + const boringssl_dep = b.dependency("boringssl-zig", .{ + .target = target, + .optimize = mod.optimize.?, + .force_pic = true, + }); + + const ssl = boringssl_dep.artifact("ssl"); + ssl.bundle_ubsan_rt = false; + const crypto = boringssl_dep.artifact("crypto"); + crypto.bundle_ubsan_rt = false; + + 
mod.linkLibrary(ssl); + mod.linkLibrary(crypto); try buildNghttp2(b, mod); try buildCurl(b, mod); @@ -397,63 +469,6 @@ fn addDependencies(b: *Build, mod: *Build.Module, opts: *Build.Step.Options) !vo } } -fn moduleNetSurf(b: *Build, mod: *Build.Module) !void { - const target = mod.resolved_target.?; - const os = target.result.os.tag; - const arch = target.result.cpu.arch; - - // iconv - const libiconv_lib_path = try std.fmt.allocPrint( - b.allocator, - "vendor/libiconv/out/{s}-{s}/lib/libiconv.a", - .{ @tagName(os), @tagName(arch) }, - ); - const libiconv_include_path = try std.fmt.allocPrint( - b.allocator, - "vendor/libiconv/out/{s}-{s}/lib/libiconv.a", - .{ @tagName(os), @tagName(arch) }, - ); - mod.addObjectFile(b.path(libiconv_lib_path)); - mod.addIncludePath(b.path(libiconv_include_path)); - - { - // mimalloc - const mimalloc = "vendor/mimalloc"; - const lib_path = try std.fmt.allocPrint( - b.allocator, - mimalloc ++ "/out/{s}-{s}/lib/libmimalloc.a", - .{ @tagName(os), @tagName(arch) }, - ); - mod.addObjectFile(b.path(lib_path)); - mod.addIncludePath(b.path(mimalloc ++ "/include")); - } - - // netsurf libs - const ns = "vendor/netsurf"; - const ns_include_path = try std.fmt.allocPrint( - b.allocator, - ns ++ "/out/{s}-{s}/include", - .{ @tagName(os), @tagName(arch) }, - ); - mod.addIncludePath(b.path(ns_include_path)); - - const libs: [4][]const u8 = .{ - "libdom", - "libhubbub", - "libparserutils", - "libwapcaplet", - }; - inline for (libs) |lib| { - const ns_lib_path = try std.fmt.allocPrint( - b.allocator, - ns ++ "/out/{s}-{s}/lib/" ++ lib ++ ".a", - .{ @tagName(os), @tagName(arch) }, - ); - mod.addObjectFile(b.path(ns_lib_path)); - mod.addIncludePath(b.path(ns ++ "/" ++ lib ++ "/src")); - } -} - fn buildZlib(b: *Build, m: *Build.Module) !void { const zlib = b.addLibrary(.{ .name = "zlib", @@ -510,126 +525,6 @@ fn buildBrotli(b: *Build, m: *Build.Module) !void { } }); } -fn buildMbedtls(b: *Build, m: *Build.Module) !void { - const mbedtls = 
b.addLibrary(.{ - .name = "mbedtls", - .root_module = m, - }); - - const root = "vendor/mbedtls/"; - mbedtls.addIncludePath(b.path(root ++ "include")); - mbedtls.addIncludePath(b.path(root ++ "library")); - - mbedtls.addCSourceFiles(.{ .flags = &.{}, .files = &.{ - root ++ "library/aes.c", - root ++ "library/aesni.c", - root ++ "library/aesce.c", - root ++ "library/aria.c", - root ++ "library/asn1parse.c", - root ++ "library/asn1write.c", - root ++ "library/base64.c", - root ++ "library/bignum.c", - root ++ "library/bignum_core.c", - root ++ "library/bignum_mod.c", - root ++ "library/bignum_mod_raw.c", - root ++ "library/camellia.c", - root ++ "library/ccm.c", - root ++ "library/chacha20.c", - root ++ "library/chachapoly.c", - root ++ "library/cipher.c", - root ++ "library/cipher_wrap.c", - root ++ "library/constant_time.c", - root ++ "library/cmac.c", - root ++ "library/ctr_drbg.c", - root ++ "library/des.c", - root ++ "library/dhm.c", - root ++ "library/ecdh.c", - root ++ "library/ecdsa.c", - root ++ "library/ecjpake.c", - root ++ "library/ecp.c", - root ++ "library/ecp_curves.c", - root ++ "library/entropy.c", - root ++ "library/entropy_poll.c", - root ++ "library/error.c", - root ++ "library/gcm.c", - root ++ "library/hkdf.c", - root ++ "library/hmac_drbg.c", - root ++ "library/lmots.c", - root ++ "library/lms.c", - root ++ "library/md.c", - root ++ "library/md5.c", - root ++ "library/memory_buffer_alloc.c", - root ++ "library/nist_kw.c", - root ++ "library/oid.c", - root ++ "library/padlock.c", - root ++ "library/pem.c", - root ++ "library/pk.c", - root ++ "library/pk_ecc.c", - root ++ "library/pk_wrap.c", - root ++ "library/pkcs12.c", - root ++ "library/pkcs5.c", - root ++ "library/pkparse.c", - root ++ "library/pkwrite.c", - root ++ "library/platform.c", - root ++ "library/platform_util.c", - root ++ "library/poly1305.c", - root ++ "library/psa_crypto.c", - root ++ "library/psa_crypto_aead.c", - root ++ "library/psa_crypto_cipher.c", - root ++ 
"library/psa_crypto_client.c", - root ++ "library/psa_crypto_ffdh.c", - root ++ "library/psa_crypto_driver_wrappers_no_static.c", - root ++ "library/psa_crypto_ecp.c", - root ++ "library/psa_crypto_hash.c", - root ++ "library/psa_crypto_mac.c", - root ++ "library/psa_crypto_pake.c", - root ++ "library/psa_crypto_rsa.c", - root ++ "library/psa_crypto_se.c", - root ++ "library/psa_crypto_slot_management.c", - root ++ "library/psa_crypto_storage.c", - root ++ "library/psa_its_file.c", - root ++ "library/psa_util.c", - root ++ "library/ripemd160.c", - root ++ "library/rsa.c", - root ++ "library/rsa_alt_helpers.c", - root ++ "library/sha1.c", - root ++ "library/sha3.c", - root ++ "library/sha256.c", - root ++ "library/sha512.c", - root ++ "library/threading.c", - root ++ "library/timing.c", - root ++ "library/version.c", - root ++ "library/version_features.c", - root ++ "library/pkcs7.c", - root ++ "library/x509.c", - root ++ "library/x509_create.c", - root ++ "library/x509_crl.c", - root ++ "library/x509_crt.c", - root ++ "library/x509_csr.c", - root ++ "library/x509write.c", - root ++ "library/x509write_crt.c", - root ++ "library/x509write_csr.c", - root ++ "library/debug.c", - root ++ "library/mps_reader.c", - root ++ "library/mps_trace.c", - root ++ "library/net_sockets.c", - root ++ "library/ssl_cache.c", - root ++ "library/ssl_ciphersuites.c", - root ++ "library/ssl_client.c", - root ++ "library/ssl_cookie.c", - root ++ "library/ssl_debug_helpers_generated.c", - root ++ "library/ssl_msg.c", - root ++ "library/ssl_ticket.c", - root ++ "library/ssl_tls.c", - root ++ "library/ssl_tls12_client.c", - root ++ "library/ssl_tls12_server.c", - root ++ "library/ssl_tls13_keys.c", - root ++ "library/ssl_tls13_server.c", - root ++ "library/ssl_tls13_client.c", - root ++ "library/ssl_tls13_generic.c", - } }); -} - fn buildNghttp2(b: *Build, m: *Build.Module) !void { const nghttp2 = b.addLibrary(.{ .name = "nghttp2", @@ -683,6 +578,8 @@ fn buildCurl(b: *Build, m: *Build.Module) 
!void { curl.addIncludePath(b.path(root ++ "lib")); curl.addIncludePath(b.path(root ++ "include")); + curl.addIncludePath(b.path("vendor/zlib")); + curl.addCSourceFiles(.{ .flags = &.{}, .files = &.{ @@ -841,8 +738,9 @@ fn buildCurl(b: *Build, m: *Build.Module) !void { root ++ "lib/vauth/spnego_sspi.c", root ++ "lib/vauth/vauth.c", root ++ "lib/vtls/cipher_suite.c", - root ++ "lib/vtls/mbedtls.c", - root ++ "lib/vtls/mbedtls_threadlock.c", + root ++ "lib/vtls/openssl.c", + root ++ "lib/vtls/hostcheck.c", + root ++ "lib/vtls/keylog.c", root ++ "lib/vtls/vtls.c", root ++ "lib/vtls/vtls_scache.c", root ++ "lib/vtls/x509asn1.c", diff --git a/build.zig.zon b/build.zig.zon index 9d57095f9..cb0136209 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -5,9 +5,13 @@ .fingerprint = 0xda130f3af836cea0, .dependencies = .{ .v8 = .{ - .url = "https://github.com/lightpanda-io/zig-v8-fork/archive/305bb3706716d32d59b2ffa674731556caa1002b.tar.gz", - .hash = "v8-0.0.0-xddH63bVAwBSEobaUok9J0er1FqsvEujCDDVy6ItqKQ5", + .url = "https://github.com/lightpanda-io/zig-v8-fork/archive/0d19781ccec829640e4f07591cbc166fa7dbe139.tar.gz", + .hash = "v8-0.0.0-xddH6wTgAwALFCYoZbUIqtsRyP6mr69N7aKT_cySHKN2", }, //.v8 = .{ .path = "../zig-v8-fork" } + .@"boringssl-zig" = .{ + .url = "git+https://github.com/Syndica/boringssl-zig.git#c53df00d06b02b755ad88bbf4d1202ed9687b096", + .hash = "boringssl-0.1.0-VtJeWehMAAA4RNnwRnzEvKcS9rjsR1QVRw1uJrwXxmVK", + }, }, } diff --git a/flake.lock b/flake.lock index 497b274e8..13693e9a8 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,26 @@ { "nodes": { + "fenix": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ], + "rust-analyzer-src": "rust-analyzer-src" + }, + "locked": { + "lastModified": 1763016383, + "narHash": "sha256-eYmo7FNvm3q08iROzwIi8i9dWuUbJJl3uLR3OLnSmdI=", + "owner": "nix-community", + "repo": "fenix", + "rev": "0fad5c0e5c531358e7174cd666af4608f08bc3ba", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "fenix", + "type": 
"github" + } + }, "flake-compat": { "flake": false, "locked": { @@ -75,11 +96,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1756822655, - "narHash": "sha256-xQAk8xLy7srAkR5NMZFsQFioL02iTHuuEIs3ohGpgdk=", + "lastModified": 1763043403, + "narHash": "sha256-DgCTbHdIpzbXSlQlOZEWj8oPt2lrRMlSk03oIstvkVQ=", "owner": "nixos", "repo": "nixpkgs", - "rev": "4bdac60bfe32c41103ae500ddf894c258291dd61", + "rev": "75e04ecd084f93d4105ce68c07dac7656291fe2e", "type": "github" }, "original": { @@ -91,12 +112,30 @@ }, "root": { "inputs": { + "fenix": "fenix", "flake-utils": "flake-utils", "nixpkgs": "nixpkgs", "zigPkgs": "zigPkgs", "zlsPkg": "zlsPkg" } }, + "rust-analyzer-src": { + "flake": false, + "locked": { + "lastModified": 1762860488, + "narHash": "sha256-rMfWMCOo/pPefM2We0iMBLi2kLBAnYoB9thi4qS7uk4=", + "owner": "rust-lang", + "repo": "rust-analyzer", + "rev": "2efc80078029894eec0699f62ec8d5c1a56af763", + "type": "github" + }, + "original": { + "owner": "rust-lang", + "ref": "nightly", + "repo": "rust-analyzer", + "type": "github" + } + }, "systems": { "locked": { "lastModified": 1681028828, @@ -136,11 +175,11 @@ ] }, "locked": { - "lastModified": 1756555914, - "narHash": "sha256-7yoSPIVEuL+3Wzf6e7NHuW3zmruHizRrYhGerjRHTLI=", + "lastModified": 1762907712, + "narHash": "sha256-VNW/+VYIg6N4b9Iq+F0YZmm22n74IdFS7hsPLblWuOY=", "owner": "mitchellh", "repo": "zig-overlay", - "rev": "d0df3a2fd0f11134409d6d5ea0e510e5e477f7d6", + "rev": "d16453ee78765e49527c56d23386cead799b6b53", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 971f0f44c..330bbdf04 100644 --- a/flake.nix +++ b/flake.nix @@ -11,6 +11,11 @@ zlsPkg.inputs.zig-overlay.follows = "zigPkgs"; zlsPkg.inputs.nixpkgs.follows = "nixpkgs"; + fenix = { + url = "github:nix-community/fenix"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + flake-utils.url = "github:numtide/flake-utils"; }; @@ -19,6 +24,7 @@ nixpkgs, zigPkgs, zlsPkg, + fenix, flake-utils, ... 
}: @@ -36,6 +42,8 @@ inherit system overlays; }; + rustToolchain = fenix.packages.${system}.stable.toolchain; + # We need crtbeginS.o for building. crtFiles = pkgs.runCommand "crt-files" { } '' mkdir -p $out/lib @@ -49,8 +57,9 @@ targetPkgs = pkgs: with pkgs; [ # Build Tools - zigpkgs."0.15.1" + zigpkgs."0.15.2" zls + rustToolchain python3 pkg-config cmake @@ -66,7 +75,6 @@ glib.dev glibc.dev zlib - zlib.dev ]; }; in diff --git a/src/App.zig b/src/App.zig new file mode 100644 index 000000000..24d015c01 --- /dev/null +++ b/src/App.zig @@ -0,0 +1,127 @@ +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); + +const Allocator = std.mem.Allocator; + +const log = @import("log.zig"); +const Http = @import("http/Http.zig"); +const Platform = @import("browser/js/Platform.zig"); + +const Notification = @import("Notification.zig"); +const Telemetry = @import("telemetry/telemetry.zig").Telemetry; + +// Container for global state / objects that various parts of the system +// might need. 
+const App = @This(); + +http: Http, +config: Config, +platform: Platform, +telemetry: Telemetry, +allocator: Allocator, +app_dir_path: ?[]const u8, +notification: *Notification, + +pub const RunMode = enum { + help, + fetch, + serve, + version, +}; + +pub const Config = struct { + run_mode: RunMode, + tls_verify_host: bool = true, + http_proxy: ?[:0]const u8 = null, + proxy_bearer_token: ?[:0]const u8 = null, + http_timeout_ms: ?u31 = null, + http_connect_timeout_ms: ?u31 = null, + http_max_host_open: ?u8 = null, + http_max_concurrent: ?u8 = null, + user_agent: [:0]const u8, +}; + +pub fn init(allocator: Allocator, config: Config) !*App { + const app = try allocator.create(App); + errdefer allocator.destroy(app); + + app.config = config; + app.allocator = allocator; + + app.notification = try Notification.init(allocator, null); + errdefer app.notification.deinit(); + + app.http = try Http.init(allocator, .{ + .max_host_open = config.http_max_host_open orelse 4, + .max_concurrent = config.http_max_concurrent orelse 10, + .timeout_ms = config.http_timeout_ms orelse 5000, + .connect_timeout_ms = config.http_connect_timeout_ms orelse 0, + .http_proxy = config.http_proxy, + .tls_verify_host = config.tls_verify_host, + .proxy_bearer_token = config.proxy_bearer_token, + .user_agent = config.user_agent, + }); + errdefer app.http.deinit(); + + app.platform = try Platform.init(); + errdefer app.platform.deinit(); + + app.app_dir_path = getAndMakeAppDir(allocator); + + app.telemetry = try Telemetry.init(app, config.run_mode); + errdefer app.telemetry.deinit(); + + try app.telemetry.register(app.notification); + + return app; +} + +pub fn deinit(self: *App) void { + const allocator = self.allocator; + if (self.app_dir_path) |app_dir_path| { + allocator.free(app_dir_path); + } + self.telemetry.deinit(); + self.notification.deinit(); + self.http.deinit(); + self.platform.deinit(); + + allocator.destroy(self); +} + +fn getAndMakeAppDir(allocator: Allocator) ?[]const u8 { + if 
(@import("builtin").is_test) { + return allocator.dupe(u8, "/tmp") catch unreachable; + } + const app_dir_path = std.fs.getAppDataDir(allocator, "lightpanda") catch |err| { + log.warn(.app, "get data dir", .{ .err = err }); + return null; + }; + + std.fs.cwd().makePath(app_dir_path) catch |err| switch (err) { + error.PathAlreadyExists => return app_dir_path, + else => { + allocator.free(app_dir_path); + log.warn(.app, "create data dir", .{ .err = err, .path = app_dir_path }); + return null; + }, + }; + return app_dir_path; +} diff --git a/src/Notification.zig b/src/Notification.zig new file mode 100644 index 000000000..dea1d5499 --- /dev/null +++ b/src/Notification.zig @@ -0,0 +1,404 @@ +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); + +const log = @import("log.zig"); +const Page = @import("browser/Page.zig"); +const Transfer = @import("http/Client.zig").Transfer; + +const Allocator = std.mem.Allocator; + +const List = std.DoublyLinkedList; + +// Allows code to register for and emit events. 
+// Keeps two lists +// 1 - for a given event type, a linked list of all the listeners +// 2 - for a given listener, a list of all it's registration +// The 2nd one is so that a listener can unregister all of it's listeners +// (there's currently no need for a listener to unregister only 1 or more +// specific listener). +// +// Scoping is important. Imagine we created a global singleton registry, and our +// CDP code registers for the "network_bytes_sent" event, because it needs to +// send messages to the client when this happens. Our HTTP client could then +// emit a "network_bytes_sent" message. It would be easy, and it would work. +// That is, it would work until the Telemetry code makes an HTTP request, and +// because everything's just one big global, that gets picked up by the +// registered CDP listener, and the telemetry network activity gets sent to the +// CDP client. +// +// To avoid this, one way or another, we need scoping. We could still have +// a global registry but every "register" and every "emit" has some type of +// "scope". This would have a run-time cost and still require some coordination +// between components to share a common scope. +// +// Instead, the approach that we take is to have a notification instance per +// scope. This makes some things harder, but we only plan on having 2 +// notification instances at a given time: one in a Browser and one in the App. +// What about something like Telemetry, which lives outside of a Browser but +// still cares about Browser-events (like .page_navigate)? When the Browser +// notification is created, a `notification_created` event is raised in the +// App's notification, which Telemetry is registered for. This allows Telemetry +// to register for events in the Browser notification. See the Telemetry's +// register function. +const Notification = @This(); +// Every event type (which are hard-coded), has a list of Listeners. +// When the event happens, we dispatch to those listener. 
+event_listeners: EventListeners, + +// list of listeners for a specified receiver +// @intFromPtr(receiver) -> [listener1, listener2, ...] +// Used when `unregisterAll` is called. +listeners: std.AutoHashMapUnmanaged(usize, std.ArrayListUnmanaged(*Listener)), + +allocator: Allocator, +mem_pool: std.heap.MemoryPool(Listener), + +const EventListeners = struct { + page_remove: List = .{}, + page_created: List = .{}, + page_navigate: List = .{}, + page_navigated: List = .{}, + page_network_idle: List = .{}, + page_network_almost_idle: List = .{}, + http_request_fail: List = .{}, + http_request_start: List = .{}, + http_request_intercept: List = .{}, + http_request_done: List = .{}, + http_request_auth_required: List = .{}, + http_response_data: List = .{}, + http_response_header_done: List = .{}, + notification_created: List = .{}, +}; + +const Events = union(enum) { + page_remove: PageRemove, + page_created: *Page, + page_navigate: *const PageNavigate, + page_navigated: *const PageNavigated, + page_network_idle: *const PageNetworkIdle, + page_network_almost_idle: *const PageNetworkAlmostIdle, + http_request_fail: *const RequestFail, + http_request_start: *const RequestStart, + http_request_intercept: *const RequestIntercept, + http_request_auth_required: *const RequestAuthRequired, + http_request_done: *const RequestDone, + http_response_data: *const ResponseData, + http_response_header_done: *const ResponseHeaderDone, + notification_created: *Notification, +}; +const EventType = std.meta.FieldEnum(Events); + +pub const PageRemove = struct {}; + +pub const PageNavigate = struct { + timestamp: u64, + url: [:0]const u8, + opts: Page.NavigateOpts, +}; + +pub const PageNavigated = struct { + timestamp: u64, + url: [:0]const u8, +}; + +pub const PageNetworkIdle = struct { + timestamp: u64, +}; + +pub const PageNetworkAlmostIdle = struct { + timestamp: u64, +}; + +pub const RequestStart = struct { + transfer: *Transfer, +}; + +pub const RequestIntercept = struct { + 
transfer: *Transfer, + wait_for_interception: *bool, +}; + +pub const RequestAuthRequired = struct { + transfer: *Transfer, + wait_for_interception: *bool, +}; + +pub const ResponseData = struct { + data: []const u8, + transfer: *Transfer, +}; + +pub const ResponseHeaderDone = struct { + transfer: *Transfer, +}; + +pub const RequestDone = struct { + transfer: *Transfer, +}; + +pub const RequestFail = struct { + transfer: *Transfer, + err: anyerror, +}; + +pub fn init(allocator: Allocator, parent: ?*Notification) !*Notification { + + // This is put on the heap because we want to raise a .notification_created + // event, so that, something like Telemetry, can receive the + // .page_navigate event on all notification instances. That can only work + // if we dispatch .notification_created with a *Notification. + const notification = try allocator.create(Notification); + errdefer allocator.destroy(notification); + + notification.* = .{ + .listeners = .{}, + .event_listeners = .{}, + .allocator = allocator, + .mem_pool = std.heap.MemoryPool(Listener).init(allocator), + }; + + if (parent) |pn| { + pn.dispatch(.notification_created, notification); + } + + return notification; +} + +pub fn deinit(self: *Notification) void { + const allocator = self.allocator; + + var it = self.listeners.valueIterator(); + while (it.next()) |listener| { + listener.deinit(allocator); + } + self.listeners.deinit(allocator); + self.mem_pool.deinit(); + allocator.destroy(self); +} + +pub fn register(self: *Notification, comptime event: EventType, receiver: anytype, func: EventFunc(event)) !void { + var list = &@field(self.event_listeners, @tagName(event)); + + var listener = try self.mem_pool.create(); + errdefer self.mem_pool.destroy(listener); + + listener.* = .{ + .node = .{}, + .list = list, + .receiver = receiver, + .event = event, + .func = @ptrCast(func), + .struct_name = @typeName(@typeInfo(@TypeOf(receiver)).pointer.child), + }; + + const allocator = self.allocator; + const gop = try 
self.listeners.getOrPut(allocator, @intFromPtr(receiver)); + if (gop.found_existing == false) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(allocator, listener); + + // we don't add this until we've successfully added the entry to + // self.listeners + list.append(&listener.node); +} + +pub fn unregister(self: *Notification, comptime event: EventType, receiver: anytype) void { + var listeners = self.listeners.getPtr(@intFromPtr(receiver)) orelse return; + + var i: usize = 0; + while (i < listeners.items.len) { + const listener = listeners.items[i]; + if (listener.event != event) { + i += 1; + continue; + } + listener.list.remove(&listener.node); + self.mem_pool.destroy(listener); + _ = listeners.swapRemove(i); + } + + if (listeners.items.len == 0) { + listeners.deinit(self.allocator); + const removed = self.listeners.remove(@intFromPtr(receiver)); + std.debug.assert(removed == true); + } +} + +pub fn unregisterAll(self: *Notification, receiver: *anyopaque) void { + var kv = self.listeners.fetchRemove(@intFromPtr(receiver)) orelse return; + for (kv.value.items) |listener| { + listener.list.remove(&listener.node); + self.mem_pool.destroy(listener); + } + kv.value.deinit(self.allocator); +} + +pub fn dispatch(self: *Notification, comptime event: EventType, data: ArgType(event)) void { + const list = &@field(self.event_listeners, @tagName(event)); + + var node = list.first; + while (node) |n| { + const listener: *Listener = @fieldParentPtr("node", n); + const func: EventFunc(event) = @ptrCast(@alignCast(listener.func)); + func(listener.receiver, data) catch |err| { + log.err(.app, "dispatch error", .{ + .err = err, + .event = event, + .source = "notification", + .listener = listener.struct_name, + }); + }; + node = n.next; + } +} + +// Given an event type enum, returns the type of arg the event emits +fn ArgType(comptime event: Notification.EventType) type { + inline for (std.meta.fields(Notification.Events)) |f| { + if (std.mem.eql(u8, f.name, 
@tagName(event))) { + return f.type; + } + } + unreachable; +} + +// Given an event type enum, returns the listening function type +fn EventFunc(comptime event: Notification.EventType) type { + return *const fn (*anyopaque, ArgType(event)) anyerror!void; +} + +// A listener. This is 1 receiver, with its function, and the linked list +// node that goes in the appropriate EventListeners list. +const Listener = struct { + // the receiver of the event, i.e. the self parameter to `func` + receiver: *anyopaque, + + // the function to call + func: *const anyopaque, + + // For logging slightly better error + struct_name: []const u8, + + event: Notification.EventType, + + // intrusive linked list node + node: List.Node, + + // The event list this listener belongs to. + // We need this in order to be able to remove the node from the list + list: *List, +}; + +const testing = std.testing; +test "Notification" { + var notifier = try Notification.init(testing.allocator, null); + defer notifier.deinit(); + + // noop + notifier.dispatch(.page_navigate, &.{ + .timestamp = 4, + .url = undefined, + .opts = .{}, + }); + + var tc = TestClient{}; + + try notifier.register(.page_navigate, &tc, TestClient.pageNavigate); + notifier.dispatch(.page_navigate, &.{ + .timestamp = 4, + .url = undefined, + .opts = .{}, + }); + try testing.expectEqual(4, tc.page_navigate); + + notifier.unregisterAll(&tc); + notifier.dispatch(.page_navigate, &.{ + .timestamp = 10, + .url = undefined, + .opts = .{}, + }); + try testing.expectEqual(4, tc.page_navigate); + + try notifier.register(.page_navigate, &tc, TestClient.pageNavigate); + try notifier.register(.page_navigated, &tc, TestClient.pageNavigated); + notifier.dispatch(.page_navigate, &.{ + .timestamp = 10, + .url = undefined, + .opts = .{}, + }); + notifier.dispatch(.page_navigated, &.{ .timestamp = 6, .url = undefined }); + try testing.expectEqual(14, tc.page_navigate); + try testing.expectEqual(6, tc.page_navigated); + + notifier.unregisterAll(&tc); 
+ notifier.dispatch(.page_navigate, &.{ + .timestamp = 100, + .url = undefined, + .opts = .{}, + }); + notifier.dispatch(.page_navigated, &.{ .timestamp = 100, .url = undefined }); + try testing.expectEqual(14, tc.page_navigate); + try testing.expectEqual(6, tc.page_navigated); + + { + // unregister + try notifier.register(.page_navigate, &tc, TestClient.pageNavigate); + try notifier.register(.page_navigated, &tc, TestClient.pageNavigated); + notifier.dispatch(.page_navigate, &.{ .timestamp = 100, .url = undefined, .opts = .{} }); + notifier.dispatch(.page_navigated, &.{ .timestamp = 1000, .url = undefined }); + try testing.expectEqual(114, tc.page_navigate); + try testing.expectEqual(1006, tc.page_navigated); + + notifier.unregister(.page_navigate, &tc); + notifier.dispatch(.page_navigate, &.{ .timestamp = 100, .url = undefined, .opts = .{} }); + notifier.dispatch(.page_navigated, &.{ .timestamp = 1000, .url = undefined }); + try testing.expectEqual(114, tc.page_navigate); + try testing.expectEqual(2006, tc.page_navigated); + + notifier.unregister(.page_navigated, &tc); + notifier.dispatch(.page_navigate, &.{ .timestamp = 100, .url = undefined, .opts = .{} }); + notifier.dispatch(.page_navigated, &.{ .timestamp = 1000, .url = undefined }); + try testing.expectEqual(114, tc.page_navigate); + try testing.expectEqual(2006, tc.page_navigated); + + // already unregistered, try anyways + notifier.unregister(.page_navigated, &tc); + notifier.dispatch(.page_navigate, &.{ .timestamp = 100, .url = undefined, .opts = .{} }); + notifier.dispatch(.page_navigated, &.{ .timestamp = 1000, .url = undefined }); + try testing.expectEqual(114, tc.page_navigate); + try testing.expectEqual(2006, tc.page_navigated); + } +} + +const TestClient = struct { + page_navigate: u64 = 0, + page_navigated: u64 = 0, + + fn pageNavigate(ptr: *anyopaque, data: *const Notification.PageNavigate) !void { + const self: *TestClient = @ptrCast(@alignCast(ptr)); + self.page_navigate += data.timestamp; + } 
+ + fn pageNavigated(ptr: *anyopaque, data: *const Notification.PageNavigated) !void { + const self: *TestClient = @ptrCast(@alignCast(ptr)); + self.page_navigated += data.timestamp; + } +}; diff --git a/src/server.zig b/src/Server.zig similarity index 89% rename from src/server.zig rename to src/Server.zig index 974517054..34621efd0 100644 --- a/src/server.zig +++ b/src/Server.zig @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Lightpanda (Selecy SAS) +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) // // Francis Bouvier // Pierre Tachoire @@ -26,7 +26,7 @@ const Allocator = std.mem.Allocator; const ArenaAllocator = std.heap.ArenaAllocator; const log = @import("log.zig"); -const App = @import("app.zig").App; +const App = @import("App.zig"); const CDP = @import("cdp/cdp.zig").CDP; const MAX_HTTP_REQUEST_SIZE = 4096; @@ -36,147 +36,146 @@ const MAX_HTTP_REQUEST_SIZE = 4096; // +140 for the max control packet that might be interleaved in a message const MAX_MESSAGE_SIZE = 512 * 1024 + 14 + 140; -pub const Server = struct { - app: *App, - shutdown: bool, - allocator: Allocator, - client: ?posix.socket_t, - listener: ?posix.socket_t, - json_version_response: []const u8, +const Server = @This(); +app: *App, +shutdown: bool, +allocator: Allocator, +client: ?posix.socket_t, +listener: ?posix.socket_t, +json_version_response: []const u8, - pub fn init(app: *App, address: net.Address) !Server { - const allocator = app.allocator; - const json_version_response = try buildJSONVersionResponse(allocator, address); - errdefer allocator.free(json_version_response); +pub fn init(app: *App, address: net.Address) !Server { + const allocator = app.allocator; + const json_version_response = try buildJSONVersionResponse(allocator, address); + errdefer allocator.free(json_version_response); - return .{ - .app = app, - .client = null, - .listener = null, - .shutdown = false, - .allocator = allocator, - .json_version_response = json_version_response, - }; - } + return .{ + .app = app, + 
.client = null, + .listener = null, + .shutdown = false, + .allocator = allocator, + .json_version_response = json_version_response, + }; +} - pub fn deinit(self: *Server) void { - self.shutdown = true; - if (self.listener) |listener| { - posix.close(listener); - } - // *if* server.run is running, we should really wait for it to return - // before existing from here. - self.allocator.free(self.json_version_response); +pub fn deinit(self: *Server) void { + self.shutdown = true; + if (self.listener) |listener| { + posix.close(listener); } + // *if* server.run is running, we should really wait for it to return + // before existing from here. + self.allocator.free(self.json_version_response); +} - pub fn run(self: *Server, address: net.Address, timeout_ms: i32) !void { - const flags = posix.SOCK.STREAM | posix.SOCK.CLOEXEC; - const listener = try posix.socket(address.any.family, flags, posix.IPPROTO.TCP); - self.listener = listener; - - try posix.setsockopt(listener, posix.SOL.SOCKET, posix.SO.REUSEADDR, &std.mem.toBytes(@as(c_int, 1))); - if (@hasDecl(posix.TCP, "NODELAY")) { - try posix.setsockopt(listener, posix.IPPROTO.TCP, posix.TCP.NODELAY, &std.mem.toBytes(@as(c_int, 1))); - } - - try posix.bind(listener, &address.any, address.getOsSockLen()); - try posix.listen(listener, 1); +pub fn run(self: *Server, address: net.Address, timeout_ms: u32) !void { + const flags = posix.SOCK.STREAM | posix.SOCK.CLOEXEC; + const listener = try posix.socket(address.any.family, flags, posix.IPPROTO.TCP); + self.listener = listener; - log.info(.app, "server running", .{ .address = address }); - while (true) { - const socket = posix.accept(listener, null, null, posix.SOCK.NONBLOCK) catch |err| { - if (self.shutdown) { - return; - } - log.err(.app, "CDP accept", .{ .err = err }); - std.Thread.sleep(std.time.ns_per_s); - continue; - }; + try posix.setsockopt(listener, posix.SOL.SOCKET, posix.SO.REUSEADDR, &std.mem.toBytes(@as(c_int, 1))); + if (@hasDecl(posix.TCP, "NODELAY")) { + try 
posix.setsockopt(listener, posix.IPPROTO.TCP, posix.TCP.NODELAY, &std.mem.toBytes(@as(c_int, 1))); + } - self.client = socket; - defer if (self.client) |s| { - posix.close(s); - self.client = null; - }; + try posix.bind(listener, &address.any, address.getOsSockLen()); + try posix.listen(listener, 1); - if (log.enabled(.app, .info)) { - var client_address: std.net.Address = undefined; - var socklen: posix.socklen_t = @sizeOf(net.Address); - try std.posix.getsockname(socket, &client_address.any, &socklen); - log.info(.app, "client connected", .{ .ip = client_address }); + log.info(.app, "server running", .{ .address = address }); + while (true) { + const socket = posix.accept(listener, null, null, posix.SOCK.NONBLOCK) catch |err| { + if (self.shutdown) { + return; } + log.err(.app, "CDP accept", .{ .err = err }); + std.Thread.sleep(std.time.ns_per_s); + continue; + }; - self.readLoop(socket, timeout_ms) catch |err| { - log.err(.app, "CDP client loop", .{ .err = err }); - }; + self.client = socket; + defer if (self.client) |s| { + posix.close(s); + self.client = null; + }; + + if (log.enabled(.app, .info)) { + var client_address: std.net.Address = undefined; + var socklen: posix.socklen_t = @sizeOf(net.Address); + try std.posix.getsockname(socket, &client_address.any, &socklen); + log.info(.app, "client connected", .{ .ip = client_address }); } + + self.readLoop(socket, timeout_ms) catch |err| { + log.err(.app, "CDP client loop", .{ .err = err }); + }; } +} - fn readLoop(self: *Server, socket: posix.socket_t, timeout_ms: i32) !void { - // This shouldn't be necessary, but the Client is HUGE (> 512KB) because - // it has a large read buffer. I don't know why, but v8 crashes if this - // is on the stack (and I assume it's related to its size). 
- const client = try self.allocator.create(Client); - defer self.allocator.destroy(client); +fn readLoop(self: *Server, socket: posix.socket_t, timeout_ms: u32) !void { + // This shouldn't be necessary, but the Client is HUGE (> 512KB) because + // it has a large read buffer. I don't know why, but v8 crashes if this + // is on the stack (and I assume it's related to its size). + const client = try self.allocator.create(Client); + defer self.allocator.destroy(client); - client.* = try Client.init(socket, self); - defer client.deinit(); + client.* = try Client.init(socket, self); + defer client.deinit(); - var http = &self.app.http; - http.monitorSocket(socket); - defer http.unmonitorSocket(); + var http = &self.app.http; + http.monitorSocket(socket); + defer http.unmonitorSocket(); - std.debug.assert(client.mode == .http); - while (true) { - if (http.poll(timeout_ms) != .extra_socket) { - log.info(.app, "CDP timeout", .{}); - return; - } + std.debug.assert(client.mode == .http); + while (true) { + if (http.poll(timeout_ms) != .extra_socket) { + log.info(.app, "CDP timeout", .{}); + return; + } - if (try client.readSocket() == false) { - return; - } + if (try client.readSocket() == false) { + return; + } - if (client.mode == .cdp) { - break; // switch to our CDP loop - } + if (client.mode == .cdp) { + break; // switch to our CDP loop } + } - var cdp = &client.mode.cdp; - var last_message = timestamp(); - var ms_remaining = timeout_ms; - while (true) { - switch (cdp.pageWait(ms_remaining)) { - .extra_socket => { - if (try client.readSocket() == false) { - return; - } - last_message = timestamp(); - ms_remaining = timeout_ms; - }, - .no_page => { - if (http.poll(ms_remaining) != .extra_socket) { - log.info(.app, "CDP timeout", .{}); - return; - } - if (try client.readSocket() == false) { - return; - } - last_message = timestamp(); - ms_remaining = timeout_ms; - }, - .done => { - const elapsed = timestamp() - last_message; - if (elapsed > ms_remaining) { - 
log.info(.app, "CDP timeout", .{}); - return; - } - ms_remaining -= @as(i32, @intCast(elapsed)); - }, - } + var cdp = &client.mode.cdp; + var last_message = timestamp(.monotonic); + var ms_remaining = timeout_ms; + while (true) { + switch (cdp.pageWait(ms_remaining)) { + .extra_socket => { + if (try client.readSocket() == false) { + return; + } + last_message = timestamp(.monotonic); + ms_remaining = timeout_ms; + }, + .no_page => { + if (http.poll(ms_remaining) != .extra_socket) { + log.info(.app, "CDP timeout", .{}); + return; + } + if (try client.readSocket() == false) { + return; + } + last_message = timestamp(.monotonic); + ms_remaining = timeout_ms; + }, + .done => { + const elapsed = timestamp(.monotonic) - last_message; + if (elapsed > ms_remaining) { + log.info(.app, "CDP timeout", .{}); + return; + } + ms_remaining -= @intCast(elapsed); + }, } } -}; +} pub const Client = struct { // The client is initially serving HTTP requests but, under normal circumstances @@ -487,7 +486,7 @@ pub const Client = struct { } // called by CDP - // Websocket frames have a variable lenght header. For server-client, + // Websocket frames have a variable length header. For server-client, // it could be anywhere from 2 to 10 bytes. Our IO.Loop doesn't have // writev, so we need to get creative. We'll JSON serialize to a // buffer, where the first 10 bytes are reserved. 
We can then backfill @@ -929,9 +928,7 @@ fn buildJSONVersionResponse( return try std.fmt.allocPrint(allocator, response_format, .{ body_len, address }); } -fn timestamp() u32 { - return @import("datetime.zig").timestamp(); -} +pub const timestamp = @import("datetime.zig").timestamp; // In-place string lowercase fn toLower(str: []u8) []u8 { diff --git a/src/TestHTTPServer.zig b/src/TestHTTPServer.zig index 9867600d0..fdf4e1247 100644 --- a/src/TestHTTPServer.zig +++ b/src/TestHTTPServer.zig @@ -1,3 +1,21 @@ +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ const std = @import("std"); const TestHTTPServer = @This(); @@ -61,6 +79,7 @@ fn handleConnection(self: *TestHTTPServer, conn: std.net.Server.Connection) !voi return err; }, }; + self.handler(&req) catch |err| { std.debug.print("test http error '{s}': {}\n", .{ req.head.target, err }); try req.respond("server error", .{ .status = .internal_server_error }); diff --git a/src/app.zig b/src/app.zig deleted file mode 100644 index 719dd9b72..000000000 --- a/src/app.zig +++ /dev/null @@ -1,115 +0,0 @@ -const std = @import("std"); - -const Allocator = std.mem.Allocator; - -const log = @import("log.zig"); -const Http = @import("http/Http.zig"); -const Platform = @import("browser/js/Platform.zig"); - -const Telemetry = @import("telemetry/telemetry.zig").Telemetry; -const Notification = @import("notification.zig").Notification; - -// Container for global state / objects that various parts of the system -// might need. -pub const App = struct { - http: Http, - config: Config, - platform: Platform, - allocator: Allocator, - telemetry: Telemetry, - app_dir_path: ?[]const u8, - notification: *Notification, - - pub const RunMode = enum { - help, - fetch, - serve, - version, - }; - - pub const Config = struct { - run_mode: RunMode, - tls_verify_host: bool = true, - http_proxy: ?[:0]const u8 = null, - proxy_bearer_token: ?[:0]const u8 = null, - http_timeout_ms: ?u31 = null, - http_connect_timeout_ms: ?u31 = null, - http_max_host_open: ?u8 = null, - http_max_concurrent: ?u8 = null, - user_agent: [:0]const u8, - }; - - pub fn init(allocator: Allocator, config: Config) !*App { - const app = try allocator.create(App); - errdefer allocator.destroy(app); - - const notification = try Notification.init(allocator, null); - errdefer notification.deinit(); - - var http = try Http.init(allocator, .{ - .max_host_open = config.http_max_host_open orelse 4, - .max_concurrent = config.http_max_concurrent orelse 10, - .timeout_ms = config.http_timeout_ms orelse 5000, - .connect_timeout_ms = 
config.http_connect_timeout_ms orelse 0, - .http_proxy = config.http_proxy, - .tls_verify_host = config.tls_verify_host, - .proxy_bearer_token = config.proxy_bearer_token, - .user_agent = config.user_agent, - }); - errdefer http.deinit(); - - const platform = try Platform.init(); - errdefer platform.deinit(); - - const app_dir_path = getAndMakeAppDir(allocator); - - app.* = .{ - .http = http, - .allocator = allocator, - .telemetry = undefined, - .platform = platform, - .app_dir_path = app_dir_path, - .notification = notification, - .config = config, - }; - - app.telemetry = try Telemetry.init(app, config.run_mode); - errdefer app.telemetry.deinit(); - - try app.telemetry.register(app.notification); - - return app; - } - - pub fn deinit(self: *App) void { - const allocator = self.allocator; - if (self.app_dir_path) |app_dir_path| { - allocator.free(app_dir_path); - } - self.telemetry.deinit(); - self.notification.deinit(); - self.http.deinit(); - self.platform.deinit(); - allocator.destroy(self); - } -}; - -fn getAndMakeAppDir(allocator: Allocator) ?[]const u8 { - if (@import("builtin").is_test) { - return allocator.dupe(u8, "/tmp") catch unreachable; - } - const app_dir_path = std.fs.getAppDataDir(allocator, "lightpanda") catch |err| { - log.warn(.app, "get data dir", .{ .err = err }); - return null; - }; - - std.fs.cwd().makePath(app_dir_path) catch |err| switch (err) { - error.PathAlreadyExists => return app_dir_path, - else => { - allocator.free(app_dir_path); - log.warn(.app, "create data dir", .{ .err = err, .path = app_dir_path }); - return null; - }, - }; - return app_dir_path; -} diff --git a/src/browser/Browser.zig b/src/browser/Browser.zig new file mode 100644 index 000000000..1d3bcbfeb --- /dev/null +++ b/src/browser/Browser.zig @@ -0,0 +1,110 @@ +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU 
Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); + +const Allocator = std.mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; + +const js = @import("js/js.zig"); +const log = @import("../log.zig"); +const App = @import("../App.zig"); +const HttpClient = @import("../http/Client.zig"); +const Notification = @import("../Notification.zig"); + +const Session = @import("Session.zig"); + +// Browser is an instance of the browser. +// You can create multiple browser instances. +// A browser contains only one session. +const Browser = @This(); + +env: *js.Env, +app: *App, +session: ?Session, +allocator: Allocator, +http_client: *HttpClient, +call_arena: ArenaAllocator, +page_arena: ArenaAllocator, +session_arena: ArenaAllocator, +transfer_arena: ArenaAllocator, +notification: *Notification, + +pub fn init(app: *App) !Browser { + const allocator = app.allocator; + + const env = try js.Env.init(allocator, &app.platform, .{}); + errdefer env.deinit(); + + const notification = try Notification.init(allocator, app.notification); + app.http.client.notification = notification; + app.http.client.next_request_id = 0; // Should we track ids in CDP only? 
+ errdefer notification.deinit(); + + return .{ + .app = app, + .env = env, + .session = null, + .allocator = allocator, + .notification = notification, + .http_client = app.http.client, + .call_arena = ArenaAllocator.init(allocator), + .page_arena = ArenaAllocator.init(allocator), + .session_arena = ArenaAllocator.init(allocator), + .transfer_arena = ArenaAllocator.init(allocator), + }; +} + +pub fn deinit(self: *Browser) void { + self.closeSession(); + self.env.deinit(); + self.call_arena.deinit(); + self.page_arena.deinit(); + self.session_arena.deinit(); + self.transfer_arena.deinit(); + self.http_client.notification = null; + self.notification.deinit(); +} + +pub fn newSession(self: *Browser) !*Session { + self.closeSession(); + self.session = @as(Session, undefined); + const session = &self.session.?; + try Session.init(session, self); + return session; +} + +pub fn closeSession(self: *Browser) void { + if (self.session) |*session| { + session.deinit(); + self.session = null; + _ = self.session_arena.reset(.{ .retain_with_limit = 1 * 1024 * 1024 }); + self.env.lowMemoryNotification(); + } +} + +pub fn runMicrotasks(self: *const Browser) void { + self.env.runMicrotasks(); +} + +pub fn runMessageLoop(self: *const Browser) void { + while (self.env.pumpMessageLoop()) { + log.debug(.browser, "pumpMessageLoop", .{}); + } + self.env.runIdleTasks(); +} diff --git a/src/browser/DataURI.zig b/src/browser/DataURI.zig deleted file mode 100644 index 00d3792f1..000000000 --- a/src/browser/DataURI.zig +++ /dev/null @@ -1,52 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; - -// Parses data:[][;base64], -pub fn parse(allocator: Allocator, src: []const u8) !?[]const u8 { - if (!std.mem.startsWith(u8, src, "data:")) { - return null; - } - - const uri = src[5..]; - const data_starts = std.mem.indexOfScalar(u8, uri, ',') orelse return null; - - var data = uri[data_starts + 1 ..]; - - // Extract the encoding. 
- const metadata = uri[0..data_starts]; - if (std.mem.endsWith(u8, metadata, ";base64")) { - const decoder = std.base64.standard.Decoder; - const decoded_size = try decoder.calcSizeForSlice(data); - - const buffer = try allocator.alloc(u8, decoded_size); - errdefer allocator.free(buffer); - - try decoder.decode(buffer, data); - data = buffer; - } - - return data; -} - -const testing = @import("../testing.zig"); -test "DataURI: parse valid" { - try test_valid("data:text/javascript; charset=utf-8;base64,Zm9v", "foo"); - try test_valid("data:text/javascript; charset=utf-8;,foo", "foo"); - try test_valid("data:,foo", "foo"); -} - -test "DataURI: parse invalid" { - try test_cannot_parse("atad:,foo"); - try test_cannot_parse("data:foo"); - try test_cannot_parse("data:"); -} - -fn test_valid(uri: []const u8, expected: []const u8) !void { - defer testing.reset(); - const data_uri = try parse(testing.arena_allocator, uri) orelse return error.TestFailed; - try testing.expectEqual(expected, data_uri); -} - -fn test_cannot_parse(uri: []const u8) !void { - try testing.expectEqual(null, parse(undefined, uri)); -} diff --git a/src/browser/EventManager.zig b/src/browser/EventManager.zig new file mode 100644 index 000000000..d458a3b2e --- /dev/null +++ b/src/browser/EventManager.zig @@ -0,0 +1,446 @@ +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const builtin = @import("builtin"); + +const log = @import("../log.zig"); +const String = @import("../string.zig").String; + +const js = @import("js/js.zig"); +const Page = @import("Page.zig"); + +const Node = @import("webapi/Node.zig"); +const Event = @import("webapi/Event.zig"); +const EventTarget = @import("webapi/EventTarget.zig"); + +const Allocator = std.mem.Allocator; + +const IS_DEBUG = builtin.mode == .Debug; + +pub const EventManager = @This(); + +page: *Page, +arena: Allocator, +listener_pool: std.heap.MemoryPool(Listener), +lookup: std.AutoHashMapUnmanaged(usize, std.DoublyLinkedList), +dispatch_depth: u32 = 0, + +pub fn init(page: *Page) EventManager { + return .{ + .page = page, + .lookup = .{}, + .arena = page.arena, + .listener_pool = std.heap.MemoryPool(Listener).init(page.arena), + .dispatch_depth = 0, + }; +} + +pub const RegisterOptions = struct { + once: bool = false, + capture: bool = false, + passive: bool = false, + signal: ?*@import("webapi/AbortSignal.zig") = null, +}; +pub fn register(self: *EventManager, target: *EventTarget, typ: []const u8, function: js.Function, opts: RegisterOptions) !void { + if (comptime IS_DEBUG) { + log.debug(.event, "eventManager.register", .{ .type = typ, .capture = opts.capture, .once = opts.once }); + } + + // If a signal is provided and already aborted, don't register the listener + if (opts.signal) |signal| { + if (signal.getAborted()) { + return; + } + } + + const gop = try self.lookup.getOrPut(self.arena, @intFromPtr(target)); + if (gop.found_existing) { + // check for duplicate functions already registered + var node = gop.value_ptr.first; + while (node) |n| { + const listener: *Listener = @alignCast(@fieldParentPtr("node", n)); + if (listener.function.eql(function) and listener.capture == opts.capture) { + return; + } + node = n.next; + } + } else { 
+ gop.value_ptr.* = .{}; + } + + const listener = try self.listener_pool.create(); + listener.* = .{ + .node = .{}, + .once = opts.once, + .capture = opts.capture, + .passive = opts.passive, + .function = .{ .value = function }, + .signal = opts.signal, + .typ = try String.init(self.arena, typ, .{}), + }; + // append the listener to the list of listeners for this target + gop.value_ptr.append(&listener.node); +} + +pub fn remove(self: *EventManager, target: *EventTarget, typ: []const u8, function: js.Function, use_capture: bool) void { + const list = self.lookup.getPtr(@intFromPtr(target)) orelse return; + if (findListener(list, typ, function, use_capture)) |listener| { + self.removeListener(list, listener); + } +} + +pub fn dispatch(self: *EventManager, target: *EventTarget, event: *Event) !void { + if (comptime IS_DEBUG) { + log.debug(.event, "eventManager.dispatch", .{ .type = event._type_string.str(), .bubbles = event._bubbles }); + } + + event._target = target; + var was_handled = false; + + defer if (was_handled) { + self.page.js.runMicrotasks(); + }; + + switch (target._type) { + .node => |node| try self.dispatchNode(node, event, &was_handled), + .xhr, .window, .abort_signal, .media_query_list, .message_port, .text_track_cue, .navigation, .screen, .screen_orientation => { + const list = self.lookup.getPtr(@intFromPtr(target)) orelse return; + try self.dispatchAll(list, target, event, &was_handled); + }, + } +} + +// There are a lot of events that can be attached via addEventListener or as +// a property, like the XHR events, or window.onload. You might think that the +// property is just a shortcut for calling addEventListener, but they are distinct. +// An event set via property cannot be removed by removeEventListener. If you +// set both the property and add a listener, they both execute. 
+const DispatchWithFunctionOptions = struct { + context: []const u8, + inject_target: bool = true, +}; +pub fn dispatchWithFunction(self: *EventManager, target: *EventTarget, event: *Event, function_: ?js.Function, comptime opts: DispatchWithFunctionOptions) !void { + if (comptime IS_DEBUG) { + log.debug(.event, "dispatchWithFunction", .{ .type = event._type_string.str(), .context = opts.context, .has_function = function_ != null }); + } + + if (comptime opts.inject_target) { + event._target = target; + } + + var was_dispatched = false; + defer if (was_dispatched) { + self.page.js.runMicrotasks(); + }; + + if (function_) |func| { + event._current_target = target; + if (func.call(void, .{event})) { + was_dispatched = true; + } else |err| { + // a non-JS error + log.warn(.event, opts.context, .{ .err = err }); + } + } + + const list = self.lookup.getPtr(@intFromPtr(target)) orelse return; + try self.dispatchAll(list, target, event, &was_dispatched); +} + +fn dispatchNode(self: *EventManager, target: *Node, event: *Event, was_handled: *bool) !void { + const ShadowRoot = @import("webapi/ShadowRoot.zig"); + + var path_len: usize = 0; + var path_buffer: [128]*EventTarget = undefined; + + var node: ?*Node = target; + while (node) |n| { + if (path_len >= path_buffer.len) break; + path_buffer[path_len] = n.asEventTarget(); + path_len += 1; + + // Check if this node is a shadow root + if (n.is(ShadowRoot)) |shadow| { + event._needs_retargeting = true; + + // If event is not composed, stop at shadow boundary + if (!event._composed) { + break; + } + + // Otherwise, jump to the shadow host and continue + node = shadow._host.asNode(); + continue; + } + + node = n._parent; + } + + // Even though the window isn't part of the DOM, events always propagate + // through it in the capture phase (unless we stopped at a shadow boundary) + if (path_len < path_buffer.len) { + path_buffer[path_len] = self.page.window.asEventTarget(); + path_len += 1; + } + + const path = 
path_buffer[0..path_len]; + + // Phase 1: Capturing phase (root → target, excluding target) + // This happens for all events, regardless of bubbling + event._event_phase = .capturing_phase; + var i: usize = path_len; + while (i > 1) { + i -= 1; + const current_target = path[i]; + if (self.lookup.getPtr(@intFromPtr(current_target))) |list| { + try self.dispatchPhase(list, current_target, event, was_handled, true); + if (event._stop_propagation) { + event._event_phase = .none; + return; + } + } + } + + // Phase 2: At target + event._event_phase = .at_target; + const target_et = target.asEventTarget(); + if (self.lookup.getPtr(@intFromPtr(target_et))) |list| { + try self.dispatchPhase(list, target_et, event, was_handled, null); + if (event._stop_propagation) { + event._event_phase = .none; + return; + } + } + + // Phase 3: Bubbling phase (target → root, excluding target) + // This only happens if the event bubbles + if (event._bubbles) { + event._event_phase = .bubbling_phase; + for (path[1..]) |current_target| { + if (self.lookup.getPtr(@intFromPtr(current_target))) |list| { + try self.dispatchPhase(list, current_target, event, was_handled, false); + if (event._stop_propagation) { + break; + } + } + } + } + + event._event_phase = .none; +} + +fn dispatchPhase(self: *EventManager, list: *std.DoublyLinkedList, current_target: *EventTarget, event: *Event, was_handled: *bool, comptime capture_only: ?bool) !void { + const page = self.page; + const typ = event._type_string; + + // Track that we're dispatching to prevent immediate removal + self.dispatch_depth += 1; + defer { + self.dispatch_depth -= 1; + // Clean up any marked listeners in this target's list after this phase + // We do this regardless of depth to handle cross-target removals correctly + self.cleanupMarkedListeners(list); + } + + var node = list.first; + while (node) |n| { + // do this now, in case we need to remove n (once: true or aborted signal) + node = n.next; + + const listener: *Listener = 
@alignCast(@fieldParentPtr("node", n)); + + // Skip listeners that were marked for removal + if (listener.marked_for_removal) { + continue; + } + + if (!listener.typ.eql(typ)) { + continue; + } + + // Can be null when dispatching to the target itself + if (comptime capture_only) |capture| { + if (listener.capture != capture) { + continue; + } + } + + // If the listener has an aborted signal, remove it and skip + if (listener.signal) |signal| { + if (signal.getAborted()) { + self.removeListener(list, listener); + continue; + } + } + + was_handled.* = true; + event._current_target = current_target; + + // Compute adjusted target for shadow DOM retargeting (only if needed) + const original_target = event._target; + if (event._needs_retargeting) { + event._target = getAdjustedTarget(original_target, current_target); + } + + switch (listener.function) { + .value => |value| try value.call(void, .{event}), + .string => |string| { + const str = try page.call_arena.dupeZ(u8, string.str()); + try self.page.js.eval(str, null); + }, + } + + // Restore original target (only if we changed it) + if (event._needs_retargeting) { + event._target = original_target; + } + + if (listener.once) { + self.removeListener(list, listener); + } + + if (event._stop_immediate_propagation) { + return; + } + } +} + +// Non-Node dispatching (XHR, Window without propagation) +fn dispatchAll(self: *EventManager, list: *std.DoublyLinkedList, current_target: *EventTarget, event: *Event, was_handled: *bool) !void { + return self.dispatchPhase(list, current_target, event, was_handled, null); +} + +fn removeListener(self: *EventManager, list: *std.DoublyLinkedList, listener: *Listener) void { + if (self.dispatch_depth > 0) { + // We're in the middle of dispatching, just mark for removal + // This prevents invalidating the linked list during iteration + listener.marked_for_removal = true; + } else { + // Safe to remove immediately + list.remove(&listener.node); + self.listener_pool.destroy(listener); + } 
+} + +fn cleanupMarkedListeners(self: *EventManager, list: *std.DoublyLinkedList) void { + var node = list.first; + while (node) |n| { + node = n.next; + const listener: *Listener = @alignCast(@fieldParentPtr("node", n)); + if (listener.marked_for_removal) { + list.remove(&listener.node); + self.listener_pool.destroy(listener); + } + } +} + +fn findListener(list: *const std.DoublyLinkedList, typ: []const u8, function: js.Function, capture: bool) ?*Listener { + var node = list.first; + while (node) |n| { + node = n.next; + const listener: *Listener = @alignCast(@fieldParentPtr("node", n)); + if (!listener.function.eql(function)) { + continue; + } + if (listener.capture != capture) { + continue; + } + if (!listener.typ.eqlSlice(typ)) { + continue; + } + return listener; + } + return null; +} + +const Listener = struct { + typ: String, + once: bool, + capture: bool, + passive: bool, + function: Function, + signal: ?*@import("webapi/AbortSignal.zig") = null, + node: std.DoublyLinkedList.Node, + marked_for_removal: bool = false, +}; + +const Function = union(enum) { + value: js.Function, + string: String, + + fn eql(self: Function, func: js.Function) bool { + return switch (self) { + .string => false, + .value => |v| return v.id == func.id, + }; + } +}; + +// Computes the adjusted target for shadow DOM event retargeting +// Returns the lowest shadow-including ancestor of original_target that is +// also an ancestor-or-self of current_target +fn getAdjustedTarget(original_target: ?*EventTarget, current_target: *EventTarget) ?*EventTarget { + const ShadowRoot = @import("webapi/ShadowRoot.zig"); + + const orig_node = switch ((original_target orelse return null)._type) { + .node => |n| n, + else => return original_target, + }; + const curr_node = switch (current_target._type) { + .node => |n| n, + else => return original_target, + }; + + // Walk up from original target, checking if we can reach current target + var node: ?*Node = orig_node; + while (node) |n| { + // Check 
if current_target is an ancestor of n (or n itself) + if (isAncestorOrSelf(curr_node, n)) { + return n.asEventTarget(); + } + + // Cross shadow boundary if needed + if (n.is(ShadowRoot)) |shadow| { + node = shadow._host.asNode(); + continue; + } + + node = n._parent; + } + + return original_target; +} + +// Check if ancestor is an ancestor of (or the same as) node +// WITHOUT crossing shadow boundaries (just regular DOM tree) +fn isAncestorOrSelf(ancestor: *Node, node: *Node) bool { + if (ancestor == node) { + return true; + } + + var current: ?*Node = node._parent; + while (current) |n| { + if (n == ancestor) { + return true; + } + current = n._parent; + } + + return false; +} diff --git a/src/browser/Factory.zig b/src/browser/Factory.zig new file mode 100644 index 000000000..6a0de8037 --- /dev/null +++ b/src/browser/Factory.zig @@ -0,0 +1,445 @@ +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +const std = @import("std"); +const assert = std.debug.assert; +const builtin = @import("builtin"); +const reflect = @import("reflect.zig"); +const IS_DEBUG = builtin.mode == .Debug; + +const log = @import("../log.zig"); +const String = @import("../string.zig").String; + +const SlabAllocator = @import("../slab.zig").SlabAllocator; + +const Page = @import("Page.zig"); +const Node = @import("webapi/Node.zig"); +const Event = @import("webapi/Event.zig"); +const Element = @import("webapi/Element.zig"); +const Document = @import("webapi/Document.zig"); +const EventTarget = @import("webapi/EventTarget.zig"); +const XMLHttpRequestEventTarget = @import("webapi/net/XMLHttpRequestEventTarget.zig"); +const Blob = @import("webapi/Blob.zig"); + +const Factory = @This(); +_page: *Page, +_slab: SlabAllocator, + +fn PrototypeChain(comptime types: []const type) type { + return struct { + const Self = @This(); + memory: []u8, + + fn totalSize() usize { + var size: usize = 0; + for (types) |T| { + size = std.mem.alignForward(usize, size, @alignOf(T)); + size += @sizeOf(T); + } + return size; + } + + fn maxAlign() std.mem.Alignment { + var alignment: std.mem.Alignment = .@"1"; + + for (types) |T| { + alignment = std.mem.Alignment.max(alignment, std.mem.Alignment.of(T)); + } + + return alignment; + } + + fn getType(comptime index: usize) type { + return types[index]; + } + + fn allocate(allocator: std.mem.Allocator) !Self { + const size = comptime Self.totalSize(); + const alignment = comptime Self.maxAlign(); + + const memory = try allocator.alignedAlloc(u8, alignment, size); + return .{ .memory = memory }; + } + + fn get(self: *const Self, comptime index: usize) *getType(index) { + var offset: usize = 0; + inline for (types, 0..) 
|T, i| { + offset = std.mem.alignForward(usize, offset, @alignOf(T)); + + if (i == index) { + return @as(*T, @ptrCast(@alignCast(self.memory.ptr + offset))); + } + offset += @sizeOf(T); + } + unreachable; + } + + fn set(self: *const Self, comptime index: usize, value: getType(index)) void { + const ptr = self.get(index); + ptr.* = value; + } + + fn setRoot(self: *const Self, comptime T: type) void { + const ptr = self.get(0); + ptr.* = .{ ._type = unionInit(T, self.get(1)) }; + } + + fn setMiddle(self: *const Self, comptime index: usize, comptime T: type) void { + assert(index >= 1); + assert(index < types.len); + + const ptr = self.get(index); + ptr.* = .{ ._proto = self.get(index - 1), ._type = unionInit(T, self.get(index + 1)) }; + } + + fn setMiddleWithValue(self: *const Self, comptime index: usize, comptime T: type, value: anytype) void { + assert(index >= 1); + + const ptr = self.get(index); + ptr.* = .{ ._proto = self.get(index - 1), ._type = unionInit(T, value) }; + } + + fn setLeaf(self: *const Self, comptime index: usize, value: anytype) void { + assert(index >= 1); + + const ptr = self.get(index); + ptr.* = value; + ptr._proto = self.get(index - 1); + } + }; +} + +fn AutoPrototypeChain(comptime types: []const type) type { + return struct { + fn create(allocator: std.mem.Allocator, leaf_value: anytype) !*@TypeOf(leaf_value) { + const chain = try PrototypeChain(types).allocate(allocator); + + const RootType = types[0]; + chain.setRoot(RootType.Type); + + inline for (1..types.len - 1) |i| { + const MiddleType = types[i]; + chain.setMiddle(i, MiddleType.Type); + } + + chain.setLeaf(types.len - 1, leaf_value); + return chain.get(types.len - 1); + } + }; +} + +pub fn init(page: *Page) Factory { + return .{ + ._page = page, + ._slab = SlabAllocator.init(page.arena, 128), + }; +} + +// this is a root object +pub fn eventTarget(self: *Factory, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + const chain = try PrototypeChain( + &.{ 
EventTarget, @TypeOf(child) }, + ).allocate(allocator); + + const event_ptr = chain.get(0); + event_ptr.* = .{ + ._type = unionInit(EventTarget.Type, chain.get(1)), + }; + chain.setLeaf(1, child); + + return chain.get(1); +} + +// this is a root object +pub fn event(self: *Factory, typ: []const u8, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + + // Special case: Event has a _type_string field, so we need manual setup + const chain = try PrototypeChain( + &.{ Event, @TypeOf(child) }, + ).allocate(allocator); + + const event_ptr = chain.get(0); + event_ptr.* = .{ + ._type = unionInit(Event.Type, chain.get(1)), + ._type_string = try String.init(self._page.arena, typ, .{}), + }; + chain.setLeaf(1, child); + + return chain.get(1); +} + +pub fn blob(self: *Factory, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + + // Special case: Blob has slice and mime fields, so we need manual setup + const chain = try PrototypeChain( + &.{ Blob, @TypeOf(child) }, + ).allocate(allocator); + + const blob_ptr = chain.get(0); + blob_ptr.* = .{ + ._type = unionInit(Blob.Type, chain.get(1)), + ._slice = "", + ._mime = "", + }; + chain.setLeaf(1, child); + + return chain.get(1); +} + +pub fn node(self: *Factory, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + return try AutoPrototypeChain( + &.{ EventTarget, Node, @TypeOf(child) }, + ).create(allocator, child); +} + +pub fn document(self: *Factory, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + return try AutoPrototypeChain( + &.{ EventTarget, Node, Document, @TypeOf(child) }, + ).create(allocator, child); +} + +pub fn documentFragment(self: *Factory, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + return try AutoPrototypeChain( + &.{ EventTarget, Node, Node.DocumentFragment, @TypeOf(child) }, + ).create(allocator, child); +} + +pub fn element(self: *Factory, child: anytype) 
!*@TypeOf(child) { + const allocator = self._slab.allocator(); + return try AutoPrototypeChain( + &.{ EventTarget, Node, Element, @TypeOf(child) }, + ).create(allocator, child); +} + +pub fn htmlElement(self: *Factory, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + return try AutoPrototypeChain( + &.{ EventTarget, Node, Element, Element.Html, @TypeOf(child) }, + ).create(allocator, child); +} + +pub fn htmlMediaElement(self: *Factory, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + return try AutoPrototypeChain( + &.{ EventTarget, Node, Element, Element.Html, Element.Html.Media, @TypeOf(child) }, + ).create(allocator, child); +} + +pub fn svgElement(self: *Factory, tag_name: []const u8, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + const ChildT = @TypeOf(child); + + if (ChildT == Element.Svg) { + return self.element(child); + } + + const chain = try PrototypeChain( + &.{ EventTarget, Node, Element, Element.Svg, ChildT }, + ).allocate(allocator); + + chain.setRoot(EventTarget.Type); + chain.setMiddle(1, Node.Type); + chain.setMiddle(2, Element.Type); + + // will never allocate, can't fail + const tag_name_str = String.init(self._page.arena, tag_name, .{}) catch unreachable; + + // Manually set Element.Svg with the tag_name + chain.set(3, .{ + ._proto = chain.get(2), + ._tag_name = tag_name_str, + ._type = unionInit(Element.Svg.Type, chain.get(4)), + }); + + chain.setLeaf(4, child); + return chain.get(4); +} + +pub fn xhrEventTarget(self: *Factory, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + + return try AutoPrototypeChain( + &.{ EventTarget, XMLHttpRequestEventTarget, @TypeOf(child) }, + ).create(allocator, child); +} + +pub fn textTrackCue(self: *Factory, child: anytype) !*@TypeOf(child) { + const allocator = self._slab.allocator(); + const TextTrackCue = @import("webapi/media/TextTrackCue.zig"); + + return try 
AutoPrototypeChain(
+        &.{ EventTarget, TextTrackCue, @TypeOf(child) },
+    ).create(allocator, child);
+}
+
+fn hasChainRoot(comptime T: type) bool {
+    // Check if this is a root
+    if (@hasDecl(T, "_prototype_root")) {
+        return true;
+    }
+
+    // If no _proto field, we're at the top but not a recognized root
+    if (!@hasField(T, "_proto")) return false;
+
+    // Get the _proto field's type and recurse
+    const fields = @typeInfo(T).@"struct".fields;
+    inline for (fields) |field| {
+        if (std.mem.eql(u8, field.name, "_proto")) {
+            const ProtoType = reflect.Struct(field.type);
+            return hasChainRoot(ProtoType);
+        }
+    }
+
+    return false;
+}
+
+fn isChainType(comptime T: type) bool {
+    if (@hasField(T, "_proto")) return false;
+    return comptime hasChainRoot(T);
+}
+
+pub fn destroy(self: *Factory, value: anytype) void {
+    const S = reflect.Struct(@TypeOf(value));
+
+    if (comptime IS_DEBUG) {
+        // We should always destroy from the leaf down.
+        if (@hasDecl(S, "_prototype_root")) {
+            // An Event{._type == .generic} (or any other similar types)
+            // _should_ be destroyed directly. 
The _type = .generic is a pseudo + // child + if (S != Event or value._type != .generic) { + log.fatal(.bug, "factory.destroy.event", .{ .type = @typeName(S) }); + unreachable; + } + } + } + + if (comptime isChainType(S)) { + self.destroyChain(value, true, 0, std.mem.Alignment.@"1"); + } else { + self.destroyStandalone(value); + } +} + +pub fn destroyStandalone(self: *Factory, value: anytype) void { + const S = reflect.Struct(@TypeOf(value)); + assert(!@hasDecl(S, "_prototype_root")); + + const allocator = self._slab.allocator(); + + if (@hasDecl(S, "deinit")) { + // And it has a deinit, we'll call it + switch (@typeInfo(@TypeOf(S.deinit)).@"fn".params.len) { + 1 => value.deinit(), + 2 => value.deinit(self._page), + else => @compileLog(@typeName(S) ++ " has an invalid deinit function"), + } + } + + allocator.destroy(value); +} + +fn destroyChain( + self: *Factory, + value: anytype, + comptime first: bool, + old_size: usize, + old_align: std.mem.Alignment, +) void { + const S = reflect.Struct(@TypeOf(value)); + const allocator = self._slab.allocator(); + + // aligns the old size to the alignment of this element + const current_size = std.mem.alignForward(usize, old_size, @alignOf(S)); + const alignment = std.mem.Alignment.fromByteUnits(@alignOf(S)); + + const new_align = std.mem.Alignment.max(old_align, alignment); + const new_size = current_size + @sizeOf(S); + + // This is initially called from a deinit. We don't want to call that + // same deinit. 
So when this is the first time destroyChain is called + // we don't call deinit (because we're in that deinit) + if (!comptime first) { + // But if it isn't the first time + if (@hasDecl(S, "deinit")) { + // And it has a deinit, we'll call it + switch (@typeInfo(@TypeOf(S.deinit)).@"fn".params.len) { + 1 => value.deinit(), + 2 => value.deinit(self._page), + else => @compileLog(@typeName(S) ++ " has an invalid deinit function"), + } + } + } + + if (@hasField(S, "_proto")) { + self.destroyChain(value._proto, false, new_size, new_align); + } else if (@hasDecl(S, "JsApi")) { + // Doesn't have a _proto, but has a JsApi. + if (self._page.js.removeTaggedMapping(@intFromPtr(value))) |tagged| { + allocator.destroy(tagged); + } + } else { + // no proto so this is the head of the chain. + // we use this as the ptr to the start of the chain. + // and we have summed up the length. + assert(@hasDecl(S, "_prototype_root")); + + const memory_ptr: [*]const u8 = @ptrCast(value); + const len = std.mem.alignForward(usize, new_size, new_align.toByteUnits()); + allocator.free(memory_ptr[0..len]); + } +} + +pub fn createT(self: *Factory, comptime T: type) !*T { + const allocator = self._slab.allocator(); + return try allocator.create(T); +} + +pub fn create(self: *Factory, value: anytype) !*@TypeOf(value) { + const ptr = try self.createT(@TypeOf(value)); + ptr.* = value; + return ptr; +} + +fn unionInit(comptime T: type, value: anytype) T { + const V = @TypeOf(value); + const field_name = comptime unionFieldName(T, V); + return @unionInit(T, field_name, value); +} + +// There can be friction between comptime and runtime. Comptime has to +// account for all possible types, even if some runtime flow makes certain +// cases impossible. At runtime, we always call `unionFieldName` with the +// correct struct or pointer type. But at comptime time, `unionFieldName` +// is called with both variants (S and *S). So we use reflect.Struct(). 
+// This only works because we never have a union with a field S and another +// field *S. +fn unionFieldName(comptime T: type, comptime V: type) []const u8 { + inline for (@typeInfo(T).@"union".fields) |field| { + if (reflect.Struct(field.type) == reflect.Struct(V)) { + return field.name; + } + } + @compileError(@typeName(V) ++ " is not a valid type for " ++ @typeName(T) ++ ".type"); +} diff --git a/src/browser/Mime.zig b/src/browser/Mime.zig new file mode 100644 index 000000000..27fe35a85 --- /dev/null +++ b/src/browser/Mime.zig @@ -0,0 +1,518 @@ +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); + +const Mime = @This(); +content_type: ContentType, +params: []const u8 = "", +// IANA defines max. charset value length as 40. +// We keep 41 for null-termination since HTML parser expects in this format. +charset: [41]u8 = default_charset, + +/// String "UTF-8" continued by null characters. +pub const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36; + +/// Mime with unknown Content-Type, empty params and empty charset. 
+pub const unknown = Mime{ .content_type = .{ .unknown = {} } }; + +pub const ContentTypeEnum = enum { + text_xml, + text_html, + text_javascript, + text_plain, + text_css, + application_json, + unknown, + other, +}; + +pub const ContentType = union(ContentTypeEnum) { + text_xml: void, + text_html: void, + text_javascript: void, + text_plain: void, + text_css: void, + application_json: void, + unknown: void, + other: struct { type: []const u8, sub_type: []const u8 }, +}; + +/// Returns the null-terminated charset value. +pub fn charsetString(mime: *const Mime) [:0]const u8 { + return @ptrCast(&mime.charset); +} + +/// Removes quotes of value if quotes are given. +/// +/// Currently we don't validate the charset. +/// See section 2.3 Naming Requirements: +/// https://datatracker.ietf.org/doc/rfc2978/ +fn parseCharset(value: []const u8) error{ CharsetTooBig, Invalid }![]const u8 { + // Cannot be larger than 40. + // https://datatracker.ietf.org/doc/rfc2978/ + if (value.len > 40) return error.CharsetTooBig; + + // If the first char is a quote, look for a pair. + if (value[0] == '"') { + if (value.len < 3 or value[value.len - 1] != '"') { + return error.Invalid; + } + + return value[1 .. value.len - 1]; + } + + // No quotes. + return value; +} + +pub fn parse(input: []u8) !Mime { + if (input.len > 255) { + return error.TooBig; + } + + // Zig's trim API is broken. The return type is always `[]const u8`, + // even if the input type is `[]u8`. @constCast is safe here. 
+ var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace)); + _ = std.ascii.lowerString(normalized, normalized); + + const content_type, const type_len = try parseContentType(normalized); + if (type_len >= normalized.len) { + return .{ .content_type = content_type }; + } + + const params = trimLeft(normalized[type_len..]); + + var charset: [41]u8 = undefined; + + var it = std.mem.splitScalar(u8, params, ';'); + while (it.next()) |attr| { + const i = std.mem.indexOfScalarPos(u8, attr, 0, '=') orelse return error.Invalid; + const name = trimLeft(attr[0..i]); + + const value = trimRight(attr[i + 1 ..]); + if (value.len == 0) { + return error.Invalid; + } + + const attribute_name = std.meta.stringToEnum(enum { + charset, + }, name) orelse continue; + + switch (attribute_name) { + .charset => { + if (value.len == 0) { + break; + } + + const attribute_value = try parseCharset(value); + @memcpy(charset[0..attribute_value.len], attribute_value); + // Null-terminate right after attribute value. 
+ charset[attribute_value.len] = 0; + }, + } + } + + return .{ + .params = params, + .charset = charset, + .content_type = content_type, + }; +} + +pub fn sniff(body: []const u8) ?Mime { + // 0x0C is form feed + const content = std.mem.trimLeft(u8, body, &.{ ' ', '\t', '\n', '\r', 0x0C }); + if (content.len == 0) { + return null; + } + + if (content[0] != '<') { + if (std.mem.startsWith(u8, content, &.{ 0xEF, 0xBB, 0xBF })) { + // UTF-8 BOM + return .{ .content_type = .{ .text_plain = {} } }; + } + if (std.mem.startsWith(u8, content, &.{ 0xFE, 0xFF })) { + // UTF-16 big-endian BOM + return .{ .content_type = .{ .text_plain = {} } }; + } + if (std.mem.startsWith(u8, content, &.{ 0xFF, 0xFE })) { + // UTF-16 little-endian BOM + return .{ .content_type = .{ .text_plain = {} } }; + } + return null; + } + + // The longest prefix we have is " known_prefix.len) { + const next = prefix[known_prefix.len]; + // a "tag-terminating-byte" + if (next == ' ' or next == '>') { + return .{ .content_type = kp.@"1" }; + } + } + } + + return null; +} + +pub fn isHTML(self: *const Mime) bool { + return self.content_type == .text_html; +} + +// we expect value to be lowercase +fn parseContentType(value: []const u8) !struct { ContentType, usize } { + const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len; + const type_name = trimRight(value[0..end]); + const attribute_start = end + 1; + + if (std.meta.stringToEnum(enum { + @"text/xml", + @"text/html", + @"text/css", + @"text/plain", + + @"text/javascript", + @"application/javascript", + @"application/x-javascript", + + @"application/json", + }, type_name)) |known_type| { + const ct: ContentType = switch (known_type) { + .@"text/xml" => .{ .text_xml = {} }, + .@"text/html" => .{ .text_html = {} }, + .@"text/javascript", .@"application/javascript", .@"application/x-javascript" => .{ .text_javascript = {} }, + .@"text/plain" => .{ .text_plain = {} }, + .@"text/css" => .{ .text_css = {} }, + .@"application/json" => .{ 
.application_json = {} }, + }; + return .{ ct, attribute_start }; + } + + const separator = std.mem.indexOfScalarPos(u8, type_name, 0, '/') orelse return error.Invalid; + + const main_type = value[0..separator]; + const sub_type = trimRight(value[separator + 1 .. end]); + + if (main_type.len == 0 or validType(main_type) == false) { + return error.Invalid; + } + if (sub_type.len == 0 or validType(sub_type) == false) { + return error.Invalid; + } + + return .{ .{ .other = .{ + .type = main_type, + .sub_type = sub_type, + } }, attribute_start }; +} + +const T_SPECIAL = blk: { + var v = [_]bool{false} ** 256; + for ("()<>@,;:\\\"/[]?=") |b| { + v[b] = true; + } + break :blk v; +}; + +const VALID_CODEPOINTS = blk: { + var v: [256]bool = undefined; + for (0..256) |i| { + v[i] = std.ascii.isAlphanumeric(i); + } + for ("!#$%&\\*+-.^'_`|~") |b| { + v[b] = true; + } + break :blk v; +}; + +fn validType(value: []const u8) bool { + for (value) |b| { + if (VALID_CODEPOINTS[b] == false) { + return false; + } + } + return true; +} + +fn trimLeft(s: []const u8) []const u8 { + return std.mem.trimLeft(u8, s, &std.ascii.whitespace); +} + +fn trimRight(s: []const u8) []const u8 { + return std.mem.trimRight(u8, s, &std.ascii.whitespace); +} + +const testing = @import("../testing.zig"); +test "Mime: invalid" { + defer testing.reset(); + + const invalids = [_][]const u8{ + "", + "text", + "text /html", + "text/ html", + "text / html", + "text/html other", + "text/html; x", + "text/html; x=", + "text/html; x= ", + "text/html; = ", + "text/html;=", + "text/html; charset=\"\"", + "text/html; charset=\"", + "text/html; charset=\"\\", + }; + + for (invalids) |invalid| { + const mutable_input = try testing.arena_allocator.dupe(u8, invalid); + try testing.expectError(error.Invalid, Mime.parse(mutable_input)); + } +} + +test "Mime: parse common" { + defer testing.reset(); + + try expect(.{ .content_type = .{ .text_xml = {} } }, "text/xml"); + try expect(.{ .content_type = .{ .text_html = {} } }, 
"text/html"); + try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain"); + + try expect(.{ .content_type = .{ .text_xml = {} } }, "text/xml;"); + try expect(.{ .content_type = .{ .text_html = {} } }, "text/html;"); + try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain;"); + + try expect(.{ .content_type = .{ .text_xml = {} } }, " \ttext/xml"); + try expect(.{ .content_type = .{ .text_html = {} } }, "text/html "); + try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain \t\t"); + + try expect(.{ .content_type = .{ .text_xml = {} } }, "TEXT/xml"); + try expect(.{ .content_type = .{ .text_html = {} } }, "text/Html"); + try expect(.{ .content_type = .{ .text_plain = {} } }, "TEXT/PLAIN"); + + try expect(.{ .content_type = .{ .text_xml = {} } }, " TeXT/xml"); + try expect(.{ .content_type = .{ .text_html = {} } }, "teXt/HtML ;"); + try expect(.{ .content_type = .{ .text_plain = {} } }, "tExT/PlAiN;"); + + try expect(.{ .content_type = .{ .text_javascript = {} } }, "text/javascript"); + try expect(.{ .content_type = .{ .text_javascript = {} } }, "Application/JavaScript"); + try expect(.{ .content_type = .{ .text_javascript = {} } }, "application/x-javascript"); + + try expect(.{ .content_type = .{ .application_json = {} } }, "application/json"); + try expect(.{ .content_type = .{ .text_css = {} } }, "text/css"); +} + +test "Mime: parse uncommon" { + defer testing.reset(); + + const text_csv = Expectation{ + .content_type = .{ .other = .{ .type = "text", .sub_type = "csv" } }, + }; + try expect(text_csv, "text/csv"); + try expect(text_csv, "text/csv;"); + try expect(text_csv, " text/csv\t "); + try expect(text_csv, " text/csv\t ;"); + + try expect( + .{ .content_type = .{ .other = .{ .type = "text", .sub_type = "csv" } } }, + "Text/CSV", + ); +} + +test "Mime: parse charset" { + defer testing.reset(); + + try expect(.{ + .content_type = .{ .text_xml = {} }, + .charset = "utf-8", + .params = "charset=utf-8", + }, "text/xml; 
charset=utf-8"); + + try expect(.{ + .content_type = .{ .text_xml = {} }, + .charset = "utf-8", + .params = "charset=\"utf-8\"", + }, "text/xml;charset=\"UTF-8\""); + + try expect(.{ + .content_type = .{ .text_html = {} }, + .charset = "iso-8859-1", + .params = "charset=\"iso-8859-1\"", + }, "text/html; charset=\"iso-8859-1\""); + + try expect(.{ + .content_type = .{ .text_html = {} }, + .charset = "iso-8859-1", + .params = "charset=\"iso-8859-1\"", + }, "text/html; charset=\"ISO-8859-1\""); + + try expect(.{ + .content_type = .{ .text_xml = {} }, + .charset = "custom-non-standard-charset-value", + .params = "charset=\"custom-non-standard-charset-value\"", + }, "text/xml;charset=\"custom-non-standard-charset-value\""); +} + +test "Mime: isHTML" { + defer testing.reset(); + + const assert = struct { + fn assert(expected: bool, input: []const u8) !void { + const mutable_input = try testing.arena_allocator.dupe(u8, input); + var mime = try Mime.parse(mutable_input); + try testing.expectEqual(expected, mime.isHTML()); + } + }.assert; + try assert(true, "text/html"); + try assert(true, "text/html;"); + try assert(true, "text/html; charset=utf-8"); + try assert(false, "text/htm"); // htm not html + try assert(false, "text/plain"); + try assert(false, "over/9000"); +} + +test "Mime: sniff" { + try testing.expectEqual(null, Mime.sniff("")); + try testing.expectEqual(null, Mime.sniff("")); + try testing.expectEqual(null, Mime.sniff("\n ")); + try testing.expectEqual(null, Mime.sniff("\n \t ")); + + const expectHTML = struct { + fn expect(input: []const u8) !void { + try testing.expectEqual(.text_html, std.meta.activeTag(Mime.sniff(input).?.content_type)); + } + }.expect; + + try expectHTML(" even more stufff"); + + try expectHTML(""); + + try expectHTML("