diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e58c8c445..167081e64 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,12 +60,6 @@ jobs: - { os: ubuntu-latest, configType: tcc } - { os: ubuntu-latest, arch: x86, runTest262: true } - { os: ubuntu-latest, arch: riscv64 } - - - { os: macos-14, configType: Debug } - - { os: macos-14, configType: Release } - - { os: macos-14, configType: examples } - - { os: macos-14, configType: shared } - - { os: macos-14, configType: asan+ubsan, runTest262: true } steps: - uses: actions/checkout@v6 with: @@ -556,18 +550,6 @@ jobs: "$ANDROID_HOME/cmake/3.22.1/bin/cmake" --build build --target qjs ls -lh build - ios: - runs-on: macos-latest - steps: - - uses: actions/checkout@v6 - - name: configure - run: | - cmake -B build -GXcode -DQJS_BUILD_WERROR=ON -DCMAKE_SYSTEM_NAME:STRING=iOS -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED:BOOL=NO -DQJS_BUILD_LIBC=ON - - name: build - run: | - cmake --build build --config Release --target qjs - ls -lh build - mimalloc-linux: runs-on: ubuntu-24.04 env: @@ -588,33 +570,12 @@ jobs: run: | make test - mimalloc-macos: - runs-on: macos-latest - env: - QJS_BUILD_CLI_WITH_STATIC_MIMALLOC: ON - MIMALLOC_SHOW_STATS: 1 - steps: - - uses: actions/checkout@v6 - - name: install dependencies - run: | - brew install mimalloc - - name: build - run: | - make - - name: cxxtest - run: | - make cxxtest - - name: test - run: | - make test - amalgam: strategy: matrix: # TODO(bnoordhuis) test on windows config: - { os: ubuntu-latest } - - { os: macos-latest } runs-on: ${{ matrix.config.os }} steps: - uses: actions/checkout@v6 @@ -727,16 +688,12 @@ jobs: platform: - ubuntu-latest - windows-latest - - macos-latest exclude: # clang-cl only makes sense on windows. - platform: ubuntu-latest mode: name: clang-cl+sanitize - - platform: macos-latest - mode: - name: clang-cl+sanitize # Use clang-cl instead of MSYS2 clang. 
# @@ -756,42 +713,15 @@ jobs: - platform: ubuntu-latest mode: name: sanitize+asanonly - - platform: macos-latest - mode: - name: sanitize+asanonly - platform: windows-latest mode: name: sanitize - # clang is the default on macos - # also gcc is an alias to clang - - platform: macos-latest - mode: - name: clang - - platform: macos-latest - mode: - name: gcc - # gcc is the default on linux - platform: ubuntu-latest mode: name: gcc - # only run sanitizer tests on linux - # - # gcc/clang's codegen shouldn't massively change across platforms, - # and linux supports most of the sanitizers. - - platform: macos-latest - mode: - name: clang+sanitize - - platform: macos-latest - mode: - # macos does not support msan - name: clang+msan - - platform: macos-latest - mode: - name: sanitize - steps: - name: Setup meson run: | @@ -799,9 +729,6 @@ jobs: - name: Install mimalloc if: ${{ matrix.platform == 'ubuntu-latest' && matrix.features.name == 'mimalloc' }} run: sudo apt update && sudo apt -y install libmimalloc-dev - - name: Install mimalloc - if: ${{ matrix.platform == 'macos-latest' && matrix.features.name == 'mimalloc' }} - run: brew install mimalloc # TODO: Install mimalloc on Windows # You need to: # - checkout mimalloc diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index cc3c2f311..5ae1f44d7 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,6 +1,7 @@ name: Docs on: + workflow_dispatch: push: branches: - master diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 00c7f95b5..e0cf64c9d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -52,35 +52,6 @@ jobs: with: name: qjs-linux-${{matrix.arch}} path: build/*-linux-${{matrix.arch}} - - macos: - runs-on: macos-latest - steps: - - uses: actions/checkout@v6 - - name: build - run: | - mkdir build - cd build - cmake -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64" -DQJS_BUILD_WERROR=ON .. 
- make -j$(getconf _NPROCESSORS_ONLN) - make -C .. amalgam # writes build/quickjs-amalgam.zip - mv qjs qjs-darwin - mv qjsc qjsc-darwin - - name: check - run: | - lipo -info build/qjs-darwin build/qjsc-darwin - - name: upload amalgamation - uses: actions/upload-artifact@v7 - with: - name: quickjs-amalgam.zip - path: build/quickjs-amalgam.zip - compression-level: 0 # already compressed - - name: upload - uses: actions/upload-artifact@v7 - with: - name: qjs-darwin - path: build/*-darwin - windows: runs-on: windows-latest strategy: @@ -145,7 +116,7 @@ jobs: build/qjs-wasi-reactor.wasm upload-to-release: - needs: [linux, macos, windows, wasi, check_meson_version] + needs: [linux, windows, wasi, check_meson_version] runs-on: ubuntu-22.04 steps: - name: get assets diff --git a/CMakeLists.txt b/CMakeLists.txt index 82f366570..b63afc7c1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,12 +160,6 @@ if(WIN32) endif() endif() -# MacOS and GCC 11 or later need -Wno-maybe-uninitialized -# https://github.com/quickjs-ng/quickjs/issues/453 -if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 11) - xcheck_add_c_compiler_flag(-Wno-maybe-uninitialized) -endif() - if(CMAKE_SYSTEM_NAME STREQUAL "WASI") add_compile_definitions( _WASI_EMULATED_PROCESS_CLOCKS @@ -508,18 +502,12 @@ if(QJS_BUILD_EXAMPLES) PREFIX "" ) target_link_libraries(fib PRIVATE qjs_module_lib) - if(APPLE) - target_link_options(fib PRIVATE -undefined dynamic_lookup) - endif() add_library(point MODULE examples/point.c) set_target_properties(point PROPERTIES PREFIX "" ) target_link_libraries(point PRIVATE qjs_module_lib) - if(APPLE) - target_link_options(point PRIVATE -undefined dynamic_lookup) - endif() add_executable(test_fib gen/test_fib.c diff --git a/api-test.c b/api-test.c index 7aa7dc8a6..ee80995f0 100644 --- a/api-test.c +++ b/api-test.c @@ -1013,6 +1013,131 @@ static void get_uint8array(void) JS_FreeRuntime(rt); } +static struct { + int call_count; + int 
last_line; + int last_col; + char last_filename[256]; + char last_funcname[256]; + int stack_depth; + int max_local_count; + int abort_at; /* abort (return -1) on this call, 0 = never */ +} trace_state; + +static int debug_trace_cb(JSContext *ctx, + const char *filename, + const char *funcname, + int line, + int col, + void *opaque) +{ + trace_state.call_count++; + trace_state.last_line = line; + trace_state.last_col = col; + snprintf(trace_state.last_filename, sizeof(trace_state.last_filename), + "%s", filename); + snprintf(trace_state.last_funcname, sizeof(trace_state.last_funcname), + "%s", funcname); + trace_state.stack_depth = JS_GetStackDepth(ctx); + int count = 0; + JSDebugLocalVar *vars = NULL; + assert(JS_GetLocalVariablesAtLevel(ctx, 0, &vars, &count) == 0); + if (count > trace_state.max_local_count) + trace_state.max_local_count = count; + if (vars) + JS_FreeLocalVariables(ctx, vars, count); + if (trace_state.abort_at > 0 && + trace_state.call_count >= trace_state.abort_at) + return -1; + return 0; +} + +static void debug_trace(void) +{ + JSRuntime *rt = JS_NewRuntime(); + JSContext *ctx = JS_NewContext(rt); + + memset(&trace_state, 0, sizeof(trace_state)); + { + JSValue ret = eval(ctx, "1+2"); + assert(!JS_IsException(ret)); + JS_FreeValue(ctx, ret); + assert(trace_state.call_count == 0); + } + + JS_SetDebugTraceHandler(ctx, debug_trace_cb, NULL); + memset(&trace_state, 0, sizeof(trace_state)); + { + JSValue ret = eval(ctx, "var x = 1; x + 2"); + assert(!JS_IsException(ret)); + JS_FreeValue(ctx, ret); + assert(trace_state.call_count > 0); + assert(!strcmp(trace_state.last_filename, "")); + } + + { + JSDebugLocalVar *vars = NULL; + int count = -1; + assert(JS_GetLocalVariablesAtLevel(ctx, 0, &vars, &count) == 0); + assert(vars == NULL); + assert(count == 0); + } + + memset(&trace_state, 0, sizeof(trace_state)); + { + static const char code[] = + "function outer() {\n" + " function inner() {\n" + " return 42;\n" + " }\n" + " return inner();\n" + "}\n" + 
"outer();\n"; + JSValue ret = eval(ctx, code); + assert(!JS_IsException(ret)); + JS_FreeValue(ctx, ret); + assert(trace_state.call_count > 0); + assert(trace_state.stack_depth >= 1); + } + + memset(&trace_state, 0, sizeof(trace_state)); + { + static const char code[] = + "function f(a, b) {\n" + " var c = a + b;\n" + " return c;\n" + "}\n" + "f(10, 20);\n"; + JSValue ret = eval(ctx, code); + assert(!JS_IsException(ret)); + JS_FreeValue(ctx, ret); + assert(trace_state.call_count > 0); + assert(trace_state.max_local_count >= 2); + } + + memset(&trace_state, 0, sizeof(trace_state)); + trace_state.abort_at = 1; + { + JSValue ret = eval(ctx, "1+2; 3+4"); + assert(JS_IsException(ret)); + JS_FreeValue(ctx, ret); + JSValue exc = JS_GetException(ctx); + JS_FreeValue(ctx, exc); + } + + JS_SetDebugTraceHandler(ctx, NULL, NULL); + memset(&trace_state, 0, sizeof(trace_state)); + { + JSValue ret = eval(ctx, "1+2"); + assert(!JS_IsException(ret)); + JS_FreeValue(ctx, ret); + assert(trace_state.call_count == 0); + } + + JS_FreeContext(ctx); + JS_FreeRuntime(rt); +} + static void new_symbol(void) { JSRuntime *rt = new_runtime(); @@ -1089,6 +1214,7 @@ int main(void) slice_string_tocstring(); immutable_array_buffer(); get_uint8array(); + debug_trace(); new_symbol(); return 0; } diff --git a/cutils.h b/cutils.h index c45009e10..4201bd310 100644 --- a/cutils.h +++ b/cutils.h @@ -32,9 +32,6 @@ #if !defined(_MSC_VER) #include #endif -#if defined(__APPLE__) -#include -#endif #include #include #include @@ -50,9 +47,7 @@ extern "C" { #define alloca _alloca #define ssize_t ptrdiff_t #endif -#if defined(__APPLE__) -#include -#elif defined(__linux__) || defined(__ANDROID__) || defined(__CYGWIN__) || defined(__GLIBC__) +#if defined(__linux__) || defined(__ANDROID__) || defined(__CYGWIN__) || defined(__GLIBC__) #include #elif defined(__FreeBSD__) #include @@ -600,9 +595,7 @@ static inline uint64_t js__hrtime_ns(void); static inline size_t js__malloc_usable_size(const void *ptr) { -#if 
defined(__APPLE__) - return malloc_size(ptr); -#elif defined(_WIN32) +#if defined(_WIN32) return _msize((void *)ptr); #elif defined(__linux__) || defined(__ANDROID__) || defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__GLIBC__) return malloc_usable_size((void *)ptr); @@ -846,6 +839,12 @@ static inline int JS_PRINTF_FORMAT_ATTR(2, 3) dbuf_printf(DynBuf *s, JS_PRINTF_F va_start(ap, fmt); len = vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); + if (len < 0) { + /* vsnprintf encoding error: don't let the caller wrap s->size by + advancing it by -1, which would underflow to near SIZE_MAX. */ + s->error = true; + return -1; + } if (len < (int)sizeof(buf)) { /* fast case */ return dbuf_put(s, (uint8_t *)buf, len); @@ -1656,37 +1655,6 @@ static inline int js_exepath(char *buffer, size_t *size_ptr) { free(utf16_buffer); return -1; } -#elif defined(__APPLE__) -static inline int js_exepath(char *buffer, size_t *size) { - /* realpath(exepath) may be > PATH_MAX so double it to be on the safe side. 
*/ - char abspath[PATH_MAX * 2 + 1]; - char exepath[PATH_MAX + 1]; - uint32_t exepath_size; - size_t abspath_size; - - if (buffer == NULL || size == NULL || *size == 0) - return -1; - - exepath_size = sizeof(exepath); - if (_NSGetExecutablePath(exepath, &exepath_size)) - return -1; - - if (realpath(exepath, abspath) != abspath) - return -1; - - abspath_size = strlen(abspath); - if (abspath_size == 0) - return -1; - - *size -= 1; - if (*size > abspath_size) - *size = abspath_size; - - memcpy(buffer, abspath, *size); - buffer[*size] = '\0'; - - return 0; -} #elif defined(__linux__) || defined(__GNU__) static inline int js_exepath(char *buffer, size_t *size) { ssize_t n; @@ -1840,7 +1808,7 @@ static inline void js_mutex_unlock(js_mutex_t *mutex) { } static inline void js_cond_init(js_cond_t *cond) { -#if defined(__APPLE__) && defined(__MACH__) +#if defined(__MACH__) if (pthread_cond_init(cond, NULL)) abort(); #else @@ -1861,7 +1829,7 @@ static inline void js_cond_init(js_cond_t *cond) { } static inline void js_cond_destroy(js_cond_t *cond) { -#if defined(__APPLE__) && defined(__MACH__) +#if defined(__MACH__) /* It has been reported that destroying condition variables that have been * signalled but not waited on can sometimes result in application crashes. * See https://codereview.chromium.org/1323293005. 
@@ -1888,7 +1856,7 @@ static inline void js_cond_destroy(js_cond_t *cond) { if (pthread_mutex_destroy(&mutex)) abort(); -#endif /* defined(__APPLE__) && defined(__MACH__) */ +#endif /* defined(__MACH__) */ if (pthread_cond_destroy(cond)) abort(); @@ -1905,7 +1873,7 @@ static inline void js_cond_broadcast(js_cond_t *cond) { } static inline void js_cond_wait(js_cond_t *cond, js_mutex_t *mutex) { -#if defined(__APPLE__) && defined(__MACH__) +#if defined(__MACH__) int r; errno = 0; @@ -1928,13 +1896,11 @@ static inline int js_cond_timedwait(js_cond_t *cond, js_mutex_t *mutex, uint64_t int r; struct timespec ts; -#if !defined(__APPLE__) timeout += js__hrtime_ns(); -#endif ts.tv_sec = timeout / NANOSEC; ts.tv_nsec = timeout % NANOSEC; -#if defined(__APPLE__) && defined(__MACH__) +#if defined(__MACH__) r = pthread_cond_timedwait_relative_np(cond, mutex, &ts); #else r = pthread_cond_timedwait(cond, mutex, &ts); diff --git a/docs/docs/stdlib.md b/docs/docs/stdlib.md index f579760d8..ffc690747 100644 --- a/docs/docs/stdlib.md +++ b/docs/docs/stdlib.md @@ -328,7 +328,7 @@ Cancel a timer. ### `platform` -Return a string representing the platform: `"linux"`, `"darwin"`, `"win32"` or `"js"`. +Return a string representing the platform: `"linux"`, `"win32"` or `"js"`. 
### `Worker(module_filename)` diff --git a/docs/docs/supported_platforms.md b/docs/docs/supported_platforms.md index bb817b3d4..b0826b9af 100644 --- a/docs/docs/supported_platforms.md +++ b/docs/docs/supported_platforms.md @@ -7,7 +7,6 @@ sidebar_position: 8 | System | Supported versions | Notes | |---|---|---| | GNU/Linux | * | glibc and musl are supported | -| macOS | macOS >= 11 | Currently supported macOS releases | | Windows | >= Windows 7* | VS >= 2022 and Clang are supported; requires `` | | FreeBSD | * | Limited testing | | OpenBSD | * | Limited testing | diff --git a/docs/package-lock.json b/docs/package-lock.json index f13c2d8cc..526862411 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -6615,19 +6615,6 @@ "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" }, - "node_modules/fsevents": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "hasInstallScript": true, - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, "node_modules/function-bind": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", diff --git a/meson.build b/meson.build index 35d727062..908e01964 100644 --- a/meson.build +++ b/meson.build @@ -40,11 +40,6 @@ if host_system != 'sunos' qjs_gcc_args += '-funsigned-char' endif -if host_system == 'darwin' - # https://github.com/quickjs-ng/quickjs/issues/453 - qjs_gcc_warning_args += '-Wno-maybe-uninitialized' -endif - # https://github.com/microsoft/cpp-docs/tree/main/docs/error-messages/compiler-warnings qjs_msvc_warning_args = [ '/wd4018', # -Wno-sign-conversion @@ -130,8 +125,10 @@ qjs_sys_deps = [] m_dep = 
cc.find_library('m', required: false) qjs_sys_deps += m_dep -qjs_sys_deps += dependency('threads', required: false) -qjs_sys_deps += dependency('dl', required: false) +if host_system != 'dos' + qjs_sys_deps += dependency('threads', required: false) + qjs_sys_deps += dependency('dl', required: false) +endif qjs_srcs = files( 'dtoa.c', @@ -143,6 +140,7 @@ qjs_hdrs = files( 'quickjs.h', ) +lib_only = get_option('libonly') qjs_libc = get_option('libc') qjs_libc_srcs = files('quickjs-libc.c') qjs_libc_hdrs = files('quickjs-libc.h') @@ -220,7 +218,7 @@ if qjs_libc include_directories: include_directories('.'), dependencies: qjs_dep, ) -else +elif not lib_only qjs_libc_lib = static_library( 'quickjs-libc', qjs_libc_srcs, @@ -304,14 +302,19 @@ endif qjsc_srcs = files( 'qjsc.c', ) -qjsc_exe = executable( - 'qjsc', - qjsc_srcs, - c_args: qjs_c_args, - dependencies: [qjs_dep, qjs_libc_dep], - install: true, -) +if lib_only + qjsc_exe = '/bin/true' +else + qjsc_exe = executable( + 'qjsc', + qjsc_srcs, + + c_args: qjs_c_args, + dependencies: [qjs_dep, qjs_libc_dep], + install: true, + ) +endif mimalloc_dep = [] mimalloc_sys_dep = dependency('mimalloc', required: get_option('cli_mimalloc')) @@ -328,16 +331,21 @@ qjs_exe_srcs = files( 'gen/standalone.c', 'qjs.c', ) -qjs_exe = executable( - 'qjs', - qjs_exe_srcs, - c_args: qjs_c_args, - dependencies: [qjs_dep, qjs_libc_dep, mimalloc_dep], - export_dynamic: true, +if lib_only + qjs_exe = '/bin/true' +else + qjs_exe = executable( + 'qjs', + qjs_exe_srcs, - install: true, -) + c_args: qjs_c_args, + dependencies: [qjs_dep, qjs_libc_dep, mimalloc_dep], + export_dynamic: true, + + install: true, + ) +endif if meson.is_cross_build() mimalloc_native_dep = [] diff --git a/meson_options.txt b/meson_options.txt index 20e661e1a..1b24c2a33 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -4,3 +4,4 @@ option('libc', type: 'boolean', value: false, description: 'build qjs standard l option('cli_mimalloc', type: 'feature', value: 
'disabled', description: 'build qjs cli with mimalloc') option('docdir', type: 'string', description: 'documentation directory') option('parser', type: 'boolean', value: true, description: 'Enable JS source code parser') +option('libonly', type: 'boolean', value: false, description: 'Build qjs library only') diff --git a/qjsc.c b/qjsc.c index 635ed2700..39acde63a 100644 --- a/qjsc.c +++ b/qjsc.c @@ -60,8 +60,8 @@ static FILE *outfile; static const char *c_ident_prefix = "qjsc_"; static int strip; -void namelist_add(namelist_t *lp, const char *name, const char *short_name, - int flags) +static void namelist_add(namelist_t *lp, const char *name, const char *short_name, + int flags) { namelist_entry_t *e; if (lp->count == lp->size) { @@ -81,7 +81,7 @@ void namelist_add(namelist_t *lp, const char *name, const char *short_name, e->flags = flags; } -void namelist_free(namelist_t *lp) +static void namelist_free(namelist_t *lp) { while (lp->count > 0) { namelist_entry_t *e = &lp->array[--lp->count]; @@ -93,7 +93,7 @@ void namelist_free(namelist_t *lp) lp->size = 0; } -namelist_entry_t *namelist_find(namelist_t *lp, const char *name) +static namelist_entry_t *namelist_find(namelist_t *lp, const char *name) { int i; for(i = 0; i < lp->count; i++) { @@ -216,8 +216,10 @@ static void find_unique_cname(char *cname, size_t cname_size) js__pstrcpy(cname, cname_size, cname1); } -JSModuleDef *jsc_module_loader(JSContext *ctx, - const char *module_name, void *opaque) +/* loader for ES6 modules */ +static JSModuleDef *jsc_module_loader(JSContext *ctx, + const char *module_name, + void *opaque) { JSModuleDef *m; namelist_entry_t *e; @@ -239,12 +241,17 @@ JSModuleDef *jsc_module_loader(JSContext *ctx, JSValue func_val; char cname[1000]; - buf = js_load_file(ctx, &buf_len, module_name); + char *module_path = JS_GetContextOpaque(ctx); + buf = js_load_file(ctx, &buf_len, module_path); if (!buf) { - JS_ThrowReferenceError(ctx, "could not load module filename '%s'", - module_name); + 
JS_ThrowReferenceError(ctx, "could not load module '%s' from path '%s'", + module_name, module_path); + JS_SetContextOpaque(ctx, NULL); + js_free(ctx, module_path); return NULL; } + JS_SetContextOpaque(ctx, NULL); + js_free(ctx, module_path); /* compile the module */ func_val = JS_Eval(ctx, (char *)buf, buf_len, module_name, @@ -265,8 +272,78 @@ JSModuleDef *jsc_module_loader(JSContext *ctx, return m; } -static void compile_file(JSContext *ctx, FILE *fo, - const char *filename, +/* copied from quickjs.c:js_default_module_normalize_name */ +static char *jsc_module_normalize_impl(JSContext *ctx, + const char *base_name, + const char *name) +{ + char *filename, *p; + const char *r; + int cap; + int len; + + if (name[0] != '.') { + /* if no initial dot, the module name is not modified */ + return js_strdup(ctx, name); + } + + p = strrchr(base_name, '/'); + if (p) + len = p - base_name; + else + len = 0; + + cap = len + strlen(name) + 1 + 1; + filename = js_malloc(ctx, cap); + if (!filename) + return NULL; + memcpy(filename, base_name, len); + filename[len] = '\0'; + + /* we only normalize the leading '..' or '.' */ + r = name; + for(;;) { + if (r[0] == '.' && r[1] == '/') { + r += 2; + } else if (r[0] == '.' && r[1] == '.' && r[2] == '/') { + /* remove the last path element of filename, except if "." + or ".." 
*/ + if (filename[0] == '\0') + break; + p = strrchr(filename, '/'); + if (!p) + p = filename; + else + p++; + if (!strcmp(p, ".") || !strcmp(p, "..")) + break; + if (p > filename) + p--; + *p = '\0'; + r += 3; + } else { + break; + } + } + if (filename[0] != '\0') + js__pstrcat(filename, cap, "/"); + js__pstrcat(filename, cap, r); + // printf("normalize: %s %s -> %s\n", base_name, name, filename); + return filename; +} + +static char *jsc_module_normalize(JSContext *ctx, + const char *base_name, + const char *name, + void *opaque) +{ + char *base_file_name = opaque; + JS_SetContextOpaque(ctx, jsc_module_normalize_impl(ctx, base_file_name, name)); + return jsc_module_normalize_impl(ctx, base_name, name); +} + +static void compile_file(JSRuntime *rt, JSContext *ctx, + FILE *fo, const char *filename, const char *script_name, const char *c_name1, int module) @@ -291,11 +368,16 @@ static void compile_file(JSContext *ctx, FILE *fo, eval_flags |= JS_EVAL_TYPE_MODULE; else eval_flags |= JS_EVAL_TYPE_GLOBAL; + + char* filename_dup = js_strdup(ctx, filename); + JS_SetModuleLoaderFunc(rt, jsc_module_normalize, jsc_module_loader, filename_dup); obj = JS_Eval(ctx, (const char *)buf, buf_len, script_name ? 
script_name : filename, eval_flags); if (JS_IsException(obj)) { js_std_dump_error(ctx); exit(1); } + JS_SetModuleLoaderFunc(rt, jsc_module_normalize, jsc_module_loader, NULL); + js_free(ctx, filename_dup); js_free(ctx, buf); if (c_name1) { js__pstrcpy(c_name, sizeof(c_name), c_name1); @@ -331,7 +413,7 @@ static const char main_c_template2[] = #define PROG_NAME "qjsc" -void help(void) +static void help(void) { printf("QuickJS-ng Compiler version %s\n" "usage: " PROG_NAME " [options] [files]\n" @@ -573,9 +655,6 @@ int main(int argc, char **argv) rt = JS_NewRuntime(); ctx = JS_NewContext(rt); - /* loader for ES6 modules */ - JS_SetModuleLoaderFunc(rt, NULL, jsc_module_loader, NULL); - if (output_type != OUTPUT_RAW) { fprintf(fo, "/* File generated automatically by the QuickJS-ng compiler. */\n" "\n" @@ -594,11 +673,12 @@ int main(int argc, char **argv) for(i = optind; i < argc; i++) { const char *filename = argv[i]; - compile_file(ctx, fo, filename, script_name, cname, module); + compile_file(rt, ctx, fo, filename, script_name, cname, module); cname = NULL; } for(i = 0; i < dynamic_module_list.count; i++) { + JS_SetContextOpaque(ctx, js_strdup(ctx, dynamic_module_list.array[i].name)); if (!jsc_module_loader(ctx, dynamic_module_list.array[i].name, NULL)) { fprintf(stderr, "Could not load dynamic module '%s'\n", dynamic_module_list.array[i].name); diff --git a/quickjs-libc.c b/quickjs-libc.c index 56c38453b..e8d854370 100644 --- a/quickjs-libc.c +++ b/quickjs-libc.c @@ -66,13 +66,6 @@ #include #endif -#if defined(__APPLE__) -typedef sig_t sighandler_t; -#include -#include -#define environ (*_NSGetEnviron()) -#endif - #ifdef __sun typedef void (*sighandler_t)(int); extern char **environ; @@ -197,6 +190,8 @@ typedef struct JSThreadState { #endif // USE_WORKER JSClassID std_file_class_id; JSClassID worker_class_id; + JSClassID text_encoder_class_id; + JSClassID text_decoder_class_id; } JSThreadState; static uint64_t os_pending_signals; @@ -3262,16 +3257,6 @@ static JSValue 
js_os_stat(JSContext *ctx, JSValueConst this_val, JS_DefinePropertyValueStr(ctx, obj, "ctime", JS_NewInt64(ctx, (int64_t)st.st_ctime * 1000), JS_PROP_C_W_E); -#elif defined(__APPLE__) - JS_DefinePropertyValueStr(ctx, obj, "atime", - JS_NewInt64(ctx, timespec_to_ms(&st.st_atimespec)), - JS_PROP_C_W_E); - JS_DefinePropertyValueStr(ctx, obj, "mtime", - JS_NewInt64(ctx, timespec_to_ms(&st.st_mtimespec)), - JS_PROP_C_W_E); - JS_DefinePropertyValueStr(ctx, obj, "ctime", - JS_NewInt64(ctx, timespec_to_ms(&st.st_ctimespec)), - JS_PROP_C_W_E); #else JS_DefinePropertyValueStr(ctx, obj, "atime", JS_NewInt64(ctx, timespec_to_ms(&st.st_atim)), @@ -3394,7 +3379,7 @@ static JSValue js_os_realpath(JSContext *ctx, JSValueConst this_val, } #endif -#if !defined(_WIN32) && !defined(__wasi__) && !(defined(__APPLE__) && (TARGET_OS_TV || TARGET_OS_WATCH)) +#if !defined(_WIN32) && !defined(__wasi__) static JSValue js_os_symlink(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { @@ -3580,7 +3565,8 @@ static JSValue js_os_exec(JSContext *ctx, JSValueConst this_val, bool block_flag = true, use_path = true; static const char *std_name[3] = { "stdin", "stdout", "stderr" }; int std_fds[3]; - uint32_t uid = -1, gid = -1; + uint32_t uid = 0, gid = 0; + bool uid_set = false, gid_set = false; int ngroups = -1; gid_t groups[64]; @@ -3675,6 +3661,7 @@ static JSValue js_os_exec(JSContext *ctx, JSValueConst this_val, JS_FreeValue(ctx, val); if (ret) goto exception; + uid_set = true; } val = JS_GetPropertyStr(ctx, options, "gid"); @@ -3685,6 +3672,7 @@ static JSValue js_os_exec(JSContext *ctx, JSValueConst this_val, JS_FreeValue(ctx, val); if (ret) goto exception; + gid_set = true; } val = JS_GetPropertyStr(ctx, options, "groups"); @@ -3755,16 +3743,21 @@ static JSValue js_os_exec(JSContext *ctx, JSValueConst this_val, if (chdir(cwd) < 0) _exit(127); } + /* Drop privileges in the correct order: supplementary groups and the + primary gid must change before setuid(), because 
setuid(non-root) + strips the capability needed for setgroups()/setgid() to succeed. + Track "was set" with explicit bools instead of a (uint32_t)-1 + sentinel, which collided with the legitimate value 0xFFFFFFFF. */ if (ngroups != -1) { if (setgroups(ngroups, groups) < 0) _exit(127); } - if (uid != -1) { - if (setuid(uid) < 0) + if (gid_set) { + if (setgid(gid) < 0) _exit(127); } - if (gid != -1) { - if (setgid(gid) < 0) + if (uid_set) { + if (setuid(uid) < 0) _exit(127); } @@ -4254,7 +4247,10 @@ static JSValue js_worker_postMessage(JSContext *ctx, JSValueConst this_val, msg->data = malloc(data_len); if (!msg->data) goto fail; - memcpy(msg->data, data, data_len); + /* memcpy with NULL src/dst is UB even when n == 0; the writer side + can produce zero-length payloads (e.g. JSON.stringify(undefined)). */ + if (data_len > 0) + memcpy(msg->data, data, data_len); msg->data_len = data_len; if (sab_tab.len > 0) { @@ -4367,8 +4363,6 @@ void js_std_set_worker_new_context_func(JSContext *(*func)(JSRuntime *rt)) #if defined(_WIN32) #define OS_PLATFORM "win32" -#elif defined(__APPLE__) -#define OS_PLATFORM "darwin" #elif defined(EMSCRIPTEN) #define OS_PLATFORM "js" #elif defined(__CYGWIN__) @@ -4474,7 +4468,7 @@ static const JSCFunctionListEntry js_os_funcs[] = { #if !defined(__wasi__) JS_CFUNC_DEF("realpath", 1, js_os_realpath ), #endif -#if !defined(_WIN32) && !defined(__wasi__) && !(defined(__APPLE__) && (TARGET_OS_TV || TARGET_OS_WATCH)) +#if !defined(_WIN32) && !defined(__wasi__) JS_CFUNC_MAGIC_DEF("lstat", 1, js_os_stat, 1 ), JS_CFUNC_DEF("symlink", 2, js_os_symlink ), JS_CFUNC_DEF("readlink", 1, js_os_readlink ), @@ -4599,6 +4593,536 @@ static JSValue js_print(JSContext *ctx, JSValueConst this_val, return JS_UNDEFINED; } +/**********************************************************/ +/* WHATWG Encoding: TextEncoder / TextDecoder (UTF-8 only) */ + +typedef struct { + bool fatal; + bool ignore_bom; + /* Once we've decoded any input (or skipped a BOM), we stop treating + a 
leading U+FEFF as a BOM. Reset on non-stream decode(). */ + bool bom_seen; + /* Up to 3 trailing bytes of an incomplete UTF-8 sequence saved + across stream decode() calls. */ + uint8_t pending[4]; + int pending_len; +} JSTextDecoder; + +static void js_text_decoder_finalizer(JSRuntime *rt, JSValueConst val) +{ + JSThreadState *ts = js_get_thread_state(rt); + JSTextDecoder *td = JS_GetOpaque(val, ts->text_decoder_class_id); + js_free_rt(rt, td); +} + +static JSClassDef js_text_encoder_class = { + "TextEncoder", +}; + +static JSClassDef js_text_decoder_class = { + "TextDecoder", + .finalizer = js_text_decoder_finalizer, +}; + +/* Lead-byte length of a UTF-8 sequence, or 0 for invalid/continuation. */ +static int js_utf8_seq_len(uint8_t b) +{ + if (b < 0x80) return 1; + if (b < 0xC2) return 0; + if (b < 0xE0) return 2; + if (b < 0xF0) return 3; + if (b < 0xF5) return 4; + return 0; +} + +/* Bounds for the first continuation byte after `lead`, matching the + acceptance set of utf8_decode() in cutils.h. Subsequent continuation + bytes are always [0x80, 0xBF]. */ +static void js_utf8_first_cont_bounds(uint8_t lead, uint8_t *lo, uint8_t *hi) +{ + if (lead == 0xE0) { *lo = 0xA0; *hi = 0xBF; } + else if (lead == 0xF0) { *lo = 0x90; *hi = 0xBF; } + else if (lead == 0xF4) { *lo = 0x80; *hi = 0x8F; } + else { *lo = 0x80; *hi = 0xBF; } +} + +/* TextEncoder ------------------------------------------------------------ */ + +static JSValue js_text_encoder_constructor(JSContext *ctx, + JSValueConst new_target, + int argc, JSValueConst *argv) +{ + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = js_get_thread_state(rt); + JSValue proto, obj; + + proto = JS_GetPropertyStr(ctx, new_target, "prototype"); + if (JS_IsException(proto)) + return proto; + obj = JS_NewObjectProtoClass(ctx, proto, ts->text_encoder_class_id); + JS_FreeValue(ctx, proto); + if (JS_IsException(obj)) + return obj; + /* Stateless; opaque is just a brand. 
*/ + JS_SetOpaque(obj, (void *)1); + return obj; +} + +static JSValue js_text_encoder_encode(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv) +{ + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = js_get_thread_state(rt); + const char *str; + size_t len; + JSValue ret; + + if (!JS_GetOpaque(this_val, ts->text_encoder_class_id)) + return JS_ThrowTypeError(ctx, "'this' is not a TextEncoder"); + if (argc < 1 || JS_IsUndefined(argv[0])) + return JS_NewUint8ArrayCopy(ctx, NULL, 0); + str = JS_ToCStringLen(ctx, &len, argv[0]); + if (!str) + return JS_EXCEPTION; + /* JS_ToCStringLen keeps lone surrogates as their 3-byte CESU-8-like + encoding (ED A0..BF XX). USVString conversion in the WHATWG Encoding + spec replaces them with U+FFFD before UTF-8 encoding. Valid UTF-8 + never produces ED A0..BF, so any such triple comes from a lone + surrogate. The replacement is 3 bytes, so output length is unchanged. */ + { + const uint8_t *s = (const uint8_t *)str; + size_t i; + for (i = 0; i + 2 < len; i++) { + if (s[i] == 0xED && s[i+1] >= 0xA0 && s[i+1] <= 0xBF) + break; + } + if (i + 2 >= len) { + ret = JS_NewUint8ArrayCopy(ctx, s, len); + } else { + uint8_t *buf = js_malloc(ctx, len); + size_t j; + if (!buf) { + JS_FreeCString(ctx, str); + return JS_EXCEPTION; + } + memcpy(buf, s, i); + for (j = i; i < len; ) { + if (i + 2 < len && s[i] == 0xED + && s[i+1] >= 0xA0 && s[i+1] <= 0xBF + && s[i+2] >= 0x80 && s[i+2] <= 0xBF) { + buf[j++] = 0xEF; buf[j++] = 0xBF; buf[j++] = 0xBD; + i += 3; + } else { + buf[j++] = s[i++]; + } + } + ret = JS_NewUint8ArrayCopy(ctx, buf, j); + js_free(ctx, buf); + } + } + JS_FreeCString(ctx, str); + return ret; +} + +static JSValue js_text_encoder_encode_into(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv) +{ + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = js_get_thread_state(rt); + const char *src; + size_t src_len, dst_len; + uint8_t *dst; + int read = 0, written = 0; + const uint8_t *p, 
*end, *next; + uint32_t cp; + size_t enc_len; + JSValue ret; + + if (!JS_GetOpaque(this_val, ts->text_encoder_class_id)) + return JS_ThrowTypeError(ctx, "'this' is not a TextEncoder"); + if (argc < 2) + return JS_ThrowTypeError(ctx, "TextEncoder.encodeInto requires two arguments"); + src = JS_ToCStringLen(ctx, &src_len, argv[0]); + if (!src) + return JS_EXCEPTION; + if (JS_GetTypedArrayType(argv[1]) != JS_TYPED_ARRAY_UINT8) { + JS_FreeCString(ctx, src); + return JS_ThrowTypeError(ctx, + "TextEncoder.encodeInto: destination must be a Uint8Array"); + } + dst = JS_GetUint8Array(ctx, &dst_len, argv[1]); + if (!dst) { + JS_FreeCString(ctx, src); + return JS_EXCEPTION; + } + + p = (const uint8_t *)src; + end = p + src_len; + while (p < end) { + cp = utf8_decode(p, &next); + /* JS_ToCStringLen keeps lone surrogates as ED A0..BF XX, which + utf8_decode happily decodes back to a surrogate code point. The + USVString conversion in the spec replaces them with U+FFFD. */ + if (cp >= 0xD800 && cp <= 0xDFFF) + cp = 0xFFFD; + enc_len = utf8_encode_len(cp); + if ((size_t)written + enc_len > dst_len) + break; + utf8_encode(dst + written, cp); + written += (int)enc_len; + /* Spec: read counts UTF-16 code units consumed from the input. */ + read += (cp > 0xFFFF) ? 
2 : 1; + p = next; + } + JS_FreeCString(ctx, src); + + ret = JS_NewObject(ctx); + if (JS_IsException(ret)) + return ret; + JS_DefinePropertyValueStr(ctx, ret, "read", + JS_NewInt32(ctx, read), JS_PROP_C_W_E); + JS_DefinePropertyValueStr(ctx, ret, "written", + JS_NewInt32(ctx, written), JS_PROP_C_W_E); + return ret; +} + +static JSValue js_text_encoder_get_encoding(JSContext *ctx, JSValueConst this_val) +{ + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = js_get_thread_state(rt); + if (!JS_GetOpaque(this_val, ts->text_encoder_class_id)) + return JS_ThrowTypeError(ctx, "'this' is not a TextEncoder"); + return JS_NewString(ctx, "utf-8"); +} + +static const JSCFunctionListEntry js_text_encoder_proto_funcs[] = { + JS_PROP_STRING_DEF("[Symbol.toStringTag]", "TextEncoder", JS_PROP_CONFIGURABLE), + JS_CFUNC_DEF("encode", 1, js_text_encoder_encode), + JS_CFUNC_DEF("encodeInto", 2, js_text_encoder_encode_into), + JS_CGETSET_DEF("encoding", js_text_encoder_get_encoding, NULL), +}; + +/* TextDecoder ------------------------------------------------------------ */ + +/* Match a label against the WHATWG list of UTF-8 aliases (case-insensitive, + ASCII-whitespace trimmed). Returns 0 on match, -1 otherwise. 
/*
 * Decide whether an encoding label names UTF-8.
 *
 * `label` / `len` describe the raw label bytes (not necessarily
 * NUL-terminated at `len`).  Leading and trailing ASCII whitespace
 * (space, TAB, LF, CR, FF) is ignored and ASCII letters are compared
 * case-insensitively against the fixed alias table below.
 *
 * Returns 0 when the label matches an alias, -1 otherwise.
 */
static int js_text_decoder_label_is_utf8(const char *label, size_t len)
{
    static const char * const aliases[] = {
        "unicode-1-1-utf-8", "unicode11utf8", "unicode20utf8",
        "utf-8", "utf8", "x-unicode20utf8",
    };
    size_t a, k;

    /* Drop leading whitespace. */
    for (; len > 0; label++, len--) {
        char c = *label;
        if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f')
            break;
    }
    /* Drop trailing whitespace. */
    for (; len > 0; len--) {
        char c = label[len - 1];
        if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f')
            break;
    }

    for (a = 0; a < countof(aliases); a++) {
        const char *alias = aliases[a];

        if (strlen(alias) != len)
            continue;
        for (k = 0; k < len; k++) {
            int ch = (unsigned char)label[k];
            /* Fold ASCII upper case only; the aliases are all lower case. */
            if (ch >= 'A' && ch <= 'Z')
                ch += 32;
            if (ch != alias[k])
                break;
        }
        if (k == len)
            return 0;
    }
    return -1;
}
ts->text_decoder_class_id); + JS_FreeValue(ctx, proto); + if (JS_IsException(obj)) + return obj; + td = js_mallocz(ctx, sizeof(*td)); + if (!td) { + JS_FreeValue(ctx, obj); + return JS_EXCEPTION; + } + td->fatal = fatal; + td->ignore_bom = ignore_bom; + JS_SetOpaque(obj, td); + return obj; +} + +/* Get the byte view of a BufferSource (ArrayBuffer or any TypedArray view). + On success returns 0 with bytes/len populated; on failure returns -1 + with a TypeError pending. JS_UNDEFINED yields the empty input. */ +static int js_text_decoder_get_bytes(JSContext *ctx, JSValueConst v, + const uint8_t **bytes, size_t *len) +{ + if (JS_IsUndefined(v)) { + *bytes = NULL; *len = 0; + return 0; + } + if (JS_IsArrayBuffer(v)) { + size_t l; + uint8_t *p = JS_GetArrayBuffer(ctx, &l, v); + if (!p) return -1; + *bytes = p; *len = l; + return 0; + } + if (JS_GetTypedArrayType(v) >= 0) { + size_t off, blen, bpe, ablen; + JSValue ab = JS_GetTypedArrayBuffer(ctx, v, &off, &blen, &bpe); + uint8_t *p; + if (JS_IsException(ab)) return -1; + p = JS_GetArrayBuffer(ctx, &ablen, ab); + JS_FreeValue(ctx, ab); + if (!p) return -1; + *bytes = p + off; *len = blen; + return 0; + } + JS_ThrowTypeError(ctx, + "TextDecoder.decode: input must be an ArrayBuffer or TypedArray"); + return -1; +} + +static JSValue js_text_decoder_decode(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv) +{ + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = js_get_thread_state(rt); + JSTextDecoder *td; + const uint8_t *src; + size_t src_len; + bool stream = false; + uint8_t *combined = NULL; + uint8_t *out = NULL; + size_t out_len = 0, out_cap; + const uint8_t *p, *p_end, *next; + uint32_t cp; + JSValue ret; + JSValueConst input = argc > 0 ? 
argv[0] : JS_UNDEFINED; + + td = JS_GetOpaque(this_val, ts->text_decoder_class_id); + if (!td) + return JS_ThrowTypeError(ctx, "'this' is not a TextDecoder"); + if (argc >= 2 && JS_IsObject(argv[1])) { + JSValue v = JS_GetPropertyStr(ctx, argv[1], "stream"); + if (JS_IsException(v)) return v; + stream = JS_ToBool(ctx, v); + JS_FreeValue(ctx, v); + } + if (js_text_decoder_get_bytes(ctx, input, &src, &src_len) < 0) + return JS_EXCEPTION; + + if (td->pending_len > 0) { + size_t total = (size_t)td->pending_len + src_len; + combined = js_malloc(ctx, total ? total : 1); + if (!combined) return JS_EXCEPTION; + memcpy(combined, td->pending, td->pending_len); + if (src_len > 0) memcpy(combined + td->pending_len, src, src_len); + src = combined; + src_len = total; + td->pending_len = 0; + } + + /* Worst case output: each byte expands to 3-byte U+FFFD replacement. */ + out_cap = src_len * 3 + 4; + out = js_malloc(ctx, out_cap); + if (!out) { + if (combined) js_free(ctx, combined); + return JS_EXCEPTION; + } + + p = src; + if (p == NULL){ + assert(src_len == 0); + p_end = NULL; + goto skip_loop_1; + } else { + p_end = src + src_len; + } + while (p < p_end) { + int seq_len = js_utf8_seq_len(*p); + if (seq_len == 0) { + if (td->fatal) goto invalid; + out[out_len++] = 0xEF; out[out_len++] = 0xBF; out[out_len++] = 0xBD; + p++; + continue; + } + if (p + seq_len > p_end) { + /* Sequence is incomplete by length. Check the bytes we do have + against the per-lead continuation bounds: a byte that's out + of range is a known error and must be re-read as a fresh + lead, not buffered. 
*/ + int avail = (int)(p_end - p); + int k = 1; + if (avail >= 2) { + uint8_t lo, hi; + js_utf8_first_cont_bounds(*p, &lo, &hi); + if (p[1] >= lo && p[1] <= hi) { + for (k = 2; k < avail; k++) { + if (p[k] < 0x80 || p[k] > 0xBF) break; + } + } + } + if (k < avail) { + /* p[k] violates the continuation rules: emit one error, + advance past the lead and any valid continuations, and + leave p[k] for the next iteration. */ + if (td->fatal) goto invalid; + out[out_len++] = 0xEF; out[out_len++] = 0xBF; out[out_len++] = 0xBD; + p += k; + continue; + } + /* Truly partial: defer in stream mode, otherwise flush as one error. */ + if (stream) { + memcpy(td->pending, p, avail); + td->pending_len = avail; + p = p_end; + break; + } + if (td->fatal) goto invalid; + out[out_len++] = 0xEF; out[out_len++] = 0xBF; out[out_len++] = 0xBD; + p = p_end; + break; + } + cp = utf8_decode_len(p, p_end - p, &next); + if (cp == 0xFFFD && next == p + 1 && *p >= 0x80) { + if (td->fatal) goto invalid; + out[out_len++] = 0xEF; out[out_len++] = 0xBF; out[out_len++] = 0xBD; + p = next; + continue; + } + if (!td->bom_seen) { + td->bom_seen = true; + if (!td->ignore_bom && cp == 0xFEFF) { + p = next; + continue; + } + } + out_len += utf8_encode(out + out_len, cp); + p = next; + } + skip_loop_1: + + if (!stream) { + td->pending_len = 0; + td->bom_seen = false; + } + ret = JS_NewStringLen(ctx, (const char *)out, out_len); + js_free(ctx, out); + if (combined) js_free(ctx, combined); + return ret; + +invalid: + js_free(ctx, out); + if (combined) js_free(ctx, combined); + return JS_ThrowTypeError(ctx, "The encoded data was not valid"); +} + +static JSValue js_text_decoder_get_encoding(JSContext *ctx, JSValueConst this_val) +{ + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = js_get_thread_state(rt); + if (!JS_GetOpaque(this_val, ts->text_decoder_class_id)) + return JS_ThrowTypeError(ctx, "'this' is not a TextDecoder"); + return JS_NewString(ctx, "utf-8"); +} + +static JSValue 
js_text_decoder_get_fatal(JSContext *ctx, JSValueConst this_val) +{ + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = js_get_thread_state(rt); + JSTextDecoder *td = JS_GetOpaque(this_val, ts->text_decoder_class_id); + if (!td) return JS_ThrowTypeError(ctx, "'this' is not a TextDecoder"); + return JS_NewBool(ctx, td->fatal); +} + +static JSValue js_text_decoder_get_ignore_bom(JSContext *ctx, JSValueConst this_val) +{ + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = js_get_thread_state(rt); + JSTextDecoder *td = JS_GetOpaque(this_val, ts->text_decoder_class_id); + if (!td) return JS_ThrowTypeError(ctx, "'this' is not a TextDecoder"); + return JS_NewBool(ctx, td->ignore_bom); +} + +static const JSCFunctionListEntry js_text_decoder_proto_funcs[] = { + JS_PROP_STRING_DEF("[Symbol.toStringTag]", "TextDecoder", JS_PROP_CONFIGURABLE), + JS_CFUNC_DEF("decode", 1, js_text_decoder_decode), + JS_CGETSET_DEF("encoding", js_text_decoder_get_encoding, NULL), + JS_CGETSET_DEF("fatal", js_text_decoder_get_fatal, NULL), + JS_CGETSET_DEF("ignoreBOM", js_text_decoder_get_ignore_bom, NULL), +}; + +void js_std_add_text_codecs(JSContext *ctx) +{ + JSRuntime *rt = JS_GetRuntime(ctx); + JSThreadState *ts = js_get_thread_state(rt); + JSValue global_obj, proto, ctor; + + global_obj = JS_GetGlobalObject(ctx); + + JS_NewClassID(rt, &ts->text_encoder_class_id); + JS_NewClass(rt, ts->text_encoder_class_id, &js_text_encoder_class); + proto = JS_NewObject(ctx); + JS_SetPropertyFunctionList(ctx, proto, js_text_encoder_proto_funcs, + countof(js_text_encoder_proto_funcs)); + JS_SetClassProto(ctx, ts->text_encoder_class_id, proto); + ctor = JS_NewCFunction2(ctx, js_text_encoder_constructor, "TextEncoder", 0, + JS_CFUNC_constructor, 0); + JS_SetConstructor(ctx, ctor, proto); + JS_SetPropertyStr(ctx, global_obj, "TextEncoder", ctor); + + JS_NewClassID(rt, &ts->text_decoder_class_id); + JS_NewClass(rt, ts->text_decoder_class_id, &js_text_decoder_class); + proto = JS_NewObject(ctx); + 
JS_SetPropertyFunctionList(ctx, proto, js_text_decoder_proto_funcs, + countof(js_text_decoder_proto_funcs)); + JS_SetClassProto(ctx, ts->text_decoder_class_id, proto); + ctor = JS_NewCFunction2(ctx, js_text_decoder_constructor, "TextDecoder", 2, + JS_CFUNC_constructor, 0); + JS_SetConstructor(ctx, ctor, proto); + JS_SetPropertyStr(ctx, global_obj, "TextDecoder", ctor); + + JS_FreeValue(ctx, global_obj); +} + void js_std_add_helpers(JSContext *ctx, int argc, char **argv) { JSValue global_obj, console, args; @@ -4624,6 +5148,8 @@ void js_std_add_helpers(JSContext *ctx, int argc, char **argv) JS_SetPropertyStr(ctx, global_obj, "print", JS_NewCFunction(ctx, js_print, "print", 1)); + js_std_add_text_codecs(ctx); + JS_FreeValue(ctx, global_obj); } diff --git a/quickjs-libc.h b/quickjs-libc.h index fd91a2f68..847c6d58f 100644 --- a/quickjs-libc.h +++ b/quickjs-libc.h @@ -45,6 +45,7 @@ JS_LIBC_EXTERN JSModuleDef *js_init_module_os(JSContext *ctx, JS_LIBC_EXTERN JSModuleDef *js_init_module_bjson(JSContext *ctx, const char *module_name); JS_LIBC_EXTERN void js_std_add_helpers(JSContext *ctx, int argc, char **argv); +JS_LIBC_EXTERN void js_std_add_text_codecs(JSContext *ctx); JS_LIBC_EXTERN int js_std_loop(JSContext *ctx); JS_LIBC_EXTERN int js_std_loop_once(JSContext *ctx); JS_LIBC_EXTERN int js_std_poll_io(JSContext *ctx, int timeout_ms); diff --git a/quickjs-opcode.h b/quickjs-opcode.h index ec2a5ad91..a454f836a 100644 --- a/quickjs-opcode.h +++ b/quickjs-opcode.h @@ -372,6 +372,8 @@ DEF( is_null, 1, 1, 1, none) DEF(typeof_is_undefined, 1, 1, 1, none) DEF( typeof_is_function, 1, 1, 1, none) +DEF( debug, 1, 0, 0, none) + #undef DEF #undef def #endif /* DEF */ diff --git a/quickjs.c b/quickjs.c index 38a724fbb..49ef4b827 100644 --- a/quickjs.c +++ b/quickjs.c @@ -54,11 +54,7 @@ #define DIRECT_DISPATCH 1 #endif -#if defined(__APPLE__) -#define MALLOC_OVERHEAD 0 -#else #define MALLOC_OVERHEAD 8 -#endif #if defined(__NEWLIB__) #define NO_TM_GMTOFF @@ -539,6 +535,9 @@ struct 
JSContext { const char *input, size_t input_len, const char *filename, int line, int flags, int scope_idx); void *user_opaque; + + JSDebugTraceFunc *debug_trace; + void *debug_trace_opaque; }; typedef union JSFloat64Union { @@ -1390,6 +1389,7 @@ static void js_async_function_resolve_mark(JSRuntime *rt, JSValueConst val, static JSValue JS_EvalInternal(JSContext *ctx, JSValueConst this_obj, const char *input, size_t input_len, const char *filename, int line, int flags, int scope_idx); +static const char *JS_AtomGetStr(JSContext *ctx, char *buf, int buf_size, JSAtom atom); static void js_free_module_def(JSContext *ctx, JSModuleDef *m); static void js_mark_module_def(JSRuntime *rt, JSModuleDef *m, JS_MarkFunc *mark_func); @@ -2593,6 +2593,141 @@ JSValue JS_GetFunctionProto(JSContext *ctx) return js_dup(ctx->function_proto); } +void JS_SetDebugTraceHandler(JSContext *ctx, JSDebugTraceFunc *cb, void *opaque) +{ + ctx->debug_trace = cb; + ctx->debug_trace_opaque = opaque; +} + +static JSStackFrame *js_get_stack_frame_at_level(JSContext *ctx, int level) +{ + JSRuntime *rt = ctx->rt; + JSStackFrame *sf = rt->current_stack_frame; + int current_level = 0; + + while (sf != NULL && current_level < level) { + sf = sf->prev_frame; + current_level++; + } + return sf; +} + +int JS_GetStackDepth(JSContext *ctx) +{ + JSRuntime *rt = ctx->rt; + JSStackFrame *sf = rt->current_stack_frame; + int depth = 0; + + while (sf != NULL) { + depth++; + sf = sf->prev_frame; + } + return depth; +} + +int JS_GetLocalVariablesAtLevel(JSContext *ctx, int level, + JSDebugLocalVar **pvars, int *pcount) +{ + if (pvars) + *pvars = NULL; + if (pcount) + *pcount = 0; + if (!pvars) { + JS_ThrowTypeError(ctx, "pvars must not be NULL"); + return -1; + } + + JSStackFrame *sf = js_get_stack_frame_at_level(ctx, level); + if (sf == NULL) + return 0; + + JSValue func = sf->cur_func; + if (JS_VALUE_GET_TAG(func) != JS_TAG_OBJECT) + return 0; + + JSObject *p = JS_VALUE_GET_OBJ(func); + if (p->class_id != 
JS_CLASS_BYTECODE_FUNCTION) + return 0; + + JSFunctionBytecode *b = p->u.func.function_bytecode; + int total_vars = b->arg_count + b->var_count; + + if (total_vars == 0) + return 0; + + JSDebugLocalVar *vars = js_malloc(ctx, sizeof(JSDebugLocalVar) * total_vars); + if (!vars) + return -1; + + int idx = 0; + +#define APPEND_VAR(vd_, value_, is_arg_) \ + do { \ + JSAtom name_ = (vd_)->var_name; \ + const char *name_str_; \ + if (name_ != JS_ATOM_NULL) { \ + char tmp_[32]; \ + JS_AtomGetStr(ctx, tmp_, sizeof(tmp_), name_); \ + if (tmp_[0] == '<') \ + break; \ + } \ + name_str_ = JS_AtomToCString(ctx, name_); \ + if (unlikely(!name_str_)) \ + goto fail; \ + vars[idx].name = name_str_; \ + /* Do not expose the internal TDZ sentinel to C callers. */ \ + if (JS_VALUE_GET_TAG(value_) == JS_TAG_UNINITIALIZED) \ + vars[idx].value = JS_UNDEFINED; \ + else \ + vars[idx].value = js_dup(value_); \ + vars[idx].is_arg = (is_arg_); \ + vars[idx].scope_level = (vd_)->scope_level; \ + idx++; \ + } while (0) + + for (int i = 0; i < b->arg_count; i++) { + JSVarDef *vd = &b->vardefs[i]; + APPEND_VAR(vd, sf->arg_buf[i], true); + } + + for (int i = 0; i < b->var_count; i++) { + JSVarDef *vd = &b->vardefs[b->arg_count + i]; + APPEND_VAR(vd, sf->var_buf[i], false); + } + +#undef APPEND_VAR + + if (idx == 0) { + js_free(ctx, vars); + return 0; + } + + if (pvars) + *pvars = vars; + if (pcount) + *pcount = idx; + return 0; + +fail: + for (int i = 0; i < idx; i++) { + JS_FreeCString(ctx, vars[i].name); + JS_FreeValue(ctx, vars[i].value); + } + js_free(ctx, vars); + return -1; +} + +void JS_FreeLocalVariables(JSContext *ctx, JSDebugLocalVar *vars, int count) +{ + if (!vars) + return; + for (int i = 0; i < count; i++) { + JS_FreeCString(ctx, vars[i].name); + JS_FreeValue(ctx, vars[i].value); + } + js_free(ctx, vars); +} + typedef enum JSFreeModuleEnum { JS_FREE_MODULE_ALL, JS_FREE_MODULE_NOT_RESOLVED, @@ -17687,6 +17822,44 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst 
func_obj, JSValue *call_argv; SWITCH(pc) { + CASE(OP_debug): + if (unlikely(ctx->debug_trace)) { + int col_num = 0; + int line_num = -1; + uint32_t pc_index = (uint32_t)(pc - b->byte_code_buf - 1); + line_num = find_line_num(ctx, b, pc_index, &col_num); + + /* Use JS_AtomToCString to get the full filename / funcname + without the 63-byte truncation that a stack buffer would + impose. The pointers are only valid for the duration of + the callback. */ + const char *filename = JS_AtomToCString(ctx, b->filename); + if (unlikely(!filename)) { + /* OOM: a pending exception has been raised */ + goto exception; + } + const char *funcname = JS_AtomToCString(ctx, b->func_name); + if (unlikely(!funcname)) { + JS_FreeCString(ctx, filename); + goto exception; + } + int ret = ctx->debug_trace(ctx, filename, funcname, + line_num, col_num, + ctx->debug_trace_opaque); + JS_FreeCString(ctx, filename); + JS_FreeCString(ctx, funcname); + + if (ret != 0 || JS_HasException(ctx)) { + /* If the callback indicated failure but did not raise + an exception itself, synthesize a default one so the + caller never observes JS_UNINITIALIZED via + JS_GetException(). 
*/ + if (ret != 0 && !JS_HasException(ctx)) + JS_ThrowInternalError(ctx, "aborted by debugger"); + goto exception; + } + } + BREAK; CASE(OP_push_i32): *sp++ = js_int32(get_u32(pc)); pc += 4; @@ -23410,6 +23583,20 @@ static void emit_source_loc(JSParseState *s) emit_source_loc_at(s, s->token.line_num, s->token.col_num); } +static void emit_debug(JSParseState *s) +{ + if (unlikely(s->ctx->debug_trace)) + dbuf_putc(&s->cur_func->byte_code, OP_debug); +} + +static void emit_source_loc_debug(JSParseState *s) +{ + if (unlikely(s->ctx->debug_trace)) { + emit_source_loc(s); + emit_debug(s); + } +} + static void emit_op(JSParseState *s, uint8_t val) { JSFunctionDef *fd = s->cur_func; @@ -23598,7 +23785,8 @@ static int find_var_htab(JSFunctionDef *fd, JSAtom var_name) p = &fd->vars_htab[i & m]; if (*p == UINT32_MAX) return -1; - if (fd->vars[*p].var_name == var_name) + if (fd->vars[*p].var_name == var_name && + fd->vars[*p].scope_level == 0) return *p; i += j; j += 1; // quadratic probing @@ -23625,11 +23813,9 @@ static int find_var(JSContext *ctx, JSFunctionDef *fd, JSAtom name) if (fd->vars_htab) { i = find_var_htab(fd, name); - if (i == -1) - goto not_found; - vd = &fd->vars[i]; - if (vd->scope_level == 0) + if (i >= 0) return i; + goto not_found; } for(i = fd->var_count; i-- > 0;) { vd = &fd->vars[i]; @@ -28774,6 +28960,7 @@ static __exception int js_parse_statement_or_decl(JSParseState *s, goto fail; break; case TOK_RETURN: + emit_source_loc_debug(s); if (s->cur_func->is_eval) { js_parse_error(s, "return not in a function"); goto fail; @@ -28802,6 +28989,7 @@ static __exception int js_parse_statement_or_decl(JSParseState *s, goto fail; } emit_source_loc(s); + emit_debug(s); if (js_parse_expr(s)) goto fail; emit_op(s, OP_throw); @@ -28825,6 +29013,7 @@ static __exception int js_parse_statement_or_decl(JSParseState *s, goto fail; } s->cur_func->has_await = true; + emit_source_loc_debug(s); if (next_token(s)) /* skip 'using' */ goto fail; if (js_parse_var(s, PF_IN_ACCEPTED 
| PF_AWAIT_USING, TOK_USING, /*export_flag*/false)) @@ -28847,6 +29036,7 @@ static __exception int js_parse_statement_or_decl(JSParseState *s, } /* fall thru */ case TOK_VAR: + emit_source_loc_debug(s); if (next_token(s)) goto fail; if (js_parse_var(s, PF_IN_ACCEPTED, tok, /*export_flag*/false)) @@ -28857,6 +29047,7 @@ static __exception int js_parse_statement_or_decl(JSParseState *s, case TOK_IF: { int label1, label2, mask; + emit_source_loc_debug(s); if (next_token(s)) goto fail; /* create a new scope for `let f;if(1) function f(){}` */ @@ -28967,6 +29158,7 @@ static __exception int js_parse_statement_or_decl(JSParseState *s, int source_line_num, source_col_num; bool is_async; + emit_source_loc_debug(s); source_line_num = s->token.line_num; source_col_num = s->token.col_num; if (next_token(s)) @@ -29202,6 +29394,7 @@ static __exception int js_parse_statement_or_decl(JSParseState *s, int default_label_pos; BlockEnv break_entry; + emit_source_loc_debug(s); if (next_token(s)) goto fail; @@ -29553,6 +29746,7 @@ static __exception int js_parse_statement_or_decl(JSParseState *s, js_parse_error(s, "using declaration is not allowed at the top level of a script"); goto fail; } + emit_source_loc_debug(s); if (next_token(s)) goto fail; if (js_parse_var(s, PF_IN_ACCEPTED, TOK_USING, /*export_flag*/false)) @@ -29606,6 +29800,7 @@ static __exception int js_parse_statement_or_decl(JSParseState *s, default: hasexpr: emit_source_loc(s); + emit_debug(s); if (js_parse_expr(s)) goto fail; if (s->cur_func->eval_ret_idx >= 0) { @@ -33969,6 +34164,8 @@ static bool code_match(CodeContext *s, int pos, ...) 
line_num = get_u32(tab + pos + 1); col_num = get_u32(tab + pos + 5); pos = pos_next; + } else if (op == OP_debug) { + pos = pos_next; } else { break; } @@ -34256,6 +34453,9 @@ static int get_label_pos(JSFunctionDef *s, int label) case OP_source_loc: pos += 9; continue; + case OP_debug: + pos += 1; + continue; case OP_label: pos += 5; continue; @@ -35017,6 +35217,13 @@ static __exception int resolve_labels(JSContext *ctx, JSFunctionDef *s) col_num = get_u32(bc_buf + pos + 5); break; + case OP_debug: + /* record pc2line so the debugger can resolve the source + location when OP_debug is hit at runtime */ + add_pc2line_info(s, bc_out.size, line_num, col_num); + dbuf_putc(&bc_out, OP_debug); + break; + case OP_label: { label = get_u32(bc_buf + pos + 1); diff --git a/quickjs.h b/quickjs.h index b9ed27560..498ee1a2b 100644 --- a/quickjs.h +++ b/quickjs.h @@ -523,6 +523,62 @@ JS_EXTERN void JS_SetClassProto(JSContext *ctx, JSClassID class_id, JSValue obj) JS_EXTERN JSValue JS_GetClassProto(JSContext *ctx, JSClassID class_id); JS_EXTERN JSValue JS_GetFunctionProto(JSContext *ctx); +/* Debug callback - invoked when the interpreter hits an OP_debug opcode. + Return 0 to continue execution. Return non-zero to abort execution at + this point: the engine will jump to the exception handler. The + callback may itself call JS_Throw* to provide a specific exception; + if the callback returns non-zero without having raised one, the engine + will synthesize a default InternalError("aborted by debugger"). If + the callback raises an exception via JS_Throw* but returns 0, the + engine still treats it as a request to abort. + + The filename / funcname pointers passed to the callback are only valid + for the duration of the callback invocation; do not store them. + + OP_debug opcodes are only emitted at statement boundaries when a debug + trace handler is registered at parse time. Therefore only code that + is parsed (e.g. 
by JS_Eval / JS_Compile) AFTER JS_SetDebugTraceHandler + has been called will be instrumented; previously compiled bytecode + will not invoke the callback. In practice, install the handler before + evaluating any application code. */ +typedef int JSDebugTraceFunc(JSContext *ctx, + const char *filename, + const char *funcname, + int line, + int col, + void *opaque); + +/* Set (or clear) the debug trace handler on a context. Pass NULL to + disable. Works with any context, including those created with + JS_NewContextRaw. See JSDebugTraceFunc above for the parse-time + instrumentation contract. */ +JS_EXTERN void JS_SetDebugTraceHandler(JSContext *ctx, + JSDebugTraceFunc *cb, + void *opaque); + +/* Debug API: Get local variables in stack frames */ +typedef struct JSDebugLocalVar { + const char *name; + JSValue value; + bool is_arg; + int scope_level; +} JSDebugLocalVar; + +/* Get the call stack depth (0 when no frames are active). */ +JS_EXTERN int JS_GetStackDepth(JSContext *ctx); + +/* Get local variables at a specific stack level (0 = current frame, 1 = caller, etc.). + On success, *pvars receives an allocated array of JSDebugLocalVar entries + that must be freed with JS_FreeLocalVariables(), and *pcount receives the + entry count. If no variables are available, *pvars is set to NULL and + *pcount is set to 0. Returns -1 on exception. 
*/ +JS_EXTERN int JS_GetLocalVariablesAtLevel(JSContext *ctx, int level, + JSDebugLocalVar **pvars, + int *pcount); + +/* Free local variables array returned by JS_GetLocalVariablesAtLevel */ +JS_EXTERN void JS_FreeLocalVariables(JSContext *ctx, JSDebugLocalVar *vars, int count); + /* the following functions are used to select the intrinsic object to save memory */ JS_EXTERN JSContext *JS_NewContextRaw(JSRuntime *rt); diff --git a/run-test262.c b/run-test262.c index 4c996fb06..ba2f3ff1c 100644 --- a/run-test262.c +++ b/run-test262.c @@ -1729,6 +1729,7 @@ JSContext *JS_NewCustomContext(JSRuntime *rt) js_init_module_std(ctx, "qjs:std"); js_init_module_os(ctx, "qjs:os"); js_init_module_bjson(ctx, "qjs:bjson"); + js_std_add_text_codecs(ctx); obj = JS_GetGlobalObject(ctx); JS_SetPropertyFunctionList(ctx, obj, &qjs_object, 1); JS_FreeValue(ctx, obj); diff --git a/tests/test_find_var_htab.js b/tests/test_find_var_htab.js new file mode 100644 index 000000000..996954307 --- /dev/null +++ b/tests/test_find_var_htab.js @@ -0,0 +1,20 @@ +import { assert } from "./assert.js"; + +// Regression test for find_var_htab: when a var shadows a block-scoped +// let of the same name, the htab probe must skip entries with +// scope_level != 0. 27 vars are needed to trigger the htab path. 
// Declares a block-scoped `let x`, then enough `var`s (25 fillers plus
// `var x`) to reach the 27 variables the regression needs, and checks
// that a closure reading `x` sees the function-level `var`, not the
// block-scoped `let` of the same name.
function test_find_var_htab() {
    // Block-scoped binding that must NOT be picked up by the closure.
    { let x = "let"; }
    // Filler declarations: only their number matters.
    var v0, v1, v2, v3, v4, v5, v6, v7, v8, v9;
    var v10, v11, v12, v13, v14, v15, v16, v17;
    var v18, v19, v20, v21, v22, v23, v24;
    // Function-level binding the closure is expected to resolve to.
    var x = "var";

    function closure() {
        return x;
    }

    assert(closure(), "var", "find_var_htab returned wrong slot index");
}
+ assertArrayEquals(arr(e.encode("\uD800\uD800")), + [0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD]); + // Reverse-order surrogates (low then high): both lone. + assertArrayEquals(arr(e.encode("\uDC00\uD800")), + [0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD]); + // Lone high followed by ASCII before a matched pair: only the lone one + // is replaced. + assertArrayEquals(arr(e.encode("\uD800a😀")), + [0xEF, 0xBF, 0xBD, 0x61, 0xF0, 0x9F, 0x98, 0x80]); +} + +function test_encode_into_basic() { + const e = new TextEncoder(); + + let dst = new Uint8Array(8); + let r = e.encodeInto("hi", dst); + assert(r.read, 2); + assert(r.written, 2); + assertArrayEquals(arr(dst.subarray(0, 2)), [0x68, 0x69]); + + // Surrogate pair: read counts UTF-16 code units (2), written is 4 bytes. + dst = new Uint8Array(8); + r = e.encodeInto("😀", dst); + assert(r.read, 2); + assert(r.written, 4); + assertArrayEquals(arr(dst.subarray(0, 4)), [0xF0, 0x9F, 0x98, 0x80]); + + // Lone surrogate replaced with U+FFFD; read still counts 1 UTF-16 unit. + dst = new Uint8Array(8); + r = e.encodeInto("a\uD800", dst); + assert(r.read, 2); + assert(r.written, 4); + assertArrayEquals(arr(dst.subarray(0, 4)), [0x61, 0xEF, 0xBF, 0xBD]); + + // Empty source. + dst = new Uint8Array(4); dst.fill(0xAA); + r = e.encodeInto("", dst); + assert(r.read, 0); assert(r.written, 0); + assertArrayEquals(arr(dst), [0xAA, 0xAA, 0xAA, 0xAA]); + + // Empty destination. + r = e.encodeInto("abc", new Uint8Array(0)); + assert(r.read, 0); assert(r.written, 0); +} + +function test_encode_into_partial() { + const e = new TextEncoder(); + + // Destination too small for the next char's full encoding — must NOT + // write a partial sequence. + let dst = new Uint8Array(2); dst.fill(0xAA); + let r = e.encodeInto("☃hi", dst); // snowman is 3 bytes + assert(r.read, 0); assert(r.written, 0); + assertArrayEquals(arr(dst), [0xAA, 0xAA]); + + // Same for U+FFFD replacement of a lone surrogate (3 bytes). 
+ dst = new Uint8Array(2); dst.fill(0xAA); + r = e.encodeInto("\uD800X", dst); + assert(r.read, 0); assert(r.written, 0); + assertArrayEquals(arr(dst), [0xAA, 0xAA]); + + // Some chars fit, then we stop short of an over-large one. + dst = new Uint8Array(4); dst.fill(0xAA); + r = e.encodeInto("ab☃c", dst); + assert(r.read, 2); assert(r.written, 2); + assertArrayEquals(arr(dst), [0x61, 0x62, 0xAA, 0xAA]); +} + +function test_encode_into_argument_errors() { + const e = new TextEncoder(); + + assertThrows(TypeError, () => e.encodeInto()); + assertThrows(TypeError, () => e.encodeInto("x")); + assertThrows(TypeError, () => e.encodeInto("x", "not a buffer")); + assertThrows(TypeError, () => e.encodeInto("x", new Int8Array(4))); + assertThrows(TypeError, () => e.encodeInto("x", new Uint16Array(4))); + assertThrows(TypeError, () => e.encodeInto("x", new Uint8ClampedArray(4))); + assertThrows(TypeError, () => e.encodeInto("x", new ArrayBuffer(4))); + + // Source is stringified before destination is validated (spec order). + let calls = []; + const src = { toString() { calls.push("src"); return "x"; } }; + assertThrows(TypeError, () => e.encodeInto(src, "not a buffer")); + assertArrayEquals(calls, ["src"]); +} + +function test_encoder_brand() { + assertThrows(TypeError, () => TextEncoder.prototype.encode.call({}, "x")); + assertThrows(TypeError, () => + TextEncoder.prototype.encodeInto.call({}, "x", new Uint8Array(4))); + // Calling the constructor without `new`. 
+    assertThrows(TypeError, () => TextEncoder());
+}
+
+// A default-constructed TextDecoder reports utf-8 / non-fatal / BOM-stripping
+// and decodes empty, ASCII, 3-byte and 4-byte inputs.
+function test_decoder_basic() {
+    const d = new TextDecoder();
+    assert(d.encoding, "utf-8");
+    assert(d.fatal, false);
+    assert(d.ignoreBOM, false);
+    assert(Object.prototype.toString.call(d), "[object TextDecoder]");
+
+    assert(d.decode(), "");
+    assert(d.decode(undefined), "");
+    assert(d.decode(bytes([])), "");
+    assert(d.decode(bytes([0x68, 0x69])), "hi");
+    assert(d.decode(bytes([0xE2, 0x98, 0x83])), "☃");
+    assert(d.decode(bytes([0xF0, 0x9F, 0x98, 0x80])), "😀"); // U+1F600
+}
+
+// decode() accepts typed arrays, ArrayBuffers and offset views, and throws
+// TypeError for values that are not buffers.
+function test_decoder_input_types() {
+    const d = new TextDecoder();
+    const data = [0x61, 0x62, 0x63];
+
+    assert(d.decode(new Uint8Array(data)), "abc");
+    assert(d.decode(new Uint8Array(data).buffer), "abc");
+    assert(d.decode(new Int8Array(new Uint8Array(data).buffer)), "abc");
+
+    // Subarray view at an offset must use that view's bytes only.
+    const big = new Uint8Array([0xFF, 0x61, 0x62, 0x63, 0xFF]);
+    assert(d.decode(big.subarray(1, 4)), "abc");
+
+    assertThrows(TypeError, () => d.decode("not a buffer"));
+    assertThrows(TypeError, () => d.decode({}));
+    assertThrows(TypeError, () => d.decode(null));
+    assertThrows(TypeError, () => d.decode(123));
+}
+
+// Every accepted label (any case, surrounding whitespace, legacy aliases)
+// yields encoding "utf-8"; every other label throws RangeError.
+function test_decoder_label() {
+    for (const label of [
+        "utf-8", "UTF-8", "utf8", "UTF8", "Utf-8",
+        " utf-8\t", "\nutf-8\r\f", "\fUTF-8 ",
+        "unicode-1-1-utf-8", "unicode11utf8",
+        "unicode20utf8", "x-unicode20utf8",
+    ]) {
+        assert(new TextDecoder(label).encoding, "utf-8");
+    }
+    for (const label of ["latin1", "iso-8859-1", "utf-16", "windows-1252",
+                         "utf-7", "ascii", ""]) {
+        assertThrows(RangeError, () => new TextDecoder(label));
+    }
+}
+
+// The `fatal` and `ignoreBOM` constructor options are reflected by the
+// matching getters, after boolean coercion.
+function test_decoder_options() {
+    let d = new TextDecoder("utf-8", { fatal: true });
+    assert(d.fatal, true); assert(d.ignoreBOM, false);
+
+    d = new TextDecoder("utf-8", { ignoreBOM: true });
+    assert(d.fatal, false); assert(d.ignoreBOM, true);
+
+    d = new TextDecoder("utf-8", { fatal: true, ignoreBOM: true });
+    assert(d.fatal, true); assert(d.ignoreBOM, true);
+
+    // Truthy/falsy coercion.
+    d = new TextDecoder("utf-8", { fatal: 1, ignoreBOM: 0 });
+    assert(d.fatal, true); assert(d.ignoreBOM, false);
+
+    // Options argument omitted: both flags take their defaults.
+    d = new TextDecoder("utf-8");
+    assert(d.fatal, false); assert(d.ignoreBOM, false);
+}
+
+// BOM handling: stripped only at the very start of the stream, and only
+// when ignoreBOM is false.
+//
+// U+FEFF is zero-width, so expected strings spell it as \uFEFF to keep the
+// expectation visible (and intact) in the source text.
+function test_decoder_bom() {
+    const bom = [0xEF, 0xBB, 0xBF];
+
+    // Default: BOM at start is stripped.
+    let d = new TextDecoder();
+    assert(d.decode(bytes([...bom, 0x68, 0x69])), "hi");
+    // BOM in the middle is kept as U+FEFF.
+    assert(d.decode(bytes([0x68, ...bom, 0x69])), "h\uFEFFi");
+    // ignoreBOM=true: BOM is kept.
+    d = new TextDecoder("utf-8", { ignoreBOM: true });
+    assert(d.decode(bytes([...bom, 0x68])), "\uFEFFh");
+    // Decoder state is reset on non-stream call: a fresh BOM is honored.
+    d = new TextDecoder();
+    assert(d.decode(bytes([...bom, 0x61])), "a");
+    assert(d.decode(bytes([...bom, 0x62])), "b");
+    // BOM split across stream calls is still recognized.
+    d = new TextDecoder();
+    assert(d.decode(bytes([0xEF, 0xBB]), { stream: true }), "");
+    assert(d.decode(bytes([0xBF, 0x68])), "h");
+}
+
+// Non-fatal decoding replaces each malformed sequence with U+FFFD and
+// resumes at the first byte that is not part of that sequence.
+function test_decoder_invalid_sequences() {
+    const d = new TextDecoder();
+
+    // Stray continuation byte.
+    assert(d.decode(bytes([0x80])), "�");
+
+    // Lead byte followed by an out-of-range continuation: emit U+FFFD AND
+    // re-process the offending byte.
+    assert(d.decode(bytes([0xE0, 0x41])), "�A");
+    assert(d.decode(bytes([0xE0, 0x80])), "��");
+    assert(d.decode(bytes([0xF0, 0x80])), "��");
+    assert(d.decode(bytes([0xF4, 0x90])), "��");
+    // 0x7F is re-read as U+007F (DEL); written as an escape because the
+    // raw control character is invisible in source.
+    assert(d.decode(bytes([0xF0, 0x90, 0x7F])), "�\x7F");
+
+    // Truly partial sequences (valid prefix, no following byte): single U+FFFD.
+    assert(d.decode(bytes([0xE0])), "�");
+    assert(d.decode(bytes([0xE0, 0xA0])), "�");
+    assert(d.decode(bytes([0xF0, 0x90])), "�");
+    assert(d.decode(bytes([0xF0, 0x90, 0x80])), "�");
+
+    // Bytes that can never start a UTF-8 sequence.
+    assert(d.decode(bytes([0xC0])), "�");
+    assert(d.decode(bytes([0xC1])), "�");
+    assert(d.decode(bytes([0xF5])), "�");
+    assert(d.decode(bytes([0xFF])), "�");
+}
+
+// With fatal: true, malformed input throws TypeError instead of producing
+// U+FFFD; an incomplete sequence only throws once the stream is flushed.
+function test_decoder_fatal() {
+    const d = new TextDecoder("utf-8", { fatal: true });
+    assert(d.decode(bytes([0x68, 0x69])), "hi");
+    assertThrows(TypeError, () => d.decode(bytes([0x80])));
+    assertThrows(TypeError, () => d.decode(bytes([0xE0, 0x41])));
+    assertThrows(TypeError, () => d.decode(bytes([0xE0])));
+    assertThrows(TypeError, () => d.decode(bytes([0xC0])));
+
+    // Stream mode with valid partial: pending, no error.
+    const d2 = new TextDecoder("utf-8", { fatal: true });
+    assert(d2.decode(bytes([0xE2, 0x98]), { stream: true }), "");
+    assert(d2.decode(bytes([0x83])), "☃");
+
+    // Stream + flush with partial pending → error on flush.
+    const d3 = new TextDecoder("utf-8", { fatal: true });
+    assert(d3.decode(bytes([0xE2, 0x98]), { stream: true }), "");
+    assertThrows(TypeError, () => d3.decode());
+}
+
+// { stream: true } carries an incomplete sequence over to the next call.
+function test_decoder_stream() {
+    // Split a 4-byte sequence at every boundary and reassemble.
+    const seq = [0xF0, 0x9F, 0x98, 0x80]; // U+1F600
+    for (let split = 1; split < 4; split++) {
+        const d = new TextDecoder();
+        let out = d.decode(bytes(seq.slice(0, split)), { stream: true });
+        out += d.decode(bytes(seq.slice(split)));
+        assert(out, "😀");
+    }
+
+    // E0 alone deferred; second call's first byte (0x41) is an invalid
+    // continuation, so we emit U+FFFD eagerly and re-read 0x41 as ASCII.
+    const d = new TextDecoder();
+    assert(d.decode(bytes([0xE0]), { stream: true }), "");
+    assert(d.decode(bytes([0x41])), "�A");
+}
+
+// decode() and the accessors throw TypeError on a receiver that is not a
+// TextDecoder, and the constructor throws when called without `new`.
+function test_decoder_brand() {
+    assertThrows(TypeError, () => TextDecoder.prototype.decode.call({}));
+    const enc_get =
+        Object.getOwnPropertyDescriptor(TextDecoder.prototype, "encoding").get;
+    assertThrows(TypeError, () => enc_get.call({}));
+    const fatal_get =
+        Object.getOwnPropertyDescriptor(TextDecoder.prototype, "fatal").get;
+    assertThrows(TypeError, () => fatal_get.call({}));
+    // Constructor without `new`.
+    assertThrows(TypeError, () => TextDecoder());
+}
+
+test_encoder_basic();
+test_encoder_lone_surrogates();
+test_encode_into_basic();
+test_encode_into_partial();
+test_encode_into_argument_errors();
+test_encoder_brand();
+test_decoder_basic();
+test_decoder_input_types();
+test_decoder_label();
+test_decoder_options();
+test_decoder_bom();
+test_decoder_invalid_sequences();
+test_decoder_fatal();
+test_decoder_stream();
+test_decoder_brand();