From 97ab1119741e5edc36feba264bb926428969ac44 Mon Sep 17 00:00:00 2001 From: Toxypi Date: Sat, 7 Feb 2026 16:02:03 +0000 Subject: [PATCH 1/2] Ran initial expansion of docs. --- .../example-CSS-selectors-easy-way.md | 2 + source/articles/part-1-html.md | 2 + source/articles/part-2-css.md | 2 + source/documentation.md | 29 +- source/download.md | 35 +- source/modules/core.md | 472 ++++++++++- source/modules/css.md | 437 +++++++++- source/modules/dom.md | 751 +++++++++++++++++- source/modules/encoding.md | 8 +- source/modules/html.md | 28 +- source/modules/index.md | 7 +- source/modules/selectors.md | 2 + source/modules/url.md | 584 +++++++++++++- source/roadmap.md | 6 +- 14 files changed, 2210 insertions(+), 155 deletions(-) diff --git a/source/articles/example-CSS-selectors-easy-way.md b/source/articles/example-CSS-selectors-easy-way.md index f9a3239..1686d4c 100644 --- a/source/articles/example-CSS-selectors-easy-way.md +++ b/source/articles/example-CSS-selectors-easy-way.md @@ -1,5 +1,7 @@ # Examples: CSS selectors, the easy way +For the full CSS and Selectors API reference, see the [CSS module](../modules/css.md) and [Selectors module](../modules/selectors.md) documentation. + Let's start with an easy example of using `lexbor` for parsing and serializing CSS selectors. This example breaks down the major steps and elements, explaining the overall purpose, requirements, and assumptions at each step. diff --git a/source/articles/part-1-html.md b/source/articles/part-1-html.md index 3dc9506..b092c8b 100644 --- a/source/articles/part-1-html.md +++ b/source/articles/part-1-html.md @@ -1,5 +1,7 @@ # Part one: HTML +**Note:** This article was written during the early development of the Lexbor HTML parser. Some code examples and internal values (such as token type flags) may differ from the current implementation. For up-to-date API reference, see the [HTML module documentation](../modules/html.md). + Hello, everyone! 
In this article, I will explain how to create a superfast HTML parser that diff --git a/source/articles/part-2-css.md b/source/articles/part-2-css.md index 4ab8a03..701aaee 100644 --- a/source/articles/part-2-css.md +++ b/source/articles/part-2-css.md @@ -1,5 +1,7 @@ # Part Two: CSS +**Note:** This article was written during the early development of the Lexbor CSS parser. Some internal details may differ from the current implementation. For up-to-date API reference, see the [CSS module documentation](../modules/css.md). For current project status, see the [Roadmap](../roadmap.md). + Hello, everyone! We continue our series on developing a browser engine. Better late than never! diff --git a/source/documentation.md b/source/documentation.md index 6b59baf..ca75719 100644 --- a/source/documentation.md +++ b/source/documentation.md @@ -201,8 +201,8 @@ These functions: - Are defined in `/source/lexbor/core/lexbor.h` (in the [core](#core) module). -- Are implemented in `/source/port/*/lexbor/core/memory.c` (in the `port` - module). +- Are implemented in `/source/lexbor/ports/*/lexbor/core/memory.c` (in the + `ports` module). - Can be redefined if needed. 
@@ -334,14 +334,14 @@ lexbor_avl_t *avl = lexbor_avl_create(); lxb_status_t status = lexbor_avl_init(avl, 1024); if (status != LXB_STATUS_OK) { - lexbor_avl_node_destroy(avl, true); + lexbor_avl_destroy(avl, true); exit(EXIT_FAILURE); } /* Do something super useful */ -lexbor_avl_node_destroy(avl, true); +lexbor_avl_destroy(avl, true); ``` Now, with an object on the stack: @@ -351,14 +351,14 @@ lexbor_avl_t avl = {0}; lxb_status_t status = lexbor_avl_init(&avl, 1024); if (status != LXB_STATUS_OK) { - lexbor_avl_node_destroy(&avl, false); + lexbor_avl_destroy(&avl, false); exit(EXIT_FAILURE); } /* Do something even more useful */ -lexbor_avl_node_destroy(&avl, false); +lexbor_avl_destroy(&avl, false); ``` Note that this approach is not an absolute requirement, even though it is @@ -398,7 +398,7 @@ as AVL and BST trees, arrays, and strings. It also handles memory management. The module is continuously evolving with new algorithms being added and existing ones optimized. -Documentation for this module will be available later. +See the [Core module documentation](modules/core) for API reference. ### DOM @@ -406,7 +406,7 @@ Documentation for this module will be available later. This module implements the [DOM specification](https://dom.spec.whatwg.org/). Its functions manage the DOM tree, including its nodes, attributes, and events. -Documentation for this module will be available later. +See the [DOM module documentation](modules/dom) for API reference. ### HTML @@ -417,8 +417,7 @@ specification](https://html.spec.whatwg.org/multipage/). Current implementations include: Tokenizer, Tree Builder, Parser, Fragment Parser, and Interfaces for HTML Elements. -Documentation for this module will be available later. For guidance, refer to -the +See the [HTML module documentation](modules/html) for API reference. Also see the [HTML examples](https://github.com/lexbor/lexbor/tree/master/examples/lexbor/html) in our repo or the corresponding [articles](articles/index). 
@@ -440,9 +439,8 @@ windows-1255, windows-1256, windows-1257, windows-1258, windows-874, x-mac-cyrillic, x-user-defined ``` -Documentation for this module will be available later. For guidance, refer to -the [Encoding -examples](https://github.com/lexbor/lexbor/tree/master/examples/lexbor/encoding) +See the [Encoding module documentation](modules/encoding) for API reference. Also see the +[Encoding examples](https://github.com/lexbor/lexbor/tree/master/examples/lexbor/encoding) in our repo or the corresponding [articles](articles/index). @@ -450,7 +448,6 @@ in our repo or the corresponding [articles](articles/index). This module implements the [CSS specification](https://drafts.csswg.org/). -Documentation for this module will be available later. For guidance, refer to -the [CSS -examples](https://github.com/lexbor/lexbor/tree/master/examples/lexbor/css) in +See the [CSS module documentation](modules/css) for API reference. Also see the +[CSS examples](https://github.com/lexbor/lexbor/tree/master/examples/lexbor/css) in our repo or the corresponding [articles](articles/index). diff --git a/source/download.md b/source/download.md index cad48aa..20281b7 100644 --- a/source/download.md +++ b/source/download.md @@ -4,19 +4,20 @@ The `lexbor` binaries are available for: -* [CentOS](#centos) 6, 7, 8 +* [CentOS](#centos) 7 -* [Debian](#debian) 8, 9, 10, 11 +* [Debian](#debian) 11, 12 -* [Fedora](#fedora) 28, 29, 30, 31, 32, 33, 34, 36, 37 +* [Fedora](#fedora) 39, 40, 41 -* [RHEL](#rhel) 7, 8 +* [RHEL](#rhel) 8, 9 -* [Ubuntu](#ubuntu) 14.04, 16.04, 18.04, 18.10, 19.04, 19.10, 20.04, 20.10, - 21.04, 22.04 +* [Ubuntu](#ubuntu) 20.04, 22.04, 24.04 * [macOS](#macos) +**Note:** Older distribution versions that have reached end-of-life are no longer listed. If you need packages for an older version, check the repository at `packages.lexbor.com` directly. + ### CentOS @@ -52,14 +53,14 @@ curl https://lexbor.com/keys/lexbor_signing.key | \ ``` 2. 
To configure the `lexbor` repository, create the following file named - `/etc/apt/sources.list.d/lexbor.list`. For Debian 11: + `/etc/apt/sources.list.d/lexbor.list`. For Debian 12: ```ini -deb-src [signed-by=/etc/apt/keyrings/lexbor.gpg] https://packages.lexbor.com/debian/ bullseye liblexbor -deb [signed-by=/etc/apt/keyrings/lexbor.gpg] https://packages.lexbor.com/debian/ bullseye liblexbor +deb-src [signed-by=/etc/apt/keyrings/lexbor.gpg] https://packages.lexbor.com/debian/ bookworm liblexbor +deb [signed-by=/etc/apt/keyrings/lexbor.gpg] https://packages.lexbor.com/debian/ bookworm liblexbor ``` - Supported distros also include `buster` (10), `stretch` (9), and `jessie` (8). + Supported distros also include `bullseye` (11). 3. Install the core `lexbor` package and any additional packages you need: @@ -86,8 +87,8 @@ deb [signed-by=/etc/apt/keyrings/lexbor.gpg] https://packages.lexbor.com/debian/ 2. Install the core `lexbor` package and any additional packages you need: ```sh - yum install liblexbor - yum install liblexbor-dev + dnf install liblexbor + dnf install liblexbor-dev ``` @@ -125,16 +126,14 @@ curl https://lexbor.com/keys/lexbor_signing.key | \ ``` 2. To configure the `lexbor` repository, create the following file named - `/etc/apt/sources.list.d/lexbor.list`. For Ubuntu 20.04: + `/etc/apt/sources.list.d/lexbor.list`. For Ubuntu 22.04: ```ini -deb-src [signed-by=/etc/apt/keyrings/lexbor.gpg] https://packages.lexbor.com/ubuntu/ focal liblexbor -deb [signed-by=/etc/apt/keyrings/lexbor.gpg] https://packages.lexbor.com/ubuntu/ focal liblexbor +deb-src [signed-by=/etc/apt/keyrings/lexbor.gpg] https://packages.lexbor.com/ubuntu/ jammy liblexbor +deb [signed-by=/etc/apt/keyrings/lexbor.gpg] https://packages.lexbor.com/ubuntu/ jammy liblexbor ``` - Supported distros also include `hirsute` (21.04), `groovy` (20.10), `focal` - (20.04), `eoan` (19.10), `disco` (19.04), `cosmic` (18.10), `bionic` (18.04), - `xenial` (16.04), and `trusty` (14.04). 
+ Supported distros also include `noble` (24.04) and `focal` (20.04). 3. Install the core `lexbor` package and any additional packages you need: diff --git a/source/modules/core.md b/source/modules/core.md index ba109a7..b7026d0 100644 --- a/source/modules/core.md +++ b/source/modules/core.md @@ -8,41 +8,457 @@ ## Overview -The Core module is the foundation of lexbor. It implements essential data structures, algorithms, and memory management used by all other modules. +The Core module is the foundation of lexbor. It implements essential data structures, memory management, and utilities used by all other modules. It is written in pure C99 with zero external dependencies. -Core provides the building blocks that all other modules depend on. It's written in pure C99 with zero external dependencies, making it highly portable and easy to embed. +## Status Codes (`lxb_status_t`) -## What's Inside +Lexbor functions that report success or failure return `lxb_status_t`; others, such as the `*_create()` and allocation functions documented below, return a pointer instead. Defined in `lexbor/core/base.h`. -- **Memory Management** — custom allocators optimized for parser performance - - `lexbor_malloc`, `lexbor_calloc`, `lexbor_realloc`, `lexbor_free` - - Memory pools for fast object allocation +```c +typedef enum { + LXB_STATUS_OK = 0x0000, + LXB_STATUS_ERROR = 0x0001, + LXB_STATUS_ERROR_MEMORY_ALLOCATION, + LXB_STATUS_ERROR_OBJECT_IS_NULL, + LXB_STATUS_ERROR_SMALL_BUFFER, + LXB_STATUS_ERROR_INCOMPLETE_OBJECT, + LXB_STATUS_ERROR_NO_FREE_SLOT, + LXB_STATUS_ERROR_TOO_SMALL_SIZE, + LXB_STATUS_ERROR_NOT_EXISTS, + LXB_STATUS_ERROR_WRONG_ARGS, + LXB_STATUS_ERROR_WRONG_STAGE, + LXB_STATUS_CONTINUE, + LXB_STATUS_STOP, + LXB_STATUS_ABORTED, + LXB_STATUS_STOPPED, + LXB_STATUS_NEXT, + LXB_STATUS_WARNING +} lxb_status_t; +``` -- **Data Structures** - - AVL trees — self-balancing binary search trees - - BST trees — binary search trees - - Arrays — dynamic arrays with automatic growth - - Strings — efficient string handling with SSO (Small String Optimization) - - Hash tables — fast key-value lookups - - 
Vectors — generic dynamic arrays +`LXB_STATUS_OK` (`0x0000`) indicates success. All other values indicate errors or control flow signals. -- **Base Types** — common types used across all modules - - `lxb_status_t` — status codes for error handling - - `lxb_char_t` — character type (unsigned char) - - `lxb_codepoint_t` — Unicode code point - - and more... -- **Utilities** - - String operations (case conversion, comparison, hashing) - - Number parsing and conversion - - Bit operations - - Debugging helpers +## Action Type (`lexbor_action_t`) + +Used as callback return values to control iteration: + +```c +typedef enum { + LEXBOR_ACTION_OK = 0x00, /* continue */ + LEXBOR_ACTION_STOP = 0x01, /* stop iteration */ + LEXBOR_ACTION_NEXT = 0x02 /* skip to next */ +} lexbor_action_t; +``` + + +## Base Types + +Common types used across all modules (defined in `lexbor/core/base.h`): + +- `lxb_char_t` — character type (`unsigned char`) +- `lxb_codepoint_t` — Unicode code point +- `lexbor_serialize_cb_f` — serialization callback: `lxb_status_t (*)(const lxb_char_t *data, size_t len, void *ctx)` +- `lexbor_callback_f` — general callback: `lxb_status_t (*)(const lxb_char_t *data, size_t len, void *ctx)` + + +## Memory Allocator (`lexbor_mraw_t`) + +A pooled memory allocator with caching for reallocation. Used throughout lexbor for efficient allocation. Defined in `lexbor/core/mraw.h`. 
+ +```c +typedef struct { + lexbor_mem_t *mem; + lexbor_bst_t *cache; + size_t ref_count; +} lexbor_mraw_t; +``` + +### Lifecycle + +```c +lexbor_mraw_t * +lexbor_mraw_create(void); + +lxb_status_t +lexbor_mraw_init(lexbor_mraw_t *mraw, size_t chunk_size); + +void +lexbor_mraw_clean(lexbor_mraw_t *mraw); + +lexbor_mraw_t * +lexbor_mraw_destroy(lexbor_mraw_t *mraw, bool destroy_self); +``` + +### Allocation + +```c +void *lexbor_mraw_alloc(lexbor_mraw_t *mraw, size_t size); +void *lexbor_mraw_calloc(lexbor_mraw_t *mraw, size_t size); +void *lexbor_mraw_realloc(lexbor_mraw_t *mraw, void *data, size_t new_size); +void lexbor_mraw_free(lexbor_mraw_t *mraw, void *data); +``` + +### Utility + +```c +/* Duplicate a memory block */ +void *lexbor_mraw_dup(lexbor_mraw_t *mraw, const void *src, size_t size); + +/* Get the allocated size of a block */ +size_t lexbor_mraw_data_size(void *data); + +/* Get reference count */ +size_t lexbor_mraw_reference_count(lexbor_mraw_t *mraw); +``` + + +## Dynamic Object Pool (`lexbor_dobject_t`) + +A pool allocator for frequently created and destroyed fixed-size objects. Allocates objects from chunks and recycles freed objects via an internal cache. Defined in `lexbor/core/dobject.h`. 
+ +```c +typedef struct { + lexbor_mem_t *mem; + lexbor_array_t *cache; + size_t allocated; + size_t struct_size; +} lexbor_dobject_t; +``` + +### Lifecycle + +```c +lexbor_dobject_t * +lexbor_dobject_create(void); + +lxb_status_t +lexbor_dobject_init(lexbor_dobject_t *dobject, size_t chunk_size, size_t struct_size); + +void +lexbor_dobject_clean(lexbor_dobject_t *dobject); + +lexbor_dobject_t * +lexbor_dobject_destroy(lexbor_dobject_t *dobject, bool destroy_self); +``` + +### Operations + +```c +void *lexbor_dobject_alloc(lexbor_dobject_t *dobject); /* allocate (uninitialized) */ +void *lexbor_dobject_calloc(lexbor_dobject_t *dobject); /* allocate (zeroed) */ +void *lexbor_dobject_free(lexbor_dobject_t *dobject, void *data); /* return to pool */ + +void *lexbor_dobject_by_absolute_position(lexbor_dobject_t *dobject, size_t pos); + +size_t lexbor_dobject_allocated(lexbor_dobject_t *dobject); /* total allocated */ +size_t lexbor_dobject_cache_length(lexbor_dobject_t *dobject); /* cached (free) count */ +``` + + +## Array (`lexbor_array_t`) + +A dynamic array of `void *` pointers. Defined in `lexbor/core/array.h`. 
+ +```c +typedef struct { + void **list; + size_t size; /* capacity */ + size_t length; /* current count */ +} lexbor_array_t; +``` + +### Lifecycle + +```c +lexbor_array_t * +lexbor_array_create(void); + +lxb_status_t +lexbor_array_init(lexbor_array_t *array, size_t size); + +void +lexbor_array_clean(lexbor_array_t *array); + +lexbor_array_t * +lexbor_array_destroy(lexbor_array_t *array, bool self_destroy); +``` + +### Operations + +```c +lxb_status_t lexbor_array_expand(lexbor_array_t *array, size_t up_to); +lxb_status_t lexbor_array_push(lexbor_array_t *array, void *value); +void * lexbor_array_pop(lexbor_array_t *array); +lxb_status_t lexbor_array_insert(lexbor_array_t *array, size_t idx, void *value); +lxb_status_t lexbor_array_set(lexbor_array_t *array, size_t idx, void *value); +void lexbor_array_delete(lexbor_array_t *array, size_t begin, size_t length); + +void * lexbor_array_get(const lexbor_array_t *array, size_t idx); /* NULL if out of bounds */ +size_t lexbor_array_length(lexbor_array_t *array); +size_t lexbor_array_size(lexbor_array_t *array); +``` + + +## Object Array (`lexbor_array_obj_t`) + +A dynamic array that stores objects by value (not by pointer). Elements are stored in a contiguous byte buffer, accessed by index and struct size. Defined in `lexbor/core/array_obj.h`. 
+ +```c +typedef struct { + uint8_t *list; + size_t size; /* capacity */ + size_t length; /* current count */ + size_t struct_size; /* size of each element */ +} lexbor_array_obj_t; +``` + +### Lifecycle + +```c +lexbor_array_obj_t * +lexbor_array_obj_create(void); + +lxb_status_t +lexbor_array_obj_init(lexbor_array_obj_t *array, size_t size, size_t struct_size); + +void +lexbor_array_obj_clean(lexbor_array_obj_t *array); + +lexbor_array_obj_t * +lexbor_array_obj_destroy(lexbor_array_obj_t *array, bool self_destroy); +``` + +### Operations + +```c +void *lexbor_array_obj_push(lexbor_array_obj_t *array); /* allocate and zero at end */ +void *lexbor_array_obj_push_wo_cls(lexbor_array_obj_t *array); /* allocate without zeroing */ +void *lexbor_array_obj_push_n(lexbor_array_obj_t *array, size_t count); /* allocate N */ +void *lexbor_array_obj_pop(lexbor_array_obj_t *array); /* remove last */ +void lexbor_array_obj_delete(lexbor_array_obj_t *array, size_t begin, size_t length); + +void * lexbor_array_obj_get(const lexbor_array_obj_t *array, size_t idx); +void * lexbor_array_obj_last(lexbor_array_obj_t *array); +size_t lexbor_array_obj_length(lexbor_array_obj_t *array); +size_t lexbor_array_obj_size(lexbor_array_obj_t *array); +size_t lexbor_array_obj_struct_size(lexbor_array_obj_t *array); +``` + +Note: `push()` returns a pointer to the newly allocated slot in the array. The caller writes the object data into this slot. + + +## String (`lexbor_str_t`) + +Dynamically resizable string. Uses `lexbor_mraw_t` for memory allocation. Defined in `lexbor/core/str.h`. 
+ +```c +typedef struct { + lxb_char_t *data; + size_t length; +} lexbor_str_t; +``` + +### Lifecycle + +```c +lexbor_str_t * +lexbor_str_create(void); + +lxb_char_t * +lexbor_str_init(lexbor_str_t *str, lexbor_mraw_t *mraw, size_t size); + +lxb_char_t * +lexbor_str_init_append(lexbor_str_t *str, lexbor_mraw_t *mraw, + const lxb_char_t *data, size_t length); + +void +lexbor_str_clean(lexbor_str_t *str); + +void +lexbor_str_clean_all(lexbor_str_t *str); + +lexbor_str_t * +lexbor_str_destroy(lexbor_str_t *str, lexbor_mraw_t *mraw, bool destroy_obj); +``` + +### Operations + +```c +/* Resize */ +lxb_char_t *lexbor_str_realloc(lexbor_str_t *str, lexbor_mraw_t *mraw, size_t new_size); +lxb_char_t *lexbor_str_check_size(lexbor_str_t *str, lexbor_mraw_t *mraw, size_t plus_len); + +/* Append */ +lxb_char_t *lexbor_str_append(lexbor_str_t *str, lexbor_mraw_t *mraw, + const lxb_char_t *data, size_t length); +lxb_char_t *lexbor_str_append_before(lexbor_str_t *str, lexbor_mraw_t *mraw, + const lxb_char_t *buff, size_t length); +lxb_char_t *lexbor_str_append_one(lexbor_str_t *str, lexbor_mraw_t *mraw, lxb_char_t data); +lxb_char_t *lexbor_str_append_lowercase(lexbor_str_t *str, lexbor_mraw_t *mraw, + const lxb_char_t *data, size_t length); + +/* Copy */ +lxb_char_t *lexbor_str_copy(lexbor_str_t *dest, const lexbor_str_t *target, + lexbor_mraw_t *mraw); + +/* Whitespace */ +void lexbor_str_stay_only_whitespace(lexbor_str_t *target); +void lexbor_str_strip_collapse_whitespace(lexbor_str_t *target); +void lexbor_str_crop_whitespace_from_begin(lexbor_str_t *target); +``` + +### Accessors + +```c +lxb_char_t *lexbor_str_data(lexbor_str_t *str); +size_t lexbor_str_length(lexbor_str_t *str); +size_t lexbor_str_size(lexbor_str_t *str); +``` + +### Data Comparison Functions + +```c +/* Exact match */ +const lxb_char_t *lexbor_str_data_ncmp(const lxb_char_t *first, + const lxb_char_t *sec, size_t size); +bool lexbor_str_data_cmp(const lxb_char_t *first, const lxb_char_t *sec); + +/* 
Case-insensitive */ +const lxb_char_t *lexbor_str_data_ncasecmp(const lxb_char_t *first, + const lxb_char_t *sec, size_t size); +bool lexbor_str_data_casecmp(const lxb_char_t *first, const lxb_char_t *sec); + +/* Substring search */ +const lxb_char_t *lexbor_str_data_ncmp_contain(const lxb_char_t *where, size_t where_size, + const lxb_char_t *what, size_t what_size); +const lxb_char_t *lexbor_str_data_ncasecmp_contain(const lxb_char_t *where, size_t where_size, + const lxb_char_t *what, size_t what_size); + +/* Case conversion */ +lxb_char_t *lexbor_str_data_to_lowercase(lxb_char_t *to, const lxb_char_t *from, size_t len); +lxb_char_t *lexbor_str_data_to_uppercase(lxb_char_t *to, const lxb_char_t *from, size_t len); +``` + + +## Hash Table (`lexbor_hash_t`) + +Hash table with configurable key handling, collision chaining, and short string optimization for keys. Defined in `lexbor/core/hash.h`. + +### Key Types + +```c +typedef struct { + lexbor_dobject_t *entries; + lexbor_mraw_t *mraw; + lexbor_hash_entry_t **table; + size_t table_size; + size_t struct_size; +} lexbor_hash_t; + +typedef struct { + union { + lxb_char_t *long_str; + lxb_char_t short_str[LEXBOR_HASH_SHORT_SIZE + 1]; /* 17 bytes inline */ + } u; + size_t length; + lexbor_hash_entry_t *next; +} lexbor_hash_entry_t; +``` + +Hash entries use short string optimization: keys up to 16 bytes are stored inline in `short_str`, avoiding a separate allocation. `LEXBOR_HASH_SHORT_SIZE` is `16`. + +### Lifecycle + +```c +lexbor_hash_t * +lexbor_hash_create(void); + +lxb_status_t +lexbor_hash_init(lexbor_hash_t *hash, size_t table_size, size_t struct_size); + +void +lexbor_hash_clean(lexbor_hash_t *hash); + +lexbor_hash_t * +lexbor_hash_destroy(lexbor_hash_t *hash, bool destroy_obj); +``` + +The `struct_size` parameter allows embedding custom data after the hash entry header. Pass `sizeof(lexbor_hash_entry_t)` for entries with no extra data. 
+ +### Operations + +```c +lexbor_hash_entry_t * +lexbor_hash_insert(lexbor_hash_t *hash, const lexbor_hash_insert_t *insert, + const lxb_char_t *key, size_t length); + +lexbor_hash_entry_t * +lexbor_hash_search(lexbor_hash_t *hash, const lexbor_hash_search_t *search, + const lxb_char_t *key, size_t length); + +void * +lexbor_hash_remove(lexbor_hash_t *hash, const lexbor_hash_search_t *search, + const lxb_char_t *key, size_t length); +``` + +Pre-defined insert/search strategies: + +- `lexbor_hash_insert_raw` / `lexbor_hash_search_raw` — exact key matching +- `lexbor_hash_insert_lower` / `lexbor_hash_search_lower` — case-insensitive (lowercase) +- `lexbor_hash_insert_upper` / `lexbor_hash_search_upper` — case-insensitive (uppercase) + + +## AVL Tree (`lexbor_avl_t`) + +Self-balancing AVL tree for ordered data. Defined in `lexbor/core/avl.h`. + +```c +typedef struct { + lexbor_dobject_t *nodes; + lexbor_avl_node_t *last_right; +} lexbor_avl_t; + +typedef struct lexbor_avl_node { + size_t type; /* key */ + short height; + void *value; + struct lexbor_avl_node *left; + struct lexbor_avl_node *right; + struct lexbor_avl_node *parent; +} lexbor_avl_node_t; +``` + +### Lifecycle + +```c +lexbor_avl_t *lexbor_avl_create(void); +lxb_status_t lexbor_avl_init(lexbor_avl_t *avl, size_t chunk_len, size_t struct_size); +void lexbor_avl_clean(lexbor_avl_t *avl); +lexbor_avl_t *lexbor_avl_destroy(lexbor_avl_t *avl, bool self_destroy); +``` + +### Operations + +```c +lexbor_avl_node_t *lexbor_avl_insert(lexbor_avl_t *avl, lexbor_avl_node_t **scope, + size_t type, void *value); +lexbor_avl_node_t *lexbor_avl_search(lexbor_avl_t *avl, lexbor_avl_node_t *scope, + size_t type); +void * lexbor_avl_remove(lexbor_avl_t *avl, lexbor_avl_node_t **scope, + size_t type); + +lxb_status_t lexbor_avl_foreach(lexbor_avl_t *avl, lexbor_avl_node_t **scope, + lexbor_avl_node_f cb, void *ctx); +``` + +The `scope` parameter is a pointer to the root node pointer, allowing the tree to update the root 
during balancing. `lexbor_avl_search()` is the exception: it takes the root node pointer itself (`lexbor_avl_node_t *`) rather than a pointer to it. + ## Key Features - **Zero Dependencies** — pure C99, no external libraries required -- **Performance-Optimized** — custom algorithms tuned for parser workloads -- **Memory Efficient** — pooled allocation reduces fragmentation -- **Platform Abstraction** — portable across different operating systems - -*(Documentation is currently being developed, details will be available here soon.)* +- **Object Lifecycle** — all types follow the `create`/`init`/`clean`/`destroy` pattern +- **Dual Function Variants** — performance-critical accessors have both inline and non-inline (`_noi`) versions for ABI stability +- **Pool Allocation** — `lexbor_dobject_t` recycles fixed-size objects; `lexbor_mraw_t` provides general-purpose pooled allocation +- **Platform Abstraction** — portable across operating systems via `source/lexbor/ports/` diff --git a/source/modules/css.md b/source/modules/css.md index c68642f..fdc8997 100644 --- a/source/modules/css.md +++ b/source/modules/css.md @@ -3,31 +3,432 @@ * **Version:** 1.4.0 * **Path:** `source/lexbor/css` * **Base Includes:** `lexbor/css/css.h` -* **Examples:** `source/examples/css` -* **Specification:** [CSS](https://www.w3.org/Style/CSS/) +* **Examples:** `examples/lexbor/css` +* **Specifications:** [CSS Syntax Level 3](https://www.w3.org/TR/css-syntax-3/), [Selectors Level 4](https://www.w3.org/TR/selectors-4/), [CSSOM](https://www.w3.org/TR/cssom-1/) ## Overview -The CSS module provides a complete CSS parser implementing `CSS Syntax Module Level 3`. It can parse stylesheets, inline styles, and build CSSOM trees. +The CSS module provides a complete CSS parser implementing CSS Syntax Module Level 3. It can parse stylesheets, individual style rules, and declarations, building a rule tree that can be serialized back to CSS text. -Full-featured CSS parser supporting CSS Syntax, Selectors, CSSOM, and gradual implementation of various CSS modules. 
+The module includes: -## What's Inside - -- **Syntax Tokenizer** — converts CSS text into tokens -- **Parser** — builds CSS rule tree from tokens -- **CSSOM** — CSS Object Model -- **Property Parsing** — parses CSS properties and values -- **Value Types** — handles lengths, colors, functions +- **Syntax Tokenizer** — converts CSS text into tokens per CSS Syntax Level 3 +- **Parser** — builds a CSS rule tree from tokens +- **Stylesheet** — parses and holds a complete stylesheet's rule tree +- **Rule Tree** — CSSOM-style representation of style rules, at-rules, and declarations +- **Property Parsing** — parses CSS property values into typed structures +- **Selectors** — CSS Selectors Level 4 (documented separately in the [Selectors module](selectors.md)) +- **Log** — collects warnings and errors during parsing ## Supported Features -- ✅ CSS Syntax Level 3 -- ✅ CSS Selectors Level 4 -- ✅ CSSOM -- 🚧 CSS Values (in progress) -- 🚧 CSS Box Model (in progress) -- 🚧 CSS Display, Fonts, Flexbox (in progress) +- CSS Syntax Level 3 (complete) +- CSS Selectors Level 4 (complete) +- CSS Namespaces Level 3 (complete) +- CSSOM (in progress) +- CSS property value parsing for: display, position, color, opacity, width, height, min/max dimensions, margin, padding, border, background-color, font properties (family, size, weight, style, stretch), text properties (align, indent, transform, decoration, overflow, justify, orientation, combine-upright), flexbox (flex, flex-direction, flex-wrap, flex-flow, flex-grow, flex-shrink, flex-basis, justify-content, align-items, align-self, align-content, order), float, clear, overflow, z-index, visibility, box-sizing, line-height, white-space, tab-size, word-break, word-spacing, letter-spacing, hyphens, writing-mode, direction, unicode-bidi, vertical-align, baseline properties + + +## Parser (`lxb_css_parser_t`) + +The CSS parser is the core entry point. Defined in `lexbor/css/parser.h`. 
+ +### Lifecycle + +```c +lxb_css_parser_t * +lxb_css_parser_create(void); + +lxb_status_t +lxb_css_parser_init(lxb_css_parser_t *parser, lxb_css_syntax_tokenizer_t *tkz); + +void +lxb_css_parser_clean(lxb_css_parser_t *parser); + +void +lxb_css_parser_erase(lxb_css_parser_t *parser); + +lxb_css_parser_t * +lxb_css_parser_destroy(lxb_css_parser_t *parser, bool self_destroy); +``` + +- `lxb_css_parser_init()`: If `tkz` is `NULL`, the parser creates and manages its own tokenizer. +- `lxb_css_parser_clean()`: Resets state but keeps allocated memory for reuse. +- `lxb_css_parser_erase()`: Resets state and releases internal allocations. +- `lxb_css_parser_destroy()`: If `self_destroy` is `true`, frees the parser object itself. + +### Selectors Integration + +To parse CSS that contains selectors (which is most CSS), initialize the selectors module: + +```c +lxb_status_t +lxb_css_parser_selectors_init(lxb_css_parser_t *parser); + +void +lxb_css_parser_selectors_destroy(lxb_css_parser_t *parser); +``` + +If the selectors module is not initialized when parsing a stylesheet, one is created temporarily for each parse call. For better performance when parsing multiple stylesheets, initialize it once. + +### Status + +```c +lxb_status_t +lxb_css_parser_status(lxb_css_parser_t *parser); + +lxb_css_log_t * +lxb_css_parser_log(lxb_css_parser_t *parser); +``` + + +## Stylesheet (`lxb_css_stylesheet_t`) + +Represents a parsed CSS stylesheet. Defined in `lexbor/css/stylesheet.h`. + +### Lifecycle + +```c +lxb_css_stylesheet_t * +lxb_css_stylesheet_create(lxb_css_memory_t *memory); + +lxb_css_stylesheet_t * +lxb_css_stylesheet_destroy(lxb_css_stylesheet_t *sst, bool destroy_memory); +``` + +- `lxb_css_stylesheet_create()`: If `memory` is `NULL`, the stylesheet creates its own memory pool. +- `lxb_css_stylesheet_destroy()`: If `destroy_memory` is `true`, also destroys the associated memory pool. 
+ +### Parsing + +```c +lxb_status_t +lxb_css_stylesheet_parse(lxb_css_stylesheet_t *sst, lxb_css_parser_t *parser, + const lxb_char_t *data, size_t length); +``` + +Parses CSS text into the stylesheet's rule tree. Only returns errors for severe failures (e.g., out of memory). Invalid CSS is handled gracefully — broken rules are recorded as `lxb_css_rule_bad_style_t`. + +After parsing, the rule tree is available at `sst->root`. + + +## Rule Tree + +The parsed CSS is represented as a tree of rule nodes. All rule types share a common base `lxb_css_rule_t`. Defined in `lexbor/css/rule.h`. + +### Rule Types + +```c +typedef enum { + LXB_CSS_RULE_UNDEF = 0, + LXB_CSS_RULE_STYLESHEET, + LXB_CSS_RULE_LIST, + LXB_CSS_RULE_AT_RULE, + LXB_CSS_RULE_STYLE, + LXB_CSS_RULE_BAD_STYLE, + LXB_CSS_RULE_DECLARATION_LIST, + LXB_CSS_RULE_DECLARATION +} lxb_css_rule_type_t; +``` + +### Key Rule Structures + +**`lxb_css_rule_style_t`** — A CSS style rule (selector + declarations): + +```c +struct lxb_css_rule_style { + lxb_css_rule_t rule; + lxb_css_selector_list_t *selector; + lxb_css_rule_declaration_list_t *declarations; + /* ... */ +}; +``` + +**`lxb_css_rule_declaration_t`** — A single CSS declaration (property: value): + +```c +struct lxb_css_rule_declaration { + lxb_css_rule_t rule; + uintptr_t type; /* property ID from LXB_CSS_PROPERTY_* */ + union { /* typed property value */ } u; + bool important; +}; +``` + +The `type` field holds the property ID (e.g., `LXB_CSS_PROPERTY_DISPLAY`), and the union `u` holds the parsed value in a type-safe structure. 
+ +**`lxb_css_rule_at_t`** — An at-rule (@media, @font-face, @namespace): + +```c +struct lxb_css_rule_at { + lxb_css_rule_t rule; + uintptr_t type; /* at-rule ID from LXB_CSS_AT_RULE_* */ + union { /* typed at-rule data */ } u; +}; +``` + +**`lxb_css_rule_bad_style_t`** — A style rule whose selector failed to parse: + +```c +struct lxb_css_rule_bad_style { + lxb_css_rule_t rule; + lexbor_str_t selectors; /* raw selector text */ + lxb_css_rule_declaration_list_t *declarations; +}; +``` + +### Casting Macros + +```c +lxb_css_rule(obj) /* cast to lxb_css_rule_t * */ +lxb_css_rule_style(obj) /* cast to lxb_css_rule_style_t * */ +lxb_css_rule_at(obj) /* cast to lxb_css_rule_at_t * */ +lxb_css_rule_declaration(obj) /* cast to lxb_css_rule_declaration_t * */ +lxb_css_rule_declaration_list(obj) /* cast to lxb_css_rule_declaration_list_t * */ +``` + +### Traversal + +Rules form a linked list via `next`/`prev` pointers. List containers (`lxb_css_rule_list_t`, `lxb_css_rule_declaration_list_t`) have `first`/`last` pointers. + +```c +/* Iterate over rules in a list */ +lxb_css_rule_t *rule = list->first; +while (rule != NULL) { + /* process rule */ + rule = rule->next; +} +``` + + +## Serialization + +All rule types support callback-based serialization back to CSS text: + +```c +lxb_status_t +lxb_css_rule_serialize(const lxb_css_rule_t *rule, + lexbor_serialize_cb_f cb, void *ctx); + +lxb_status_t +lxb_css_rule_serialize_chain(const lxb_css_rule_t *rule, + lexbor_serialize_cb_f cb, void *ctx); +``` + +- `lxb_css_rule_serialize()`: Serializes a single rule. +- `lxb_css_rule_serialize_chain()`: Serializes a rule and all its `next` siblings. 
+ +Type-specific serialization functions: + +```c +lxb_css_rule_style_serialize(style, cb, ctx); +lxb_css_rule_at_serialize(at, cb, ctx); +lxb_css_rule_declaration_serialize(decl, cb, ctx); +lxb_css_rule_declaration_list_serialize(list, cb, ctx); +``` + +The callback signature is `lexbor_serialize_cb_f`: + +```c +typedef lxb_status_t +(*lexbor_serialize_cb_f)(const lxb_char_t *data, size_t len, void *ctx); +``` + + +## Log (`lxb_css_log_t`) + +The CSS parser log collects messages generated during parsing. Defined in `lexbor/css/log.h`. + +### Message Types + +```c +typedef enum { + LXB_CSS_LOG_INFO = 0, + LXB_CSS_LOG_WARNING, + LXB_CSS_LOG_ERROR, + LXB_CSS_LOG_SYNTAX_ERROR +} lxb_css_log_type_t; +``` + +### Lifecycle + +```c +lxb_css_log_t * +lxb_css_log_create(void); + +lxb_status_t +lxb_css_log_init(lxb_css_log_t *log, lexbor_mraw_t *mraw); + +void +lxb_css_log_clean(lxb_css_log_t *log); + +lxb_css_log_t * +lxb_css_log_destroy(lxb_css_log_t *log, bool self_destroy); +``` + +### Usage + +```c +/* Get the number of log messages */ +size_t +lxb_css_log_length(lxb_css_log_t *log); + +/* Serialize all log messages */ +lxb_status_t +lxb_css_log_serialize(lxb_css_log_t *log, lexbor_serialize_cb_f cb, void *ctx, + const lxb_char_t *indent, size_t indent_length); + +/* Serialize to a string (caller must free with lexbor_free) */ +lxb_char_t * +lxb_css_log_serialize_char(lxb_css_log_t *log, size_t *out_length, + const lxb_char_t *indent, size_t indent_length); +``` + + +## Memory Management (`lxb_css_memory_t`) + +The CSS module uses a shared memory pool for all allocations. Defined in `lexbor/css/base.h`. 
+ +```c +lxb_css_memory_t * +lxb_css_memory_create(void); + +lxb_status_t +lxb_css_memory_init(lxb_css_memory_t *memory, size_t prepare_count); + +void +lxb_css_memory_clean(lxb_css_memory_t *memory); + +lxb_css_memory_t * +lxb_css_memory_destroy(lxb_css_memory_t *memory, bool self_destroy); +``` + +The memory pool uses reference counting: + +```c +lxb_css_memory_t * +lxb_css_memory_ref_inc(lxb_css_memory_t *memory); + +void +lxb_css_memory_ref_dec(lxb_css_memory_t *memory); + +lxb_css_memory_t * +lxb_css_memory_ref_dec_destroy(lxb_css_memory_t *memory); +``` + + +## Examples + +### Parsing and Serializing a Stylesheet + +```c +#include <lexbor/css/css.h> + +static lxb_status_t +serializer_callback(const lxb_char_t *data, size_t len, void *ctx) +{ + printf("%.*s", (int) len, data); + return LXB_STATUS_OK; +} + +int +main(void) +{ + lxb_status_t status; + lxb_css_parser_t *parser; + lxb_css_stylesheet_t *sst; + + static const lxb_char_t css[] = + "div { color: red; display: flex; }" + "p.intro { font-size: 16px; margin: 10px; }"; + + /* Create and initialize the parser */ + parser = lxb_css_parser_create(); + status = lxb_css_parser_init(parser, NULL); + if (status != LXB_STATUS_OK) { + goto failed; + } + + /* Create a stylesheet and parse CSS into it */ + sst = lxb_css_stylesheet_create(NULL); + status = lxb_css_stylesheet_parse(sst, parser, css, sizeof(css) - 1); + + lxb_css_parser_destroy(parser, true); + + if (status != LXB_STATUS_OK) { + lxb_css_stylesheet_destroy(sst, true); + return EXIT_FAILURE; + } + + /* Serialize the parsed stylesheet back to CSS text */ + lxb_css_rule_serialize(sst->root, serializer_callback, NULL); + printf("\n"); + + lxb_css_stylesheet_destroy(sst, true); + return EXIT_SUCCESS; + +failed: + lxb_css_parser_destroy(parser, true); + return EXIT_FAILURE; +} +``` + +### Walking the Rule Tree + +```c +#include <lexbor/css/css.h> + +static lxb_status_t +print_cb(const lxb_char_t *data, size_t len, void *ctx) +{ + printf("%.*s", (int) len, data); + return LXB_STATUS_OK; +} + +int 
+main(void) +{ + lxb_status_t status; + lxb_css_parser_t *parser; + lxb_css_stylesheet_t *sst; + lxb_css_rule_t *rule; + + static const lxb_char_t css[] = + ".header { color: blue; } .footer { margin: 0; }"; + + parser = lxb_css_parser_create(); + status = lxb_css_parser_init(parser, NULL); + if (status != LXB_STATUS_OK) { + return EXIT_FAILURE; + } + + sst = lxb_css_stylesheet_create(NULL); + status = lxb_css_stylesheet_parse(sst, parser, css, sizeof(css) - 1); + lxb_css_parser_destroy(parser, true); + + if (status != LXB_STATUS_OK) { + lxb_css_stylesheet_destroy(sst, true); + return EXIT_FAILURE; + } + + /* Walk the rule list */ + lxb_css_rule_list_t *list = lxb_css_rule_list(sst->root); + rule = list->first; + + while (rule != NULL) { + printf("Rule type: %d\n", rule->type); + + if (rule->type == LXB_CSS_RULE_STYLE) { + printf(" Style rule: "); + lxb_css_rule_style_serialize(lxb_css_rule_style(rule), + print_cb, NULL); + printf("\n"); + } + rule = rule->next; + } -*(Documentation is currently being developed, details will be available here soon.)* + lxb_css_stylesheet_destroy(sst, true); + return EXIT_SUCCESS; +} +``` diff --git a/source/modules/dom.md b/source/modules/dom.md index 391d431..bb9f4b9 100644 --- a/source/modules/dom.md +++ b/source/modules/dom.md @@ -3,22 +3,753 @@ * **Version:** 2.0.0 * **Path:** `source/lexbor/dom` * **Base Includes:** `lexbor/dom/dom.h` -* **Examples:** not present +* **Examples:** `examples/lexbor/html` (DOM is used through the HTML module) * **Specification:** [WHATWG DOM Living Standard](https://dom.spec.whatwg.org/) ## Overview -The DOM module implements the Document Object Model specification, providing a tree structure for representing and manipulating HTML documents. +The DOM module implements the Document Object Model specification, providing a tree structure for representing and manipulating HTML documents. It defines the node hierarchy, tree operations, element attributes, and namespace handling used throughout lexbor. 
-Complete implementation of WHATWG DOM standard with efficient tree operations and namespace support. +In practice, the DOM module is most commonly used via the [HTML module](html.md). After parsing HTML with `lxb_html_document_parse()`, you interact with the resulting tree using DOM types and functions. -## What's Inside +## Key Types -- **Node Hierarchy** — all DOM node types (Element, Text, Comment, etc.) -- **Node Operations** — create, append, insert, remove, replace -- **Element Operations** — attribute manipulation -- **Tree Traversal** — parent, children, siblings navigation -- **Namespace Support** — HTML, SVG, MathML, XML +### Node Types +Every node in the DOM tree has a type defined by `lxb_dom_node_type_t`: -*(Documentation is currently being developed, details will be available here soon.)* +```c +typedef enum { + LXB_DOM_NODE_TYPE_UNDEF = 0x00, + LXB_DOM_NODE_TYPE_ELEMENT = 0x01, + LXB_DOM_NODE_TYPE_ATTRIBUTE = 0x02, + LXB_DOM_NODE_TYPE_TEXT = 0x03, + LXB_DOM_NODE_TYPE_CDATA_SECTION = 0x04, + LXB_DOM_NODE_TYPE_ENTITY_REFERENCE = 0x05, // historical + LXB_DOM_NODE_TYPE_ENTITY = 0x06, // historical + LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION = 0x07, + LXB_DOM_NODE_TYPE_COMMENT = 0x08, + LXB_DOM_NODE_TYPE_DOCUMENT = 0x09, + LXB_DOM_NODE_TYPE_DOCUMENT_TYPE = 0x0A, + LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT = 0x0B, + LXB_DOM_NODE_TYPE_NOTATION = 0x0C, // historical + LXB_DOM_NODE_TYPE_CHARACTER_DATA, + LXB_DOM_NODE_TYPE_SHADOW_ROOT, + LXB_DOM_NODE_TYPE_LAST_ENTRY +} lxb_dom_node_type_t; +``` + +### Interface Hierarchy + +The DOM module uses a "poor man's inheritance" pattern where each structure embeds its parent as the first field, allowing safe casting between types: + +``` +lxb_dom_event_target_t + └── lxb_dom_node_t + ├── lxb_dom_element_t + ├── lxb_dom_document_t + ├── lxb_dom_character_data_t + │ ├── lxb_dom_text_t + │ ├── lxb_dom_comment_t + │ ├── lxb_dom_cdata_section_t + │ └── lxb_dom_processing_instruction_t + ├── lxb_dom_document_type_t + ├── 
lxb_dom_document_fragment_t + ├── lxb_dom_shadow_root_t + └── lxb_dom_attr_t +``` + +### Interface Casting Macros + +Because of the inheritance pattern, casting macros are provided in `lexbor/dom/interface.h`: + +```c +lxb_dom_interface_node(obj) /* cast to lxb_dom_node_t * */ +lxb_dom_interface_element(obj) /* cast to lxb_dom_element_t * */ +lxb_dom_interface_document(obj) /* cast to lxb_dom_document_t * */ +lxb_dom_interface_text(obj) /* cast to lxb_dom_text_t * */ +lxb_dom_interface_comment(obj) /* cast to lxb_dom_comment_t * */ +lxb_dom_interface_attr(obj) /* cast to lxb_dom_attr_t * */ +``` + +For example, to get the node type of an element: + +```c +lxb_dom_element_t *element = /* ... */; +lxb_dom_node_type_t type = lxb_dom_node_type(lxb_dom_interface_node(element)); +``` + + +## Node (`lxb_dom_node_t`) + +The fundamental type for all DOM tree nodes. Defined in `lexbor/dom/interfaces/node.h`. + +### Tree Traversal + +Navigate the tree using these inline functions: + +```c +lxb_dom_node_t * +lxb_dom_node_first_child(lxb_dom_node_t *node); + +lxb_dom_node_t * +lxb_dom_node_last_child(lxb_dom_node_t *node); + +lxb_dom_node_t * +lxb_dom_node_next(lxb_dom_node_t *node); + +lxb_dom_node_t * +lxb_dom_node_prev(lxb_dom_node_t *node); + +lxb_dom_node_t * +lxb_dom_node_parent(lxb_dom_node_t *node); +``` + +All return `NULL` when no such node exists. 
+ +### Node Properties + +```c +/* Get the node type */ +lxb_dom_node_type_t +lxb_dom_node_type(lxb_dom_node_t *node); + +/* Get the tag ID (element local name as numeric ID) */ +lxb_tag_id_t +lxb_dom_node_tag_id(lxb_dom_node_t *node); + +/* Get the node name as a string */ +const lxb_char_t * +lxb_dom_node_name(lxb_dom_node_t *node, size_t *len); +``` + +### Tree Modification + +**Low-level operations** — These insert/remove nodes directly without DOM spec validation: + +```c +/* Insert node as the last child of 'to' */ +void +lxb_dom_node_insert_child(lxb_dom_node_t *to, lxb_dom_node_t *node); + +/* Insert node immediately before 'to' */ +void +lxb_dom_node_insert_before(lxb_dom_node_t *to, lxb_dom_node_t *node); + +/* Insert node immediately after 'to' */ +void +lxb_dom_node_insert_after(lxb_dom_node_t *to, lxb_dom_node_t *node); + +/* Remove node from its parent */ +void +lxb_dom_node_remove(lxb_dom_node_t *node); +``` + +**Spec-compliant operations** — These perform DOM spec validation before modifying the tree, returning an exception code: + +```c +/* Node.appendChild(node) — validates, then appends child */ +lxb_dom_exception_code_t +lxb_dom_node_append_child(lxb_dom_node_t *parent, lxb_dom_node_t *node); + +/* Node.insertBefore(node, child) — validates, then inserts */ +lxb_dom_exception_code_t +lxb_dom_node_insert_before_spec(lxb_dom_node_t *dst, lxb_dom_node_t *node, + lxb_dom_node_t *child); + +/* Node.removeChild(child) — validates, then removes */ +lxb_dom_exception_code_t +lxb_dom_node_remove_child(lxb_dom_node_t *parent, lxb_dom_node_t *child); + +/* Node.replaceChild(node, child) — validates, then replaces */ +lxb_dom_exception_code_t +lxb_dom_node_replace_child(lxb_dom_node_t *parent, lxb_dom_node_t *node, + lxb_dom_node_t *child); +``` + +Returns `LXB_DOM_EXCEPTION_OK` on success. + +### Text Content + +```c +/* Get text content of the node and its descendants. + * Memory is freed when the document is destroyed. 
+ * To free earlier, call lxb_dom_document_destroy_text(). */ +lxb_char_t * +lxb_dom_node_text_content(lxb_dom_node_t *node, size_t *len); + +/* Set text content, replacing all children */ +lxb_status_t +lxb_dom_node_text_content_set(lxb_dom_node_t *node, + const lxb_char_t *content, size_t len); +``` + +### Tree Walking + +Walk all descendants of a node using a callback: + +```c +typedef lexbor_action_t +(*lxb_dom_node_simple_walker_f)(lxb_dom_node_t *node, void *ctx); + +void +lxb_dom_node_simple_walk(lxb_dom_node_t *root, + lxb_dom_node_simple_walker_f walker_cb, void *ctx); +``` + +The callback should return `LEXBOR_ACTION_OK` to continue or `LEXBOR_ACTION_STOP` to stop. + +### Search Functions + +Find nodes within a subtree: + +```c +/* Find the first element with the given ID */ +lxb_dom_node_t * +lxb_dom_node_by_id(lxb_dom_node_t *root, + const lxb_char_t *qualified_name, size_t len); + +/* Collect all elements with the given tag name */ +lxb_status_t +lxb_dom_node_by_tag_name(lxb_dom_node_t *root, lxb_dom_collection_t *collection, + const lxb_char_t *qualified_name, size_t len); + +/* Collect all elements with the given class name */ +lxb_status_t +lxb_dom_node_by_class_name(lxb_dom_node_t *root, + lxb_dom_collection_t *collection, + const lxb_char_t *class_name, size_t len); + +/* Collect elements by attribute name and value (exact match) */ +lxb_status_t +lxb_dom_node_by_attr(lxb_dom_node_t *root, lxb_dom_collection_t *collection, + const lxb_char_t *qualified_name, size_t qname_len, + const lxb_char_t *value, size_t value_len, + bool case_insensitive); + +/* Collect elements by attribute value prefix */ +lxb_status_t +lxb_dom_node_by_attr_begin(lxb_dom_node_t *root, + lxb_dom_collection_t *collection, + const lxb_char_t *qualified_name, size_t qname_len, + const lxb_char_t *value, size_t value_len, + bool case_insensitive); + +/* Collect elements by attribute value suffix */ +lxb_status_t +lxb_dom_node_by_attr_end(lxb_dom_node_t *root, 
lxb_dom_collection_t *collection, + const lxb_char_t *qualified_name, size_t qname_len, + const lxb_char_t *value, size_t value_len, + bool case_insensitive); + +/* Collect elements by attribute value substring */ +lxb_status_t +lxb_dom_node_by_attr_contain(lxb_dom_node_t *root, + lxb_dom_collection_t *collection, + const lxb_char_t *qualified_name, size_t qname_len, + const lxb_char_t *value, size_t value_len, + bool case_insensitive); +``` + +### Destroy and Clone + +```c +/* Destroy a single node (does not remove children) */ +lxb_dom_node_t * +lxb_dom_node_destroy(lxb_dom_node_t *node); + +/* Destroy a node and all its descendants */ +lxb_dom_node_t * +lxb_dom_node_destroy_deep(lxb_dom_node_t *root); + +/* Clone a node, optionally with all descendants */ +lxb_dom_node_t * +lxb_dom_node_clone(lxb_dom_node_t *node, bool deep); +``` + + +## Element (`lxb_dom_element_t`) + +Extends `lxb_dom_node_t` for elements. Defined in `lexbor/dom/interfaces/element.h`. + +### Element Names + +```c +/* Original qualified name (e.g. 
"LalAla:DiV") */ +const lxb_char_t * +lxb_dom_element_qualified_name(lxb_dom_element_t *element, size_t *len); + +/* Uppercase qualified name */ +const lxb_char_t * +lxb_dom_element_qualified_name_upper(lxb_dom_element_t *element, size_t *len); + +/* Local name only (without prefix) */ +const lxb_char_t * +lxb_dom_element_local_name(lxb_dom_element_t *element, size_t *len); + +/* Tag name (uppercase qualified name) */ +const lxb_char_t * +lxb_dom_element_tag_name(lxb_dom_element_t *element, size_t *len); + +/* Namespace prefix */ +const lxb_char_t * +lxb_dom_element_prefix(lxb_dom_element_t *element, size_t *len); + +/* Tag ID and namespace ID as numeric values */ +lxb_tag_id_t +lxb_dom_element_tag_id(lxb_dom_element_t *element); + +lxb_ns_id_t +lxb_dom_element_ns_id(lxb_dom_element_t *element); +``` + +### Attribute Operations + +```c +/* Set or create an attribute */ +lxb_dom_attr_t * +lxb_dom_element_set_attribute(lxb_dom_element_t *element, + const lxb_char_t *qualified_name, size_t qn_len, + const lxb_char_t *value, size_t value_len); + +/* Get an attribute value */ +const lxb_char_t * +lxb_dom_element_get_attribute(lxb_dom_element_t *element, + const lxb_char_t *qualified_name, size_t qn_len, + size_t *value_len); + +/* Remove an attribute */ +lxb_status_t +lxb_dom_element_remove_attribute(lxb_dom_element_t *element, + const lxb_char_t *qualified_name, size_t qn_len); + +/* Check if attribute exists */ +bool +lxb_dom_element_has_attribute(lxb_dom_element_t *element, + const lxb_char_t *qualified_name, size_t qn_len); + +/* Check if element has any attributes */ +bool +lxb_dom_element_has_attributes(lxb_dom_element_t *element); +``` + +### Attribute Iteration + +```c +lxb_dom_attr_t * +lxb_dom_element_first_attribute(lxb_dom_element_t *element); + +lxb_dom_attr_t * +lxb_dom_element_last_attribute(lxb_dom_element_t *element); + +lxb_dom_attr_t * +lxb_dom_element_next_attribute(lxb_dom_attr_t *attr); + +lxb_dom_attr_t * 
+lxb_dom_element_prev_attribute(lxb_dom_attr_t *attr); +``` + +### ID and Class Access + +```c +/* Get the element's "id" attribute value */ +const lxb_char_t * +lxb_dom_element_id(lxb_dom_element_t *element, size_t *len); + +/* Get the element's "class" attribute value */ +const lxb_char_t * +lxb_dom_element_class(lxb_dom_element_t *element, size_t *len); + +/* Direct access to the id/class attribute objects */ +lxb_dom_attr_t * +lxb_dom_element_id_attribute(lxb_dom_element_t *element); + +lxb_dom_attr_t * +lxb_dom_element_class_attribute(lxb_dom_element_t *element); +``` + +### Element Search + +These functions search from the element downward. `lxb_dom_element_by_id()` returns the first match directly; the others append every match to a collection: + +```c +/* Find the first element with the given ID */ +lxb_dom_element_t * +lxb_dom_element_by_id(lxb_dom_element_t *root, + const lxb_char_t *qualified_name, size_t len); + +/* Collect elements by tag name */ +lxb_status_t +lxb_dom_elements_by_tag_name(lxb_dom_element_t *root, + lxb_dom_collection_t *collection, + const lxb_char_t *qualified_name, size_t len); + +/* Collect elements by class name */ +lxb_status_t +lxb_dom_elements_by_class_name(lxb_dom_element_t *root, + lxb_dom_collection_t *collection, + const lxb_char_t *class_name, size_t len); + +/* Collect elements by attribute name and value (exact match) */ +lxb_status_t +lxb_dom_elements_by_attr(lxb_dom_element_t *root, + lxb_dom_collection_t *collection, + const lxb_char_t *qualified_name, size_t qname_len, + const lxb_char_t *value, size_t value_len, + bool case_insensitive); +``` + +Variants `lxb_dom_elements_by_attr_begin()`, `lxb_dom_elements_by_attr_end()`, and `lxb_dom_elements_by_attr_contain()` match by attribute value prefix, suffix, and substring respectively. 
+ +### Lifecycle + +```c +lxb_dom_element_t * +lxb_dom_element_create(lxb_dom_document_t *document, + const lxb_char_t *local_name, size_t lname_len, + const lxb_char_t *ns_name, size_t ns_len, + const lxb_char_t *prefix, size_t prefix_len, + const lxb_char_t *is, size_t is_len, + bool sync_custom); + +lxb_dom_element_t * +lxb_dom_element_destroy(lxb_dom_element_t *element); +``` + +In most cases, prefer `lxb_dom_document_create_element()` (see below) instead of calling `lxb_dom_element_create()` directly. + + +## Attribute (`lxb_dom_attr_t`) + +Represents a single attribute on an element. Defined in `lexbor/dom/interfaces/attr.h`. + +```c +/* Get the local name of the attribute */ +const lxb_char_t * +lxb_dom_attr_local_name(lxb_dom_attr_t *attr, size_t *len); + +/* Get the qualified name (including prefix) */ +const lxb_char_t * +lxb_dom_attr_qualified_name(lxb_dom_attr_t *attr, size_t *len); + +/* Get the attribute value */ +const lxb_char_t * +lxb_dom_attr_value(lxb_dom_attr_t *attr, size_t *len); + +/* Set the attribute value */ +lxb_status_t +lxb_dom_attr_set_value(lxb_dom_attr_t *attr, + const lxb_char_t *value, size_t value_len); +``` + + +## Document (`lxb_dom_document_t`) + +The document node — the root of the DOM tree. Defined in `lexbor/dom/interfaces/document.h`. + +When working with HTML, you typically use `lxb_html_document_t` (from the [HTML module](html.md)) rather than `lxb_dom_document_t` directly. 
+ +### Compatibility Mode + +```c +typedef enum { + LXB_DOM_DOCUMENT_CMODE_NO_QUIRKS = 0x00, + LXB_DOM_DOCUMENT_CMODE_QUIRKS = 0x01, + LXB_DOM_DOCUMENT_CMODE_LIMITED_QUIRKS = 0x02 +} lxb_dom_document_cmode_t; +``` + +### Factory Methods + +Create new DOM nodes owned by the document: + +```c +lxb_dom_element_t * +lxb_dom_document_create_element(lxb_dom_document_t *document, + const lxb_char_t *local_name, size_t lname_len, + void *reserved_for_opt); + +lxb_dom_text_t * +lxb_dom_document_create_text_node(lxb_dom_document_t *document, + const lxb_char_t *data, size_t len); + +lxb_dom_comment_t * +lxb_dom_document_create_comment(lxb_dom_document_t *document, + const lxb_char_t *data, size_t len); + +lxb_dom_cdata_section_t * +lxb_dom_document_create_cdata_section(lxb_dom_document_t *document, + const lxb_char_t *data, size_t len); + +lxb_dom_processing_instruction_t * +lxb_dom_document_create_processing_instruction(lxb_dom_document_t *document, + const lxb_char_t *target, size_t target_len, + const lxb_char_t *data, size_t data_len); + +lxb_dom_document_fragment_t * +lxb_dom_document_create_document_fragment(lxb_dom_document_t *document); +``` + +### Document Access + +```c +/* Get the root node of the document tree */ +lxb_dom_node_t * +lxb_dom_document_root(lxb_dom_document_t *document); + +/* Get the document element (e.g. 
) */ +lxb_dom_element_t * +lxb_dom_document_element(lxb_dom_document_t *document); + +/* Import a node from another document */ +lxb_dom_node_t * +lxb_dom_document_import_node(lxb_dom_document_t *doc, lxb_dom_node_t *node, + bool deep); +``` + +### Lifecycle + +```c +lxb_dom_document_t * +lxb_dom_document_create(lxb_dom_document_t *owner); + +lxb_status_t +lxb_dom_document_init(lxb_dom_document_t *document, lxb_dom_document_t *owner, + lxb_dom_interface_create_f create_interface, + lxb_dom_interface_clone_f clone_interface, + lxb_dom_interface_destroy_f destroy_interface, + lxb_dom_document_dtype_t type, unsigned int ns); + +lxb_status_t +lxb_dom_document_clean(lxb_dom_document_t *document); + +lxb_dom_document_t * +lxb_dom_document_destroy(lxb_dom_document_t *document); +``` + + +## Collection (`lxb_dom_collection_t`) + +A dynamic array for holding references to multiple DOM nodes. Used with search functions that return multiple results. Defined in `lexbor/dom/collection.h`. + +### Lifecycle + +```c +lxb_dom_collection_t * +lxb_dom_collection_create(lxb_dom_document_t *document); + +lxb_status_t +lxb_dom_collection_init(lxb_dom_collection_t *col, size_t start_list_size); + +lxb_dom_collection_t * +lxb_dom_collection_destroy(lxb_dom_collection_t *col, bool self_destroy); +``` + +Or use the convenience function that creates and initializes in one call: + +```c +lxb_dom_collection_t * +lxb_dom_collection_make(lxb_dom_document_t *document, size_t start_list_size); +``` + +### Usage + +```c +void +lxb_dom_collection_clean(lxb_dom_collection_t *col); + +lxb_status_t +lxb_dom_collection_append(lxb_dom_collection_t *col, void *value); + +lxb_dom_element_t * +lxb_dom_collection_element(lxb_dom_collection_t *col, size_t idx); + +lxb_dom_node_t * +lxb_dom_collection_node(lxb_dom_collection_t *col, size_t idx); + +size_t +lxb_dom_collection_length(lxb_dom_collection_t *col); +``` + + +## Namespace Support + +The DOM module supports six XML namespaces, managed by the NS 
module: + +- **HTML** (`LXB_NS_HTML`) +- **SVG** (`LXB_NS_SVG`) +- **MathML** (`LXB_NS_MATH`) +- **XLink** (`LXB_NS_XLINK`) +- **XML** (`LXB_NS_XML`) +- **XMLNS** (`LXB_NS_XMLNS`) + +Namespace IDs are accessed via `lxb_dom_element_ns_id()` or the `ns` field of `lxb_dom_node_t`. + + +## Examples + +### Iterating Child Elements + +```c +#include <lexbor/html/html.h> +#include <lexbor/dom/dom.h> + +int +main(void) +{ + lxb_status_t status; + lxb_html_document_t *document; + lxb_dom_element_t *body; + lxb_dom_node_t *child; + + static const lxb_char_t html[] = + "<div>First</div><p>Second</p><span>Third</span>"; + + document = lxb_html_document_create(); + status = lxb_html_document_parse(document, html, sizeof(html) - 1); + if (status != LXB_STATUS_OK) { + goto failed; + } + + body = lxb_dom_interface_element(document->body); + + child = lxb_dom_node_first_child(lxb_dom_interface_node(body)); + while (child != NULL) { + if (lxb_dom_node_type(child) == LXB_DOM_NODE_TYPE_ELEMENT) { + const lxb_char_t *name; + name = lxb_dom_element_local_name(lxb_dom_interface_element(child), + NULL); + printf("Element: %s\n", (const char *) name); + } + + child = lxb_dom_node_next(child); + } + + lxb_html_document_destroy(document); + return EXIT_SUCCESS; + +failed: + lxb_html_document_destroy(document); + return EXIT_FAILURE; +} +``` + +Expected output: +``` +Element: div +Element: p +Element: span +``` + +### Searching by Class Name + +```c +#include <lexbor/html/html.h> +#include <lexbor/dom/dom.h> + +int +main(void) +{ + lxb_status_t status; + lxb_html_document_t *document; + lxb_dom_collection_t *collection; + + static const lxb_char_t html[] = + "
<div class=\"active\">One</div>" + "<p class=\"active\">Two</p>" + "<span>Three</span>"; + + document = lxb_html_document_create(); + status = lxb_html_document_parse(document, html, sizeof(html) - 1); + if (status != LXB_STATUS_OK) { + goto failed; + } + + collection = lxb_dom_collection_make( + lxb_dom_interface_document(document), 16); + if (collection == NULL) { + goto failed; + } + + status = lxb_dom_elements_by_class_name( + lxb_dom_interface_element(document->body), + collection, + (const lxb_char_t *) "active", 6); + if (status != LXB_STATUS_OK) { + goto cleanup; + } + + for (size_t i = 0; i < lxb_dom_collection_length(collection); i++) { + lxb_dom_element_t *el = lxb_dom_collection_element(collection, i); + const lxb_char_t *name = lxb_dom_element_local_name(el, NULL); + printf("Found: %s\n", (const char *) name); + } + + lxb_dom_collection_destroy(collection, true); + lxb_html_document_destroy(document); + return EXIT_SUCCESS; + +cleanup: + lxb_dom_collection_destroy(collection, true); +failed: + lxb_html_document_destroy(document); + return EXIT_FAILURE; +} +``` + +Expected output: +``` +Found: div +Found: p +``` + +### Walking the DOM Tree + +```c +#include <lexbor/html/html.h> +#include <lexbor/dom/dom.h> + +static lexbor_action_t +walker(lxb_dom_node_t *node, void *ctx) +{ + size_t *count = (size_t *) ctx; + if (lxb_dom_node_type(node) == LXB_DOM_NODE_TYPE_ELEMENT) { + (*count)++; + } + return LEXBOR_ACTION_OK; +} + +int +main(void) +{ + lxb_status_t status; + lxb_html_document_t *document; + size_t count = 0; + + static const lxb_char_t html[] = + "

text

  • item
"; + + document = lxb_html_document_create(); + status = lxb_html_document_parse(document, html, sizeof(html) - 1); + if (status != LXB_STATUS_OK) { + lxb_html_document_destroy(document); + return EXIT_FAILURE; + } + + lxb_dom_node_simple_walk( + lxb_dom_interface_node(document->body), walker, &count); + + /* count includes itself */ + printf("Elements in body: %zu\n", count); + + lxb_html_document_destroy(document); + return EXIT_SUCCESS; +} +``` + +Expected output: +``` +Elements in body: 6 +``` + +The count includes ``, `
`, `

`, ``, `