diff --git a/selectolax/lexbor.pxd b/selectolax/lexbor.pxd index 361eb53..5a7ef1d 100644 --- a/selectolax/lexbor.pxd +++ b/selectolax/lexbor.pxd @@ -1,6 +1,5 @@ from libc.stdint cimport uint8_t, uint32_t, uintptr_t - cdef extern from "lexbor/core/core.h" nogil: ctypedef uint32_t lxb_codepoint_t ctypedef unsigned char lxb_char_t @@ -32,6 +31,19 @@ cdef extern from "lexbor/core/core.h" nogil: lexbor_str_t* lexbor_str_create() lxb_char_t * lexbor_str_data_noi(lexbor_str_t *str) +cdef extern from "lexbor/core/lexbor.h" nogil: + ctypedef void *(*lexbor_memory_malloc_f)(size_t size) nogil + ctypedef void *(*lexbor_memory_realloc_f)(void *dst, size_t size) nogil + ctypedef void *(*lexbor_memory_calloc_f)(size_t num, size_t size) nogil + ctypedef void (*lexbor_memory_free_f)(void *dst) nogil + lxb_status_t lexbor_memory_setup( + lexbor_memory_malloc_f new_malloc, + lexbor_memory_realloc_f new_realloc, + lexbor_memory_calloc_f new_calloc, + lexbor_memory_free_f new_free + ) + + cdef extern from "lexbor/html/html.h" nogil: ctypedef unsigned int lxb_html_document_opt_t diff --git a/selectolax/lexbor.pyx b/selectolax/lexbor.pyx index 6d036ca..9866131 100644 --- a/selectolax/lexbor.pyx +++ b/selectolax/lexbor.pyx @@ -1,5 +1,11 @@ from cpython.bool cimport bool - +from cpython.exc cimport PyErr_SetObject +from cpython.mem cimport ( + PyMem_RawCalloc, + PyMem_RawFree, + PyMem_RawMalloc, + PyMem_RawRealloc +) _ENCODING = 'UTF-8' include "base.pxi" @@ -47,6 +53,7 @@ cdef class LexborHTMLParser: """ cdef size_t html_len cdef object bytes_html + self._is_fragment = is_fragment self._fragment_document = NULL self._selector = None @@ -752,3 +759,17 @@ cdef class LexborHTMLParser: dom_node = element return LexborNode.new(dom_node, self) + +# Install CPython's PyMem_Raw* functions as lexbor's malloc/realloc/calloc/free +# hooks via lexbor_memory_setup(). NOTE(review): the raw domain is not +# managed by Python's garbage collector, and the CPython memory docs list +# plain malloc (not mimalloc) as its default allocator -- confirm intent. 
+if lexbor_memory_setup( PyMem_RawMalloc, PyMem_RawRealloc, PyMem_RawCalloc, PyMem_RawFree +) != LXB_STATUS_OK: # This will almost never happen due to the code in both the Windows and POSIX versions # but if something were to happen this exception on import should be triggered (NOTE(review): the message below misspells "initialize")... raise SelectolaxError("Can't initalize allocators from lexbor_memory_setup(...)")