Skip to content

Commit e358dc5

Browse files
committed
Add structured error API and CSV parsing support
- Add get_error_info() to return error details as a dict
- Add get_error_message() for simple error string access
- Add read_csv() for parsing CSV content in WASM
- Use heap allocation for large CSV handling (avoids stack overflow)
- Export _malloc/_free for JS heap management
- Force single-threaded mode (-p 1) for WASM stability
1 parent 95e9d31 commit e358dc5

3 files changed

Lines changed: 286 additions & 51 deletions

File tree

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ DEBUG_LDFLAGS = \
100100
# Core functions for full SDK support
101101
EXPORTED_FUNCTIONS = [ \
102102
'_main', \
103+
'_malloc', \
104+
'_free', \
103105
'_version_str', \
104106
'_null', \
105107
'_drop_obj', \
@@ -134,6 +136,8 @@ EXPORTED_FUNCTIONS = [ \
134136
'_is_obj_vector', \
135137
'_is_obj_null', \
136138
'_is_obj_error', \
139+
'_get_error_info', \
140+
'_get_error_message', \
137141
'_get_obj_rc', \
138142
'_get_data_ptr', \
139143
'_get_element_size', \
@@ -162,6 +166,7 @@ EXPORTED_FUNCTIONS = [ \
162166
'_read_timestamp', \
163167
'_read_symbol_id', \
164168
'_symbol_to_str', \
169+
'_read_csv', \
165170
'_init_vector', \
166171
'_init_list', \
167172
'_vec_at_idx', \

src/main.c

Lines changed: 214 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@
1515

1616
// Rayforce headers (from RAYFORCE_SRC via -I flag)
1717
#include "binary.h"
18+
#include "error.h" // Include error.h for err_length
1819
#include "eval.h"
1920
#include "format.h"
21+
#include "io.h" // Include io.h for io_read_csv
22+
#include "items.h"
2023
#include "misc.h"
2124
#include "query.h"
2225
#include "runtime.h"
@@ -223,6 +226,33 @@ EMSCRIPTEN_KEEPALIVE b8_t is_obj_error(obj_p obj) {
223226
return IS_ERR(obj) ? B8_TRUE : B8_FALSE;
224227
}
225228

229+
// Get error info as a dict with structured error information
230+
// Returns a dict with keys like: code, message, expected, got, etc.
231+
EMSCRIPTEN_KEEPALIVE obj_p get_error_info(obj_p err) {
232+
if (err == NULL || !IS_ERR(err))
233+
return NULL_OBJ;
234+
return err_info(err);
235+
}
236+
237+
// Get error message as a simple string (doesn't allocate, returns pointer to static or inline data)
238+
EMSCRIPTEN_KEEPALIVE lit_p get_error_message(obj_p err) {
239+
if (UNLIKELY(err == NULL || !IS_ERR(err))) {
240+
return "Unknown error";
241+
}
242+
243+
// For EC_USER errors, get the inline message
244+
err_code_t code = err_code(err);
245+
if (code == EC_USER) {
246+
if (LIKELY(err->len > 0)) {
247+
return (lit_p)(err + 1); // Message stored after struct
248+
}
249+
return "Out of memory"; // Fallback for OOM errors with no message
250+
}
251+
252+
// For other error types, return the error code name
253+
return err_name(code);
254+
}
255+
226256
// Get reference count
227257
EMSCRIPTEN_KEEPALIVE u32_t get_obj_rc(obj_p obj) {
228258
if (obj == NULL)
@@ -630,6 +660,187 @@ EMSCRIPTEN_KEEPALIVE obj_p deserialize(obj_p buf) {
630660

631661
// Exported wrapper: look up the name for a type code by forwarding to type_name().
EMSCRIPTEN_KEEPALIVE str_p get_type_name(i8_t type) {
    str_p name = type_name(type);
    return name;
}
632662

663+
// ============================================================================
664+
// CSV Parsing
665+
// ============================================================================
666+
667+
// Read CSV from string content
668+
// - Assumes types are all strings (C8) for simplicity in WASM
669+
// - Infers column names from first line
670+
EMSCRIPTEN_KEEPALIVE obj_p read_csv(lit_p content, i64_t len) {
671+
i64_t i, l, lines;
672+
str_p buf, pos, line, prev;
673+
obj_p names, cols, res;
674+
c8_t sep = ',';
675+
676+
if (content == NULL) {
677+
printf("ERROR: read_csv content is NULL\n");
678+
return err_user("CSV content is NULL");
679+
}
680+
681+
// We receive a heap pointer and byte length from JS.
682+
// Trust the length passed in.
683+
if (len <= 0) {
684+
printf("ERROR: read_csv len <= 0\n");
685+
return err_user("CSV length is zero or negative");
686+
}
687+
688+
printf("INFO: read_csv starting, len=%lld bytes (%.1f MB)\n", len, len / (1024.0 * 1024.0));
689+
690+
// Since we receive a JS string pointer, we shouldn't modify it.
691+
// However, parse_csv_lines expects a buffer it can read.
692+
// We treat 'content' as the buffer.
693+
buf = (str_p)content;
694+
695+
// Count lines
696+
lines = 0;
697+
pos = buf;
698+
while ((pos = (str_p)memchr(pos, '\n', buf + len - pos))) {
699+
++lines;
700+
++pos;
701+
}
702+
703+
if (len > 0 && buf[len - 1] != '\n') {
704+
++lines;
705+
}
706+
707+
if (lines == 0) {
708+
printf("ERROR: read_csv no lines found\n");
709+
return err_user("CSV has no lines");
710+
}
711+
712+
printf("INFO: read_csv found %lld lines\n", lines);
713+
714+
// Parse header
715+
pos = (str_p)memchr(buf, '\n', len);
716+
i64_t header_len = (pos == NULL) ? len : (pos - buf);
717+
line = (pos == NULL) ? NULL : (pos + 1);
718+
719+
// Count columns based on separator
720+
l = 1;
721+
pos = buf;
722+
while ((pos = (str_p)memchr(pos, sep, header_len - (pos - buf)))) {
723+
++l;
724+
++pos;
725+
}
726+
727+
printf("INFO: read_csv found %lld columns\n", l);
728+
729+
names = SYMBOL(l);
730+
if (names == NULL) {
731+
printf("ERROR: read_csv failed to allocate names vector\n");
732+
return err_user("Failed to allocate column names");
733+
}
734+
735+
pos = buf;
736+
i64_t remaining = header_len;
737+
738+
for (i = 0; i < l; i++) {
739+
prev = pos;
740+
str_p next_sep = (str_p)memchr(pos, sep, remaining);
741+
742+
if (next_sep == NULL) {
743+
// Last column
744+
if (remaining > 0 && prev[remaining - 1] == '\r') {
745+
AS_SYMBOL(names)
746+
[i] = io_symbol_from_str_trimmed(prev, remaining - 1);
747+
} else {
748+
AS_SYMBOL(names)[i] = io_symbol_from_str_trimmed(prev, remaining);
749+
}
750+
pos += remaining;
751+
remaining = 0;
752+
} else {
753+
AS_SYMBOL(names)
754+
[i] = io_symbol_from_str_trimmed(prev, next_sep - prev);
755+
remaining -= (next_sep - prev + 1);
756+
pos = next_sep + 1;
757+
}
758+
}
759+
760+
// Alloc types - default to C8 (String)
761+
i8_t *type_arr = (i8_t *)malloc(l * sizeof(i8_t));
762+
if (type_arr == NULL) {
763+
printf("ERROR: read_csv failed to allocate type_arr\n");
764+
drop_obj(names);
765+
return err_user("Failed to allocate type array");
766+
}
767+
for (i = 0; i < l; i++) {
768+
type_arr[i] = TYPE_C8;
769+
}
770+
771+
// Exclude header from data lines
772+
lines--;
773+
if (lines < 0)
774+
lines = 0;
775+
776+
printf("INFO: read_csv allocating %lld columns x %lld rows\n", l, lines);
777+
778+
// Allocate columns
779+
cols = LIST(l);
780+
if (cols == NULL) {
781+
printf("ERROR: read_csv failed to allocate cols list\n");
782+
free(type_arr);
783+
drop_obj(names);
784+
return err_user("Failed to allocate columns list");
785+
}
786+
787+
for (i = 0; i < l; i++) {
788+
AS_LIST(cols)[i] = LIST(lines);
789+
if (AS_LIST(cols)[i] == NULL) {
790+
printf("ERROR: read_csv failed to allocate column %lld (lines=%lld)\n", i, lines);
791+
free(type_arr);
792+
drop_obj(names);
793+
drop_obj(cols);
794+
return err_user("Failed to allocate column data - file too large for memory");
795+
}
796+
}
797+
798+
printf("INFO: read_csv column allocation successful, parsing data...\n");
799+
800+
// parse lines
801+
// If line is NULL (only header), we skip parsing
802+
if (lines > 0 && line != NULL) {
803+
printf("INFO: read_csv calling io_read_csv for %lld lines...\n", lines);
804+
res = io_read_csv(type_arr, l, line, len - (line - buf), lines, cols, sep);
805+
printf("INFO: read_csv io_read_csv returned: %p, type=%d\n", res, res ? res->type : -999);
806+
807+
if (res && res->type == TYPE_ERR) {
808+
printf("ERROR: read_csv io_read_csv returned error\n");
809+
free(type_arr);
810+
drop_obj(names);
811+
drop_obj(cols);
812+
return res;
813+
}
814+
}
815+
816+
free(type_arr);
817+
818+
// Verify objects are still valid before creating table
819+
printf("INFO: read_csv verifying objects - names=%p type=%d len=%lld, cols=%p type=%d len=%lld\n",
820+
names, names ? names->type : -1, names ? names->len : -1,
821+
cols, cols ? cols->type : -1, cols ? cols->len : -1);
822+
823+
if (names == NULL || cols == NULL) {
824+
printf("ERROR: read_csv names or cols became NULL!\n");
825+
if (names) drop_obj(names);
826+
if (cols) drop_obj(cols);
827+
return err_user("Memory corruption - names or cols became NULL");
828+
}
829+
830+
printf("INFO: read_csv creating table...\n");
831+
obj_p t = table(names, cols);
832+
printf("INFO: read_csv table() returned: %p\n", t);
833+
if (t == NULL) {
834+
printf("ERROR: read_csv table() returned NULL - out of memory!\n");
835+
drop_obj(names);
836+
drop_obj(cols);
837+
return err_user("Out of memory: failed to allocate table structure");
838+
}
839+
840+
printf("INFO: read_csv success. Table: %p, rows=%lld, cols=%lld\n", t, lines, l);
841+
return t;
842+
}
843+
633844
// ============================================================================
634845
// Main Entry Point
635846
// ============================================================================
@@ -645,9 +856,10 @@ EMSCRIPTEN_KEEPALIVE i32_t main(i32_t argc, str_p argv[]) {
645856

646857
// Initialize runtime like Python binding does:
647858
// Pass -r 0 to disable REPL (we'll call eval_str directly from JS)
648-
str_p wasm_argv[] = {"rayforce-wasm", "-r", "0", NULL};
859+
// Pass -p 1 to force single-threaded mode (avoids pool crashes in WASM)
860+
str_p wasm_argv[] = {"rayforce-wasm", "-r", "0", "-p", "1", NULL};
649861
atexit((void (*)(void))runtime_destroy);
650-
runtime = runtime_create(3, wasm_argv);
862+
runtime = runtime_create(5, wasm_argv);
651863

652864
if (runtime == NULL) {
653865
printf("Failed to initialize Rayforce runtime\n");

0 commit comments

Comments
 (0)