-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathembedding.h
More file actions
125 lines (102 loc) · 5.26 KB
/
embedding.h
File metadata and controls
125 lines (102 loc) · 5.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/*
* embedding.h — EmbeddingGemma-300M sentence embedding + intent recognition.
*
* Zero-dependency C implementation of google/embeddinggemma-300m
* (Gemma 3 bidirectional transformer + mean pooling + dense projection).
*
* Produces 768-dimensional L2-normalized sentence embeddings suitable
* for cosine-similarity-based intent matching.
*
* Concurrency model:
* embedding_model is immutable after creation — share it freely across
* threads with no synchronization. Each concurrent request gets its own
* embedding_state which holds mutable scratch buffers.
*
* intent_recognizer is immutable during processing — register all intents
* before calling intent_recognizer_process from multiple threads. Each
* concurrent process call needs its own embedding_state.
*
* Usage:
* embedding_model *model = embedding_model_load("models/embeddinggemma");
* embedding_state *state = embedding_state_create(model, 0);  // 0 = model's max_seq_len
*
* float emb[768];
* embedding_model_embed(model, state, "hello world", emb, 768);
*
* embedding_state_free(state);
* embedding_model_free(model);
*/
#ifndef EMBEDDING_H
#define EMBEDDING_H
#ifdef __cplusplus
extern "C" {
#endif
/* ── Embedding model (immutable, thread-safe, load once) ───────── */
/* Opaque handle to a loaded model; the struct is only forward-declared in
 * this header. Obtained from embedding_model_load and released with
 * embedding_model_free. */
typedef struct embedding_model embedding_model;
/* Load an embedding model from disk.
 *   model_dir: directory that contains embedding.bin and tokenizer.bin.
 * Returns the loaded model, or NULL on failure. Release the result with
 * embedding_model_free. */
embedding_model *embedding_model_load(const char *model_dir);
/* Free a model and all of its weight data.
 *   model: model previously returned by embedding_model_load.
 * NOTE(review): the usage example at the top of this header frees the
 * embedding_state before the model; confirm whether that order is required. */
void embedding_model_free(embedding_model *model);
/* ── Embedding state (mutable, one per concurrent request) ─────── */
/* Opaque per-request inference state holding the mutable scratch buffers;
 * the struct is only forward-declared in this header. Each concurrent
 * request needs its own instance. */
typedef struct embedding_state embedding_state;
/* Create an inference state for the given model.
 * Lightweight: allocates the scratch buffers used by the transformer
 * forward pass.
 *   model:   model the state is created for.
 *   max_seq: limits the scratch buffer sizes; 0 means use the model's
 *            max_seq_len. For intent recognition, 64-128 is typically
 *            sufficient.
 * Create one state per thread or per request; a state must not be shared
 * across concurrent calls. Release it with embedding_state_free.
 * NOTE(review): the failure return value is not documented here; presumably
 * NULL, as with embedding_model_load. Confirm in the implementation. */
embedding_state *embedding_state_create(const embedding_model *model, int max_seq);
/* Free an inference state together with its scratch buffers.
 *   state: state previously returned by embedding_state_create. */
void embedding_state_free(embedding_state *state);
/* ── Embedding ─────────────────────────────────────────────────── */
/* Embed text into a 768-dimensional, L2-normalized vector.
 *   model:   shared, read-only; safe to use from multiple threads.
 *   state:   per-request scratch; must not be shared across concurrent calls.
 *   text:    the text to embed.
 *   out:     destination buffer; must point to at least `out_dim` floats.
 *   out_dim: size of `out` in floats (768 for full embeddings).
 * Returns 0 on success, -1 on error.
 * NOTE(review): what is written when out_dim is not 768 (truncation,
 * re-normalization, or an error) is not specified in this header. Confirm
 * in the implementation before relying on it. */
int embedding_model_embed(const embedding_model *model,
embedding_state *state,
const char *text,
float *out, int out_dim);
/* Cosine similarity between two L2-normalized embeddings.
 *   a, b: the two embeddings to compare.
 *   dim:  presumably the number of floats readable from each of a and b
 *         (TODO confirm).
 * Returns the similarity score.
 * NOTE(review): the contract asks for L2-normalized inputs; whether
 * non-normalized vectors are handled correctly is not stated here. */
float embedding_cosine_similarity(const float *a, const float *b, int dim);
/* ── Intent recognizer ─────────────────────────────────────────── */
/* Opaque handle to an intent recognizer; the struct is only forward-declared
 * in this header. Created by intent_recognizer_create and released with
 * intent_recognizer_free. */
typedef struct intent_recognizer intent_recognizer;
/* Callback type supplied to intent_recognizer_register.
 * intent_recognizer_process fires the callback of the best match when its
 * similarity is >= the recognizer threshold.
 *   trigger_phrase: presumably the registered phrase that matched (confirm).
 *   utterance:      presumably the utterance given to
 *                   intent_recognizer_process (confirm).
 *   similarity:     similarity score of the match.
 *   user_data:      presumably the pointer passed to
 *                   intent_recognizer_register (confirm).
 * NOTE(review): the lifetime of the two strings after the callback returns
 * is not specified in this header; copy them if they must be kept. */
typedef void (*intent_callback)(const char *trigger_phrase,
const char *utterance,
float similarity,
void *user_data);
/* Create an intent recognizer backed by an embedding model.
 *   model:     embedding model used to embed phrases.
 *   state:     embedding state used to compute trigger phrase embeddings
 *              during registration. Because intent_recognizer_register uses
 *              this same state, it must stay valid, and must not be used by
 *              another concurrent call, for as long as intents are being
 *              registered.
 *   threshold: similarity threshold; intent_recognizer_process fires a
 *              callback only when similarity >= threshold.
 * Release the recognizer with intent_recognizer_free.
 * NOTE(review): the failure return value is not documented here; presumably
 * NULL. Confirm in the implementation. */
intent_recognizer *intent_recognizer_create(const embedding_model *model,
embedding_state *state,
float threshold);
/* Register an intent trigger phrase.
 * The phrase embedding is computed immediately, using the state that was
 * provided to intent_recognizer_create.
 *   ir:             recognizer to add the intent to.
 *   trigger_phrase: phrase whose embedding utterances are matched against.
 *   callback:       function fired by intent_recognizer_process when this
 *                   intent is the best match.
 *   user_data:      caller pointer; presumably handed back to `callback`
 *                   unchanged (confirm).
 * Not thread-safe: register all intents before calling
 * intent_recognizer_process from multiple threads.
 * NOTE(review): the return value is not documented here; presumably 0 on
 * success and -1 on error, matching embedding_model_embed. TODO confirm. */
int intent_recognizer_register(intent_recognizer *ir,
const char *trigger_phrase,
intent_callback callback,
void *user_data);
/* Remove a registered intent.
 *   ir:             recognizer to remove the intent from.
 *   trigger_phrase: phrase identifying the intent to remove.
 * Returns 0 if the intent was found and removed.
 * NOTE(review): the value returned when the phrase is not registered is not
 * documented here; presumably non-zero. Confirm in the implementation.
 * Not thread-safe: do not call concurrently with intent_recognizer_process. */
int intent_recognizer_unregister(intent_recognizer *ir,
const char *trigger_phrase);
/* Match an utterance against the registered intents.
 * Fires the callback of the best match if its similarity is >= threshold.
 *   ir:        recognizer holding the registered intents.
 *   model:     shared, read-only.
 *   state:     per-request scratch; must not be shared across concurrent
 *              calls.
 *   utterance: text to match.
 * Returns 1 if an intent matched, 0 otherwise.
 * Thread-safe as long as intents are not being modified (no concurrent
 * register or unregister) and each concurrent call uses its own state. */
int intent_recognizer_process(const intent_recognizer *ir,
const embedding_model *model,
embedding_state *state,
const char *utterance);
/* Set the recognizer's similarity threshold; intent_recognizer_process fires
 * a callback only when similarity >= threshold.
 * Presumably replaces the value passed to intent_recognizer_create (confirm).
 * NOTE(review): thread-safety relative to concurrent
 * intent_recognizer_process calls is not documented in this header. */
void intent_recognizer_set_threshold(intent_recognizer *ir, float threshold);
/* Return the recognizer's similarity threshold; presumably the value last
 * given to intent_recognizer_create or intent_recognizer_set_threshold
 * (confirm). */
float intent_recognizer_get_threshold(const intent_recognizer *ir);
/* Free a recognizer created by intent_recognizer_create.
 * NOTE(review): this header does not say whether the model and state passed
 * to intent_recognizer_create are also released; since embedding_model_free
 * and embedding_state_free exist separately, presumably the caller still
 * owns them. Confirm in the implementation. */
void intent_recognizer_free(intent_recognizer *ir);
#ifdef __cplusplus
}
#endif
#endif /* EMBEDDING_H */