@@ -87,7 +87,7 @@ impl CortexWasm {
8787 let query_lower = query. to_lowercase ( ) ;
8888 let query_words: Vec < & str > = query_lower. split_whitespace ( ) . collect ( ) ;
8989
90- let mut scored: Vec < SearchResult > = self . memories . iter ( ) . map ( |m | {
90+ let mut scored: Vec < SearchResult > = self . memories . iter ( ) . enumerate ( ) . map ( |( i , m ) | {
9191 let text_lower = m. text . to_lowercase ( ) ;
9292 let mut score: f32 = 0.0 ;
9393
@@ -104,8 +104,7 @@ impl CortexWasm {
104104 }
105105
106106 // Recency boost (newer = higher)
107- let idx = self . memories . iter ( ) . position ( |x| x. id == m. id ) . unwrap_or ( 0 ) ;
108- let recency = idx as f32 / self . memories . len ( ) . max ( 1 ) as f32 ;
107+ let recency = i as f32 / self . memories . len ( ) . max ( 1 ) as f32 ;
109108 score += recency * 0.2 ;
110109
111110 SearchResult {
@@ -191,22 +190,44 @@ impl CortexWasm {
191190 /// without breaking values that contain "and" ("Research and Development").
192191 /// Recurses for 3+ clauses. Accepts "I" prefix in second clause.
193192 fn extract_facts ( & mut self , text : & str ) {
193+ self . extract_facts_inner ( text, 0 ) ;
194+ }
195+
196+ fn extract_facts_inner ( & mut self , text : & str , depth : u8 ) {
197+ // Guard against unbounded recursion (crafted input with many " and work at " repetitions)
198+ if depth >= 10 {
199+ self . extract_single ( text. trim ( ) ) ;
200+ return ;
201+ }
202+
194203 let verb_prefixes = [
195204 "work at " , "work for " , "i work at " , "i work for " ,
196205 "i'm a " , "i am a " , "i'm an " , "i am an " ,
197206 "live in " , "i live in " , "i'm based in " , "i am based in " ,
198207 "based in " ,
199208 ] ;
200209
201- // Scan ALL " and " / " And " / " AND " positions to find clause boundaries.
202- // Search in original text to avoid Unicode byte offset mismatch.
203- let lower = text. to_lowercase ( ) ;
210+ // Search for " and " case-insensitively by scanning the original text.
211+ // We avoid lowercasing the whole string and using its byte offsets, because
212+ // to_lowercase() can change byte lengths (e.g. Turkish İ → i̇).
213+ let bytes = text. as_bytes ( ) ;
204214 let mut search_from = 0 ;
205- while let Some ( rel_pos) = lower[ search_from..] . find ( " and " ) {
206- let pos = search_from + rel_pos;
207- // Verify pos is valid in original text (ASCII " and " guarantees this for text before it,
208- // but lowercasing can shift bytes for chars like İ→i̇. Use original text search as fallback.)
209- if pos + 5 > text. len ( ) { break ; }
215+ while search_from + 5 <= bytes. len ( ) {
216+ let rest = & text[ search_from..] ;
217+ // Find next " and " (case-insensitive) in the original string
218+ let rel_pos = match rest. to_lowercase ( ) . find ( " and " ) {
219+ Some ( p) => p,
220+ None => break ,
221+ } ;
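222+ // Map the lowered-string offset back to the original text by counting characters.
223+ // NOTE(review): rel_pos is a byte offset into the lowercased copy, yet rest[..rel_pos] below slices the original; it can fall off a char boundary when lowercasing changes byte lengths — confirm.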
224+ let orig_char_pos = rest[ ..rel_pos] . chars ( ) . count ( ) ;
225+ let orig_byte_pos: usize = rest. chars ( ) . take ( orig_char_pos) . map ( |c| c. len_utf8 ( ) ) . sum ( ) ;
226+ let pos = search_from + orig_byte_pos;
227+ // Stop unless pos..pos+5 is in bounds and on char boundaries (NOTE: this does not check that those bytes actually spell " and ").
228+ if pos + 5 > text. len ( ) || !text. is_char_boundary ( pos) || !text. is_char_boundary ( pos + 5 ) {
229+ break ;
230+ }
210231 let after = text[ pos + 5 ..] . trim_start ( ) . to_lowercase ( ) ;
211232 if verb_prefixes. iter ( ) . any ( |p| after. starts_with ( p) ) {
212233 let first = text[ ..pos] . trim ( ) ;
@@ -222,7 +243,7 @@ impl CortexWasm {
222243 } else {
223244 second. to_string ( )
224245 } ;
225- self . extract_facts ( & normalized) ;
246+ self . extract_facts_inner ( & normalized, depth + 1 ) ;
226247 return ;
227248 }
228249 search_from = pos + 5 ;
@@ -259,7 +280,7 @@ impl CortexWasm {
259280 if let Some ( rest) = lower. strip_prefix ( pattern) {
260281 let obj = rest. split ( & [ ',' , '.' , '!' , '?' ] [ ..] ) . next ( ) . unwrap_or ( "" ) . trim ( ) ;
261282 if !obj. is_empty ( ) {
262- self . add_fact ( "User" , "is_a" , obj, 0.80 ) ;
283+ self . add_fact ( "User" , "is_a" , & capitalize ( obj) , 0.80 ) ;
263284 }
264285 }
265286 }
0 commit comments