@@ -283,7 +283,6 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
283283 case REOP_loop :
284284 case REOP_lookahead :
285285 case REOP_negative_lookahead :
286- case REOP_bne_char_pos :
287286 val = get_u32 (buf + pos + 1 );
288287 val += (pos + 5 );
289288 printf (" %u" , val );
@@ -921,21 +920,17 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
921920}
922921
923922/* Return:
924- 1 if the opcodes in bc_buf[] always advance the character pointer.
925- 0 if the character pointer may not be advanced.
926- -1 if the code may depend on side effects of its previous execution (backreference)
923+ - true if the opcodes may not advance the char pointer
924+ - false if the opcodes always advance the char pointer
927925*/
928- static int re_check_advance (const uint8_t * bc_buf , int bc_buf_len )
926+ static BOOL re_need_check_advance (const uint8_t * bc_buf , int bc_buf_len )
929927{
930- int pos , opcode , ret , len , i ;
931- uint32_t val , last ;
932- BOOL has_back_reference ;
933- uint8_t capture_bitmap [CAPTURE_COUNT_MAX ];
928+ int pos , opcode , len ;
929+ uint32_t val ;
930+ BOOL ret ;
934931
935- ret = -2 ; /* not known yet */
932+ ret = TRUE;
936933 pos = 0 ;
937- has_back_reference = FALSE;
938- memset (capture_bitmap , 0 , sizeof (capture_bitmap ));
939934
940935 while (pos < bc_buf_len ) {
941936 opcode = bc_buf [pos ];
@@ -955,8 +950,7 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
955950 case REOP_dot :
956951 case REOP_any :
957952 simple_char :
958- if (ret == -2 )
959- ret = 1 ;
953+ ret = FALSE;
960954 break ;
961955 case REOP_line_start :
962956 case REOP_line_end :
@@ -970,41 +964,16 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
970964 break ;
971965 case REOP_save_start :
972966 case REOP_save_end :
973- val = bc_buf [pos + 1 ];
974- capture_bitmap [val ] |= 1 ;
975- break ;
976967 case REOP_save_reset :
977- {
978- val = bc_buf [pos + 1 ];
979- last = bc_buf [pos + 2 ];
980- while (val < last )
981- capture_bitmap [val ++ ] |= 1 ;
982- }
983- break ;
984968 case REOP_back_reference :
985969 case REOP_backward_back_reference :
986- val = bc_buf [pos + 1 ];
987- capture_bitmap [val ] |= 2 ;
988- has_back_reference = TRUE;
989970 break ;
990971 default :
991972 /* safe behavior: we cannot predict the outcome */
992- if (ret == -2 )
993- ret = 0 ;
994- break ;
973+ return TRUE;
995974 }
996975 pos += len ;
997976 }
998- if (has_back_reference ) {
999- /* check if there is back reference which references a capture
1000- made in the some code */
1001- for (i = 0 ; i < CAPTURE_COUNT_MAX ; i ++ ) {
1002- if (capture_bitmap [i ] == 3 )
1003- return -1 ;
1004- }
1005- }
1006- if (ret == -2 )
1007- ret = 0 ;
1008977 return ret ;
1009978}
1010979
@@ -1583,8 +1552,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
15831552 running the atom after the first quant_min times,
15841553 then there is no match. We remove this test when we
15851554 are sure the atom always advances the position. */
1586- add_zero_advance_check = ( re_check_advance (s -> byte_code .buf + last_atom_start ,
1587- s -> byte_code .size - last_atom_start ) == 0 );
1555+ add_zero_advance_check = re_need_check_advance (s -> byte_code .buf + last_atom_start ,
1556+ s -> byte_code .size - last_atom_start );
15881557
15891558 {
15901559 int len , pos ;
@@ -1601,38 +1570,34 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
16011570 }
16021571 if (quant_max == 0 ) {
16031572 s -> byte_code .size = last_atom_start ;
1604- } else if (quant_max == 1 ) {
1605- if (dbuf_insert (& s -> byte_code , last_atom_start , 5 ))
1606- goto out_of_memory ;
1607- s -> byte_code .buf [last_atom_start ] = REOP_split_goto_first +
1608- greedy ;
1609- put_u32 (s -> byte_code .buf + last_atom_start + 1 , len );
1610- } else if (quant_max == INT32_MAX ) {
1573+ } else if (quant_max == 1 || quant_max == INT32_MAX ) {
1574+ BOOL has_goto = (quant_max == INT32_MAX );
16111575 if (dbuf_insert (& s -> byte_code , last_atom_start , 5 + add_zero_advance_check ))
16121576 goto out_of_memory ;
16131577 s -> byte_code .buf [last_atom_start ] = REOP_split_goto_first +
16141578 greedy ;
16151579 put_u32 (s -> byte_code .buf + last_atom_start + 1 ,
1616- len + 5 + add_zero_advance_check );
1580+ len + 5 * has_goto + add_zero_advance_check * 2 );
16171581 if (add_zero_advance_check ) {
1618- /* avoid infinite loop by stoping the
1619- recursion if no advance was made in the
1620- atom (only works if the atom has no
1621- side effect) */
16221582 s -> byte_code .buf [last_atom_start + 1 + 4 ] = REOP_push_char_pos ;
1623- re_emit_goto (s , REOP_bne_char_pos , last_atom_start );
1624- } else {
1625- re_emit_goto (s , REOP_goto , last_atom_start );
1583+ re_emit_op (s , REOP_check_advance );
16261584 }
1585+ if (has_goto )
1586+ re_emit_goto (s , REOP_goto , last_atom_start );
16271587 } else {
1628- if (dbuf_insert (& s -> byte_code , last_atom_start , 10 ))
1588+ if (dbuf_insert (& s -> byte_code , last_atom_start , 10 + add_zero_advance_check ))
16291589 goto out_of_memory ;
16301590 pos = last_atom_start ;
16311591 s -> byte_code .buf [pos ++ ] = REOP_push_i32 ;
16321592 put_u32 (s -> byte_code .buf + pos , quant_max );
16331593 pos += 4 ;
16341594 s -> byte_code .buf [pos ++ ] = REOP_split_goto_first + greedy ;
1635- put_u32 (s -> byte_code .buf + pos , len + 5 );
1595+ put_u32 (s -> byte_code .buf + pos , len + 5 + add_zero_advance_check * 2 );
1596+ pos += 4 ;
1597+ if (add_zero_advance_check ) {
1598+ s -> byte_code .buf [pos ++ ] = REOP_push_char_pos ;
1599+ re_emit_op (s , REOP_check_advance );
1600+ }
16361601 re_emit_goto (s , REOP_loop , last_atom_start + 5 );
16371602 re_emit_op (s , REOP_drop );
16381603 }
@@ -1656,22 +1621,25 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
16561621 if (quant_max == INT32_MAX ) {
16571622 pos = s -> byte_code .size ;
16581623 re_emit_op_u32 (s , REOP_split_goto_first + greedy ,
1659- len + 5 + add_zero_advance_check );
1624+ len + 5 + add_zero_advance_check * 2 );
16601625 if (add_zero_advance_check )
16611626 re_emit_op (s , REOP_push_char_pos );
16621627 /* copy the atom */
16631628 dbuf_put_self (& s -> byte_code , last_atom_start , len );
16641629 if (add_zero_advance_check )
1665- re_emit_goto (s , REOP_bne_char_pos , pos );
1666- else
1667- re_emit_goto (s , REOP_goto , pos );
1630+ re_emit_op (s , REOP_check_advance );
1631+ re_emit_goto (s , REOP_goto , pos );
16681632 } else if (quant_max > quant_min ) {
16691633 re_emit_op_u32 (s , REOP_push_i32 , quant_max - quant_min );
16701634 pos = s -> byte_code .size ;
1671- re_emit_op_u32 (s , REOP_split_goto_first + greedy , len + 5 );
1635+ re_emit_op_u32 (s , REOP_split_goto_first + greedy ,
1636+ len + 5 + add_zero_advance_check * 2 );
1637+ if (add_zero_advance_check )
1638+ re_emit_op (s , REOP_push_char_pos );
16721639 /* copy the atom */
16731640 dbuf_put_self (& s -> byte_code , last_atom_start , len );
1674-
1641+ if (add_zero_advance_check )
1642+ re_emit_op (s , REOP_check_advance );
16751643 re_emit_goto (s , REOP_loop , pos );
16761644 re_emit_op (s , REOP_drop );
16771645 }
@@ -1785,7 +1753,7 @@ static int lre_compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
17851753 }
17861754 break ;
17871755 case REOP_drop :
1788- case REOP_bne_char_pos :
1756+ case REOP_check_advance :
17891757 assert (stack_size > 0 );
17901758 stack_size -- ;
17911759 break ;
@@ -2281,11 +2249,9 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
22812249 case REOP_push_char_pos :
22822250 stack [stack_len ++ ] = (uintptr_t )cptr ;
22832251 break ;
2284- case REOP_bne_char_pos :
2285- val = get_u32 (pc );
2286- pc += 4 ;
2287- if (stack [-- stack_len ] != (uintptr_t )cptr )
2288- pc += (int )val ;
2252+ case REOP_check_advance :
2253+ if (stack [-- stack_len ] == (uintptr_t )cptr )
2254+ goto no_match ;
22892255 break ;
22902256 case REOP_word_boundary :
22912257 case REOP_not_word_boundary :
0 commit comments