@@ -363,27 +363,6 @@ bool BytecodeManipulator::InjectMethodCall(
363363}
364364
365365
366- // Inserts an entry into the line number table for an insertion in the bytecode.
367- static void InsertAndUpdateLnotab (int offset, int size,
368- std::vector<uint8> *lnotab) {
369- int current_offset = 0 ;
370- for (auto it = lnotab->begin (); it != lnotab->end (); it += 2 ) {
371- current_offset += it[0 ];
372-
373- if (current_offset >= offset) {
374- int remaining_size = size;
375- while (remaining_size > 0 ) {
376- const int current_size = std::min (remaining_size, 0xFF );
377- it = lnotab->insert (it, static_cast <uint8>(current_size)) + 1 ;
378- it = lnotab->insert (it, 0 ) + 1 ;
379- remaining_size -= current_size;
380- }
381- return ;
382- }
383- }
384- }
385-
386-
387366// Use different algorithms to insert method calls for Python 2 and 3.
388367// Technically the algorithm for Python 3 will work with Python 2, but because
389368// it is more complicated and the issue of needing to upgrade branch
@@ -413,6 +392,42 @@ struct Insertion {
413392// InsertAndUpdateBranchInstructions.
414393static const int kMaxInsertionIterations = 10 ;
415394
395+
396+ // Updates the line number table for an insertion in the bytecode.
397+ // This is different than what the Python 2 version of InsertMethodCall() does.
398+ // It should be more accurate, but is confined to Python 3 only for safety.
399+ // This handles the case of adding insertion for EXTENDED_ARG better.
400+ // Example for inserting 2 bytes at offset 2:
401+ // lnotab: [{2, 1}, {4, 1}] // {offset_delta, line_delta}
402+ // Old algorithm: [{2, 0}, {2, 1}, {4, 1}]
403+ // New algorithm: [{2, 1}, {6, 1}]
404+ // In the old version, trying to get the offset to insert a breakpoint right
405+ // before line 1 would result in an offset of 2, which is inaccurate as the
406+ // instruction before is an EXTENDED_ARG which will now be applied to the first
407+ // instruction inserted instead of its original target.
408+ static void InsertAndUpdateLnotab (int offset, int size,
409+ std::vector<uint8>* lnotab) {
410+ int current_offset = 0 ;
411+ for (auto it = lnotab->begin (); it != lnotab->end (); it += 2 ) {
412+ current_offset += it[0 ];
413+
414+ if (current_offset > offset) {
415+ int remaining_size = it[0 ] + size;
416+ int remaining_lines = it[1 ];
417+ it = lnotab->erase (it, it + 2 );
418+ while (remaining_size > 0xFF ) {
419+ it = lnotab->insert (it, 0xFF ) + 1 ;
420+ it = lnotab->insert (it, 0 ) + 1 ;
421+ remaining_size -= 0xFF ;
422+ }
423+ it = lnotab->insert (it, remaining_size) + 1 ;
424+ it = lnotab->insert (it, remaining_lines) + 1 ;
425+ return ;
426+ }
427+ }
428+ }
429+
430+
416431// Reserves space for instructions to be inserted into the bytecode, and
417432// calculates the new offsets and arguments of branch instructions.
418433// Returns true if the calculation was successful, and false if too many
@@ -484,7 +499,11 @@ static bool InsertAndUpdateBranchInstructions(
484499 if (opcode_type == BRANCH_DELTA_OPCODE) {
485500 // For relative branches, the argument needs to be updated if the
486501 // insertion is between the instruction and the target.
487- int32 target = it->current_offset + instruction.size + arg;
502+ // The Python compiler sometimes prematurely adds EXTENDED_ARG with an
503+ // argument of 0 even when it is not required. This needs to be taken
504+ // into account when calculating the target of a branch instruction.
505+ int inst_size = std::max (instruction.size , it->original_size );
506+ int32 target = it->current_offset + inst_size + arg;
488507 need_to_update = it->current_offset < insertion.current_offset &&
489508 insertion.current_offset < target;
490509 } else if (opcode_type == BRANCH_ABSOLUTE_OPCODE) {
@@ -578,12 +597,17 @@ bool BytecodeManipulator::InsertMethodCall(
578597 for (auto it = updated_instructions.begin ();
579598 it < updated_instructions.end (); it++) {
580599 int size_diff = it->instruction .size - it->original_size ;
581- uint32 offset = it->current_offset ;
600+ int offset = it->current_offset ;
582601 if (size_diff > 0 ) {
583602 data->bytecode .insert (data->bytecode .begin () + offset, size_diff, NOP);
584603 if (has_lnotab_) {
585604 InsertAndUpdateLnotab (it->current_offset , size_diff, &data->lnotab );
586605 }
606+ } else if (size_diff < 0 ) {
607+ // The Python compiler sometimes prematurely adds EXTENDED_ARG with an
608+ // argument of 0 even when it is not required. Just leave it there, but
609+ // start writing the instruction after them.
610+ offset -= size_diff;
587611 }
588612 WriteInstruction (data->bytecode .begin () + offset, it->instruction );
589613 }
@@ -676,7 +700,22 @@ bool BytecodeManipulator::InsertMethodCall(
676700
677701 // Insert a new entry into line table to account for the new bytecode.
678702 if (has_lnotab_) {
679- InsertAndUpdateLnotab (offset, size, &data->lnotab );
703+ int current_offset = 0 ;
704+ for (auto it = data->lnotab .begin (); it != data->lnotab .end (); it += 2 ) {
705+ current_offset += it[0 ];
706+
707+ if (current_offset >= offset) {
708+ int remaining_size = size;
709+ while (remaining_size > 0 ) {
710+ const int current_size = std::min (remaining_size, 0xFF );
711+ it = data->lnotab .insert (it, static_cast <uint8>(current_size)) + 1 ;
712+ it = data->lnotab .insert (it, 0 ) + 1 ;
713+ remaining_size -= current_size;
714+ }
715+
716+ break ;
717+ }
718+ }
680719 }
681720
682721 return true ;
0 commit comments