Skip to content
This repository was archived by the owner on Jan 23, 2024. It is now read-only.

Commit d03fcbd

Browse files
xinghuadou-googleXinghua Dou
authored andcommitted
Fix bugs in the bytecode manipulator under Python 3.
Calculate offset from scratch for each breakpoint insertion. Update line number table entries instead of inserting a new one. Account for uneeded EXTENDED_ARGS when writing new instructions. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=189114944
1 parent 5f5d709 commit d03fcbd

File tree

3 files changed

+93
-29
lines changed

3 files changed

+93
-29
lines changed

src/googleclouddebugger/bytecode_breakpoint.cc

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ int BytecodeBreakpoint::SetBreakpoint(
9595

9696
std::unique_ptr<Breakpoint> breakpoint(new Breakpoint);
9797
breakpoint->code_object = ScopedPyCodeObject::NewReference(code_object);
98+
breakpoint->line = line;
9899
breakpoint->offset = lines_enumerator.offset();
99100
breakpoint->hit_callable = PythonCallback::Wrap(hit_callback);
100101
breakpoint->error_callback = error_callback;
@@ -251,15 +252,38 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) {
251252
for (auto it_entry = code->breakpoints.begin();
252253
it_entry != code->breakpoints.end();
253254
++it_entry, ++const_index) {
254-
const int offset = it_entry->first;
255+
int offset = it_entry->first;
256+
bool offset_found = true;
255257
const Breakpoint& breakpoint = *it_entry->second;
256258
DCHECK_EQ(offset, breakpoint.offset);
257259

258260
callbacks.push_back(breakpoint.hit_callable.get());
259261

260-
if (!bytecode_manipulator.InjectMethodCall(offset, const_index)) {
262+
#if PY_MAJOR_VERSION >= 3
263+
// In Python 3, since we allow upgrading of instructions to use
264+
// EXTENDED_ARG, the offsets for lines originally calculated might not be
265+
// accurate, so we need to recalculate them each insertion.
266+
offset_found = false;
267+
if (bytecode_manipulator.has_lnotab()) {
268+
ScopedPyObject lnotab(PyBytes_FromStringAndSize(
269+
reinterpret_cast<const char*>(bytecode_manipulator.lnotab().data()),
270+
bytecode_manipulator.lnotab().size()));
271+
CodeObjectLinesEnumerator lines_enumerator(code_object->co_firstlineno,
272+
lnotab.release());
273+
while (lines_enumerator.line_number() != breakpoint.line) {
274+
if (!lines_enumerator.Next()) {
275+
break;
276+
}
277+
offset = lines_enumerator.offset();
278+
}
279+
offset_found = lines_enumerator.line_number() == breakpoint.line;
280+
}
281+
#endif
282+
283+
if (!offset_found ||
284+
!bytecode_manipulator.InjectMethodCall(offset, const_index)) {
261285
LOG(WARNING) << "Failed to insert bytecode for breakpoint "
262-
<< breakpoint.cookie;
286+
<< breakpoint.cookie << " at line " << breakpoint.line;
263287
errors.push_back(breakpoint.error_callback);
264288
}
265289
}
@@ -299,5 +323,3 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) {
299323

300324
} // namespace cdbg
301325
} // namespace devtools
302-
303-

src/googleclouddebugger/bytecode_breakpoint.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ class BytecodeBreakpoint {
6161
// Method in which the breakpoint is set.
6262
ScopedPyCodeObject code_object;
6363

64+
// Line number on which the breakpoint is set.
65+
int line;
66+
6467
// Offset to the instruction on which the breakpoint is set.
6568
int offset;
6669

src/googleclouddebugger/bytecode_manipulator.cc

Lines changed: 63 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -363,27 +363,6 @@ bool BytecodeManipulator::InjectMethodCall(
363363
}
364364

365365

366-
// Inserts an entry into the line number table for an insertion in the bytecode.
367-
static void InsertAndUpdateLnotab(int offset, int size,
368-
std::vector<uint8> *lnotab) {
369-
int current_offset = 0;
370-
for (auto it = lnotab->begin(); it != lnotab->end(); it += 2) {
371-
current_offset += it[0];
372-
373-
if (current_offset >= offset) {
374-
int remaining_size = size;
375-
while (remaining_size > 0) {
376-
const int current_size = std::min(remaining_size, 0xFF);
377-
it = lnotab->insert(it, static_cast<uint8>(current_size)) + 1;
378-
it = lnotab->insert(it, 0) + 1;
379-
remaining_size -= current_size;
380-
}
381-
return;
382-
}
383-
}
384-
}
385-
386-
387366
// Use different algorithms to insert method calls for Python 2 and 3.
388367
// Technically the algorithm for Python 3 will work with Python 2, but because
389368
// it is more complicated and the issue of needing to upgrade branch
@@ -413,6 +392,42 @@ struct Insertion {
413392
// InsertAndUpdateBranchInstructions.
414393
static const int kMaxInsertionIterations = 10;
415394

395+
396+
// Updates the line number table for an insertion in the bytecode.
397+
// This is different than what the Python 2 version of InsertMethodCall() does.
398+
// It should be more accurate, but is confined to Python 3 only for safety.
399+
// This handles the case of adding insertion for EXTENDED_ARG better.
400+
// Example for inserting 2 bytes at offset 2:
401+
// lnotab: [{2, 1}, {4, 1}] // {offset_delta, line_delta}
402+
// Old algorithm: [{2, 0}, {2, 1}, {4, 1}]
403+
// New algorithm: [{2, 1}, {6, 1}]
404+
// In the old version, trying to get the offset to insert a breakpoint right
405+
// before line 1 would result in an offset of 2, which is inaccurate as the
406+
// instruction before is an EXTENDED_ARG which will now be applied to the first
407+
// instruction inserted instead of its original target.
408+
static void InsertAndUpdateLnotab(int offset, int size,
409+
std::vector<uint8>* lnotab) {
410+
int current_offset = 0;
411+
for (auto it = lnotab->begin(); it != lnotab->end(); it += 2) {
412+
current_offset += it[0];
413+
414+
if (current_offset > offset) {
415+
int remaining_size = it[0] + size;
416+
int remaining_lines = it[1];
417+
it = lnotab->erase(it, it + 2);
418+
while (remaining_size > 0xFF) {
419+
it = lnotab->insert(it, 0xFF) + 1;
420+
it = lnotab->insert(it, 0) + 1;
421+
remaining_size -= 0xFF;
422+
}
423+
it = lnotab->insert(it, remaining_size) + 1;
424+
it = lnotab->insert(it, remaining_lines) + 1;
425+
return;
426+
}
427+
}
428+
}
429+
430+
416431
// Reserves space for instructions to be inserted into the bytecode, and
417432
// calculates the new offsets and arguments of branch instructions.
418433
// Returns true if the calculation was successful, and false if too many
@@ -484,7 +499,11 @@ static bool InsertAndUpdateBranchInstructions(
484499
if (opcode_type == BRANCH_DELTA_OPCODE) {
485500
// For relative branches, the argument needs to be updated if the
486501
// insertion is between the instruction and the target.
487-
int32 target = it->current_offset + instruction.size + arg;
502+
// The Python compiler sometimes prematurely adds EXTENDED_ARG with an
503+
// argument of 0 even when it is not required. This needs to be taken
504+
// into account when calculating the target of a branch instruction.
505+
int inst_size = std::max(instruction.size, it->original_size);
506+
int32 target = it->current_offset + inst_size + arg;
488507
need_to_update = it->current_offset < insertion.current_offset &&
489508
insertion.current_offset < target;
490509
} else if (opcode_type == BRANCH_ABSOLUTE_OPCODE) {
@@ -578,12 +597,17 @@ bool BytecodeManipulator::InsertMethodCall(
578597
for (auto it = updated_instructions.begin();
579598
it < updated_instructions.end(); it++) {
580599
int size_diff = it->instruction.size - it->original_size;
581-
uint32 offset = it->current_offset;
600+
int offset = it->current_offset;
582601
if (size_diff > 0) {
583602
data->bytecode.insert(data->bytecode.begin() + offset, size_diff, NOP);
584603
if (has_lnotab_) {
585604
InsertAndUpdateLnotab(it->current_offset, size_diff, &data->lnotab);
586605
}
606+
} else if (size_diff < 0) {
607+
// The Python compiler sometimes prematurely adds EXTENDED_ARG with an
608+
// argument of 0 even when it is not required. Just leave it there, but
609+
// start writing the instruction after them.
610+
offset -= size_diff;
587611
}
588612
WriteInstruction(data->bytecode.begin() + offset, it->instruction);
589613
}
@@ -676,7 +700,22 @@ bool BytecodeManipulator::InsertMethodCall(
676700

677701
// Insert a new entry into line table to account for the new bytecode.
678702
if (has_lnotab_) {
679-
InsertAndUpdateLnotab(offset, size, &data->lnotab);
703+
int current_offset = 0;
704+
for (auto it = data->lnotab.begin(); it != data->lnotab.end(); it += 2) {
705+
current_offset += it[0];
706+
707+
if (current_offset >= offset) {
708+
int remaining_size = size;
709+
while (remaining_size > 0) {
710+
const int current_size = std::min(remaining_size, 0xFF);
711+
it = data->lnotab.insert(it, static_cast<uint8>(current_size)) + 1;
712+
it = data->lnotab.insert(it, 0) + 1;
713+
remaining_size -= current_size;
714+
}
715+
716+
break;
717+
}
718+
}
680719
}
681720

682721
return true;

0 commit comments

Comments
 (0)