Skip to content

Commit ecd8ca0

Browse files
committed
debuginfo: slices are DW_TAG_array_type's
1 parent 69b7853 commit ecd8ca0

File tree

5 files changed

+77
-45
lines changed

5 files changed

+77
-45
lines changed

compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs

Lines changed: 31 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,20 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
152152
cx.size_and_align_of(Ty::new_mut_ptr(cx.tcx, pointee_type))
153153
);
154154

155-
let pointee_type_di_node = type_di_node(cx, pointee_type);
155+
let pointee_type_di_node = match pointee_type.kind() {
156+
// `&[T]` will look like `{ data_ptr: *const T, length: usize }`
157+
ty::Slice(element_type) => type_di_node(cx, *element_type),
158+
// `&str` will look like `{ data_ptr: *const u8, length: usize }`
159+
ty::Str => type_di_node(cx, cx.tcx.types.u8),
160+
161+
// `&dyn K` will look like `{ pointer: _, vtable: _ }`
162+
// any ADT `Foo` whose last field is unsized (e.g. `[_]` or `dyn _`)
163+
// will look like `{ data_ptr: *const Foo, length: usize }`
164+
// and thin pointers `&Foo` will just look like `*const Foo`.
165+
//
166+
// In all of those cases, we just use `pointee_type` as-is.
167+
_ => type_di_node(cx, pointee_type),
168+
};
156169

157170
return_if_di_node_created_in_meantime!(cx, unique_type_id);
158171

@@ -194,23 +207,7 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
194207
DIFlags::FlagZero,
195208
),
196209
|cx, owner| {
197-
// FIXME: If this wide pointer is a `Box` then we don't want to use its
198-
// type layout and instead use the layout of the raw pointer inside
199-
// of it.
200-
// The proper way to handle this is to not treat Box as a pointer
201-
// at all and instead emit regular struct debuginfo for it. We just
202-
// need to make sure that we don't break existing debuginfo consumers
203-
// by doing that (at least not without a warning period).
204-
let layout_type = if ptr_type.is_box() {
205-
// The assertion at the start of this function ensures we have a ZST
206-
// allocator. We'll make debuginfo "skip" all ZST allocators, not just the
207-
// default allocator.
208-
Ty::new_mut_ptr(cx.tcx, pointee_type)
209-
} else {
210-
ptr_type
211-
};
212-
213-
let layout = cx.layout_of(layout_type);
210+
let layout = cx.layout_of(ptr_type);
214211
let addr_field = layout.field(cx, WIDE_PTR_ADDR);
215212
let extra_field = layout.field(cx, WIDE_PTR_EXTRA);
216213

@@ -389,26 +386,11 @@ fn build_dyn_type_di_node<'ll, 'tcx>(
389386
}
390387

391388
/// Create debuginfo for `[T]` and `str`. These are unsized.
392-
///
393-
/// NOTE: We currently just emit the debuginfo for the element type here
394-
/// (i.e. `T` for slices and `u8` for `str`), so that we end up with
395-
/// `*const T` for the `data_ptr` field of the corresponding wide-pointer
396-
/// debuginfo of `&[T]`.
397-
///
398-
/// It would be preferable and more accurate if we emitted a DIArray of T
399-
/// without an upper bound instead. That is, LLVM already supports emitting
400-
/// debuginfo of arrays of unknown size. But GDB currently seems to end up
401-
/// in an infinite loop when confronted with such a type.
402-
///
403-
/// As a side effect of the current encoding every instance of a type like
404-
/// `struct Foo { unsized_field: [u8] }` will look like
405-
/// `struct Foo { unsized_field: u8 }` in debuginfo. If the length of the
406-
/// slice is zero, then accessing `unsized_field` in the debugger would
407-
/// result in an out-of-bounds access.
408389
fn build_slice_type_di_node<'ll, 'tcx>(
409390
cx: &CodegenCx<'ll, 'tcx>,
410391
slice_type: Ty<'tcx>,
411392
unique_type_id: UniqueTypeId<'tcx>,
393+
span: Span,
412394
) -> DINodeCreationResult<'ll> {
413395
let element_type = match slice_type.kind() {
414396
ty::Slice(element_type) => *element_type,
@@ -423,7 +405,20 @@ fn build_slice_type_di_node<'ll, 'tcx>(
423405

424406
let element_type_di_node = type_di_node(cx, element_type);
425407
return_if_di_node_created_in_meantime!(cx, unique_type_id);
426-
DINodeCreationResult { di_node: element_type_di_node, already_stored_in_typemap: false }
408+
let (size, align) = cx.spanned_size_and_align_of(slice_type, span);
409+
let subrange = unsafe { llvm::LLVMDIBuilderGetOrCreateSubrange(DIB(cx), 0, -1) };
410+
let subscripts = &[subrange];
411+
let di_node = unsafe {
412+
llvm::LLVMDIBuilderCreateArrayType(
413+
DIB(cx),
414+
size.bits(),
415+
align.bits() as u32,
416+
element_type_di_node,
417+
subscripts.as_ptr(),
418+
subscripts.len() as c_uint,
419+
)
420+
};
421+
DINodeCreationResult { di_node, already_stored_in_typemap: false }
427422
}
428423

429424
/// Get the debuginfo node for the given type.
@@ -454,21 +449,12 @@ pub(crate) fn spanned_type_di_node<'ll, 'tcx>(
454449
}
455450
ty::Tuple(elements) if elements.is_empty() => build_basic_type_di_node(cx, t),
456451
ty::Array(..) => build_fixed_size_array_di_node(cx, unique_type_id, t, span),
457-
ty::Slice(_) | ty::Str => build_slice_type_di_node(cx, t, unique_type_id),
452+
ty::Slice(_) | ty::Str => build_slice_type_di_node(cx, t, unique_type_id, span),
458453
ty::Dynamic(..) => build_dyn_type_di_node(cx, t, unique_type_id),
459454
ty::Foreign(..) => build_foreign_type_di_node(cx, t, unique_type_id),
460455
ty::RawPtr(pointee_type, _) | ty::Ref(_, pointee_type, _) => {
461456
build_pointer_or_reference_di_node(cx, t, pointee_type, unique_type_id)
462457
}
463-
// Some `Box` are newtyped pointers, make debuginfo aware of that.
464-
// Only works if the allocator argument is a 1-ZST and hence irrelevant for layout
465-
// (or if there is no allocator argument).
466-
ty::Adt(def, args)
467-
if def.is_box()
468-
&& args.get(1).is_none_or(|arg| cx.layout_of(arg.expect_ty()).is_1zst()) =>
469-
{
470-
build_pointer_or_reference_di_node(cx, t, t.expect_boxed_ty(), unique_type_id)
471-
}
472458
ty::FnDef(..) | ty::FnPtr(..) => build_subroutine_type_di_node(cx, unique_type_id),
473459
ty::Closure(..) => build_closure_env_di_node(cx, unique_type_id),
474460
ty::CoroutineClosure(..) => build_closure_env_di_node(cx, unique_type_id),

src/etc/gdb_lookup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def __call__(self, valobj):
104104
printer.add(RustType.STD_OS_STRING, StdOsStringProvider)
105105
printer.add(RustType.STD_STR, StdStrProvider)
106106
printer.add(RustType.STD_SLICE, StdSliceProvider)
107+
printer.add(RustType.STD_BOX, StdBoxProvider)
107108
printer.add(RustType.STD_VEC, StdVecProvider)
108109
printer.add(RustType.STD_VEC_DEQUE, StdVecDequeProvider)
109110
printer.add(RustType.STD_BTREE_SET, StdBTreeSetProvider)

src/etc/gdb_providers.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,29 @@ def display_hint():
136136
return "array"
137137

138138

139+
class StdBoxProvider(printer_base):
140+
def __init__(self, valobj):
141+
self._valobj = valobj
142+
self._pointer = unwrap_unique_or_non_null(valobj[ZERO_FIELD])
143+
# An unfortunate kludge.
144+
# I can't find any good way to distinguish a `Box<str>` from a `Box<u8>`
145+
# as the debuginfo template argument for both is `u8`.
146+
# If we could get `self._valobj.type.template_argument(0) == str`
147+
# then we could make this a bit nicer.
148+
#
149+
# FIXME(shua): the debuginfo template type should be 'str' not 'u8'
150+
if self._valobj.type.name == 'alloc::boxed::Box<str, alloc::alloc::Global>':
151+
ptr_ty = gdb.Type.pointer(gdb.lookup_type("u8"))
152+
data = self._pointer["data_ptr"].cast(ptr_ty)
153+
length = self._pointer["length"]
154+
self._pointer = data.lazy_string(encoding="utf-8", length=length)
155+
156+
def to_string(self):
157+
return "Box"
158+
159+
def children(self):
160+
yield "pointer", self._pointer
161+
139162
class StdVecProvider(printer_base):
140163
def __init__(self, valobj):
141164
self._valobj = valobj
@@ -197,6 +220,11 @@ def __init__(self, valobj, is_atomic=False):
197220
self._is_atomic = is_atomic
198221
self._ptr = unwrap_unique_or_non_null(valobj["ptr"])
199222
self._value = self._ptr["data" if is_atomic else "value"]
223+
# FIXME(shua): the debuginfo template type should be 'str' not 'u8'
224+
if self._ptr.type.target().name == "alloc::rc::RcInner<str>":
225+
length = self._valobj["ptr"]["pointer"]["length"]
226+
ptr = self._value.address.reinterpret_cast(gdb.Type.pointer(gdb.lookup_type("u8")))
227+
self._value = ptr.lazy_string(encoding="utf-8", length=length)
200228
self._strong = self._ptr["strong"]["v" if is_atomic else "value"]["value"]
201229
self._weak = self._ptr["weak"]["v" if is_atomic else "value"]["value"] - 1
202230

src/etc/rust_types.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class RustType(object):
2020
STD_OS_STRING = "StdOsString"
2121
STD_STR = "StdStr"
2222
STD_SLICE = "StdSlice"
23+
STD_BOX = "StdBox"
2324
STD_VEC = "StdVec"
2425
STD_VEC_DEQUE = "StdVecDeque"
2526
STD_BTREE_SET = "StdBTreeSet"
@@ -41,6 +42,7 @@ class RustType(object):
4142
STD_STR_REGEX = re.compile(r"^&(mut )?str$")
4243
STD_SLICE_REGEX = re.compile(r"^&(mut )?\[.+\]$")
4344
STD_OS_STRING_REGEX = re.compile(r"^(std::ffi::([a-z_]+::)+)OsString$")
45+
STD_BOX_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)Box<.+>$")
4446
STD_VEC_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)Vec<.+>$")
4547
STD_VEC_DEQUE_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)VecDeque<.+>$")
4648
STD_BTREE_SET_REGEX = re.compile(r"^(alloc::([a-z_]+::)+)BTreeSet<.+>$")
@@ -68,6 +70,7 @@ class RustType(object):
6870
RustType.STD_OS_STRING: STD_OS_STRING_REGEX,
6971
RustType.STD_STR: STD_STR_REGEX,
7072
RustType.STD_SLICE: STD_SLICE_REGEX,
73+
RustType.STD_BOX: STD_BOX_REGEX,
7174
RustType.STD_VEC: STD_VEC_REGEX,
7275
RustType.STD_VEC_DEQUE: STD_VEC_DEQUE_REGEX,
7376
RustType.STD_HASH_MAP: STD_HASH_MAP_REGEX,

tests/debuginfo/strings-and-strs.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@
2323
//@ gdb-command:print str_in_rc
2424
//@ gdb-check:$5 = alloc::rc::Rc<&str, alloc::alloc::Global> {ptr: core::ptr::non_null::NonNull<alloc::rc::RcInner<&str>> {pointer: 0x[...]}, phantom: core::marker::PhantomData<alloc::rc::RcInner<&str>>, alloc: alloc::alloc::Global}
2525

26+
//@ gdb-command:print box_str
27+
//@ gdb-check:$6 = alloc::boxed::Box<str, alloc::alloc::Global> (core::ptr::unique::Unique<str> {pointer: core::ptr::non_null::NonNull<str> {pointer: *const str [87, 111, 114, 108, 100]}, _marker: core::marker::PhantomData<str>}, alloc::alloc::Global)
28+
29+
//@ gdb-command:print rc_str
30+
//@ gdb-check:$7 = alloc::rc::Rc<str, alloc::alloc::Global> {ptr: core::ptr::non_null::NonNull<alloc::rc::RcInner<str>> {pointer: alloc::rc::RcInner<str> {strong: core::cell::Cell<usize> {value: core::cell::UnsafeCell<usize> {value: 1}}, weak: core::cell::Cell<usize> {value: core::cell::UnsafeCell<usize> {value: 1}}, value: 0x[...]}}, phantom: core::marker::PhantomData<alloc::rc::RcInner<str>>, alloc: alloc::alloc::Global}
31+
2632
// === LLDB TESTS ==================================================================================
2733
//@ lldb-command:run
2834
//@ lldb-command:v plain_string
@@ -40,6 +46,12 @@
4046
//@ lldb-command:v str_in_rc
4147
//@ lldb-check:(alloc::rc::Rc<&str, alloc::alloc::Global>) str_in_rc = strong=1, weak=0 { value = "Hello" { [0] = 'H' [1] = 'e' [2] = 'l' [3] = 'l' [4] = 'o' } }
4248

49+
//@ lldb-command:v box_str
50+
//@ lldb-check:(alloc::boxed::Box<unsigned char[], alloc::alloc::Global>) box_str = { __0 = { pointer = { pointer = { data_ptr = 0x[...] "World" length = 5 } } _marker = } __1 = }
51+
52+
//@ lldb-command:v rc_str
53+
//@ lldb-check:(alloc::rc::Rc<unsigned char[], alloc::alloc::Global>) rc_str = strong=1, weak=0 { value = "World" }
54+
4355
#![allow(unused_variables)]
4456

4557
pub struct Foo<'a> {
@@ -53,6 +65,8 @@ fn main() {
5365
let str_in_tuple = ("Hello", "World");
5466

5567
let str_in_rc = std::rc::Rc::new("Hello");
68+
let box_str: Box<str> = "World".into();
69+
let rc_str: std::rc::Rc<str> = "World".into();
5670
zzz(); // #break
5771
}
5872

0 commit comments

Comments
 (0)