addr2line/
unit.rs

1use alloc::boxed::Box;
2use alloc::sync::Arc;
3use alloc::vec::Vec;
4use core::cmp;
5
6use crate::{
7    Context, DebugFile, Error, Function, Functions, LazyFunctions, LazyLines, LazyResult,
8    LineLocationRangeIter, Lines, Location, LookupContinuation, LookupResult, RangeAttributes,
9    SimpleLookup, SplitDwarfLoad,
10};
11
12pub(crate) struct UnitRange {
13    unit_id: usize,
14    min_begin: u64,
15    range: gimli::Range,
16}
17
18pub(crate) struct ResUnit<R: gimli::Reader> {
19    offset: gimli::DebugInfoOffset<R::Offset>,
20    dw_unit: gimli::Unit<R>,
21    pub(crate) lang: Option<gimli::DwLang>,
22    lines: LazyLines,
23    functions: LazyFunctions<R>,
24    dwo: LazyResult<Option<Box<DwoUnit<R>>>>,
25}
26
27type UnitRef<'unit, R> = (DebugFile, gimli::UnitRef<'unit, R>);
28
29impl<R: gimli::Reader> ResUnit<R> {
30    pub(crate) fn unit_ref<'a>(&'a self, sections: &'a gimli::Dwarf<R>) -> gimli::UnitRef<'a, R> {
31        gimli::UnitRef::new(sections, &self.dw_unit)
32    }
33
34    /// Returns the DWARF sections and the unit.
35    ///
36    /// Loads the DWO unit if necessary.
37    pub(crate) fn dwarf_and_unit<'unit, 'ctx: 'unit>(
38        &'unit self,
39        ctx: &'ctx Context<R>,
40    ) -> LookupResult<
41        SimpleLookup<
42            Result<UnitRef<'unit, R>, Error>,
43            R,
44            impl FnOnce(Option<Arc<gimli::Dwarf<R>>>) -> Result<UnitRef<'unit, R>, Error>,
45        >,
46    > {
47        let map_dwo = move |dwo: &'unit Result<Option<Box<DwoUnit<R>>>, Error>| match dwo {
48            Ok(Some(dwo)) => Ok((DebugFile::Dwo, dwo.unit_ref())),
49            Ok(None) => Ok((DebugFile::Primary, self.unit_ref(&*ctx.sections))),
50            Err(e) => Err(*e),
51        };
52        let complete = |dwo| SimpleLookup::new_complete(map_dwo(dwo));
53
54        if let Some(dwo) = self.dwo.get() {
55            return complete(dwo);
56        }
57
58        let dwo_id = match self.dw_unit.dwo_id {
59            None => {
60                return complete(self.dwo.get_or_init(|| Ok(None)));
61            }
62            Some(dwo_id) => dwo_id,
63        };
64
65        let comp_dir = self.dw_unit.comp_dir.clone();
66
67        let dwo_name = self.dw_unit.dwo_name().and_then(|s| {
68            if let Some(s) = s {
69                Ok(Some(ctx.sections.attr_string(&self.dw_unit, s)?))
70            } else {
71                Ok(None)
72            }
73        });
74
75        let path = match dwo_name {
76            Ok(v) => v,
77            Err(e) => {
78                return complete(self.dwo.get_or_init(|| Err(e)));
79            }
80        };
81
82        let process_dwo = move |dwo_dwarf: Option<Arc<gimli::Dwarf<R>>>| {
83            let dwo_dwarf = match dwo_dwarf {
84                None => return Ok(None),
85                Some(dwo_dwarf) => dwo_dwarf,
86            };
87            let mut dwo_units = dwo_dwarf.units();
88            let dwo_header = match dwo_units.next()? {
89                Some(dwo_header) => dwo_header,
90                None => return Ok(None),
91            };
92
93            let mut dwo_unit = dwo_dwarf.unit(dwo_header)?;
94            dwo_unit.copy_relocated_attributes(&self.dw_unit);
95            Ok(Some(Box::new(DwoUnit {
96                sections: dwo_dwarf,
97                dw_unit: dwo_unit,
98            })))
99        };
100
101        SimpleLookup::new_needs_load(
102            SplitDwarfLoad {
103                dwo_id,
104                comp_dir,
105                path,
106                parent: ctx.sections.clone(),
107            },
108            move |dwo_dwarf| map_dwo(self.dwo.get_or_init(|| process_dwo(dwo_dwarf))),
109        )
110    }
111
112    pub(crate) fn parse_lines(&self, sections: &gimli::Dwarf<R>) -> Result<Option<&Lines>, Error> {
113        // NB: line information is always stored in the main debug file so this does not need
114        // to handle DWOs.
115        let ilnp = match self.dw_unit.line_program {
116            Some(ref ilnp) => ilnp,
117            None => return Ok(None),
118        };
119        self.lines.borrow(self.unit_ref(sections), ilnp).map(Some)
120    }
121
122    pub(crate) fn parse_functions<'unit, 'ctx: 'unit>(
123        &'unit self,
124        ctx: &'ctx Context<R>,
125    ) -> LookupResult<impl LookupContinuation<Output = Result<&'unit Functions<R>, Error>, Buf = R>>
126    {
127        self.dwarf_and_unit(ctx).map(move |r| {
128            let (_file, unit) = r?;
129            self.functions.borrow(unit)
130        })
131    }
132
133    pub(crate) fn parse_inlined_functions<'unit, 'ctx: 'unit>(
134        &'unit self,
135        ctx: &'ctx Context<R>,
136    ) -> LookupResult<impl LookupContinuation<Output = Result<(), Error>, Buf = R> + 'unit> {
137        self.dwarf_and_unit(ctx).map(move |r| {
138            let (file, unit) = r?;
139            self.functions
140                .borrow(unit)?
141                .parse_inlined_functions(file, unit, ctx)
142        })
143    }
144
145    pub(crate) fn find_location(
146        &self,
147        probe: u64,
148        sections: &gimli::Dwarf<R>,
149    ) -> Result<Option<Location<'_>>, Error> {
150        let Some(lines) = self.parse_lines(sections)? else {
151            return Ok(None);
152        };
153        lines.find_location(probe)
154    }
155
156    #[inline]
157    pub(crate) fn find_location_range(
158        &self,
159        probe_low: u64,
160        probe_high: u64,
161        sections: &gimli::Dwarf<R>,
162    ) -> Result<Option<LineLocationRangeIter<'_>>, Error> {
163        let Some(lines) = self.parse_lines(sections)? else {
164            return Ok(None);
165        };
166        lines.find_location_range(probe_low, probe_high).map(Some)
167    }
168
169    pub(crate) fn find_function_or_location<'unit, 'ctx: 'unit>(
170        &'unit self,
171        probe: u64,
172        ctx: &'ctx Context<R>,
173    ) -> LookupResult<
174        impl LookupContinuation<
175            Output = Result<(Option<&'unit Function<R>>, Option<Location<'unit>>), Error>,
176            Buf = R,
177        >,
178    > {
179        self.dwarf_and_unit(ctx).map(move |r| {
180            let (file, unit) = r?;
181            let functions = self.functions.borrow(unit)?;
182            let function = match functions.find_address(probe) {
183                Some(address) => {
184                    let function_index = functions.addresses[address].function;
185                    let function = &functions.functions[function_index];
186                    Some(function.borrow(file, unit, ctx)?)
187                }
188                None => None,
189            };
190            let location = self.find_location(probe, &ctx.sections)?;
191            Ok((function, location))
192        })
193    }
194}
195
196pub(crate) struct ResUnits<R: gimli::Reader> {
197    ranges: Box<[UnitRange]>,
198    units: Box<[ResUnit<R>]>,
199}
200
201impl<R: gimli::Reader> ResUnits<R> {
202    pub(crate) fn parse(sections: &gimli::Dwarf<R>) -> Result<Self, Error> {
203        // Find all the references to compilation units in .debug_aranges.
204        // Note that we always also iterate through all of .debug_info to
205        // find compilation units, because .debug_aranges may be missing some.
206        let mut aranges = Vec::new();
207        let mut headers = sections.debug_aranges.headers();
208        while let Some(header) = headers.next()? {
209            aranges.push((header.debug_info_offset(), header.offset()));
210        }
211        aranges.sort_by_key(|i| i.0);
212
213        let mut unit_ranges = Vec::new();
214        let mut res_units = Vec::new();
215        let mut units = sections.units();
216        while let Some(header) = units.next()? {
217            let unit_id = res_units.len();
218            let offset = match header.offset().as_debug_info_offset() {
219                Some(offset) => offset,
220                None => continue,
221            };
222            // We mainly want compile units, but we may need to follow references to entries
223            // within other units for function names.  We don't need anything from type units.
224            let mut need_unit_range = match header.type_() {
225                gimli::UnitType::Type { .. } | gimli::UnitType::SplitType { .. } => continue,
226                gimli::UnitType::Partial => {
227                    // Partial units are only needed for references from other units.
228                    // They shouldn't have any address ranges.
229                    false
230                }
231                _ => true,
232            };
233            let dw_unit = match sections.unit(header) {
234                Ok(dw_unit) => dw_unit,
235                Err(_) => continue,
236            };
237            let dw_unit_ref = gimli::UnitRef::new(sections, &dw_unit);
238
239            let mut lang = None;
240            if need_unit_range {
241                let mut entries = dw_unit_ref.entries_raw(None)?;
242
243                let abbrev = match entries.read_abbreviation()? {
244                    Some(abbrev) => abbrev,
245                    None => continue,
246                };
247
248                let mut ranges = RangeAttributes::default();
249                for spec in abbrev.attributes() {
250                    let attr = entries.read_attribute(*spec)?;
251                    match attr.name() {
252                        gimli::DW_AT_low_pc => match attr.value() {
253                            gimli::AttributeValue::Addr(val) => ranges.low_pc = Some(val),
254                            gimli::AttributeValue::DebugAddrIndex(index) => {
255                                ranges.low_pc = Some(dw_unit_ref.address(index)?);
256                            }
257                            _ => {}
258                        },
259                        gimli::DW_AT_high_pc => match attr.value() {
260                            gimli::AttributeValue::Addr(val) => ranges.high_pc = Some(val),
261                            gimli::AttributeValue::DebugAddrIndex(index) => {
262                                ranges.high_pc = Some(dw_unit_ref.address(index)?);
263                            }
264                            gimli::AttributeValue::Udata(val) => ranges.size = Some(val),
265                            _ => {}
266                        },
267                        gimli::DW_AT_ranges => {
268                            ranges.ranges_offset = dw_unit_ref.attr_ranges_offset(attr.value())?;
269                        }
270                        gimli::DW_AT_language => {
271                            if let gimli::AttributeValue::Language(val) = attr.value() {
272                                lang = Some(val);
273                            }
274                        }
275                        _ => {}
276                    }
277                }
278
279                // Find the address ranges for the CU, using in order of preference:
280                // - DW_AT_ranges
281                // - .debug_aranges
282                // - DW_AT_low_pc/DW_AT_high_pc
283                //
284                // Using DW_AT_ranges before .debug_aranges is possibly an arbitrary choice,
285                // but the feeling is that DW_AT_ranges is more likely to be reliable or complete
286                // if it is present.
287                //
288                // .debug_aranges must be used before DW_AT_low_pc/DW_AT_high_pc because
289                // it has been observed on macOS that DW_AT_ranges was not emitted even for
290                // discontiguous CUs.
291                let i = match ranges.ranges_offset {
292                    Some(_) => None,
293                    None => aranges.binary_search_by_key(&offset, |x| x.0).ok(),
294                };
295                if let Some(mut i) = i {
296                    // There should be only one set per CU, but in practice multiple
297                    // sets have been observed. This is probably a compiler bug, but
298                    // either way we need to handle it.
299                    while i > 0 && aranges[i - 1].0 == offset {
300                        i -= 1;
301                    }
302                    for (_, aranges_offset) in aranges[i..].iter().take_while(|x| x.0 == offset) {
303                        let aranges_header = sections.debug_aranges.header(*aranges_offset)?;
304                        let mut aranges = aranges_header.entries();
305                        while let Some(arange) = aranges.next().transpose() {
306                            let Ok(arange) = arange else {
307                                // Ignore errors. In particular, this will ignore address overflow.
308                                // This has been seen for a unit that had a single variable
309                                // with rustc 1.89.0.
310                                //
311                                // This relies on `ArangeEntryIter::next` fusing for errors that
312                                // can't be ignored.
313                                continue;
314                            };
315                            if arange.length() != 0 {
316                                unit_ranges.push(UnitRange {
317                                    range: arange.range(),
318                                    unit_id,
319                                    min_begin: 0,
320                                });
321                                need_unit_range = false;
322                            }
323                        }
324                    }
325                }
326                if need_unit_range {
327                    need_unit_range = !ranges.for_each_range(dw_unit_ref, |range| {
328                        unit_ranges.push(UnitRange {
329                            range,
330                            unit_id,
331                            min_begin: 0,
332                        });
333                    })?;
334                }
335            }
336
337            let lines = LazyLines::new();
338            if need_unit_range {
339                // The unit did not declare any ranges.
340                // Try to get some ranges from the line program sequences.
341                if let Some(ref ilnp) = dw_unit_ref.line_program {
342                    if let Ok(lines) = lines.borrow(dw_unit_ref, ilnp) {
343                        for range in lines.ranges() {
344                            unit_ranges.push(UnitRange {
345                                range,
346                                unit_id,
347                                min_begin: 0,
348                            })
349                        }
350                    }
351                }
352            }
353
354            res_units.push(ResUnit {
355                offset,
356                dw_unit,
357                lang,
358                lines,
359                functions: LazyFunctions::new(),
360                dwo: LazyResult::new(),
361            });
362        }
363
364        // Sort this for faster lookup in `Self::find_range`.
365        unit_ranges.sort_by_key(|i| i.range.end);
366
367        // Calculate the `min_begin` field now that we've determined the order of
368        // CUs.
369        let mut min = !0;
370        for i in unit_ranges.iter_mut().rev() {
371            min = min.min(i.range.begin);
372            i.min_begin = min;
373        }
374
375        Ok(ResUnits {
376            ranges: unit_ranges.into_boxed_slice(),
377            units: res_units.into_boxed_slice(),
378        })
379    }
380
381    pub(crate) fn iter(&self) -> impl Iterator<Item = &ResUnit<R>> {
382        self.units.iter()
383    }
384
385    pub(crate) fn find_offset(
386        &self,
387        offset: gimli::DebugInfoOffset<R::Offset>,
388    ) -> Result<&gimli::Unit<R>, Error> {
389        match self
390            .units
391            .binary_search_by_key(&offset.0, |unit| unit.offset.0)
392        {
393            // There is never a DIE at the unit offset or before the first unit.
394            Ok(_) | Err(0) => Err(gimli::Error::NoEntryAtGivenOffset),
395            Err(i) => Ok(&self.units[i - 1].dw_unit),
396        }
397    }
398
399    /// Finds the CUs for the function address given.
400    ///
401    /// There might be multiple CUs whose range contains this address.
402    /// Weak symbols have shown up in the wild which cause this to happen
403    /// but otherwise this can happen if the CU has non-contiguous functions
404    /// but only reports a single range.
405    ///
406    /// Consequently we return an iterator for all CUs which may contain the
407    /// address, and the caller must check if there is actually a function or
408    /// location in the CU for that address.
409    pub(crate) fn find(&self, probe: u64) -> impl Iterator<Item = &ResUnit<R>> {
410        self.find_range(probe, probe + 1).map(|(unit, _range)| unit)
411    }
412
413    /// Finds the CUs covering the range of addresses given.
414    ///
415    /// The range is [low, high) (ie, the upper bound is exclusive). This can return multiple
416    /// ranges for the same unit.
417    #[inline]
418    pub(crate) fn find_range(
419        &self,
420        probe_low: u64,
421        probe_high: u64,
422    ) -> impl Iterator<Item = (&ResUnit<R>, &gimli::Range)> {
423        // Find the position of the next range after a range which
424        // ends at `probe_low` or lower.
425        let pos = match self
426            .ranges
427            .binary_search_by_key(&probe_low, |i| i.range.end)
428        {
429            Ok(i) => i + 1, // Range `i` ends at exactly `probe_low`.
430            Err(i) => i,    // Range `i - 1` ends at a lower address.
431        };
432
433        // Iterate from that position to find matching CUs.
434        self.ranges[pos..]
435            .iter()
436            .take_while(move |i| {
437                // We know that this CU's end is at least `probe_low` because
438                // of our sorted array.
439                debug_assert!(i.range.end >= probe_low);
440
441                // Each entry keeps track of the minimum begin address for the
442                // remainder of the array of unit ranges. If our probe is before
443                // the minimum range begin of this entry, then it's guaranteed
444                // to not fit in any subsequent entries, so we break out.
445                probe_high > i.min_begin
446            })
447            .filter_map(move |i| {
448                // If this CU doesn't actually contain this address, move to the
449                // next CU.
450                if probe_low >= i.range.end || probe_high <= i.range.begin {
451                    return None;
452                }
453                Some((&self.units[i.unit_id], &i.range))
454            })
455    }
456
457    pub(crate) fn find_location_range<'a>(
458        &'a self,
459        probe_low: u64,
460        probe_high: u64,
461        sections: &'a gimli::Dwarf<R>,
462    ) -> Result<LocationRangeIter<'a, R>, Error> {
463        let unit_iter = Box::new(self.find_range(probe_low, probe_high));
464        Ok(LocationRangeIter {
465            unit_iter,
466            iter: None,
467            probe_low,
468            probe_high,
469            sections,
470        })
471    }
472}
473
474/// A DWO unit has its own DWARF sections.
475struct DwoUnit<R: gimli::Reader> {
476    sections: Arc<gimli::Dwarf<R>>,
477    dw_unit: gimli::Unit<R>,
478}
479
480impl<R: gimli::Reader> DwoUnit<R> {
481    fn unit_ref(&self) -> gimli::UnitRef<'_, R> {
482        gimli::UnitRef::new(&self.sections, &self.dw_unit)
483    }
484}
485
486pub(crate) struct SupUnit<R: gimli::Reader> {
487    offset: gimli::DebugInfoOffset<R::Offset>,
488    dw_unit: gimli::Unit<R>,
489}
490
491pub(crate) struct SupUnits<R: gimli::Reader> {
492    units: Box<[SupUnit<R>]>,
493}
494
495impl<R: gimli::Reader> Default for SupUnits<R> {
496    fn default() -> Self {
497        SupUnits {
498            units: Box::default(),
499        }
500    }
501}
502
503impl<R: gimli::Reader> SupUnits<R> {
504    pub(crate) fn parse(sections: &gimli::Dwarf<R>) -> Result<Self, Error> {
505        let mut sup_units = Vec::new();
506        let mut units = sections.units();
507        while let Some(header) = units.next()? {
508            let offset = match header.offset().as_debug_info_offset() {
509                Some(offset) => offset,
510                None => continue,
511            };
512            let dw_unit = match sections.unit(header) {
513                Ok(dw_unit) => dw_unit,
514                Err(_) => continue,
515            };
516            sup_units.push(SupUnit { dw_unit, offset });
517        }
518        Ok(SupUnits {
519            units: sup_units.into_boxed_slice(),
520        })
521    }
522
523    pub(crate) fn find_offset(
524        &self,
525        offset: gimli::DebugInfoOffset<R::Offset>,
526    ) -> Result<&gimli::Unit<R>, Error> {
527        match self
528            .units
529            .binary_search_by_key(&offset.0, |unit| unit.offset.0)
530        {
531            // There is never a DIE at the unit offset or before the first unit.
532            Ok(_) | Err(0) => Err(gimli::Error::NoEntryAtGivenOffset),
533            Err(i) => Ok(&self.units[i - 1].dw_unit),
534        }
535    }
536}
537
538/// Iterator over `Location`s in a range of addresses, returned by `Context::find_location_range`.
539pub struct LocationRangeIter<'ctx, R: gimli::Reader> {
540    unit_iter: Box<dyn Iterator<Item = (&'ctx ResUnit<R>, &'ctx gimli::Range)> + 'ctx>,
541    iter: Option<LineLocationRangeIter<'ctx>>,
542
543    probe_low: u64,
544    probe_high: u64,
545    sections: &'ctx gimli::Dwarf<R>,
546}
547
548impl<'ctx, R: gimli::Reader> LocationRangeIter<'ctx, R> {
549    fn next_loc(&mut self) -> Result<Option<(u64, u64, Location<'ctx>)>, Error> {
550        loop {
551            let iter = self.iter.take();
552            match iter {
553                None => match self.unit_iter.next() {
554                    Some((unit, range)) => {
555                        self.iter = unit.find_location_range(
556                            cmp::max(self.probe_low, range.begin),
557                            cmp::min(self.probe_high, range.end),
558                            self.sections,
559                        )?;
560                    }
561                    None => return Ok(None),
562                },
563                Some(mut iter) => {
564                    if let item @ Some(_) = iter.next() {
565                        self.iter = Some(iter);
566                        return Ok(item);
567                    }
568                }
569            }
570        }
571    }
572}
573
574impl<'ctx, R> Iterator for LocationRangeIter<'ctx, R>
575where
576    R: gimli::Reader + 'ctx,
577{
578    type Item = (u64, u64, Location<'ctx>);
579
580    #[inline]
581    fn next(&mut self) -> Option<Self::Item> {
582        self.next_loc().unwrap_or_default()
583    }
584}
585
586#[cfg(feature = "fallible-iterator")]
587impl<'ctx, R> fallible_iterator::FallibleIterator for LocationRangeIter<'ctx, R>
588where
589    R: gimli::Reader + 'ctx,
590{
591    type Item = (u64, u64, Location<'ctx>);
592    type Error = Error;
593
594    #[inline]
595    fn next(&mut self) -> Result<Option<Self::Item>, Self::Error> {
596        self.next_loc()
597    }
598}