regex_automata/meta/
strategy.rs

1use core::{
2    fmt::Debug,
3    panic::{RefUnwindSafe, UnwindSafe},
4};
5
6use alloc::sync::Arc;
7
8use regex_syntax::hir::{literal, Hir};
9
10use crate::{
11    meta::{
12        error::{BuildError, RetryError, RetryFailError, RetryQuadraticError},
13        regex::{Cache, RegexInfo},
14        reverse_inner, wrappers,
15    },
16    nfa::thompson::{self, WhichCaptures, NFA},
17    util::{
18        captures::{Captures, GroupInfo},
19        look::LookMatcher,
20        prefilter::{self, Prefilter, PrefilterI},
21        primitives::{NonMaxUsize, PatternID},
22        search::{Anchored, HalfMatch, Input, Match, MatchKind, PatternSet},
23    },
24};
25
/// A trait that represents a single meta strategy. Its main utility is in
/// providing a way to do dynamic dispatch over a few choices.
///
/// Why dynamic dispatch? I actually don't have a super compelling reason, and
/// importantly, I have not benchmarked it with the main alternative: an enum.
/// I went with dynamic dispatch initially because the regex engine search code
/// really can't be inlined into caller code in most cases because it's just
/// too big. In other words, it is already expected that every regex search
/// will entail at least the cost of a function call.
///
/// I do wonder whether using enums would result in better codegen overall
/// though. It's a worthwhile experiment to try. Probably the most interesting
/// benchmark to run in such a case would be one with a high match count. That
/// is, a benchmark to test the overall latency of a search call.
pub(super) trait Strategy:
    Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static
{
    /// Returns the capture group information that describes the groups
    /// reported by this strategy.
    fn group_info(&self) -> &GroupInfo;

    /// Creates a new `Cache` for use with this strategy's search routines,
    /// all of which take a `&mut Cache`.
    fn create_cache(&self) -> Cache;

    /// Resets the given cache so that it may be used with this strategy.
    fn reset_cache(&self, cache: &mut Cache);

    /// Returns true when this strategy considers itself accelerated. For
    /// the implementations in this part of the file, that means a prefilter
    /// is present and reports itself as fast.
    fn is_accelerated(&self) -> bool;

    /// Returns the memory used by this strategy, as reported by its
    /// constituent parts. (Presumably heap bytes; confirm against the
    /// public `memory_usage` documentation.)
    fn memory_usage(&self) -> usize;

    /// Executes a search on `input` and returns the match found, if any,
    /// with both its start and end offsets.
    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>;

    /// Like `search`, but only the pattern and the end offset of the match
    /// are reported.
    fn search_half(
        &self,
        cache: &mut Cache,
        input: &Input<'_>,
    ) -> Option<HalfMatch>;

    /// Returns true if and only if a search on `input` finds a match.
    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool;

    /// Executes a search on `input`, writes match offsets into `slots` and
    /// returns the ID of the pattern that matched, if any. `slots` may be
    /// shorter than the number of slots the strategy could fill.
    fn search_slots(
        &self,
        cache: &mut Cache,
        input: &Input<'_>,
        slots: &mut [Option<NonMaxUsize>],
    ) -> Option<PatternID>;

    /// Inserts into `patset` the IDs of the patterns found to match in
    /// `input`.
    fn which_overlapping_matches(
        &self,
        cache: &mut Cache,
        input: &Input<'_>,
        patset: &mut PatternSet,
    );
}
77
78pub(super) fn new(
79    info: &RegexInfo,
80    hirs: &[&Hir],
81) -> Result<Arc<dyn Strategy>, BuildError> {
82    // At this point, we're committed to a regex engine of some kind. So pull
83    // out a prefilter if we can, which will feed to each of the constituent
84    // regex engines.
85    let pre = if info.is_always_anchored_start() {
86        // PERF: I'm not sure we necessarily want to do this... We may want to
87        // run a prefilter for quickly rejecting in some cases. The problem
88        // is that anchored searches overlap quite a bit with the use case
89        // of "run a regex on every line to extract data." In that case, the
90        // regex always matches, so running a prefilter doesn't really help us
91        // there. The main place where a prefilter helps in an anchored search
92        // is if the anchored search is not expected to match frequently. That
93        // is, the prefilter gives us a way to possibly reject a haystack very
94        // quickly.
95        //
96        // Maybe we should do use a prefilter, but only for longer haystacks?
97        // Or maybe we should only use a prefilter when we think it's "fast"?
98        //
99        // Interestingly, I think we currently lack the infrastructure for
100        // disabling a prefilter based on haystack length. That would probably
101        // need to be a new 'Input' option. (Interestingly, an 'Input' used to
102        // carry a 'Prefilter' with it, but I moved away from that.)
103        debug!("skipping literal extraction since regex is anchored");
104        None
105    } else if let Some(pre) = info.config().get_prefilter() {
106        debug!(
107            "skipping literal extraction since the caller provided a prefilter"
108        );
109        Some(pre.clone())
110    } else if info.config().get_auto_prefilter() {
111        let kind = info.config().get_match_kind();
112        let prefixes = crate::util::prefilter::prefixes(kind, hirs);
113        // If we can build a full `Strategy` from just the extracted prefixes,
114        // then we can short-circuit and avoid building a regex engine at all.
115        if let Some(pre) = Pre::from_prefixes(info, &prefixes) {
116            debug!(
117                "found that the regex can be broken down to a literal \
118                 search, avoiding the regex engine entirely",
119            );
120            return Ok(pre);
121        }
122        // This now attempts another short-circuit of the regex engine: if we
123        // have a huge alternation of just plain literals, then we can just use
124        // Aho-Corasick for that and avoid the regex engine entirely.
125        //
126        // You might think this case would just be handled by
127        // `Pre::from_prefixes`, but that technique relies on heuristic literal
128        // extraction from the corresponding `Hir`. That works, but part of
129        // heuristics limit the size and number of literals returned. This case
130        // will specifically handle patterns with very large alternations.
131        //
132        // One wonders if we should just roll this our heuristic literal
133        // extraction, and then I think this case could disappear entirely.
134        if let Some(pre) = Pre::from_alternation_literals(info, hirs) {
135            debug!(
136                "found plain alternation of literals, \
137                 avoiding regex engine entirely and using Aho-Corasick"
138            );
139            return Ok(pre);
140        }
141        prefixes.literals().and_then(|strings| {
142            debug!(
143                "creating prefilter from {} literals: {:?}",
144                strings.len(),
145                strings,
146            );
147            Prefilter::new(kind, strings)
148        })
149    } else {
150        debug!("skipping literal extraction since prefilters were disabled");
151        None
152    };
153    let mut core = Core::new(info.clone(), pre.clone(), hirs)?;
154    // Now that we have our core regex engines built, there are a few cases
155    // where we can do a little bit better than just a normal "search forward
156    // and maybe use a prefilter when in a start state." However, these cases
157    // may not always work or otherwise build on top of the Core searcher.
158    // For example, the reverse anchored optimization seems like it might
159    // always work, but only the DFAs support reverse searching and the DFAs
160    // might give up or quit for reasons. If we had, e.g., a PikeVM that
161    // supported reverse searching, then we could avoid building a full Core
162    // engine for this case.
163    core = match ReverseAnchored::new(core) {
164        Err(core) => core,
165        Ok(ra) => {
166            debug!("using reverse anchored strategy");
167            return Ok(Arc::new(ra));
168        }
169    };
170    core = match ReverseSuffix::new(core, hirs) {
171        Err(core) => core,
172        Ok(rs) => {
173            debug!("using reverse suffix strategy");
174            return Ok(Arc::new(rs));
175        }
176    };
177    core = match ReverseInner::new(core, hirs) {
178        Err(core) => core,
179        Ok(ri) => {
180            debug!("using reverse inner strategy");
181            return Ok(Arc::new(ri));
182        }
183    };
184    debug!("using core strategy");
185    Ok(Arc::new(core))
186}
187
/// A strategy that reports matches straight from a literal searcher, without
/// involving any regex engine.
///
/// It is only built for a single pattern, and every match it reports is
/// attributed to `PatternID::ZERO`.
#[derive(Clone, Debug)]
struct Pre<P> {
    /// The literal searcher used to find matches. The `Strategy` impl
    /// requires it to implement `PrefilterI`.
    pre: P,
    /// Group info describing one pattern with exactly one (implicit,
    /// unnamed) capturing group, as built by `Pre::new`.
    group_info: GroupInfo,
}
193
194impl<P: PrefilterI> Pre<P> {
195    fn new(pre: P) -> Arc<dyn Strategy> {
196        // The only thing we support when we use prefilters directly as a
197        // strategy is the start and end of the overall match for a single
198        // pattern. In other words, exactly one implicit capturing group. Which
199        // is exactly what we use here for a GroupInfo.
200        let group_info = GroupInfo::new([[None::<&str>]]).unwrap();
201        Arc::new(Pre { pre, group_info })
202    }
203}
204
205// This is a little weird, but we don't actually care about the type parameter
206// here because we're selecting which underlying prefilter to use. So we just
207// define it on an arbitrary type.
208impl Pre<()> {
209    /// Given a sequence of prefixes, attempt to return a full `Strategy` using
210    /// just the prefixes.
211    ///
212    /// Basically, this occurs when the prefixes given not just prefixes,
213    /// but an enumeration of the entire language matched by the regular
214    /// expression.
215    ///
216    /// A number of other conditions need to be true too. For example, there
217    /// can be only one pattern, the number of explicit capture groups is 0, no
218    /// look-around assertions and so on.
219    ///
220    /// Note that this ignores `Config::get_auto_prefilter` because if this
221    /// returns something, then it isn't a prefilter but a matcher itself.
222    /// Therefore, it shouldn't suffer from the problems typical to prefilters
223    /// (such as a high false positive rate).
224    fn from_prefixes(
225        info: &RegexInfo,
226        prefixes: &literal::Seq,
227    ) -> Option<Arc<dyn Strategy>> {
228        let kind = info.config().get_match_kind();
229        // Check to see if our prefixes are exact, which means we might be
230        // able to bypass the regex engine entirely and just rely on literal
231        // searches.
232        if !prefixes.is_exact() {
233            return None;
234        }
235        // We also require that we have a single regex pattern. Namely,
236        // we reuse the prefilter infrastructure to implement search and
237        // prefilters only report spans. Prefilters don't know about pattern
238        // IDs. The multi-regex case isn't a lost cause, we might still use
239        // Aho-Corasick and we might still just use a regular prefilter, but
240        // that's done below.
241        if info.pattern_len() != 1 {
242            return None;
243        }
244        // We can't have any capture groups either. The literal engines don't
245        // know how to deal with things like '(foo)(bar)'. In that case, a
246        // prefilter will just be used and then the regex engine will resolve
247        // the capture groups.
248        if info.props()[0].explicit_captures_len() != 0 {
249            return None;
250        }
251        // We also require that it has zero look-around assertions. Namely,
252        // literal extraction treats look-around assertions as if they match
253        // *every* empty string. But of course, that isn't true. So for
254        // example, 'foo\bquux' never matches anything, but 'fooquux' is
255        // extracted from that as an exact literal. Such cases should just run
256        // the regex engine. 'fooquux' will be used as a normal prefilter, and
257        // then the regex engine will try to look for an actual match.
258        if !info.props()[0].look_set().is_empty() {
259            return None;
260        }
261        // Finally, currently, our prefilters are all oriented around
262        // leftmost-first match semantics, so don't try to use them if the
263        // caller asked for anything else.
264        if kind != MatchKind::LeftmostFirst {
265            return None;
266        }
267        // The above seems like a lot of requirements to meet, but it applies
268        // to a lot of cases. 'foo', '[abc][123]' and 'foo|bar|quux' all meet
269        // the above criteria, for example.
270        //
271        // Note that this is effectively a latency optimization. If we didn't
272        // do this, then the extracted literals would still get bundled into
273        // a prefilter, and every regex engine capable of running unanchored
274        // searches supports prefilters. So this optimization merely sidesteps
275        // having to run the regex engine at all to confirm the match. Thus, it
276        // decreases the latency of a match.
277
278        // OK because we know the set is exact and thus finite.
279        let prefixes = prefixes.literals().unwrap();
280        debug!(
281            "trying to bypass regex engine by creating \
282             prefilter from {} literals: {:?}",
283            prefixes.len(),
284            prefixes,
285        );
286        let choice = match prefilter::Choice::new(kind, prefixes) {
287            Some(choice) => choice,
288            None => {
289                debug!(
290                    "regex bypass failed because no prefilter could be built"
291                );
292                return None;
293            }
294        };
295        let strat: Arc<dyn Strategy> = match choice {
296            prefilter::Choice::Memchr(pre) => Pre::new(pre),
297            prefilter::Choice::Memchr2(pre) => Pre::new(pre),
298            prefilter::Choice::Memchr3(pre) => Pre::new(pre),
299            prefilter::Choice::Memmem(pre) => Pre::new(pre),
300            prefilter::Choice::Teddy(pre) => Pre::new(pre),
301            prefilter::Choice::ByteSet(pre) => Pre::new(pre),
302            prefilter::Choice::AhoCorasick(pre) => Pre::new(pre),
303        };
304        Some(strat)
305    }
306
307    /// Attempts to extract an alternation of literals, and if it's deemed
308    /// worth doing, returns an Aho-Corasick prefilter as a strategy.
309    ///
310    /// And currently, this only returns something when 'hirs.len() == 1'. This
311    /// could in theory do something if there are multiple HIRs where all of
312    /// them are alternation of literals, but I haven't had the time to go down
313    /// that path yet.
314    fn from_alternation_literals(
315        info: &RegexInfo,
316        hirs: &[&Hir],
317    ) -> Option<Arc<dyn Strategy>> {
318        use crate::util::prefilter::AhoCorasick;
319
320        let lits = crate::meta::literal::alternation_literals(info, hirs)?;
321        let ac = AhoCorasick::new(MatchKind::LeftmostFirst, &lits)?;
322        Some(Pre::new(ac))
323    }
324}
325
326// This implements Strategy for anything that implements PrefilterI.
327//
328// Note that this must only be used for regexes of length 1. Multi-regexes
329// don't work here. The prefilter interface only provides the span of a match
330// and not the pattern ID. (I did consider making it more expressive, but I
331// couldn't figure out how to tie everything together elegantly.) Thus, so long
332// as the regex only contains one pattern, we can simply assume that a match
333// corresponds to PatternID::ZERO. And indeed, that's what we do here.
334//
335// In practice, since this impl is used to report matches directly and thus
336// completely bypasses the regex engine, we only wind up using this under the
337// following restrictions:
338//
339// * There must be only one pattern. As explained above.
340// * The literal sequence must be finite and only contain exact literals.
341// * There must not be any look-around assertions. If there are, the literals
342// extracted might be exact, but a match doesn't necessarily imply an overall
343// match. As a trivial example, 'foo\bbar' does not match 'foobar'.
344// * The pattern must not have any explicit capturing groups. If it does, the
345// caller might expect them to be resolved. e.g., 'foo(bar)'.
346//
347// So when all of those things are true, we use a prefilter directly as a
348// strategy.
349//
350// In the case where the number of patterns is more than 1, we don't use this
351// but do use a special Aho-Corasick strategy if all of the regexes are just
352// simple literals or alternations of literals. (We also use the Aho-Corasick
353// strategy when len(patterns)==1 if the number of literals is large. In that
354// case, literal extraction gives up and will return an infinite set.)
355impl<P: PrefilterI> Strategy for Pre<P> {
356    #[cfg_attr(feature = "perf-inline", inline(always))]
357    fn group_info(&self) -> &GroupInfo {
358        &self.group_info
359    }
360
361    fn create_cache(&self) -> Cache {
362        Cache {
363            capmatches: Captures::all(self.group_info().clone()),
364            pikevm: wrappers::PikeVMCache::none(),
365            backtrack: wrappers::BoundedBacktrackerCache::none(),
366            onepass: wrappers::OnePassCache::none(),
367            hybrid: wrappers::HybridCache::none(),
368            revhybrid: wrappers::ReverseHybridCache::none(),
369        }
370    }
371
372    fn reset_cache(&self, _cache: &mut Cache) {}
373
374    fn is_accelerated(&self) -> bool {
375        self.pre.is_fast()
376    }
377
378    fn memory_usage(&self) -> usize {
379        self.pre.memory_usage()
380    }
381
382    #[cfg_attr(feature = "perf-inline", inline(always))]
383    fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
384        if input.is_done() {
385            return None;
386        }
387        if input.get_anchored().is_anchored() {
388            return self
389                .pre
390                .prefix(input.haystack(), input.get_span())
391                .map(|sp| Match::new(PatternID::ZERO, sp));
392        }
393        self.pre
394            .find(input.haystack(), input.get_span())
395            .map(|sp| Match::new(PatternID::ZERO, sp))
396    }
397
398    #[cfg_attr(feature = "perf-inline", inline(always))]
399    fn search_half(
400        &self,
401        cache: &mut Cache,
402        input: &Input<'_>,
403    ) -> Option<HalfMatch> {
404        self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end()))
405    }
406
407    #[cfg_attr(feature = "perf-inline", inline(always))]
408    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
409        self.search(cache, input).is_some()
410    }
411
412    #[cfg_attr(feature = "perf-inline", inline(always))]
413    fn search_slots(
414        &self,
415        cache: &mut Cache,
416        input: &Input<'_>,
417        slots: &mut [Option<NonMaxUsize>],
418    ) -> Option<PatternID> {
419        let m = self.search(cache, input)?;
420        if let Some(slot) = slots.get_mut(0) {
421            *slot = NonMaxUsize::new(m.start());
422        }
423        if let Some(slot) = slots.get_mut(1) {
424            *slot = NonMaxUsize::new(m.end());
425        }
426        Some(m.pattern())
427    }
428
429    #[cfg_attr(feature = "perf-inline", inline(always))]
430    fn which_overlapping_matches(
431        &self,
432        cache: &mut Cache,
433        input: &Input<'_>,
434        patset: &mut PatternSet,
435    ) {
436        if self.search(cache, input).is_some() {
437            patset.insert(PatternID::ZERO);
438        }
439    }
440}
441
/// The general purpose strategy: a bundle of regex engines built from the
/// same patterns. Searches try a DFA first when one is available and fall
/// back to the engines that cannot fail (one-pass DFA, bounded backtracker
/// or PikeVM).
#[derive(Debug)]
struct Core {
    /// The regex info (including configuration) this core was built from.
    info: RegexInfo,
    /// The prefilter, if any, that was handed to the engines at build time.
    pre: Option<Prefilter>,
    /// The forward Thompson NFA compiled from the patterns.
    nfa: NFA,
    /// The reverse NFA. Only built when the lazy DFA or the full DFA is
    /// enabled in the configuration.
    nfarev: Option<NFA>,
    /// The PikeVM. The last resort among the engines that cannot fail.
    pikevm: wrappers::PikeVM,
    /// The bounded backtracker. Used in preference to the PikeVM when it is
    /// available for a given input.
    backtrack: wrappers::BoundedBacktracker,
    /// The one-pass DFA. Used in preference to the bounded backtracker when
    /// it is available for a given input.
    onepass: wrappers::OnePass,
    /// The lazy DFA. Not built when a full DFA was built.
    hybrid: wrappers::Hybrid,
    /// The full DFA. Tried before the lazy DFA.
    dfa: wrappers::DFA,
}
454
455impl Core {
    /// Builds every regex engine that makes up a `Core` from the given
    /// patterns.
    ///
    /// `pre` is the prefilter (if any) handed to the engines that accept
    /// one, and it is also stored on the returned `Core`.
    ///
    /// # Errors
    ///
    /// Returns a `BuildError` if the forward NFA, the PikeVM, the bounded
    /// backtracker or (when a DFA is enabled) the reverse NFA fails to
    /// build. The one-pass DFA, lazy DFA and full DFA wrappers do not
    /// surface errors here.
    fn new(
        info: RegexInfo,
        pre: Option<Prefilter>,
        hirs: &[&Hir],
    ) -> Result<Core, BuildError> {
        let mut lookm = LookMatcher::new();
        lookm.set_line_terminator(info.config().get_line_terminator());
        let thompson_config = thompson::Config::new()
            .utf8(info.config().get_utf8_empty())
            .nfa_size_limit(info.config().get_nfa_size_limit())
            .shrink(false)
            .which_captures(info.config().get_which_captures())
            .look_matcher(lookm);
        let nfa = thompson::Compiler::new()
            .configure(thompson_config.clone())
            .build_many_from_hir(hirs)
            .map_err(BuildError::nfa)?;
        // It's possible for the PikeVM or the BB to fail to build, even though
        // at this point, we already have a full NFA in hand. They can fail
        // when a Unicode word boundary is used but where Unicode word boundary
        // support is disabled at compile time, thus making it impossible to
        // match. (Construction can also fail if the NFA was compiled without
        // captures, but we always enable that above.)
        //
        // NOTE(review): "always enable" does not obviously match the code
        // above, which takes the captures setting from the configuration via
        // 'get_which_captures()'. Confirm what that setting can be.
        let pikevm = wrappers::PikeVM::new(&info, pre.clone(), &nfa)?;
        let backtrack =
            wrappers::BoundedBacktracker::new(&info, pre.clone(), &nfa)?;
        // The onepass engine can of course fail to build, but we expect it to
        // fail in many cases because it is an optimization that doesn't apply
        // to all regexes. The 'OnePass' wrapper encapsulates this failure (and
        // logs a message if it occurs).
        let onepass = wrappers::OnePass::new(&info, &nfa);
        // We try to encapsulate whether a particular regex engine should be
        // used within each respective wrapper, but the DFAs need a reverse NFA
        // to be built, and we really do not want to build a reverse NFA if
        // we know we aren't going to use the lazy DFA. So we do a config check
        // up front, which is in practice the only way we won't try to use the
        // DFA.
        let (nfarev, hybrid, dfa) =
            if !info.config().get_hybrid() && !info.config().get_dfa() {
                (None, wrappers::Hybrid::none(), wrappers::DFA::none())
            } else {
                // FIXME: Technically, we don't quite yet KNOW that we need
                // a reverse NFA. It's possible for the DFAs below to both
                // fail to build just based on the forward NFA. In which case,
                // building the reverse NFA was totally wasted work. But...
                // fixing this requires breaking DFA construction apart into
                // two pieces: one for the forward part and another for the
                // reverse part. Quite annoying. Making it worse, when building
                // both DFAs fails, it's quite likely that the NFA is large and
                // that it will take quite some time to build the reverse NFA
                // too. So... it's really probably worth it to do this!
                let nfarev = thompson::Compiler::new()
                    // Currently, reverse NFAs don't support capturing groups,
                    // so we MUST disable them. But even if we didn't have to,
                    // we would, because nothing in this crate does anything
                    // useful with capturing groups in reverse. And of course,
                    // the lazy DFA ignores capturing groups in all cases.
                    .configure(
                        thompson_config
                            .clone()
                            .which_captures(WhichCaptures::None)
                            .reverse(true),
                    )
                    .build_many_from_hir(hirs)
                    .map_err(BuildError::nfa)?;
                let dfa = if !info.config().get_dfa() {
                    wrappers::DFA::none()
                } else {
                    wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
                };
                // The lazy DFA is only worth building when there is no full
                // DFA to use instead.
                let hybrid = if !info.config().get_hybrid() {
                    wrappers::Hybrid::none()
                } else if dfa.is_some() {
                    debug!("skipping lazy DFA because we have a full DFA");
                    wrappers::Hybrid::none()
                } else {
                    wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
                };
                (Some(nfarev), hybrid, dfa)
            };
        Ok(Core {
            info,
            pre,
            nfa,
            nfarev,
            pikevm,
            backtrack,
            onepass,
            hybrid,
            dfa,
        })
    }
548
549    #[cfg_attr(feature = "perf-inline", inline(always))]
550    fn try_search_mayfail(
551        &self,
552        cache: &mut Cache,
553        input: &Input<'_>,
554    ) -> Option<Result<Option<Match>, RetryFailError>> {
555        if let Some(e) = self.dfa.get(input) {
556            trace!("using full DFA for search at {:?}", input.get_span());
557            Some(e.try_search(input))
558        } else if let Some(e) = self.hybrid.get(input) {
559            trace!("using lazy DFA for search at {:?}", input.get_span());
560            Some(e.try_search(&mut cache.hybrid, input))
561        } else {
562            None
563        }
564    }
565
566    fn search_nofail(
567        &self,
568        cache: &mut Cache,
569        input: &Input<'_>,
570    ) -> Option<Match> {
571        let caps = &mut cache.capmatches;
572        caps.set_pattern(None);
573        // We manually inline 'try_search_slots_nofail' here because we need to
574        // borrow from 'cache.capmatches' in this method, but if we do, then
575        // we can't pass 'cache' wholesale to to 'try_slots_no_hybrid'. It's a
576        // classic example of how the borrow checker inhibits decomposition.
577        // There are of course work-arounds (more types and/or interior
578        // mutability), but that's more annoying than this IMO.
579        let pid = if let Some(ref e) = self.onepass.get(input) {
580            trace!("using OnePass for search at {:?}", input.get_span());
581            e.search_slots(&mut cache.onepass, input, caps.slots_mut())
582        } else if let Some(ref e) = self.backtrack.get(input) {
583            trace!(
584                "using BoundedBacktracker for search at {:?}",
585                input.get_span()
586            );
587            e.search_slots(&mut cache.backtrack, input, caps.slots_mut())
588        } else {
589            trace!("using PikeVM for search at {:?}", input.get_span());
590            let e = self.pikevm.get();
591            e.search_slots(&mut cache.pikevm, input, caps.slots_mut())
592        };
593        caps.set_pattern(pid);
594        caps.get_match()
595    }
596
597    fn search_half_nofail(
598        &self,
599        cache: &mut Cache,
600        input: &Input<'_>,
601    ) -> Option<HalfMatch> {
602        // Only the lazy/full DFA returns half-matches, since the DFA requires
603        // a reverse scan to find the start position. These fallback regex
604        // engines can find the start and end in a single pass, so we just do
605        // that and throw away the start offset to conform to the API.
606        let m = self.search_nofail(cache, input)?;
607        Some(HalfMatch::new(m.pattern(), m.end()))
608    }
609
610    fn search_slots_nofail(
611        &self,
612        cache: &mut Cache,
613        input: &Input<'_>,
614        slots: &mut [Option<NonMaxUsize>],
615    ) -> Option<PatternID> {
616        if let Some(ref e) = self.onepass.get(input) {
617            trace!(
618                "using OnePass for capture search at {:?}",
619                input.get_span()
620            );
621            e.search_slots(&mut cache.onepass, input, slots)
622        } else if let Some(ref e) = self.backtrack.get(input) {
623            trace!(
624                "using BoundedBacktracker for capture search at {:?}",
625                input.get_span()
626            );
627            e.search_slots(&mut cache.backtrack, input, slots)
628        } else {
629            trace!(
630                "using PikeVM for capture search at {:?}",
631                input.get_span()
632            );
633            let e = self.pikevm.get();
634            e.search_slots(&mut cache.pikevm, input, slots)
635        }
636    }
637
638    fn is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
639        if let Some(ref e) = self.onepass.get(input) {
640            trace!(
641                "using OnePass for is-match search at {:?}",
642                input.get_span()
643            );
644            e.search_slots(&mut cache.onepass, input, &mut []).is_some()
645        } else if let Some(ref e) = self.backtrack.get(input) {
646            trace!(
647                "using BoundedBacktracker for is-match search at {:?}",
648                input.get_span()
649            );
650            e.is_match(&mut cache.backtrack, input)
651        } else {
652            trace!(
653                "using PikeVM for is-match search at {:?}",
654                input.get_span()
655            );
656            let e = self.pikevm.get();
657            e.is_match(&mut cache.pikevm, input)
658        }
659    }
660
661    fn is_capture_search_needed(&self, slots_len: usize) -> bool {
662        slots_len > self.nfa.group_info().implicit_slot_len()
663    }
664}
665
666impl Strategy for Core {
667    #[cfg_attr(feature = "perf-inline", inline(always))]
668    fn group_info(&self) -> &GroupInfo {
669        self.nfa.group_info()
670    }
671
672    #[cfg_attr(feature = "perf-inline", inline(always))]
673    fn create_cache(&self) -> Cache {
674        Cache {
675            capmatches: Captures::all(self.group_info().clone()),
676            pikevm: self.pikevm.create_cache(),
677            backtrack: self.backtrack.create_cache(),
678            onepass: self.onepass.create_cache(),
679            hybrid: self.hybrid.create_cache(),
680            revhybrid: wrappers::ReverseHybridCache::none(),
681        }
682    }
683
684    #[cfg_attr(feature = "perf-inline", inline(always))]
685    fn reset_cache(&self, cache: &mut Cache) {
686        cache.pikevm.reset(&self.pikevm);
687        cache.backtrack.reset(&self.backtrack);
688        cache.onepass.reset(&self.onepass);
689        cache.hybrid.reset(&self.hybrid);
690    }
691
692    fn is_accelerated(&self) -> bool {
693        self.pre.as_ref().map_or(false, |pre| pre.is_fast())
694    }
695
696    fn memory_usage(&self) -> usize {
697        self.info.memory_usage()
698            + self.pre.as_ref().map_or(0, |pre| pre.memory_usage())
699            + self.nfa.memory_usage()
700            + self.nfarev.as_ref().map_or(0, |nfa| nfa.memory_usage())
701            + self.onepass.memory_usage()
702            + self.dfa.memory_usage()
703    }
704
705    #[cfg_attr(feature = "perf-inline", inline(always))]
706    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
707        // We manually inline try_search_mayfail here because letting the
708        // compiler do it seems to produce pretty crappy codegen.
709        return if let Some(e) = self.dfa.get(input) {
710            trace!("using full DFA for full search at {:?}", input.get_span());
711            match e.try_search(input) {
712                Ok(x) => x,
713                Err(_err) => {
714                    trace!("full DFA search failed: {_err}");
715                    self.search_nofail(cache, input)
716                }
717            }
718        } else if let Some(e) = self.hybrid.get(input) {
719            trace!("using lazy DFA for full search at {:?}", input.get_span());
720            match e.try_search(&mut cache.hybrid, input) {
721                Ok(x) => x,
722                Err(_err) => {
723                    trace!("lazy DFA search failed: {_err}");
724                    self.search_nofail(cache, input)
725                }
726            }
727        } else {
728            self.search_nofail(cache, input)
729        };
730    }
731
732    #[cfg_attr(feature = "perf-inline", inline(always))]
733    fn search_half(
734        &self,
735        cache: &mut Cache,
736        input: &Input<'_>,
737    ) -> Option<HalfMatch> {
738        // The main difference with 'search' is that if we're using a DFA, we
739        // can use a single forward scan without needing to run the reverse
740        // DFA.
741        if let Some(e) = self.dfa.get(input) {
742            trace!("using full DFA for half search at {:?}", input.get_span());
743            match e.try_search_half_fwd(input) {
744                Ok(x) => x,
745                Err(_err) => {
746                    trace!("full DFA half search failed: {_err}");
747                    self.search_half_nofail(cache, input)
748                }
749            }
750        } else if let Some(e) = self.hybrid.get(input) {
751            trace!("using lazy DFA for half search at {:?}", input.get_span());
752            match e.try_search_half_fwd(&mut cache.hybrid, input) {
753                Ok(x) => x,
754                Err(_err) => {
755                    trace!("lazy DFA half search failed: {_err}");
756                    self.search_half_nofail(cache, input)
757                }
758            }
759        } else {
760            self.search_half_nofail(cache, input)
761        }
762    }
763
764    #[cfg_attr(feature = "perf-inline", inline(always))]
765    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
766        if let Some(e) = self.dfa.get(input) {
767            trace!(
768                "using full DFA for is-match search at {:?}",
769                input.get_span()
770            );
771            match e.try_search_half_fwd(input) {
772                Ok(x) => x.is_some(),
773                Err(_err) => {
774                    trace!("full DFA half search failed: {_err}");
775                    self.is_match_nofail(cache, input)
776                }
777            }
778        } else if let Some(e) = self.hybrid.get(input) {
779            trace!(
780                "using lazy DFA for is-match search at {:?}",
781                input.get_span()
782            );
783            match e.try_search_half_fwd(&mut cache.hybrid, input) {
784                Ok(x) => x.is_some(),
785                Err(_err) => {
786                    trace!("lazy DFA half search failed: {_err}");
787                    self.is_match_nofail(cache, input)
788                }
789            }
790        } else {
791            self.is_match_nofail(cache, input)
792        }
793    }
794
795    #[cfg_attr(feature = "perf-inline", inline(always))]
796    fn search_slots(
797        &self,
798        cache: &mut Cache,
799        input: &Input<'_>,
800        slots: &mut [Option<NonMaxUsize>],
801    ) -> Option<PatternID> {
802        // Even if the regex has explicit capture groups, if the caller didn't
803        // provide any explicit slots, then it doesn't make sense to try and do
804        // extra work to get offsets for those slots. Ideally the caller should
805        // realize this and not call this routine in the first place, but alas,
806        // we try to save the caller from themselves if they do.
807        if !self.is_capture_search_needed(slots.len()) {
808            trace!("asked for slots unnecessarily, trying fast path");
809            let m = self.search(cache, input)?;
810            copy_match_to_slots(m, slots);
811            return Some(m.pattern());
812        }
813        // If the onepass DFA is available for this search (which only happens
814        // when it's anchored), then skip running a fallible DFA. The onepass
815        // DFA isn't as fast as a full or lazy DFA, but it is typically quite
816        // a bit faster than the backtracker or the PikeVM. So it isn't as
817        // advantageous to try and do a full/lazy DFA scan first.
818        //
819        // We still theorize that it's better to do a full/lazy DFA scan, even
820        // when it's anchored, because it's usually much faster and permits us
821        // to say "no match" much more quickly. This does hurt the case of,
822        // say, parsing each line in a log file into capture groups, because
823        // in that case, the line always matches. So the lazy DFA scan is
824        // usually just wasted work. But, the lazy DFA is usually quite fast
825        // and doesn't cost too much here.
826        if self.onepass.get(&input).is_some() {
827            return self.search_slots_nofail(cache, &input, slots);
828        }
829        let m = match self.try_search_mayfail(cache, input) {
830            Some(Ok(Some(m))) => m,
831            Some(Ok(None)) => return None,
832            Some(Err(_err)) => {
833                trace!("fast capture search failed: {_err}");
834                return self.search_slots_nofail(cache, input, slots);
835            }
836            None => {
837                return self.search_slots_nofail(cache, input, slots);
838            }
839        };
840        // At this point, now that we've found the bounds of the
841        // match, we need to re-run something that can resolve
842        // capturing groups. But we only need to run on it on the
843        // match bounds and not the entire haystack.
844        trace!(
845            "match found at {}..{} in capture search, \
846		  	 using another engine to find captures",
847            m.start(),
848            m.end(),
849        );
850        let input = input
851            .clone()
852            .span(m.start()..m.end())
853            .anchored(Anchored::Pattern(m.pattern()));
854        Some(
855            self.search_slots_nofail(cache, &input, slots)
856                .expect("should find a match"),
857        )
858    }
859
860    #[cfg_attr(feature = "perf-inline", inline(always))]
861    fn which_overlapping_matches(
862        &self,
863        cache: &mut Cache,
864        input: &Input<'_>,
865        patset: &mut PatternSet,
866    ) {
867        if let Some(e) = self.dfa.get(input) {
868            trace!(
869                "using full DFA for overlapping search at {:?}",
870                input.get_span()
871            );
872            let _err = match e.try_which_overlapping_matches(input, patset) {
873                Ok(()) => return,
874                Err(err) => err,
875            };
876            trace!("fast overlapping search failed: {_err}");
877        } else if let Some(e) = self.hybrid.get(input) {
878            trace!(
879                "using lazy DFA for overlapping search at {:?}",
880                input.get_span()
881            );
882            let _err = match e.try_which_overlapping_matches(
883                &mut cache.hybrid,
884                input,
885                patset,
886            ) {
887                Ok(()) => {
888                    return;
889                }
890                Err(err) => err,
891            };
892            trace!("fast overlapping search failed: {_err}");
893        }
894        trace!(
895            "using PikeVM for overlapping search at {:?}",
896            input.get_span()
897        );
898        let e = self.pikevm.get();
899        e.which_overlapping_matches(&mut cache.pikevm, input, patset)
900    }
901}
902
903#[derive(Debug)]
904struct ReverseAnchored {
905    core: Core,
906}
907
908impl ReverseAnchored {
909    fn new(core: Core) -> Result<ReverseAnchored, Core> {
910        if !core.info.is_always_anchored_end() {
911            debug!(
912                "skipping reverse anchored optimization because \
913				 the regex is not always anchored at the end"
914            );
915            return Err(core);
916        }
917        // Note that the caller can still request an anchored search even when
918        // the regex isn't anchored at the start. We detect that case in the
919        // search routines below and just fallback to the core engine. This
920        // is fine because both searches are anchored. It's just a matter of
921        // picking one. Falling back to the core engine is a little simpler,
922        // since if we used the reverse anchored approach, we'd have to add an
923        // extra check to ensure the match reported starts at the place where
924        // the caller requested the search to start.
925        if core.info.is_always_anchored_start() {
926            debug!(
927                "skipping reverse anchored optimization because \
928				 the regex is also anchored at the start"
929            );
930            return Err(core);
931        }
932        // Only DFAs can do reverse searches (currently), so we need one of
933        // them in order to do this optimization. It's possible (although
934        // pretty unlikely) that we have neither and need to give up.
935        if !core.hybrid.is_some() && !core.dfa.is_some() {
936            debug!(
937                "skipping reverse anchored optimization because \
938				 we don't have a lazy DFA or a full DFA"
939            );
940            return Err(core);
941        }
942        Ok(ReverseAnchored { core })
943    }
944
945    #[cfg_attr(feature = "perf-inline", inline(always))]
946    fn try_search_half_anchored_rev(
947        &self,
948        cache: &mut Cache,
949        input: &Input<'_>,
950    ) -> Result<Option<HalfMatch>, RetryFailError> {
951        // We of course always want an anchored search. In theory, the
952        // underlying regex engines should automatically enable anchored
953        // searches since the regex is itself anchored, but this more clearly
954        // expresses intent and is always correct.
955        let input = input.clone().anchored(Anchored::Yes);
956        if let Some(e) = self.core.dfa.get(&input) {
957            trace!(
958                "using full DFA for reverse anchored search at {:?}",
959                input.get_span()
960            );
961            e.try_search_half_rev(&input)
962        } else if let Some(e) = self.core.hybrid.get(&input) {
963            trace!(
964                "using lazy DFA for reverse anchored search at {:?}",
965                input.get_span()
966            );
967            e.try_search_half_rev(&mut cache.hybrid, &input)
968        } else {
969            unreachable!("ReverseAnchored always has a DFA")
970        }
971    }
972}
973
974// Note that in this impl, we don't check that 'input.end() ==
975// input.haystack().len()'. In particular, when that condition is false, a
976// match is always impossible because we know that the regex is always anchored
977// at the end (or else 'ReverseAnchored' won't be built). We don't check that
978// here because the 'Regex' wrapper actually does that for us in all cases.
979// Thus, in this impl, we can actually assume that the end position in 'input'
980// is equivalent to the length of the haystack.
981impl Strategy for ReverseAnchored {
982    #[cfg_attr(feature = "perf-inline", inline(always))]
983    fn group_info(&self) -> &GroupInfo {
984        self.core.group_info()
985    }
986
987    #[cfg_attr(feature = "perf-inline", inline(always))]
988    fn create_cache(&self) -> Cache {
989        self.core.create_cache()
990    }
991
992    #[cfg_attr(feature = "perf-inline", inline(always))]
993    fn reset_cache(&self, cache: &mut Cache) {
994        self.core.reset_cache(cache);
995    }
996
997    fn is_accelerated(&self) -> bool {
998        // Since this is anchored at the end, a reverse anchored search is
999        // almost certainly guaranteed to result in a much faster search than
1000        // a standard forward search.
1001        true
1002    }
1003
1004    fn memory_usage(&self) -> usize {
1005        self.core.memory_usage()
1006    }
1007
1008    #[cfg_attr(feature = "perf-inline", inline(always))]
1009    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1010        if input.get_anchored().is_anchored() {
1011            return self.core.search(cache, input);
1012        }
1013        match self.try_search_half_anchored_rev(cache, input) {
1014            Err(_err) => {
1015                trace!("fast reverse anchored search failed: {_err}");
1016                self.core.search_nofail(cache, input)
1017            }
1018            Ok(None) => None,
1019            Ok(Some(hm)) => {
1020                Some(Match::new(hm.pattern(), hm.offset()..input.end()))
1021            }
1022        }
1023    }
1024
1025    #[cfg_attr(feature = "perf-inline", inline(always))]
1026    fn search_half(
1027        &self,
1028        cache: &mut Cache,
1029        input: &Input<'_>,
1030    ) -> Option<HalfMatch> {
1031        if input.get_anchored().is_anchored() {
1032            return self.core.search_half(cache, input);
1033        }
1034        match self.try_search_half_anchored_rev(cache, input) {
1035            Err(_err) => {
1036                trace!("fast reverse anchored search failed: {_err}");
1037                self.core.search_half_nofail(cache, input)
1038            }
1039            Ok(None) => None,
1040            Ok(Some(hm)) => {
1041                // Careful here! 'try_search_half' is a *forward* search that
1042                // only cares about the *end* position of a match. But
1043                // 'hm.offset()' is actually the start of the match. So we
1044                // actually just throw that away here and, since we know we
1045                // have a match, return the only possible position at which a
1046                // match can occur: input.end().
1047                Some(HalfMatch::new(hm.pattern(), input.end()))
1048            }
1049        }
1050    }
1051
1052    #[cfg_attr(feature = "perf-inline", inline(always))]
1053    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1054        if input.get_anchored().is_anchored() {
1055            return self.core.is_match(cache, input);
1056        }
1057        match self.try_search_half_anchored_rev(cache, input) {
1058            Err(_err) => {
1059                trace!("fast reverse anchored search failed: {_err}");
1060                self.core.is_match_nofail(cache, input)
1061            }
1062            Ok(None) => false,
1063            Ok(Some(_)) => true,
1064        }
1065    }
1066
1067    #[cfg_attr(feature = "perf-inline", inline(always))]
1068    fn search_slots(
1069        &self,
1070        cache: &mut Cache,
1071        input: &Input<'_>,
1072        slots: &mut [Option<NonMaxUsize>],
1073    ) -> Option<PatternID> {
1074        if input.get_anchored().is_anchored() {
1075            return self.core.search_slots(cache, input, slots);
1076        }
1077        match self.try_search_half_anchored_rev(cache, input) {
1078            Err(_err) => {
1079                trace!("fast reverse anchored search failed: {_err}");
1080                self.core.search_slots_nofail(cache, input, slots)
1081            }
1082            Ok(None) => None,
1083            Ok(Some(hm)) => {
1084                if !self.core.is_capture_search_needed(slots.len()) {
1085                    trace!("asked for slots unnecessarily, skipping captures");
1086                    let m = Match::new(hm.pattern(), hm.offset()..input.end());
1087                    copy_match_to_slots(m, slots);
1088                    return Some(m.pattern());
1089                }
1090                let start = hm.offset();
1091                let input = input
1092                    .clone()
1093                    .span(start..input.end())
1094                    .anchored(Anchored::Pattern(hm.pattern()));
1095                self.core.search_slots_nofail(cache, &input, slots)
1096            }
1097        }
1098    }
1099
1100    #[cfg_attr(feature = "perf-inline", inline(always))]
1101    fn which_overlapping_matches(
1102        &self,
1103        cache: &mut Cache,
1104        input: &Input<'_>,
1105        patset: &mut PatternSet,
1106    ) {
1107        // It seems like this could probably benefit from a reverse anchored
1108        // optimization, perhaps by doing an overlapping reverse search (which
1109        // the DFAs do support). I haven't given it much thought though, and
1110        // I'm currently focus more on the single pattern case.
1111        self.core.which_overlapping_matches(cache, input, patset)
1112    }
1113}
1114
1115#[derive(Debug)]
1116struct ReverseSuffix {
1117    core: Core,
1118    pre: Prefilter,
1119}
1120
1121impl ReverseSuffix {
1122    fn new(core: Core, hirs: &[&Hir]) -> Result<ReverseSuffix, Core> {
1123        if !core.info.config().get_auto_prefilter() {
1124            debug!(
1125                "skipping reverse suffix optimization because \
1126                 automatic prefilters are disabled"
1127            );
1128            return Err(core);
1129        }
1130        // Like the reverse inner optimization, we don't do this for regexes
1131        // that are always anchored. It could lead to scanning too much, but
1132        // could say "no match" much more quickly than running the regex
1133        // engine if the initial literal scan doesn't match. With that said,
1134        // the reverse suffix optimization has lower overhead, since it only
1135        // requires a reverse scan after a literal match to confirm or reject
1136        // the match. (Although, in the case of confirmation, it then needs to
1137        // do another forward scan to find the end position.)
1138        //
1139        // Note that the caller can still request an anchored search even
1140        // when the regex isn't anchored. We detect that case in the search
1141        // routines below and just fallback to the core engine. Currently this
1142        // optimization assumes all searches are unanchored, so if we do want
1143        // to enable this optimization for anchored searches, it will need a
1144        // little work to support it.
1145        if core.info.is_always_anchored_start() {
1146            debug!(
1147                "skipping reverse suffix optimization because \
1148				 the regex is always anchored at the start",
1149            );
1150            return Err(core);
1151        }
1152        // Only DFAs can do reverse searches (currently), so we need one of
1153        // them in order to do this optimization. It's possible (although
1154        // pretty unlikely) that we have neither and need to give up.
1155        if !core.hybrid.is_some() && !core.dfa.is_some() {
1156            debug!(
1157                "skipping reverse suffix optimization because \
1158				 we don't have a lazy DFA or a full DFA"
1159            );
1160            return Err(core);
1161        }
1162        if core.pre.as_ref().map_or(false, |p| p.is_fast()) {
1163            debug!(
1164                "skipping reverse suffix optimization because \
1165				 we already have a prefilter that we think is fast"
1166            );
1167            return Err(core);
1168        }
1169        let kind = core.info.config().get_match_kind();
1170        let suffixes = crate::util::prefilter::suffixes(kind, hirs);
1171        let lcs = match suffixes.longest_common_suffix() {
1172            None => {
1173                debug!(
1174                    "skipping reverse suffix optimization because \
1175                     a longest common suffix could not be found",
1176                );
1177                return Err(core);
1178            }
1179            Some(lcs) if lcs.is_empty() => {
1180                debug!(
1181                    "skipping reverse suffix optimization because \
1182                     the longest common suffix is the empty string",
1183                );
1184                return Err(core);
1185            }
1186            Some(lcs) => lcs,
1187        };
1188        let pre = match Prefilter::new(kind, &[lcs]) {
1189            Some(pre) => pre,
1190            None => {
1191                debug!(
1192                    "skipping reverse suffix optimization because \
1193                     a prefilter could not be constructed from the \
1194                     longest common suffix",
1195                );
1196                return Err(core);
1197            }
1198        };
1199        if !pre.is_fast() {
1200            debug!(
1201                "skipping reverse suffix optimization because \
1202				 while we have a suffix prefilter, it is not \
1203				 believed to be 'fast'"
1204            );
1205            return Err(core);
1206        }
1207        Ok(ReverseSuffix { core, pre })
1208    }
1209
1210    #[cfg_attr(feature = "perf-inline", inline(always))]
1211    fn try_search_half_start(
1212        &self,
1213        cache: &mut Cache,
1214        input: &Input<'_>,
1215    ) -> Result<Option<HalfMatch>, RetryError> {
1216        let mut span = input.get_span();
1217        let mut min_start = 0;
1218        loop {
1219            let litmatch = match self.pre.find(input.haystack(), span) {
1220                None => return Ok(None),
1221                Some(span) => span,
1222            };
1223            trace!("reverse suffix scan found suffix match at {litmatch:?}");
1224            let revinput = input
1225                .clone()
1226                .anchored(Anchored::Yes)
1227                .span(input.start()..litmatch.end);
1228            match self
1229                .try_search_half_rev_limited(cache, &revinput, min_start)?
1230            {
1231                None => {
1232                    if span.start >= span.end {
1233                        break;
1234                    }
1235                    span.start = litmatch.start.checked_add(1).unwrap();
1236                }
1237                Some(hm) => return Ok(Some(hm)),
1238            }
1239            min_start = litmatch.end;
1240        }
1241        Ok(None)
1242    }
1243
1244    #[cfg_attr(feature = "perf-inline", inline(always))]
1245    fn try_search_half_fwd(
1246        &self,
1247        cache: &mut Cache,
1248        input: &Input<'_>,
1249    ) -> Result<Option<HalfMatch>, RetryFailError> {
1250        if let Some(e) = self.core.dfa.get(&input) {
1251            trace!(
1252                "using full DFA for forward reverse suffix search at {:?}",
1253                input.get_span()
1254            );
1255            e.try_search_half_fwd(&input)
1256        } else if let Some(e) = self.core.hybrid.get(&input) {
1257            trace!(
1258                "using lazy DFA for forward reverse suffix search at {:?}",
1259                input.get_span()
1260            );
1261            e.try_search_half_fwd(&mut cache.hybrid, &input)
1262        } else {
1263            unreachable!("ReverseSuffix always has a DFA")
1264        }
1265    }
1266
1267    #[cfg_attr(feature = "perf-inline", inline(always))]
1268    fn try_search_half_rev_limited(
1269        &self,
1270        cache: &mut Cache,
1271        input: &Input<'_>,
1272        min_start: usize,
1273    ) -> Result<Option<HalfMatch>, RetryError> {
1274        if let Some(e) = self.core.dfa.get(&input) {
1275            trace!(
1276                "using full DFA for reverse suffix search at {:?}, \
1277                 but will be stopped at {} to avoid quadratic behavior",
1278                input.get_span(),
1279                min_start,
1280            );
1281            e.try_search_half_rev_limited(&input, min_start)
1282        } else if let Some(e) = self.core.hybrid.get(&input) {
1283            trace!(
1284                "using lazy DFA for reverse suffix search at {:?}, \
1285                 but will be stopped at {} to avoid quadratic behavior",
1286                input.get_span(),
1287                min_start,
1288            );
1289            e.try_search_half_rev_limited(&mut cache.hybrid, &input, min_start)
1290        } else {
1291            unreachable!("ReverseSuffix always has a DFA")
1292        }
1293    }
1294}
1295
1296impl Strategy for ReverseSuffix {
1297    #[cfg_attr(feature = "perf-inline", inline(always))]
1298    fn group_info(&self) -> &GroupInfo {
1299        self.core.group_info()
1300    }
1301
1302    #[cfg_attr(feature = "perf-inline", inline(always))]
1303    fn create_cache(&self) -> Cache {
1304        self.core.create_cache()
1305    }
1306
1307    #[cfg_attr(feature = "perf-inline", inline(always))]
1308    fn reset_cache(&self, cache: &mut Cache) {
1309        self.core.reset_cache(cache);
1310    }
1311
1312    fn is_accelerated(&self) -> bool {
1313        self.pre.is_fast()
1314    }
1315
1316    fn memory_usage(&self) -> usize {
1317        self.core.memory_usage() + self.pre.memory_usage()
1318    }
1319
1320    #[cfg_attr(feature = "perf-inline", inline(always))]
1321    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1322        if input.get_anchored().is_anchored() {
1323            return self.core.search(cache, input);
1324        }
1325        match self.try_search_half_start(cache, input) {
1326            Err(RetryError::Quadratic(_err)) => {
1327                trace!("reverse suffix optimization failed: {_err}");
1328                self.core.search(cache, input)
1329            }
1330            Err(RetryError::Fail(_err)) => {
1331                trace!("reverse suffix reverse fast search failed: {_err}");
1332                self.core.search_nofail(cache, input)
1333            }
1334            Ok(None) => None,
1335            Ok(Some(hm_start)) => {
1336                let fwdinput = input
1337                    .clone()
1338                    .anchored(Anchored::Pattern(hm_start.pattern()))
1339                    .span(hm_start.offset()..input.end());
1340                match self.try_search_half_fwd(cache, &fwdinput) {
1341                    Err(_err) => {
1342                        trace!(
1343                            "reverse suffix forward fast search failed: {_err}"
1344                        );
1345                        self.core.search_nofail(cache, input)
1346                    }
1347                    Ok(None) => {
1348                        unreachable!(
1349                            "suffix match plus reverse match implies \
1350						     there must be a match",
1351                        )
1352                    }
1353                    Ok(Some(hm_end)) => Some(Match::new(
1354                        hm_start.pattern(),
1355                        hm_start.offset()..hm_end.offset(),
1356                    )),
1357                }
1358            }
1359        }
1360    }
1361
1362    #[cfg_attr(feature = "perf-inline", inline(always))]
1363    fn search_half(
1364        &self,
1365        cache: &mut Cache,
1366        input: &Input<'_>,
1367    ) -> Option<HalfMatch> {
1368        if input.get_anchored().is_anchored() {
1369            return self.core.search_half(cache, input);
1370        }
1371        match self.try_search_half_start(cache, input) {
1372            Err(RetryError::Quadratic(_err)) => {
1373                trace!("reverse suffix half optimization failed: {_err}");
1374                self.core.search_half(cache, input)
1375            }
1376            Err(RetryError::Fail(_err)) => {
1377                trace!(
1378                    "reverse suffix reverse fast half search failed: {_err}"
1379                );
1380                self.core.search_half_nofail(cache, input)
1381            }
1382            Ok(None) => None,
1383            Ok(Some(hm_start)) => {
1384                // This is a bit subtle. It is tempting to just stop searching
1385                // at this point and return a half-match with an offset
1386                // corresponding to where the suffix was found. But the suffix
1387                // match does not necessarily correspond to the end of the
1388                // proper leftmost-first match. Consider /[a-z]+ing/ against
1389                // 'tingling'. The first suffix match is the first 'ing', and
1390                // the /[a-z]+/ matches the 't'. So if we stopped here, then
1391                // we'd report 'ting' as the match. But 'tingling' is the
1392                // correct match because of greediness.
1393                let fwdinput = input
1394                    .clone()
1395                    .anchored(Anchored::Pattern(hm_start.pattern()))
1396                    .span(hm_start.offset()..input.end());
1397                match self.try_search_half_fwd(cache, &fwdinput) {
1398                    Err(_err) => {
1399                        trace!(
1400                            "reverse suffix forward fast search failed: {_err}"
1401                        );
1402                        self.core.search_half_nofail(cache, input)
1403                    }
1404                    Ok(None) => {
1405                        unreachable!(
1406                            "suffix match plus reverse match implies \
1407						     there must be a match",
1408                        )
1409                    }
1410                    Ok(Some(hm_end)) => Some(hm_end),
1411                }
1412            }
1413        }
1414    }
1415
1416    #[cfg_attr(feature = "perf-inline", inline(always))]
1417    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1418        if input.get_anchored().is_anchored() {
1419            return self.core.is_match(cache, input);
1420        }
1421        match self.try_search_half_start(cache, input) {
1422            Err(RetryError::Quadratic(_err)) => {
1423                trace!("reverse suffix half optimization failed: {_err}");
1424                self.core.is_match_nofail(cache, input)
1425            }
1426            Err(RetryError::Fail(_err)) => {
1427                trace!(
1428                    "reverse suffix reverse fast half search failed: {_err}"
1429                );
1430                self.core.is_match_nofail(cache, input)
1431            }
1432            Ok(None) => false,
1433            Ok(Some(_)) => true,
1434        }
1435    }
1436
1437    #[cfg_attr(feature = "perf-inline", inline(always))]
1438    fn search_slots(
1439        &self,
1440        cache: &mut Cache,
1441        input: &Input<'_>,
1442        slots: &mut [Option<NonMaxUsize>],
1443    ) -> Option<PatternID> {
1444        if input.get_anchored().is_anchored() {
1445            return self.core.search_slots(cache, input, slots);
1446        }
1447        if !self.core.is_capture_search_needed(slots.len()) {
1448            trace!("asked for slots unnecessarily, trying fast path");
1449            let m = self.search(cache, input)?;
1450            copy_match_to_slots(m, slots);
1451            return Some(m.pattern());
1452        }
1453        let hm_start = match self.try_search_half_start(cache, input) {
1454            Err(RetryError::Quadratic(_err)) => {
1455                trace!("reverse suffix captures optimization failed: {_err}");
1456                return self.core.search_slots(cache, input, slots);
1457            }
1458            Err(RetryError::Fail(_err)) => {
1459                trace!(
1460                    "reverse suffix reverse fast captures search failed: \
1461                        {_err}"
1462                );
1463                return self.core.search_slots_nofail(cache, input, slots);
1464            }
1465            Ok(None) => return None,
1466            Ok(Some(hm_start)) => hm_start,
1467        };
1468        trace!(
1469            "match found at {}..{} in capture search, \
1470		  	 using another engine to find captures",
1471            hm_start.offset(),
1472            input.end(),
1473        );
1474        let start = hm_start.offset();
1475        let input = input
1476            .clone()
1477            .span(start..input.end())
1478            .anchored(Anchored::Pattern(hm_start.pattern()));
1479        self.core.search_slots_nofail(cache, &input, slots)
1480    }
1481
1482    #[cfg_attr(feature = "perf-inline", inline(always))]
1483    fn which_overlapping_matches(
1484        &self,
1485        cache: &mut Cache,
1486        input: &Input<'_>,
1487        patset: &mut PatternSet,
1488    ) {
1489        self.core.which_overlapping_matches(cache, input, patset)
1490    }
1491}
1492
/// A strategy that looks for an inner literal with a prefilter, confirms the
/// part of the pattern before that literal with a reverse scan, and then
/// finds the end of the match with a forward scan.
#[derive(Debug)]
struct ReverseInner {
    /// The core engine. It handles anchored searches and is the fallback
    /// whenever the reverse inner search returns an error.
    core: Core,
    /// The prefilter for the inner literal, as returned by
    /// `reverse_inner::extract`.
    preinner: Prefilter,
    /// The reverse NFA compiled from the prefix that `reverse_inner::extract`
    /// returns alongside the inner literal prefilter.
    nfarev: NFA,
    /// A reverse lazy DFA built from `nfarev`. It is `none()` when the lazy
    /// DFA is disabled in the config or when a full reverse DFA is available.
    hybrid: wrappers::ReverseHybrid,
    /// A reverse full DFA built from `nfarev`. It is `none()` when full DFAs
    /// are disabled in the config.
    dfa: wrappers::ReverseDFA,
}
1501
1502impl ReverseInner {
1503    fn new(core: Core, hirs: &[&Hir]) -> Result<ReverseInner, Core> {
1504        if !core.info.config().get_auto_prefilter() {
1505            debug!(
1506                "skipping reverse inner optimization because \
1507                 automatic prefilters are disabled"
1508            );
1509            return Err(core);
1510        }
1511        // Currently we hard-code the assumption of leftmost-first match
1512        // semantics. This isn't a huge deal because 'all' semantics tend to
1513        // only be used for forward overlapping searches with multiple regexes,
1514        // and this optimization only supports a single pattern at the moment.
1515        if core.info.config().get_match_kind() != MatchKind::LeftmostFirst {
1516            debug!(
1517                "skipping reverse inner optimization because \
1518				 match kind is {:?} but this only supports leftmost-first",
1519                core.info.config().get_match_kind(),
1520            );
1521            return Err(core);
1522        }
1523        // It's likely that a reverse inner scan has too much overhead for it
1524        // to be worth it when the regex is anchored at the start. It is
1525        // possible for it to be quite a bit faster if the initial literal
1526        // scan fails to detect a match, in which case, we can say "no match"
1527        // very quickly. But this could be undesirable, e.g., scanning too far
1528        // or when the literal scan matches. If it matches, then confirming the
1529        // match requires a reverse scan followed by a forward scan to confirm
1530        // or reject, which is a fair bit of work.
1531        //
1532        // Note that the caller can still request an anchored search even
1533        // when the regex isn't anchored. We detect that case in the search
1534        // routines below and just fallback to the core engine. Currently this
1535        // optimization assumes all searches are unanchored, so if we do want
1536        // to enable this optimization for anchored searches, it will need a
1537        // little work to support it.
1538        if core.info.is_always_anchored_start() {
1539            debug!(
1540                "skipping reverse inner optimization because \
1541				 the regex is always anchored at the start",
1542            );
1543            return Err(core);
1544        }
1545        // Only DFAs can do reverse searches (currently), so we need one of
1546        // them in order to do this optimization. It's possible (although
1547        // pretty unlikely) that we have neither and need to give up.
1548        if !core.hybrid.is_some() && !core.dfa.is_some() {
1549            debug!(
1550                "skipping reverse inner optimization because \
1551				 we don't have a lazy DFA or a full DFA"
1552            );
1553            return Err(core);
1554        }
1555        if core.pre.as_ref().map_or(false, |p| p.is_fast()) {
1556            debug!(
1557                "skipping reverse inner optimization because \
1558				 we already have a prefilter that we think is fast"
1559            );
1560            return Err(core);
1561        } else if core.pre.is_some() {
1562            debug!(
1563                "core engine has a prefix prefilter, but it is \
1564                 probably not fast, so continuing with attempt to \
1565                 use reverse inner prefilter"
1566            );
1567        }
1568        let (concat_prefix, preinner) = match reverse_inner::extract(hirs) {
1569            Some(x) => x,
1570            // N.B. the 'extract' function emits debug messages explaining
1571            // why we bailed out here.
1572            None => return Err(core),
1573        };
1574        debug!("building reverse NFA for prefix before inner literal");
1575        let mut lookm = LookMatcher::new();
1576        lookm.set_line_terminator(core.info.config().get_line_terminator());
1577        let thompson_config = thompson::Config::new()
1578            .reverse(true)
1579            .utf8(core.info.config().get_utf8_empty())
1580            .nfa_size_limit(core.info.config().get_nfa_size_limit())
1581            .shrink(false)
1582            .which_captures(WhichCaptures::None)
1583            .look_matcher(lookm);
1584        let result = thompson::Compiler::new()
1585            .configure(thompson_config)
1586            .build_from_hir(&concat_prefix);
1587        let nfarev = match result {
1588            Ok(nfarev) => nfarev,
1589            Err(_err) => {
1590                debug!(
1591                    "skipping reverse inner optimization because the \
1592					 reverse NFA failed to build: {}",
1593                    _err,
1594                );
1595                return Err(core);
1596            }
1597        };
1598        debug!("building reverse DFA for prefix before inner literal");
1599        let dfa = if !core.info.config().get_dfa() {
1600            wrappers::ReverseDFA::none()
1601        } else {
1602            wrappers::ReverseDFA::new(&core.info, &nfarev)
1603        };
1604        let hybrid = if !core.info.config().get_hybrid() {
1605            wrappers::ReverseHybrid::none()
1606        } else if dfa.is_some() {
1607            debug!(
1608                "skipping lazy DFA for reverse inner optimization \
1609				 because we have a full DFA"
1610            );
1611            wrappers::ReverseHybrid::none()
1612        } else {
1613            wrappers::ReverseHybrid::new(&core.info, &nfarev)
1614        };
1615        Ok(ReverseInner { core, preinner, nfarev, hybrid, dfa })
1616    }
1617
1618    #[cfg_attr(feature = "perf-inline", inline(always))]
1619    fn try_search_full(
1620        &self,
1621        cache: &mut Cache,
1622        input: &Input<'_>,
1623    ) -> Result<Option<Match>, RetryError> {
1624        let mut span = input.get_span();
1625        let mut min_match_start = 0;
1626        let mut min_pre_start = 0;
1627        loop {
1628            let litmatch = match self.preinner.find(input.haystack(), span) {
1629                None => return Ok(None),
1630                Some(span) => span,
1631            };
1632            if litmatch.start < min_pre_start {
1633                trace!(
1634                    "found inner prefilter match at {litmatch:?}, which starts \
1635					 before the end of the last forward scan at {min_pre_start}, \
1636					 quitting to avoid quadratic behavior",
1637                );
1638                return Err(RetryError::Quadratic(RetryQuadraticError::new()));
1639            }
1640            trace!("reverse inner scan found inner match at {litmatch:?}");
1641            let revinput = input
1642                .clone()
1643                .anchored(Anchored::Yes)
1644                .span(input.start()..litmatch.start);
1645            // Note that in addition to the literal search above scanning past
1646            // our minimum start point, this routine can also return an error
1647            // as a result of detecting possible quadratic behavior if the
1648            // reverse scan goes past the minimum start point. That is, the
1649            // literal search might not, but the reverse regex search for the
1650            // prefix might!
1651            match self.try_search_half_rev_limited(
1652                cache,
1653                &revinput,
1654                min_match_start,
1655            )? {
1656                None => {
1657                    if span.start >= span.end {
1658                        break;
1659                    }
1660                    span.start = litmatch.start.checked_add(1).unwrap();
1661                }
1662                Some(hm_start) => {
1663                    let fwdinput = input
1664                        .clone()
1665                        .anchored(Anchored::Pattern(hm_start.pattern()))
1666                        .span(hm_start.offset()..input.end());
1667                    match self.try_search_half_fwd_stopat(cache, &fwdinput)? {
1668                        Err(stopat) => {
1669                            min_pre_start = stopat;
1670                            span.start =
1671                                litmatch.start.checked_add(1).unwrap();
1672                        }
1673                        Ok(hm_end) => {
1674                            return Ok(Some(Match::new(
1675                                hm_start.pattern(),
1676                                hm_start.offset()..hm_end.offset(),
1677                            )))
1678                        }
1679                    }
1680                }
1681            }
1682            min_match_start = litmatch.end;
1683        }
1684        Ok(None)
1685    }
1686
1687    #[cfg_attr(feature = "perf-inline", inline(always))]
1688    fn try_search_half_fwd_stopat(
1689        &self,
1690        cache: &mut Cache,
1691        input: &Input<'_>,
1692    ) -> Result<Result<HalfMatch, usize>, RetryFailError> {
1693        if let Some(e) = self.core.dfa.get(&input) {
1694            trace!(
1695                "using full DFA for forward reverse inner search at {:?}",
1696                input.get_span()
1697            );
1698            e.try_search_half_fwd_stopat(&input)
1699        } else if let Some(e) = self.core.hybrid.get(&input) {
1700            trace!(
1701                "using lazy DFA for forward reverse inner search at {:?}",
1702                input.get_span()
1703            );
1704            e.try_search_half_fwd_stopat(&mut cache.hybrid, &input)
1705        } else {
1706            unreachable!("ReverseInner always has a DFA")
1707        }
1708    }
1709
1710    #[cfg_attr(feature = "perf-inline", inline(always))]
1711    fn try_search_half_rev_limited(
1712        &self,
1713        cache: &mut Cache,
1714        input: &Input<'_>,
1715        min_start: usize,
1716    ) -> Result<Option<HalfMatch>, RetryError> {
1717        if let Some(e) = self.dfa.get(&input) {
1718            trace!(
1719                "using full DFA for reverse inner search at {:?}, \
1720                 but will be stopped at {} to avoid quadratic behavior",
1721                input.get_span(),
1722                min_start,
1723            );
1724            e.try_search_half_rev_limited(&input, min_start)
1725        } else if let Some(e) = self.hybrid.get(&input) {
1726            trace!(
1727                "using lazy DFA for reverse inner search at {:?}, \
1728                 but will be stopped at {} to avoid quadratic behavior",
1729                input.get_span(),
1730                min_start,
1731            );
1732            e.try_search_half_rev_limited(
1733                &mut cache.revhybrid,
1734                &input,
1735                min_start,
1736            )
1737        } else {
1738            unreachable!("ReverseInner always has a DFA")
1739        }
1740    }
1741}
1742
1743impl Strategy for ReverseInner {
1744    #[cfg_attr(feature = "perf-inline", inline(always))]
1745    fn group_info(&self) -> &GroupInfo {
1746        self.core.group_info()
1747    }
1748
1749    #[cfg_attr(feature = "perf-inline", inline(always))]
1750    fn create_cache(&self) -> Cache {
1751        let mut cache = self.core.create_cache();
1752        cache.revhybrid = self.hybrid.create_cache();
1753        cache
1754    }
1755
1756    #[cfg_attr(feature = "perf-inline", inline(always))]
1757    fn reset_cache(&self, cache: &mut Cache) {
1758        self.core.reset_cache(cache);
1759        cache.revhybrid.reset(&self.hybrid);
1760    }
1761
1762    fn is_accelerated(&self) -> bool {
1763        self.preinner.is_fast()
1764    }
1765
1766    fn memory_usage(&self) -> usize {
1767        self.core.memory_usage()
1768            + self.preinner.memory_usage()
1769            + self.nfarev.memory_usage()
1770            + self.dfa.memory_usage()
1771    }
1772
1773    #[cfg_attr(feature = "perf-inline", inline(always))]
1774    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1775        if input.get_anchored().is_anchored() {
1776            return self.core.search(cache, input);
1777        }
1778        match self.try_search_full(cache, input) {
1779            Err(RetryError::Quadratic(_err)) => {
1780                trace!("reverse inner optimization failed: {_err}");
1781                self.core.search(cache, input)
1782            }
1783            Err(RetryError::Fail(_err)) => {
1784                trace!("reverse inner fast search failed: {_err}");
1785                self.core.search_nofail(cache, input)
1786            }
1787            Ok(matornot) => matornot,
1788        }
1789    }
1790
1791    #[cfg_attr(feature = "perf-inline", inline(always))]
1792    fn search_half(
1793        &self,
1794        cache: &mut Cache,
1795        input: &Input<'_>,
1796    ) -> Option<HalfMatch> {
1797        if input.get_anchored().is_anchored() {
1798            return self.core.search_half(cache, input);
1799        }
1800        match self.try_search_full(cache, input) {
1801            Err(RetryError::Quadratic(_err)) => {
1802                trace!("reverse inner half optimization failed: {_err}");
1803                self.core.search_half(cache, input)
1804            }
1805            Err(RetryError::Fail(_err)) => {
1806                trace!("reverse inner fast half search failed: {_err}");
1807                self.core.search_half_nofail(cache, input)
1808            }
1809            Ok(None) => None,
1810            Ok(Some(m)) => Some(HalfMatch::new(m.pattern(), m.end())),
1811        }
1812    }
1813
1814    #[cfg_attr(feature = "perf-inline", inline(always))]
1815    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1816        if input.get_anchored().is_anchored() {
1817            return self.core.is_match(cache, input);
1818        }
1819        match self.try_search_full(cache, input) {
1820            Err(RetryError::Quadratic(_err)) => {
1821                trace!("reverse inner half optimization failed: {_err}");
1822                self.core.is_match_nofail(cache, input)
1823            }
1824            Err(RetryError::Fail(_err)) => {
1825                trace!("reverse inner fast half search failed: {_err}");
1826                self.core.is_match_nofail(cache, input)
1827            }
1828            Ok(None) => false,
1829            Ok(Some(_)) => true,
1830        }
1831    }
1832
1833    #[cfg_attr(feature = "perf-inline", inline(always))]
1834    fn search_slots(
1835        &self,
1836        cache: &mut Cache,
1837        input: &Input<'_>,
1838        slots: &mut [Option<NonMaxUsize>],
1839    ) -> Option<PatternID> {
1840        if input.get_anchored().is_anchored() {
1841            return self.core.search_slots(cache, input, slots);
1842        }
1843        if !self.core.is_capture_search_needed(slots.len()) {
1844            trace!("asked for slots unnecessarily, trying fast path");
1845            let m = self.search(cache, input)?;
1846            copy_match_to_slots(m, slots);
1847            return Some(m.pattern());
1848        }
1849        let m = match self.try_search_full(cache, input) {
1850            Err(RetryError::Quadratic(_err)) => {
1851                trace!("reverse inner captures optimization failed: {_err}");
1852                return self.core.search_slots(cache, input, slots);
1853            }
1854            Err(RetryError::Fail(_err)) => {
1855                trace!("reverse inner fast captures search failed: {_err}");
1856                return self.core.search_slots_nofail(cache, input, slots);
1857            }
1858            Ok(None) => return None,
1859            Ok(Some(m)) => m,
1860        };
1861        trace!(
1862            "match found at {}..{} in capture search, \
1863		  	 using another engine to find captures",
1864            m.start(),
1865            m.end(),
1866        );
1867        let input = input
1868            .clone()
1869            .span(m.start()..m.end())
1870            .anchored(Anchored::Pattern(m.pattern()));
1871        self.core.search_slots_nofail(cache, &input, slots)
1872    }
1873
1874    #[cfg_attr(feature = "perf-inline", inline(always))]
1875    fn which_overlapping_matches(
1876        &self,
1877        cache: &mut Cache,
1878        input: &Input<'_>,
1879        patset: &mut PatternSet,
1880    ) {
1881        self.core.which_overlapping_matches(cache, input, patset)
1882    }
1883}
1884
1885/// Copies the offsets in the given match to the corresponding positions in
1886/// `slots`.
1887///
1888/// In effect, this sets the slots corresponding to the implicit group for the
1889/// pattern in the given match. If the indices for the corresponding slots do
1890/// not exist, then no slots are set.
1891///
1892/// This is useful when the caller provides slots (or captures), but you use a
1893/// regex engine that doesn't operate on slots (like a lazy DFA). This function
1894/// lets you map the match you get back to the slots provided by the caller.
1895#[cfg_attr(feature = "perf-inline", inline(always))]
1896fn copy_match_to_slots(m: Match, slots: &mut [Option<NonMaxUsize>]) {
1897    let slot_start = m.pattern().as_usize() * 2;
1898    let slot_end = slot_start + 1;
1899    if let Some(slot) = slots.get_mut(slot_start) {
1900        *slot = NonMaxUsize::new(m.start());
1901    }
1902    if let Some(slot) = slots.get_mut(slot_end) {
1903        *slot = NonMaxUsize::new(m.end());
1904    }
1905}