1#![cfg_attr(not(test), no_std)]
2#![cfg_attr(feature = "pattern", feature(pattern))]
3#![cfg_attr(feature = "pcmp", feature(asm))]
4
5#[cfg(not(test))]
6extern crate core as std;
7
8use std::cmp;
9use std::usize;
10
11extern crate memchr;
12
13mod tw;
14#[cfg(feature = "pcmp")]
15pub mod pcmp;
16pub mod bmh;
17#[cfg(feature = "test-set")]
18pub mod set;
19mod util;
20
21#[cfg(feature = "pattern")]
22use std::str::pattern::{
23 Pattern,
24 Searcher,
25 ReverseSearcher,
26 SearchStep,
27};
28
29#[inline]
33pub fn find_str(text: &str, pattern: &str) -> Option<usize> {
34 find_bytes(text.as_bytes(), pattern.as_bytes())
35}
36
37#[cfg(feature = "pcmp")]
41#[inline]
42pub fn find_bytes(text: &[u8], pattern: &[u8]) -> Option<usize> {
43 pcmp::find(text, pattern)
44}
45
46#[cfg(not(feature = "pcmp"))]
50pub fn find_bytes(text: &[u8], pattern: &[u8]) -> Option<usize> {
51 if pattern.is_empty() {
52 Some(0)
53 } else if pattern.len() == 1 {
54 memchr::memchr(pattern[0], text)
55 } else {
56 let mut searcher = TwoWaySearcher::new(pattern, text.len());
57 let is_long = searcher.memory == usize::MAX;
58 if is_long {
61 searcher.next::<MatchOnly>(text, pattern, true).map(|t| t.0)
62 } else {
63 searcher.next::<MatchOnly>(text, pattern, false).map(|t| t.0)
64 }
65 }
66}
67
68#[inline]
74pub fn rfind_str(text: &str, pattern: &str) -> Option<usize> {
75 rfind_bytes(text.as_bytes(), pattern.as_bytes())
76}
77
78pub fn rfind_bytes(text: &[u8], pattern: &[u8]) -> Option<usize> {
84 if pattern.is_empty() {
85 Some(text.len())
86 } else if pattern.len() == 1 {
87 memchr::memrchr(pattern[0], text)
88 } else {
89 let mut searcher = TwoWaySearcher::new(pattern, text.len());
90 let is_long = searcher.memory == usize::MAX;
91 if is_long {
94 searcher.next_back::<MatchOnly>(text, pattern, true).map(|t| t.0)
95 } else {
96 searcher.next_back::<MatchOnly>(text, pattern, false).map(|t| t.0)
97 }
98 }
99}
100
101
102#[doc(hidden)]
104pub struct Str<'a>(pub &'a str);
105
106#[cfg(feature = "pattern")]
107impl<'a, 'b> Pattern<'a> for Str<'b> {
112 type Searcher = StrSearcher<'a, 'b>;
113
114 #[inline]
115 fn into_searcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
116 StrSearcher::new(haystack, self.0)
117 }
118
119 #[inline]
121 fn is_prefix_of(self, haystack: &'a str) -> bool {
122 let self_ = self.0;
123 haystack.is_char_boundary(self_.len()) &&
124 self_ == &haystack[..self_.len()]
125 }
126
127 #[inline]
129 fn is_suffix_of(self, haystack: &'a str) -> bool {
130 let self_ = self.0;
131 self_.len() <= haystack.len() &&
132 haystack.is_char_boundary(haystack.len() - self_.len()) &&
133 self_ == &haystack[haystack.len() - self_.len()..]
134 }
135
136}
137
138#[derive(Clone, Debug)]
139#[doc(hidden)]
140pub struct StrSearcher<'a, 'b> {
142 haystack: &'a str,
143 needle: &'b str,
144
145 searcher: StrSearcherImpl,
146}
147
148#[derive(Clone, Debug)]
149enum StrSearcherImpl {
150 Empty(EmptyNeedle),
151 TwoWay(TwoWaySearcher),
152}
153
154#[derive(Clone, Debug)]
155struct EmptyNeedle {
156 position: usize,
157 end: usize,
158 is_match_fw: bool,
159 is_match_bw: bool,
160}
161
162impl<'a, 'b> StrSearcher<'a, 'b> {
163 pub fn new(haystack: &'a str, needle: &'b str) -> StrSearcher<'a, 'b> {
164 if needle.is_empty() {
165 StrSearcher {
166 haystack: haystack,
167 needle: needle,
168 searcher: StrSearcherImpl::Empty(EmptyNeedle {
169 position: 0,
170 end: haystack.len(),
171 is_match_fw: true,
172 is_match_bw: true,
173 }),
174 }
175 } else {
176 StrSearcher {
177 haystack: haystack,
178 needle: needle,
179 searcher: StrSearcherImpl::TwoWay(
180 TwoWaySearcher::new(needle.as_bytes(), haystack.len())
181 ),
182 }
183 }
184 }
185}
186
187#[cfg(feature = "pattern")]
188unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
189 fn haystack(&self) -> &'a str { self.haystack }
190
191 #[inline]
192 fn next(&mut self) -> SearchStep {
193 match self.searcher {
194 StrSearcherImpl::Empty(ref mut searcher) => {
195 let is_match = searcher.is_match_fw;
197 searcher.is_match_fw = !searcher.is_match_fw;
198 let pos = searcher.position;
199 match self.haystack[pos..].chars().next() {
200 _ if is_match => SearchStep::Match(pos, pos),
201 None => SearchStep::Done,
202 Some(ch) => {
203 searcher.position += ch.len_utf8();
204 SearchStep::Reject(pos, searcher.position)
205 }
206 }
207 }
208 StrSearcherImpl::TwoWay(ref mut searcher) => {
209 if searcher.position == self.haystack.len() {
215 return SearchStep::Done;
216 }
217 let is_long = searcher.memory == usize::MAX;
218 match searcher.next::<RejectAndMatch>(self.haystack.as_bytes(),
219 self.needle.as_bytes(),
220 is_long)
221 {
222 SearchStep::Reject(a, mut b) => {
223 while !self.haystack.is_char_boundary(b) {
225 b += 1;
226 }
227 searcher.position = cmp::max(b, searcher.position);
228 SearchStep::Reject(a, b)
229 }
230 otherwise => otherwise,
231 }
232 }
233 }
234 }
235
236 #[inline(always)]
237 fn next_match(&mut self) -> Option<(usize, usize)> {
238 match self.searcher {
239 StrSearcherImpl::Empty(..) => {
240 loop {
241 match self.next() {
242 SearchStep::Match(a, b) => return Some((a, b)),
243 SearchStep::Done => return None,
244 SearchStep::Reject(..) => { }
245 }
246 }
247 }
248
249 StrSearcherImpl::TwoWay(ref mut searcher) => {
250 let is_long = searcher.memory == usize::MAX;
251 if is_long {
254 searcher.next::<MatchOnly>(self.haystack.as_bytes(),
255 self.needle.as_bytes(),
256 true)
257 } else {
258 searcher.next::<MatchOnly>(self.haystack.as_bytes(),
259 self.needle.as_bytes(),
260 false)
261 }
262 }
263 }
264 }
265}
266
267#[cfg(feature = "pattern")]
268unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
269 #[inline]
270 fn next_back(&mut self) -> SearchStep {
271 match self.searcher {
272 StrSearcherImpl::Empty(ref mut searcher) => {
273 let is_match = searcher.is_match_bw;
274 searcher.is_match_bw = !searcher.is_match_bw;
275 let end = searcher.end;
276 match self.haystack[..end].chars().next_back() {
277 _ if is_match => SearchStep::Match(end, end),
278 None => SearchStep::Done,
279 Some(ch) => {
280 searcher.end -= ch.len_utf8();
281 SearchStep::Reject(searcher.end, end)
282 }
283 }
284 }
285 StrSearcherImpl::TwoWay(ref mut searcher) => {
286 if searcher.end == 0 {
287 return SearchStep::Done;
288 }
289 let is_long = searcher.memory == usize::MAX;
290 match searcher.next_back::<RejectAndMatch>(self.haystack.as_bytes(),
291 self.needle.as_bytes(),
292 is_long)
293 {
294 SearchStep::Reject(mut a, b) => {
295 while !self.haystack.is_char_boundary(a) {
297 a -= 1;
298 }
299 searcher.end = cmp::min(a, searcher.end);
300 SearchStep::Reject(a, b)
301 }
302 otherwise => otherwise,
303 }
304 }
305 }
306 }
307
308 #[inline]
309 fn next_match_back(&mut self) -> Option<(usize, usize)> {
310 match self.searcher {
311 StrSearcherImpl::Empty(..) => {
312 loop {
313 match self.next_back() {
314 SearchStep::Match(a, b) => return Some((a, b)),
315 SearchStep::Done => return None,
316 SearchStep::Reject(..) => { }
317 }
318 }
319 }
320 StrSearcherImpl::TwoWay(ref mut searcher) => {
321 let is_long = searcher.memory == usize::MAX;
322 if is_long {
324 searcher.next_back::<MatchOnly>(self.haystack.as_bytes(),
325 self.needle.as_bytes(),
326 true)
327 } else {
328 searcher.next_back::<MatchOnly>(self.haystack.as_bytes(),
329 self.needle.as_bytes(),
330 false)
331 }
332 }
333 }
334 }
335}
336
337#[derive(Clone, Debug)]
339#[doc(hidden)]
340pub struct TwoWaySearcher {
341 crit_pos: usize,
344 crit_pos_back: usize,
346 period: usize,
347 byteset: u64,
351
352 position: usize,
354 end: usize,
355 memory: usize,
357 memory_back: usize,
359}
360
361impl TwoWaySearcher {
435 pub fn new(needle: &[u8], end: usize) -> TwoWaySearcher {
436 let (crit_pos_false, period_false) = TwoWaySearcher::maximal_suffix(needle, false);
437 let (crit_pos_true, period_true) = TwoWaySearcher::maximal_suffix(needle, true);
438
439 let (crit_pos, period) =
440 if crit_pos_false > crit_pos_true {
441 (crit_pos_false, period_false)
442 } else {
443 (crit_pos_true, period_true)
444 };
445
446 if &needle[..crit_pos] == &needle[period.. period + crit_pos] {
456 let crit_pos_back = needle.len() - cmp::max(
466 TwoWaySearcher::reverse_maximal_suffix(needle, period, false),
467 TwoWaySearcher::reverse_maximal_suffix(needle, period, true));
468
469 TwoWaySearcher {
470 crit_pos: crit_pos,
471 crit_pos_back: crit_pos_back,
472 period: period,
473 byteset: Self::byteset_create(&needle[..period]),
474
475 position: 0,
476 end: end,
477 memory: 0,
478 memory_back: needle.len(),
479 }
480 } else {
481 TwoWaySearcher {
489 crit_pos: crit_pos,
490 crit_pos_back: crit_pos,
491 period: cmp::max(crit_pos, needle.len() - crit_pos) + 1,
492 byteset: Self::byteset_create(needle),
493
494 position: 0,
495 end: end,
496 memory: usize::MAX, memory_back: usize::MAX,
498 }
499 }
500 }
501
502 #[inline]
503 fn byteset_create(bytes: &[u8]) -> u64 {
504 bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a)
505 }
506
507 #[inline(always)]
508 fn byteset_contains(&self, byte: u8) -> bool {
509 (self.byteset >> ((byte & 0x3f) as usize)) & 1 != 0
510 }
511
512 #[inline(always)]
518 fn next<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
519 -> S::Output
520 where S: TwoWayStrategy
521 {
522 let old_pos = self.position;
524 let needle_last = needle.len() - 1;
525 'search: loop {
526 let tail_byte = match haystack.get(self.position + needle_last) {
530 Some(&b) => b,
531 None => {
532 self.position = haystack.len();
533 return S::rejecting(old_pos, self.position);
534 }
535 };
536
537 if S::use_early_reject() && old_pos != self.position {
538 return S::rejecting(old_pos, self.position);
539 }
540
541 if !self.byteset_contains(tail_byte) {
543 self.position += needle.len();
544 if !long_period {
545 self.memory = 0;
546 }
547 continue 'search;
548 }
549
550 let start = if long_period { self.crit_pos }
552 else { cmp::max(self.crit_pos, self.memory) };
553 for i in start..needle.len() {
554 if needle[i] != haystack[self.position + i] {
555 self.position += i - self.crit_pos + 1;
556 if !long_period {
557 self.memory = 0;
558 }
559 continue 'search;
560 }
561 }
562
563 let start = if long_period { 0 } else { self.memory };
565 for i in (start..self.crit_pos).rev() {
566 if needle[i] != haystack[self.position + i] {
567 self.position += self.period;
568 if !long_period {
569 self.memory = needle.len() - self.period;
570 }
571 continue 'search;
572 }
573 }
574
575 let match_pos = self.position;
577
578 self.position += needle.len();
580 if !long_period {
581 self.memory = 0; }
583
584 return S::matching(match_pos, match_pos + needle.len());
585 }
586 }
587
588 #[inline]
601 fn next_back<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
602 -> S::Output
603 where S: TwoWayStrategy
604 {
605 let old_end = self.end;
608 'search: loop {
609 let front_byte = match haystack.get(self.end.wrapping_sub(needle.len())) {
614 Some(&b) => b,
615 None => {
616 self.end = 0;
617 return S::rejecting(0, old_end);
618 }
619 };
620
621 if S::use_early_reject() && old_end != self.end {
622 return S::rejecting(self.end, old_end);
623 }
624
625 if !self.byteset_contains(front_byte) {
627 self.end -= needle.len();
628 if !long_period {
629 self.memory_back = needle.len();
630 }
631 continue 'search;
632 }
633
634 let crit = if long_period { self.crit_pos_back }
636 else { cmp::min(self.crit_pos_back, self.memory_back) };
637 for i in (0..crit).rev() {
638 if needle[i] != haystack[self.end - needle.len() + i] {
639 self.end -= self.crit_pos_back - i;
640 if !long_period {
641 self.memory_back = needle.len();
642 }
643 continue 'search;
644 }
645 }
646
647 let needle_end = if long_period { needle.len() }
649 else { self.memory_back };
650 for i in self.crit_pos_back..needle_end {
651 if needle[i] != haystack[self.end - needle.len() + i] {
652 self.end -= self.period;
653 if !long_period {
654 self.memory_back = self.period;
655 }
656 continue 'search;
657 }
658 }
659
660 let match_pos = self.end - needle.len();
662 self.end -= needle.len();
664 if !long_period {
665 self.memory_back = needle.len();
666 }
667
668 return S::matching(match_pos, match_pos + needle.len());
669 }
670 }
671
672 #[inline]
685 pub fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) {
686 let mut left = 0; let mut right = 1; let mut offset = 0; let mut period = 1; while let Some(&a) = arr.get(right + offset) {
693 let b = arr[left + offset];
695 if (a < b && !order_greater) || (a > b && order_greater) {
696 right += offset + 1;
698 offset = 0;
699 period = right - left;
700 } else if a == b {
701 if offset + 1 == period {
703 right += offset + 1;
704 offset = 0;
705 } else {
706 offset += 1;
707 }
708 } else {
709 left = right;
711 right += 1;
712 offset = 0;
713 period = 1;
714 }
715 }
716 (left, period)
717 }
718
719 pub fn reverse_maximal_suffix(arr: &[u8], known_period: usize,
732 order_greater: bool) -> usize
733 {
734 let mut left = 0; let mut right = 1; let mut offset = 0; let mut period = 1; let n = arr.len();
740
741 while right + offset < n {
742 let a = arr[n - (1 + right + offset)];
743 let b = arr[n - (1 + left + offset)];
744 if (a < b && !order_greater) || (a > b && order_greater) {
745 right += offset + 1;
747 offset = 0;
748 period = right - left;
749 } else if a == b {
750 if offset + 1 == period {
752 right += offset + 1;
753 offset = 0;
754 } else {
755 offset += 1;
756 }
757 } else {
758 left = right;
760 right += 1;
761 offset = 0;
762 period = 1;
763 }
764 if period == known_period {
765 break;
766 }
767 }
768 debug_assert!(period <= known_period);
769 left
770 }
771}
772
773trait TwoWayStrategy {
776 type Output;
777 fn use_early_reject() -> bool;
778 fn rejecting(usize, usize) -> Self::Output;
779 fn matching(usize, usize) -> Self::Output;
780}
781
782enum MatchOnly { }
784
785impl TwoWayStrategy for MatchOnly {
786 type Output = Option<(usize, usize)>;
787
788 #[inline]
789 fn use_early_reject() -> bool { false }
790 #[inline]
791 fn rejecting(_a: usize, _b: usize) -> Self::Output { None }
792 #[inline]
793 fn matching(a: usize, b: usize) -> Self::Output { Some((a, b)) }
794}
795
796#[cfg(feature = "pattern")]
797enum RejectAndMatch { }
799
800#[cfg(feature = "pattern")]
801impl TwoWayStrategy for RejectAndMatch {
802 type Output = SearchStep;
803
804 #[inline]
805 fn use_early_reject() -> bool { true }
806 #[inline]
807 fn rejecting(a: usize, b: usize) -> Self::Output { SearchStep::Reject(a, b) }
808 #[inline]
809 fn matching(a: usize, b: usize) -> Self::Output { SearchStep::Match(a, b) }
810}
811
812
813#[cfg(feature = "pattern")]
814#[cfg(test)]
815impl<'a, 'b> StrSearcher<'a, 'b> {
816 fn twoway(&self) -> &TwoWaySearcher {
817 match self.searcher {
818 StrSearcherImpl::TwoWay(ref inner) => inner,
819 _ => panic!("Not a TwoWaySearcher"),
820 }
821 }
822}
823
824#[cfg(feature = "pattern")]
825#[test]
826fn test_basic() {
827 let t = StrSearcher::new("", "aab");
828 println!("{:?}", t);
829 let t = StrSearcher::new("", "abaaaba");
830 println!("{:?}", t);
831 let mut t = StrSearcher::new("GCATCGCAGAGAGTATACAGTACG", "GCAGAGAG");
832 println!("{:?}", t);
833
834 loop {
835 match t.next() {
836 SearchStep::Done => break,
837 m => println!("{:?}", m),
838 }
839 }
840
841 let mut t = StrSearcher::new("GCATCGCAGAGAGTATACAGTACG", "GCAGAGAG");
842 println!("{:?}", t);
843
844 loop {
845 match t.next_back() {
846 SearchStep::Done => break,
847 m => println!("{:?}", m),
848 }
849 }
850
851 let mut t = StrSearcher::new("banana", "nana");
852 println!("{:?}", t);
853
854 loop {
855 match t.next() {
856 SearchStep::Done => break,
857 m => println!("{:?}", m),
858 }
859 }
860}
861
862#[cfg(feature = "pattern")]
863#[cfg(test)]
864fn contains(hay: &str, n: &str) -> bool {
865 let mut tws = StrSearcher::new(hay, n);
866 loop {
867 match tws.next() {
868 SearchStep::Done => return false,
869 SearchStep::Match(..) => return true,
870 _ => { }
871 }
872 }
873}
874
875#[cfg(feature = "pattern")]
876#[cfg(test)]
877fn contains_rev(hay: &str, n: &str) -> bool {
878 let mut tws = StrSearcher::new(hay, n);
879 loop {
880 match tws.next_back() {
881 SearchStep::Done => return false,
882 SearchStep::Match(..) => return true,
883 rej => { println!("{:?}", rej); }
884 }
885 }
886}
887
888
889#[cfg(feature = "pattern")]
890#[test]
891fn test_contains() {
892 let h = "";
893 let n = "";
894 assert!(contains(h, n));
895 assert!(contains_rev(h, n));
896
897 let h = "BDC\0\0\0";
898 let n = "BDC\u{0}";
899 assert!(contains(h, n));
900 assert!(contains_rev(h, n));
901
902
903 let h = "ADA\0";
904 let n = "ADA";
905 assert!(contains(h, n));
906 assert!(contains_rev(h, n));
907
908 let h = "\u{0}\u{0}\u{0}\u{0}";
909 let n = "\u{0}";
910 assert!(contains(h, n));
911 assert!(contains_rev(h, n));
912}
913
914#[cfg(feature = "pattern")]
915#[test]
916fn test_rev_2() {
917 let h = "BDC\0\0\0";
918 let n = "BDC\u{0}";
919 let mut t = StrSearcher::new(h, n);
920 println!("{:?}", t);
921 println!("{:?}", h.contains(&n));
922
923 loop {
924 match t.next_back() {
925 SearchStep::Done => break,
926 m => println!("{:?}", m),
927 }
928 }
929
930 let h = "aabaabx";
931 let n = "aabaab";
932 let mut t = StrSearcher::new(h, n);
933 println!("{:?}", t);
934 assert_eq!(t.twoway().crit_pos, 2);
935 assert_eq!(t.twoway().crit_pos_back, 5);
936
937 loop {
938 match t.next_back() {
939 SearchStep::Done => break,
940 m => println!("{:?}", m),
941 }
942 }
943
944 let h = "abababac";
945 let n = "ababab";
946 let mut t = StrSearcher::new(h, n);
947 println!("{:?}", t);
948 assert_eq!(t.twoway().crit_pos, 1);
949 assert_eq!(t.twoway().crit_pos_back, 5);
950
951 loop {
952 match t.next_back() {
953 SearchStep::Done => break,
954 m => println!("{:?}", m),
955 }
956 }
957
958 let h = "abababac";
959 let n = "abab";
960 let mut t = StrSearcher::new(h, n);
961 println!("{:?}", t);
962
963 loop {
964 match t.next_back() {
965 SearchStep::Done => break,
966 m => println!("{:?}", m),
967 }
968 }
969
970 let h = "baabbbaabc";
971 let n = "baabb";
972 let t = StrSearcher::new(h, n);
973 println!("{:?}", t);
974 assert_eq!(t.twoway().crit_pos, 3);
975 assert_eq!(t.twoway().crit_pos_back, 3);
976
977 let h = "aabaaaabaabxx";
978 let n = "aabaaaabaa";
979 let mut t = StrSearcher::new(h, n);
980 println!("{:?}", t);
981
982 loop {
983 match t.next_back() {
984 SearchStep::Done => break,
985 m => println!("{:?}", m),
986 }
987 }
988
989 let h = "babbabax";
990 let n = "babbab";
991 let mut t = StrSearcher::new(h, n);
992 println!("{:?}", t);
993 assert_eq!(t.twoway().crit_pos, 2);
994 assert_eq!(t.twoway().crit_pos_back, 4);
995
996 loop {
997 match t.next_back() {
998 SearchStep::Done => break,
999 m => println!("{:?}", m),
1000 }
1001 }
1002
1003 let h = "xacbaabcax";
1004 let n = "abca";
1005 let mut t = StrSearcher::new(h, n);
1006 assert_eq!(t.next_match_back(), Some((5, 9)));
1007
1008 let h = "xacbaacbxxcba";
1009 let m = "acba";
1010 let mut s = StrSearcher::new(h, m);
1011 assert_eq!(s.next_match_back(), Some((1, 5)));
1012 assert_eq!(s.twoway().crit_pos, 1);
1013 assert_eq!(s.twoway().crit_pos_back, 2);
1014}
1015
1016#[cfg(feature = "pattern")]
1017#[test]
1018fn test_rev_unicode() {
1019 let h = "ααααααβ";
1020 let n = "αβ";
1021 let mut t = StrSearcher::new(h, n);
1022 println!("{:?}", t);
1023
1024 loop {
1025 match t.next() {
1026 SearchStep::Done => break,
1027 m => println!("{:?}", m),
1028 }
1029 }
1030
1031 let mut t = StrSearcher::new(h, n);
1032 loop {
1033 match t.next_back() {
1034 SearchStep::Done => break,
1035 m => println!("{:?}", m),
1036 }
1037 }
1038}
1039
1040#[test]
1041fn maximal_suffix() {
1042 assert_eq!((2, 1), TwoWaySearcher::maximal_suffix(b"aab", false));
1043 assert_eq!((0, 3), TwoWaySearcher::maximal_suffix(b"aab", true));
1044
1045 assert_eq!((0, 3), TwoWaySearcher::maximal_suffix(b"aabaa", true));
1046 assert_eq!((2, 3), TwoWaySearcher::maximal_suffix(b"aabaa", false));
1047
1048 assert_eq!((0, 7), TwoWaySearcher::maximal_suffix(b"gcagagag", false));
1049 assert_eq!((2, 2), TwoWaySearcher::maximal_suffix(b"gcagagag", true));
1050
1051 assert_eq!((2, 2), TwoWaySearcher::maximal_suffix(b"banana", false));
1053 assert_eq!((1, 2), TwoWaySearcher::maximal_suffix(b"banana", true));
1054 assert_eq!((0, 6), TwoWaySearcher::maximal_suffix(b"zanana", false));
1055 assert_eq!((1, 2), TwoWaySearcher::maximal_suffix(b"zanana", true));
1056}
1057
1058#[test]
1059fn maximal_suffix_verbose() {
1060 fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) {
1061 let mut left: usize = 0; let mut right = 1; let mut offset = 0; let mut period = 1; macro_rules! asstr {
1067 ($e:expr) => (::std::str::from_utf8($e).unwrap())
1068 }
1069
1070 while let Some(&a) = arr.get(right + offset) {
1071 debug_assert!(left <= right);
1073 let b = unsafe { *arr.get_unchecked(left + offset) };
1074 println!("str={}, l={}, r={}, offset={}, p={}", asstr!(arr), left, right, offset, period);
1075 if (a < b && !order_greater) || (a > b && order_greater) {
1076 right += offset + 1;
1078 offset = 0;
1079 period = right - left;
1080 } else if a == b {
1081 if offset + 1 == period {
1083 right += offset + 1;
1084 offset = 0;
1085 } else {
1086 offset += 1;
1087 }
1088 } else {
1089 left = right;
1091 right += 1;
1092 offset = 0;
1093 period = 1;
1094 }
1095 }
1096 println!("str={}, l={}, r={}, offset={}, p={} ==END==", asstr!(arr), left, right, offset, period);
1097 (left, period)
1098 }
1099
1100 fn reverse_maximal_suffix(arr: &[u8], known_period: usize, order_greater: bool) -> usize {
1101 let n = arr.len();
1102 let mut left: usize = 0; let mut right = 1; let mut offset = 0; let mut period = 1; macro_rules! asstr {
1108 ($e:expr) => (::std::str::from_utf8($e).unwrap())
1109 }
1110
1111 while right + offset < n {
1112 debug_assert!(left <= right);
1114 let a = unsafe { *arr.get_unchecked(n - (1 + right + offset)) };
1115 let b = unsafe { *arr.get_unchecked(n - (1 + left + offset)) };
1116 println!("str={}, l={}, r={}, offset={}, p={}", asstr!(arr), left, right, offset, period);
1117 if (a < b && !order_greater) || (a > b && order_greater) {
1118 right += offset + 1;
1120 offset = 0;
1121 period = right - left;
1122 if period == known_period {
1123 break;
1124 }
1125 } else if a == b {
1126 if offset + 1 == period {
1128 right += offset + 1;
1129 offset = 0;
1130 } else {
1131 offset += 1;
1132 }
1133 } else {
1134 left = right;
1136 right += 1;
1137 offset = 0;
1138 period = 1;
1139 }
1140 }
1141 println!("str={}, l={}, r={}, offset={}, p={} ==END==", asstr!(arr), left, right, offset, period);
1142 debug_assert!(period == known_period);
1143 left
1144 }
1145
1146 assert_eq!((2, 2), maximal_suffix(b"banana", false));
1147 assert_eq!((1, 2), maximal_suffix(b"banana", true));
1148 assert_eq!((0, 7), maximal_suffix(b"gcagagag", false));
1149 assert_eq!((2, 2), maximal_suffix(b"gcagagag", true));
1150 assert_eq!((2, 1), maximal_suffix(b"bac", false));
1151 assert_eq!((1, 2), maximal_suffix(b"bac", true));
1152 assert_eq!((0, 9), maximal_suffix(b"baaaaaaaa", false));
1153 assert_eq!((1, 1), maximal_suffix(b"baaaaaaaa", true));
1154
1155 assert_eq!((2, 3), maximal_suffix(b"babbabbab", false));
1156 assert_eq!((1, 3), maximal_suffix(b"babbabbab", true));
1157
1158 assert_eq!(2, reverse_maximal_suffix(b"babbabbab", 3, false));
1159 assert_eq!(1, reverse_maximal_suffix(b"babbabbab", 3, true));
1160
1161 assert_eq!((0, 2), maximal_suffix(b"bababa", false));
1162 assert_eq!((1, 2), maximal_suffix(b"bababa", true));
1163
1164 assert_eq!(1, reverse_maximal_suffix(b"bababa", 2, false));
1165 assert_eq!(0, reverse_maximal_suffix(b"bababa", 2, true));
1166
1167 assert_eq!((2, 2), maximal_suffix(b"abca", false));
1169 assert_eq!((0, 3), maximal_suffix(b"abca", true));
1170
1171 assert_eq!((3, 2), maximal_suffix(b"abcda", false));
1172 assert_eq!((0, 4), maximal_suffix(b"abcda", true));
1173
1174 assert_eq!((1, 3), maximal_suffix(b"acba", false));
1176 assert_eq!((0, 3), maximal_suffix(b"acba", true));
1177 }
1180
1181#[cfg(feature = "pattern")]
1182#[test]
1183fn test_find_rfind() {
1184 fn find(hay: &str, pat: &str) -> Option<usize> {
1185 let mut t = pat.into_searcher(hay);
1186 t.next_match().map(|(x, _)| x)
1187 }
1188
1189 fn rfind(hay: &str, pat: &str) -> Option<usize> {
1190 let mut t = pat.into_searcher(hay);
1191 t.next_match_back().map(|(x, _)| x)
1192 }
1193
1194 let string = "Việt Namacbaabcaabaaba";
1196 for (i, ci) in string.char_indices() {
1197 let ip = i + ci.len_utf8();
1198 for j in string[ip..].char_indices()
1199 .map(|(i, _)| i)
1200 .chain(Some(string.len() - ip))
1201 {
1202 let pat = &string[i..ip + j];
1203 assert!(match find(string, pat) {
1204 None => false,
1205 Some(x) => x <= i,
1206 });
1207 assert!(match rfind(string, pat) {
1208 None => false,
1209 Some(x) => x >= i,
1210 });
1211 }
1212 }
1213}