1extern crate memchr;
5use std::cmp;
6
/// Fills `skip` with the Boyer-Moore-Horspool shift table for `pat`.
///
/// Every entry is first set to the pattern length. Then, for each byte of
/// `pat` other than the final one, the entry for that byte is overwritten
/// with its distance to the end of the pattern; later occurrences overwrite
/// earlier ones, so the rightmost non-final occurrence wins.
///
/// Shifts are capped at `u16::max_value()`, so for very long patterns the
/// stored shift is smaller than the true distance.
///
/// An empty `pat` leaves every entry at zero instead of panicking.
fn bmh_skip(pat: &[u8], skip: &mut [u16; 256]) {
    let cap = u16::max_value() as usize;

    // Bytes that never occur in the non-final part of the pattern get the
    // (capped) pattern length as their shift.
    let default_shift = cmp::min(pat.len(), cap) as u16;
    for entry in skip.iter_mut() {
        *entry = default_shift;
    }

    // `split_last` yields `None` for an empty pattern, which avoids the
    // `pat.len() - 1` underflow that a plain `pat[..pat.len() - 1]` hits.
    if let Some((_, head)) = pat.split_last() {
        for (index, &byte) in head.iter().enumerate() {
            skip[byte as usize] = cmp::min(pat.len() - index - 1, cap) as u16;
        }
    }
}
17
18pub fn find(text: &[u8], pat: &[u8]) -> Option<usize> {
20 let mut skip = [0; 256];
21 bmh_skip(pat, &mut skip);
22
23 let pat_len = pat.len();
24
25 if pat_len == 0 {
26 return Some(0);
27 }
28
29 let pat_len_m1 = pat_len - 1;
30 let pat_last = pat[pat_len - 1];
31
32 let mut j = match memchr::memchr(pat[0], text) {
34 Some(x) => x,
35 None => return None,
36 };
37
38 while let Some(&c) = text.get(j + pat_len_m1) {
39 if c == pat_last && &text[j..j + pat_len] == pat {
41 return Some(j);
42 }
43 j += skip[c as usize] as usize;
44 }
45 None
46}
47
/// Checks the shift table produced for the pattern `gcagagag`.
#[test]
fn bmh_preprocess() {
    let mut table = [0; 256];
    bmh_skip(b"gcagagag", &mut table);

    // (byte, expected shift); `t` does not occur in the pattern, so it keeps
    // the default shift equal to the pattern length.
    let expected = [(b'g', 2), (b'c', 6), (b'a', 1), (b't', 8)];
    for &(byte, shift) in expected.iter() {
        assert_eq!(table[byte as usize], shift);
    }
}
58
/// Exercises `find` on a short text, on every window of widths 1 through 16
/// of a longer text, and on one pattern longer than 16 bytes.
#[test]
fn bmh_find() {
    let short = b"abc";
    assert_eq!(find(short, b"d"), None);
    assert_eq!(find(short, b"c"), Some(2));

    let haystack = "longer text and so on";
    let haystack_bytes = haystack.as_bytes();

    // Each window must be reported at the same offset `str::find` gives.
    for width in 1..17 {
        for chunk in haystack_bytes.windows(width) {
            let needle = ::std::str::from_utf8(chunk).unwrap();
            let expected = haystack.find(needle);
            assert!(expected.is_some());
            assert_eq!(find(haystack_bytes, chunk), expected);
        }
    }

    let long_pat = b"ger text and so on";
    assert!(long_pat.len() > 16);
    assert_eq!(Some(3), find(haystack_bytes, long_pat));
}