adler32/
lib.rs

1//! A minimal implementation of Adler32 for Rust.
2//!
3//! This provides the simple method adler32(), that exhausts a Read and
4//! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
5//! build a hash byte-by-byte, allowing to 'forget' past bytes in a rolling
6//! fashion.
7//!
8//! The adler32 code has been translated (as accurately as I could manage) from
9//! the zlib implementation.
10
11#![forbid(unsafe_code)]
12#![cfg_attr(not(feature = "std"), no_std)]
13
14
15// adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
16// It was translated into Rust as accurately as I could manage
17// The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
18
19/* zlib.h -- interface of the 'zlib' general purpose compression library
20  version 1.2.8, April 28th, 2013
21
22  Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
23
24  This software is provided 'as-is', without any express or implied
25  warranty.  In no event will the authors be held liable for any damages
26  arising from the use of this software.
27
28  Permission is granted to anyone to use this software for any purpose,
29  including commercial applications, and to alter it and redistribute it
30  freely, subject to the following restrictions:
31
32  1. The origin of this software must not be misrepresented; you must not
33     claim that you wrote the original software. If you use this software
34     in a product, an acknowledgment in the product documentation would be
35     appreciated but is not required.
36  2. Altered source versions must be plainly marked as such, and must not be
37     misrepresented as being the original software.
38  3. This notice may not be removed or altered from any source distribution.
39
40  Jean-loup Gailly        Mark Adler
41  jloup@gzip.org          madler@alumni.caltech.edu
42
43*/
44
45// largest prime smaller than 65536
46const BASE: u32 = 65521;
47
48// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
49const NMAX: usize = 5552;
50
51#[inline(always)]
52fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
53    *adler += u32::from(buf[0]);
54    *sum2 += *adler;
55}
56
57#[inline(always)]
58fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
59    do1(adler, sum2, &buf[0..1]);
60    do1(adler, sum2, &buf[1..2]);
61}
62
63#[inline(always)]
64fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
65    do2(adler, sum2, &buf[0..2]);
66    do2(adler, sum2, &buf[2..4]);
67}
68
69#[inline(always)]
70fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
71    do4(adler, sum2, &buf[0..4]);
72    do4(adler, sum2, &buf[4..8]);
73}
74
75#[inline(always)]
76fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
77    do8(adler, sum2, &buf[0..8]);
78    do8(adler, sum2, &buf[8..16]);
79}
80
81/// A rolling version of the Adler32 hash, which can 'forget' past bytes.
82///
83/// Calling remove() will update the hash to the value it would have if that
84/// past byte had never been fed to the algorithm. This allows you to get the
85/// hash of a rolling window very efficiently.
86#[derive(Clone)]
87pub struct RollingAdler32 {
88    a: u32,
89    b: u32,
90}
91
92impl Default for RollingAdler32 {
93    fn default() -> RollingAdler32 {
94        RollingAdler32::new()
95    }
96}
97
98impl RollingAdler32 {
99    /// Creates an empty Adler32 context (with hash 1).
100    pub fn new() -> RollingAdler32 {
101        Self::from_value(1)
102    }
103
104    /// Creates an Adler32 context with the given initial value.
105    pub fn from_value(adler32: u32) -> RollingAdler32 {
106        let a = adler32 & 0xFFFF;
107        let b = adler32 >> 16;
108        RollingAdler32 { a, b }
109    }
110
111    /// Convenience function initializing a context from the hash of a buffer.
112    pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
113        let mut hash = RollingAdler32::new();
114        hash.update_buffer(buffer);
115        hash
116    }
117
118    /// Returns the current hash.
119    pub fn hash(&self) -> u32 {
120        (self.b << 16) | self.a
121    }
122
123    /// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
124    pub fn remove(&mut self, size: usize, byte: u8) {
125        let byte = u32::from(byte);
126        self.a = (self.a + BASE - byte) % BASE;
127        self.b = ((self.b + BASE - 1)
128            .wrapping_add(BASE.wrapping_sub(size as u32).wrapping_mul(byte)))
129            % BASE;
130    }
131
132    /// Feeds a new `byte` to the algorithm to update the hash.
133    pub fn update(&mut self, byte: u8) {
134        let byte = u32::from(byte);
135        self.a = (self.a + byte) % BASE;
136        self.b = (self.b + self.a) % BASE;
137    }
138
139    /// Feeds a vector of bytes to the algorithm to update the hash.
140    pub fn update_buffer(&mut self, buffer: &[u8]) {
141        let len = buffer.len();
142
143        // in case user likes doing a byte at a time, keep it fast
144        if len == 1 {
145            self.update(buffer[0]);
146            return;
147        }
148
149        // in case short lengths are provided, keep it somewhat fast
150        if len < 16 {
151            for byte in buffer.iter().take(len) {
152                self.a += u32::from(*byte);
153                self.b += self.a;
154            }
155            if self.a >= BASE {
156                self.a -= BASE;
157            }
158            self.b %= BASE;
159            return;
160        }
161
162        let mut pos = 0;
163
164        // do length NMAX blocks -- requires just one modulo operation;
165        while pos + NMAX <= len {
166            let end = pos + NMAX;
167            while pos < end {
168                // 16 sums unrolled
169                do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
170                pos += 16;
171            }
172            self.a %= BASE;
173            self.b %= BASE;
174        }
175
176        // do remaining bytes (less than NMAX, still just one modulo)
177        if pos < len {
178            // avoid modulos if none remaining
179            while len - pos >= 16 {
180                do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
181                pos += 16;
182            }
183            while len - pos > 0 {
184                self.a += u32::from(buffer[pos]);
185                self.b += self.a;
186                pos += 1;
187            }
188            self.a %= BASE;
189            self.b %= BASE;
190        }
191    }
192}
193
194/// Consume a Read object and returns the Adler32 hash.
195#[cfg(feature = "std")]
196pub fn adler32<R: std::io::Read>(mut reader: R) -> std::io::Result<u32> {
197    let mut hash = RollingAdler32::new();
198    let mut buffer = [0u8; NMAX];
199    let mut read = reader.read(&mut buffer)?;
200    while read > 0 {
201        hash.update_buffer(&buffer[..read]);
202        read = reader.read(&mut buffer)?;
203    }
204    Ok(hash.hash())
205}
206
207#[cfg(test)]
208mod test {
209    use rand::Rng;
210    use std::io;
211    #[cfg(target_arch = "wasm32")]
212    use wasm_bindgen_test::wasm_bindgen_test;
213
214    use super::{adler32, RollingAdler32, BASE};
215
216    fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
217        let mut a: u32 = 1;
218        let mut b: u32 = 0;
219
220        for byte in reader.bytes() {
221            let byte = byte? as u32;
222            a = (a + byte) % BASE;
223            b = (b + a) % BASE;
224        }
225
226        Ok((b << 16) | a)
227    }
228
229    #[test]
230    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
231    fn testvectors() {
232        fn do_test(v: u32, bytes: &[u8]) {
233            let mut hash = RollingAdler32::new();
234            hash.update_buffer(&bytes);
235            assert_eq!(hash.hash(), v);
236
237            let r = io::Cursor::new(bytes);
238            assert_eq!(adler32(r).unwrap(), v);
239        }
240        do_test(0x00000001, b"");
241        do_test(0x00620062, b"a");
242        do_test(0x024d0127, b"abc");
243        do_test(0x29750586, b"message digest");
244        do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
245        do_test(
246            0x8adb150c,
247            b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
248                              abcdefghijklmnopqrstuvwxyz\
249                              0123456789",
250        );
251        do_test(
252            0x97b61069,
253            b"1234567890123456789012345678901234567890\
254                              1234567890123456789012345678901234567890",
255        );
256        do_test(0xD6251498, &[255; 64000]);
257    }
258
259    #[test]
260    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
261    fn compare() {
262        let mut rng = rand::thread_rng();
263        let mut data = vec![0u8; 5589];
264        for size in [
265            0, 1, 3, 4, 5, 31, 32, 33, 67, 5550, 5552, 5553, 5568, 5584, 5589,
266        ]
267        .iter()
268        .cloned()
269        {
270            rng.fill(&mut data[..size]);
271            let r1 = io::Cursor::new(&data[..size]);
272            let r2 = r1.clone();
273            if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
274                panic!("Comparison failed, size={}", size);
275            }
276        }
277    }
278
279    #[test]
280    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
281    fn rolling() {
282        assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
283
284        fn do_test(a: &[u8], b: &[u8]) {
285            let mut total = Vec::with_capacity(a.len() + b.len());
286            total.extend(a);
287            total.extend(b);
288            let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
289            for i in 0..(a.len()) {
290                h.remove(b.len(), a[i]);
291                h.update(total[b.len() + i]);
292            }
293            assert_eq!(h.hash(), adler32(b).unwrap());
294        }
295        do_test(b"a", b"b");
296        do_test(b"", b"this a test");
297        do_test(b"th", b"is a test");
298        do_test(b"this a ", b"test");
299    }
300
301    #[test]
302    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
303    fn long_window_remove() {
304        let mut hash = RollingAdler32::new();
305        let w = 65536;
306        assert!(w as u32 > BASE);
307
308        let mut bytes = vec![0; w * 3];
309        for (i, b) in bytes.iter_mut().enumerate() {
310            *b = i as u8;
311        }
312
313        for (i, b) in bytes.iter().enumerate() {
314            if i >= w {
315                hash.remove(w, bytes[i - w]);
316            }
317            hash.update(*b);
318            if i > 0 && i % w == 0 {
319                assert_eq!(hash.hash(), 0x433a8772);
320            }
321        }
322        assert_eq!(hash.hash(), 0xbbba8772);
323    }
324}