1#![macro_use]
3use std::cmp;
4use std::fmt;
5use std::iter::{self, Iterator};
6use std::ops::{Range, RangeFrom};
7use std::slice::Iter;
8
9use crate::chained_hash_table::{update_hash, ChainedHashTable};
10use crate::compress::Flush;
11#[cfg(test)]
12use crate::compression_options::{HIGH_LAZY_IF_LESS_THAN, HIGH_MAX_HASH_CHECKS};
13use crate::input_buffer::InputBuffer;
14#[cfg(test)]
15use crate::lzvalue::{LZType, LZValue};
16use crate::matching::longest_match;
17use crate::output_writer::{BufferStatus, DynamicWriter};
18use crate::rle::process_chunk_greedy_rle;
19
20const MAX_MATCH: usize = crate::huffman_table::MAX_MATCH as usize;
21const MIN_MATCH: usize = crate::huffman_table::MIN_MATCH as usize;
22
23const NO_RLE: u16 = 43212;
24
/// Which match-selection strategy the compressor uses.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum MatchingType {
    /// Use greedy matching: a match found at the current position is emitted
    /// right away.
    Greedy,
    /// Use lazy matching: after finding a match, the next position is also
    /// checked for a longer match before the first one is committed.
    Lazy,
}
38
39impl fmt::Display for MatchingType {
40 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
41 match *self {
42 MatchingType::Greedy => write!(f, "Greedy matching"),
43 MatchingType::Lazy => write!(f, "Lazy matching"),
44 }
45 }
46}
47
/// Keeps track of the hash table and where we are in the input between calls
/// to `lz77_compress_block`.
pub struct LZ77State {
    /// Hash chains used to look up earlier occurrences of the current bytes.
    hash_table: ChainedHashTable,
    /// True until the first window of the stream has been processed.
    is_first_window: bool,
    /// Set once `set_last` has been called (final block reached).
    is_last_block: bool,
    /// How far into the current window processing should resume (bytes already
    /// consumed by a match that overlapped from the previous chunk).
    overlap: usize,
    /// Number of input bytes covered by the block currently being built.
    current_block_input_bytes: u64,
    /// Maximum number of hash chain entries to check per match search.
    max_hash_checks: u16,
    /// Stop lazy-evaluating once a match of at least this length is found.
    lazy_if_less_than: u16,
    /// Greedy or lazy parsing (see `MatchingType`).
    matching_type: MatchingType,
    /// Pending match/byte carried between `process_chunk` calls.
    match_state: ChunkState,
    /// How many bytes at the start of the next window still need hashing
    /// (set from the `overlap` returned by `process_chunk`).
    bytes_to_hash: usize,
    /// Whether the previous `lz77_compress_block` call ended with a sync flush.
    was_synced: bool,
}
75
76impl LZ77State {
77 pub fn new(
79 max_hash_checks: u16,
80 lazy_if_less_than: u16,
81 matching_type: MatchingType,
82 ) -> LZ77State {
83 LZ77State {
84 hash_table: ChainedHashTable::new(),
85 is_first_window: true,
86 is_last_block: false,
87 overlap: 0,
88 current_block_input_bytes: 0,
89 max_hash_checks,
90 lazy_if_less_than,
91 matching_type,
92 match_state: ChunkState::new(),
93 bytes_to_hash: 0,
94 was_synced: false,
95 }
96 }
97
98 pub fn reset(&mut self) {
100 self.hash_table.reset();
101 self.is_first_window = true;
102 self.is_last_block = false;
103 self.overlap = 0;
104 self.current_block_input_bytes = 0;
105 self.match_state = ChunkState::new();
106 self.bytes_to_hash = 0
107 }
108
109 pub fn set_last(&mut self) {
110 self.is_last_block = true;
111 }
112
113 pub const fn is_last_block(&self) -> bool {
115 self.is_last_block
116 }
117
118 pub const fn current_block_input_bytes(&self) -> u64 {
120 self.current_block_input_bytes
121 }
122
123 pub fn reset_input_bytes(&mut self) {
125 self.current_block_input_bytes = 0;
126 }
127
128 pub const fn pending_byte(&self) -> bool {
130 self.match_state.add
131 }
132
133 pub fn pending_byte_as_num(&self) -> usize {
135 if self.match_state.add {
138 1
139 } else {
140 0
141 }
142 }
143}
144
/// The DEFLATE sliding-window size; the only window size currently supported.
const DEFAULT_WINDOW_SIZE: usize = 32768;

/// Outcome of processing one chunk of input.
#[derive(Debug)]
pub enum ProcessStatus {
    /// The whole requested range was processed.
    Ok,
    /// The output buffer filled up; the contained value is the input position
    /// at which processing stopped.
    BufferFull(usize),
}
157
/// Match/byte state carried across calls to `process_chunk_lazy`, since lazy
/// matching may defer emitting a byte or a match across a window boundary.
#[derive(Debug)]
pub struct ChunkState {
    /// Length of the match found at the most recently examined position
    /// (0 if none).
    current_length: u16,
    /// Distance of that match.
    current_distance: u16,
    /// The byte before the most recently examined one; it may still need to be
    /// emitted as a literal.
    prev_byte: u8,
    /// The most recently examined byte.
    cur_byte: u8,
    /// Whether `prev_byte` is still pending output.
    add: bool,
}
174
impl ChunkState {
    /// Create a `ChunkState` with no pending match or byte.
    pub fn new() -> ChunkState {
        ChunkState {
            current_length: 0,
            current_distance: 0,
            prev_byte: 0,
            cur_byte: 0,
            add: false,
        }
    }
}
186
/// Convenience constructor for `ProcessStatus::BufferFull` at `position`.
pub const fn buffer_full(position: usize) -> ProcessStatus {
    ProcessStatus::BufferFull(position)
}
190
191#[allow(clippy::too_many_arguments)]
192fn process_chunk(
193 data: &[u8],
194 iterated_data: &Range<usize>,
195 mut match_state: &mut ChunkState,
196 hash_table: &mut ChainedHashTable,
197 writer: &mut DynamicWriter,
198 max_hash_checks: u16,
199 lazy_if_less_than: usize,
200 matching_type: MatchingType,
201) -> (usize, ProcessStatus) {
202 let avoid_rle = if cfg!(test) {
203 lazy_if_less_than == NO_RLE as usize
208 } else {
209 false
210 };
211 match matching_type {
212 MatchingType::Greedy => {
213 process_chunk_greedy(data, iterated_data, hash_table, writer, max_hash_checks)
214 }
215 MatchingType::Lazy => {
216 if max_hash_checks > 0 || avoid_rle {
217 process_chunk_lazy(
218 data,
219 iterated_data,
220 &mut match_state,
221 hash_table,
222 writer,
223 max_hash_checks,
224 lazy_if_less_than,
225 )
226 } else {
227 process_chunk_greedy_rle(data, iterated_data, writer)
229 }
230 }
231 }
232}
233
/// Advance the iterators by `bytes_to_add` positions, adding each position to
/// the hash table as we go.
///
/// Used after emitting a match, so the bytes covered by the match still enter
/// the hash chains.
fn add_to_hash_table(
    bytes_to_add: usize,
    insert_it: &mut iter::Zip<RangeFrom<usize>, Iter<u8>>,
    hash_it: &mut Iter<u8>,
    hash_table: &mut ChainedHashTable,
) {
    let taker = insert_it.by_ref().take(bytes_to_add);
    let mut hash_taker = hash_it.by_ref().take(bytes_to_add);
    // Resume from the rolling hash of the last inserted position.
    let mut hash = hash_table.current_hash();
    // NOTE: `taker` is deliberately driven to completion so that `insert_it`
    // ends up `bytes_to_add` positions further on even when `hash_taker`
    // (which runs ahead of it) reaches the end of the data first.
    for (ipos, _) in taker {
        if let Some(&i_hash_byte) = hash_taker.next() {
            hash = update_hash(hash, i_hash_byte);
            hash_table.add_with_hash(ipos, hash);
        }
    }
    // Store the updated rolling hash for the next insertion.
    hash_table.set_hash(hash);
}
257
/// Write the given literal byte to writer `$w`, and if the output buffer
/// becomes full, return `(0, BufferFull($pos))` from the enclosing function.
macro_rules! write_literal {
    ($w:ident, $byte:expr, $pos:expr) => {
        let b_status = $w.write_literal($byte);

        if let BufferStatus::Full = b_status {
            return (0, buffer_full($pos));
        }
    };
}
271
272#[inline]
275fn match_too_far(match_len: usize, match_dist: usize) -> bool {
276 const TOO_FAR: usize = 8 * 1024;
277 match_len == MIN_MATCH && match_dist > TOO_FAR
278}
279
/// Build the iterators used by the chunk processors for the given range.
///
/// Returns `(end, insert_it, hash_it)` where `end` is the range end clamped to
/// the data length, `insert_it` yields `(position, &byte)` pairs for the chunk
/// itself, and `hash_it` yields the bytes used for hashing, starting two
/// positions ahead of the chunk start (empty when fewer than three bytes
/// remain from `start`).
fn create_iterators<'a>(
    data: &'a [u8],
    iterated_data: &Range<usize>,
) -> (
    usize,
    iter::Zip<RangeFrom<usize>, Iter<'a, u8>>,
    Iter<'a, u8>,
) {
    let start = iterated_data.start;
    let end = cmp::min(data.len(), iterated_data.end);

    let insert_it = (start..).zip(data[start..end].iter());

    // The hash iterator runs two bytes ahead of the insert iterator; when
    // fewer than three bytes remain there is nothing left to hash.
    let hash_start = if data.len() - start > 2 {
        start + 2
    } else {
        data.len()
    };
    let hash_it = data[hash_start..].iter();

    (end, insert_it, hash_it)
}
304
/// Compress `data[iterated_data]` using lazy matching: after finding a match,
/// the next byte is also checked for a longer match before the first one is
/// committed, which improves compression at the cost of speed.
///
/// `state` carries a pending match or byte between calls, since either can
/// straddle a window boundary.
///
/// Returns `(overlap, status)` where `overlap` is how many bytes into the next
/// window the last match extended, and `status` reports whether the output
/// buffer filled up (and at which input position).
fn process_chunk_lazy(
    data: &[u8],
    iterated_data: &Range<usize>,
    state: &mut ChunkState,
    mut hash_table: &mut ChainedHashTable,
    writer: &mut DynamicWriter,
    max_hash_checks: u16,
    lazy_if_less_than: usize,
) -> (usize, ProcessStatus) {
    let (end, mut insert_it, mut hash_it) = create_iterators(data, iterated_data);

    const NO_LENGTH: u16 = 0;

    // Match carried over from the previous position (or the previous call).
    let mut prev_length = state.current_length;
    let mut prev_distance = state.current_distance;

    state.current_length = 0;
    state.current_distance = 0;

    // How many bytes past `end` the last match extended.
    let mut overlap = 0;

    // If the carried-over match is already "good enough", skip searching at
    // the first position.
    let mut ignore_next = prev_length as usize >= lazy_if_less_than;

    state.prev_byte = state.cur_byte;

    while let Some((position, &b)) = insert_it.next() {
        state.cur_byte = b;
        if let Some(&hash_byte) = hash_it.next() {
            hash_table.add_hash_value(position, hash_byte);

            if !ignore_next {
                let (mut match_len, match_dist) = {
                    // When the previous match is already long, a longer one is
                    // unlikely, so spend only a quarter of the search effort.
                    let max_hash_checks = if prev_length >= 32 {
                        max_hash_checks >> 2
                    } else {
                        max_hash_checks
                    };

                    // Only interested in matches longer than `prev_length`.
                    longest_match(
                        data,
                        hash_table,
                        position,
                        prev_length as usize,
                        max_hash_checks,
                    )
                };

                // Discard minimum-length matches that are too far back to be
                // worth encoding.
                if match_too_far(match_len, match_dist) {
                    match_len = NO_LENGTH as usize;
                };

                if match_len >= lazy_if_less_than {
                    // Good enough: don't look for a longer match at the next
                    // position.
                    ignore_next = true;
                }
                state.current_length = match_len as u16;
                state.current_distance = match_dist as u16;
            } else {
                // The previous position's match was accepted, so no search at
                // this position.
                state.current_length = NO_LENGTH;
                state.current_distance = 0;
                ignore_next = false;
            };

            // Emit the previous position's match if the current one is no
            // better and it's long enough to encode.
            if prev_length >= state.current_length && prev_length >= MIN_MATCH as u16 {
                let b_status =
                    writer.write_length_distance(prev_length as u16, prev_distance as u16);

                // The first two bytes of the match are already hashed, so hash
                // the remaining ones.
                let bytes_to_add = prev_length - 2;

                add_to_hash_table(
                    bytes_to_add as usize,
                    &mut insert_it,
                    &mut hash_it,
                    &mut hash_table,
                );

                // The match started at the previous position, so it extends
                // one byte less past `end` than `position + prev_length`.
                if position + prev_length as usize > end {
                    overlap = position + prev_length as usize - end - 1;
                };

                // The pending byte is consumed by the match.
                state.add = false;

                state.current_length = 0;
                state.current_distance = 0;

                if let BufferStatus::Full = b_status {
                    return (overlap, buffer_full(position + prev_length as usize - 1));
                }

                ignore_next = false;
            } else if state.add {
                // No match to emit; flush the deferred byte as a literal.
                write_literal!(writer, state.prev_byte, position + 1);
            } else {
                // Defer this byte until we know whether the next position has
                // a better match.
                state.add = true
            }

            prev_length = state.current_length;
            prev_distance = state.current_distance;
            state.prev_byte = b;
        } else {
            // Within the last two bytes of the data there are no more hash
            // bytes: flush any pending match, then emit literals.
            if prev_length >= MIN_MATCH as u16 {
                let b_status =
                    writer.write_length_distance(prev_length as u16, prev_distance as u16);

                state.current_length = 0;
                state.current_distance = 0;
                state.add = false;

                // A pending match here must reach (at least) to the end of the
                // window; shorter ones were handled in the branch above.
                debug_assert!((position + prev_length as usize) >= end - 1);
                overlap = (position + prev_length as usize)
                    .saturating_sub(end)
                    .saturating_sub(1);

                if let BufferStatus::Full = b_status {
                    return (overlap, buffer_full(end));
                } else {
                    return (overlap, ProcessStatus::Ok);
                }
            };

            // Flush a deferred byte before the current one.
            if state.add {
                state.add = false;

                write_literal!(writer, state.prev_byte, position);
            };

            write_literal!(writer, b, position + 1);
        }
    }
    (overlap, ProcessStatus::Ok)
}
487
488fn process_chunk_greedy(
489 data: &[u8],
490 iterated_data: &Range<usize>,
491 mut hash_table: &mut ChainedHashTable,
492 writer: &mut DynamicWriter,
493 max_hash_checks: u16,
494) -> (usize, ProcessStatus) {
495 let (end, mut insert_it, mut hash_it) = create_iterators(data, iterated_data);
496
497 const NO_LENGTH: usize = 0;
498
499 let mut overlap = 0;
502
503 while let Some((position, &b)) = insert_it.next() {
505 if let Some(&hash_byte) = hash_it.next() {
506 hash_table.add_hash_value(position, hash_byte);
507
508 let (match_len, match_dist) =
510 { longest_match(data, hash_table, position, NO_LENGTH, max_hash_checks) };
511
512 if match_len >= MIN_MATCH as usize && !match_too_far(match_len, match_dist) {
513 let b_status = writer.write_length_distance(match_len as u16, match_dist as u16);
516
517 let bytes_to_add = match_len - 1;
521 add_to_hash_table(bytes_to_add, &mut insert_it, &mut hash_it, &mut hash_table);
522
523 if position + match_len > end {
527 overlap = position + match_len - end;
529 };
530
531 if let BufferStatus::Full = b_status {
532 return (overlap, buffer_full(position + match_len));
534 }
535 } else {
536 write_literal!(writer, b, position + 1);
538 }
539 } else {
540 write_literal!(writer, b, position + 1);
544 }
545 }
546 (overlap, ProcessStatus::Ok)
547}
548
/// Status returned from `lz77_compress_block`.
#[derive(Eq, PartialEq, Clone, Copy, Debug)]
pub enum LZ77Status {
    /// Not enough data is buffered to process a full window; more input is
    /// needed.
    NeedInput,
    /// The output buffer is full, so the current block must be written out
    /// before compression can continue.
    EndBlock,
    /// All input has been processed (finish or sync flush completed).
    Finished,
}
559
#[cfg(test)]
/// Compress `data` as a finishing block, discarding the position part of the
/// result. Test helper.
///
/// Fix: the `writer` parameter was bound as `mut writer` and then passed on as
/// `&mut writer` (a `&mut &mut DynamicWriter`); passing the `&mut` binding
/// directly is equivalent and avoids the double reference.
pub fn lz77_compress_block_finish(
    data: &[u8],
    state: &mut LZ77State,
    buffer: &mut InputBuffer,
    writer: &mut DynamicWriter,
) -> (usize, LZ77Status) {
    let (consumed, status, _) = lz77_compress_block(data, state, buffer, writer, Flush::Finish);
    (consumed, status)
}
571
/// Compress a block of input data using LZ77, writing output values to
/// `writer`.
///
/// Input is consumed from `data` into `buffer` and processed one window at a
/// time. Returns `(bytes consumed from data, status, position)` where
/// `position` marks how far into the buffer the emitted output corresponds to.
pub fn lz77_compress_block(
    data: &[u8],
    state: &mut LZ77State,
    buffer: &mut InputBuffer,
    mut writer: &mut DynamicWriter,
    flush: Flush,
) -> (usize, LZ77Status, usize) {
    // Only the default window size is supported here.
    let window_size = DEFAULT_WINDOW_SIZE;

    // A sync flush is treated like finish, except compression state is kept
    // for continuing afterwards.
    let finish = flush == Flush::Finish || flush == Flush::Sync;
    let sync = flush == Flush::Sync;

    let mut current_position = 0;

    let mut status = LZ77Status::EndBlock;

    // Whether the initial two hash values still need to be seeded.
    let mut add_initial = true;

    // If the previous call ended with a sync flush, the hash values for the
    // final two positions couldn't be computed then (the following bytes
    // weren't known yet); add them now using the first bytes of the new data.
    if state.was_synced {
        if buffer.current_end() > 2 {
            let pos_add = buffer.current_end() - 2;
            for (n, &b) in data.iter().take(2).enumerate() {
                state.hash_table.add_hash_value(n + pos_add, b);
            }
            add_initial = false;
        }
        state.was_synced = false;
    }

    // Data that doesn't fit in the buffer yet is returned as `Some(rest)` and
    // carried until it can be slid in.
    let mut remaining_data = buffer.add_data(data);

    loop {
        // Pending-byte count before processing, used to correct the input
        // byte accounting below.
        let pending_previous = state.pending_byte_as_num();

        assert!(writer.buffer_length() <= (window_size * 2));
        // Process when at least two windows plus the maximum match length are
        // buffered, or when finishing/syncing.
        if buffer.current_end() >= (window_size * 2) + MAX_MATCH || finish {
            if state.is_first_window {
                // Seed the hash table with the first two bytes of the stream.
                if buffer.get_buffer().len() >= 2
                    && add_initial
                    && state.current_block_input_bytes == 0
                {
                    let b = buffer.get_buffer();
                    state.hash_table.add_initial_hash_values(b[0], b[1]);
                    add_initial = false;
                }
            } else if buffer.current_end() >= window_size + 2 {
                // Hash the overlap bytes from the previous chunk that weren't
                // hashed yet.
                for (n, &h) in buffer.get_buffer()[window_size + 2..]
                    .iter()
                    .enumerate()
                    .take(state.bytes_to_hash)
                {
                    state.hash_table.add_hash_value(window_size + n, h);
                }
                state.bytes_to_hash = 0;
            }

            let window_start = if state.is_first_window {
                0
            } else {
                window_size
            };
            // Resume after any bytes already consumed by an overlapping match.
            let start = state.overlap + window_start;
            let end = cmp::min(window_size + window_start, buffer.current_end());

            let (overlap, p_status) = process_chunk(
                buffer.get_buffer(),
                &(start..end),
                &mut state.match_state,
                &mut state.hash_table,
                &mut writer,
                state.max_hash_checks,
                state.lazy_if_less_than as usize,
                state.matching_type,
            );

            // Bytes that still need hashing when the next window is processed.
            state.bytes_to_hash = overlap;

            if let ProcessStatus::BufferFull(written) = p_status {
                // Count the bytes processed before the writer filled up,
                // correcting for any change in the pending byte.
                state.current_block_input_bytes +=
                    (written - start + pending_previous - state.pending_byte_as_num()) as u64;

                state.overlap = if overlap > 0 {
                    // The last match overlapped into the next window; slide
                    // now so the next call starts in a fresh window.
                    if !state.is_first_window {
                        // The hash table only needs sliding when it's in use.
                        if state.max_hash_checks > 0 {
                            state.hash_table.slide(window_size);
                        }
                        remaining_data = buffer.slide(remaining_data.unwrap_or(&[]));
                    } else {
                        state.is_first_window = false;
                    }
                    overlap
                } else {
                    // Otherwise resume from where processing stopped within
                    // this window.
                    written - window_start
                };

                current_position = written - state.pending_byte_as_num();

                // Status stays `EndBlock`: the caller has to write out a block
                // before compressing further.
                break;
            }

            state.current_block_input_bytes +=
                (end - start + overlap + pending_previous - state.pending_byte_as_num()) as u64;

            state.overlap = overlap;

            // All buffered data processed and we're finishing or syncing?
            if (state.is_first_window || remaining_data.is_none())
                && finish
                && end >= buffer.current_end()
            {
                current_position = if state.is_first_window {
                    end - state.pending_byte_as_num()
                } else {
                    debug_assert!(
                        !state.pending_byte(),
                        "Bug! Ended compression with a pending byte!"
                    );
                    buffer.current_end()
                };

                if !sync {
                    state.set_last();
                    state.is_first_window = false;
                } else {
                    // For a sync flush, set the overlap so the next call
                    // continues right after the data processed so far.
                    state.overlap = if state.is_first_window {
                        end
                    } else {
                        buffer.current_end() - window_size
                    };
                    state.was_synced = true;
                }
                status = LZ77Status::Finished;
                break;
            } else if state.is_first_window {
                state.is_first_window = false;
            } else {
                // Slide everything back one window and continue with the next
                // chunk of input.
                if state.max_hash_checks > 0 {
                    state.hash_table.slide(window_size);
                }

                remaining_data = buffer.slide(remaining_data.unwrap_or(&[]));
            }
        } else {
            // Not enough data buffered to fill a double window; ask for more.
            status = LZ77Status::NeedInput;
            break;
        }
    }

    (
        data.len() - remaining_data.unwrap_or(&[]).len(),
        status,
        current_position,
    )
}
771
#[cfg(test)]
/// Decompress a slice of LZ77-encoded values with no back buffer, so matches
/// may not reach before the start of the encoded data.
pub fn decompress_lz77(input: &[LZValue]) -> Vec<u8> {
    decompress_lz77_with_backbuffer(input, &[])
}
776
#[cfg(test)]
/// Decompress a slice of LZ77-encoded values. Matches whose distance reaches
/// before the start of the decoded data are resolved against `back_buffer`
/// (the previous window). Panics if a match refers to non-existing data.
pub fn decompress_lz77_with_backbuffer(input: &[LZValue], back_buffer: &[u8]) -> Vec<u8> {
    let mut decoded = Vec::new();
    for value in input {
        match value.value() {
            LZType::Literal(byte) => decoded.push(byte),
            LZType::StoredLengthDistance(length, distance) => {
                let distance = distance as usize;
                let len_before = decoded.len();
                // Part of the match may lie behind the start of the decoded
                // data, inside the caller-supplied back buffer.
                let from_back_buffer = if distance > len_before {
                    let into_back_buffer = distance - len_before;

                    assert!(
                        into_back_buffer <= back_buffer.len(),
                        "ERROR: Attempted to refer to a match in non-existing data!\
                         into_back_buffer: {}, back_buffer len {}, d {}, l {:?}",
                        into_back_buffer,
                        back_buffer.len(),
                        distance,
                        length
                    );
                    let start = back_buffer.len() - into_back_buffer;
                    let end = cmp::min(back_buffer.len(), start + length.actual_length() as usize);
                    decoded.extend_from_slice(&back_buffer[start..end]);

                    end - start
                } else {
                    0
                };

                // Copy the rest byte-by-byte from the already-decoded output
                // (byte-by-byte because the match may overlap its own output).
                let copy_from = len_before.saturating_sub(distance);
                let remaining = (length.actual_length() as usize).saturating_sub(from_back_buffer);
                for offset in 0..remaining {
                    let byte = decoded[copy_from + offset];
                    decoded.push(byte);
                }
            }
        }
    }
    decoded
}
822
#[cfg(test)]
/// Bundles the state, input buffer and output writer needed to run the
/// compressor in tests.
pub struct TestStruct {
    state: LZ77State,
    buffer: InputBuffer,
    writer: DynamicWriter,
}
829
#[cfg(test)]
impl TestStruct {
    /// Create a test compressor with the default high-compression settings.
    fn new() -> TestStruct {
        TestStruct::with_config(
            HIGH_MAX_HASH_CHECKS,
            HIGH_LAZY_IF_LESS_THAN,
            MatchingType::Lazy,
        )
    }

    /// Create a test compressor with explicit settings.
    fn with_config(
        max_hash_checks: u16,
        lazy_if_less_than: u16,
        matching_type: MatchingType,
    ) -> TestStruct {
        TestStruct {
            state: LZ77State::new(max_hash_checks, lazy_if_less_than, matching_type),
            buffer: InputBuffer::empty(),
            writer: DynamicWriter::new(),
        }
    }

    /// Compress `data`, using `Flush::Finish` when `flush` is true and
    /// `Flush::None` otherwise.
    fn compress_block(&mut self, data: &[u8], flush: bool) -> (usize, LZ77Status, usize) {
        lz77_compress_block(
            data,
            &mut self.state,
            &mut self.buffer,
            &mut self.writer,
            if flush { Flush::Finish } else { Flush::None },
        )
    }
}
862
#[cfg(test)]
/// Compress `data` with the default (high-compression, lazy) settings.
/// Test helper.
pub fn lz77_compress(data: &[u8]) -> Option<Vec<LZValue>> {
    lz77_compress_conf(
        data,
        HIGH_MAX_HASH_CHECKS,
        HIGH_LAZY_IF_LESS_THAN,
        MatchingType::Lazy,
    )
}
872
#[cfg(test)]
/// Compress all of `data` with the given settings, collecting the LZ values
/// into a vector. Test helper; repeatedly finishes blocks until the last
/// block has been produced.
pub fn lz77_compress_conf(
    data: &[u8],
    max_hash_checks: u16,
    lazy_if_less_than: u16,
    matching_type: MatchingType,
) -> Option<Vec<LZValue>> {
    // Boxed to keep the sizeable compressor state off the stack.
    let mut ctx = Box::new(TestStruct::with_config(
        max_hash_checks,
        lazy_if_less_than,
        matching_type,
    ));
    let mut out = Vec::<LZValue>::with_capacity(data.len() / 3);
    let mut remaining = data;

    while !ctx.state.is_last_block {
        let consumed = lz77_compress_block_finish(
            remaining,
            &mut ctx.state,
            &mut ctx.buffer,
            &mut ctx.writer,
        )
        .0;
        remaining = &remaining[consumed..];
        out.extend(ctx.writer.get_buffer());
        ctx.writer.clear();
    }

    Some(out)
}
910
911#[cfg(test)]
912mod test {
913 use super::*;
914
915 use crate::chained_hash_table::WINDOW_SIZE;
916 use crate::compression_options::DEFAULT_LAZY_IF_LESS_THAN;
917 use crate::lzvalue::{ld, lit, LZType, LZValue};
918 use crate::output_writer::MAX_BUFFER_LENGTH;
919 use crate::test_utils::get_test_data;
920
921 fn print_output(input: &[LZValue]) {
923 let mut output = vec![];
924 for l in input {
925 match l.value() {
926 LZType::Literal(l) => output.push(l),
927 LZType::StoredLengthDistance(l, d) => {
928 output.extend(format!("<L {}>", l.actual_length()).into_bytes());
929 output.extend(format!("<D {}>", d).into_bytes())
930 }
931 }
932 }
933
934 println!("\"{}\"", String::from_utf8(output).unwrap());
935 }
936
937 #[test]
939 fn compress_short() {
940 let test_bytes = String::from("Deflate late").into_bytes();
941 let res = lz77_compress(&test_bytes).unwrap();
942
943 let decompressed = decompress_lz77(&res);
944
945 assert_eq!(test_bytes, decompressed);
946 assert_eq!(*res.last().unwrap(), LZValue::length_distance(4, 5));
947 }
948
    #[test]
    /// Compressing and decompressing a larger test file should reproduce it
    /// exactly, and the compressed form should be smaller.
    fn compress_long() {
        let input = get_test_data();
        let compressed = lz77_compress(&input).unwrap();
        assert!(compressed.len() < input.len());

        let decompressed = decompress_lz77(&compressed);
        println!("compress_long length: {}", input.len());

        // On mismatch, print where the first difference is to aid debugging.
        for (n, (&a, &b)) in input.iter().zip(decompressed.iter()).enumerate() {
            if a != b {
                println!("First difference at {}, input: {}, output: {}", n, a, b);
                break;
            }
        }
        assert_eq!(input.len(), decompressed.len());
        assert!(&decompressed == &input);
    }
969
970 #[test]
972 fn lazy() {
973 let data = b"nba badger nbadger";
976 let compressed = lz77_compress(data).unwrap();
977 let test = compressed[compressed.len() - 1];
978 if let LZType::StoredLengthDistance(l, _) = test.value() {
979 assert_eq!(l.actual_length(), 6);
980 } else {
981 print_output(&compressed);
982 panic!();
983 }
984 }
985
986 fn roundtrip(data: &[u8]) {
987 let compressed = super::lz77_compress(&data).unwrap();
988 let decompressed = decompress_lz77(&compressed);
989 assert!(decompressed == data);
990 }
991
992 #[test]
994 #[allow(unused)]
995 fn exact_window_size() {
996 use std::io::Write;
997 let mut data = vec![0; WINDOW_SIZE];
998 roundtrip(&data);
999 {
1000 data.write(&[22; WINDOW_SIZE]);
1001 }
1002 roundtrip(&data);
1003 {
1004 data.write(&[55; WINDOW_SIZE]);
1005 }
1006 roundtrip(&data);
1007 }
1008
    #[test]
    /// Compression should work correctly across the window border.
    fn border() {
        use crate::chained_hash_table::WINDOW_SIZE;
        let mut data = vec![35; WINDOW_SIZE];
        data.extend(b"Test");
        let compressed = super::lz77_compress(&data).unwrap();
        assert!(compressed.len() < data.len());
        let decompressed = decompress_lz77(&compressed);

        assert_eq!(decompressed.len(), data.len());
        assert!(decompressed == data);
    }
1022
    #[test]
    /// Round-trip data spanning more than two windows, ending in a byte that
    /// differs from the rest.
    fn border_multiple_blocks() {
        use crate::chained_hash_table::WINDOW_SIZE;
        let mut data = vec![0; (WINDOW_SIZE * 2) + 50];
        data.push(1);
        let compressed = super::lz77_compress(&data).unwrap();
        assert!(compressed.len() < data.len());
        let decompressed = decompress_lz77(&compressed);
        assert_eq!(decompressed.len(), data.len());
        assert!(decompressed == data);
    }
1034
    #[test]
    /// Compressing a short input in one finishing call should report
    /// `Finished` and leave the input buffer holding the data.
    fn compress_block_status() {
        use crate::input_buffer::InputBuffer;

        let data = b"Test data data";
        let mut writer = DynamicWriter::new();

        let mut buffer = InputBuffer::empty();
        let mut state = LZ77State::new(4096, DEFAULT_LAZY_IF_LESS_THAN, MatchingType::Lazy);
        let status = lz77_compress_block_finish(data, &mut state, &mut buffer, &mut writer);
        assert_eq!(status.1, LZ77Status::Finished);
        assert!(&buffer.get_buffer()[..data.len()] == data);
        assert_eq!(buffer.current_end(), data.len());
    }
1049
    #[test]
    /// A large input should be consumed one double-window (plus max match) at
    /// a time, reporting `EndBlock` until the data runs out.
    fn compress_block_multiple_windows() {
        use crate::input_buffer::InputBuffer;
        use crate::output_writer::DynamicWriter;

        let data = get_test_data();
        assert!(data.len() > (WINDOW_SIZE * 2) + super::MAX_MATCH);
        let mut writer = DynamicWriter::new();

        let mut buffer = InputBuffer::empty();
        let mut state = LZ77State::new(0, DEFAULT_LAZY_IF_LESS_THAN, MatchingType::Lazy);
        let (bytes_consumed, status) =
            lz77_compress_block_finish(&data, &mut state, &mut buffer, &mut writer);
        // The buffer should hold exactly two windows plus the max match length.
        assert_eq!(
            buffer.get_buffer().len(),
            (WINDOW_SIZE * 2) + super::MAX_MATCH
        );
        assert_eq!(status, LZ77Status::EndBlock);
        let buf_len = buffer.get_buffer().len();
        assert!(buffer.get_buffer()[..] == data[..buf_len]);

        // Continuing with the remaining data should again end with a block.
        writer.clear();
        let (_, status) = lz77_compress_block_finish(
            &data[bytes_consumed..],
            &mut state,
            &mut buffer,
            &mut writer,
        );
        assert_eq!(status, LZ77Status::EndBlock);
    }
1080
    #[test]
    /// Compressing the data in two separate calls should produce the same
    /// output as compressing it in one go.
    fn multiple_inputs() {
        let data = b"Badger badger bababa test data 25 asfgestghresjkgh";
        let comp1 = lz77_compress(data).unwrap();
        let comp2 = {
            const SPLIT: usize = 25;
            let first_part = &data[..SPLIT];
            let second_part = &data[SPLIT..];
            let mut state = TestStruct::new();
            let (bytes_written, status, _) = state.compress_block(first_part, false);
            assert_eq!(bytes_written, first_part.len());
            assert_eq!(status, LZ77Status::NeedInput);
            let (bytes_written, status, _) = state.compress_block(second_part, true);
            assert_eq!(bytes_written, second_part.len());
            assert_eq!(status, LZ77Status::Finished);
            Vec::from(state.writer.get_buffer())
        };
        assert!(comp1 == comp2);
    }
1100
    #[test]
    /// Exercise the paths where the output buffer fills up mid-stream, for
    /// both matching strategies.
    fn buffer_fill() {
        let data = get_test_data();
        // Buffer filling with literals only.
        buffer_test_literals(&data);
        // Buffer filling at the last (unhashed) bytes of the input.
        buffer_test_last_bytes(MatchingType::Greedy, &data);
        buffer_test_last_bytes(MatchingType::Lazy, &data);

        // Buffer filling right as a match is emitted.
        buffer_test_match(MatchingType::Greedy);
        buffer_test_match(MatchingType::Lazy);

        buffer_test_add_end(&data);
    }
1124
    /// Fill the output buffer using only literals (0 hash checks plus the
    /// `NO_RLE` marker disables matching) and check status, counters and the
    /// round-trip of the produced output.
    fn buffer_test_literals(data: &[u8]) {
        let mut state = TestStruct::with_config(0, NO_RLE, MatchingType::Lazy);
        let (bytes_consumed, status, position) = state.compress_block(&data, false);

        // There is more input left, so a new block is needed.
        assert_eq!(status, LZ77Status::EndBlock);
        assert!(bytes_consumed <= (WINDOW_SIZE * 2) + MAX_MATCH);

        // The output buffer should be exactly full.
        assert_eq!(state.writer.get_buffer().len(), MAX_BUFFER_LENGTH);
        assert_eq!(position, state.writer.get_buffer().len());
        // With literals only, one output value corresponds to one input byte.
        assert_eq!(
            state.state.current_block_input_bytes() as usize,
            MAX_BUFFER_LENGTH
        );
        state.state.reset_input_bytes();

        let mut out = decompress_lz77(state.writer.get_buffer());

        state.writer.clear();
        assert_eq!(state.writer.get_buffer().len(), 0);

        assert!(data[..out.len()] == out[..]);

        // Compress the next chunk and check the byte count again.
        let _ = state.compress_block(&data[bytes_consumed..], false);
        assert!(state.writer.get_buffer().len() > 0);
        assert_eq!(
            state.state.current_block_input_bytes() as usize,
            MAX_BUFFER_LENGTH
        );

        out.extend_from_slice(&decompress_lz77(state.writer.get_buffer()));
        assert!(data[..out.len()] == out[..]);
    }
1164
    /// Check round-trips where the output buffer becomes full exactly at, and
    /// one byte past, the buffer length boundary.
    fn buffer_test_last_bytes(matching_type: MatchingType, data: &[u8]) {
        const BYTES_USED: usize = MAX_BUFFER_LENGTH;
        assert!(
            &data[..BYTES_USED]
                == &decompress_lz77(
                    &lz77_compress_conf(&data[..BYTES_USED], 0, NO_RLE, matching_type,).unwrap()
                )[..]
        );
        assert!(
            &data[..BYTES_USED + 1]
                == &decompress_lz77(
                    &lz77_compress_conf(&data[..BYTES_USED + 1], 0, NO_RLE, matching_type,)
                        .unwrap()
                )[..]
        );
    }
1182
    /// Check the case where the output buffer fills up right as a match is
    /// emitted.
    fn buffer_test_match(matching_type: MatchingType) {
        let mut state = TestStruct::with_config(1, 0, matching_type);
        // Pre-fill the writer so only a few slots remain before it's full.
        for _ in 0..MAX_BUFFER_LENGTH - 4 {
            assert!(state.writer.write_literal(0) == BufferStatus::NotFull);
        }
        state.compress_block(&[1, 2, 3, 1, 2, 3, 4], true);
        // The repeated [1, 2, 3] should end up as a length-3, distance-3 match.
        assert!(*state.writer.get_buffer().last().unwrap() == LZValue::length_distance(3, 3));
    }
1194
    // Currently a no-op stub kept so `buffer_fill` still compiles.
    // NOTE(review): the body appears to have been removed or disabled —
    // confirm whether the end-of-buffer pending-byte check should be restored.
    fn buffer_test_add_end(_data: &[u8]) {
    }
1213
    #[test]
    /// Matches may reach into the back buffer (previous window) during
    /// decompression.
    fn test_decompress_with_backbuffer() {
        let bb = vec![5; WINDOW_SIZE];
        let lz_compressed = [lit(2), lit(4), ld(4, 20), lit(1), lit(1), ld(5, 10)];
        let dec = decompress_lz77_with_backbuffer(&lz_compressed, &bb);

        // Expected: the two literals, then 4 bytes of 5s pulled entirely from
        // the back buffer, two 1-literals, then a distance-10 match that takes
        // two 5s from the back buffer and three bytes from the output start.
        assert!(dec == [2, 4, 5, 5, 5, 5, 1, 1, 5, 5, 2, 4, 5]);
    }
1224}
1225
#[cfg(all(test, feature = "benchmarks"))]
mod bench {
    use super::lz77_compress;
    // NOTE(review): `test_std` is presumably an alias for the nightly `test`
    // crate — confirm this module builds with the `benchmarks` feature.
    use test_std::Bencher;
    use test_utils::get_test_data;
    /// Benchmark LZ77 matching alone (without entropy coding) on the test data.
    #[bench]
    fn test_file_zlib_lz77_only(b: &mut Bencher) {
        let test_data = get_test_data();
        b.iter(|| lz77_compress(&test_data));
    }
}