1use std::ffi::CString;
2use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
3use std::time;
4
5use crate::bufreader::BufReader;
6use crate::{Compression, Crc};
7
/// FLG bit 1: the header ends with a 16-bit CRC of all preceding header bytes.
pub static FHCRC: u8 = 0b0000_0010;
/// FLG bit 2: a length-prefixed "extra" field follows the fixed header bytes.
pub static FEXTRA: u8 = 0b0000_0100;
/// FLG bit 3: a nul-terminated file name is present.
pub static FNAME: u8 = 0b0000_1000;
/// FLG bit 4: a nul-terminated comment is present.
pub static FCOMMENT: u8 = 0b0001_0000;
/// FLG bits 5-7: reserved. The header parser rejects any header that sets one.
pub static FRESERVED: u8 = 0b1110_0000;
13
14pub mod bufread;
15pub mod read;
16pub mod write;
17
// Largest number of bytes `read_to_nul` will collect for a single
// nul-terminated header field (file name or comment) before giving up.
const MAX_HEADER_BUF: usize = 0xFFFF;
21
/// Metadata carried by the header of a gzip stream.
///
/// Optional fields are `None` when the corresponding header flag was not set.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct GzHeader {
    // Raw bytes of the optional "extra" field.
    extra: Option<Vec<u8>>,
    // File name bytes, without the terminating nul.
    filename: Option<Vec<u8>>,
    // Comment bytes, without the terminating nul.
    comment: Option<Vec<u8>>,
    // OS byte copied verbatim from the header.
    operating_system: u8,
    // Modification time field; 0 is treated as "not available".
    mtime: u32,
}

impl GzHeader {
    /// Returns the file name stored in the header, if there is one.
    pub fn filename(&self) -> Option<&[u8]> {
        self.filename.as_deref()
    }

    /// Returns the bytes of the "extra" field, if there is one.
    pub fn extra(&self) -> Option<&[u8]> {
        self.extra.as_deref()
    }

    /// Returns the comment stored in the header, if there is one.
    pub fn comment(&self) -> Option<&[u8]> {
        self.comment.as_deref()
    }

    /// Returns the operating-system byte exactly as stored in the header.
    pub fn operating_system(&self) -> u8 {
        self.operating_system
    }

    /// Returns the raw `mtime` field of the header.
    ///
    /// See [`GzHeader::mtime_as_datetime`] for a `SystemTime` view of it.
    pub fn mtime(&self) -> u32 {
        self.mtime
    }

    /// Interprets `mtime` as whole seconds after the Unix epoch.
    ///
    /// Returns `None` when the stored value is 0.
    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
        match self.mtime {
            0 => None,
            secs => {
                let since_epoch = time::Duration::from_secs(u64::from(secs));
                Some(time::UNIX_EPOCH + since_epoch)
            }
        }
    }
}
89
90#[derive(Debug, Default)]
91pub enum GzHeaderState {
92 Start(u8, [u8; 10]),
93 Xlen(Option<Box<Crc>>, u8, [u8; 2]),
94 Extra(Option<Box<Crc>>, u16),
95 Filename(Option<Box<Crc>>),
96 Comment(Option<Box<Crc>>),
97 Crc(Option<Box<Crc>>, u8, [u8; 2]),
98 #[default]
99 Complete,
100}
101
102#[derive(Debug, Default)]
103pub struct GzHeaderParser {
104 state: GzHeaderState,
105 flags: u8,
106 header: GzHeader,
107}
108
109impl GzHeaderParser {
110 fn new() -> Self {
111 GzHeaderParser {
112 state: GzHeaderState::Start(0, [0; 10]),
113 flags: 0,
114 header: GzHeader::default(),
115 }
116 }
117
118 fn parse<R: BufRead>(&mut self, r: &mut R) -> Result<()> {
119 loop {
120 match &mut self.state {
121 GzHeaderState::Start(count, buffer) => {
122 while (*count as usize) < buffer.len() {
123 *count += read_into(r, &mut buffer[*count as usize..])? as u8;
124 }
125 if buffer[0] != 0x1f || buffer[1] != 0x8b {
127 return Err(bad_header());
128 }
129 if buffer[2] != 8 {
131 return Err(bad_header());
132 }
133 self.flags = buffer[3];
134 if self.flags & FRESERVED != 0 {
136 return Err(bad_header());
137 }
138 self.header.mtime = (buffer[4] as u32)
139 | ((buffer[5] as u32) << 8)
140 | ((buffer[6] as u32) << 16)
141 | ((buffer[7] as u32) << 24);
142 let _xfl = buffer[8];
143 self.header.operating_system = buffer[9];
144 let crc = if self.flags & FHCRC != 0 {
145 let mut crc = Box::new(Crc::new());
146 crc.update(buffer);
147 Some(crc)
148 } else {
149 None
150 };
151 self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
152 }
153 GzHeaderState::Xlen(crc, count, buffer) => {
154 if self.flags & FEXTRA != 0 {
155 while (*count as usize) < buffer.len() {
156 *count += read_into(r, &mut buffer[*count as usize..])? as u8;
157 }
158 if let Some(crc) = crc {
159 crc.update(buffer);
160 }
161 let xlen = parse_le_u16(buffer);
162 self.header.extra = Some(vec![0; xlen as usize]);
163 self.state = GzHeaderState::Extra(crc.take(), 0);
164 } else {
165 self.state = GzHeaderState::Filename(crc.take());
166 }
167 }
168 GzHeaderState::Extra(crc, count) => {
169 debug_assert!(self.header.extra.is_some());
170 let extra = self.header.extra.as_mut().unwrap();
171 while (*count as usize) < extra.len() {
172 *count += read_into(r, &mut extra[*count as usize..])? as u16;
173 }
174 if let Some(crc) = crc {
175 crc.update(extra);
176 }
177 self.state = GzHeaderState::Filename(crc.take());
178 }
179 GzHeaderState::Filename(crc) => {
180 if self.flags & FNAME != 0 {
181 let filename = self.header.filename.get_or_insert_with(Vec::new);
182 read_to_nul(r, filename)?;
183 if let Some(crc) = crc {
184 crc.update(filename);
185 crc.update(b"\0");
186 }
187 }
188 self.state = GzHeaderState::Comment(crc.take());
189 }
190 GzHeaderState::Comment(crc) => {
191 if self.flags & FCOMMENT != 0 {
192 let comment = self.header.comment.get_or_insert_with(Vec::new);
193 read_to_nul(r, comment)?;
194 if let Some(crc) = crc {
195 crc.update(comment);
196 crc.update(b"\0");
197 }
198 }
199 self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
200 }
201 GzHeaderState::Crc(crc, count, buffer) => {
202 if let Some(crc) = crc {
203 debug_assert!(self.flags & FHCRC != 0);
204 while (*count as usize) < buffer.len() {
205 *count += read_into(r, &mut buffer[*count as usize..])? as u8;
206 }
207 let stored_crc = parse_le_u16(buffer);
208 let calced_crc = crc.sum() as u16;
209 if stored_crc != calced_crc {
210 return Err(corrupt());
211 }
212 }
213 self.state = GzHeaderState::Complete;
214 }
215 GzHeaderState::Complete => {
216 return Ok(());
217 }
218 }
219 }
220 }
221
222 fn header(&self) -> Option<&GzHeader> {
223 match self.state {
224 GzHeaderState::Complete => Some(&self.header),
225 _ => None,
226 }
227 }
228}
229
230impl From<GzHeaderParser> for GzHeader {
231 fn from(parser: GzHeaderParser) -> Self {
232 debug_assert!(matches!(parser.state, GzHeaderState::Complete));
233 parser.header
234 }
235}
236
/// Performs one `read` into `buffer` and reports how many bytes arrived.
///
/// Unlike `Read::read`, an `Ok(0)` result here does *not* mean end of input:
/// it is returned when the read was interrupted, so the caller should simply
/// try again. A genuine zero-length read is reported as `UnexpectedEof`. Every
/// other error is passed through unchanged.
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
    debug_assert!(!buffer.is_empty());
    match r.read(buffer) {
        Err(err) => {
            if err.kind() == ErrorKind::Interrupted {
                Ok(0)
            } else {
                Err(err)
            }
        }
        Ok(0) => Err(ErrorKind::UnexpectedEof.into()),
        Ok(filled) => Ok(filled),
    }
}
249
250fn read_to_nul<R: BufRead>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
252 let mut bytes = r.bytes();
253 loop {
254 match bytes.next().transpose()? {
255 Some(0) => return Ok(()),
256 Some(_) if buffer.len() == MAX_HEADER_BUF => {
257 return Err(Error::new(
258 ErrorKind::InvalidInput,
259 "gzip header field too long",
260 ));
261 }
262 Some(byte) => {
263 buffer.push(byte);
264 }
265 None => {
266 return Err(ErrorKind::UnexpectedEof.into());
267 }
268 }
269 }
270}
271
/// Decodes two bytes as a little-endian `u16` (first byte is the low byte).
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
    let [low, high] = *buffer;
    u16::from(high) << 8 | u16::from(low)
}
275
/// Builds the error returned when the fixed part of a gzip header is invalid.
fn bad_header() -> Error {
    let message = "invalid gzip header";
    Error::new(ErrorKind::InvalidInput, message)
}
279
/// Builds the error returned when a stored checksum does not match the data.
fn corrupt() -> Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    Error::new(ErrorKind::InvalidInput, message)
}
286
/// Builder for the header of a gzip stream and for encoders that emit it.
///
/// All fields start out unset (`mtime` is 0) and are filled in through the
/// chaining setters on the `impl`.
#[derive(Default, Debug)]
pub struct GzBuilder {
    // Payload of the optional "extra" field.
    extra: Option<Vec<u8>>,
    // File name, stored nul-terminated.
    filename: Option<CString>,
    // Comment, stored nul-terminated.
    comment: Option<CString>,
    // Explicit OS byte; a fallback is chosen when the header is serialized.
    operating_system: Option<u8>,
    // Value for the header's `mtime` field.
    mtime: u32,
}
321
322impl GzBuilder {
323 pub fn new() -> GzBuilder {
325 Self::default()
326 }
327
328 pub fn mtime(mut self, mtime: u32) -> GzBuilder {
330 self.mtime = mtime;
331 self
332 }
333
334 pub fn operating_system(mut self, os: u8) -> GzBuilder {
336 self.operating_system = Some(os);
337 self
338 }
339
340 pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
342 self.extra = Some(extra.into());
343 self
344 }
345
346 pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
352 self.filename = Some(CString::new(filename.into()).unwrap());
353 self
354 }
355
356 pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
362 self.comment = Some(CString::new(comment.into()).unwrap());
363 self
364 }
365
366 pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
371 write::gz_encoder(self.into_header(lvl), w, lvl)
372 }
373
374 pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
379 read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
380 }
381
382 pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
387 where
388 R: BufRead,
389 {
390 bufread::gz_encoder(self.into_header(lvl), r, lvl)
391 }
392
393 fn into_header(self, lvl: Compression) -> Vec<u8> {
394 let GzBuilder {
395 extra,
396 filename,
397 comment,
398 operating_system,
399 mtime,
400 } = self;
401 let mut flg = 0;
402 let mut header = vec![0u8; 10];
403 if let Some(v) = extra {
404 flg |= FEXTRA;
405 header.extend((v.len() as u16).to_le_bytes());
406 header.extend(v);
407 }
408 if let Some(filename) = filename {
409 flg |= FNAME;
410 header.extend(filename.as_bytes_with_nul().iter().copied());
411 }
412 if let Some(comment) = comment {
413 flg |= FCOMMENT;
414 header.extend(comment.as_bytes_with_nul().iter().copied());
415 }
416 header[0] = 0x1f;
417 header[1] = 0x8b;
418 header[2] = 8;
419 header[3] = flg;
420 header[4] = mtime as u8;
421 header[5] = (mtime >> 8) as u8;
422 header[6] = (mtime >> 16) as u8;
423 header[7] = (mtime >> 24) as u8;
424 header[8] = if lvl.0 >= Compression::best().0 {
425 2
426 } else if lvl.0 <= Compression::fast().0 {
427 4
428 } else {
429 0
430 };
431
432 header[9] = operating_system.unwrap_or(255);
437 header
438 }
439}
440
#[cfg(test)]
mod tests {
    use std::io::prelude::*;

    use super::{read, write, GzBuilder, GzHeaderParser};
    use crate::{Compression, GzHeader};
    use rand::{rng, Rng};

    // A short message survives write-side encoding and read-side decoding.
    #[test]
    fn roundtrip() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(b"foo bar baz").unwrap();
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
    }

    // An encoder that never received any data still yields a decodable stream.
    #[test]
    fn roundtrip_zero() {
        let encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "");
    }

    // Many writes of random length decode to the concatenation of the inputs.
    #[test]
    fn roundtrip_big() {
        let mut expected = Vec::new();
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        let pool = crate::random_bytes().take(1024).collect::<Vec<_>>();
        for _ in 0..200 {
            let end = rng().random_range(0..pool.len());
            let piece = &pool[..end];
            expected.extend_from_slice(piece);
            encoder.write_all(piece).unwrap();
        }
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, expected);
    }

    // Read-side encoder feeding directly into the read-side decoder.
    #[test]
    fn roundtrip_big2() {
        let input = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
        let encoder = read::GzEncoder::new(&input[..], Compression::default());
        let mut decoder = read::GzDecoder::new(encoder);
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, input);
    }

    // Independent table-driven CRC-32 used to cross-check the header CRC.
    struct Rfc1952Crc {
        // One precomputed entry per possible byte value.
        crc_table: [u32; 256],
    }

    impl Rfc1952Crc {
        fn new() -> Self {
            let mut crc_table = [0u32; 256];
            for (n, slot) in crc_table.iter_mut().enumerate() {
                let mut c = n as u32;
                for _ in 0..8 {
                    c = if c & 1 != 0 {
                        0xedb88320 ^ (c >> 1)
                    } else {
                        c >> 1
                    };
                }
                *slot = c;
            }
            Rfc1952Crc { crc_table }
        }

        // Continues a running CRC (`crc`) over `buf`; the value is inverted on
        // the way in and again on the way out.
        fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
            let folded = buf.iter().fold(!crc, |c, &b| {
                self.crc_table[usize::from(c as u8 ^ b)] ^ (c >> 8)
            });
            !folded
        }

        // CRC of `buf` starting from the initial value 0.
        fn crc(&self, buf: &[u8]) -> u32 {
            self.update_crc(0, buf)
        }
    }

    // A header with name, comment and a hand-computed CRC16 parses back into
    // the same fields.
    #[test]
    fn roundtrip_header() {
        let mut header = GzBuilder::new()
            .mtime(1234)
            .operating_system(57)
            .filename("filename")
            .comment("comment")
            .into_header(Compression::fast());

        // Toggle the header-CRC flag and append the matching checksum.
        header[3] ^= super::FHCRC;
        let reference = Rfc1952Crc::new();
        let crc16 = reference.crc(&header) as u16;
        header.extend_from_slice(&crc16.to_le_bytes());

        let mut parser = GzHeaderParser::new();
        parser.parse(&mut header.as_slice()).unwrap();
        let actual = parser.header().unwrap();
        let expected = GzHeader {
            extra: None,
            filename: Some(b"filename".to_vec()),
            comment: Some(b"comment".to_vec()),
            operating_system: 57,
            mtime: 1234,
        };
        assert_eq!(actual, &expected)
    }

    // Optional header fields set on the builder are visible on the decoder.
    #[test]
    fn fields() {
        let payload = vec![0, 2, 4, 6];
        let encoder = GzBuilder::new()
            .filename("foo.rs")
            .comment("bar")
            .extra(vec![0, 1, 2, 3])
            .read(&payload[..], Compression::default());
        let mut decoder = read::GzDecoder::new(encoder);
        assert_eq!(decoder.header().unwrap().filename(), Some(&b"foo.rs"[..]));
        assert_eq!(decoder.header().unwrap().comment(), Some(&b"bar"[..]));
        assert_eq!(
            decoder.header().unwrap().extra(),
            Some(&b"\x00\x01\x02\x03"[..])
        );
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, vec![0, 2, 4, 6]);
    }

    // Reading again after the end of the stream yields nothing and no error.
    #[test]
    fn keep_reading_after_end() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(b"foo bar baz").unwrap();
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
    }

    // Property test: arbitrary byte vectors survive an encode/decode cycle.
    #[test]
    fn qc_reader() {
        fn roundtrips(input: Vec<u8>) -> bool {
            let encoder = read::GzEncoder::new(&input[..], Compression::default());
            let mut decoder = read::GzDecoder::new(encoder);
            let mut decoded = Vec::new();
            decoder.read_to_end(&mut decoded).unwrap();
            input == decoded
        }

        ::quickcheck::quickcheck(roundtrips as fn(_) -> _);
    }

    // Flushing right after a formatted write must succeed.
    #[test]
    fn flush_after_write() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        write!(encoder, "Hello world").unwrap();
        encoder.flush().unwrap();
    }
}