1use super::*;
11use crate::data::*;
12use crate::gb18030_2022::*;
13use crate::handles::*;
14use crate::variant::*;
15use super::in_inclusive_range16;
17use super::in_range16;
18
/// Up to three `u8` values that the decoder carries over as pending state.
///
/// Within this file the only direct write is `One(third_minus_offset)` after a
/// malformed fourth byte; the remaining handling lives in the
/// `gb18030_decoder_functions!` macro, which is defined elsewhere.
enum Gb18030Pending {
    /// Nothing is pending.
    None,
    /// One pending value.
    One(u8),
    /// Two pending values.
    Two(u8, u8),
    /// Three pending values.
    Three(u8, u8, u8),
}
25
26impl Gb18030Pending {
27 fn is_none(&self) -> bool {
28 match *self {
29 Gb18030Pending::None => true,
30 _ => false,
31 }
32 }
33
34 fn count(&self) -> usize {
35 match *self {
36 Gb18030Pending::None => 0,
37 Gb18030Pending::One(_) => 1,
38 Gb18030Pending::Two(_, _) => 2,
39 Gb18030Pending::Three(_, _, _) => 3,
40 }
41 }
42}
43
44pub struct Gb18030Decoder {
45 first: Option<u8>,
46 second: Option<u8>,
47 third: Option<u8>,
48 pending: Gb18030Pending,
49 pending_ascii: Option<u8>,
50}
51
52impl Gb18030Decoder {
53 pub fn new() -> VariantDecoder {
54 VariantDecoder::Gb18030(Gb18030Decoder {
55 first: None,
56 second: None,
57 third: None,
58 pending: Gb18030Pending::None,
59 pending_ascii: None,
60 })
61 }
62
63 pub fn in_neutral_state(&self) -> bool {
64 self.first.is_none()
65 && self.second.is_none()
66 && self.third.is_none()
67 && self.pending.is_none()
68 && self.pending_ascii.is_none()
69 }
70
71 fn extra_from_state(&self, byte_length: usize) -> Option<usize> {
72 byte_length.checked_add(
73 self.pending.count()
74 + match self.first {
75 None => 0,
76 Some(_) => 1,
77 }
78 + match self.second {
79 None => 0,
80 Some(_) => 1,
81 }
82 + match self.third {
83 None => 0,
84 Some(_) => 1,
85 }
86 + match self.pending_ascii {
87 None => 0,
88 Some(_) => 1,
89 },
90 )
91 }
92
93 pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
94 checked_add(1, self.extra_from_state(byte_length))
98 }
99
100 pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
101 self.max_utf8_buffer_length(byte_length)
106 }
107
108 pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
109 checked_add(1, checked_mul(3, self.extra_from_state(byte_length)))
110 }
111
112 gb18030_decoder_functions!(
113 {
114 let non_ascii_minus_offset = non_ascii.wrapping_sub(0x81);
117 if non_ascii_minus_offset > (0xFE - 0x81) {
118 if non_ascii == 0x80 {
119 handle.write_upper_bmp(0x20ACu16);
120 continue 'outermost;
121 }
122 return (DecoderResult::Malformed(1, 0),
123 source.consumed(),
124 handle.written());
125 }
126 non_ascii_minus_offset
127 },
128 {
129 if first_minus_offset >= 0x20 {
131 let trail_minus_offset = second.wrapping_sub(0xA1);
133 if trail_minus_offset <= (0xFE - 0xA1) {
134 let hanzi_lead = first_minus_offset.wrapping_sub(0x2F);
136 if hanzi_lead < (0x77 - 0x2F) {
137 let hanzi_pointer = mul_94(hanzi_lead) + trail_minus_offset as usize;
141 let upper_bmp = GB2312_HANZI[hanzi_pointer];
142 handle.write_upper_bmp(upper_bmp)
143 } else if first_minus_offset == 0x20 {
144 let bmp = GB2312_SYMBOLS[trail_minus_offset as usize];
146 handle.write_bmp_excl_ascii(bmp)
147 } else if first_minus_offset == 0x25 && ((trail_minus_offset.wrapping_sub(63) as usize) < GB2312_SYMBOLS_AFTER_GREEK.len()) {
148 handle.write_bmp_excl_ascii(GB2312_SYMBOLS_AFTER_GREEK[trail_minus_offset.wrapping_sub(63) as usize])
149 } else if first_minus_offset == 0x27 && (trail_minus_offset as usize) < GB2312_PINYIN.len() {
150 handle.write_bmp_excl_ascii(GB2312_PINYIN[trail_minus_offset as usize])
151 } else if first_minus_offset > 0x76 {
152 let pua = (0xE234 + mul_94(first_minus_offset - 0x77) + trail_minus_offset as usize) as u16;
154 handle.write_upper_bmp(pua)
155 } else {
156 let bmp = gb2312_other_decode((mul_94(first_minus_offset - 0x21) + (trail_minus_offset as usize)) as u16);
157 handle.write_bmp_excl_ascii(bmp)
158 }
159 } else {
160 let mut trail_minus_offset = second.wrapping_sub(0x40);
162 if trail_minus_offset > (0x7E - 0x40) {
163 let trail_minus_range_start = second.wrapping_sub(0x80);
164 if trail_minus_range_start > (0xA0 - 0x80) {
165 if second < 0x80 {
166 return (DecoderResult::Malformed(1, 0),
167 unread_handle_second.unread(),
168 handle.written());
169 }
170 return (DecoderResult::Malformed(2, 0),
171 unread_handle_second.consumed(),
172 handle.written());
173 }
174 trail_minus_offset = second - 0x41;
175 }
176 let left_lead = first_minus_offset - 0x20;
178 let left_pointer = left_lead as usize * (190 - 94) +
179 trail_minus_offset as usize;
180 let gbk_left_ideograph_pointer = left_pointer.wrapping_sub((0x29 - 0x20) * (190 - 94));
181 if gbk_left_ideograph_pointer < (((0x7D - 0x29) * (190 - 94)) - 5) {
182 let upper_bmp = gbk_left_ideograph_decode(gbk_left_ideograph_pointer as u16);
183 handle.write_upper_bmp(upper_bmp)
184 } else if left_pointer < ((0x29 - 0x20) * (190 - 94)) {
185 let bmp = gbk_other_decode(left_pointer as u16);
186 handle.write_bmp_excl_ascii(bmp)
187 } else {
188 let bottom_pointer = left_pointer - (((0x7D - 0x20) * (190 - 94)) - 5);
189 let upper_bmp = GBK_BOTTOM[bottom_pointer];
190 handle.write_upper_bmp(upper_bmp)
191 }
192 }
193 } else {
194 let mut trail_minus_offset = second.wrapping_sub(0x40);
196 if trail_minus_offset > (0x7E - 0x40) {
197 let trail_minus_range_start = second.wrapping_sub(0x80);
198 if trail_minus_range_start > (0xFE - 0x80) {
199 if second < 0x80 {
200 return (DecoderResult::Malformed(1, 0),
201 unread_handle_second.unread(),
202 handle.written());
203 }
204 return (DecoderResult::Malformed(2, 0),
205 unread_handle_second.consumed(),
206 handle.written());
207 }
208 trail_minus_offset = second - 0x41;
209 }
210 let pointer = first_minus_offset as usize * 190usize +
211 trail_minus_offset as usize;
212 let upper_bmp = gbk_top_ideograph_decode(pointer as u16);
213 handle.write_upper_bmp(upper_bmp)
214 }
215 },
216 {
217 let third_minus_offset = third.wrapping_sub(0x81);
220 if third_minus_offset > (0xFE - 0x81) {
221 self.pending_ascii = Some(second_minus_offset + 0x30);
228 return (DecoderResult::Malformed(1, 1),
231 unread_handle_third.unread(),
232 handle.written());
233 }
234 third_minus_offset
235 },
236 {
237 let fourth_minus_offset = fourth.wrapping_sub(0x30);
255 if fourth_minus_offset > (0x39 - 0x30) {
256 self.pending_ascii = Some(second_minus_offset + 0x30);
257 self.pending = Gb18030Pending::One(third_minus_offset);
258 return (DecoderResult::Malformed(1, 2),
259 unread_handle_fourth.unread(),
260 handle.written());
261 }
262 let pointer = (first_minus_offset as usize * (10 * 126 * 10)) +
263 (second_minus_offset as usize * (10 * 126)) +
264 (third_minus_offset as usize * 10) +
265 fourth_minus_offset as usize;
266 if pointer <= 39419 {
267 if pointer == 7457 {
269 handle.write_upper_bmp(0xE7C7)
270 } else {
271 handle.write_bmp_excl_ascii(gb18030_range_decode(pointer as u16))
272 }
273 } else if pointer >= 189_000 && pointer <= 1_237_575 {
274 handle.write_astral((pointer - (189_000usize - 0x1_0000usize)) as u32)
276 } else {
277 return (DecoderResult::Malformed(4, 0),
278 unread_handle_fourth.consumed(),
279 handle.written());
280 }
281 },
282 self,
283 non_ascii,
284 first_minus_offset,
285 second,
286 second_minus_offset,
287 unread_handle_second,
288 third,
289 third_minus_offset,
290 unread_handle_third,
291 fourth,
292 fourth_minus_offset,
293 unread_handle_fourth,
294 source,
295 handle,
296 'outermost);
297}
298
299fn gbk_encode_non_unified(bmp: u16) -> Option<(usize, usize)> {
301 if in_inclusive_range16(bmp, 0x2014, 0x3017) || in_inclusive_range16(bmp, 0xFF04, 0xFFE1) {
305 if let Some(pos) = position(&GB2312_SYMBOLS[..], bmp) {
306 return Some((0xA1, pos + 0xA1));
307 }
308 }
309 if in_range16(bmp, 0x3400, 0x4E00) {
311 return position(&GBK_BOTTOM[21..100], bmp).map(|pos| {
312 (
313 0xFE,
314 pos + if pos < (0x3F - 16) {
315 0x40 + 16
316 } else {
317 0x41 + 16
318 },
319 )
320 });
321 }
322 if in_range16(bmp, 0xF900, 0xFB00) {
324 return position(&GBK_BOTTOM[0..21], bmp).map(|pos| {
325 if pos < 5 {
326 (0xFD, pos + (190 - 94 - 5 + 0x41))
328 } else {
329 (0xFE, pos + (0x40 - 5))
331 }
332 });
333 }
334 if bmp < 0x02CA {
336 if in_range16(bmp, 0x00E0, 0x0262) && bmp != 0x00F7 {
337 if let Some(pos) = position(&GB2312_PINYIN[..], bmp) {
339 return Some((0xA8, pos + 0xA1));
340 }
341 } else if in_inclusive_range16(bmp, 0x00A4, 0x00F7)
342 || in_inclusive_range16(bmp, 0x02C7, 0x02C9)
343 {
344 if let Some(pos) = position(&GB2312_SYMBOLS[3..(0xAC - 0x60)], bmp) {
346 return Some((0xA1, pos + 0xA1 + 3));
347 }
348 }
349 return None;
350 }
351
352 if in_inclusive_range16(bmp, 0xE78D, 0xE864) {
353 if let Some(pos) = position(&GB18030_2022_OVERRIDE_PUA[..], bmp) {
355 let pair = &GB18030_2022_OVERRIDE_BYTES[pos];
356 return Some((pair[0].into(), pair[1].into()));
357 }
358 } else if bmp >= 0xFE17 {
359 if let Some(pos) = position(&GB2312_SYMBOLS_AFTER_GREEK[..], bmp) {
361 return Some((0xA6, pos + (0x9F - 0x60 + 0xA1)));
362 }
363 } else if bmp == 0x1E3F {
364 return Some((0xA8, 0x7B - 0x60 + 0xA1));
366 } else if in_range16(bmp, 0xA000, 0xD800) {
367 return None;
370 }
371 if let Some(other_pointer) = gb2312_other_encode(bmp) {
373 let other_lead = other_pointer as usize / 94;
374 let other_trail = other_pointer as usize % 94;
375 return Some((0xA2 + other_lead, 0xA1 + other_trail));
376 }
377 if in_range16(bmp, 0x02DA, 0x2010) {
382 return None;
383 }
384 if let Some(other_pointer) = gbk_other_encode(bmp) {
386 let other_lead = other_pointer as usize / (190 - 94);
387 let other_trail = other_pointer as usize % (190 - 94);
388 let offset = if other_trail < 0x3F { 0x40 } else { 0x41 };
389 return Some((other_lead + (0x81 + 0x20), other_trail + offset));
390 }
391 if in_inclusive_range16(bmp, 0x2E81, 0x2ECA)
393 || in_inclusive_range16(bmp, 0x9FB4, 0x9FBB)
394 || in_inclusive_range16(bmp, 0xE816, 0xE855)
395 {
396 if let Some(pos) = position(&GBK_BOTTOM[21..], bmp) {
397 let trail = pos + 16;
398 let offset = if trail < 0x3F { 0x40 } else { 0x41 };
399 return Some((0xFE, trail + offset));
400 }
401 }
402 let bmp_minus_gb2312_bottom_pua = bmp.wrapping_sub(0xE234);
404 if bmp_minus_gb2312_bottom_pua <= (0xE4C5 - 0xE234) {
405 let pua_lead = bmp_minus_gb2312_bottom_pua as usize / 94;
406 let pua_trail = bmp_minus_gb2312_bottom_pua as usize % 94;
407 return Some((0x81 + 0x77 + pua_lead, 0xA1 + pua_trail));
408 }
409 let bmp_minus_pua_between_hanzi = bmp.wrapping_sub(0xE810);
411 if bmp_minus_pua_between_hanzi < 5 {
412 return Some((0x81 + 0x56, 0xFF - 5 + bmp_minus_pua_between_hanzi as usize));
413 }
414 None
415}
416
417#[cfg(not(feature = "fast-gb-hanzi-encode"))]
418#[inline(always)]
419fn encode_hanzi(bmp: u16, _: u16) -> (u8, u8) {
420 if let Some((lead, trail)) = gb2312_level1_hanzi_encode(bmp) {
421 (lead, trail)
422 } else if let Some(hanzi_pointer) = gb2312_level2_hanzi_encode(bmp) {
423 let hanzi_lead = (hanzi_pointer / 94) + (0xD8);
424 let hanzi_trail = (hanzi_pointer % 94) + 0xA1;
425 (hanzi_lead as u8, hanzi_trail as u8)
426 } else {
427 let (lead, gbk_trail) = if bmp < 0x72DC {
428 let pointer = gbk_top_ideograph_encode(bmp) as usize;
430 let lead = (pointer / 190) + 0x81;
431 let gbk_trail = pointer % 190;
432 (lead, gbk_trail)
433 } else {
434 let gbk_left_ideograph_pointer = gbk_left_ideograph_encode(bmp) as usize;
436 let lead = (gbk_left_ideograph_pointer / (190 - 94)) + (0x81 + 0x29);
437 let gbk_trail = gbk_left_ideograph_pointer % (190 - 94);
438 (lead, gbk_trail)
439 };
440 let offset = if gbk_trail < 0x3F { 0x40 } else { 0x41 };
441 (lead as u8, (gbk_trail + offset) as u8)
442 }
443}
444
/// Variant used when the `fast-gb-hanzi-encode` feature is enabled: delegates
/// to `gbk_hanzi_encode`, passing the code point's offset from U+4E00. The
/// code point itself is unused here.
#[cfg(feature = "fast-gb-hanzi-encode")]
#[inline(always)]
fn encode_hanzi(_bmp: u16, bmp_minus_unified_start: u16) -> (u8, u8) {
    let pair = gbk_hanzi_encode(bmp_minus_unified_start);
    pair
}
450
/// Encoder shared by GB18030 and GBK.
pub struct Gb18030Encoder {
    /// `true` enables four-byte output for code points that have no two-byte
    /// mapping; `false` reports them as unmappable and encodes U+20AC as the
    /// single byte 0x80.
    extended: bool,
}
454
455impl Gb18030Encoder {
456 pub fn new(encoding: &'static Encoding, extended_range: bool) -> Encoder {
457 Encoder::new(
458 encoding,
459 VariantEncoder::Gb18030(Gb18030Encoder {
460 extended: extended_range,
461 }),
462 )
463 }
464
465 pub fn max_buffer_length_from_utf16_without_replacement(
466 &self,
467 u16_length: usize,
468 ) -> Option<usize> {
469 if self.extended {
470 u16_length.checked_mul(4)
471 } else {
472 checked_add(2, u16_length.checked_mul(2))
475 }
476 }
477
478 pub fn max_buffer_length_from_utf8_without_replacement(
479 &self,
480 byte_length: usize,
481 ) -> Option<usize> {
482 if self.extended {
483 checked_add(2, byte_length.checked_mul(2))
490 } else {
491 byte_length.checked_add(3)
497 }
498 }
499
500 ascii_compatible_encoder_functions!(
501 {
502 let bmp_minus_unified_start = bmp.wrapping_sub(0x4E00);
503 if bmp_minus_unified_start < (0x9FA6 - 0x4E00) {
504 let (lead, trail) = encode_hanzi(bmp, bmp_minus_unified_start);
508 handle.write_two(lead, trail)
509 } else if bmp == 0xE5E5 {
510 return (
515 EncoderResult::unmappable_from_bmp(bmp),
516 source.consumed(),
517 handle.written(),
518 );
519 } else if bmp == 0x20AC && !self.extended {
520 handle.write_one(0x80u8)
521 } else {
522 match gbk_encode_non_unified(bmp) {
523 Some((lead, trail)) => handle.write_two(lead as u8, trail as u8),
524 None => {
525 if !self.extended {
526 return (
527 EncoderResult::unmappable_from_bmp(bmp),
528 source.consumed(),
529 handle.written(),
530 );
531 }
532 let range_pointer = gb18030_range_encode(bmp);
533 let first = range_pointer / (10 * 126 * 10);
534 let rem_first = range_pointer % (10 * 126 * 10);
535 let second = rem_first / (10 * 126);
536 let rem_second = rem_first % (10 * 126);
537 let third = rem_second / 10;
538 let fourth = rem_second % 10;
539 handle.write_four(
540 (first + 0x81) as u8,
541 (second + 0x30) as u8,
542 (third + 0x81) as u8,
543 (fourth + 0x30) as u8,
544 )
545 }
546 }
547 }
548 },
549 {
550 if !self.extended {
551 return (
552 EncoderResult::Unmappable(astral),
553 source.consumed(),
554 handle.written(),
555 );
556 }
557 let range_pointer = astral as usize + (189_000usize - 0x1_0000usize);
558 let first = range_pointer / (10 * 126 * 10);
559 let rem_first = range_pointer % (10 * 126 * 10);
560 let second = rem_first / (10 * 126);
561 let rem_second = rem_first % (10 * 126);
562 let third = rem_second / 10;
563 let fourth = rem_second % 10;
564 handle.write_four(
565 (first + 0x81) as u8,
566 (second + 0x30) as u8,
567 (third + 0x81) as u8,
568 (fourth + 0x30) as u8,
569 )
570 },
571 bmp,
572 astral,
573 self,
574 source,
575 handle,
576 copy_ascii_to_check_space_four,
577 check_space_four,
578 false
579 );
580}
581
// Unit tests for the GB18030 decoder and the GB18030/GBK encoders.
//
// The `decode`/`encode` helpers come from the crate's `testing` module.
// Expected output for unmappable characters is a decimal numeric character
// reference (`&#NNNN;`), which must be spelled out in ASCII because
// byte-string literals cannot contain non-ASCII characters.
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    fn decode_gb18030(bytes: &[u8], expect: &str) {
        decode(GB18030, bytes, expect);
    }

    fn encode_gb18030(string: &str, expect: &[u8]) {
        encode(GB18030, string, expect);
    }

    fn encode_gbk(string: &str, expect: &[u8]) {
        encode(GBK, string, expect);
    }

    #[test]
    fn test_gb18030_decode() {
        // Empty
        decode_gb18030(b"", "");

        // ASCII
        decode_gb18030(b"\x61\x62", "\u{0061}\u{0062}");

        // Euro sign: single byte and two-byte form
        decode_gb18030(b"\x80", "\u{20AC}");
        decode_gb18030(b"\xA2\xE3", "\u{20AC}");

        // Two bytes
        decode_gb18030(b"\x81\x40", "\u{4E02}");
        decode_gb18030(b"\x81\x7E", "\u{4E8A}");
        decode_gb18030(b"\x81\x7F", "\u{FFFD}\u{007F}");
        decode_gb18030(b"\x81\x80", "\u{4E90}");
        decode_gb18030(b"\x81\xFE", "\u{4FA2}");
        decode_gb18030(b"\xFE\x40", "\u{FA0C}");
        decode_gb18030(b"\xFE\x7F", "\u{FFFD}\u{007F}");
        decode_gb18030(b"\xFE\x80", "\u{4723}");
        decode_gb18030(b"\xFE\xFE", "\u{E4C5}");

        // Two-byte sequences covered by the override table
        decode_gb18030(b"\xFE\x7E", "\u{9FB9}");
        decode_gb18030(b"\xA6\xDD", "\u{FE14}");

        // Four-byte forms of the same code points
        decode_gb18030(b"\x82\x35\x91\x32", "\u{9FB9}");
        decode_gb18030(b"\x84\x31\x83\x30", "\u{FE14}");

        // Two byte sequences that both decode to U+3000
        decode_gb18030(b"\xA3\xA0", "\u{3000}");
        decode_gb18030(b"\xA1\xA1", "\u{3000}");

        // 0xFF
        decode_gb18030(b"\xFF\x40", "\u{FFFD}\u{0040}");
        decode_gb18030(b"\xE3\xFF\x9A\x33", "\u{FFFD}\u{FFFD}");
        decode_gb18030(b"\xFF\x32\x9A\x33", "\u{FFFD}\u{0032}\u{FFFD}");
        decode_gb18030(b"\xFF\x40\x00", "\u{FFFD}\u{0040}\u{0000}");
        decode_gb18030(b"\xE3\xFF\x9A\x33\x00", "\u{FFFD}\u{FFFD}\u{0033}\u{0000}");
        decode_gb18030(
            b"\xFF\x32\x9A\x33\x00",
            "\u{FFFD}\u{0032}\u{FFFD}\u{0033}\u{0000}",
        );

        // Four bytes
        decode_gb18030(b"\x81\x30\x81\x30", "\u{0080}");
        decode_gb18030(b"\x81\x35\xF4\x37", "\u{E7C7}");
        decode_gb18030(b"\x81\x37\xA3\x30", "\u{2603}");
        decode_gb18030(b"\x94\x39\xDA\x33", "\u{1F4A9}");
        decode_gb18030(b"\xE3\x32\x9A\x35", "\u{10FFFF}");
        decode_gb18030(b"\xE3\x32\x9A\x36\x81\x30", "\u{FFFD}\u{FFFD}");
        decode_gb18030(b"\xE3\x32\x9A\x36\x81\x40", "\u{FFFD}\u{4E02}");
        decode_gb18030(b"\xE3\x32\x9A", "\u{FFFD}");
        decode_gb18030(b"\xE3\x32\x9A\x00", "\u{FFFD}\u{0032}\u{FFFD}\u{0000}");
    }

    #[test]
    fn test_gb18030_encode() {
        // Empty
        encode_gb18030("", b"");

        // ASCII
        encode_gb18030("\u{0061}\u{0062}", b"\x61\x62");

        // Euro sign
        encode_gb18030("\u{20AC}", b"\xA2\xE3");

        // Two bytes
        encode_gb18030("\u{4E02}", b"\x81\x40");
        encode_gb18030("\u{4E8A}", b"\x81\x7E");
        if !cfg!(miri) {
            // Skipped under Miri
            encode_gb18030("\u{4E90}", b"\x81\x80");
            encode_gb18030("\u{4FA2}", b"\x81\xFE");
            encode_gb18030("\u{FA0C}", b"\xFE\x40");
            encode_gb18030("\u{E843}", b"\xFE\x7E");
            encode_gb18030("\u{4723}", b"\xFE\x80");
            encode_gb18030("\u{E4C5}", b"\xFE\xFE");
        }

        // U+E5E5 is unmappable and becomes a numeric character reference
        encode_gb18030("\u{E5E5}", b"&#58853;");
        encode_gb18030("\u{3000}", b"\xA1\xA1");

        // Four bytes
        encode_gb18030("\u{0080}", b"\x81\x30\x81\x30");
        encode_gb18030("\u{E7C7}", b"\x81\x35\xF4\x37");
        if !cfg!(miri) {
            // Skipped under Miri
            encode_gb18030("\u{2603}", b"\x81\x37\xA3\x30");
            encode_gb18030("\u{1F4A9}", b"\x94\x39\xDA\x33");
            encode_gb18030("\u{10FFFF}", b"\xE3\x32\x9A\x35");
        }

        // Edge cases
        encode_gb18030("\u{00F7}", b"\xA1\xC2");

        // Code points covered by the override table
        encode_gb18030("\u{9FB9}", b"\xFE\x7E");
        encode_gb18030("\u{FE14}", b"\xA6\xDD");
        encode_gb18030("\u{E843}", b"\xFE\x7E");
        encode_gb18030("\u{E791}", b"\xA6\xDD");

        // Regression
        encode_gb18030("\u{E817}", b"\xFE\x52");
    }

    #[test]
    fn test_gbk_encode() {
        // Empty
        encode_gbk("", b"");

        // ASCII
        encode_gbk("\u{0061}\u{0062}", b"\x61\x62");

        // Euro sign is a single byte in GBK
        encode_gbk("\u{20AC}", b"\x80");

        // Two bytes
        encode_gbk("\u{4E02}", b"\x81\x40");
        encode_gbk("\u{4E8A}", b"\x81\x7E");
        if !cfg!(miri) {
            // Skipped under Miri
            encode_gbk("\u{4E90}", b"\x81\x80");
            encode_gbk("\u{4FA2}", b"\x81\xFE");
            encode_gbk("\u{FA0C}", b"\xFE\x40");
            encode_gbk("\u{E843}", b"\xFE\x7E");
            encode_gbk("\u{4723}", b"\xFE\x80");
            encode_gbk("\u{E4C5}", b"\xFE\xFE");
        }

        // U+E5E5 is unmappable and becomes a numeric character reference
        encode_gbk("\u{E5E5}", b"&#58853;");
        encode_gbk("\u{3000}", b"\xA1\xA1");

        // Code points that need four bytes in GB18030 are unmappable in GBK
        encode_gbk("\u{0080}", b"&#128;");
        encode_gbk("\u{E7C7}", b"&#59335;");
        if !cfg!(miri) {
            // Skipped under Miri
            encode_gbk("\u{2603}", b"&#9731;");
            encode_gbk("\u{1F4A9}", b"&#128169;");
            encode_gbk("\u{10FFFF}", b"&#1114111;");
        }

        // Edge cases
        encode_gbk("\u{00F7}", b"\xA1\xC2");

        // Code points covered by the override table. These go through the
        // two-byte path that GBK shares with GB18030, so the expected bytes
        // are the same as in `test_gb18030_encode`.
        encode_gbk("\u{9FB9}", b"\xFE\x7E");
        encode_gbk("\u{FE14}", b"\xA6\xDD");
        encode_gbk("\u{E843}", b"\xFE\x7E");
        encode_gbk("\u{E791}", b"\xA6\xDD");

        // Regression
        encode_gbk("\u{E817}", b"\xFE\x52");
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_gb18030_decode_all() {
        let input = include_bytes!("test_data/gb18030_in.txt");
        let expectation = include_str!("test_data/gb18030_in_ref.txt");
        let (cow, had_errors) = GB18030.decode_without_bom_handling(input);
        assert!(!had_errors, "Should not have had errors.");
        assert_eq!(&cow[..], expectation);
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_gb18030_encode_all() {
        let input = include_str!("test_data/gb18030_out.txt");
        let expectation = include_bytes!("test_data/gb18030_out_ref.txt");
        let (cow, encoding, had_errors) = GB18030.encode(input);
        assert!(!had_errors, "Should not have had errors.");
        assert_eq!(encoding, GB18030);
        assert_eq!(&cow[..], &expectation[..]);
    }

    #[test]
    fn test_gb18030_encode_from_utf16_max_length() {
        let mut output = [0u8; 20];
        let mut encoder = GB18030.new_encoder();
        {
            // The buffer is sized to exactly what the encoder says it needs.
            let needed = encoder
                .max_buffer_length_from_utf16_without_replacement(1)
                .unwrap();
            let (result, read, written) = encoder.encode_from_utf16_without_replacement(
                &[0x3000],
                &mut output[..needed],
                true,
            );
            assert_eq!(result, EncoderResult::InputEmpty);
            assert_eq!(read, 1);
            assert_eq!(written, 2);
            assert_eq!(output[0], 0xA1);
            assert_eq!(output[1], 0xA1);
        }
    }
}