encoding_rs/
x_user_defined.rs

1// Copyright Mozilla Foundation. See the COPYRIGHT
2// file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10use super::*;
11use crate::handles::*;
12use crate::variant::*;
13
14cfg_if! {
15    if #[cfg(feature = "simd-accel")] {
16        use simd_funcs::*;
17        use core::simd::u16x8;
18        use core::simd::cmp::SimdPartialOrd;
19
20        #[inline(always)]
21        fn shift_upper(unpacked: u16x8) -> u16x8 {
22            let highest_ascii = u16x8::splat(0x7F);
23            unpacked + unpacked.simd_gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0))        }
24    } else {
25    }
26}
27
28pub struct UserDefinedDecoder;
29
30impl UserDefinedDecoder {
31    pub fn new() -> VariantDecoder {
32        VariantDecoder::UserDefined(UserDefinedDecoder)
33    }
34
35    pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
36        Some(byte_length)
37    }
38
39    pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
40        byte_length.checked_mul(3)
41    }
42
43    pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
44        byte_length.checked_mul(3)
45    }
46
47    decoder_function!(
48        {},
49        {},
50        {},
51        {
52            if b < 0x80 {
53                // ASCII run not optimized, because binary data expected
54                destination_handle.write_ascii(b);
55                continue;
56            }
57            destination_handle.write_upper_bmp(u16::from(b) + 0xF700);
58            continue;
59        },
60        self,
61        src_consumed,
62        dest,
63        source,
64        b,
65        destination_handle,
66        _unread_handle,
67        check_space_bmp,
68        decode_to_utf8_raw,
69        u8,
70        Utf8Destination
71    );
72
73    #[cfg(not(feature = "simd-accel"))]
74    pub fn decode_to_utf16_raw(
75        &mut self,
76        src: &[u8],
77        dst: &mut [u16],
78        _last: bool,
79    ) -> (DecoderResult, usize, usize) {
80        let (pending, length) = if dst.len() < src.len() {
81            (DecoderResult::OutputFull, dst.len())
82        } else {
83            (DecoderResult::InputEmpty, src.len())
84        };
85        let src_trim = &src[..length];
86        let dst_trim = &mut dst[..length];
87        src_trim
88            .iter()
89            .zip(dst_trim.iter_mut())
90            .for_each(|(from, to)| {
91                *to = {
92                    let unit = *from;
93                    if unit < 0x80 {
94                        u16::from(unit)
95                    } else {
96                        u16::from(unit) + 0xF700
97                    }
98                }
99            });
100        (pending, length, length)
101    }
102
103    #[cfg(feature = "simd-accel")]
104    pub fn decode_to_utf16_raw(
105        &mut self,
106        src: &[u8],
107        dst: &mut [u16],
108        _last: bool,
109    ) -> (DecoderResult, usize, usize) {
110        let (pending, length) = if dst.len() < src.len() {
111            (DecoderResult::OutputFull, dst.len())
112        } else {
113            (DecoderResult::InputEmpty, src.len())
114        };
115        // Not bothering with alignment
116        let tail_start = length & !0xF;
117        let simd_iterations = length >> 4;
118        let src_ptr = src.as_ptr();
119        let dst_ptr = dst.as_mut_ptr();
120        // Safety: This is `for i in 0..length / 16`
121        for i in 0..simd_iterations {
122            // Safety: This is in bounds: length is the minumum valid length for both src/dst
123            // and i ranges to length/16, so multiplying by 16 will always be `< length` and can do
124            // a 16 byte read
125            let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) };
126            let (first, second) = simd_unpack(input);
127            unsafe {
128                // Safety: same as above, but this is two consecutive 8-byte reads
129                store8_unaligned(dst_ptr.add(i * 16), shift_upper(first));
130                store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second));
131            }
132        }
133        let src_tail = &src[tail_start..length];
134        let dst_tail = &mut dst[tail_start..length];
135        src_tail
136            .iter()
137            .zip(dst_tail.iter_mut())
138            .for_each(|(from, to)| {
139                *to = {
140                    let unit = *from;
141                    if unit < 0x80 {
142                        u16::from(unit)
143                    } else {
144                        u16::from(unit) + 0xF700
145                    }
146                }
147            });
148        (pending, length, length)
149    }
150}
151
152pub struct UserDefinedEncoder;
153
154impl UserDefinedEncoder {
155    pub fn new(encoding: &'static Encoding) -> Encoder {
156        Encoder::new(encoding, VariantEncoder::UserDefined(UserDefinedEncoder))
157    }
158
159    pub fn max_buffer_length_from_utf16_without_replacement(
160        &self,
161        u16_length: usize,
162    ) -> Option<usize> {
163        Some(u16_length)
164    }
165
166    pub fn max_buffer_length_from_utf8_without_replacement(
167        &self,
168        byte_length: usize,
169    ) -> Option<usize> {
170        Some(byte_length)
171    }
172
173    encoder_functions!(
174        {},
175        {
176            if c <= '\u{7F}' {
177                // TODO optimize ASCII run
178                destination_handle.write_one(c as u8);
179                continue;
180            }
181            if c < '\u{F780}' || c > '\u{F7FF}' {
182                return (
183                    EncoderResult::Unmappable(c),
184                    unread_handle.consumed(),
185                    destination_handle.written(),
186                );
187            }
188            destination_handle.write_one((u32::from(c) - 0xF700) as u8);
189            continue;
190        },
191        self,
192        src_consumed,
193        source,
194        dest,
195        c,
196        destination_handle,
197        unread_handle,
198        check_space_one
199    );
200}
201
202// Any copyright to the test code below this comment is dedicated to the
203// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
204
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Decodes `bytes` as x-user-defined and checks the result against `expect`.
    fn decode_x_user_defined(bytes: &[u8], expect: &str) {
        decode(X_USER_DEFINED, bytes, expect);
    }

    /// Encodes `string` as x-user-defined and checks the result against `expect`.
    fn encode_x_user_defined(string: &str, expect: &[u8]) {
        encode(X_USER_DEFINED, string, expect);
    }

    #[test]
    fn test_x_user_defined_decode() {
        let cases: [(&[u8], &str); 4] = [
            // Empty
            (b"", ""),
            // ASCII
            (b"\x61\x62", "\u{0061}\u{0062}"),
            // Upper half
            (b"\x80\xFF", "\u{F780}\u{F7FF}"),
            // Mixed input longer than 16 bytes
            (
                b"\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62",
                "\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}",
            ),
        ];
        for (bytes, expect) in cases.iter() {
            decode_x_user_defined(bytes, expect);
        }
    }

    #[test]
    fn test_x_user_defined_encode() {
        let cases: [(&str, &[u8]); 4] = [
            // Empty
            ("", b""),
            // ASCII
            ("\u{0061}\u{0062}", b"\x61\x62"),
            // Mappable private-use range
            ("\u{F780}\u{F7FF}", b"\x80\xFF"),
            // Just outside the mappable range: numeric character references
            ("\u{F77F}\u{F800}", b"&#63359;&#63488;"),
        ];
        for (string, expect) in cases.iter() {
            encode_x_user_defined(string, expect);
        }
    }

    #[test]
    fn test_x_user_defined_from_two_low_surrogates() {
        let expectation = b"&#65533;&#65533;";
        let input = [0xDC00u16, 0xDEDEu16];
        let mut output = [0u8; 40];
        let mut encoder = X_USER_DEFINED.new_encoder();
        let (result, read, written, had_errors) =
            encoder.encode_from_utf16(&input, &mut output[..], true);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expectation.len());
        assert!(had_errors);
        assert_eq!(&output[..written], expectation);
    }
}
255}