rsvg/
accept_language.rs

1//! Parser for an Accept-Language HTTP header.
2
3use language_tags::{LanguageTag, ParseError};
4use locale_config::{LanguageRange, Locale};
5
6use std::error;
7use std::fmt;
8use std::str::FromStr;
9
10#[cfg(doc)]
11use crate::api::CairoRenderer;
12
13/// Used to set the language for rendering.
14///
15/// SVG documents can use the `<switch>` element, whose children have a `systemLanguage`
16/// attribute; only the first child which has a `systemLanguage` that matches the
17/// preferred languages will be rendered.
18///
19/// This enum, used with [`CairoRenderer::with_language`], configures how to obtain the
20/// user's prefererred languages.
21pub enum Language {
22    /// Use the Unix environment variables `LANGUAGE`, `LC_ALL`, `LC_MESSAGES` and `LANG` to obtain the
23    /// user's language.
24    ///
25    /// This uses [`g_get_language_names()`][ggln] underneath.
26    ///
27    /// [ggln]: https://docs.gtk.org/glib/func.get_language_names.html
28    FromEnvironment,
29
30    /// Use a list of languages in the form of an HTTP Accept-Language header, like `es, en;q=0.8`.
31    ///
32    /// This is convenient when you want to select an explicit set of languages, instead of
33    /// assuming that the Unix environment has the language you want.
34    AcceptLanguage(AcceptLanguage),
35}
36
37/// `Language` but with the environment's locale converted to something we can use.
38#[derive(Clone)]
39pub enum UserLanguage {
40    LanguageTags(LanguageTags),
41    AcceptLanguage(AcceptLanguage),
42}
43
/// Quality value attached to one language in an Accept-Language list.
///
/// `None` records that no explicit weight was given for the language.
#[derive(Debug, Clone, PartialEq)]
struct Weight(Option<f32>);

impl Weight {
    /// Returns the weight as a plain number; an absent weight counts as `1.0`.
    fn numeric(&self) -> f32 {
        match self.0 {
            Some(value) => value,
            None => 1.0,
        }
    }
}
52
53#[derive(Clone, Debug, PartialEq)]
54struct Item {
55    tag: LanguageTag,
56    weight: Weight,
57}
58
59/// Stores a parsed version of an HTTP Accept-Language header.
60///
61/// RFC 7231: <https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.5>
62#[derive(Clone, Debug, PartialEq)]
63pub struct AcceptLanguage(Box<[Item]>);
64
65/// Errors when parsing an `AcceptLanguage`.
66#[derive(Debug, PartialEq)]
67enum AcceptLanguageError {
68    NoElements,
69    InvalidCharacters,
70    InvalidLanguageTag(ParseError),
71    InvalidWeight,
72}
73
74impl error::Error for AcceptLanguageError {}
75
76impl fmt::Display for AcceptLanguageError {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        match self {
79            Self::NoElements => write!(f, "no language tags in list"),
80            Self::InvalidCharacters => write!(f, "invalid characters in language list"),
81            Self::InvalidLanguageTag(e) => write!(f, "invalid language tag: {e}"),
82            Self::InvalidWeight => write!(f, "invalid q= weight"),
83        }
84    }
85}
86
/// The characters that count as optional whitespace (OWS) in RFC 7230: space (`%x20`)
/// and horizontal tab (`%x09`).
///
/// RFC 7230: <https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.3>
const OWS: [char; 2] = [' ', '\t'];
91
92impl AcceptLanguage {
93    /// Parses the payload of an HTTP Accept-Language header.
94    ///
95    /// For example, a valid header looks like `es, en;q=0.8`, and means, "I prefer Spanish,
96    /// but will also accept English".
97    ///
98    /// Use this function to construct a [`Language::AcceptLanguage`]
99    /// variant to pass to the [`CairoRenderer::with_language`] function.
100    ///
101    /// See RFC 7231 for details: <https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.5>
102    pub fn parse(s: &str) -> Result<AcceptLanguage, String> {
103        AcceptLanguage::parse_internal(s).map_err(|e| format!("{}", e))
104    }
105
106    /// Internal constructor.  We don't expose [`AcceptLanguageError`] in the public API;
107    /// there we just use a [`String`].
108    fn parse_internal(s: &str) -> Result<AcceptLanguage, AcceptLanguageError> {
109        if !s.is_ascii() {
110            return Err(AcceptLanguageError::InvalidCharacters);
111        }
112
113        let mut items = Vec::new();
114
115        for val in s.split(',') {
116            let trimmed = val.trim_matches(&OWS[..]);
117            if trimmed.is_empty() {
118                continue;
119            }
120
121            items.push(Item::parse(trimmed)?);
122        }
123
124        if items.is_empty() {
125            Err(AcceptLanguageError::NoElements)
126        } else {
127            Ok(AcceptLanguage(items.into_boxed_slice()))
128        }
129    }
130
131    fn iter(&self) -> impl Iterator<Item = (&LanguageTag, f32)> {
132        self.0.iter().map(|item| (&item.tag, item.weight.numeric()))
133    }
134
135    fn any_matches(&self, tag: &LanguageTag) -> bool {
136        self.iter().any(|(self_tag, _weight)| tag.matches(self_tag))
137    }
138}
139
140impl Item {
141    fn parse(s: &str) -> Result<Item, AcceptLanguageError> {
142        let semicolon_pos = s.find(';');
143
144        let (before_semicolon, after_semicolon) = if let Some(semi) = semicolon_pos {
145            (&s[..semi], Some(&s[semi + 1..]))
146        } else {
147            (s, None)
148        };
149
150        let tag = LanguageTag::parse(before_semicolon)
151            .map_err(AcceptLanguageError::InvalidLanguageTag)?;
152
153        let weight = if let Some(quality) = after_semicolon {
154            let quality = quality.trim_start_matches(&OWS[..]);
155
156            let number = if let Some(qvalue) = quality.strip_prefix("q=") {
157                if qvalue.starts_with(&['0', '1'][..]) {
158                    let first_digit = qvalue.chars().next().unwrap();
159
160                    if let Some(decimals) = qvalue[1..].strip_prefix('.') {
161                        if (first_digit == '0'
162                            && decimals.len() <= 3
163                            && decimals.chars().all(|c| c.is_ascii_digit()))
164                            || (first_digit == '1'
165                                && decimals.len() <= 3
166                                && decimals.chars().all(|c| c == '0'))
167                        {
168                            qvalue
169                        } else {
170                            return Err(AcceptLanguageError::InvalidWeight);
171                        }
172                    } else {
173                        qvalue
174                    }
175                } else {
176                    return Err(AcceptLanguageError::InvalidWeight);
177                }
178            } else {
179                return Err(AcceptLanguageError::InvalidWeight);
180            };
181
182            Weight(Some(
183                f32::from_str(number).map_err(|_| AcceptLanguageError::InvalidWeight)?,
184            ))
185        } else {
186            Weight(None)
187        };
188
189        Ok(Item { tag, weight })
190    }
191}
192
193/// A list of BCP47 language tags.
194///
195/// RFC 5664: <https://www.rfc-editor.org/info/rfc5664>
196#[derive(Debug, Clone, PartialEq)]
197pub struct LanguageTags(Box<[LanguageTag]>);
198
199impl LanguageTags {
200    pub fn empty() -> Self {
201        LanguageTags(Box::new([]))
202    }
203
204    /// Converts a `Locale` to a set of language tags.
205    pub fn from_locale(locale: &Locale) -> Result<LanguageTags, String> {
206        let mut tags = Vec::new();
207
208        for locale_range in locale.tags_for("messages") {
209            if locale_range == LanguageRange::invariant() {
210                continue;
211            }
212
213            let str_locale_range = locale_range.as_ref();
214
215            let locale_tag = LanguageTag::from_str(str_locale_range).map_err(|e| {
216                format!("invalid language tag \"{str_locale_range}\" in locale: {e}")
217            })?;
218
219            if !locale_tag.is_language_range() {
220                return Err(format!(
221                    "language tag \"{locale_tag}\" is not a language range"
222                ));
223            }
224
225            tags.push(locale_tag);
226        }
227
228        Ok(LanguageTags(Box::from(tags)))
229    }
230
231    pub fn from(tags: Vec<LanguageTag>) -> LanguageTags {
232        LanguageTags(Box::from(tags))
233    }
234
235    pub fn iter(&self) -> impl Iterator<Item = &LanguageTag> {
236        self.0.iter()
237    }
238
239    pub fn any_matches(&self, language_tag: &LanguageTag) -> bool {
240        self.0.iter().any(|tag| tag.matches(language_tag))
241    }
242}
243
244impl UserLanguage {
245    pub fn any_matches(&self, tags: &LanguageTags) -> bool {
246        match *self {
247            UserLanguage::LanguageTags(ref language_tags) => {
248                tags.iter().any(|tag| language_tags.any_matches(tag))
249            }
250            UserLanguage::AcceptLanguage(ref accept_language) => {
251                tags.iter().any(|tag| accept_language.any_matches(tag))
252            }
253        }
254    }
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `Item` expected for `tag` with the given optional weight.
    fn item(tag: &str, weight: Option<f32>) -> Item {
        Item {
            tag: LanguageTag::parse(tag).unwrap(),
            weight: Weight(weight),
        }
    }

    /// Asserts that `input` parses to exactly the `expected` items, in order.
    #[track_caller]
    fn assert_parses_to(input: &str, expected: Vec<Item>) {
        assert_eq!(
            AcceptLanguage::parse_internal(input).unwrap(),
            AcceptLanguage(expected.into_boxed_slice())
        );
    }

    #[test]
    fn parses_accept_language() {
        // plain tag
        assert_parses_to("es-MX", vec![item("es-MX", None)]);

        // with quality
        assert_parses_to("es-MX;q=1", vec![item("es-MX", Some(1.0))]);
        assert_parses_to("es-MX;q=0", vec![item("es-MX", Some(0.0))]);

        // zero decimals are allowed
        assert_parses_to("es-MX;q=0.", vec![item("es-MX", Some(0.0))]);
        assert_parses_to("es-MX;q=1.", vec![item("es-MX", Some(1.0))]);

        // one, two and three decimals
        assert_parses_to("es-MX;q=1.0", vec![item("es-MX", Some(1.0))]);
        assert_parses_to("es-MX;q=1.00", vec![item("es-MX", Some(1.0))]);
        assert_parses_to("es-MX;q=1.000", vec![item("es-MX", Some(1.0))]);

        // multiple elements
        assert_parses_to(
            "es-MX, en; q=0.5",
            vec![item("es-MX", None), item("en", Some(0.5))],
        );

        // superfluous whitespace
        assert_parses_to(
            ",es-MX;q=1.000  , en; q=0.125  ,  ,",
            vec![item("es-MX", Some(1.0)), item("en", Some(0.125))],
        );
    }

    #[test]
    fn empty_lists() {
        for input in ["", ",", ", , ,,,"] {
            assert!(
                matches!(
                    AcceptLanguage::parse_internal(input),
                    Err(AcceptLanguageError::NoElements)
                ),
                "input: {input:?}"
            );
        }
    }

    #[test]
    fn ascii_only() {
        assert!(matches!(
            AcceptLanguage::parse_internal("ës"),
            Err(AcceptLanguageError::InvalidCharacters)
        ));
    }

    #[test]
    fn invalid_tag() {
        assert!(matches!(
            AcceptLanguage::parse_internal("no_underscores"),
            Err(AcceptLanguageError::InvalidLanguageTag(_))
        ));
    }

    #[test]
    fn invalid_weight() {
        let inputs = [
            "es;",
            "es;q",
            "es;q=",
            "es;q=2",
            "es;q=1.1",
            "es;q=1.12",
            "es;q=1.123",
            // Up to three decimals allowed per RFC 7231
            "es;q=0.1234",
        ];

        for input in inputs {
            assert!(
                matches!(
                    AcceptLanguage::parse_internal(input),
                    Err(AcceptLanguageError::InvalidWeight)
                ),
                "input: {input:?}"
            );
        }
    }

    #[test]
    fn iter() {
        let accept_language = AcceptLanguage::parse_internal("es-MX, en; q=0.5").unwrap();
        let mut entries = accept_language.iter();

        // An element without an explicit weight reports 1.0.
        let (tag, weight) = entries.next().unwrap();
        assert_eq!(*tag, LanguageTag::parse("es-MX").unwrap());
        assert_eq!(weight, 1.0);

        let (tag, weight) = entries.next().unwrap();
        assert_eq!(*tag, LanguageTag::parse("en").unwrap());
        assert_eq!(weight, 0.5);

        assert!(entries.next().is_none());
    }
}