//! Determine which URLs are allowed for loading.

use std::fmt;
use std::ops::Deref;
use url::Url;

use crate::error::AllowedUrlError;

/// Decides which URLs are allowed to be loaded.
10
///
11
/// Currently only contains the base URL.
12
///
13
/// The plan is to add:
14
/// base_only:    Only allow to load content from the same base URL. By default
15
//                this restriction is enabled and requires to provide base_url.
16
/// include_xml:  Allows to use xi:include with XML. Enabled by default.
17
/// include_text: Allows to use xi:include with text. Enabled by default.
18
/// local_only:   Only allow to load content from the local filesystem.
19
///               Enabled by default.
20
#[derive(Clone)]
21
pub struct UrlResolver {
22
    /// Base URL; all relative references will be resolved with respect to this.
23
    pub base_url: Option<Url>,
24
}
impl UrlResolver {
27
    /// Creates a `UrlResolver` with defaults, and sets the `base_url`.
28
22217
    pub fn new(base_url: Option<Url>) -> Self {
29
22217
        UrlResolver { base_url }
30
22217
    }
31

            
32
    /// Decides which URLs are allowed to be loaded based on the presence of a base URL.
33
    ///
34
    /// This function implements the policy described in "Security and locations of
35
    /// referenced files" in the [crate
36
    /// documentation](index.html#security-and-locations-of-referenced-files).
37
7219
    pub fn resolve_href(&self, href: &str) -> Result<AllowedUrl, AllowedUrlError> {
38
7219
        let url = Url::options()
39
7219
            .base_url(self.base_url.as_ref())
40
7219
            .parse(href)
41
7219
            .map_err(AllowedUrlError::UrlParseError)?;
42

            
43
        // Allow loads of data: from any location
44
5185
        if url.scheme() == "data" {
45
685
            return Ok(AllowedUrl(url));
46
4500
        }
47
4500

            
48
4500
        // Queries are not allowed.
49
4500
        if url.query().is_some() {
50
20
            return Err(AllowedUrlError::NoQueriesAllowed);
51
4480
        }
52
4480

            
53
4480
        // Fragment identifiers are not allowed.  They should have been stripped
54
4480
        // upstream, by NodeId.
55
4480
        if url.fragment().is_some() {
56
192
            return Err(AllowedUrlError::NoFragmentIdentifierAllowed);
57
4288
        }
58
4288

            
59
4288
        // All other sources require a base url
60
4288
        if self.base_url.is_none() {
61
19
            return Err(AllowedUrlError::BaseRequired);
62
4269
        }
63
4269

            
64
4269
        let base_url = self.base_url.as_ref().unwrap();
65
4269

            
66
4269
        // Deny loads from differing URI schemes
67
4269
        if url.scheme() != base_url.scheme() {
68
191
            return Err(AllowedUrlError::DifferentUriSchemes);
69
4078
        }
70
4078

            
71
4078
        // resource: is allowed to load anything from other resources
72
4078
        if url.scheme() == "resource" {
73
            return Ok(AllowedUrl(url));
74
4078
        }
75
4078

            
76
4078
        // Non-file: isn't allowed to load anything
77
4078
        if url.scheme() != "file" {
78
1
            return Err(AllowedUrlError::DisallowedScheme);
79
4077
        }
80
4077

            
81
4077
        // The rest of this function assumes file: URLs; guard against
82
4077
        // incorrect refactoring.
83
4077
        assert!(url.scheme() == "file");
84

            
85
        // If we have a base_uri of "file:///foo/bar.svg", and resolve an href of ".",
86
        // Url.parse() will give us "file:///foo/".  We don't want that, so check
87
        // if the last path segment is empty - it will not be empty for a normal file.
88

            
89
4077
        if let Some(mut segments) = url.path_segments() {
90
4077
            if segments
91
4077
                .next_back()
92
4077
                .expect("URL path segments always contain at last 1 element")
93
4077
                .is_empty()
94
            {
95
20
                return Err(AllowedUrlError::NotSiblingOrChildOfBaseFile);
96
4057
            }
97
        } else {
98
            unreachable!("the file: URL cannot have an empty path");
99
        }
100

            
101
        // We have two file: URIs.  Now canonicalize them (remove .. and symlinks, etc.)
102
        // and see if the directories match
103

            
104
4057
        let url_path = url
105
4057
            .to_file_path()
106
4057
            .map_err(|_| AllowedUrlError::InvalidPath)?;
107
4057
        let base_path = base_url
108
4057
            .to_file_path()
109
4057
            .map_err(|_| AllowedUrlError::InvalidPath)?;
110

            
111
4057
        let base_parent = base_path.parent();
112
4057
        if base_parent.is_none() {
113
1
            return Err(AllowedUrlError::BaseIsRoot);
114
4056
        }
115
4056

            
116
4056
        let base_parent = base_parent.unwrap();
117

            
118
4056
        let path_canon = url_path
119
4056
            .canonicalize()
120
4056
            .map_err(|_| AllowedUrlError::CanonicalizationError)?;
121
3581
        let parent_canon = base_parent
122
3581
            .canonicalize()
123
3581
            .map_err(|_| AllowedUrlError::CanonicalizationError)?;
124

            
125
3576
        if path_canon.starts_with(parent_canon) {
126
            // Finally, convert the canonicalized path back to a URL.
127
3575
            let path_to_url = Url::from_file_path(path_canon).unwrap();
128
3575
            Ok(AllowedUrl(path_to_url))
129
        } else {
130
1
            Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
131
        }
132
7219
    }
133
}
/// Wrapper for URLs which are allowed to be loaded
136
///
137
/// SVG files can reference other files (PNG/JPEG images, other SVGs,
138
/// CSS files, etc.).  This object is constructed by checking whether
139
/// a specified `href` (a possibly-relative filename, for example)
140
/// should be allowed to be loaded, given the base URL of the SVG
141
/// being loaded.
142
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
143
pub struct AllowedUrl(Url);
impl Deref for AllowedUrl {
146
    type Target = Url;
147

            
148
1847
    fn deref(&self) -> &Url {
149
1847
        &self.0
150
1847
    }
151
}
impl fmt::Display for AllowedUrl {
154
38
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155
38
        self.0.fmt(f)
156
38
    }
157
}
#[cfg(test)]
mod tests {
    use super::*;

    use std::path::PathBuf;

    #[test]
    fn disallows_relative_file_with_no_base_file() {
        let url_resolver = UrlResolver::new(None);
        assert!(matches!(
            url_resolver.resolve_href("foo.svg"),
            Err(AllowedUrlError::UrlParseError(
                url::ParseError::RelativeUrlWithoutBase
            ))
        ));
    }

    #[test]
    fn disallows_different_schemes() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse("http://example.com/malicious.svg").unwrap(),
        ));
        assert!(matches!(
            url_resolver.resolve_href("file:///etc/passwd"),
            Err(AllowedUrlError::DifferentUriSchemes)
        ));
    }

    /// Builds a `file:` URI string for the absolute path `p`, adding a drive
    /// letter on Windows.
    fn make_file_uri(p: &str) -> String {
        if cfg!(windows) {
            format!("file:///c:{}", p)
        } else {
            format!("file://{}", p)
        }
    }

    #[test]
    fn disallows_base_is_root() {
        let url_resolver = UrlResolver::new(Some(Url::parse(&make_file_uri("/")).unwrap()));
        assert!(matches!(
            url_resolver.resolve_href("foo.svg"),
            Err(AllowedUrlError::BaseIsRoot)
        ));
    }

    #[test]
    fn disallows_non_file_scheme() {
        let url_resolver = UrlResolver::new(Some(Url::parse("http://foo.bar/baz.svg").unwrap()));
        assert!(matches!(
            url_resolver.resolve_href("foo.svg"),
            Err(AllowedUrlError::DisallowedScheme)
        ));
    }

    #[test]
    fn allows_data_url_with_no_base_file() {
        let url_resolver = UrlResolver::new(None);
        assert_eq!(
            url_resolver
                .resolve_href("data:image/jpeg;base64,xxyyzz")
                .unwrap()
                .as_str(),
            "data:image/jpeg;base64,xxyyzz",
        );
    }

    /// Canonicalizes a fixture path (relative to the directory the tests run in)
    /// and turns it into a `file:` URL.
    fn url_from_test_fixtures(filename_relative_to_librsvg_srcdir: &str) -> Url {
        let path = PathBuf::from(filename_relative_to_librsvg_srcdir);
        let absolute = path
            .canonicalize()
            .expect("files from test fixtures are supposed to canonicalize");
        Url::from_file_path(absolute).unwrap()
    }

    #[test]
    fn allows_relative() {
        let base_url = url_from_test_fixtures("tests/fixtures/loading/bar.svg");
        let url_resolver = UrlResolver::new(Some(base_url));

        let resolved = url_resolver.resolve_href("foo.svg").unwrap();
        let resolved_str = resolved.as_str();
        assert!(resolved_str.ends_with("/loading/foo.svg"));
    }

    #[test]
    fn allows_sibling() {
        let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
            "tests/fixtures/loading/bar.svg",
        )));
        let resolved = url_resolver
            .resolve_href(url_from_test_fixtures("tests/fixtures/loading/foo.svg").as_str())
            .unwrap();

        let resolved_str = resolved.as_str();
        assert!(resolved_str.ends_with("/loading/foo.svg"));
    }

    #[test]
    fn allows_child_of_sibling() {
        let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
            "tests/fixtures/loading/bar.svg",
        )));
        let resolved = url_resolver
            .resolve_href(url_from_test_fixtures("tests/fixtures/loading/subdir/baz.svg").as_str())
            .unwrap();

        let resolved_str = resolved.as_str();
        assert!(resolved_str.ends_with("/loading/subdir/baz.svg"));
    }

    // Ignore on Windows since we test for /etc/passwd
    #[cfg(unix)]
    #[test]
    fn disallows_non_sibling() {
        let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
            "tests/fixtures/loading/bar.svg",
        )));
        assert!(matches!(
            url_resolver.resolve_href(&make_file_uri("/etc/passwd")),
            Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
        ));
    }

    #[test]
    fn disallows_queries() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse(&make_file_uri("/example/bar.svg")).unwrap(),
        ));
        assert!(matches!(
            url_resolver.resolve_href(".?../../../../../../../../../../etc/passwd"),
            Err(AllowedUrlError::NoQueriesAllowed)
        ));
    }

    #[test]
    fn disallows_weird_relative_uris() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse(&make_file_uri("/example/bar.svg")).unwrap(),
        ));

        assert!(url_resolver
            .resolve_href(".@../../../../../../../../../../etc/passwd")
            .is_err());
        assert!(url_resolver
            .resolve_href(".$../../../../../../../../../../etc/passwd")
            .is_err());
        assert!(url_resolver
            .resolve_href(".%../../../../../../../../../../etc/passwd")
            .is_err());
        assert!(url_resolver
            .resolve_href(".*../../../../../../../../../../etc/passwd")
            .is_err());
        assert!(url_resolver
            .resolve_href("~/../../../../../../../../../../etc/passwd")
            .is_err());
    }

    #[test]
    fn disallows_dot_sibling() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse(&make_file_uri("/example/bar.svg")).unwrap(),
        ));

        assert!(matches!(
            url_resolver.resolve_href("."),
            Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
        ));
        assert!(matches!(
            url_resolver.resolve_href(".#../../../../../../../../../../etc/passwd"),
            Err(AllowedUrlError::NoFragmentIdentifierAllowed)
        ));
    }

    #[test]
    fn disallows_fragment() {
        // UrlResolver::resolve_href() explicitly disallows fragment identifiers.
        // This is because they should have been stripped before calling that function,
        // by NodeId or the Iri machinery.
        let url_resolver =
            UrlResolver::new(Some(Url::parse("https://example.com/foo.svg").unwrap()));

        assert!(matches!(
            url_resolver.resolve_href("bar.svg#fragment"),
            Err(AllowedUrlError::NoFragmentIdentifierAllowed)
        ));
    }

    #[cfg(windows)]
    #[test]
    fn invalid_url_from_test_suite() {
        // This is required for Url to panic.
        let resolver =
            UrlResolver::new(Some(Url::parse("file:///c:/foo.svg").expect("initial url")));
        // With this, it doesn't panic:
        //   let resolver = UrlResolver::new(None);

        // The following panics, when using a base URL
        //   match resolver.resolve_href("file://invalid.css") {
        // so, use a less problematic case, hopefully
        match resolver.resolve_href("file://") {
            Ok(_) => println!("yay!"),
            Err(e) => println!("err: {}", e),
        }
    }
}