1
//! The main XML parser.
2

            
3
use encoding_rs::Encoding;
4
use gio::{
5
    prelude::BufferedInputStreamExt, BufferedInputStream, Cancellable, ConverterInputStream,
6
    InputStream, ZlibCompressorFormat, ZlibDecompressor,
7
};
8
use glib::object::Cast;
9
use markup5ever::{expanded_name, local_name, ns, ExpandedName, LocalName, Namespace, QualName};
10
use std::cell::RefCell;
11
use std::collections::HashMap;
12
use std::rc::Rc;
13
use std::str;
14
use std::string::ToString;
15
use std::sync::Arc;
16
use xml5ever::{
17
    buffer_queue::BufferQueue,
18
    tendril::format_tendril,
19
    tokenizer::{ProcessResult, TagKind, Token, TokenSink, XmlTokenizer, XmlTokenizerOpts},
20
    TokenizerResult,
21
};
22

            
23
use crate::borrow_element_as;
24
use crate::css::{Origin, Stylesheet};
25
use crate::document::{Document, DocumentBuilder, LoadOptions};
26
use crate::error::{ImplementationLimit, LoadingError};
27
use crate::io::{self, IoError};
28
use crate::limits::{MAX_LOADED_ELEMENTS, MAX_XINCLUDE_DEPTH};
29
use crate::node::{Node, NodeBorrow};
30
use crate::rsvg_log;
31
use crate::session::Session;
32
use crate::style::StyleType;
33
use crate::url_resolver::AllowedUrl;
34

            
35
use xml2_load::Xml2Parser;
36

            
37
mod attributes;
38
mod xml2;
39
mod xml2_load;
40

            
41
use xml2::xmlEntityPtr;
42

            
43
pub use attributes::Attributes;
44

            
45
#[derive(Clone)]
46
enum Context {
47
    // Starting state
48
    Start,
49

            
50
    // Creating nodes for elements under the current node
51
    ElementCreation,
52

            
53
    // Inside <style>; accumulate text to include in a stylesheet
54
    Style,
55

            
56
    // An unsupported element inside a `<style>` element, to be ignored
57
    UnsupportedStyleChild,
58

            
59
    // Inside <xi:include>
60
    XInclude(XIncludeContext),
61

            
62
    // An unsupported element inside a <xi:include> context, to be ignored
63
    UnsupportedXIncludeChild,
64

            
65
    // Insie <xi::fallback>
66
    XIncludeFallback(XIncludeContext),
67

            
68
    // An XML parsing error was found.  We will no-op upon any further XML events.
69
    FatalError(LoadingError),
70
}
71

            
72
/// Data carried by the `Context::XInclude` and `Context::XIncludeFallback` states.
#[derive(Clone)]
struct XIncludeContext {
    /// `true` when the included resource could not be acquired, so the contents of
    /// `<xi:fallback>` should be processed instead of ignored.
    need_fallback: bool,
}
76

            
77
extern "C" {
78
    // The original function takes an xmlNodePtr, but that is compatible
79
    // with xmlEntityPtr for the purposes of this function.
80
    fn xmlFreeNode(node: xmlEntityPtr);
81
}
82

            
83
/// This is to hold an xmlEntityPtr from libxml2; we just hold an opaque pointer
84
/// that is freed in impl Drop.
85
struct XmlEntity(xmlEntityPtr);
86

            
87
impl Drop for XmlEntity {
88
133
    fn drop(&mut self) {
89
133
        unsafe {
90
133
            // Even though we are freeing an xmlEntityPtr, historically the code has always
91
133
            // used xmlFreeNode() because that function actually does allow freeing entities.
92
133
            //
93
133
            // See https://gitlab.gnome.org/GNOME/libxml2/-/issues/731
94
133
            // for a possible memory leak on older versions of libxml2 when using
95
133
            // xmlFreeNode() instead of xmlFreeEntity() - the latter just became public
96
133
            // in librsvg-2.12.0.
97
133
            xmlFreeNode(self.0);
98
133
        }
99
133
    }
100
}
101

            
102
// Builds an `ExpandedName` in the XInclude namespace for the given local name.
//
// The markup5ever crate doesn't have built-in namespaces for XInclude, so the
// namespace and local-name atoms are created on the spot.  The result borrows
// those temporaries, so use it directly inside the expression that needs it.
macro_rules! xinclude_name {
    ($local_name:expr) => {
        ExpandedName {
            ns: &Namespace::from("http://www.w3.org/2001/XInclude"),
            local: &LocalName::from($local_name),
        }
    };
}
114

            
115
/// Holds the state used for XML processing
116
///
117
/// These methods are called when an XML event is parsed out of the XML stream: `start_element`,
118
/// `end_element`, `characters`.
119
///
120
/// When an element starts, we push a corresponding `Context` into the `context_stack`.  Within
121
/// that context, all XML events will be forwarded to it, and processed in one of the `XmlHandler`
122
/// trait objects. Normally the context refers to a `NodeCreationContext` implementation which is
123
/// what creates normal graphical elements.
124
struct XmlStateInner {
125
    document_builder: DocumentBuilder,
126
    num_loaded_elements: usize,
127
    xinclude_depth: usize,
128
    context_stack: Vec<Context>,
129
    current_node: Option<Node>,
130

            
131
    // Note that neither XmlStateInner nor Xmlstate implement Drop.
132
    //
133
    // An XmlState is finally consumed in XmlState::build_document(), and that
134
    // function is responsible for freeing all the XmlEntityPtr from this field.
135
    //
136
    // (The structs cannot impl Drop because build_document()
137
    // destructures and consumes them at the same time.)
138
    entities: HashMap<String, XmlEntity>,
139
}
140

            
141
pub struct XmlState {
142
    inner: RefCell<XmlStateInner>,
143

            
144
    session: Session,
145
    load_options: Arc<LoadOptions>,
146
}
147

            
148
/// Errors returned from `XmlState::acquire()`.
///
/// The names follow the terminology from <https://www.w3.org/TR/xinclude/#terminology>.
enum AcquireError {
    /// The resource could not be acquired (file not found), or there was an I/O error.
    /// In this case the `xi:fallback` can be used if present.
    ResourceError,

    /// The resource could not be parsed/decoded; carries a message describing why.
    FatalError(String),
}
159

            
160
impl XmlStateInner {
161
58629636
    fn context(&self) -> Context {
162
58629636
        // We can unwrap since the stack is never empty
163
58629636
        self.context_stack.last().unwrap().clone()
164
58629636
    }
165
}
166

            
167
impl XmlState {
168
20284
    fn new(
169
20284
        session: Session,
170
20284
        document_builder: DocumentBuilder,
171
20284
        load_options: Arc<LoadOptions>,
172
20284
    ) -> XmlState {
173
20284
        XmlState {
174
20284
            inner: RefCell::new(XmlStateInner {
175
20284
                document_builder,
176
20284
                num_loaded_elements: 0,
177
20284
                xinclude_depth: 0,
178
20284
                context_stack: vec![Context::Start],
179
20284
                current_node: None,
180
20284
                entities: HashMap::new(),
181
20284
            }),
182
20284

            
183
20284
            session,
184
20284
            load_options,
185
20284
        }
186
20284
    }
187

            
188
20968
    fn check_last_error(&self) -> Result<(), LoadingError> {
189
20968
        let inner = self.inner.borrow();
190
20968

            
191
20968
        match inner.context() {
192
912
            Context::FatalError(e) => Err(e),
193
20056
            _ => Ok(()),
194
        }
195
20968
    }
196

            
197
19461555
    fn check_limits(&self) -> Result<(), ()> {
198
19461555
        if self.inner.borrow().num_loaded_elements > MAX_LOADED_ELEMENTS {
199
19
            self.error(LoadingError::LimitExceeded(
200
19
                ImplementationLimit::TooManyLoadedElements,
201
19
            ));
202
19
            Err(())
203
        } else {
204
19461536
            Ok(())
205
        }
206
19461555
    }
207

            
208
19461555
    pub fn start_element(&self, name: QualName, attrs: Attributes) -> Result<(), ()> {
209
19461555
        self.check_limits()?;
210

            
211
19461536
        let context = self.inner.borrow().context();
212
19461536

            
213
19461536
        if let Context::FatalError(_) = context {
214
57
            return Err(());
215
19461479
        }
216
19461479

            
217
19461479
        self.inner.borrow_mut().num_loaded_elements += 1;
218

            
219
19461479
        let new_context = match context {
220
20132
            Context::Start => self.element_creation_start_element(&name, attrs),
221
19441309
            Context::ElementCreation => self.element_creation_start_element(&name, attrs),
222

            
223
            Context::Style => self.inside_style_start_element(&name),
224
            Context::UnsupportedStyleChild => self.unsupported_style_start_element(&name),
225

            
226
38
            Context::XInclude(ref ctx) => self.inside_xinclude_start_element(ctx, &name),
227
            Context::UnsupportedXIncludeChild => self.unsupported_xinclude_start_element(&name),
228
            Context::XIncludeFallback(ref ctx) => {
229
                self.xinclude_fallback_start_element(ctx, &name, attrs)
230
            }
231

            
232
            Context::FatalError(_) => unreachable!(),
233
        };
234

            
235
19461479
        self.inner.borrow_mut().context_stack.push(new_context);
236
19461479

            
237
19461479
        Ok(())
238
19461555
    }
239

            
240
19461441
    pub fn end_element(&self, _name: QualName) {
241
19461441
        let context = self.inner.borrow().context();
242
19461441

            
243
19461441
        match context {
244
            Context::Start => panic!("end_element: XML handler stack is empty!?"),
245
19458800
            Context::ElementCreation => self.element_creation_end_element(),
246

            
247
798
            Context::Style => self.style_end_element(),
248
            Context::UnsupportedStyleChild => (),
249

            
250
114
            Context::XInclude(_) => (),
251
            Context::UnsupportedXIncludeChild => (),
252
38
            Context::XIncludeFallback(_) => (),
253

            
254
1691
            Context::FatalError(_) => return,
255
        }
256

            
257
        // We can unwrap since start_element() always adds a context to the stack
258
19459750
        self.inner.borrow_mut().context_stack.pop().unwrap();
259
19461441
    }
260

            
261
19685691
    pub fn characters(&self, text: &str) {
262
19685691
        let context = self.inner.borrow().context();
263
19685691

            
264
19685691
        match context {
265
            Context::Start => {
266
                // This is character data before the first element, i.e. something like
267
                //  <?xml version="1.0" encoding="UTF-8"?><svg xmlns="http://www.w3.org/2000/svg"/>
268
                // ^ note the space here
269
                // libxml2 is not finished reading the file yet; it will emit an error
270
                // on its own when it finishes.  So, ignore this condition.
271
            }
272

            
273
19683582
            Context::ElementCreation => self.element_creation_characters(text),
274

            
275
1159
            Context::Style => self.element_creation_characters(text),
276
            Context::UnsupportedStyleChild => (),
277

            
278
76
            Context::XInclude(_) => (),
279
            Context::UnsupportedXIncludeChild => (),
280
38
            Context::XIncludeFallback(ref ctx) => self.xinclude_fallback_characters(ctx, text),
281
836
            Context::FatalError(_) => (),
282
        }
283
19685691
    }
284

            
285
57
    pub fn processing_instruction(&self, target: &str, data: &str) {
286
57
        if target != "xml-stylesheet" {
287
19
            return;
288
38
        }
289

            
290
38
        if let Ok(pairs) = parse_xml_stylesheet_processing_instruction(data) {
291
38
            let mut alternate = None;
292
38
            let mut type_ = None;
293
38
            let mut href = None;
294

            
295
114
            for (att, value) in pairs {
296
76
                match att.as_str() {
297
76
                    "alternate" => alternate = Some(value),
298
76
                    "type" => type_ = Some(value),
299
38
                    "href" => href = Some(value),
300
                    _ => (),
301
                }
302
            }
303

            
304
38
            let mut inner = self.inner.borrow_mut();
305
38

            
306
38
            if type_.as_deref() != Some("text/css")
307
38
                || (alternate.is_some() && alternate.as_deref() != Some("no"))
308
            {
309
                rsvg_log!(
310
                    self.session,
311
                    "invalid parameters in XML processing instruction for stylesheet",
312
                );
313
                return;
314
38
            }
315

            
316
38
            if let Some(href) = href {
317
38
                if let Ok(aurl) = self.load_options.url_resolver.resolve_href(&href) {
318
19
                    if let Ok(stylesheet) =
319
19
                        Stylesheet::from_href(&aurl, Origin::Author, self.session.clone())
320
19
                    {
321
19
                        inner.document_builder.append_stylesheet(stylesheet);
322
19
                    } else {
323
                        // FIXME: https://www.w3.org/TR/xml-stylesheet/ does not seem to specify
324
                        // what to do if the stylesheet cannot be loaded, so here we ignore the error.
325
                        rsvg_log!(
326
                            self.session,
327
                            "could not create stylesheet from {} in XML processing instruction",
328
                            href
329
                        );
330
                    }
331
                } else {
332
19
                    rsvg_log!(
333
19
                        self.session,
334
19
                        "{} not allowed for xml-stylesheet in XML processing instruction",
335
19
                        href
336
19
                    );
337
                }
338
            } else {
339
                rsvg_log!(
340
                    self.session,
341
                    "xml-stylesheet processing instruction does not have href; ignoring"
342
                );
343
            }
344
        } else {
345
            self.error(LoadingError::XmlParseError(String::from(
346
                "invalid processing instruction data in xml-stylesheet",
347
            )));
348
        }
349
57
    }
350

            
351
133
    pub fn error(&self, e: LoadingError) {
352
133
        self.inner
353
133
            .borrow_mut()
354
133
            .context_stack
355
133
            .push(Context::FatalError(e));
356
133
    }
357

            
358
1178
    /// Returns the entity pointer registered under `entity_name`, if any.
    ///
    /// The pointer stays owned by this state.
    pub fn entity_lookup(&self, entity_name: &str) -> Option<xmlEntityPtr> {
        let inner = self.inner.borrow();

        match inner.entities.get(entity_name) {
            Some(entity) => Some(entity.0),
            None => None,
        }
    }
365

            
366
133
    pub fn entity_insert(&self, entity_name: &str, entity: xmlEntityPtr) {
367
133
        let mut inner = self.inner.borrow_mut();
368
133

            
369
133
        inner
370
133
            .entities
371
133
            .insert(entity_name.to_string(), XmlEntity(entity));
372
133
    }
373

            
374
19461441
    fn element_creation_start_element(&self, name: &QualName, attrs: Attributes) -> Context {
375
19461441
        if name.expanded() == xinclude_name!("include") {
376
950
            self.xinclude_start_element(name, attrs)
377
        } else {
378
19460491
            let mut inner = self.inner.borrow_mut();
379
19460491

            
380
19460491
            let parent = inner.current_node.clone();
381
19460491
            let node = inner.document_builder.append_element(name, attrs, parent);
382
19460491
            inner.current_node = Some(node);
383
19460491

            
384
19460491
            if name.expanded() == expanded_name!(svg "style") {
385
798
                Context::Style
386
            } else {
387
19459693
                Context::ElementCreation
388
            }
389
        }
390
19461441
    }
391

            
392
19459598
    fn element_creation_end_element(&self) {
393
19459598
        let mut inner = self.inner.borrow_mut();
394
19459598
        let node = inner.current_node.take().unwrap();
395
19459598
        inner.current_node = node.parent();
396
19459598
    }
397

            
398
19684817
    fn element_creation_characters(&self, text: &str) {
399
19684817
        let mut inner = self.inner.borrow_mut();
400
19684817

            
401
19684817
        let mut parent = inner.current_node.clone().unwrap();
402
19684817
        inner.document_builder.append_characters(text, &mut parent);
403
19684817
    }
404

            
405
798
    fn style_end_element(&self) {
406
798
        self.add_inline_stylesheet();
407
798
        self.element_creation_end_element()
408
798
    }
409

            
410
798
    fn add_inline_stylesheet(&self) {
411
798
        let mut inner = self.inner.borrow_mut();
412
798
        let current_node = inner.current_node.as_ref().unwrap();
413
798

            
414
798
        let style_type = borrow_element_as!(current_node, Style).style_type();
415
798

            
416
798
        if style_type == StyleType::TextCss {
417
798
            let stylesheet_text = current_node
418
798
                .children()
419
798
                .map(|child| {
420
798
                    // Note that here we assume that the only children of <style>
421
798
                    // are indeed text nodes.
422
798
                    let child_borrow = child.borrow_chars();
423
798
                    child_borrow.get_string()
424
798
                })
425
798
                .collect::<String>();
426

            
427
798
            if let Ok(stylesheet) = Stylesheet::from_data(
428
798
                &stylesheet_text,
429
798
                &self.load_options.url_resolver,
430
798
                Origin::Author,
431
798
                self.session.clone(),
432
798
            ) {
433
798
                inner.document_builder.append_stylesheet(stylesheet);
434
798
            } else {
435
                rsvg_log!(self.session, "invalid inline stylesheet");
436
            }
437
        }
438
798
    }
439

            
440
    /// Handles an element that starts directly inside `<style>`.
    ///
    /// Such elements are unsupported, so this defers to the handler for unsupported
    /// `<style>` children.
    fn inside_style_start_element(&self, name: &QualName) -> Context {
        self.unsupported_style_start_element(name)
    }
443

            
444
    fn unsupported_style_start_element(&self, _name: &QualName) -> Context {
445
        Context::UnsupportedStyleChild
446
    }
447

            
448
950
    fn xinclude_start_element(&self, _name: &QualName, attrs: Attributes) -> Context {
449
950
        let mut href = None;
450
950
        let mut parse = None;
451
950
        let mut encoding = None;
452
950

            
453
950
        let ln_parse = LocalName::from("parse");
454

            
455
1919
        for (attr, value) in attrs.iter() {
456
1919
            match attr.expanded() {
457
950
                expanded_name!("", "href") => href = Some(value),
458
969
                ref v
459
969
                    if *v
460
969
                        == ExpandedName {
461
969
                            ns: &ns!(),
462
969
                            local: &ln_parse,
463
893
                        } =>
464
893
                {
465
893
                    parse = Some(value)
466
                }
467
76
                expanded_name!("", "encoding") => encoding = Some(value),
468
                _ => (),
469
            }
470
        }
471

            
472
950
        let need_fallback = match self.acquire(href, parse, encoding) {
473
76
            Ok(()) => false,
474
38
            Err(AcquireError::ResourceError) => true,
475
836
            Err(AcquireError::FatalError(s)) => {
476
836
                return Context::FatalError(LoadingError::XmlParseError(s))
477
            }
478
        };
479

            
480
114
        Context::XInclude(XIncludeContext { need_fallback })
481
950
    }
482

            
483
38
    fn inside_xinclude_start_element(&self, ctx: &XIncludeContext, name: &QualName) -> Context {
484
38
        if name.expanded() == xinclude_name!("fallback") {
485
38
            Context::XIncludeFallback(ctx.clone())
486
        } else {
487
            // https://www.w3.org/TR/xinclude/#include_element
488
            //
489
            // "Other content (text, processing instructions,
490
            // comments, elements not in the XInclude namespace,
491
            // descendants of child elements) is not constrained by
492
            // this specification and is ignored by the XInclude
493
            // processor"
494

            
495
            self.unsupported_xinclude_start_element(name)
496
        }
497
38
    }
498

            
499
    fn xinclude_fallback_start_element(
500
        &self,
501
        ctx: &XIncludeContext,
502
        name: &QualName,
503
        attrs: Attributes,
504
    ) -> Context {
505
        if ctx.need_fallback {
506
            if name.expanded() == xinclude_name!("include") {
507
                self.xinclude_start_element(name, attrs)
508
            } else {
509
                self.element_creation_start_element(name, attrs)
510
            }
511
        } else {
512
            Context::UnsupportedXIncludeChild
513
        }
514
    }
515

            
516
38
    fn xinclude_fallback_characters(&self, ctx: &XIncludeContext, text: &str) {
517
38
        if ctx.need_fallback && self.inner.borrow().current_node.is_some() {
518
38
            // We test for is_some() because with a bad "SVG" file like this:
519
38
            //
520
38
            //    <xi:include href="blah"><xi:fallback>foo</xi:fallback></xi:include>
521
38
            //
522
38
            // at the point we get "foo" here, there is no current_node because
523
38
            // no nodes have been created before the xi:include.
524
38
            self.element_creation_characters(text);
525
38
        }
526
38
    }
527

            
528
950
    fn acquire(
529
950
        &self,
530
950
        href: Option<&str>,
531
950
        parse: Option<&str>,
532
950
        encoding: Option<&str>,
533
950
    ) -> Result<(), AcquireError> {
534
950
        if let Some(href) = href {
535
950
            let aurl = self
536
950
                .load_options
537
950
                .url_resolver
538
950
                .resolve_href(href)
539
950
                .map_err(|e| {
540
38
                    // FIXME: should AlloweUrlError::UrlParseError be a fatal error,
541
38
                    // not a resource error?
542
38
                    rsvg_log!(self.session, "could not acquire \"{}\": {}", href, e);
543
38
                    AcquireError::ResourceError
544
950
                })?;
545

            
546
            // https://www.w3.org/TR/xinclude/#include_element
547
            //
548
            // "When omitted, the value of "xml" is implied (even in
549
            // the absence of a default value declaration). Values
550
            // other than "xml" and "text" are a fatal error."
551
912
            match parse {
552
874
                None | Some("xml") => self.include_xml(&aurl),
553

            
554
38
                Some("text") => self.acquire_text(&aurl, encoding),
555

            
556
                Some(v) => Err(AcquireError::FatalError(format!(
557
                    "unknown 'parse' attribute value: \"{v}\""
558
                ))),
559
            }
560
        } else {
561
            // The href attribute is not present.  Per
562
            // https://www.w3.org/TR/xinclude/#include_element we
563
            // should use the xpointer attribute, but we do not
564
            // support that yet.  So, we'll just say, "OK" and not
565
            // actually include anything.
566
            Ok(())
567
        }
568
950
    }
569

            
570
874
    /// Includes `aurl` as XML, tracking the nesting depth around the inclusion.
    ///
    /// Fails without including anything when the depth limit has been reached.  The
    /// depth is restored whether or not the inclusion succeeds.
    fn include_xml(&self, aurl: &AllowedUrl) -> Result<(), AcquireError> {
        self.increase_xinclude_depth(aurl)?;

        let outcome = self.acquire_xml(aurl);
        self.decrease_xinclude_depth();

        outcome
    }
579

            
580
874
    fn increase_xinclude_depth(&self, aurl: &AllowedUrl) -> Result<(), AcquireError> {
581
874
        let mut inner = self.inner.borrow_mut();
582
874

            
583
874
        if inner.xinclude_depth == MAX_XINCLUDE_DEPTH {
584
38
            Err(AcquireError::FatalError(format!(
585
38
                "exceeded maximum level of nested xinclude in {aurl}"
586
38
            )))
587
        } else {
588
836
            inner.xinclude_depth += 1;
589
836
            Ok(())
590
        }
591
874
    }
592

            
593
836
    fn decrease_xinclude_depth(&self) {
594
836
        let mut inner = self.inner.borrow_mut();
595
836
        inner.xinclude_depth -= 1;
596
836
    }
597

            
598
38
    fn acquire_text(&self, aurl: &AllowedUrl, encoding: Option<&str>) -> Result<(), AcquireError> {
599
38
        let binary = io::acquire_data(aurl, None).map_err(|e| {
600
            rsvg_log!(self.session, "could not acquire \"{}\": {}", aurl, e);
601
            AcquireError::ResourceError
602
38
        })?;
603

            
604
38
        let encoding = encoding.unwrap_or("utf-8");
605

            
606
38
        let encoder = Encoding::for_label_no_replacement(encoding.as_bytes()).ok_or_else(|| {
607
            AcquireError::FatalError(format!("unknown encoding \"{encoding}\" for \"{aurl}\""))
608
38
        })?;
609

            
610
38
        let utf8_data = encoder
611
38
            .decode_without_bom_handling_and_without_replacement(&binary.data)
612
38
            .ok_or_else(|| {
613
                AcquireError::FatalError(format!("could not convert contents of \"{aurl}\" from character encoding \"{encoding}\""))
614
38
            })?;
615

            
616
38
        self.element_creation_characters(&utf8_data);
617
38
        Ok(())
618
38
    }
619

            
620
836
    fn acquire_xml(&self, aurl: &AllowedUrl) -> Result<(), AcquireError> {
621
        // FIXME: distinguish between "file not found" and "invalid XML"
622

            
623
836
        let stream = io::acquire_stream(aurl, None).map_err(|e| match e {
624
38
            IoError::BadDataUrl => AcquireError::FatalError(String::from("malformed data: URL")),
625
            _ => AcquireError::ResourceError,
626
836
        })?;
627

            
628
        // FIXME: pass a cancellable
629
798
        self.parse_from_stream(&stream, None).map_err(|e| match e {
630
            LoadingError::Io(_) => AcquireError::ResourceError,
631
760
            LoadingError::XmlParseError(s) => AcquireError::FatalError(s),
632
            _ => AcquireError::FatalError(String::from("unknown error")),
633
798
        })
634
836
    }
635

            
636
    // Parses XML from a stream into an XmlState.
637
    //
638
    // This can be called "in the middle" of an XmlState's processing status,
639
    // for example, when including another XML file via xi:include.
640
21025
    fn parse_from_stream(
641
21025
        &self,
642
21025
        stream: &gio::InputStream,
643
21025
        cancellable: Option<&gio::Cancellable>,
644
21025
    ) -> Result<(), LoadingError> {
645
21025
        Xml2Parser::from_stream(self, self.load_options.unlimited_size, stream, cancellable)
646
21025
            .and_then(|parser| parser.parse())
647
21025
            .and_then(|_: ()| self.check_last_error())
648
21025
    }
649

            
650
    fn unsupported_xinclude_start_element(&self, _name: &QualName) -> Context {
651
        Context::UnsupportedXIncludeChild
652
    }
653

            
654
20227
    fn build_document(
655
20227
        self,
656
20227
        stream: &gio::InputStream,
657
20227
        cancellable: Option<&gio::Cancellable>,
658
20227
    ) -> Result<Document, LoadingError> {
659
20227
        self.parse_from_stream(stream, cancellable)?;
660

            
661
        // consume self, then consume inner, then consume document_builder by calling .build()
662

            
663
20018
        let XmlState { inner, .. } = self;
664
20018
        let inner = inner.into_inner();
665
20018

            
666
20018
        let XmlStateInner {
667
20018
            document_builder, ..
668
20018
        } = inner;
669
20018
        document_builder.build()
670
20227
    }
671
}
672

            
673
/// Temporary holding space for data in an XML processing instruction.
///
/// We use a little hack via xml5ever to parse the contents of an XML processing instruction.
/// See the comment in parse_xml_stylesheet_processing_instruction() below.
#[derive(Default)]
struct ProcessingInstructionData {
    /// `(name, value)` pairs collected from the tokenized pseudo-element.
    attributes: Vec<(String, String)>,
    /// Set once the tokenizer reports a parse error.
    error: bool,
}
682

            
683
/// Token sink that records what the tokenizer finds into shared
/// `ProcessingInstructionData`.
struct ProcessingInstructionSink(Rc<RefCell<ProcessingInstructionData>>);
684

            
685
impl TokenSink for ProcessingInstructionSink {
686
    // xml5ever's tokenizer only uses this if we are actually using it to parse full XML;
687
    // here, the Handle associated type refers to a DOM script, which we know can't appear
688
    // in the way we use xml5ever, so we use the unit type instead.
689
    type Handle = ();
690

            
691
39
    fn process_token(&self, token: Token) -> ProcessResult<()> {
692
39
        let mut data = self.0.borrow_mut();
693

            
694
39
        match token {
695
39
            Token::TagToken(tag) if tag.kind == TagKind::EmptyTag => {
696
117
                for a in &tag.attrs {
697
78
                    let name = a.name.local.as_ref().to_string();
698
78
                    let value = a.value.to_string();
699
78

            
700
78
                    data.attributes.push((name, value));
701
78
                }
702
            }
703

            
704
            Token::ParseError(_) => data.error = true,
705

            
706
            _ => (),
707
        }
708

            
709
39
        ProcessResult::Continue
710
39
    }
711
}
712

            
713
// https://www.w3.org/TR/xml-stylesheet/
714
//
715
// The syntax for the xml-stylesheet processing instruction we support
716
// is this:
717
//
718
//   <?xml-stylesheet href="uri" alternate="no" type="text/css"?>
719
//
720
// XML parsers just feed us the raw data after the target name
721
// ("xml-stylesheet"), so we'll create a mini-parser with a hackish
722
// element just to extract the data as attributes.
723
39
fn parse_xml_stylesheet_processing_instruction(data: &str) -> Result<Vec<(String, String)>, ()> {
724
39
    let pi_data = Rc::new(RefCell::new(ProcessingInstructionData {
725
39
        attributes: Vec::new(),
726
39
        error: false,
727
39
    }));
728
39

            
729
39
    let queue = BufferQueue::default();
730
39
    queue.push_back(format_tendril!("<rsvg-hack {} />", data));
731
39

            
732
39
    let sink = ProcessingInstructionSink(pi_data.clone());
733
39

            
734
39
    let tokenizer = XmlTokenizer::new(sink, XmlTokenizerOpts::default());
735
39

            
736
39
    match tokenizer.run(&queue) {
737
39
        TokenizerResult::Done => (),
738
        _ => unreachable!("got an unexpected TokenizerResult; did xml5ever change its API?"),
739
    }
740

            
741
39
    let pi_data = pi_data.borrow();
742
39

            
743
39
    if pi_data.error {
744
        Err(())
745
    } else {
746
39
        Ok(pi_data.attributes.clone())
747
    }
748
39
}
749

            
750
20284
pub fn xml_load_from_possibly_compressed_stream(
751
20284
    session: Session,
752
20284
    document_builder: DocumentBuilder,
753
20284
    load_options: Arc<LoadOptions>,
754
20284
    stream: &gio::InputStream,
755
20284
    cancellable: Option<&gio::Cancellable>,
756
20284
) -> Result<Document, LoadingError> {
757
20284
    let state = XmlState::new(session, document_builder, load_options);
758

            
759
20284
    let stream = get_input_stream_for_loading(stream, cancellable)?;
760

            
761
20227
    state.build_document(&stream, cancellable)
762
20284
}
763

            
764
// Header of a gzip data stream: these are its first two bytes, in order.
const GZ_MAGIC_0: u8 = 0x1f;
const GZ_MAGIC_1: u8 = 0x8b;
767

            
768
20284
fn get_input_stream_for_loading(
769
20284
    stream: &InputStream,
770
20284
    cancellable: Option<&Cancellable>,
771
20284
) -> Result<InputStream, LoadingError> {
772
20284
    // detect gzipped streams (svgz)
773
20284

            
774
20284
    let buffered = BufferedInputStream::new(stream);
775
20284
    let num_read = buffered.fill(2, cancellable)?;
776
20284
    if num_read < 2 {
777
        // FIXME: this string was localized in the original; localize it
778
57
        return Err(LoadingError::XmlParseError(String::from(
779
57
            "Input file is too short",
780
57
        )));
781
20227
    }
782
20227

            
783
20227
    let buf = buffered.peek_buffer();
784
20227
    assert!(buf.len() >= 2);
785
20227
    if buf[0..2] == [GZ_MAGIC_0, GZ_MAGIC_1] {
786
57
        let decomp = ZlibDecompressor::new(ZlibCompressorFormat::Gzip);
787
57
        let converter = ConverterInputStream::new(&buffered, &decomp);
788
57
        Ok(converter.upcast::<InputStream>())
789
    } else {
790
20170
        Ok(buffered.upcast::<InputStream>())
791
    }
792
20284
}
793

            
794
#[cfg(test)]
mod tests {
    use super::*;

    /// The pseudo-attributes of a processing instruction come back as (name, value)
    /// pairs.  They are sorted by name here so the comparison does not depend on the
    /// order in which they are returned.
    #[test]
    fn parses_processing_instruction_data() {
        let mut pairs =
            parse_xml_stylesheet_processing_instruction("foo=\"bar\" baz=\"beep\"").unwrap();
        pairs.sort_by(|a, b| a.0.cmp(&b.0));

        let expected = vec![
            ("baz".to_string(), "beep".to_string()),
            ("foo".to_string(), "bar".to_string()),
        ];

        assert_eq!(pairs, expected);
    }
}