1
//! Glue between the libxml2 API and our xml parser module.
2
//!
3
//! This file provides functions to create a libxml2 xmlParserCtxtPtr, configured
4
//! to read from a gio::InputStream, and to maintain its loading data in an XmlState.
5

            
6
use gio::prelude::*;
7
use std::borrow::Cow;
8
use std::cell::{Cell, RefCell};
9
use std::ptr;
10
use std::rc::Rc;
11
use std::slice;
12
use std::str;
13
use std::sync::Once;
14

            
15
use glib::translate::*;
16
use markup5ever::{ns, LocalName, Namespace, Prefix, QualName};
17

            
18
use crate::error::LoadingError;
19
use crate::util::{cstr, opt_utf8_cstr, utf8_cstr, utf8_cstr_len};
20

            
21
use super::xml2::*;
22
use super::Attributes;
23
use super::XmlState;
24

            
25
#[rustfmt::skip]
26
21025
fn get_xml2_sax_handler() -> xmlSAXHandler {
27
21025
    xmlSAXHandler {
28
21025
        // first the unused callbacks
29
21025
        internalSubset:        None,
30
21025
        isStandalone:          None,
31
21025
        hasInternalSubset:     None,
32
21025
        hasExternalSubset:     None,
33
21025
        resolveEntity:         None,
34
21025
        notationDecl:          None,
35
21025
        attributeDecl:         None,
36
21025
        elementDecl:           None,
37
21025
        setDocumentLocator:    None,
38
21025
        startDocument:         None,
39
21025
        endDocument:           None,
40
21025
        reference:             None,
41
21025
        ignorableWhitespace:   None,
42
21025
        comment:               None,
43
21025
        warning:               None,
44
21025
        error:                 None,
45
21025
        fatalError:            None,
46
21025
        externalSubset:        None,
47
21025

            
48
21025
        _private:              ptr::null_mut(),
49
21025

            
50
21025
        // then the used callbacks
51
21025
        getEntity:             Some(sax_get_entity_cb),
52
21025
        entityDecl:            Some(sax_entity_decl_cb),
53
21025
        unparsedEntityDecl:    Some(sax_unparsed_entity_decl_cb),
54
21025
        getParameterEntity:    Some(sax_get_parameter_entity_cb),
55
21025
        characters:            Some(sax_characters_cb),
56
21025
        cdataBlock:            Some(sax_characters_cb),
57
21025
        startElement:          None,
58
21025
        endElement:            None,
59
21025
        processingInstruction: Some(sax_processing_instruction_cb),
60
21025
        startElementNs:        Some(sax_start_element_ns_cb),
61
21025
        endElementNs:          Some(sax_end_element_ns_cb),
62
21025
        serror:                Some(rsvg_sax_serror_cb),
63
21025

            
64
21025
        initialized:           XML_SAX2_MAGIC,
65
21025
    }
66
21025
}
67

            
68
114
unsafe extern "C" fn rsvg_sax_serror_cb(user_data: *mut libc::c_void, error: xmlErrorPtr) {
69
114
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
70
114
    let error = error.as_ref().unwrap();
71

            
72
114
    let level_name = match error.level {
73
        1 => "warning",
74
57
        2 => "error",
75
57
        3 => "fatal error",
76
        _ => "unknown error",
77
    };
78

            
79
    // "int2" is the column number
80
114
    let column = if error.int2 > 0 {
81
114
        Cow::Owned(format!(":{}", error.int2))
82
    } else {
83
        Cow::Borrowed("")
84
    };
85

            
86
114
    let full_error_message = format!(
87
114
        "{} code={} ({}) in {}:{}{}: {}",
88
114
        level_name,
89
114
        error.code,
90
114
        error.domain,
91
114
        cstr(error.file),
92
114
        error.line,
93
114
        column,
94
114
        cstr(error.message)
95
114
    );
96
114
    xml2_parser
97
114
        .state
98
114
        .error(LoadingError::XmlParseError(full_error_message));
99
114
}
100

            
101
21025
fn free_xml_parser_and_doc(parser: xmlParserCtxtPtr) {
102
21025
    // Free the ctxt and its ctxt->myDoc - libxml2 doesn't free them together
103
21025
    // http://xmlsoft.org/html/libxml-parser.html#xmlFreeParserCtxt
104
21025
    unsafe {
105
21025
        if !parser.is_null() {
106
21025
            let rparser = &mut *parser;
107
21025

            
108
21025
            if !rparser.myDoc.is_null() {
109
                xmlFreeDoc(rparser.myDoc);
110
                rparser.myDoc = ptr::null_mut();
111
21025
            }
112

            
113
21025
            xmlFreeParserCtxt(parser);
114
        }
115
    }
116
21025
}
117

            
118
1178
unsafe extern "C" fn sax_get_entity_cb(
119
1178
    user_data: *mut libc::c_void,
120
1178
    name: *const libc::c_char,
121
1178
) -> xmlEntityPtr {
122
1178
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
123
1178

            
124
1178
    assert!(!name.is_null());
125
1178
    let name = utf8_cstr(name);
126
1178

            
127
1178
    xml2_parser
128
1178
        .state
129
1178
        .entity_lookup(name)
130
1178
        .unwrap_or(ptr::null_mut())
131
1178
}
132

            
133
133
unsafe extern "C" fn sax_entity_decl_cb(
134
133
    user_data: *mut libc::c_void,
135
133
    name: *const libc::c_char,
136
133
    type_: libc::c_int,
137
133
    _public_id: *const libc::c_char,
138
133
    _system_id: *const libc::c_char,
139
133
    content: *const libc::c_char,
140
133
) {
141
133
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
142
133

            
143
133
    assert!(!name.is_null());
144

            
145
133
    if type_ != XML_INTERNAL_GENERAL_ENTITY {
146
        // We don't allow loading external entities; we don't support
147
        // defining parameter entities in the DTD, and libxml2 should
148
        // handle internal predefined entities by itself (e.g. "&amp;").
149
        return;
150
133
    }
151
133

            
152
133
    let entity = xmlNewEntity(
153
133
        ptr::null_mut(),
154
133
        name,
155
133
        type_,
156
133
        ptr::null(),
157
133
        ptr::null(),
158
133
        content,
159
133
    );
160
133
    assert!(!entity.is_null());
161

            
162
133
    let name = utf8_cstr(name);
163
133
    xml2_parser.state.entity_insert(name, entity);
164
133
}
165

            
166
unsafe extern "C" fn sax_unparsed_entity_decl_cb(
167
    user_data: *mut libc::c_void,
168
    name: *const libc::c_char,
169
    public_id: *const libc::c_char,
170
    system_id: *const libc::c_char,
171
    _notation_name: *const libc::c_char,
172
) {
173
    sax_entity_decl_cb(
174
        user_data,
175
        name,
176
        XML_INTERNAL_GENERAL_ENTITY,
177
        public_id,
178
        system_id,
179
        ptr::null(),
180
    );
181
}
182

            
183
38922996
fn make_qual_name(prefix: Option<&str>, uri: Option<&str>, localname: &str) -> QualName {
184
38922996
    // FIXME: If the element doesn't have a namespace URI, we are falling back
185
38922996
    // to the SVG namespace.  In reality we need to take namespace scoping into account,
186
38922996
    // i.e. handle the "default namespace" active at that point in the XML stack.
187
38922996
    let element_ns = uri.map_or_else(|| ns!(svg), Namespace::from);
188
38922996

            
189
38922996
    QualName::new(
190
38922996
        prefix.map(Prefix::from),
191
38922996
        element_ns,
192
38922996
        LocalName::from(localname),
193
38922996
    )
194
38922996
}
195

            
196
19461555
unsafe extern "C" fn sax_start_element_ns_cb(
197
19461555
    user_data: *mut libc::c_void,
198
19461555
    localname: *mut libc::c_char,
199
19461555
    prefix: *mut libc::c_char,
200
19461555
    uri: *mut libc::c_char,
201
19461555
    _nb_namespaces: libc::c_int,
202
19461555
    _namespaces: *mut *mut libc::c_char,
203
19461555
    nb_attributes: libc::c_int,
204
19461555
    _nb_defaulted: libc::c_int,
205
19461555
    attributes: *mut *mut libc::c_char,
206
19461555
) {
207
19461555
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
208
19461555

            
209
19461555
    assert!(!localname.is_null());
210

            
211
19461555
    let prefix = opt_utf8_cstr(prefix);
212
19461555
    let uri = opt_utf8_cstr(uri);
213
19461555
    let localname = utf8_cstr(localname);
214
19461555

            
215
19461555
    let qual_name = make_qual_name(prefix, uri, localname);
216
19461555

            
217
19461555
    let nb_attributes = nb_attributes as usize;
218
19461555
    let attrs =
219
19461555
        match Attributes::new_from_xml2_attributes(nb_attributes, attributes as *const *const _) {
220
19461555
            Ok(attrs) => attrs,
221
            Err(e) => {
222
                xml2_parser.state.error(e);
223
                let parser = xml2_parser.parser.get();
224
                xmlStopParser(parser);
225
                return;
226
            }
227
        };
228

            
229
    // This clippy::let_unit_value is for the "let _: () = e" guard below.
230
    #[allow(clippy::let_unit_value)]
231
19461555
    if let Err(e) = xml2_parser.state.start_element(qual_name, attrs) {
232
76
        let _: () = e; // guard in case we change the error type later
233
76

            
234
76
        let parser = xml2_parser.parser.get();
235
76
        xmlStopParser(parser);
236
19461479
    }
237
19461555
}
238

            
239
19461441
unsafe extern "C" fn sax_end_element_ns_cb(
240
19461441
    user_data: *mut libc::c_void,
241
19461441
    localname: *mut libc::c_char,
242
19461441
    prefix: *mut libc::c_char,
243
19461441
    uri: *mut libc::c_char,
244
19461441
) {
245
19461441
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
246
19461441

            
247
19461441
    assert!(!localname.is_null());
248

            
249
19461441
    let prefix = opt_utf8_cstr(prefix);
250
19461441
    let uri = opt_utf8_cstr(uri);
251
19461441
    let localname = utf8_cstr(localname);
252
19461441

            
253
19461441
    let qual_name = make_qual_name(prefix, uri, localname);
254
19461441

            
255
19461441
    xml2_parser.state.end_element(qual_name);
256
19461441
}
257

            
258
19685691
unsafe extern "C" fn sax_characters_cb(
259
19685691
    user_data: *mut libc::c_void,
260
19685691
    unterminated_text: *const libc::c_char,
261
19685691
    len: libc::c_int,
262
19685691
) {
263
19685691
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
264
19685691

            
265
19685691
    assert!(!unterminated_text.is_null());
266
19685691
    assert!(len >= 0);
267

            
268
19685691
    let utf8 = utf8_cstr_len(unterminated_text, len as usize);
269
19685691
    xml2_parser.state.characters(utf8);
270
19685691
}
271

            
272
57
unsafe extern "C" fn sax_processing_instruction_cb(
273
57
    user_data: *mut libc::c_void,
274
57
    target: *const libc::c_char,
275
57
    data: *const libc::c_char,
276
57
) {
277
57
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
278
57

            
279
57
    assert!(!target.is_null());
280
57
    let target = utf8_cstr(target);
281

            
282
57
    let data = if data.is_null() { "" } else { utf8_cstr(data) };
283

            
284
57
    xml2_parser.state.processing_instruction(target, data);
285
57
}
286

            
287
unsafe extern "C" fn sax_get_parameter_entity_cb(
288
    user_data: *mut libc::c_void,
289
    name: *const libc::c_char,
290
) -> xmlEntityPtr {
291
    sax_get_entity_cb(user_data, name)
292
}
293

            
294
21025
fn set_xml_parse_options(parser: xmlParserCtxtPtr, unlimited_size: bool) {
295
21025
    let mut options: libc::c_int = XML_PARSE_NONET | XML_PARSE_BIG_LINES;
296
21025

            
297
21025
    if unlimited_size {
298
57
        options |= XML_PARSE_HUGE;
299
20968
    }
300

            
301
21025
    unsafe {
302
21025
        xmlCtxtUseOptions(parser, options);
303
21025

            
304
21025
        // If false, external entities work, but internal ones don't. if
305
21025
        // true, internal entities work, but external ones don't. favor
306
21025
        // internal entities, in order to not cause a regression
307
21025
        (*parser).replaceEntities = 1;
308
21025
    }
309
21025
}
310

            
311
// Struct used as closure data for xmlCreateIOParserCtxt().  In conjunction
312
// with stream_ctx_read() and stream_ctx_close(), this struct provides the
313
// I/O callbacks and their context for libxml2.
314
//
315
// We call I/O methods on the stream, and as soon as we get an error
316
// we store it in the gio_error field.  Libxml2 just allows us to
317
// return -1 from the I/O callbacks in that case; it doesn't actually
318
// see the error code.
319
//
320
// The gio_error field comes from the place that constructs the
321
// StreamCtx.  That place is later responsible for seeing if the error
322
// is set; if it is, it means that there was an I/O error.  Otherwise,
323
// there were no I/O errors but the caller must then ask libxml2 for
324
// XML parsing errors.
325
struct StreamCtx {
326
    stream: gio::InputStream,
327
    cancellable: Option<gio::Cancellable>,
328
    gio_error: Rc<RefCell<Option<glib::Error>>>,
329
}
330

            
331
// read() callback from xmlCreateIOParserCtxt()
332
103154
unsafe extern "C" fn stream_ctx_read(
333
103154
    context: *mut libc::c_void,
334
103154
    buffer: *mut libc::c_char,
335
103154
    len: libc::c_int,
336
103154
) -> libc::c_int {
337
103154
    let ctx = &mut *(context as *mut StreamCtx);
338
103154

            
339
103154
    let mut err_ref = ctx.gio_error.borrow_mut();
340
103154

            
341
103154
    // has the error been set already?
342
103154
    if err_ref.is_some() {
343
        return -1;
344
103154
    }
345
103154

            
346
103154
    // Convert from libc::c_char to u8.  Why transmute?  Because libc::c_char
347
103154
    // is of different signedness depending on the architecture (u8 on aarch64,
348
103154
    // i8 on x86_64).  If one just uses "start as *const u8", it triggers a
349
103154
    // trivial_casts warning.
350
103154
    #[allow(trivial_casts)]
351
103154
    let u8_buffer = buffer as *mut u8;
352
103154
    let buf = slice::from_raw_parts_mut(u8_buffer, len as usize);
353
103154

            
354
103154
    match ctx.stream.read(buf, ctx.cancellable.as_ref()) {
355
103154
        Ok(size) => size as libc::c_int,
356

            
357
        Err(e) => {
358
            // Just store the first I/O error we get; ignore subsequent ones.
359
            *err_ref = Some(e);
360
            -1
361
        }
362
    }
363
103154
}
364

            
365
// close() callback from xmlCreateIOParserCtxt()
366
21025
unsafe extern "C" fn stream_ctx_close(context: *mut libc::c_void) -> libc::c_int {
367
21025
    let ctx = &mut *(context as *mut StreamCtx);
368

            
369
21025
    let ret = match ctx.stream.close(ctx.cancellable.as_ref()) {
370
21025
        Ok(()) => 0,
371

            
372
        Err(e) => {
373
            let mut err_ref = ctx.gio_error.borrow_mut();
374

            
375
            // don't overwrite a previous error
376
            if err_ref.is_none() {
377
                *err_ref = Some(e);
378
            }
379

            
380
            -1
381
        }
382
    };
383

            
384
21025
    drop(Box::from_raw(ctx));
385
21025

            
386
21025
    ret
387
21025
}
388

            
389
21025
fn init_libxml2() {
390
    static ONCE: Once = Once::new();
391

            
392
21025
    ONCE.call_once(|| unsafe {
393
2053
        xmlInitParser();
394
21025
    });
395
21025
}
396

            
397
pub struct Xml2Parser<'a> {
398
    parser: Cell<xmlParserCtxtPtr>,
399
    state: &'a XmlState,
400
    gio_error: Rc<RefCell<Option<glib::Error>>>,
401
}
402

            
403
impl<'a> Xml2Parser<'a> {
404
21025
    pub fn from_stream(
405
21025
        state: &'a XmlState,
406
21025
        unlimited_size: bool,
407
21025
        stream: &gio::InputStream,
408
21025
        cancellable: Option<&gio::Cancellable>,
409
21025
    ) -> Result<Box<Xml2Parser<'a>>, LoadingError> {
410
21025
        init_libxml2();
411
21025

            
412
21025
        // The Xml2Parser we end up creating, if
413
21025
        // xmlCreateIOParserCtxt() is successful, needs to hold a
414
21025
        // location to place a GError from within the I/O callbacks
415
21025
        // stream_ctx_read() and stream_ctx_close().  We put this
416
21025
        // location in an Rc so that it can outlive the call to
417
21025
        // xmlCreateIOParserCtxt() in case that fails, since on
418
21025
        // failure that function frees the StreamCtx.
419
21025
        let gio_error = Rc::new(RefCell::new(None));
420
21025

            
421
21025
        let ctx = Box::new(StreamCtx {
422
21025
            stream: stream.clone(),
423
21025
            cancellable: cancellable.cloned(),
424
21025
            gio_error: gio_error.clone(),
425
21025
        });
426
21025

            
427
21025
        let mut sax_handler = get_xml2_sax_handler();
428
21025

            
429
21025
        let mut xml2_parser = Box::new(Xml2Parser {
430
21025
            parser: Cell::new(ptr::null_mut()),
431
21025
            state,
432
21025
            gio_error,
433
21025
        });
434
21025

            
435
21025
        unsafe {
436
21025
            let xml2_parser_ptr: *mut Xml2Parser<'a> = xml2_parser.as_mut();
437
21025
            let parser = xmlCreateIOParserCtxt(
438
21025
                &mut sax_handler,
439
21025
                xml2_parser_ptr as *mut _,
440
21025
                Some(stream_ctx_read),
441
21025
                Some(stream_ctx_close),
442
21025
                Box::into_raw(ctx) as *mut _,
443
21025
                XML_CHAR_ENCODING_NONE,
444
21025
            );
445
21025

            
446
21025
            if parser.is_null() {
447
                // on error, xmlCreateIOParserCtxt() frees our ctx via the
448
                // stream_ctx_close function
449
                Err(LoadingError::OutOfMemory(String::from(
450
                    "could not create XML parser",
451
                )))
452
            } else {
453
21025
                xml2_parser.parser.set(parser);
454
21025

            
455
21025
                set_xml_parse_options(parser, unlimited_size);
456
21025

            
457
21025
                Ok(xml2_parser)
458
            }
459
        }
460
21025
    }
461

            
462
21025
    pub fn parse(&self) -> Result<(), LoadingError> {
463
21025
        unsafe {
464
21025
            let parser = self.parser.get();
465
21025

            
466
21025
            let xml_parse_success = xmlParseDocument(parser) == 0;
467
21025

            
468
21025
            let mut err_ref = self.gio_error.borrow_mut();
469
21025

            
470
21025
            let io_error = err_ref.take();
471

            
472
21025
            if let Some(io_error) = io_error {
473
                Err(LoadingError::from(io_error))
474
21025
            } else if !xml_parse_success {
475
57
                let xerr = xmlCtxtGetLastError(parser as *mut _);
476
57
                let msg = xml2_error_to_string(xerr);
477
57
                Err(LoadingError::XmlParseError(msg))
478
            } else {
479
20968
                Ok(())
480
            }
481
        }
482
21025
    }
483
}
484

            
485
impl<'a> Drop for Xml2Parser<'a> {
486
21025
    fn drop(&mut self) {
487
21025
        let parser = self.parser.get();
488
21025
        free_xml_parser_and_doc(parser);
489
21025
        self.parser.set(ptr::null_mut());
490
21025
    }
491
}
492

            
493
57
fn xml2_error_to_string(xerr: xmlErrorPtr) -> String {
494
57
    unsafe {
495
57
        if !xerr.is_null() {
496
57
            let xerr = &*xerr;
497

            
498
57
            let file = if xerr.file.is_null() {
499
57
                "data".to_string()
500
            } else {
501
                from_glib_none(xerr.file)
502
            };
503

            
504
57
            let message = if xerr.message.is_null() {
505
                "-".to_string()
506
            } else {
507
57
                from_glib_none(xerr.message)
508
            };
509

            
510
57
            format!(
511
57
                "Error domain {} code {} on line {} column {} of {}: {}",
512
57
                xerr.domain, xerr.code, xerr.line, xerr.int2, file, message
513
57
            )
514
        } else {
515
            // The error is not set?  Return a generic message :(
516
            "Error parsing XML data".to_string()
517
        }
518
    }
519
57
}