diff --git a/ext/libxml/ruby_xml.c b/ext/libxml/ruby_xml.c index 4ef63977..5994ead7 100644 --- a/ext/libxml/ruby_xml.c +++ b/ext/libxml/ruby_xml.c @@ -328,264 +328,6 @@ static VALUE rxml_enabled_zlib_q(VALUE klass) #endif } -/* - * call-seq: - * XML.debug_entities -> (true|false) - * - * Determine whether included-entity debugging is enabled. - * (Requires Libxml to be compiled with debugging support) - */ -static VALUE rxml_debug_entities_get(VALUE klass) -{ -#ifdef LIBXML_DEBUG_ENABLED - if (xmlParserDebugEntities) - return(Qtrue); - else - return(Qfalse); -#else - rb_warn("libxml was compiled with debugging turned off"); - return (Qfalse); -#endif -} - -/* - * call-seq: - * XML.debug_entities = true|false - * - * Enable or disable included-entity debugging. - * (Requires Libxml to be compiled with debugging support) - */ -static VALUE rxml_debug_entities_set(VALUE klass, VALUE value) -{ -#ifdef LIBXML_DEBUG_ENABLED - if (value == Qfalse) - { - xmlParserDebugEntities = 0; - return(Qfalse); - } - else - { - xmlParserDebugEntities = 1; - return(Qtrue); - } -#else - rb_warn("libxml was compiled with debugging turned off"); -#endif -} - -/* - * call-seq: - * XML.default_keep_blanks -> (true|false) - * - * Determine whether parsers retain whitespace by default. - */ -static VALUE rxml_default_keep_blanks_get(VALUE klass) -{ - if (xmlKeepBlanksDefaultValue) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * XML.default_keep_blanks = true|false - * - * Controls whether parsers retain whitespace by default. 
- */ -static VALUE rxml_default_keep_blanks_set(VALUE klass, VALUE value) -{ - if (value == Qfalse) - { - xmlKeepBlanksDefaultValue = 0; - return (Qfalse); - } - else if (value == Qtrue) - { - xmlKeepBlanksDefaultValue = 1; - return (Qtrue); - } - else - { - rb_raise(rb_eArgError, "Invalid argument, must be a boolean"); - } -} - -/* - * call-seq: - * XML.default_load_external_dtd -> (true|false) - * - * Determine whether parsers load external DTDs by default. - */ -static VALUE rxml_default_load_external_dtd_get(VALUE klass) -{ - if (xmlLoadExtDtdDefaultValue) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * XML.default_load_external_dtd = true|false - * - * Controls whether parsers load external DTDs by default. - */ -static VALUE rxml_default_load_external_dtd_set(VALUE klass, VALUE value) -{ - if (value == Qfalse) - { - xmlLoadExtDtdDefaultValue = 0; - return (Qfalse); - } - else - { - xmlLoadExtDtdDefaultValue = 1; - return (Qtrue); - } -} - -/* - * call-seq: - * XML.default_line_numbers -> (true|false) - * - * Determine whether parsers retain line-numbers by default. - */ -static VALUE rxml_default_line_numbers_get(VALUE klass) -{ - if (xmlLineNumbersDefaultValue) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * XML.default_line_numbers = true|false - * - * Controls whether parsers retain line-numbers by default. 
- */ -static VALUE rxml_default_line_numbers_set(VALUE klass, VALUE value) -{ - if (value == Qfalse) - { - xmlLineNumbersDefault(0); - return (Qfalse); - } - else - { - xmlLineNumbersDefault(1); - return (Qtrue); - } -} - -int rxml_libxml_default_options(void) -{ - int options = 0; - - if (xmlLoadExtDtdDefaultValue) - options |= XML_PARSE_DTDLOAD; - - if (xmlDoValidityCheckingDefaultValue) - options |= XML_PARSE_DTDVALID; - - if (!xmlKeepBlanksDefaultValue) - options |= XML_PARSE_NOBLANKS; - - if (xmlSubstituteEntitiesDefaultValue) - options |= XML_PARSE_NOENT; - - if (!xmlGetWarningsDefaultValue) - options |= XML_PARSE_NOWARNING; - - if (xmlPedanticParserDefaultValue) - options |= XML_PARSE_PEDANTIC; - - return options; -} - -/* - * call-seq: - * XML.default_options -> int - * - * Returns an integer that summarize libxml2's default options. - */ -static VALUE rxml_default_options_get(VALUE klass) -{ - int options = rxml_libxml_default_options(); - return INT2NUM(options); -} - -/* - * call-seq: - * XML.default_pedantic_parser -> (true|false) - * - * Determine whether parsers are pedantic by default. - */ -static VALUE rxml_default_pedantic_parser_get(VALUE klass) -{ - if (xmlPedanticParserDefaultValue) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * XML.default_pedantic_parser = true|false - * - * Controls whether parsers are pedantic by default. - */ -static VALUE rxml_default_pedantic_parser_set(VALUE klass, VALUE value) -{ - if (value == Qfalse) - { - xmlPedanticParserDefault(0); - return (Qfalse); - } - else - { - xmlPedanticParserDefault(1); - return (Qtrue); - } -} - -/* - * call-seq: - * XML.default_substitute_entities -> (true|false) - * - * Determine whether parsers perform inline entity substitution - * (for external entities) by default. 
- */ -static VALUE rxml_default_substitute_entities_get(VALUE klass) -{ - if (xmlSubstituteEntitiesDefaultValue) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * XML.default_substitute_entities = true|false - * - * Controls whether parsers perform inline entity substitution - * (for external entities) by default. - */ -static VALUE rxml_default_substitute_entities_set(VALUE klass, VALUE value) -{ - if (value == Qfalse) - { - xmlSubstituteEntitiesDefault(0); - return (Qfalse); - } - else - { - xmlSubstituteEntitiesDefault(1); - return (Qtrue); - } -} - /* * call-seq: * XML.default_tree_indent_string -> "string" @@ -615,74 +357,6 @@ static VALUE rxml_default_tree_indent_string_set(VALUE klass, VALUE string) return (string); } -/* - * call-seq: - * XML.default_validity_checking -> (true|false) - * - * Determine whether parsers perform XML validation by default. - */ -static VALUE rxml_default_validity_checking_get(VALUE klass) -{ - if (xmlDoValidityCheckingDefaultValue) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * XML.default_validity_checking = true|false - * - * Controls whether parsers perform XML validation by default. - */ -static VALUE rxml_default_validity_checking_set(VALUE klass, VALUE value) -{ - if (value == Qfalse) - { - xmlDoValidityCheckingDefaultValue = 0; - return (Qfalse); - } - else - { - xmlDoValidityCheckingDefaultValue = 1; - return (Qtrue); - } -} - -/* - * call-seq: - * XML.default_warnings -> (true|false) - * - * Determine whether parsers output warnings by default. - */ -static VALUE rxml_default_warnings_get(VALUE klass) -{ - if (xmlGetWarningsDefaultValue) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * XML.default_warnings = true|false - * - * Controls whether parsers output warnings by default. 
- */ -static VALUE rxml_default_warnings_set(VALUE klass, VALUE value) -{ - if (value == Qfalse) - { - xmlGetWarningsDefaultValue = 0; - return (Qfalse); - } - else - { - xmlGetWarningsDefaultValue = 1; - return (Qtrue); - } -} - /* * call-seq: * XML.default_compression -> (true|false) @@ -869,27 +543,10 @@ void rxml_init_xml(void) rb_define_module_function(mXML, "catalog_dump", rxml_catalog_dump, 0); rb_define_module_function(mXML, "catalog_remove", rxml_catalog_remove, 1); rb_define_module_function(mXML, "check_lib_versions", rxml_check_lib_versions, 0); - rb_define_module_function(mXML, "debug_entities", rxml_debug_entities_get, 0); - rb_define_module_function(mXML, "debug_entities=", rxml_debug_entities_set, 1); rb_define_module_function(mXML, "default_compression", rxml_default_compression_get, 0); rb_define_module_function(mXML, "default_compression=", rxml_default_compression_set, 1); - rb_define_module_function(mXML, "default_keep_blanks", rxml_default_keep_blanks_get, 0); - rb_define_module_function(mXML, "default_keep_blanks=", rxml_default_keep_blanks_set, 1); - rb_define_module_function(mXML, "default_load_external_dtd", rxml_default_load_external_dtd_get, 0); - rb_define_module_function(mXML, "default_load_external_dtd=", rxml_default_load_external_dtd_set, 1); - rb_define_module_function(mXML, "default_line_numbers", rxml_default_line_numbers_get, 0); - rb_define_module_function(mXML, "default_line_numbers=", rxml_default_line_numbers_set, 1); - rb_define_module_function(mXML, "default_options", rxml_default_options_get, 0); - rb_define_module_function(mXML, "default_pedantic_parser", rxml_default_pedantic_parser_get, 0); - rb_define_module_function(mXML, "default_pedantic_parser=", rxml_default_pedantic_parser_set, 1); - rb_define_module_function(mXML, "default_substitute_entities", rxml_default_substitute_entities_get, 0); - rb_define_module_function(mXML, "default_substitute_entities=", rxml_default_substitute_entities_set, 1); 
rb_define_module_function(mXML, "default_tree_indent_string", rxml_default_tree_indent_string_get, 0); rb_define_module_function(mXML, "default_tree_indent_string=", rxml_default_tree_indent_string_set, 1); - rb_define_module_function(mXML, "default_validity_checking", rxml_default_validity_checking_get, 0); - rb_define_module_function(mXML, "default_validity_checking=", rxml_default_validity_checking_set, 1); - rb_define_module_function(mXML, "default_warnings", rxml_default_warnings_get, 0); - rb_define_module_function(mXML, "default_warnings=", rxml_default_warnings_set, 1); rb_define_module_function(mXML, "default_save_no_empty_tags", rxml_default_save_no_empty_tags_get, 0); rb_define_module_function(mXML, "default_save_no_empty_tags=", rxml_default_save_no_empty_tags_set, 1); rb_define_module_function(mXML, "indent_tree_output", rxml_indent_tree_output_get, 0); diff --git a/ext/libxml/ruby_xml.h b/ext/libxml/ruby_xml.h index cea066b5..a4676ebb 100644 --- a/ext/libxml/ruby_xml.h +++ b/ext/libxml/ruby_xml.h @@ -4,7 +4,6 @@ #define __RUBY_XML_H__ extern VALUE mXML; -int rxml_libxml_default_options(void); void rxml_init_xml(void); #endif diff --git a/ext/libxml/ruby_xml_html_parser_context.c b/ext/libxml/ruby_xml_html_parser_context.c index bac885ed..c08978dc 100644 --- a/ext/libxml/ruby_xml_html_parser_context.c +++ b/ext/libxml/ruby_xml_html_parser_context.c @@ -146,10 +146,14 @@ static VALUE rxml_html_parser_context_wrap(htmlParserCtxtPtr ctxt) * * Parameters: * - * file - A filename or uri. 
+ * file - A filename or uri + * options - Optional bitwise OR of LibXML::XML::HTMLParser::Options values (default: 0) */ -static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file) +static VALUE rxml_html_parser_context_file(int argc, VALUE* argv, VALUE klass) { + VALUE file, options; + rb_scan_args(argc, argv, "11", &file, &options); + htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL); if (!ctxt) rxml_raise(&xmlLastError); @@ -158,7 +162,7 @@ static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file) xmlCtxtUseOptionsInternal (called below) initialize slightly different context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ - htmlCtxtUseOptions(ctxt, rxml_libxml_default_options()); + htmlCtxtUseOptions(ctxt, options == Qnil ? 0 : NUM2INT(options)); return rxml_html_parser_context_wrap(ctxt); } @@ -170,10 +174,14 @@ static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file) * * Parameters: * - * io - A ruby IO object. + * io - A ruby IO object + * options - Optional bitwise OR of LibXML::XML::HTMLParser::Options values (default: 0) */ -static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io) +static VALUE rxml_html_parser_context_io(int argc, VALUE* argv, VALUE klass) { + VALUE io, options; + rb_scan_args(argc, argv, "11", &io, &options); + VALUE result; htmlParserCtxtPtr ctxt; xmlParserInputBufferPtr input; @@ -196,7 +204,7 @@ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io) xmlCtxtUseOptionsInternal (called below) initialize slightly different context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ - htmlCtxtUseOptions(ctxt, rxml_libxml_default_options()); + htmlCtxtUseOptions(ctxt, options == Qnil ? 
0 : NUM2INT(options)); stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); @@ -222,10 +230,14 @@ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io) * * Parameters: * - * string - A string that contains the data to parse. + * string - A string that contains the data to parse + * options - Optional bitwise OR of LibXML::XML::HTMLParser::Options values (default: 0) */ -static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string) +static VALUE rxml_html_parser_context_string(int argc, VALUE* argv, VALUE klass) { + VALUE string, options; + rb_scan_args(argc, argv, "11", &string, &options); + htmlParserCtxtPtr ctxt; Check_Type(string, T_STRING); @@ -241,7 +253,7 @@ static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string) xmlCtxtUseOptionsInternal (called below) initialize slightly different context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ - htmlCtxtUseOptions(ctxt, rxml_libxml_default_options()); + htmlCtxtUseOptions(ctxt, options == Qnil ? 
0 : NUM2INT(options)); if (ctxt->sax != NULL) memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); @@ -328,9 +340,9 @@ void rxml_init_html_parser_context(void) IO_ATTR = ID2SYM(rb_intern("@io")); cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext); - rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1); - rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1); - rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1); + rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, -1); + rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, -1); + rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, -1); rb_define_method(cXMLHtmlParserContext, "close", rxml_html_parser_context_close, 0); rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1); rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1); diff --git a/ext/libxml/ruby_xml_parser_context.c b/ext/libxml/ruby_xml_parser_context.c index a30818a5..c7f1a9ac 100644 --- a/ext/libxml/ruby_xml_parser_context.c +++ b/ext/libxml/ruby_xml_parser_context.c @@ -1,1001 +1,1009 @@ -/* Please see the LICENSE file for copyright and distribution information */ - -#include "ruby_libxml.h" -#include "ruby_xml_parser_context.h" - -#include - -VALUE cXMLParserContext; -static ID IO_ATTR; - -/* - * Document-class: LibXML::XML::Parser::Context - * - * The XML::Parser::Context class provides in-depth control over how - * a document is parsed. 
- */ - -static void rxml_parser_context_free(xmlParserCtxtPtr ctxt) -{ - xmlFreeParserCtxt(ctxt); -} - -static VALUE rxml_parser_context_wrap(xmlParserCtxtPtr ctxt) -{ - return Data_Wrap_Struct(cXMLParserContext, NULL, rxml_parser_context_free, ctxt); -} - - -static VALUE rxml_parser_context_alloc(VALUE klass) -{ - xmlParserCtxtPtr ctxt = xmlNewParserCtxt(); - return Data_Wrap_Struct(klass, NULL, rxml_parser_context_free, ctxt); -} - -/* call-seq: - * XML::Parser::Context.document(document) -> XML::Parser::Context - * - * Creates a new parser context based on the specified document. - * - * Parameters: - * - * document - An XML::Document instance. - */ -static VALUE rxml_parser_context_document(VALUE klass, VALUE document) -{ - xmlParserCtxtPtr ctxt; - xmlDocPtr xdoc; - xmlChar *buffer; - int length; - - if (rb_obj_is_kind_of(document, cXMLDocument) == Qfalse) - rb_raise(rb_eTypeError, "Must pass an LibXML::XML::Document object"); - - Data_Get_Struct(document, xmlDoc, xdoc); - xmlDocDumpFormatMemoryEnc(xdoc, &buffer, &length, (const char*)xdoc->encoding, 0); - - ctxt = xmlCreateDocParserCtxt(buffer); - - if (!ctxt) - rxml_raise(&xmlLastError); - - /* This is annoying, but xmlInitParserCtxt (called indirectly above) and - xmlCtxtUseOptionsInternal (called below) initialize slightly different - context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt - sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ - xmlCtxtUseOptions(ctxt, rxml_libxml_default_options()); - - return rxml_parser_context_wrap(ctxt); -} - -/* call-seq: - * XML::Parser::Context.file(file) -> XML::Parser::Context - * - * Creates a new parser context based on the specified file or uri. - * - * Parameters: - * - * file - A filename or uri. 
-*/ -static VALUE rxml_parser_context_file(VALUE klass, VALUE file) -{ - xmlParserCtxtPtr ctxt = xmlCreateURLParserCtxt(StringValuePtr(file), 0); - - if (!ctxt) - rxml_raise(&xmlLastError); - - /* This is annoying, but xmlInitParserCtxt (called indirectly above) and - xmlCtxtUseOptionsInternal (called below) initialize slightly different - context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt - sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ - xmlCtxtUseOptions(ctxt, rxml_libxml_default_options()); - - return rxml_parser_context_wrap(ctxt); -} - -/* call-seq: - * XML::Parser::Context.string(string) -> XML::Parser::Context - * - * Creates a new parser context based on the specified string. - * - * Parameters: - * - * string - A string that contains the data to parse. -*/ -static VALUE rxml_parser_context_string(VALUE klass, VALUE string) -{ - xmlParserCtxtPtr ctxt; - Check_Type(string, T_STRING); - - if (RSTRING_LEN(string) == 0) - rb_raise(rb_eArgError, "Must specify a string with one or more characters"); - - ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string), (int)RSTRING_LEN(string)); - - if (!ctxt) - rxml_raise(&xmlLastError); - - /* This is annoying, but xmlInitParserCtxt (called indirectly above) and - xmlCtxtUseOptionsInternal (called below) initialize slightly different - context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt - sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ - xmlCtxtUseOptions(ctxt, rxml_libxml_default_options()); - - return rxml_parser_context_wrap(ctxt); -} - -/* call-seq: - * XML::Parser::Context.io(io) -> XML::Parser::Context - * - * Creates a new parser context based on the specified io object. - * - * Parameters: - * - * io - A ruby IO object. 
-*/ -static VALUE rxml_parser_context_io(VALUE klass, VALUE io) -{ - VALUE result; - xmlParserCtxtPtr ctxt; - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; - - if (NIL_P(io)) - rb_raise(rb_eTypeError, "Must pass in an IO object"); - - input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL, - (void*)io, XML_CHAR_ENCODING_NONE); - - ctxt = xmlNewParserCtxt(); - - if (!ctxt) - { - xmlFreeParserInputBuffer(input); - rxml_raise(&xmlLastError); - } - - /* This is annoying, but xmlInitParserCtxt (called indirectly above) and - xmlCtxtUseOptionsInternal (called below) initialize slightly different - context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt - sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ - xmlCtxtUseOptions(ctxt, rxml_libxml_default_options()); - - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - - if (!stream) - { - xmlFreeParserInputBuffer(input); - xmlFreeParserCtxt(ctxt); - rxml_raise(&xmlLastError); - } - inputPush(ctxt, stream); - result = rxml_parser_context_wrap(ctxt); - - /* Attach io object to parser so it won't get freed.*/ - rb_ivar_set(result, IO_ATTR, io); - - return result; -} - -/* - * call-seq: - * context.base_uri -> "http:://libxml.org" - * - * Obtain the base url for this parser context. - */ -static VALUE rxml_parser_context_base_uri_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->input && ctxt->input->filename) - return rxml_new_cstr((const xmlChar*)ctxt->input->filename, ctxt->encoding); - else - return Qnil; -} - -/* - * call-seq: - * context.base_uri = "http:://libxml.org" - * - * Sets the base url for this parser context. 
- */ -static VALUE rxml_parser_context_base_uri_set(VALUE self, VALUE url) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - Check_Type(url, T_STRING); - - if (ctxt->input && !ctxt->input->filename) - { - const char* xurl = StringValuePtr(url); - ctxt->input->filename = (const char*)xmlStrdup((const xmlChar*)xurl); - } - return self; -} - -/* - * call-seq: - * context.close -> nil - * - * Closes the underlying input streams. This is useful when parsing a large amount of - * files and you want to close the files without relying on Ruby's garbage collector - * to run. - */ -static VALUE rxml_parser_context_close(VALUE self) -{ - xmlParserCtxtPtr ctxt; - xmlParserInputPtr xinput; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - while ((xinput = inputPop(ctxt)) != NULL) - { - xmlFreeInputStream(xinput); - } - return Qnil; -} - -/* - * call-seq: - * context.data_directory -> "dir" - * - * Obtain the data directory associated with this context. - */ -static VALUE rxml_parser_context_data_directory_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->directory == NULL) - return (Qnil); - else - return (rxml_new_cstr((const xmlChar*)ctxt->directory, ctxt->encoding)); -} - -/* - * call-seq: - * context.depth -> num - * - * Obtain the depth of this context. - */ -static VALUE rxml_parser_context_depth_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (INT2NUM(ctxt->depth)); -} - -/* - * call-seq: - * context.disable_cdata? -> (true|false) - * - * Determine whether CDATA nodes will be created in this context. - */ -static VALUE rxml_parser_context_disable_cdata_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - /* LibXML controls this internally with the default SAX handler. 
*/ - if (ctxt->sax && ctxt->sax->cdataBlock) - return (Qfalse); - else - return (Qtrue); -} - -/* - * call-seq: - * context.disable_cdata = (true|false) - * - * Control whether CDATA nodes will be created in this context. - */ -static VALUE rxml_parser_context_disable_cdata_set(VALUE self, VALUE value) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->sax == NULL) - rb_raise(rb_eRuntimeError, "Sax handler is not yet set"); - - /* LibXML controls this internally with the default SAX handler. */ - if (value) - ctxt->sax->cdataBlock = NULL; - else - ctxt->sax->cdataBlock = xmlDefaultSAXHandler.cdataBlock; - - return value; -} - -/* - * call-seq: - * context.disable_sax? -> (true|false) - * - * Determine whether SAX-based processing is disabled - * in this context. - */ -static VALUE rxml_parser_context_disable_sax_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->disableSAX) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.docbook? -> (true|false) - * - * Determine whether this is a docbook context. - */ -static VALUE rxml_parser_context_docbook_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->html == 2) // TODO check this - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.encoding -> XML::Encoding::UTF_8 - * - * Obtain the character encoding identifier used in - * this context. - */ -static VALUE rxml_parser_context_encoding_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - return INT2NUM(xmlParseCharEncoding((const char*)ctxt->encoding)); -} - -/* - * call-seq: - * context.encoding = XML::Encoding::UTF_8 - * - * Sets the character encoding for this context. 
- */ -static VALUE rxml_parser_context_encoding_set(VALUE self, VALUE encoding) -{ - xmlParserCtxtPtr ctxt; - int result; - const char* xencoding = xmlGetCharEncodingName((xmlCharEncoding)NUM2INT(encoding)); - xmlCharEncodingHandlerPtr hdlr = xmlFindCharEncodingHandler(xencoding); - - if (!hdlr) - rb_raise(rb_eArgError, "Unknown encoding: %i", NUM2INT(encoding)); - - Data_Get_Struct(self, xmlParserCtxt, ctxt); - result = xmlSwitchToEncoding(ctxt, hdlr); - - if (result != 0) - rxml_raise(&xmlLastError); - - if (ctxt->encoding != NULL) - xmlFree((xmlChar *) ctxt->encoding); - - ctxt->encoding = xmlStrdup((const xmlChar *) xencoding); - return self; -} - -/* - * call-seq: - * context.errno -> num - * - * Obtain the last-error number in this context. - */ -static VALUE rxml_parser_context_errno_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (INT2NUM(ctxt->errNo)); -} - -/* - * call-seq: - * context.html? -> (true|false) - * - * Determine whether this is an html context. - */ -static VALUE rxml_parser_context_html_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->html == 1) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.max_num_streams -> num - * - * Obtain the limit on the number of IO streams opened in - * this context. - */ -static VALUE rxml_parser_context_io_max_num_streams_get(VALUE self) -{ - // TODO alias to max_streams and dep this? - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (INT2NUM(ctxt->inputMax)); -} - -/* - * call-seq: - * context.num_streams -> "dir" - * - * Obtain the actual number of IO streams in this - * context. - */ -static VALUE rxml_parser_context_io_num_streams_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (INT2NUM(ctxt->inputNr)); -} - -/* - * call-seq: - * context.keep_blanks? 
-> (true|false) - * - * Determine whether parsers in this context retain - * whitespace. - */ -static VALUE rxml_parser_context_keep_blanks_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->keepBlanks) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.name_depth -> num - * - * Obtain the name depth for this context. - */ -static VALUE rxml_parser_context_name_depth_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (INT2NUM(ctxt->nameNr)); -} - -/* - * call-seq: - * context.name_depth_max -> num - * - * Obtain the maximum name depth for this context. - */ -static VALUE rxml_parser_context_name_depth_max_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (INT2NUM(ctxt->nameMax)); -} - -/* - * call-seq: - * context.name_node -> "name" - * - * Obtain the name node for this context. - */ -static VALUE rxml_parser_context_name_node_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->name == NULL) - return (Qnil); - else - return (rxml_new_cstr( ctxt->name, ctxt->encoding)); -} - -/* - * call-seq: - * context.name_tab -> ["name", ..., "name"] - * - * Obtain the name table for this context. - */ -static VALUE rxml_parser_context_name_tab_get(VALUE self) -{ - int i; - xmlParserCtxtPtr ctxt; - VALUE tab_ary; - - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->nameTab == NULL) - return (Qnil); - - tab_ary = rb_ary_new(); - - for (i = (ctxt->nameNr - 1); i >= 0; i--) - { - if (ctxt->nameTab[i] == NULL) - continue; - else - rb_ary_push(tab_ary, rxml_new_cstr( ctxt->nameTab[i], ctxt->encoding)); - } - - return (tab_ary); -} - -/* - * call-seq: - * context.node_depth -> num - * - * Obtain the node depth for this context. 
- */ -static VALUE rxml_parser_context_node_depth_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (INT2NUM(ctxt->nodeNr)); -} - -/* - * call-seq: - * context.node -> node - * - * Obtain the root node of this context. - */ -static VALUE rxml_parser_context_node_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->node == NULL) - return (Qnil); - else - return (rxml_node_wrap(ctxt->node)); -} - -/* - * call-seq: - * context.node_depth_max -> num - * - * Obtain the maximum node depth for this context. - */ -static VALUE rxml_parser_context_node_depth_max_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (INT2NUM(ctxt->nodeMax)); -} - -/* - * call-seq: - * context.num_chars -> num - * - * Obtain the number of characters in this context. - */ -static VALUE rxml_parser_context_num_chars_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (LONG2NUM(ctxt->nbChars)); -} - - -/* - * call-seq: - * context.options > XML::Parser::Options::NOENT - * - * Returns the parser options for this context. Multiple - * options can be combined by using Bitwise OR (|). - */ -static VALUE rxml_parser_context_options_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return INT2NUM(ctxt->options); -} - -/* - * call-seq: - * context.options = XML::Parser::Options::NOENT | - XML::Parser::Options::NOCDATA - * - * Provides control over the execution of a parser. Valid values - * are the constants defined on XML::Parser::Options. Multiple - * options can be combined by using Bitwise OR (|). 
- */ -static VALUE rxml_parser_context_options_set(VALUE self, VALUE options) -{ - xmlParserCtxtPtr ctxt; - Check_Type(options, T_FIXNUM); - - Data_Get_Struct(self, xmlParserCtxt, ctxt); - xmlCtxtUseOptions(ctxt, NUM2INT(options)); - - return self; -} - -/* - * call-seq: - * context.recovery? -> (true|false) - * - * Determine whether recovery mode is enabled in this - * context. - */ -static VALUE rxml_parser_context_recovery_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->recovery) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.recovery = true|false - * - * Control whether recovery mode is enabled in this - * context. - */ -static VALUE rxml_parser_context_recovery_set(VALUE self, VALUE value) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (value == Qfalse) - { - ctxt->recovery = 0; - return (Qfalse); - } - else - { - ctxt->recovery = 1; - return (Qtrue); - } -} - -/* - * call-seq: - * context.replace_entities? -> (true|false) - * - * Determine whether external entity replacement is enabled in this - * context. - */ -static VALUE rxml_parser_context_replace_entities_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->replaceEntities) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.replace_entities = true|false - * - * Control whether external entity replacement is enabled in this - * context. - */ -static VALUE rxml_parser_context_replace_entities_set(VALUE self, VALUE value) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (value == Qfalse) - { - ctxt->replaceEntities = 0; - return (Qfalse); - } - else - { - ctxt->replaceEntities = 1; - return (Qtrue); - } -} - -/* - * call-seq: - * context.space_depth -> num - * - * Obtain the space depth for this context. 
- */ -static VALUE rxml_parser_context_space_depth_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (INT2NUM(ctxt->spaceNr)); -} - -/* - * call-seq: - * context.space_depth -> num - * - * Obtain the maximum space depth for this context. - */ -static VALUE rxml_parser_context_space_depth_max_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - return (INT2NUM(ctxt->spaceMax)); -} - -/* - * call-seq: - * context.subset_external? -> (true|false) - * - * Determine whether this context is a subset of an - * external context. - */ -static VALUE rxml_parser_context_subset_external_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->inSubset == 2) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.subset_internal? -> (true|false) - * - * Determine whether this context is a subset of an - * internal context. - */ -static VALUE rxml_parser_context_subset_internal_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->inSubset == 1) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.subset_internal_name -> "name" - * - * Obtain this context's subset name (valid only if - * either of subset_external? or subset_internal? - * is true). - */ -static VALUE rxml_parser_context_subset_name_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->intSubName == NULL) - return (Qnil); - else - return (rxml_new_cstr(ctxt->intSubName, ctxt->encoding)); -} - -/* - * call-seq: - * context.subset_external_uri -> "uri" - * - * Obtain this context's external subset URI. (valid only if - * either of subset_external? or subset_internal? - * is true). 
- */ -static VALUE rxml_parser_context_subset_external_uri_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->extSubURI == NULL) - return (Qnil); - else - return (rxml_new_cstr( ctxt->extSubURI, ctxt->encoding)); -} - -/* - * call-seq: - * context.subset_external_system_id -> "system_id" - * - * Obtain this context's external subset system identifier. - * (valid only if either of subset_external? or subset_internal? - * is true). - */ -static VALUE rxml_parser_context_subset_external_system_id_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->extSubSystem == NULL) - return (Qnil); - else - return (rxml_new_cstr( ctxt->extSubSystem, ctxt->encoding)); -} - -/* - * call-seq: - * context.standalone? -> (true|false) - * - * Determine whether this is a standalone context. - */ -static VALUE rxml_parser_context_standalone_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->standalone) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.stats? -> (true|false) - * - * Determine whether this context maintains statistics. - */ -static VALUE rxml_parser_context_stats_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->record_info) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.valid? -> (true|false) - * - * Determine whether this context is valid. - */ -static VALUE rxml_parser_context_valid_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->valid) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.validate? -> (true|false) - * - * Determine whether validation is enabled in this context. 
- */ -static VALUE rxml_parser_context_validate_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->validate) - return (Qtrue); - else - return (Qfalse); -} - -/* - * call-seq: - * context.version -> "version" - * - * Obtain this context's version identifier. - */ -static VALUE rxml_parser_context_version_get(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->version == NULL) - return (Qnil); - else - return (rxml_new_cstr( ctxt->version, ctxt->encoding)); -} - -/* - * call-seq: - * context.well_formed? -> (true|false) - * - * Determine whether this context contains well-formed XML. - */ -static VALUE rxml_parser_context_well_formed_q(VALUE self) -{ - xmlParserCtxtPtr ctxt; - Data_Get_Struct(self, xmlParserCtxt, ctxt); - - if (ctxt->wellFormed) - return (Qtrue); - else - return (Qfalse); -} - -void rxml_init_parser_context(void) -{ - IO_ATTR = ID2SYM(rb_intern("@io")); - - cXMLParserContext = rb_define_class_under(cXMLParser, "Context", rb_cObject); - rb_define_alloc_func(cXMLParserContext, rxml_parser_context_alloc); - - rb_define_singleton_method(cXMLParserContext, "document", rxml_parser_context_document, 1); - rb_define_singleton_method(cXMLParserContext, "file", rxml_parser_context_file, 1); - rb_define_singleton_method(cXMLParserContext, "io", rxml_parser_context_io, 1); - rb_define_singleton_method(cXMLParserContext, "string", rxml_parser_context_string, 1); - - rb_define_method(cXMLParserContext, "base_uri", rxml_parser_context_base_uri_get, 0); - rb_define_method(cXMLParserContext, "base_uri=", rxml_parser_context_base_uri_set, 1); - rb_define_method(cXMLParserContext, "close", rxml_parser_context_close, 0); - rb_define_method(cXMLParserContext, "data_directory", rxml_parser_context_data_directory_get, 0); - rb_define_method(cXMLParserContext, "depth", rxml_parser_context_depth_get, 0); - rb_define_method(cXMLParserContext, "disable_cdata?", 
rxml_parser_context_disable_cdata_q, 0); - rb_define_method(cXMLParserContext, "disable_cdata=", rxml_parser_context_disable_cdata_set, 1); - rb_define_method(cXMLParserContext, "disable_sax?", rxml_parser_context_disable_sax_q, 0); - rb_define_method(cXMLParserContext, "docbook?", rxml_parser_context_docbook_q, 0); - rb_define_method(cXMLParserContext, "encoding", rxml_parser_context_encoding_get, 0); - rb_define_method(cXMLParserContext, "encoding=", rxml_parser_context_encoding_set, 1); - rb_define_method(cXMLParserContext, "errno", rxml_parser_context_errno_get, 0); - rb_define_method(cXMLParserContext, "html?", rxml_parser_context_html_q, 0); - rb_define_method(cXMLParserContext, "io_max_num_streams", rxml_parser_context_io_max_num_streams_get, 0); - rb_define_method(cXMLParserContext, "io_num_streams", rxml_parser_context_io_num_streams_get, 0); - rb_define_method(cXMLParserContext, "keep_blanks?", rxml_parser_context_keep_blanks_q, 0); - rb_define_method(cXMLParserContext, "name_node", rxml_parser_context_name_node_get, 0); - rb_define_method(cXMLParserContext, "name_depth", rxml_parser_context_name_depth_get, 0); - rb_define_method(cXMLParserContext, "name_depth_max", rxml_parser_context_name_depth_max_get, 0); - rb_define_method(cXMLParserContext, "name_tab", rxml_parser_context_name_tab_get, 0); - rb_define_method(cXMLParserContext, "node", rxml_parser_context_node_get, 0); - rb_define_method(cXMLParserContext, "node_depth", rxml_parser_context_node_depth_get, 0); - rb_define_method(cXMLParserContext, "node_depth_max", rxml_parser_context_node_depth_max_get, 0); - rb_define_method(cXMLParserContext, "num_chars", rxml_parser_context_num_chars_get, 0); - rb_define_method(cXMLParserContext, "options", rxml_parser_context_options_get, 0); - rb_define_method(cXMLParserContext, "options=", rxml_parser_context_options_set, 1); - rb_define_method(cXMLParserContext, "recovery?", rxml_parser_context_recovery_q, 0); - rb_define_method(cXMLParserContext, "recovery=", 
rxml_parser_context_recovery_set, 1); - rb_define_method(cXMLParserContext, "replace_entities?", rxml_parser_context_replace_entities_q, 0); - rb_define_method(cXMLParserContext, "replace_entities=", rxml_parser_context_replace_entities_set, 1); - rb_define_method(cXMLParserContext, "space_depth", rxml_parser_context_space_depth_get, 0); - rb_define_method(cXMLParserContext, "space_depth_max", rxml_parser_context_space_depth_max_get, 0); - rb_define_method(cXMLParserContext, "subset_external?", rxml_parser_context_subset_external_q, 0); - rb_define_method(cXMLParserContext, "subset_external_system_id", rxml_parser_context_subset_external_system_id_get, 0); - rb_define_method(cXMLParserContext, "subset_external_uri", rxml_parser_context_subset_external_uri_get, 0); - rb_define_method(cXMLParserContext, "subset_internal?", rxml_parser_context_subset_internal_q, 0); - rb_define_method(cXMLParserContext, "subset_internal_name", rxml_parser_context_subset_name_get, 0); - rb_define_method(cXMLParserContext, "stats?", rxml_parser_context_stats_q, 0); - rb_define_method(cXMLParserContext, "standalone?", rxml_parser_context_standalone_q, 0); - rb_define_method(cXMLParserContext, "valid", rxml_parser_context_valid_q, 0); - rb_define_method(cXMLParserContext, "validate?", rxml_parser_context_validate_q, 0); - rb_define_method(cXMLParserContext, "version", rxml_parser_context_version_get, 0); - rb_define_method(cXMLParserContext, "well_formed?", rxml_parser_context_well_formed_q, 0); -} +/* Please see the LICENSE file for copyright and distribution information */ + +#include "ruby_libxml.h" +#include "ruby_xml_parser_context.h" + +#include + +VALUE cXMLParserContext; +static ID IO_ATTR; + +/* + * Document-class: LibXML::XML::Parser::Context + * + * The XML::Parser::Context class provides in-depth control over how + * a document is parsed. 
+ */ + +static void rxml_parser_context_free(xmlParserCtxtPtr ctxt) +{ + xmlFreeParserCtxt(ctxt); +} + +static VALUE rxml_parser_context_wrap(xmlParserCtxtPtr ctxt) +{ + return Data_Wrap_Struct(cXMLParserContext, NULL, rxml_parser_context_free, ctxt); +} + + +static VALUE rxml_parser_context_alloc(VALUE klass) +{ + xmlParserCtxtPtr ctxt = xmlNewParserCtxt(); + return Data_Wrap_Struct(klass, NULL, rxml_parser_context_free, ctxt); +} + +/* call-seq: + * XML::Parser::Context.document(document) -> XML::Parser::Context + * + * Creates a new parser context based on the specified document. + * + * Parameters: + * + * document - An XML::Document instance + * options - A or'ed together list of LibXML::XML::Parser::Options values + */ +static VALUE rxml_parser_context_document(int argc, VALUE* argv, VALUE klass) +{ + VALUE document, options; + rb_scan_args(argc, argv, "11", &document, &options); + + if (rb_obj_is_kind_of(document, cXMLDocument) == Qfalse) + rb_raise(rb_eTypeError, "Must pass an LibXML::XML::Document object"); + + xmlDocPtr xdoc; + xmlChar *buffer; + int length; + Data_Get_Struct(document, xmlDoc, xdoc); + xmlDocDumpFormatMemoryEnc(xdoc, &buffer, &length, (const char*)xdoc->encoding, 0); + + xmlParserCtxtPtr ctxt = xmlCreateDocParserCtxt(buffer); + + if (!ctxt) + rxml_raise(&xmlLastError); + + /* This is annoying, but xmlInitParserCtxt (called indirectly above) and + xmlCtxtUseOptionsInternal (called below) initialize slightly different + context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt + sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ + xmlCtxtUseOptions(ctxt, options == Qnil ? 0 : NUM2INT(options)); + + return rxml_parser_context_wrap(ctxt); +} + +/* call-seq: + * XML::Parser::Context.file(file) -> XML::Parser::Context + * + * Creates a new parser context based on the specified file or uri. 
+ * + * Parameters: + * + * file - A filename or uri + * options - A or'ed together list of LibXML::XML::Parser::Options values +*/ +static VALUE rxml_parser_context_file(int argc, VALUE* argv, VALUE klass) +{ + VALUE file, options; + rb_scan_args(argc, argv, "11", &file, &options); + + xmlParserCtxtPtr ctxt = xmlCreateURLParserCtxt(StringValuePtr(file), 0); + + if (!ctxt) + rxml_raise(&xmlLastError); + + /* This is annoying, but xmlInitParserCtxt (called indirectly above) and + xmlCtxtUseOptionsInternal (called below) initialize slightly different + context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt + sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ + xmlCtxtUseOptions(ctxt, options == Qnil ? 0 : NUM2INT(options)); + + return rxml_parser_context_wrap(ctxt); +} + +/* call-seq: + * XML::Parser::Context.string(string) -> XML::Parser::Context + * + * Creates a new parser context based on the specified string. + * + * Parameters: + * + * string - A string that contains the data to parse + * options - A or'ed together list of LibXML::XML::Parser::Options values +*/ +static VALUE rxml_parser_context_string(int argc, VALUE* argv, VALUE klass) +{ + VALUE string, options; + rb_scan_args(argc, argv, "11", &string, &options); + + Check_Type(string, T_STRING); + + if (RSTRING_LEN(string) == 0) + rb_raise(rb_eArgError, "Must specify a string with one or more characters"); + + xmlParserCtxtPtr ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string), (int)RSTRING_LEN(string)); + + if (!ctxt) + rxml_raise(&xmlLastError); + + /* This is annoying, but xmlInitParserCtxt (called indirectly above) and + xmlCtxtUseOptionsInternal (called below) initialize slightly different + context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt + sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ + xmlCtxtUseOptions(ctxt, options == Qnil ? 
0 : NUM2INT(options)); + + return rxml_parser_context_wrap(ctxt); +} + +/* call-seq: + * XML::Parser::Context.io(io) -> XML::Parser::Context + * + * Creates a new parser context based on the specified io object. + * + * Parameters: + * + * io - A ruby IO object + * options - A or'ed together list of LibXML::XML::Parser::Options values +*/ +static VALUE rxml_parser_context_io(int argc, VALUE* argv, VALUE klass) +{ + VALUE io, options; + rb_scan_args(argc, argv, "11", &io, &options); + + if (NIL_P(io)) + rb_raise(rb_eTypeError, "Must pass in an IO object"); + + xmlParserInputBufferPtr input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL, + (void*)io, XML_CHAR_ENCODING_NONE); + + xmlParserCtxtPtr ctxt = xmlNewParserCtxt(); + + if (!ctxt) + { + xmlFreeParserInputBuffer(input); + rxml_raise(&xmlLastError); + } + + /* This is annoying, but xmlInitParserCtxt (called indirectly above) and + xmlCtxtUseOptionsInternal (called below) initialize slightly different + context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt + sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */ + xmlCtxtUseOptions(ctxt, options == Qnil ? 0 : NUM2INT(options)); + + xmlParserInputPtr stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); + + if (!stream) + { + xmlFreeParserInputBuffer(input); + xmlFreeParserCtxt(ctxt); + rxml_raise(&xmlLastError); + } + inputPush(ctxt, stream); + VALUE result = rxml_parser_context_wrap(ctxt); + + /* Attach io object to parser so it won't get freed.*/ + rb_ivar_set(result, IO_ATTR, io); + + return result; +} + +/* + * call-seq: + * context.base_uri -> "http:://libxml.org" + * + * Obtain the base url for this parser context. 
+ */ +static VALUE rxml_parser_context_base_uri_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->input && ctxt->input->filename) + return rxml_new_cstr((const xmlChar*)ctxt->input->filename, ctxt->encoding); + else + return Qnil; +} + +/* + * call-seq: + * context.base_uri = "http:://libxml.org" + * + * Sets the base url for this parser context. + */ +static VALUE rxml_parser_context_base_uri_set(VALUE self, VALUE url) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + Check_Type(url, T_STRING); + + if (ctxt->input && !ctxt->input->filename) + { + const char* xurl = StringValuePtr(url); + ctxt->input->filename = (const char*)xmlStrdup((const xmlChar*)xurl); + } + return self; +} + +/* + * call-seq: + * context.close -> nil + * + * Closes the underlying input streams. This is useful when parsing a large amount of + * files and you want to close the files without relying on Ruby's garbage collector + * to run. + */ +static VALUE rxml_parser_context_close(VALUE self) +{ + xmlParserCtxtPtr ctxt; + xmlParserInputPtr xinput; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + while ((xinput = inputPop(ctxt)) != NULL) + { + xmlFreeInputStream(xinput); + } + return Qnil; +} + +/* + * call-seq: + * context.data_directory -> "dir" + * + * Obtain the data directory associated with this context. + */ +static VALUE rxml_parser_context_data_directory_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->directory == NULL) + return (Qnil); + else + return (rxml_new_cstr((const xmlChar*)ctxt->directory, ctxt->encoding)); +} + +/* + * call-seq: + * context.depth -> num + * + * Obtain the depth of this context. + */ +static VALUE rxml_parser_context_depth_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (INT2NUM(ctxt->depth)); +} + +/* + * call-seq: + * context.disable_cdata? 
-> (true|false) + * + * Determine whether CDATA nodes will be created in this context. + */ +static VALUE rxml_parser_context_disable_cdata_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + /* LibXML controls this internally with the default SAX handler. */ + if (ctxt->sax && ctxt->sax->cdataBlock) + return (Qfalse); + else + return (Qtrue); +} + +/* + * call-seq: + * context.disable_cdata = (true|false) + * + * Control whether CDATA nodes will be created in this context. + */ +static VALUE rxml_parser_context_disable_cdata_set(VALUE self, VALUE value) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->sax == NULL) + rb_raise(rb_eRuntimeError, "Sax handler is not yet set"); + + /* LibXML controls this internally with the default SAX handler. */ + if (value) + ctxt->sax->cdataBlock = NULL; + else + ctxt->sax->cdataBlock = xmlDefaultSAXHandler.cdataBlock; + + return value; +} + +/* + * call-seq: + * context.disable_sax? -> (true|false) + * + * Determine whether SAX-based processing is disabled + * in this context. + */ +static VALUE rxml_parser_context_disable_sax_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->disableSAX) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.docbook? -> (true|false) + * + * Determine whether this is a docbook context. + */ +static VALUE rxml_parser_context_docbook_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->html == 2) // TODO check this + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.encoding -> XML::Encoding::UTF_8 + * + * Obtain the character encoding identifier used in + * this context. 
+ */ +static VALUE rxml_parser_context_encoding_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + return INT2NUM(xmlParseCharEncoding((const char*)ctxt->encoding)); +} + +/* + * call-seq: + * context.encoding = XML::Encoding::UTF_8 + * + * Sets the character encoding for this context. + */ +static VALUE rxml_parser_context_encoding_set(VALUE self, VALUE encoding) +{ + xmlParserCtxtPtr ctxt; + int result; + const char* xencoding = xmlGetCharEncodingName((xmlCharEncoding)NUM2INT(encoding)); + xmlCharEncodingHandlerPtr hdlr = xmlFindCharEncodingHandler(xencoding); + + if (!hdlr) + rb_raise(rb_eArgError, "Unknown encoding: %i", NUM2INT(encoding)); + + Data_Get_Struct(self, xmlParserCtxt, ctxt); + result = xmlSwitchToEncoding(ctxt, hdlr); + + if (result != 0) + rxml_raise(&xmlLastError); + + if (ctxt->encoding != NULL) + xmlFree((xmlChar *) ctxt->encoding); + + ctxt->encoding = xmlStrdup((const xmlChar *) xencoding); + return self; +} + +/* + * call-seq: + * context.errno -> num + * + * Obtain the last-error number in this context. + */ +static VALUE rxml_parser_context_errno_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (INT2NUM(ctxt->errNo)); +} + +/* + * call-seq: + * context.html? -> (true|false) + * + * Determine whether this is an html context. + */ +static VALUE rxml_parser_context_html_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->html == 1) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.max_num_streams -> num + * + * Obtain the limit on the number of IO streams opened in + * this context. + */ +static VALUE rxml_parser_context_io_max_num_streams_get(VALUE self) +{ + // TODO alias to max_streams and dep this? 
+ xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (INT2NUM(ctxt->inputMax)); +} + +/* + * call-seq: + * context.num_streams -> "dir" + * + * Obtain the actual number of IO streams in this + * context. + */ +static VALUE rxml_parser_context_io_num_streams_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (INT2NUM(ctxt->inputNr)); +} + +/* + * call-seq: + * context.keep_blanks? -> (true|false) + * + * Determine whether parsers in this context retain + * whitespace. + */ +static VALUE rxml_parser_context_keep_blanks_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->keepBlanks) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.name_depth -> num + * + * Obtain the name depth for this context. + */ +static VALUE rxml_parser_context_name_depth_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (INT2NUM(ctxt->nameNr)); +} + +/* + * call-seq: + * context.name_depth_max -> num + * + * Obtain the maximum name depth for this context. + */ +static VALUE rxml_parser_context_name_depth_max_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (INT2NUM(ctxt->nameMax)); +} + +/* + * call-seq: + * context.name_node -> "name" + * + * Obtain the name node for this context. + */ +static VALUE rxml_parser_context_name_node_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->name == NULL) + return (Qnil); + else + return (rxml_new_cstr( ctxt->name, ctxt->encoding)); +} + +/* + * call-seq: + * context.name_tab -> ["name", ..., "name"] + * + * Obtain the name table for this context. 
+ */ +static VALUE rxml_parser_context_name_tab_get(VALUE self) +{ + int i; + xmlParserCtxtPtr ctxt; + VALUE tab_ary; + + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->nameTab == NULL) + return (Qnil); + + tab_ary = rb_ary_new(); + + for (i = (ctxt->nameNr - 1); i >= 0; i--) + { + if (ctxt->nameTab[i] == NULL) + continue; + else + rb_ary_push(tab_ary, rxml_new_cstr( ctxt->nameTab[i], ctxt->encoding)); + } + + return (tab_ary); +} + +/* + * call-seq: + * context.node_depth -> num + * + * Obtain the node depth for this context. + */ +static VALUE rxml_parser_context_node_depth_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (INT2NUM(ctxt->nodeNr)); +} + +/* + * call-seq: + * context.node -> node + * + * Obtain the root node of this context. + */ +static VALUE rxml_parser_context_node_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->node == NULL) + return (Qnil); + else + return (rxml_node_wrap(ctxt->node)); +} + +/* + * call-seq: + * context.node_depth_max -> num + * + * Obtain the maximum node depth for this context. + */ +static VALUE rxml_parser_context_node_depth_max_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (INT2NUM(ctxt->nodeMax)); +} + +/* + * call-seq: + * context.num_chars -> num + * + * Obtain the number of characters in this context. + */ +static VALUE rxml_parser_context_num_chars_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (LONG2NUM(ctxt->nbChars)); +} + + +/* + * call-seq: + * context.options > XML::Parser::Options::NOENT + * + * Returns the parser options for this context. Multiple + * options can be combined by using Bitwise OR (|). 
+ */ +static VALUE rxml_parser_context_options_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return INT2NUM(ctxt->options); +} + +/* + * call-seq: + * context.options = XML::Parser::Options::NOENT | + XML::Parser::Options::NOCDATA + * + * Provides control over the execution of a parser. Valid values + * are the constants defined on XML::Parser::Options. Multiple + * options can be combined by using Bitwise OR (|). + */ +static VALUE rxml_parser_context_options_set(VALUE self, VALUE options) +{ + xmlParserCtxtPtr ctxt; + Check_Type(options, T_FIXNUM); + + Data_Get_Struct(self, xmlParserCtxt, ctxt); + xmlCtxtUseOptions(ctxt, NUM2INT(options)); + + return self; +} + +/* + * call-seq: + * context.recovery? -> (true|false) + * + * Determine whether recovery mode is enabled in this + * context. + */ +static VALUE rxml_parser_context_recovery_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->recovery) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.recovery = true|false + * + * Control whether recovery mode is enabled in this + * context. + */ +static VALUE rxml_parser_context_recovery_set(VALUE self, VALUE value) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (value == Qfalse) + { + ctxt->recovery = 0; + return (Qfalse); + } + else + { + ctxt->recovery = 1; + return (Qtrue); + } +} + +/* + * call-seq: + * context.replace_entities? -> (true|false) + * + * Determine whether external entity replacement is enabled in this + * context. + */ +static VALUE rxml_parser_context_replace_entities_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->replaceEntities) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.replace_entities = true|false + * + * Control whether external entity replacement is enabled in this + * context. 
+ */ +static VALUE rxml_parser_context_replace_entities_set(VALUE self, VALUE value) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (value == Qfalse) + { + ctxt->replaceEntities = 0; + return (Qfalse); + } + else + { + ctxt->replaceEntities = 1; + return (Qtrue); + } +} + +/* + * call-seq: + * context.space_depth -> num + * + * Obtain the space depth for this context. + */ +static VALUE rxml_parser_context_space_depth_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (INT2NUM(ctxt->spaceNr)); +} + +/* + * call-seq: + * context.space_depth -> num + * + * Obtain the maximum space depth for this context. + */ +static VALUE rxml_parser_context_space_depth_max_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + return (INT2NUM(ctxt->spaceMax)); +} + +/* + * call-seq: + * context.subset_external? -> (true|false) + * + * Determine whether this context is a subset of an + * external context. + */ +static VALUE rxml_parser_context_subset_external_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->inSubset == 2) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.subset_internal? -> (true|false) + * + * Determine whether this context is a subset of an + * internal context. + */ +static VALUE rxml_parser_context_subset_internal_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->inSubset == 1) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.subset_internal_name -> "name" + * + * Obtain this context's subset name (valid only if + * either of subset_external? or subset_internal? + * is true). 
+ */ +static VALUE rxml_parser_context_subset_name_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->intSubName == NULL) + return (Qnil); + else + return (rxml_new_cstr(ctxt->intSubName, ctxt->encoding)); +} + +/* + * call-seq: + * context.subset_external_uri -> "uri" + * + * Obtain this context's external subset URI. (valid only if + * either of subset_external? or subset_internal? + * is true). + */ +static VALUE rxml_parser_context_subset_external_uri_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->extSubURI == NULL) + return (Qnil); + else + return (rxml_new_cstr( ctxt->extSubURI, ctxt->encoding)); +} + +/* + * call-seq: + * context.subset_external_system_id -> "system_id" + * + * Obtain this context's external subset system identifier. + * (valid only if either of subset_external? or subset_internal? + * is true). + */ +static VALUE rxml_parser_context_subset_external_system_id_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->extSubSystem == NULL) + return (Qnil); + else + return (rxml_new_cstr( ctxt->extSubSystem, ctxt->encoding)); +} + +/* + * call-seq: + * context.standalone? -> (true|false) + * + * Determine whether this is a standalone context. + */ +static VALUE rxml_parser_context_standalone_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->standalone) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.stats? -> (true|false) + * + * Determine whether this context maintains statistics. + */ +static VALUE rxml_parser_context_stats_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->record_info) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.valid? -> (true|false) + * + * Determine whether this context is valid. 
+ */ +static VALUE rxml_parser_context_valid_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->valid) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.validate? -> (true|false) + * + * Determine whether validation is enabled in this context. + */ +static VALUE rxml_parser_context_validate_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->validate) + return (Qtrue); + else + return (Qfalse); +} + +/* + * call-seq: + * context.version -> "version" + * + * Obtain this context's version identifier. + */ +static VALUE rxml_parser_context_version_get(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->version == NULL) + return (Qnil); + else + return (rxml_new_cstr( ctxt->version, ctxt->encoding)); +} + +/* + * call-seq: + * context.well_formed? -> (true|false) + * + * Determine whether this context contains well-formed XML. 
+ */ +static VALUE rxml_parser_context_well_formed_q(VALUE self) +{ + xmlParserCtxtPtr ctxt; + Data_Get_Struct(self, xmlParserCtxt, ctxt); + + if (ctxt->wellFormed) + return (Qtrue); + else + return (Qfalse); +} + +void rxml_init_parser_context(void) +{ + IO_ATTR = ID2SYM(rb_intern("@io")); + + cXMLParserContext = rb_define_class_under(cXMLParser, "Context", rb_cObject); + rb_define_alloc_func(cXMLParserContext, rxml_parser_context_alloc); + + rb_define_singleton_method(cXMLParserContext, "document", rxml_parser_context_document, -1); + rb_define_singleton_method(cXMLParserContext, "file", rxml_parser_context_file, -1); + rb_define_singleton_method(cXMLParserContext, "io", rxml_parser_context_io, -1); + rb_define_singleton_method(cXMLParserContext, "string", rxml_parser_context_string, -1); + + rb_define_method(cXMLParserContext, "base_uri", rxml_parser_context_base_uri_get, 0); + rb_define_method(cXMLParserContext, "base_uri=", rxml_parser_context_base_uri_set, 1); + rb_define_method(cXMLParserContext, "close", rxml_parser_context_close, 0); + rb_define_method(cXMLParserContext, "data_directory", rxml_parser_context_data_directory_get, 0); + rb_define_method(cXMLParserContext, "depth", rxml_parser_context_depth_get, 0); + rb_define_method(cXMLParserContext, "disable_cdata?", rxml_parser_context_disable_cdata_q, 0); + rb_define_method(cXMLParserContext, "disable_cdata=", rxml_parser_context_disable_cdata_set, 1); + rb_define_method(cXMLParserContext, "disable_sax?", rxml_parser_context_disable_sax_q, 0); + rb_define_method(cXMLParserContext, "docbook?", rxml_parser_context_docbook_q, 0); + rb_define_method(cXMLParserContext, "encoding", rxml_parser_context_encoding_get, 0); + rb_define_method(cXMLParserContext, "encoding=", rxml_parser_context_encoding_set, 1); + rb_define_method(cXMLParserContext, "errno", rxml_parser_context_errno_get, 0); + rb_define_method(cXMLParserContext, "html?", rxml_parser_context_html_q, 0); + rb_define_method(cXMLParserContext, 
"io_max_num_streams", rxml_parser_context_io_max_num_streams_get, 0); + rb_define_method(cXMLParserContext, "io_num_streams", rxml_parser_context_io_num_streams_get, 0); + rb_define_method(cXMLParserContext, "keep_blanks?", rxml_parser_context_keep_blanks_q, 0); + rb_define_method(cXMLParserContext, "name_node", rxml_parser_context_name_node_get, 0); + rb_define_method(cXMLParserContext, "name_depth", rxml_parser_context_name_depth_get, 0); + rb_define_method(cXMLParserContext, "name_depth_max", rxml_parser_context_name_depth_max_get, 0); + rb_define_method(cXMLParserContext, "name_tab", rxml_parser_context_name_tab_get, 0); + rb_define_method(cXMLParserContext, "node", rxml_parser_context_node_get, 0); + rb_define_method(cXMLParserContext, "node_depth", rxml_parser_context_node_depth_get, 0); + rb_define_method(cXMLParserContext, "node_depth_max", rxml_parser_context_node_depth_max_get, 0); + rb_define_method(cXMLParserContext, "num_chars", rxml_parser_context_num_chars_get, 0); + rb_define_method(cXMLParserContext, "options", rxml_parser_context_options_get, 0); + rb_define_method(cXMLParserContext, "options=", rxml_parser_context_options_set, 1); + rb_define_method(cXMLParserContext, "recovery?", rxml_parser_context_recovery_q, 0); + rb_define_method(cXMLParserContext, "recovery=", rxml_parser_context_recovery_set, 1); + rb_define_method(cXMLParserContext, "replace_entities?", rxml_parser_context_replace_entities_q, 0); + rb_define_method(cXMLParserContext, "replace_entities=", rxml_parser_context_replace_entities_set, 1); + rb_define_method(cXMLParserContext, "space_depth", rxml_parser_context_space_depth_get, 0); + rb_define_method(cXMLParserContext, "space_depth_max", rxml_parser_context_space_depth_max_get, 0); + rb_define_method(cXMLParserContext, "subset_external?", rxml_parser_context_subset_external_q, 0); + rb_define_method(cXMLParserContext, "subset_external_system_id", rxml_parser_context_subset_external_system_id_get, 0); + 
rb_define_method(cXMLParserContext, "subset_external_uri", rxml_parser_context_subset_external_uri_get, 0); + rb_define_method(cXMLParserContext, "subset_internal?", rxml_parser_context_subset_internal_q, 0); + rb_define_method(cXMLParserContext, "subset_internal_name", rxml_parser_context_subset_name_get, 0); + rb_define_method(cXMLParserContext, "stats?", rxml_parser_context_stats_q, 0); + rb_define_method(cXMLParserContext, "standalone?", rxml_parser_context_standalone_q, 0); + rb_define_method(cXMLParserContext, "valid", rxml_parser_context_valid_q, 0); + rb_define_method(cXMLParserContext, "validate?", rxml_parser_context_validate_q, 0); + rb_define_method(cXMLParserContext, "version", rxml_parser_context_version_get, 0); + rb_define_method(cXMLParserContext, "well_formed?", rxml_parser_context_well_formed_q, 0); +} diff --git a/ext/libxml/ruby_xml_parser_options.c b/ext/libxml/ruby_xml_parser_options.c index be40e8d8..d85ec76e 100644 --- a/ext/libxml/ruby_xml_parser_options.c +++ b/ext/libxml/ruby_xml_parser_options.c @@ -1,66 +1,74 @@ -/* Please see the LICENSE file for copyright and distribution information */ - -#include -#include "ruby_libxml.h" - -/* Document-class: LibXML::XML::Parser::Options - * - * Options that control the operation of the HTMLParser. The easiest - * way to set a parser's options is to use the methods - * XML::Parser.file, XML::Parser.io or XML::Parser.string. - * For additional control, see XML::Parser::Context#options=. 
-*/ - -VALUE mXMLParserOptions; - -void rxml_init_parser_options(void) -{ - mXMLParserOptions = rb_define_module_under(cXMLParser, "Options"); - - /* recover on errors */ - rb_define_const(mXMLParserOptions, "RECOVER", INT2NUM(XML_PARSE_RECOVER)); - /* substitute entities */ - rb_define_const(mXMLParserOptions, "NOENT", INT2NUM(XML_PARSE_NOENT)); - /* load the external subset */ - rb_define_const(mXMLParserOptions, "DTDLOAD", INT2NUM(XML_PARSE_DTDLOAD)); - /* default DTD attributes */ - rb_define_const(mXMLParserOptions, "DTDATTR", INT2NUM(XML_PARSE_DTDATTR)); - /* validate with the DTD */ - rb_define_const(mXMLParserOptions, "DTDVALID", INT2NUM(XML_PARSE_DTDVALID)); - /* suppress error reports */ - rb_define_const(mXMLParserOptions, "NOERROR", INT2NUM(XML_PARSE_NOERROR)); - /* suppress warning reports */ - rb_define_const(mXMLParserOptions, "NOWARNING", INT2NUM(XML_PARSE_NOWARNING)); - /* pedantic error reporting */ - rb_define_const(mXMLParserOptions, "PEDANTIC", INT2NUM(XML_PARSE_PEDANTIC)); - /* remove blank nodes */ - rb_define_const(mXMLParserOptions, "NOBLANKS", INT2NUM(XML_PARSE_NOBLANKS)); - /* use the SAX1 interface internally */ - rb_define_const(mXMLParserOptions, "SAX1", INT2NUM(XML_PARSE_SAX1)); - /* Implement XInclude substitition */ - rb_define_const(mXMLParserOptions, "XINCLUDE", INT2NUM(XML_PARSE_XINCLUDE)); - /* Forbid network access */ - rb_define_const(mXMLParserOptions, "NONET", INT2NUM(XML_PARSE_NONET)); - /* Do not reuse the context dictionnary */ - rb_define_const(mXMLParserOptions, "NODICT", INT2NUM(XML_PARSE_NODICT)); - /* remove redundant namespaces declarations */ - rb_define_const(mXMLParserOptions, "NSCLEAN", INT2NUM(XML_PARSE_NSCLEAN)); - /* merge CDATA as text nodes */ - rb_define_const(mXMLParserOptions, "NOCDATA", INT2NUM(XML_PARSE_NOCDATA)); -#if LIBXML_VERSION >= 20621 - /* do not generate XINCLUDE START/END nodes */ - rb_define_const(mXMLParserOptions, "NOXINCNODE", INT2NUM(XML_PARSE_NOXINCNODE)); -#endif -#if LIBXML_VERSION >= 
20700 - /* compact small text nodes */ - rb_define_const(mXMLParserOptions, "COMPACT", INT2NUM(XML_PARSE_COMPACT)); - /* parse using XML-1.0 before update 5 */ - rb_define_const(mXMLParserOptions, "PARSE_OLD10", INT2NUM(XML_PARSE_OLD10)); - /* do not fixup XINCLUDE xml:base uris */ - rb_define_const(mXMLParserOptions, "NOBASEFIX", INT2NUM(XML_PARSE_NOBASEFIX)); -#endif -#if LIBXML_VERSION >= 20703 - /* relax any hardcoded limit from the parser */ - rb_define_const(mXMLParserOptions, "HUGE", INT2NUM(XML_PARSE_HUGE)); -#endif -} +/* Please see the LICENSE file for copyright and distribution information */ + +#include +#include "ruby_libxml.h" + +/* Document-class: LibXML::XML::Parser::Options + * + * Options that control the operation of the HTMLParser. The easiest + * way to set a parser's options is to use the methods + * XML::Parser.file, XML::Parser.io or XML::Parser.string. + * For additional control, see XML::Parser::Context#options=. +*/ + +VALUE mXMLParserOptions; + +void rxml_init_parser_options(void) +{ + mXMLParserOptions = rb_define_module_under(cXMLParser, "Options"); + + /* recover on errors */ + rb_define_const(mXMLParserOptions, "RECOVER", INT2NUM(XML_PARSE_RECOVER)); + /* substitute entities */ + rb_define_const(mXMLParserOptions, "NOENT", INT2NUM(XML_PARSE_NOENT)); + /* load the external subset */ + rb_define_const(mXMLParserOptions, "DTDLOAD", INT2NUM(XML_PARSE_DTDLOAD)); + /* default DTD attributes */ + rb_define_const(mXMLParserOptions, "DTDATTR", INT2NUM(XML_PARSE_DTDATTR)); + /* validate with the DTD */ + rb_define_const(mXMLParserOptions, "DTDVALID", INT2NUM(XML_PARSE_DTDVALID)); + /* suppress error reports */ + rb_define_const(mXMLParserOptions, "NOERROR", INT2NUM(XML_PARSE_NOERROR)); + /* suppress warning reports */ + rb_define_const(mXMLParserOptions, "NOWARNING", INT2NUM(XML_PARSE_NOWARNING)); + /* pedantic error reporting */ + rb_define_const(mXMLParserOptions, "PEDANTIC", INT2NUM(XML_PARSE_PEDANTIC)); + /* remove blank nodes */ + 
rb_define_const(mXMLParserOptions, "NOBLANKS", INT2NUM(XML_PARSE_NOBLANKS)); + /* use the SAX1 interface internally */ + rb_define_const(mXMLParserOptions, "SAX1", INT2NUM(XML_PARSE_SAX1)); + /* Implement XInclude substitition */ + rb_define_const(mXMLParserOptions, "XINCLUDE", INT2NUM(XML_PARSE_XINCLUDE)); + /* Forbid network access */ + rb_define_const(mXMLParserOptions, "NONET", INT2NUM(XML_PARSE_NONET)); + /* Do not reuse the context dictionnary */ + rb_define_const(mXMLParserOptions, "NODICT", INT2NUM(XML_PARSE_NODICT)); + /* remove redundant namespaces declarations */ + rb_define_const(mXMLParserOptions, "NSCLEAN", INT2NUM(XML_PARSE_NSCLEAN)); + /* merge CDATA as text nodes */ + rb_define_const(mXMLParserOptions, "NOCDATA", INT2NUM(XML_PARSE_NOCDATA)); +#if LIBXML_VERSION >= 20621 + /* do not generate XINCLUDE START/END nodes */ + rb_define_const(mXMLParserOptions, "NOXINCNODE", INT2NUM(XML_PARSE_NOXINCNODE)); +#endif +#if LIBXML_VERSION >= 20700 + /* compact small text nodes */ + rb_define_const(mXMLParserOptions, "COMPACT", INT2NUM(XML_PARSE_COMPACT)); + /* parse using XML-1.0 before update 5 */ + rb_define_const(mXMLParserOptions, "OLD10", INT2NUM(XML_PARSE_OLD10)); + /* do not fixup XINCLUDE xml:base uris */ + rb_define_const(mXMLParserOptions, "NOBASEFIX", INT2NUM(XML_PARSE_NOBASEFIX)); +#endif +#if LIBXML_VERSION >= 20703 + /* relax any hardcoded limit from the parser */ + rb_define_const(mXMLParserOptions, "HUGE", INT2NUM(XML_PARSE_HUGE)); +#endif +#if LIBXML_VERSION >= 21106 + /* parse using SAX2 interface before 2.7.0 */ + rb_define_const(mXMLParserOptions, "OLDSAX", INT2NUM(XML_PARSE_OLDSAX)); + /* ignore internal document encoding hint */ + rb_define_const(mXMLParserOptions, "IGNORE_ENC", INT2NUM(XML_PARSE_IGNORE_ENC)); + /* Store big lines numbers in text PSVI field */ + rb_define_const(mXMLParserOptions, "BIG_LINES", INT2NUM(XML_PARSE_BIG_LINES)); +#endif +} diff --git a/lib/libxml/document.rb b/lib/libxml/document.rb index 8e15738a..f78219b4 
100644 --- a/lib/libxml/document.rb +++ b/lib/libxml/document.rb @@ -17,21 +17,21 @@ def self.document(value) # call-seq: # XML::Document.file(path) -> XML::Document - # XML::Document.file(path, :encoding => XML::Encoding::UTF_8, - # :options => XML::Parser::Options::NOENT) -> XML::Document + # XML::Document.file(path, encoding: XML::Encoding::UTF_8, + # options: XML::Parser::Options::NOENT) -> XML::Document # # Creates a new document from the specified file or uri. # - # You may provide an optional hash table to control how the - # parsing is performed. Valid options are: + # Parameters: # + # path - Path to file # encoding - The document encoding, defaults to nil. Valid values # are the encoding constants defined on XML::Encoding. # options - Parser options. Valid values are the constants defined on # XML::Parser::Options. Mutliple options can be combined # by using Bitwise OR (|). - def self.file(value, options = {}) - Parser.file(value, options).parse + def self.file(path, encoding: nil, options: nil) + Parser.file(path, encoding: encoding, options: options).parse end # call-seq: @@ -57,23 +57,23 @@ def self.io(value, options = {}) # call-seq: # XML::Document.string(string) -> XML::Document - # XML::Document.string(string, :encoding => XML::Encoding::UTF_8, - # :options => XML::Parser::Options::NOENT - # :base_uri="http://libxml.org") -> XML::Document + # XML::Document.string(string, encoding: XML::Encoding::UTF_8, + # options: XML::Parser::Options::NOENT + # base_uri: "http://libxml.org") -> XML::Document # # Creates a new document from the specified string. # - # You may provide an optional hash table to control how the - # parsing is performed. Valid options are: + # Parameters: # + # string - String to parse # base_uri - The base url for the parsed document. # encoding - The document encoding, defaults to nil. Valid values # are the encoding constants defined on XML::Encoding. # options - Parser options. 
Valid values are the constants defined on # XML::Parser::Options. Mutliple options can be combined # by using Bitwise OR (|). - def self.string(value, options = {}) - Parser.string(value, options).parse + def self.string(value, base_uri: nil, encoding: nil, options: nil) + Parser.string(value, base_uri: base_uri, encoding: encoding, options: options).parse end # Returns a new XML::XPathContext for the document. diff --git a/lib/libxml/html_parser.rb b/lib/libxml/html_parser.rb index 5b561e42..2e8b4f3b 100644 --- a/lib/libxml/html_parser.rb +++ b/lib/libxml/html_parser.rb @@ -5,31 +5,31 @@ module XML class HTMLParser # call-seq: # XML::HTMLParser.file(path) -> XML::HTMLParser - # XML::HTMLParser.file(path, :encoding => XML::Encoding::UTF_8, - # :options => XML::HTMLParser::Options::NOENT) -> XML::HTMLParser + # XML::HTMLParser.file(path, encoding: XML::Encoding::UTF_8, + # options: XML::HTMLParser::Options::NOENT) -> XML::HTMLParser # # Creates a new parser by parsing the specified file or uri. # - # You may provide an optional hash table to control how the - # parsing is performed. Valid options are: + # Parameters: # + # path - Path to file to parse # encoding - The document encoding, defaults to nil. Valid values # are the encoding constants defined on XML::Encoding. # options - Parser options. Valid values are the constants defined on # XML::HTMLParser::Options. Mutliple options can be combined # by using Bitwise OR (|). 
- def self.file(path, options = {}) + def self.file(path, encoding: nil, options: nil) context = XML::HTMLParser::Context.file(path) - context.encoding = options[:encoding] if options[:encoding] - context.options = options[:options] if options[:options] + context.encoding = encoding if encoding + context.options = options if options self.new(context) end # call-seq: # XML::HTMLParser.io(io) -> XML::HTMLParser - # XML::HTMLParser.io(io, :encoding => XML::Encoding::UTF_8, - # :options => XML::HTMLParser::Options::NOENT - # :base_uri="http://libxml.org") -> XML::HTMLParser + # XML::HTMLParser.io(io, encoding: XML::Encoding::UTF_8, + # options: XML::HTMLParser::Options::NOENT + # base_uri: "http://libxml.org") -> XML::HTMLParser # # Creates a new reader by parsing the specified io object. # @@ -42,36 +42,36 @@ def self.file(path, options = {}) # options - Parser options. Valid values are the constants defined on # XML::HTMLParser::Options. Mutliple options can be combined # by using Bitwise OR (|). - def self.io(io, options = {}) + def self.io(io, base_uri: nil, encoding: nil, options: nil) context = XML::HTMLParser::Context.io(io) - context.base_uri = options[:base_uri] if options[:base_uri] - context.encoding = options[:encoding] if options[:encoding] - context.options = options[:options] if options[:options] + context.base_uri = base_uri if base_uri + context.encoding = encoding if encoding + context.options = options if options self.new(context) end # call-seq: # XML::HTMLParser.string(string) - # XML::HTMLParser.string(string, :encoding => XML::Encoding::UTF_8, - # :options => XML::HTMLParser::Options::NOENT - # :base_uri="http://libxml.org") -> XML::HTMLParser + # XML::HTMLParser.string(string, encoding: XML::Encoding::UTF_8, + # options: XML::HTMLParser::Options::NOENT + # base_uri: "http://libxml.org") -> XML::HTMLParser # # Creates a new parser by parsing the specified string. 
# - # You may provide an optional hash table to control how the - # parsing is performed. Valid options are: + # Parameters: # + # string - String to parse # base_uri - The base url for the parsed document. # encoding - The document encoding, defaults to nil. Valid values # are the encoding constants defined on XML::Encoding. # options - Parser options. Valid values are the constants defined on # XML::HTMLParser::Options. Mutliple options can be combined # by using Bitwise OR (|). - def self.string(string, options = {}) + def self.string(string, base_uri: nil, encoding: nil, options: nil) context = XML::HTMLParser::Context.string(string) - context.base_uri = options[:base_uri] if options[:base_uri] - context.encoding = options[:encoding] if options[:encoding] - context.options = options[:options] if options[:options] + context.base_uri = base_uri if base_uri + context.encoding = encoding if encoding + context.options = options if options self.new(context) end diff --git a/lib/libxml/parser.rb b/lib/libxml/parser.rb index eb90e71e..7ba5a9b7 100644 --- a/lib/libxml/parser.rb +++ b/lib/libxml/parser.rb @@ -18,31 +18,33 @@ def self.document(doc) # call-seq: # XML::Parser.file(path) -> XML::Parser - # XML::Parser.file(path, :encoding => XML::Encoding::UTF_8, - # :options => XML::Parser::Options::NOENT) -> XML::Parser + # XML::Parser.file(path, encoding: XML::Encoding::UTF_8, + # options: XML::Parser::Options::NOENT) -> XML::Parser # # Creates a new parser for the specified file or uri. # - # You may provide an optional hash table to control how the - # parsing is performed. Valid options are: + # Parameters: # + # path - Path to file + # base_uri - The base url for the parsed document. # encoding - The document encoding, defaults to nil. Valid values # are the encoding constants defined on XML::Encoding. # options - Parser options. Valid values are the constants defined on # XML::Parser::Options. Mutliple options can be combined # by using Bitwise OR (|). 
- def self.file(path, options = {}) + def self.file(path, base_uri: nil, encoding: nil, options: nil) context = XML::Parser::Context.file(path) - context.encoding = options[:encoding] if options[:encoding] - context.options = options[:options] if options[:options] + context.base_uri = base_uri if base_uri + context.encoding = encoding if encoding + context.options = options if options self.new(context) end # call-seq: # XML::Parser.io(io) -> XML::Parser - # XML::Parser.io(io, :encoding => XML::Encoding::UTF_8, - # :options => XML::Parser::Options::NOENT - # :base_uri="http://libxml.org") -> XML::Parser + # XML::Parser.io(io, encoding: XML::Encoding::UTF_8, + # options: XML::Parser::Options::NOENT + # base_uri: "http://libxml.org") -> XML::Parser # # Creates a new parser for the specified io object. # @@ -55,36 +57,36 @@ def self.file(path, options = {}) # options - Parser options. Valid values are the constants defined on # XML::Parser::Options. Mutliple options can be combined # by using Bitwise OR (|). - def self.io(io, options = {}) + def self.io(io, base_uri: nil, encoding: nil, options: nil) context = XML::Parser::Context.io(io) - context.base_uri = options[:base_uri] if options[:base_uri] - context.encoding = options[:encoding] if options[:encoding] - context.options = options[:options] if options[:options] + context.base_uri = base_uri if base_uri + context.encoding = encoding if encoding + context.options = options if options self.new(context) end # call-seq: # XML::Parser.string(string) - # XML::Parser.string(string, :encoding => XML::Encoding::UTF_8, - # :options => XML::Parser::Options::NOENT - # :base_uri="http://libxml.org") -> XML::Parser + # XML::Parser.string(string, encoding: XML::Encoding::UTF_8, + # options: XML::Parser::Options::NOENT + # base_uri: "http://libxml.org") -> XML::Parser # # Creates a new parser by parsing the specified string. # - # You may provide an optional hash table to control how the - # parsing is performed. 
Valid options are: + # Parameters: # + # string - The string to parse # base_uri - The base url for the parsed document. # encoding - The document encoding, defaults to nil. Valid values # are the encoding constants defined on XML::Encoding. # options - Parser options. Valid values are the constants defined on - # XML::Parser::Options. Mutliple options can be combined + # XML::Parser::Options. Multiple options can be combined # by using Bitwise OR (|). - def self.string(string, options = {}) + def self.string(string, base_uri: nil, encoding: nil, options: nil) context = XML::Parser::Context.string(string) - context.base_uri = options[:base_uri] if options[:base_uri] - context.encoding = options[:encoding] if options[:encoding] - context.options = options[:options] if options[:options] + context.base_uri = base_uri if base_uri + context.encoding = encoding if encoding + context.options = options if options self.new(context) end diff --git a/test/test_document_write.rb b/test/test_document_write.rb index f8970ef5..26ba9daf 100644 --- a/test/test_document_write.rb +++ b/test/test_document_write.rb @@ -1,146 +1,143 @@ -# encoding: UTF-8 - -require_relative './test_helper' -require 'tmpdir' - -class TestDocumentWrite < Minitest::Test - def setup - @file_name = "model/bands.utf-8.xml" - - # Strip spaces to make testing easier - LibXML::XML.default_keep_blanks = false - file = File.join(File.dirname(__FILE__), @file_name) - @doc = LibXML::XML::Document.file(file) - end - - def teardown - LibXML::XML.default_keep_blanks = true - @doc = nil - end - - # --- to_s tests --- - def test_to_s_default - # Default to_s has indentation - assert_equal("\n\n M\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.\n Iron Maiden is a British heavy metal band formed in 1975.\n\n", - @doc.to_s) - end - - def test_to_s_no_global_indentation - # No indentation due to global setting - LibXML::XML.indent_tree_output = false - value = @doc.to_s - - 
assert_equal(Encoding::UTF_8, value.encoding) - assert_equal("\n\nM\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.\nIron Maiden is a British heavy metal band formed in 1975.\n\n", - value) - ensure - LibXML::XML.indent_tree_output = true - end - - def test_to_s_no_indentation - # No indentation due to local setting - value = @doc.to_s(:indent => false) - assert_equal(Encoding::UTF_8, value.encoding) - assert_equal("\nM\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.Iron Maiden is a British heavy metal band formed in 1975.\n", - value) - end - - def test_to_s_encoding - # Test encodings - - # UTF8: - # ö - c3 b6 in hex, \303\266 in octal - # ü - c3 bc in hex, \303\274 in octal - value = @doc.to_s(:encoding => LibXML::XML::Encoding::UTF_8) - assert_equal(Encoding::UTF_8, value.encoding) - assert_equal("\n\n M\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.\n Iron Maiden is a British heavy metal band formed in 1975.\n\n", - value) - - # ISO_8859_1: - # ö - f6 in hex, \366 in octal - # ü - fc in hex, \374 in octal - value = @doc.to_s(:encoding => LibXML::XML::Encoding::ISO_8859_1) - assert_equal(Encoding::ISO8859_1, value.encoding) - assert_equal("\n\n M\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.\n Iron Maiden is a British heavy metal band formed in 1975.\n\n".force_encoding(Encoding::ISO8859_1), - @doc.to_s(:encoding => LibXML::XML::Encoding::ISO_8859_1)) - - # Invalid encoding - error = assert_raises(ArgumentError) do - @doc.to_s(:encoding => -9999) - end - assert_equal('Unknown encoding value: -9999', error.to_s) - end - - # --- save tests ----- - def test_save_utf8 - temp_filename = File.join(Dir.tmpdir, "tc_document_write_test_save_utf8.xml") - - bytes = @doc.save(temp_filename) - assert_equal(305, bytes) - - contents = File.read(temp_filename, nil, nil, :encoding => 
Encoding::UTF_8) - assert_equal(Encoding::UTF_8, contents.encoding) - assert_equal("\n\n M\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.\n Iron Maiden is a British heavy metal band formed in 1975.\n\n", - contents) - ensure - File.delete(temp_filename) - end - - def test_save_utf8_no_indents - temp_filename = File.join(Dir.tmpdir, "tc_document_write_test_save_utf8_no_indents.xml") - - bytes = @doc.save(temp_filename, :indent => false) - assert_equal(298, bytes) - - contents = File.read(temp_filename, nil, nil, :encoding => Encoding::UTF_8) - assert_equal("\nM\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.Iron Maiden is a British heavy metal band formed in 1975.\n", - contents) - ensure - File.delete(temp_filename) - end - - def test_save_iso_8859_1 - temp_filename = File.join(Dir.tmpdir, "tc_document_write_test_save_iso_8859_1.xml") - bytes = @doc.save(temp_filename, :encoding => LibXML::XML::Encoding::ISO_8859_1) - assert_equal(304, bytes) - - contents = File.read(temp_filename, nil, nil, :encoding => Encoding::ISO8859_1) - assert_equal(Encoding::ISO8859_1, contents.encoding) - assert_equal("\n\n M\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.\n Iron Maiden is a British heavy metal band formed in 1975.\n\n".force_encoding(Encoding::ISO8859_1), - contents) - ensure - File.delete(temp_filename) - end - - def test_save_iso_8859_1_no_indent - temp_filename = File.join(Dir.tmpdir, "tc_document_write_test_save_iso_8859_1_no_indent.xml") - bytes = @doc.save(temp_filename, :indent => false, :encoding => LibXML::XML::Encoding::ISO_8859_1) - assert_equal(297, bytes) - - contents = File.read(temp_filename, nil, nil, :encoding => Encoding::ISO8859_1) - assert_equal(Encoding::ISO8859_1, contents.encoding) - assert_equal("\nM\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.Iron Maiden is a British 
heavy metal band formed in 1975.\n".force_encoding(Encoding::ISO8859_1), - contents) - ensure - File.delete(temp_filename) - end - - def test_thread_set_root - # Previously a segmentation fault occurred when running libxml in - # background threads. - thread = Thread.new do - 100000.times do |i| - document = LibXML::XML::Document.new - node = LibXML::XML::Node.new('test') - document.root = node - end - end - thread.join - assert(true) - end - - # --- Debug --- - def test_debug - assert(@doc.debug) - end +# encoding: UTF-8 + +require_relative './test_helper' +require 'tmpdir' + +class TestDocumentWrite < Minitest::Test + def setup + @file_name = "model/bands.utf-8.xml" + + file = File.join(File.dirname(__FILE__), @file_name) + @doc = LibXML::XML::Document.file(file, options: LibXML::XML::Parser::Options::NOBLANKS) + end + + def teardown + @doc = nil + end + + # --- to_s tests --- + def test_to_s_default + # Default to_s has indentation + assert_equal("\n\n M\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.\n Iron Maiden is a British heavy metal band formed in 1975.\n\n", + @doc.to_s) + end + + def test_to_s_no_global_indentation + # No indentation due to global setting + LibXML::XML.indent_tree_output = false + value = @doc.to_s + + assert_equal(Encoding::UTF_8, value.encoding) + assert_equal("\n\nM\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.\nIron Maiden is a British heavy metal band formed in 1975.\n\n", + value) + ensure + LibXML::XML.indent_tree_output = true + end + + def test_to_s_no_indentation + # No indentation due to local setting + value = @doc.to_s(:indent => false) + assert_equal(Encoding::UTF_8, value.encoding) + assert_equal("\nM\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.Iron Maiden is a British heavy metal band formed in 1975.\n", + value) + end + + def test_to_s_encoding + # Test encodings + + # UTF8: + 
# ö - c3 b6 in hex, \303\266 in octal + # ü - c3 bc in hex, \303\274 in octal + value = @doc.to_s(:encoding => LibXML::XML::Encoding::UTF_8) + assert_equal(Encoding::UTF_8, value.encoding) + assert_equal("\n\n M\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.\n Iron Maiden is a British heavy metal band formed in 1975.\n\n", + value) + + # ISO_8859_1: + # ö - f6 in hex, \366 in octal + # ü - fc in hex, \374 in octal + value = @doc.to_s(:encoding => LibXML::XML::Encoding::ISO_8859_1) + assert_equal(Encoding::ISO8859_1, value.encoding) + assert_equal("\n\n M\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.\n Iron Maiden is a British heavy metal band formed in 1975.\n\n".force_encoding(Encoding::ISO8859_1), + @doc.to_s(:encoding => LibXML::XML::Encoding::ISO_8859_1)) + + # Invalid encoding + error = assert_raises(ArgumentError) do + @doc.to_s(:encoding => -9999) + end + assert_equal('Unknown encoding value: -9999', error.to_s) + end + + # --- save tests ----- + def test_save_utf8 + temp_filename = File.join(Dir.tmpdir, "tc_document_write_test_save_utf8.xml") + + bytes = @doc.save(temp_filename) + assert_equal(305, bytes) + + contents = File.read(temp_filename, nil, nil, :encoding => Encoding::UTF_8) + assert_equal(Encoding::UTF_8, contents.encoding) + assert_equal("\n\n M\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.\n Iron Maiden is a British heavy metal band formed in 1975.\n\n", + contents) + ensure + File.delete(temp_filename) + end + + def test_save_utf8_no_indents + temp_filename = File.join(Dir.tmpdir, "tc_document_write_test_save_utf8_no_indents.xml") + + bytes = @doc.save(temp_filename, :indent => false) + assert_equal(298, bytes) + + contents = File.read(temp_filename, nil, nil, :encoding => Encoding::UTF_8) + assert_equal("\nM\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 
1981.Iron Maiden is a British heavy metal band formed in 1975.\n", + contents) + ensure + File.delete(temp_filename) + end + + def test_save_iso_8859_1 + temp_filename = File.join(Dir.tmpdir, "tc_document_write_test_save_iso_8859_1.xml") + bytes = @doc.save(temp_filename, :encoding => LibXML::XML::Encoding::ISO_8859_1) + assert_equal(304, bytes) + + contents = File.read(temp_filename, nil, nil, :encoding => Encoding::ISO8859_1) + assert_equal(Encoding::ISO8859_1, contents.encoding) + assert_equal("\n\n M\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.\n Iron Maiden is a British heavy metal band formed in 1975.\n\n".force_encoding(Encoding::ISO8859_1), + contents) + ensure + File.delete(temp_filename) + end + + def test_save_iso_8859_1_no_indent + temp_filename = File.join(Dir.tmpdir, "tc_document_write_test_save_iso_8859_1_no_indent.xml") + bytes = @doc.save(temp_filename, :indent => false, :encoding => LibXML::XML::Encoding::ISO_8859_1) + assert_equal(297, bytes) + + contents = File.read(temp_filename, nil, nil, :encoding => Encoding::ISO8859_1) + assert_equal(Encoding::ISO8859_1, contents.encoding) + assert_equal("\nM\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.Iron Maiden is a British heavy metal band formed in 1975.\n".force_encoding(Encoding::ISO8859_1), + contents) + ensure + File.delete(temp_filename) + end + + def test_thread_set_root + # Previously a segmentation fault occurred when running libxml in + # background threads. 
+ thread = Thread.new do + 100000.times do |i| + document = LibXML::XML::Document.new + node = LibXML::XML::Node.new('test') + document.root = node + end + end + thread.join + assert(true) + end + + # --- Debug --- + def test_debug + assert(@doc.debug) + end end \ No newline at end of file diff --git a/test/test_dtd.rb b/test/test_dtd.rb index 35831f9c..944c6d9a 100644 --- a/test/test_dtd.rb +++ b/test/test_dtd.rb @@ -1,129 +1,126 @@ -# encoding: UTF-8 - -require_relative './test_helper' - - -class TestDtd < Minitest::Test - def setup - xp = LibXML::XML::Parser.string(<<-EOS) - - Colorado - Lots of nice mountains - - EOS - @doc = xp.parse - end - - def teardown - @doc = nil - end - - def dtd - LibXML::XML::Dtd.new(<<-EOS) - - - - - EOS - end - - def test_internal_subset - xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", nil, nil, true - assert xhtml_dtd.name.nil? - assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id - assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri - assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id - - xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "xhtml1", nil, true - assert_equal "xhtml1", xhtml_dtd.name - assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id - assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri - assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id - end - - def test_external_subset - xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", nil - assert xhtml_dtd.name.nil? 
- assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id - assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri - assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id - - xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "xhtml1" - assert_equal "xhtml1", xhtml_dtd.name - assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id - assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri - assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id - end - - def test_valid - assert(@doc.validate(dtd)) - end - - def test_node_type - assert_equal(LibXML::XML::Node::DTD_NODE, dtd.node_type) - end - - def test_invalid - new_node = LibXML::XML::Node.new('invalid', 'this will mess up validation') - @doc.root << new_node - - error = assert_raises(LibXML::XML::Error) do - @doc.validate(dtd) - end - - # Check the error worked - refute_nil(error) - assert_kind_of(LibXML::XML::Error, error) - assert_equal("Error: No declaration for element invalid.", error.message) - assert_equal(LibXML::XML::Error::VALID, error.domain) - assert_equal(LibXML::XML::Error::DTD_UNKNOWN_ELEM, error.code) - assert_equal(LibXML::XML::Error::ERROR, error.level) - assert_nil(error.file) - assert_nil(error.line) - assert_equal('invalid', error.str1) - # Different answers across operating systems - # assert_nil(error.str2) - assert_nil(error.str3) - assert_equal(0, error.int1) - assert_equal(0, error.int2) - refute_nil(error.node) - assert_equal('invalid', error.node.name) - end - - def test_external_dtd - xml = <<-EOS - - - T1 - - EOS - - errors = Array.new - LibXML::XML::Error.set_handler do |error| - errors << error - end - - LibXML::XML.default_load_external_dtd = false - LibXML::XML::Parser.string(xml).parse - assert_equal(0, errors.length) - - 
errors.clear - LibXML::XML.default_load_external_dtd = true - LibXML::XML::Parser.string(xml).parse - assert_equal(1, errors.length) - assert_equal("Warning: failed to load external entity \"test.dtd\" at :1.", - errors[0].to_s) - - errors = Array.new - LibXML::XML::Parser.string(xml, :options => LibXML::XML::Parser::Options::DTDLOAD).parse - assert_equal(1, errors.length) - assert_equal("Warning: failed to load external entity \"test.dtd\" at :1.", - errors[0].to_s) - ensure - LibXML::XML.default_load_external_dtd = false - LibXML::XML::Error.reset_handler - end -end +# encoding: UTF-8 + +require_relative './test_helper' + + +class TestDtd < Minitest::Test + def setup + xp = LibXML::XML::Parser.string(<<-EOS) + + Colorado + Lots of nice mountains + + EOS + @doc = xp.parse + end + + def teardown + @doc = nil + end + + def dtd + LibXML::XML::Dtd.new(<<-EOS) + + + + + EOS + end + + def test_internal_subset + xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", nil, nil, true + assert xhtml_dtd.name.nil? 
+ assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id + assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri + assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id + + xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "xhtml1", nil, true + assert_equal "xhtml1", xhtml_dtd.name + assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id + assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri + assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id + end + + def test_external_subset + xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", nil + assert xhtml_dtd.name.nil? + assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id + assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri + assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id + + xhtml_dtd = LibXML::XML::Dtd.new "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "xhtml1" + assert_equal "xhtml1", xhtml_dtd.name + assert_equal "-//W3C//DTD XHTML 1.0 Transitional//EN", xhtml_dtd.external_id + assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.uri + assert_equal "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", xhtml_dtd.system_id + end + + def test_valid + assert(@doc.validate(dtd)) + end + + def test_node_type + assert_equal(LibXML::XML::Node::DTD_NODE, dtd.node_type) + end + + def test_invalid + new_node = LibXML::XML::Node.new('invalid', 'this will mess up validation') + @doc.root << new_node + + error = assert_raises(LibXML::XML::Error) do + @doc.validate(dtd) + end + + # Check the 
error worked + refute_nil(error) + assert_kind_of(LibXML::XML::Error, error) + assert_equal("Error: No declaration for element invalid.", error.message) + assert_equal(LibXML::XML::Error::VALID, error.domain) + assert_equal(LibXML::XML::Error::DTD_UNKNOWN_ELEM, error.code) + assert_equal(LibXML::XML::Error::ERROR, error.level) + assert_nil(error.file) + assert_nil(error.line) + assert_equal('invalid', error.str1) + # Different answers across operating systems + # assert_nil(error.str2) + assert_nil(error.str3) + assert_equal(0, error.int1) + assert_equal(0, error.int2) + refute_nil(error.node) + assert_equal('invalid', error.node.name) + end + + def test_external_dtd + xml = <<-EOS + + + T1 + + EOS + + errors = Array.new + LibXML::XML::Error.set_handler do |error| + errors << error + end + + LibXML::XML::Parser.string(xml).parse + assert_equal(0, errors.length) + + errors.clear + LibXML::XML::Parser.string(xml, options: LibXML::XML::Parser::Options::DTDLOAD).parse + assert_equal(1, errors.length) + assert_equal("Warning: failed to load external entity \"test.dtd\" at :1.", + errors[0].to_s) + + errors = Array.new + LibXML::XML::Parser.string(xml, :options => LibXML::XML::Parser::Options::DTDLOAD).parse + assert_equal(1, errors.length) + assert_equal("Warning: failed to load external entity \"test.dtd\" at :1.", + errors[0].to_s) + ensure + LibXML::XML::Error.reset_handler + end +end diff --git a/test/test_encoding.rb b/test/test_encoding.rb index 00417018..af72ff5d 100644 --- a/test/test_encoding.rb +++ b/test/test_encoding.rb @@ -1,129 +1,126 @@ -# encoding: UTF-8 - -require_relative './test_helper' - -# Code UTF8 Latin1 Hex -# m 109 109 6D -# ö 195 182 246 C3 B6 / F6 -# t 116 116 74 -# l 108 108 6C -# e 101 101 65 -# y 121 121 79 -# _ 95 95 5F -# c 99 99 63 -# r 114 114 72 -# ü 195 188 252 C3 BC / FC -# e 101 101 65 - -# See: -# http://en.wikipedia.org/wiki/ISO/IEC_8859-1 -# http://en.wikipedia.org/wiki/List_of_Unicode_characters - -class TestEncoding < 
Minitest::Test - def setup - @original_encoding = Encoding.default_internal - Encoding.default_internal = nil - end - - def teardown - Encoding.default_internal = @original_encoding - end - - def file_for_encoding(encoding) - file_name = "model/bands.#{encoding.name.downcase}.xml" - File.join(File.dirname(__FILE__), file_name) - end - - def load_encoding(encoding) - @encoding = encoding - file = file_for_encoding(encoding) - - # Strip spaces to make testing easier - LibXML::XML.default_keep_blanks = false - @doc = LibXML::XML::Document.file(file) - LibXML::XML.default_keep_blanks = true - end - - def test_encoding - doc = LibXML::XML::Document.new - assert_equal(LibXML::XML::Encoding::NONE, doc.encoding) - assert_equal(Encoding::ASCII_8BIT, doc.rb_encoding) - - file = File.expand_path(File.join(File.dirname(__FILE__), 'model/bands.xml')) - doc = LibXML::XML::Document.file(file) - assert_equal(LibXML::XML::Encoding::UTF_8, doc.encoding) - assert_equal(Encoding::UTF_8, doc.rb_encoding) - - doc.encoding = LibXML::XML::Encoding::ISO_8859_1 - assert_equal(LibXML::XML::Encoding::ISO_8859_1, doc.encoding) - assert_equal(Encoding::ISO8859_1, doc.rb_encoding) - end - - def test_no_internal_encoding_iso_8859_1 - load_encoding(Encoding::ISO_8859_1) - node = @doc.root.children.first - - name = node.name - assert_equal(Encoding::UTF_8, name.encoding) - assert_equal("m\u00F6tley_cr\u00FCe", name) - assert_equal("109 195 182 116 108 101 121 95 99 114 195 188 101", - name.bytes.to_a.join(" ")) - assert_equal("M\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.", - node.content) - - name = name.encode(Encoding::ISO_8859_1) - assert_equal(Encoding::ISO_8859_1, name.encoding) - assert_equal("m\xF6tley_cr\xFCe".force_encoding(Encoding::ISO_8859_1), name) - assert_equal("109 246 116 108 101 121 95 99 114 252 101", - name.bytes.to_a.join(" ")) - assert_equal("M\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California 
in 1981.".force_encoding(Encoding::ISO_8859_1), - node.content.encode(Encoding::ISO_8859_1)) - end - - def test_internal_encoding_iso_8859_1 - Encoding.default_internal = Encoding::ISO_8859_1 - load_encoding(Encoding::ISO_8859_1) - node = @doc.root.children.first - - name = node.name - assert_equal(Encoding::ISO_8859_1, name.encoding) - assert_equal("109 246 116 108 101 121 95 99 114 252 101", - name.bytes.to_a.join(" ")) - assert_equal("m\xF6tley_cr\xFCe".force_encoding(Encoding::ISO_8859_1), name) - assert_equal("109 246 116 108 101 121 95 99 114 252 101", - name.bytes.to_a.join(" ")) - assert_equal("M\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.".force_encoding(Encoding::ISO_8859_1), - node.content.encode(Encoding::ISO_8859_1)) - end - - def test_no_internal_encoding_utf_8 - load_encoding(Encoding::UTF_8) - node = @doc.root.children.first - - name = node.name - assert_equal(@encoding, name.encoding) - assert_equal("109 195 182 116 108 101 121 95 99 114 195 188 101", - name.bytes.to_a.join(" ")) - - name = name.encode(Encoding::ISO_8859_1) - assert_equal(Encoding::ISO_8859_1, name.encoding) - assert_equal("109 246 116 108 101 121 95 99 114 252 101", - name.bytes.to_a.join(" ")) - end - - def test_internal_encoding_utf_8 - Encoding.default_internal = Encoding::ISO_8859_1 - load_encoding(Encoding::UTF_8) - node = @doc.root.children.first - - name = node.name - assert_equal(Encoding::ISO_8859_1, name.encoding) - assert_equal("109 246 116 108 101 121 95 99 114 252 101", - name.bytes.to_a.join(" ")) - end - - def test_encoding_conversions - assert_equal("UTF-8", LibXML::XML::Encoding.to_s(LibXML::XML::Encoding::UTF_8)) - assert_equal(LibXML::XML::Encoding::UTF_8, LibXML::XML::Encoding.from_s("UTF-8")) - end -end +# encoding: UTF-8 + +require_relative './test_helper' + +# Code UTF8 Latin1 Hex +# m 109 109 6D +# ö 195 182 246 C3 B6 / F6 +# t 116 116 74 +# l 108 108 6C +# e 101 101 65 +# y 121 121 79 +# _ 95 95 5F +# c 99 99 
63 +# r 114 114 72 +# ü 195 188 252 C3 BC / FC +# e 101 101 65 + +# See: +# http://en.wikipedia.org/wiki/ISO/IEC_8859-1 +# http://en.wikipedia.org/wiki/List_of_Unicode_characters + +class TestEncoding < Minitest::Test + def setup + @original_encoding = Encoding.default_internal + Encoding.default_internal = nil + end + + def teardown + Encoding.default_internal = @original_encoding + end + + def file_for_encoding(encoding) + file_name = "model/bands.#{encoding.name.downcase}.xml" + File.join(File.dirname(__FILE__), file_name) + end + + def load_encoding(encoding) + @encoding = encoding + file = file_for_encoding(encoding) + + @doc = LibXML::XML::Document.file(file, options: LibXML::XML::Parser::Options::NOBLANKS) + end + + def test_encoding + doc = LibXML::XML::Document.new + assert_equal(LibXML::XML::Encoding::NONE, doc.encoding) + assert_equal(Encoding::ASCII_8BIT, doc.rb_encoding) + + file = File.expand_path(File.join(File.dirname(__FILE__), 'model/bands.xml')) + doc = LibXML::XML::Document.file(file) + assert_equal(LibXML::XML::Encoding::UTF_8, doc.encoding) + assert_equal(Encoding::UTF_8, doc.rb_encoding) + + doc.encoding = LibXML::XML::Encoding::ISO_8859_1 + assert_equal(LibXML::XML::Encoding::ISO_8859_1, doc.encoding) + assert_equal(Encoding::ISO8859_1, doc.rb_encoding) + end + + def test_no_internal_encoding_iso_8859_1 + load_encoding(Encoding::ISO_8859_1) + node = @doc.root.children.first + + name = node.name + assert_equal(Encoding::UTF_8, name.encoding) + assert_equal("m\u00F6tley_cr\u00FCe", name) + assert_equal("109 195 182 116 108 101 121 95 99 114 195 188 101", + name.bytes.to_a.join(" ")) + assert_equal("M\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.", + node.content) + + name = name.encode(Encoding::ISO_8859_1) + assert_equal(Encoding::ISO_8859_1, name.encoding) + assert_equal("m\xF6tley_cr\xFCe".force_encoding(Encoding::ISO_8859_1), name) + assert_equal("109 246 116 108 101 121 95 99 114 252 101", 
+ name.bytes.to_a.join(" ")) + assert_equal("M\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.".force_encoding(Encoding::ISO_8859_1), + node.content.encode(Encoding::ISO_8859_1)) + end + + def test_internal_encoding_iso_8859_1 + Encoding.default_internal = Encoding::ISO_8859_1 + load_encoding(Encoding::ISO_8859_1) + node = @doc.root.children.first + + name = node.name + assert_equal(Encoding::ISO_8859_1, name.encoding) + assert_equal("109 246 116 108 101 121 95 99 114 252 101", + name.bytes.to_a.join(" ")) + assert_equal("m\xF6tley_cr\xFCe".force_encoding(Encoding::ISO_8859_1), name) + assert_equal("109 246 116 108 101 121 95 99 114 252 101", + name.bytes.to_a.join(" ")) + assert_equal("M\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.".force_encoding(Encoding::ISO_8859_1), + node.content.encode(Encoding::ISO_8859_1)) + end + + def test_no_internal_encoding_utf_8 + load_encoding(Encoding::UTF_8) + node = @doc.root.children.first + + name = node.name + assert_equal(@encoding, name.encoding) + assert_equal("109 195 182 116 108 101 121 95 99 114 195 188 101", + name.bytes.to_a.join(" ")) + + name = name.encode(Encoding::ISO_8859_1) + assert_equal(Encoding::ISO_8859_1, name.encoding) + assert_equal("109 246 116 108 101 121 95 99 114 252 101", + name.bytes.to_a.join(" ")) + end + + def test_internal_encoding_utf_8 + Encoding.default_internal = Encoding::ISO_8859_1 + load_encoding(Encoding::UTF_8) + node = @doc.root.children.first + + name = node.name + assert_equal(Encoding::ISO_8859_1, name.encoding) + assert_equal("109 246 116 108 101 121 95 99 114 252 101", + name.bytes.to_a.join(" ")) + end + + def test_encoding_conversions + assert_equal("UTF-8", LibXML::XML::Encoding.to_s(LibXML::XML::Encoding::UTF_8)) + assert_equal(LibXML::XML::Encoding::UTF_8, LibXML::XML::Encoding.from_s("UTF-8")) + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index bfc8ac11..2f00414a 
100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,9 +1,13 @@ # encoding: UTF-8 # To make testing/debugging easier, test within this source tree versus an installed gem - require 'bundler/setup' -require 'minitest/autorun' + +# Add ext directory to load path to make it easier to test locally built extensions +ext_path = File.expand_path(File.join(__dir__, '..', 'ext', 'libxml')) +$LOAD_PATH.unshift(File.expand_path(ext_path)) + +# Now load code require 'libxml-ruby' def windows? @@ -11,3 +15,6 @@ def windows? end STDOUT.write "\nlibxml2: #{LibXML::XML::LIBXML_VERSION}\n#{RUBY_DESCRIPTION}\n\n" + +require 'minitest/autorun' + diff --git a/test/test_node.rb b/test/test_node.rb index 5a1ce4d7..c0063290 100644 --- a/test/test_node.rb +++ b/test/test_node.rb @@ -1,237 +1,235 @@ -# encoding: UTF-8 - -require_relative './test_helper' - -class TestNode < Minitest::Test - def setup - @file_name = "model/bands.utf-8.xml" - - # Strip spaces to make testing easier - LibXML::XML.default_keep_blanks = false - file = File.join(File.dirname(__FILE__), @file_name) - @doc = LibXML::XML::Document.file(file) - end - - def teardown - LibXML::XML.default_keep_blanks = true - @doc = nil - end - - def nodes - # Find all nodes with a country attributes - @doc.find('*[@country]') - end - - def test_doc_class - assert_instance_of(LibXML::XML::Document, @doc) - end - - def test_doc_node_type - assert_equal LibXML::XML::Node::DOCUMENT_NODE, @doc.node_type - end - - def test_root_class - assert_instance_of(LibXML::XML::Node, @doc.root) - end - - def test_root_node_type - assert_equal LibXML::XML::Node::ELEMENT_NODE, @doc.root.node_type - end - - def test_node_class - for n in nodes - assert_instance_of(LibXML::XML::Node, n) - end - end - - def test_context - node = @doc.root - context = node.context - assert_instance_of(LibXML::XML::XPath::Context, context) - end - - def test_find - assert_instance_of(LibXML::XML::XPath::Object, self.nodes) - end - - def test_node_child_get - 
assert_instance_of(TrueClass, @doc.root.child?) - assert_instance_of(LibXML::XML::Node, @doc.root.child) - - assert_equal(Encoding::UTF_8, @doc.root.child.name.encoding) - assert_equal("m\u00F6tley_cr\u00FCe", @doc.root.child.name) - end - - def test_node_doc - for n in nodes - assert_instance_of(LibXML::XML::Document, n.doc) if n.document? - end - end - - def test_name - node = @doc.root.children.last - assert_equal("iron_maiden", node.name) - end - - def test_node_find - nodes = @doc.root.find('./fixnum') - for node in nodes - assert_instance_of(LibXML::XML::Node, node) - end - end - - def test_equality - node_a = @doc.find_first('*[@country]') - node_b = @doc.root.child - - # On the ruby side these are different objects - refute(node_a.equal?(node_b)) - - # But they are the same underlying libxml node so specify they are equal - assert(node_a == node_b) - assert(node_a.eql?(node_b)) - - file = File.join(File.dirname(__FILE__), @file_name) - doc2 = LibXML::XML::Document.file(file) - - node_a2 = doc2.find_first('*[@country]') - - refute(node_a == node_a2) - refute(node_a.eql?(node_a2)) - assert_equal(node_a.to_s, node_a2.to_s) - refute(node_a.equal?(node_a2)) - end - - def test_equality_2 - parent = LibXML::XML::Node.new('parent') - child = LibXML::XML::Node.new('child') - parent << child - - node_a = child.parent - node_b = child.parent - # In this case the nodes are equal - the parent being the root - assert(node_a.equal?(node_b)) - assert(node_a == node_b) - assert(node_a.eql?(node_b)) - end - - def test_equality_nil - node = @doc.root - assert(node != nil) - end - - def test_equality_wrong_type - node = @doc.root - - assert_raises(TypeError) do - assert(node != 'abc') - end - end - - def test_content - node = @doc.root.last - assert_equal("Iron Maiden is a British heavy metal band formed in 1975.", - node.content) - end - - def test_base - doc = LibXML::XML::Parser.string('').parse - assert_nil(doc.root.base_uri) - end - - # We use the same facility that 
libXSLT does here to disable output escaping. - # This lets you specify that the node's content should be rendered unaltered - # whenever it is being output. This is useful for things like