From 6ac03841652688b5f1b9e60572ec74d6836aa272 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 13 Oct 2020 08:06:31 -0400 Subject: [PATCH 1/5] cleanup: no need to check for HAVE_RUBY_UTIL_H in C extension which was introduced in ruby v1_9_0_3 --- ext/nokogiri/nokogiri.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index 740b30d5a0..292ffee358 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -33,11 +33,7 @@ void vasprintf_free (void *p) free(p); } -#ifdef HAVE_RUBY_UTIL_H #include "ruby/util.h" -#else -#include "util.h" -#endif void nokogiri_root_node(xmlNodePtr node) { From 17e04391d65230cbf402ec8a50bee9af310600a2 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 12 Oct 2020 16:10:56 -0400 Subject: [PATCH 2/5] info: add libxml2 version information to extconf have_func calls this will be useful to do some cleanup when we eventually decide to bump our libxml2 dependency. --- ext/nokogiri/extconf.rb | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index 89c6ec56d8..7c8b447f72 100644 --- a/ext/nokogiri/extconf.rb +++ b/ext/nokogiri/extconf.rb @@ -675,13 +675,12 @@ def configure asplode("lib#{lib}") end -have_func('xmlHasFeature') or abort "xmlHasFeature() is missing." -have_func('xmlFirstElementChild') -have_func('xmlRelaxNGSetParserStructuredErrors') -have_func('xmlRelaxNGSetParserStructuredErrors') -have_func('xmlRelaxNGSetValidStructuredErrors') -have_func('xmlSchemaSetValidStructuredErrors') -have_func('xmlSchemaSetParserStructuredErrors') +have_func('xmlHasFeature') or abort "xmlHasFeature() is missing." # introduced in libxml 2.6.21 +have_func('xmlFirstElementChild') # introduced in libxml 2.7.3 +have_func('xmlRelaxNGSetParserStructuredErrors') # introduced in libxml 2.6.24 +have_func('xmlRelaxNGSetValidStructuredErrors') # introduced in libxml 2.6.21 +have_func('xmlSchemaSetValidStructuredErrors') # introduced in libxml 2.6.23 +have_func('xmlSchemaSetParserStructuredErrors') # introduced in libxml 2.6.23 create_makefile('nokogiri/nokogiri') From b329a7c87a39c5c3d05574a8e8fa8b778d398fab Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 12 Oct 2020 16:30:53 -0400 Subject: [PATCH 3/5] cleanup: use have_func to check for vasprintf and - remove platform-specific "-D" flags - remove `vasprintf_free` - change calls to `vasprintf_free` to call `free` --- ext/nokogiri/extconf.rb | 7 ++---- ext/nokogiri/nokogiri.c | 10 ++------- ext/nokogiri/nokogiri.h | 38 ++++++++++++++------------------ ext/nokogiri/xml_sax_parser.c | 7 ++---- ext/nokogiri/xml_xpath_context.c | 2 -- ext/nokogiri/xslt_stylesheet.c | 5 +---- 6 files changed, 24 insertions(+), 45 deletions(-) diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index 7c8b447f72..861dd6545a 100644 --- a/ext/nokogiri/extconf.rb +++ b/ext/nokogiri/extconf.rb @@ -421,11 +421,7 @@ def using_system_libraries? add_cflags(ENV["CFLAGS"]) if windows? - $CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF" -end - -if solaris? || aix? - $CFLAGS << " -DUSE_INCLUDED_VASPRINTF" + $CFLAGS << " -DXP_WIN -DXP_WIN32" end if darwin? @@ -675,6 +671,7 @@ def configure asplode("lib#{lib}") end +have_func('vasprintf') have_func('xmlHasFeature') or abort "xmlHasFeature() is missing." # introduced in libxml 2.6.21 have_func('xmlFirstElementChild') # introduced in libxml 2.7.3 have_func('xmlRelaxNGSetParserStructuredErrors') # introduced in libxml 2.6.24 diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index 292ffee358..b68cd5866a 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -7,12 +7,11 @@ VALUE mNokogiriXslt ; VALUE mNokogiriXmlSax ; VALUE mNokogiriHtmlSax ; -#ifdef USE_INCLUDED_VASPRINTF +#ifndef HAVE_VASPRINTF /* - * I srsly hate windows. it doesn't have vasprintf. * Thank you Geoffroy Couprie for this implementation of vasprintf! */ -int vasprintf (char **strp, const char *fmt, va_list ap) +int vasprintf(char **strp, const char *fmt, va_list ap) { /* Mingw32/64 have a broken vsnprintf implementation that fails when * using a zero-byte limit in order to retrieve the required size for malloc. @@ -28,11 +27,6 @@ int vasprintf (char **strp, const char *fmt, va_list ap) } #endif -void vasprintf_free (void *p) -{ - free(p); -} - #include "ruby/util.h" void nokogiri_root_node(xmlNodePtr node) diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index 2dad57633b..1d4940a1c8 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -2,31 +2,24 @@ #define NOKOGIRI_NATIVE #if _MSC_VER -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif /* WIN32_LEAN_AND_MEAN */ -#ifndef WIN32 -#define WIN32 -#endif /* WIN32 */ -#include -#include -#include +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif /* WIN32_LEAN_AND_MEAN */ + +# ifndef WIN32 +# define WIN32 +# endif /* WIN32 */ + +# include +# include +# include #endif #include #include #include #include - -#ifdef USE_INCLUDED_VASPRINTF -int vasprintf (char **strp, const char *fmt, va_list ap); -#else - -#define _GNU_SOURCE -# include -#undef _GNU_SOURCE - -#endif +#include #include #include @@ -43,6 +36,9 @@ int vasprintf (char **strp, const char *fmt, va_list ap); #include #include #include + +#include + #include #include #include @@ -64,8 +60,6 @@ int vasprintf (char **strp, const char *fmt, va_list ap); #define RBSTR_OR_QNIL(_str) \ (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil) -#include - #include #include #include @@ -106,6 +100,8 @@ extern VALUE mNokogiriHtml ; extern VALUE mNokogiriHtmlSax ; extern VALUE mNokogiriXslt ; +int vasprintf(char **strp, const char *fmt, va_list ap); + void nokogiri_root_node(xmlNodePtr); void nokogiri_root_nsdef(xmlNsPtr, xmlDocPtr); diff --git a/ext/nokogiri/xml_sax_parser.c b/ext/nokogiri/xml_sax_parser.c index 1a5f6c5f51..0c7d45ec30 100644 --- a/ext/nokogiri/xml_sax_parser.c +++ b/ext/nokogiri/xml_sax_parser.c @@ -1,8 +1,5 @@ #include -int vasprintf (char **strp, const char *fmt, va_list ap); -void vasprintf_free (void *p); - static ID id_start_document, id_end_document, id_start_element, id_end_element; static ID id_start_element_namespace, id_end_element_namespace; static ID id_comment, id_characters, id_xmldecl, id_error, id_warning; @@ -206,7 +203,7 @@ static void warning_func(void * ctx, const char *msg, ...) va_end(args); ruby_message = NOKOGIRI_STR_NEW2(message); - vasprintf_free(message); + free(message); rb_funcall(doc, id_warning, 1, ruby_message); } @@ -223,7 +220,7 @@ static void error_func(void * ctx, const char *msg, ...) va_end(args); ruby_message = NOKOGIRI_STR_NEW2(message); - vasprintf_free(message); + free(message); rb_funcall(doc, id_error, 1, ruby_message); } diff --git a/ext/nokogiri/xml_xpath_context.c b/ext/nokogiri/xml_xpath_context.c index cb1be24b72..94f47f640b 100644 --- a/ext/nokogiri/xml_xpath_context.c +++ b/ext/nokogiri/xml_xpath_context.c @@ -1,7 +1,5 @@ #include -int vasprintf (char **strp, const char *fmt, va_list ap); - static void deallocate(xmlXPathContextPtr ctx) { NOKOGIRI_DEBUG_START(ctx); diff --git a/ext/nokogiri/xslt_stylesheet.c b/ext/nokogiri/xslt_stylesheet.c index 779294badd..73b975044e 100644 --- a/ext/nokogiri/xslt_stylesheet.c +++ b/ext/nokogiri/xslt_stylesheet.c @@ -7,9 +7,6 @@ VALUE xslt; -int vasprintf (char **strp, const char *fmt, va_list ap); -void vasprintf_free (void *p); - static void mark(nokogiriXsltStylesheetTuple *wrapper) { rb_gc_mark(wrapper->func_instances); @@ -37,7 +34,7 @@ static void xslt_generic_error_handler(void * ctx, const char *msg, ...) rb_str_cat2((VALUE)ctx, message); - vasprintf_free(message); + free(message); } VALUE Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss) From b76ee14e05d5b05020550e03be29537b7e38e1be Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 12 Oct 2020 16:41:02 -0400 Subject: [PATCH 4/5] cleanup: fix xml_node.c type cast warning --- ext/nokogiri/xml_node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 3cde12ad67..e0beb8b36e 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -301,7 +301,7 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep * issue #391, where new node's prefix may become the string "default" * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior. */ - xmlFree(reparentee->ns->prefix); + xmlFree((xmlChar*)reparentee->ns->prefix); reparentee->ns->prefix = NULL; } } From 426fd8909ab0cef956ed9549f6d575191d7eb519 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 12 Oct 2020 17:25:25 -0400 Subject: [PATCH 5/5] fix: address memory leak when xpath evaluation raises an error also ensure CRuby raises an XML::XPath::SyntaxError on xpath function errors (like JRuby). previously CRuby raised a RuntimeError. Related to #1610 and #1882. --- ext/nokogiri/xml_syntax_error.c | 32 +++++++++++++++++++++++++------- ext/nokogiri/xml_syntax_error.h | 2 ++ ext/nokogiri/xml_xpath_context.c | 25 ++++--------------------- test/xml/test_document.rb | 21 +++++++-------------- 4 files changed, 38 insertions(+), 42 deletions(-) diff --git a/ext/nokogiri/xml_syntax_error.c b/ext/nokogiri/xml_syntax_error.c index 0b240f05a5..b726a47afb 100644 --- a/ext/nokogiri/xml_syntax_error.c +++ b/ext/nokogiri/xml_syntax_error.c @@ -12,6 +12,26 @@ void Nokogiri_error_raise(void * ctx, xmlErrorPtr error) rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); } +void Nokogiri_generic_error_array_pusher(void * ctx, const char *msg, ...) +{ + va_list args; + char * message; + VALUE error_list = (VALUE)ctx; + VALUE message_rb; + + Check_Type(error_list, T_ARRAY); + + va_start(args, msg); + vasprintf(&message, msg, args); + va_end(args); + + message_rb = NOKOGIRI_STR_NEW2(message); + rb_ary_push(error_list, + rb_class_new_instance(1, &message_rb, cNokogiriXmlXpathSyntaxError)); + + free(message); +} + VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error) { VALUE msg, e, klass; @@ -19,8 +39,7 @@ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error) klass = cNokogiriXmlSyntaxError; if (error && error->domain == XML_FROM_XPATH) { - VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath")); - klass = rb_const_get(xpath, rb_intern("SyntaxError")); + klass = cNokogiriXmlXpathSyntaxError; } msg = (error && error->message) ? NOKOGIRI_STR_NEW2(error->message) : Qnil; @@ -49,16 +68,15 @@ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error) } VALUE cNokogiriXmlSyntaxError; +VALUE cNokogiriXmlXpathSyntaxError; void init_xml_syntax_error() { VALUE nokogiri = rb_define_module("Nokogiri"); VALUE xml = rb_define_module_under(nokogiri, "XML"); - /* - * The XML::SyntaxError is raised on parse errors - */ VALUE syntax_error_mommy = rb_define_class_under(nokogiri, "SyntaxError", rb_eStandardError); - VALUE klass = rb_define_class_under(xml, "SyntaxError", syntax_error_mommy); - cNokogiriXmlSyntaxError = klass; + cNokogiriXmlSyntaxError = rb_define_class_under(xml, "SyntaxError", syntax_error_mommy); + VALUE xml_xpath = rb_define_class_under(mNokogiriXml, "XPath", rb_cObject); + cNokogiriXmlXpathSyntaxError = rb_define_class_under(xml_xpath, "SyntaxError", cNokogiriXmlSyntaxError); } diff --git a/ext/nokogiri/xml_syntax_error.h b/ext/nokogiri/xml_syntax_error.h index 58475cb852..1883639f75 100644 --- a/ext/nokogiri/xml_syntax_error.h +++ b/ext/nokogiri/xml_syntax_error.h @@ -6,8 +6,10 @@ void init_xml_syntax_error(); VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error); void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error); +void Nokogiri_generic_error_array_pusher(void * ctx, const char *msg, ...); NORETURN(void Nokogiri_error_raise(void * ctx, xmlErrorPtr error)); extern VALUE cNokogiriXmlSyntaxError; +extern VALUE cNokogiriXmlXpathSyntaxError; #endif diff --git a/ext/nokogiri/xml_xpath_context.c b/ext/nokogiri/xml_xpath_context.c index 94f47f640b..b6fc0c187d 100644 --- a/ext/nokogiri/xml_xpath_context.c +++ b/ext/nokogiri/xml_xpath_context.c @@ -166,19 +166,6 @@ static xmlXPathFunction lookup( void *ctx, return NULL; } -NORETURN(static void xpath_generic_exception_handler(void * ctx, const char *msg, ...)); -static void xpath_generic_exception_handler(void * ctx, const char *msg, ...) -{ - char * message; - - va_list args; - va_start(args, msg); - vasprintf(&message, msg, args); - va_end(args); - - rb_raise(rb_eRuntimeError, "%s", message); -} - /* * call-seq: * evaluate(search_path, handler = nil) @@ -192,6 +179,7 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self) xmlXPathContextPtr ctx; xmlXPathObjectPtr xpath; xmlChar *query; + VALUE error_list = rb_ary_new(); Data_Get_Struct(self, xmlXPathContext, ctx); @@ -206,20 +194,15 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self) xmlXPathRegisterFuncLookup(ctx, lookup, (void *)xpath_handler); } - xmlResetLastError(); - xmlSetStructuredErrorFunc(NULL, Nokogiri_error_raise); - - /* For some reason, xmlXPathEvalExpression will blow up with a generic error */ - /* when there is a non existent function. */ - xmlSetGenericErrorFunc(NULL, xpath_generic_exception_handler); + xmlSetStructuredErrorFunc((void*)error_list, Nokogiri_error_array_pusher); + xmlSetGenericErrorFunc((void*)error_list, Nokogiri_generic_error_array_pusher); xpath = xmlXPathEvalExpression(query, ctx); xmlSetStructuredErrorFunc(NULL, NULL); xmlSetGenericErrorFunc(NULL, NULL); if(xpath == NULL) { - xmlErrorPtr error = xmlGetLastError(); - rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); + rb_exc_raise(rb_ary_entry(error_list, -1)); } assert(ctx->doc); diff --git a/test/xml/test_document.rb b/test/xml/test_document.rb index 46d91f0d3d..03b32550a2 100644 --- a/test/xml/test_document.rb +++ b/test/xml/test_document.rb @@ -536,25 +536,18 @@ def test_namespace_should_not_exist } end - def test_non_existant_function - # WTF. I don't know why this is different between MRI and Jruby - # They should be the same... Either way, raising an exception - # is the correct thing to do. - exception = RuntimeError - - if !Nokogiri.uses_libxml? || (Nokogiri.uses_libxml? && Nokogiri::VERSION_INFO["libxml"]["platform"] == "jruby") - exception = Nokogiri::XML::XPath::SyntaxError - end - - assert_raises(exception) { + def test_non_existent_function + e = assert_raises(Nokogiri::XML::XPath::SyntaxError) do @xml.xpath("//name[foo()]") - } + end + assert_match(/foo/, e.message) end def test_xpath_syntax_error - assert_raises(Nokogiri::XML::XPath::SyntaxError) do - @xml.xpath('\\') + e = assert_raises(Nokogiri::XML::XPath::SyntaxError) do + @xml.xpath('[asdf]') end + assert_match(/\[asdf\]/, e.message) end def test_ancestors