Create a Nokogiri::XML::DocumentFragment from tags
# File lib/nokogiri/html/document.rb, line 70
#
# Build a DocumentFragment owned by this document.
#
# +tags+ is handed to DocumentFragment.new unchanged (nil when omitted),
# together with this document and its root node.
def fragment(tags = nil)
  context = root
  DocumentFragment.new(self, tags, context)
end
Get the meta tag encoding for this document. If there is no meta tag, then nil is returned.
# File lib/nokogiri/html/document.rb, line 7
#
# Get the encoding named by this document's meta content-type tag.
#
# Returns the charset name as a String. Returns nil when
# meta_content_type finds no tag, or when the tag's +content+ attribute
# carries no <tt>charset=</tt> part.
def meta_encoding
  meta = meta_content_type or return nil
  # Regexp#match returns nil when there is no charset declaration, so the
  # capture must only be read from a successful match.
  found = /charset\s*=\s*([\w-]+)/.match(meta['content'])
  found && found[1]
end
Set the meta tag encoding for this document. If there is no meta content tag, the encoding is not set.
# File lib/nokogiri/html/document.rb, line 15
#
# Rewrite the +content+ attribute of the meta content-type tag so that it
# declares +encoding+ as the charset.
#
# When meta_content_type finds no tag nothing is changed and that falsy
# result is returned.
def meta_encoding=(encoding)
  tag = meta_content_type
  return tag unless tag

  tag['content'] = "text/html; charset=%s" % encoding
end
Serialize Node using options. Save options can also be set using a block. See SaveOptions.
These two statements are equivalent:
node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
or
node.serialize(:encoding => 'UTF-8') do |config| config.format.as_xml end
# File lib/nokogiri/html/document.rb, line 60
#
# Serialize this document, defaulting to HTML output.
#
# Unless +options+ already carries a truthy :save_with entry, it is set
# (on the hash passed in) to FORMAT | AS_HTML | NO_DECLARATION |
# NO_EMPTY_TAGS before control is handed to the inherited implementation.
def serialize(options = {})
  unless options[:save_with]
    flags = XML::Node::SaveOptions
    options[:save_with] =
      flags::FORMAT | flags::AS_HTML | flags::NO_DECLARATION | flags::NO_EMPTY_TAGS
  end
  super
end
Get the title string of this document. Return nil if there is no title tag.
# File lib/nokogiri/html/document.rb, line 30
#
# Return the inner text of the node found by <tt>at('title')</tt>, or nil
# when no such node is found.
def title
  node = at('title')
  node && node.inner_text
end
Set the title string of this document. If there is no head element, the title is not set.
# File lib/nokogiri/html/document.rb, line 37
#
# Replace the children of the title node with a single text node holding
# +text+.
#
# When <tt>at('title')</tt> finds nothing, a new title node is created and
# appended to the node found by <tt>at('head')</tt>. If that is missing
# too, nothing is changed and nil is returned.
def title=(text)
  node = at('title')
  unless node
    head = at('head')
    return nil unless head

    node = Nokogiri::XML::Node.new('title', self)
    head << node
  end
  node.children = XML::Text.new(text, self)
end
Create a new document
/*
 * Create a new HTML document.
 *
 * Accepts any number of arguments; the first two, when truthy, are passed
 * to htmlNewDoc as its two string arguments (named uri and external_id
 * here). The full argument list is then forwarded to the new object's
 * initialize via rb_obj_call_init.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE rest, uri, external_id, rb_doc;
  htmlDocPtr doc;

  rb_scan_args(argc, argv, "0*", &rest);

  /* rb_ary_entry yields nil for indexes past the end of the array. */
  uri         = rb_ary_entry(rest, (long)0);
  external_id = rb_ary_entry(rest, (long)1);

  doc = htmlNewDoc(
          RTEST(uri) ? (const xmlChar *)StringValuePtr(uri) : NULL,
          RTEST(external_id) ? (const xmlChar *)StringValuePtr(external_id) : NULL
        );

  rb_doc = Nokogiri_wrap_xml_document(klass, doc);
  rb_obj_call_init(rb_doc, argc, argv);

  return rb_doc;
}
Parse HTML. string_or_io may be a String, or any object that responds to read and close, such as an IO or StringIO. url is the resource where this document is located. encoding is the encoding that should be used when processing the document. options is a number that sets options in the parser, such as Nokogiri::XML::ParseOptions::RECOVER. See the constants in Nokogiri::XML::ParseOptions.
# File lib/nokogiri/html/document.rb, line 83
#
# Parse HTML from +string_or_io+.
#
# +string_or_io+:: a String, or an object responding to +read+ (handed to
#                  read_io). nil or an empty String yields a new, empty
#                  document.
# +url+::          location of the document; defaults to the IO's +path+
#                  when the IO responds to it.
# +encoding+::     encoding name; when nil it is taken from the input's own
#                  encoding (unless that is ASCII-8BIT) or detected through
#                  EncodingReader.
# +options+::      an Integer bit mask or a ParseOptions object. When a
#                  block is given, the ParseOptions object is yielded to it
#                  before parsing.
def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
  # Integer covers every integral value on every Ruby version; the Fixnum
  # constant was deprecated in Ruby 2.4 and later removed.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  # Give the options to the user
  yield options if block_given?

  if string_or_io.respond_to?(:encoding)
    unless string_or_io.encoding.name == "ASCII-8BIT"
      encoding ||= string_or_io.encoding.name
    end
  end

  if string_or_io.respond_to?(:read)
    url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
    if !encoding
      # Perform further encoding detection that libxml2 does
      # not do.
      string_or_io = EncodingReader.new(string_or_io)
      begin
        return read_io(string_or_io, url, encoding, options.to_i)
      rescue EncodingFoundException => e
        # A retry is required because libxml2 has a problem in
        # that it cannot switch encoding well in the middle of
        # parsing, especially if it has already seen a
        # non-ASCII character when it finds an encoding hint.
        encoding = e.encoding
      end
    end
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory pukes on empty docs
  return new if string_or_io.nil? or string_or_io.empty?

  if !encoding
    encoding = EncodingReader.detect_encoding(string_or_io)
  end

  read_memory(string_or_io, url, encoding, options.to_i)
end
Read the HTML document from io with given url, encoding, and options. See Nokogiri::HTML.parse
/*
 * Read the HTML document from +io+ with the given +url+, +encoding+ and
 * +options+. See Nokogiri::HTML.parse.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to
 * htmlReadIO. The errors collected while reading are stored in the
 * document's @errors instance variable.
 *
 * Raises the wrapped libxml2 syntax error (or RuntimeError when libxml2
 * reports no error) if no document could be built.
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
                      VALUE url,
                      VALUE encoding,
                      VALUE options )
{
  const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
  const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  /* Convert before the error handler is installed: NUM2INT raises for a
   * value that does not fit a C int, and raising while the handler is
   * registered would leave it pointing at error_list after this frame
   * has been unwound. */
  int c_options = (int)NUM2INT(options);
  VALUE error_list = rb_ary_new();
  VALUE document;
  htmlDocPtr doc;

  xmlResetLastError();
  /* Route libxml2 structured errors to Nokogiri_error_array_pusher, with
   * error_list as its context, for the duration of the read. */
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadIO(
          io_read_callback,
          io_close_callback,
          (void *)io,
          c_url,
          c_enc,
          c_options
        );
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    /* Nothing to free here: doc is NULL on this path. */
    xmlErrorPtr error = xmlGetLastError();
    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil; /* not reached: both branches above raise */
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
Read the HTML document contained in string with given url, encoding, and options. See Nokogiri::HTML.parse
/*
 * Read the HTML document contained in +string+ with the given +url+,
 * +encoding+ and +options+. See Nokogiri::HTML.parse.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to
 * htmlReadMemory. The errors collected while reading are stored in the
 * document's @errors instance variable.
 *
 * Raises the wrapped libxml2 syntax error (or RuntimeError when libxml2
 * reports no error) if no document could be built.
 */
static VALUE read_memory( VALUE klass,
                          VALUE string,
                          VALUE url,
                          VALUE encoding,
                          VALUE options )
{
  const char * c_buffer = StringValuePtr(string);
  const char * c_url    = NIL_P(url)      ? NULL : StringValuePtr(url);
  const char * c_enc    = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  int len               = (int)RSTRING_LEN(string);
  /* Convert before the error handler is installed: NUM2INT raises for a
   * value that does not fit a C int, and raising while the handler is
   * registered would leave it pointing at error_list after this frame
   * has been unwound. */
  int c_options         = (int)NUM2INT(options);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  htmlDocPtr doc;

  xmlResetLastError();
  /* Route libxml2 structured errors to Nokogiri_error_array_pusher, with
   * error_list as its context, for the duration of the read. */
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadMemory(c_buffer, len, c_url, c_enc, c_options);
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    /* Nothing to free here: doc is NULL on this path. */
    xmlErrorPtr error = xmlGetLastError();
    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil; /* not reached: both branches above raise */
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
Generated with the Darkfish Rdoc Generator 2.