HEX
Server: Apache
System: Linux s198.coreserver.jp 5.15.0-151-generic #161-Ubuntu SMP Tue Jul 22 14:25:40 UTC 2025 x86_64
User: nagasaki (10062)
PHP: 7.1.33
Disabled: NONE
Upload Files
File: //usr/local/rvm/gems/default/gems/nokogiri-1.12.5-x86_64-linux/ext/nokogiri/xml_document.c
#include <nokogiri.h>

VALUE cNokogiriXmlDocument ;

static int
dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
{
  switch (node->type) {
    case XML_ATTRIBUTE_NODE:
      xmlFreePropList((xmlAttrPtr)node);
      break;
    case XML_NAMESPACE_DECL:
      xmlFreeNs((xmlNsPtr)node);
      break;
    case XML_DTD_NODE:
      xmlFreeDtd((xmlDtdPtr)node);
      break;
    default:
      if (node->parent == NULL) {
        xmlAddChild((xmlNodePtr)doc, node);
      }
  }
  return ST_CONTINUE;
}

static int
dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
{
  return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
}

static void
remove_private(xmlNodePtr node)
{
  xmlNodePtr child;

  for (child = node->children; child; child = child->next) {
    remove_private(child);
  }

  if ((node->type == XML_ELEMENT_NODE ||
       node->type == XML_XINCLUDE_START ||
       node->type == XML_XINCLUDE_END) &&
      node->properties) {
    for (child = (xmlNodePtr)node->properties; child; child = child->next) {
      remove_private(child);
    }
  }

  node->_private = NULL;
}

static void
mark(xmlDocPtr doc)
{
  nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
  if (tuple) {
    rb_gc_mark(tuple->doc);
    rb_gc_mark(tuple->node_cache);
  }
}

static void
dealloc(xmlDocPtr doc)
{
  st_table *node_hash;

  NOKOGIRI_DEBUG_START(doc);

  node_hash  = DOC_UNLINKED_NODE_HASH(doc);

  st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
  st_free_table(node_hash);

  free(doc->_private);

  /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
   * have their _private pointers cleared. This is to avoid libxml-ruby's
   * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
   * free context, which can result in segfaults.
   */
  if (xmlDeregisterNodeDefaultValue) {
    remove_private((xmlNodePtr)doc);
  }

  xmlFreeDoc(doc);

  NOKOGIRI_DEBUG_END(doc);
}

static void
recursively_remove_namespaces_from_node(xmlNodePtr node)
{
  xmlNodePtr child ;
  xmlAttrPtr property ;

  xmlSetNs(node, NULL);

  for (child = node->children ; child ; child = child->next) {
    recursively_remove_namespaces_from_node(child);
  }

  if (((node->type == XML_ELEMENT_NODE) ||
       (node->type == XML_XINCLUDE_START) ||
       (node->type == XML_XINCLUDE_END)) &&
      node->nsDef) {
    xmlFreeNsList(node->nsDef);
    node->nsDef = NULL;
  }

  if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
    property = node->properties ;
    while (property != NULL) {
      if (property->ns) { property->ns = NULL ; }
      property = property->next ;
    }
  }
}

/*
 * call-seq:
 *  url
 *
 * Get the url name for this document.
 */
static VALUE
url(VALUE self)
{
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }

  return Qnil;
}

/*
 * call-seq:
 *  root=
 *
 * Set the root element on this document
 */
static VALUE
rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
{
  xmlDocPtr c_document;
  xmlNodePtr c_new_root = NULL, c_current_root;

  Data_Get_Struct(self, xmlDoc, c_document);

  c_current_root = xmlDocGetRootElement(c_document);
  if (c_current_root) {
    xmlUnlinkNode(c_current_root);
    noko_xml_document_pin_node(c_current_root);
  }

  if (!NIL_P(rb_new_root)) {
    if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
      rb_raise(rb_eArgError,
               "expected Nokogiri::XML::Node but received %"PRIsVALUE,
               rb_obj_class(rb_new_root));
    }

    Data_Get_Struct(rb_new_root, xmlNode, c_new_root);

    /* If the new root's document is not the same as the current document,
     * then we need to dup the node in to this document. */
    if (c_new_root->doc != c_document) {
      c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
      if (!c_new_root) {
        rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
      }
    }
  }

  xmlDocSetRootElement(c_document, c_new_root);

  return rb_new_root;
}

/*
 * call-seq:
 *  root
 *
 * Get the root node for this document.
 */
static VALUE
rb_xml_document_root(VALUE self)
{
  xmlDocPtr c_document;
  xmlNodePtr c_root;

  Data_Get_Struct(self, xmlDoc, c_document);

  c_root = xmlDocGetRootElement(c_document);
  if (!c_root) {
    return Qnil;
  }

  return noko_xml_node_wrap(Qnil, c_root) ;
}

/*
 * call-seq:
 *  encoding= encoding
 *
 * Set the encoding string for this Document
 */
static VALUE
set_encoding(VALUE self, VALUE encoding)
{
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->encoding) {
    xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
  }

  doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));

  return encoding;
}

/*
 * call-seq:
 *  encoding
 *
 * Get the encoding for this Document
 */
static VALUE
encoding(VALUE self)
{
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if (!doc->encoding) { return Qnil; }
  return NOKOGIRI_STR_NEW2(doc->encoding);
}

/*
 * call-seq:
 *  version
 *
 * Get the XML version for this Document
 */
static VALUE
version(VALUE self)
{
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if (!doc->version) { return Qnil; }
  return NOKOGIRI_STR_NEW2(doc->version);
}

/*
 * call-seq:
 *  read_io(io, url, encoding, options)
 *
 * Create a new document from an IO object
 */
static VALUE
read_io(VALUE klass,
        VALUE io,
        VALUE url,
        VALUE encoding,
        VALUE options)
{
  const char *c_url    = NIL_P(url)      ? NULL : StringValueCStr(url);
  const char *c_enc    = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = xmlReadIO(
          (xmlInputReadCallback)noko_io_read,
          (xmlInputCloseCallback)noko_io_close,
          (void *)io,
          c_url,
          c_enc,
          (int)NUM2INT(options)
        );
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (doc == NULL) {
    xmlErrorPtr error;

    xmlFreeDoc(doc);

    error = xmlGetLastError();
    if (error) {
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
    } else {
      rb_raise(rb_eRuntimeError, "Could not parse document");
    }

    return Qnil;
  }

  document = noko_xml_document_wrap(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}

/*
 * call-seq:
 *  read_memory(string, url, encoding, options)
 *
 * Create a new document from a String
 */
static VALUE
read_memory(VALUE klass,
            VALUE string,
            VALUE url,
            VALUE encoding,
            VALUE options)
{
  const char *c_buffer = StringValuePtr(string);
  const char *c_url    = NIL_P(url)      ? NULL : StringValueCStr(url);
  const char *c_enc    = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
  int len               = (int)RSTRING_LEN(string);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
  doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (doc == NULL) {
    xmlErrorPtr error;

    xmlFreeDoc(doc);

    error = xmlGetLastError();
    if (error) {
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
    } else {
      rb_raise(rb_eRuntimeError, "Could not parse document");
    }

    return Qnil;
  }

  document = noko_xml_document_wrap(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}

/*
 * call-seq:
 *  dup
 *
 * Copy this Document.  An optional depth may be passed in, but it defaults
 * to a deep copy.  0 is a shallow copy, 1 is a deep copy.
 */
static VALUE
duplicate_document(int argc, VALUE *argv, VALUE self)
{
  xmlDocPtr doc, dup;
  VALUE copy;
  VALUE level;

  if (rb_scan_args(argc, argv, "01", &level) == 0) {
    level = INT2NUM((long)1);
  }

  Data_Get_Struct(self, xmlDoc, doc);

  dup = xmlCopyDoc(doc, (int)NUM2INT(level));

  if (dup == NULL) { return Qnil; }

  dup->type = doc->type;
  copy = noko_xml_document_wrap(rb_obj_class(self), dup);
  rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors"));
  return copy ;
}

/*
 * call-seq:
 *  new(version = default)
 *
 * Create a new document with +version+ (defaults to "1.0")
 */
static VALUE
new (int argc, VALUE *argv, VALUE klass)
{
  xmlDocPtr doc;
  VALUE version, rest, rb_doc ;

  rb_scan_args(argc, argv, "0*", &rest);
  version = rb_ary_entry(rest, (long)0);
  if (NIL_P(version)) { version = rb_str_new2("1.0"); }

  doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
  rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
  return rb_doc ;
}

/*
 *  call-seq:
 *    remove_namespaces!
 *
 *  Remove all namespaces from all nodes in the document.
 *
 *  This could be useful for developers who either don't understand namespaces
 *  or don't care about them.
 *
 *  The following example shows a use case, and you can decide for yourself
 *  whether this is a good thing or not:
 *
 *    doc = Nokogiri::XML <<-EOXML
 *       <root>
 *         <car xmlns:part="http://general-motors.com/">
 *           <part:tire>Michelin Model XGV</part:tire>
 *         </car>
 *         <bicycle xmlns:part="http://schwinn.com/">
 *           <part:tire>I'm a bicycle tire!</part:tire>
 *         </bicycle>
 *       </root>
 *       EOXML
 *
 *    doc.xpath("//tire").to_s # => ""
 *    doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
 *    doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
 *
 *    doc.remove_namespaces!
 *
 *    doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
 *    doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
 *    doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
 *
 *  For more information on why this probably is *not* a good thing in general,
 *  please direct your browser to
 *  http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
 */
static VALUE
remove_namespaces_bang(VALUE self)
{
  xmlDocPtr doc ;
  Data_Get_Struct(self, xmlDoc, doc);

  recursively_remove_namespaces_from_node((xmlNodePtr)doc);
  return self;
}

/* call-seq: doc.create_entity(name, type, external_id, system_id, content)
 *
 * Create a new entity named +name+.
 *
 * +type+ is an integer representing the type of entity to be created, and it
 * defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL.  See
 * the constants on Nokogiri::XML::EntityDecl for more information.
 *
 * +external_id+, +system_id+, and +content+ set the External ID, System ID,
 * and content respectively.  All of these parameters are optional.
 */
static VALUE
create_entity(int argc, VALUE *argv, VALUE self)
{
  VALUE name;
  VALUE type;
  VALUE external_id;
  VALUE system_id;
  VALUE content;
  xmlEntityPtr ptr;
  xmlDocPtr doc ;

  Data_Get_Struct(self, xmlDoc, doc);

  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
               &content);

  xmlResetLastError();
  ptr = xmlAddDocEntity(
          doc,
          (xmlChar *)(NIL_P(name)        ? NULL                        : StringValueCStr(name)),
          (int)(NIL_P(type)        ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
          (xmlChar *)(NIL_P(external_id) ? NULL                        : StringValueCStr(external_id)),
          (xmlChar *)(NIL_P(system_id)   ? NULL                        : StringValueCStr(system_id)),
          (xmlChar *)(NIL_P(content)     ? NULL                        : StringValueCStr(content))
        );

  if (NULL == ptr) {
    xmlErrorPtr error = xmlGetLastError();
    if (error) {
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
    } else {
      rb_raise(rb_eRuntimeError, "Could not create entity");
    }

    return Qnil;
  }

  return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
}

static int
block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
{
  VALUE block = (VALUE)ctx;
  VALUE rb_node;
  VALUE rb_parent_node;
  VALUE ret;

  if (c_node->type == XML_NAMESPACE_DECL) {
    rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
  } else {
    rb_node = noko_xml_node_wrap(Qnil, c_node);
  }
  rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;

  ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);

  return (Qfalse == ret || Qnil == ret) ? 0 : 1;
}

/* call-seq:
 *  doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
 *  doc.canonicalize { |obj, parent| ... }
 *
 * Canonicalize a document and return the results.  Takes an optional block
 * that takes two parameters: the +obj+ and that node's +parent+.
 * The  +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
 * The block must return a non-nil, non-false value if the +obj+ passed in
 * should be included in the canonicalized document.
 */
static VALUE
rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
{
  VALUE mode;
  VALUE incl_ns;
  VALUE with_comments;
  xmlChar **ns;
  long ns_len, i;

  xmlDocPtr doc;
  xmlOutputBufferPtr buf;
  xmlC14NIsVisibleCallback cb = NULL;
  void *ctx = NULL;

  VALUE rb_cStringIO;
  VALUE io;

  rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);

  Data_Get_Struct(self, xmlDoc, doc);

  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
  io           = rb_class_new_instance(0, 0, rb_cStringIO);
  buf          = xmlAllocOutputBuffer(NULL);

  buf->writecallback = (xmlOutputWriteCallback)noko_io_write;
  buf->closecallback = (xmlOutputCloseCallback)noko_io_close;
  buf->context       = (void *)io;

  if (rb_block_given_p()) {
    cb = block_caller;
    ctx = (void *)rb_block_proc();
  }

  if (NIL_P(incl_ns)) {
    ns = NULL;
  } else {
    Check_Type(incl_ns, T_ARRAY);
    ns_len = RARRAY_LEN(incl_ns);
    ns = calloc((size_t)ns_len + 1, sizeof(xmlChar *));
    for (i = 0 ; i < ns_len ; i++) {
      VALUE entry = rb_ary_entry(incl_ns, i);
      ns[i] = (xmlChar *)StringValueCStr(entry);
    }
  }


  xmlC14NExecute(doc, cb, ctx,
                 (int)(NIL_P(mode)        ? 0 : NUM2INT(mode)),
                 ns,
                 (int)      RTEST(with_comments),
                 buf);

  xmlOutputBufferClose(buf);

  return rb_funcall(io, rb_intern("string"), 0);
}

VALUE
noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
{
  VALUE rb_document;
  nokogiriTuplePtr tuple;

  if (!klass) {
    klass = cNokogiriXmlDocument;
  }

  rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);

  tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
  tuple->doc = rb_document;
  tuple->unlinkedNodes = st_init_numtable_with_size(128);
  tuple->node_cache = rb_ary_new();

  c_document->_private = tuple ;

  rb_iv_set(rb_document, "@decorators", Qnil);
  rb_iv_set(rb_document, "@errors", Qnil);
  rb_iv_set(rb_document, "@node_cache", tuple->node_cache);

  rb_obj_call_init(rb_document, argc, argv);

  return rb_document ;
}


/* deprecated. use noko_xml_document_wrap() instead. */
VALUE
Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
{
  /* TODO: deprecate this method in v2.0 */
  return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
}

VALUE
noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
{
  return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
}


void
noko_xml_document_pin_node(xmlNodePtr node)
{
  xmlDocPtr doc;
  nokogiriTuplePtr tuple;

  doc = node->doc;
  tuple = (nokogiriTuplePtr)doc->_private;
  st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
}


void
noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
{
  nokogiriTuplePtr tuple;

  tuple = (nokogiriTuplePtr)doc->_private;
  st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
}


void
noko_init_xml_document()
{
  assert(cNokogiriXmlNode);
  /*
   * Nokogiri::XML::Document wraps an xml document.
   */
  cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);

  rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", read_memory, 4);
  rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4);
  rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);

  rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
  rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
  rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
  rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
  rb_define_method(cNokogiriXmlDocument, "version", version, 0);
  rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
  rb_define_method(cNokogiriXmlDocument, "dup", duplicate_document, -1);
  rb_define_method(cNokogiriXmlDocument, "url", url, 0);
  rb_define_method(cNokogiriXmlDocument, "create_entity", create_entity, -1);
  rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
}