yarv-diff:249
From: ko1 atdot.net
Date: 13 Feb 2006 23:05:38 -0000
Subject: [yarv-diff:249] r412 - in trunk: . lib lib/rexml lib/rexml/dtd lib/rexml/encodings lib/rexml/light lib/rexml/parsers lib/rexml/validation lib/rss test test/digest test/erb test/fileutils test/optparse test/ostruct test/ruby
Author: aamine
Date: 2006-02-14 08:05:37 +0900 (Tue, 14 Feb 2006)
New Revision: 412
Added:
trunk/lib/rexml/
trunk/lib/rexml/attlistdecl.rb
trunk/lib/rexml/attribute.rb
trunk/lib/rexml/cdata.rb
trunk/lib/rexml/child.rb
trunk/lib/rexml/comment.rb
trunk/lib/rexml/doctype.rb
trunk/lib/rexml/document.rb
trunk/lib/rexml/dtd/
trunk/lib/rexml/dtd/attlistdecl.rb
trunk/lib/rexml/dtd/dtd.rb
trunk/lib/rexml/dtd/elementdecl.rb
trunk/lib/rexml/dtd/entitydecl.rb
trunk/lib/rexml/dtd/notationdecl.rb
trunk/lib/rexml/element.rb
trunk/lib/rexml/encoding.rb
trunk/lib/rexml/encodings/
trunk/lib/rexml/encodings/CP-1252.rb
trunk/lib/rexml/encodings/EUC-JP.rb
trunk/lib/rexml/encodings/ICONV.rb
trunk/lib/rexml/encodings/ISO-8859-1.rb
trunk/lib/rexml/encodings/ISO-8859-15.rb
trunk/lib/rexml/encodings/SHIFT-JIS.rb
trunk/lib/rexml/encodings/SHIFT_JIS.rb
trunk/lib/rexml/encodings/UNILE.rb
trunk/lib/rexml/encodings/US-ASCII.rb
trunk/lib/rexml/encodings/UTF-16.rb
trunk/lib/rexml/encodings/UTF-8.rb
trunk/lib/rexml/entity.rb
trunk/lib/rexml/functions.rb
trunk/lib/rexml/instruction.rb
trunk/lib/rexml/light/
trunk/lib/rexml/light/node.rb
trunk/lib/rexml/namespace.rb
trunk/lib/rexml/node.rb
trunk/lib/rexml/output.rb
trunk/lib/rexml/parent.rb
trunk/lib/rexml/parseexception.rb
trunk/lib/rexml/parsers/
trunk/lib/rexml/parsers/baseparser.rb
trunk/lib/rexml/parsers/lightparser.rb
trunk/lib/rexml/parsers/pullparser.rb
trunk/lib/rexml/parsers/sax2parser.rb
trunk/lib/rexml/parsers/streamparser.rb
trunk/lib/rexml/parsers/treeparser.rb
trunk/lib/rexml/parsers/ultralightparser.rb
trunk/lib/rexml/parsers/xpathparser.rb
trunk/lib/rexml/quickpath.rb
trunk/lib/rexml/rexml.rb
trunk/lib/rexml/sax2listener.rb
trunk/lib/rexml/source.rb
trunk/lib/rexml/streamlistener.rb
trunk/lib/rexml/syncenumerator.rb
trunk/lib/rexml/text.rb
trunk/lib/rexml/validation/
trunk/lib/rexml/validation/relaxng.rb
trunk/lib/rexml/validation/validation.rb
trunk/lib/rexml/validation/validationexception.rb
trunk/lib/rexml/xmldecl.rb
trunk/lib/rexml/xmltokens.rb
trunk/lib/rexml/xpath.rb
trunk/lib/rexml/xpath_parser.rb
trunk/test/digest/
trunk/test/digest/test_digest.rb
trunk/test/erb/
trunk/test/erb/test_erb.rb
trunk/test/fileutils/
trunk/test/fileutils/fileasserts.rb
trunk/test/fileutils/test_dryrun.rb
trunk/test/fileutils/test_fileutils.rb
trunk/test/fileutils/test_nowrite.rb
trunk/test/fileutils/test_verbose.rb
trunk/test/optparse/
trunk/test/optparse/test_noarg.rb
trunk/test/optparse/test_optarg.rb
trunk/test/optparse/test_optparse.rb
trunk/test/optparse/test_placearg.rb
trunk/test/optparse/test_reqarg.rb
trunk/test/ostruct/
trunk/test/ostruct/test_ostruct.rb
Modified:
trunk/ChangeLog
trunk/lib/rss/dublincore.rb
trunk/lib/tempfile.rb
trunk/test/ruby/test_signal.rb
Log:
* lib/tempfile.rb: use Mutex instead of Thread.critical.
* lib/rss/dublincore.rb: |x,| -> |x,_| to avoid YARV bug (tmp).
* lib/rexml: imported from ruby CVS trunk HEAD.
* test/digest: ditto.
* test/fileutils: ditto.
* test/ostruct: ditto.
* test/erb: ditto.
* test/optparse: ditto.
* test/ruby/test_signal.rb: turn off a test to avoid unknown error (tmp).
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/ChangeLog 2006-02-13 23:05:37 UTC (rev 412)
@@ -4,6 +4,41 @@
# from Mon, 03 May 2004 01:24:19 +0900
#
+2006-02-14(Tue) 08:04:33 +0900 Minero Aoki <aamine loveruby.net>
+
+ * lib/tempfile.rb: use Mutex instead of Thread.critical.
+
+ * lib/rss/dublincore.rb: |x,| -> |x,_| to avoid YARV bug (tmp).
+
+ * lib/rexml: imported from ruby CVS trunk HEAD.
+
+ * test/digest: ditto.
+
+ * test/fileutils: ditto.
+
+ * test/ostruct: ditto.
+
+ * test/erb: ditto.
+
+ * test/optparse: ditto.
+
+ * test/ruby/test_signal.rb: turn off a test to avoid unknown error
+ (tmp).
+
+
+2006-02-14(Tue) 07:52:03 +0900 Minero Aoki <aamine loveruby.net>
+
+ * test/digest: imported from ruby CVS trunk HEAD.
+
+ * test/fileutils: ditto.
+
+ * test/ostruct: ditto.
+
+ * test/erb: ditto.
+
+ * test/optparse: ditto.
+
+
2006-02-14(Tue) 06:26:21 +0900 Koichi Sasada <ko1 atdot.net>
* compile.c, parse.y : support BEGIN{} (remove local scope)
Added: trunk/lib/rexml/attlistdecl.rb
===================================================================
--- trunk/lib/rexml/attlistdecl.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/attlistdecl.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,62 @@
+#vim:ts=2 sw=2 noexpandtab:
+require 'rexml/child'
+require 'rexml/source'
+
+module REXML
+ # This class needs:
+ # * Documentation
+ # * Work! Not all types of attlists are intelligently parsed, so we just
+ # spew back out what we get in. This works, but it would be better if
+ # we formatted the output ourselves.
+ #
+ # AttlistDecls provide *just* enough support to allow namespace
+ # declarations. If you need some sort of generalized support, or have an
+ # interesting idea about how to map the hideous, terrible design of DTD
+ # AttlistDecls onto an intuitive Ruby interface, let me know. I'm desperate
+ # for anything to make DTDs more palateable.
+ class AttlistDecl < Child
+ include Enumerable
+
+ # What is this? Got me.
+ attr_reader :element_name
+
+ # Create an AttlistDecl, pulling the information from a Source. Notice
+ # that this isn't very convenient; to create an AttlistDecl, you basically
+ # have to format it yourself, and then have the initializer parse it.
+ # Sorry, but for the forseeable future, DTD support in REXML is pretty
+ # weak on convenience. Have I mentioned how much I hate DTDs?
+ def initialize(source)
+ super()
+ if (source.kind_of? Array)
+ @element_name, @pairs, @contents = *source
+ end
+ end
+
+ # Access the attlist attribute/value pairs.
+ # value = attlist_decl[ attribute_name ]
+ def [](key)
+ @pairs[key]
+ end
+
+ # Whether an attlist declaration includes the given attribute definition
+ # if attlist_decl.include? "xmlns:foobar"
+ def include?(key)
+ @pairs.keys.include? key
+ end
+
+ # Itterate over the key/value pairs:
+ # attlist_decl.each { |attribute_name, attribute_value| ... }
+ def each(&block)
+ @pairs.each(&block)
+ end
+
+ # Write out exactly what we got in.
+ def write out, indent=-1
+ out << @contents
+ end
+
+ def node_type
+ :attlistdecl
+ end
+ end
+end
Added: trunk/lib/rexml/attribute.rb
===================================================================
--- trunk/lib/rexml/attribute.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/attribute.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,163 @@
+require "rexml/namespace"
+require 'rexml/text'
+
+module REXML
+ # Defines an Element Attribute; IE, a attribute=value pair, as in:
+ # <element attribute="value"/>. Attributes can be in their own
+ # namespaces. General users of REXML will not interact with the
+ # Attribute class much.
+ class Attribute
+ include Node
+ include Namespace
+
+ # The element to which this attribute belongs
+ attr_reader :element
+ # The normalized value of this attribute. That is, the attribute with
+ # entities intact.
+ attr_writer :normalized
+ PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+
+ # Constructor.
+ #
+ # Attribute.new( attribute_to_clone )
+ # Attribute.new( source )
+ # Attribute.new( "attr", "attr_value" )
+ # Attribute.new( "attr", "attr_value", parent_element )
+ def initialize( first, second=nil, parent=nil )
+ @normalized = @unnormalized = @element = nil
+ if first.kind_of? Attribute
+ self.name = first.expanded_name
+ @value = first.value
+ if second.kind_of? Element
+ @element = second
+ else
+ @element = first.element
+ end
+ elsif first.kind_of? String
+ @element = parent if parent.kind_of? Element
+ self.name = first
+ @value = second.to_s
+ else
+ raise "illegal argument #{first.class.name} to Attribute constructor"
+ end
+ end
+
+ # Returns the namespace of the attribute.
+ #
+ # e = Element.new( "elns:myelement" )
+ # e.add_attribute( "nsa:a", "aval" )
+ # e.add_attribute( "b", "bval" )
+ # e.attributes.get_attribute( "a" ).prefix # -> "nsa"
+ # e.attributes.get_attribute( "b" ).prefix # -> "elns"
+ # a = Attribute.new( "x", "y" )
+ # a.prefix # -> ""
+ def prefix
+ pf = super
+ if pf == ""
+ pf = @element.prefix if @element
+ end
+ pf
+ end
+
+ # Returns the namespace URL, if defined, or nil otherwise
+ #
+ # e = Element.new("el")
+ # e.add_attributes({"xmlns:ns", "http://url"})
+ # e.namespace( "ns" ) # -> "http://url"
+ def namespace arg=nil
+ arg = prefix if arg.nil?
+ @element.namespace arg
+ end
+
+ # Returns true if other is an Attribute and has the same name and value,
+ # false otherwise.
+ def ==( other )
+ other.kind_of?(Attribute) and other.name==name and other.value==@value
+ end
+
+ # Creates (and returns) a hash from both the name and value
+ def hash
+ name.hash + value.hash
+ end
+
+ # Returns this attribute out as XML source, expanding the name
+ #
+ # a = Attribute.new( "x", "y" )
+ # a.to_string # -> "x='y'"
+ # b = Attribute.new( "ns:x", "y" )
+ # b.to_string # -> "ns:x='y'"
+ def to_string
+ "# expanded_name='#{to_s().gsub(/'/, ''')}'"
+ end
+
+ # Returns the attribute value, with entities replaced
+ def to_s
+ return @normalized if @normalized
+
+ doctype = nil
+ if @element
+ doc = @element.document
+ doctype = doc.doctype if doc
+ end
+
+ @unnormalized = nil
+ @value = @normalized = Text::normalize( @value, doctype )
+ end
+
+ # Returns the UNNORMALIZED value of this attribute. That is, entities
+ # have been expanded to their values
+ def value
+ @unnormalized if @unnormalized
+ doctype = nil
+ if @element
+ doc = @element.document
+ doctype = doc.doctype if doc
+ end
+ @normalized = nil
+ @value = @unnormalized = Text::unnormalize( @value, doctype )
+ end
+
+ # Returns a copy of this attribute
+ def clone
+ Attribute.new self
+ end
+
+ # Sets the element of which this object is an attribute. Normally, this
+ # is not directly called.
+ #
+ # Returns this attribute
+ def element=( element )
+ @element = element
+ self
+ end
+
+ # Removes this Attribute from the tree, and returns true if successfull
+ #
+ # This method is usually not called directly.
+ def remove
+ @element.attributes.delete self.name unless @element.nil?
+ end
+
+ # Writes this attribute (EG, puts 'key="value"' to the output)
+ def write( output, indent=-1 )
+ output << to_string
+ end
+
+ def node_type
+ :attribute
+ end
+
+ def inspect
+ rv = ""
+ write( rv )
+ rv
+ end
+
+ def xpath
+ path = @element.xpath
+ path += "/ #{self.expanded_name}"
+ return path
+ end
+ end
+end
+#vim:ts=2 sw=2 noexpandtab:
Added: trunk/lib/rexml/cdata.rb
===================================================================
--- trunk/lib/rexml/cdata.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/cdata.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,68 @@
+require "rexml/text"
+
+module REXML
+ class CData < Text
+ START = '<![CDATA['
+ STOP = ']]>'
+ ILLEGAL = /(\]\]>)/
+
+ # Constructor. CData is data between <![CDATA[ ... ]]>
+ #
+ # _Examples_
+ # CData.new( source )
+ # CData.new( "Here is some CDATA" )
+ # CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
+ def initialize( first, whitespace=true, parent=nil )
+ super( first, whitespace, parent, true, true, ILLEGAL )
+ end
+
+ # Make a copy of this object
+ #
+ # _Examples_
+ # c = CData.new( "Some text" )
+ # d = c.clone
+ # d.to_s # -> "Some text"
+ def clone
+ CData.new self
+ end
+
+ # Returns the content of this CData object
+ #
+ # _Examples_
+ # c = CData.new( "Some text" )
+ # c.to_s # -> "Some text"
+ def to_s
+ @string
+ end
+
+ # Generates XML output of this object
+ #
+ # output::
+ # Where to write the string. Defaults to $stdout
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount. Defaults to -1.
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
+ # ie_hack::
+ # Internet Explorer is the worst piece of crap to have ever been
+ # written, with the possible exception of Windows itself. Since IE is
+ # unable to parse proper XML, we have to provide a hack to generate XML
+ # that IE's limited abilities can handle. This hack inserts a space
+ # before the /> on empty tags.
+ #
+ # _Examples_
+ # c = CData.new( " Some text " )
+ # c.write( $stdout ) #-> <![CDATA[ Some text ]]>
+ def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
+ #indent( output, indent ) unless transitive
+ output << START
+ output << @string
+ output << STOP
+ end
+ end
+end
Added: trunk/lib/rexml/child.rb
===================================================================
--- trunk/lib/rexml/child.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/child.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,96 @@
+require "rexml/node"
+
+module REXML
+ ##
+ # A Child object is something contained by a parent, and this class
+ # contains methods to support that. Most user code will not use this
+ # class directly.
+ class Child
+ include Node
+ attr_reader :parent # The Parent of this object
+
+ # Constructor. Any inheritors of this class should call super to make
+ # sure this method is called.
+ # parent::
+ # if supplied, the parent of this child will be set to the
+ # supplied value, and self will be added to the parent
+ def initialize( parent = nil )
+ @parent = nil
+ # Declare @parent, but don't define it. The next line sets the
+ # parent.
+ parent.add( self ) if parent
+ end
+
+ # Replaces this object with another object. Basically, calls
+ # Parent.replace_child
+ #
+ # Returns:: self
+ def replace_with( child )
+ @parent.replace_child( self, child )
+ self
+ end
+
+ # Removes this child from the parent.
+ #
+ # Returns:: self
+ def remove
+ unless @parent.nil?
+ @parent.delete self
+ end
+ self
+ end
+
+ # Sets the parent of this child to the supplied argument.
+ #
+ # other::
+ # Must be a Parent object. If this object is the same object as the
+ # existing parent of this child, no action is taken. Otherwise, this
+ # child is removed from the current parent (if one exists), and is added
+ # to the new parent.
+ # Returns:: The parent added
+ def parent=( other )
+ return @parent if @parent == other
+ @parent.delete self if defined? @parent and @parent
+ @parent = other
+ end
+
+ alias :next_sibling :next_sibling_node
+ alias :previous_sibling :previous_sibling_node
+
+ # Sets the next sibling of this child. This can be used to insert a child
+ # after some other child.
+ # a = Element.new("a")
+ # b = a.add_element("b")
+ # c = Element.new("c")
+ # b.next_sibling = c
+ # # => <a><b/><c/></a>
+ def next_sibling=( other )
+ parent.insert_after self, other
+ end
+
+ # Sets the previous sibling of this child. This can be used to insert a
+ # child before some other child.
+ # a = Element.new("a")
+ # b = a.add_element("b")
+ # c = Element.new("c")
+ # b.previous_sibling = c
+ # # => <a><b/><c/></a>
+ def previous_sibling=(other)
+ parent.insert_before self, other
+ end
+
+ # Returns:: the document this child belongs to, or nil if this child
+ # belongs to no document
+ def document
+ return parent.document unless parent.nil?
+ nil
+ end
+
+ # This doesn't yet handle encodings
+ def bytes
+ encoding = document.encoding
+
+ to_s
+ end
+ end
+end
Added: trunk/lib/rexml/comment.rb
===================================================================
--- trunk/lib/rexml/comment.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/comment.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,86 @@
+require "rexml/child"
+
+module REXML
+ ##
+ # Represents an XML comment; that is, text between <!-- ... -->
+ class Comment < Child
+ include Comparable
+ START = "<!--"
+ STOP = "-->"
+
+ attr_accessor :string # The content text
+
+ ##
+ # Constructor. The first argument can be one of three types:
+ # @param first If String, the contents of this comment are set to the
+ # argument. If Comment, the argument is duplicated. If
+ # Source, the argument is scanned for a comment.
+ # @param second If the first argument is a Source, this argument
+ # should be nil, not supplied, or a Parent to be set as the parent
+ # of this object
+ def initialize( first, second = nil )
+ #puts "IN COMMENT CONSTRUCTOR; SECOND IS #{second.type}"
+ super(second)
+ if first.kind_of? String
+ @string = first
+ elsif first.kind_of? Comment
+ @string = first.string
+ end
+ end
+
+ def clone
+ Comment.new self
+ end
+
+ # output::
+ # Where to write the string
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount.
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
+ # ie_hack::
+ # Internet Explorer is the worst piece of crap to have ever been
+ # written, with the possible exception of Windows itself. Since IE is
+ # unable to parse proper XML, we have to provide a hack to generate XML
+ # that IE's limited abilities can handle. This hack inserts a space
+ # before the /> on empty tags.
+ #
+ def write( output, indent=-1, transitive=false, ie_hack=false )
+ indent( output, indent )
+ output << START
+ output << @string
+ if indent>-1
+ output << "\n"
+ indent( output, indent )
+ end
+ output << STOP
+ end
+
+ alias :to_s :string
+
+ ##
+ # Compares this Comment to another; the contents of the comment are used
+ # in the comparison.
+ def <=>(other)
+ other.to_s <=> @string
+ end
+
+ ##
+ # Compares this Comment to another; the contents of the comment are used
+ # in the comparison.
+ def ==( other )
+ other.kind_of? Comment and
+ (other <=> self) == 0
+ end
+
+ def node_type
+ :comment
+ end
+ end
+end
+#vim:ts=2 sw=2 noexpandtab:
Added: trunk/lib/rexml/doctype.rb
===================================================================
--- trunk/lib/rexml/doctype.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/doctype.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,213 @@
+require "rexml/parent"
+require "rexml/parseexception"
+require "rexml/namespace"
+require 'rexml/entity'
+require 'rexml/attlistdecl'
+require 'rexml/xmltokens'
+
+module REXML
+ # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
+ # ... >. DOCTYPES can be used to declare the DTD of a document, as well as
+ # being used to declare entities used in the document.
+ class DocType < Parent
+ include XMLTokens
+ START = "<!DOCTYPE"
+ STOP = ">"
+ SYSTEM = "SYSTEM"
+ PUBLIC = "PUBLIC"
+ DEFAULT_ENTITIES = {
+ 'gt'=>EntityConst::GT,
+ 'lt'=>EntityConst::LT,
+ 'quot'=>EntityConst::QUOT,
+ "apos"=>EntityConst::APOS
+ }
+
+ # name is the name of the doctype
+ # external_id is the referenced DTD, if given
+ attr_reader :name, :external_id, :entities, :namespaces
+
+ # Constructor
+ #
+ # dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
+ # # <!DOCTYPE foo '-//I/Hate/External/IDs'>
+ # dt = DocType.new( doctype_to_clone )
+ # # Incomplete. Shallow clone of doctype
+ #
+ # +Note+ that the constructor:
+ #
+ # Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
+ #
+ # is _deprecated_. Do not use it. It will probably disappear.
+ def initialize( first, parent=nil )
+ @entities = DEFAULT_ENTITIES
+ @long_name = @uri = nil
+ if first.kind_of? String
+ super()
+ @name = first
+ @external_id = parent
+ elsif first.kind_of? DocType
+ super( parent )
+ @name = first.name
+ @external_id = first.external_id
+ elsif first.kind_of? Array
+ super( parent )
+ @name = first[0]
+ @external_id = first[1]
+ @long_name = first[2]
+ @uri = first[3]
+ elsif first.kind_of? Source
+ super( parent )
+ parser = Parsers::BaseParser.new( first )
+ event = parser.pull
+ if event[0] == :start_doctype
+ @name, @external_id, @long_name, @uri, = event[1..-1]
+ end
+ else
+ super()
+ end
+ end
+
+ def node_type
+ :doctype
+ end
+
+ def attributes_of element
+ rv = []
+ each do |child|
+ child.each do |key,val|
+ rv << Attribute.new(key,val)
+ end if child.kind_of? AttlistDecl and child.element_name == element
+ end
+ rv
+ end
+
+ def attribute_of element, attribute
+ att_decl = find do |child|
+ child.kind_of? AttlistDecl and
+ child.element_name == element and
+ child.include? attribute
+ end
+ return nil unless att_decl
+ att_decl[attribute]
+ end
+
+ def clone
+ DocType.new self
+ end
+
+ # output::
+ # Where to write the string
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount.
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
+ # ie_hack::
+ # Internet Explorer is the worst piece of crap to have ever been
+ # written, with the possible exception of Windows itself. Since IE is
+ # unable to parse proper XML, we have to provide a hack to generate XML
+ # that IE's limited abilities can handle. This hack inserts a space
+ # before the /> on empty tags.
+ #
+ def write( output, indent=0, transitive=false, ie_hack=false )
+ indent( output, indent )
+ output << START
+ output << ' '
+ output << @name
+ output << " #@external_id" if @external_id
+ output << " #@long_name" if @long_name
+ output << " #@uri" if @uri
+ unless @children.empty?
+ next_indent = indent + 1
+ output << ' ['
+ child = nil # speed
+ @children.each { |child|
+ output << "\n"
+ child.write( output, next_indent )
+ }
+ output << "\n"
+ #output << ' '*next_indent
+ output << "]"
+ end
+ output << STOP
+ end
+
+ def context
+ @parent.context
+ end
+
+ def entity( name )
+ @entities[name].unnormalized if @entities[name]
+ end
+
+ def add child
+ super(child)
+ @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
+ @entities[ child.name ] = child if child.kind_of? Entity
+ end
+ end
+
+ # We don't really handle any of these since we're not a validating
+ # parser, so we can be pretty dumb about them. All we need to be able
+ # to do is spew them back out on a write()
+
+ # This is an abstract class. You never use this directly; it serves as a
+ # parent class for the specific declarations.
+ class Declaration < Child
+ def initialize src
+ super()
+ @string = src
+ end
+
+ def to_s
+ @string+'>'
+ end
+
+ def write( output, indent )
+ output << (' '*indent) if indent > 0
+ output << to_s
+ end
+ end
+
+ public
+ class ElementDecl < Declaration
+ def initialize( src )
+ super
+ end
+ end
+
+ class ExternalEntity < Child
+ def initialize( src )
+ super()
+ @entity = src
+ end
+ def to_s
+ @entity
+ end
+ def write( output, indent )
+ output << @entity
+ output << "\n"
+ end
+ end
+
+ class NotationDecl < Child
+ def initialize name, middle, rest
+ @name = name
+ @middle = middle
+ @rest = rest
+ end
+
+ def to_s
+ "<!NOTATION #@name '#@middle #@rest'>"
+ end
+
+ def write( output, indent=-1 )
+ output << (' '*indent) if indent > 0
+ output << to_s
+ end
+ end
+end
Added: trunk/lib/rexml/document.rb
===================================================================
--- trunk/lib/rexml/document.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/document.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,185 @@
+require "rexml/element"
+require "rexml/xmldecl"
+require "rexml/source"
+require "rexml/comment"
+require "rexml/doctype"
+require "rexml/instruction"
+require "rexml/rexml"
+require "rexml/parseexception"
+require "rexml/output"
+require "rexml/parsers/baseparser"
+require "rexml/parsers/streamparser"
+require "rexml/parsers/treeparser"
+
+module REXML
+ # Represents a full XML document, including PIs, a doctype, etc. A
+ # Document has a single child that can be accessed by root().
+ # Note that if you want to have an XML declaration written for a document
+ # you create, you must add one; REXML documents do not write a default
+ # declaration for you. See |DECLARATION| and |write|.
+ class Document < Element
+ # A convenient default XML declaration. If you want an XML declaration,
+ # the easiest way to add one is mydoc << Document::DECLARATION
+ # +DEPRECATED+
+ # Use: mydoc << XMLDecl.default
+ DECLARATION = XMLDecl.default
+
+ # Constructor
+ # @param source if supplied, must be a Document, String, or IO.
+ # Documents have their context and Element attributes cloned.
+ # Strings are expected to be valid XML documents. IOs are expected
+ # to be sources of valid XML documents.
+ # @param context if supplied, contains the context of the document;
+ # this should be a Hash.
+ # NOTE that I'm not sure what the context is for; I cloned it out of
+ # the Electric XML API (in which it also seems to do nothing), and it
+ # is now legacy. It may do something, someday... it may disappear.
+ def initialize( source = nil, context = {} )
+ super()
+ @context = context
+ return if source.nil?
+ if source.kind_of? Document
+ @context = source.context
+ super source
+ else
+ build( source )
+ end
+ end
+
+ def node_type
+ :document
+ end
+
+ # Should be obvious
+ def clone
+ Document.new self
+ end
+
+ # According to the XML spec, a root node has no expanded name
+ def expanded_name
+ ''
+ #d = doc_type
+ #d ? d.name : "UNDEFINED"
+ end
+
+ alias :name :expanded_name
+
+ # We override this, because XMLDecls and DocTypes must go at the start
+ # of the document
+ def add( child )
+ if child.kind_of? XMLDecl
+ @children.unshift child
+ elsif child.kind_of? DocType
+ if @children[0].kind_of? XMLDecl
+ @children[1,0] = child
+ else
+ @children.unshift child
+ end
+ child.parent = self
+ else
+ rv = super
+ raise "attempted adding second root element to document" if @elements.size > 1
+ rv
+ end
+ end
+ alias :<< :add
+
+ def add_element(arg=nil, arg2=nil)
+ rv = super
+ raise "attempted adding second root element to document" if @elements.size > 1
+ rv
+ end
+
+ # @return the root Element of the document, or nil if this document
+ # has no children.
+ def root
+puts 'THRU 1'
+result=
+ elements[1]
+puts 'THRU 2'
+result
+ #self
+ # children.find { |item| item.kind_of? Element }
+ end
+
+ # @return the DocType child of the document, if one exists,
+ # and nil otherwise.
+ def doctype
+ @children.find { |item| item.kind_of? DocType }
+ end
+
+ # @return the XMLDecl of this document; if no XMLDecl has been
+ # set, the default declaration is returned.
+ def xml_decl
+ rv = @children[0]
+ return rv if rv.kind_of? XMLDecl
+ rv = @children.unshift(XMLDecl.default)[0]
+ end
+
+ # @return the XMLDecl version of this document as a String.
+ # If no XMLDecl has been set, returns the default version.
+ def version
+ xml_decl().version
+ end
+
+ # @return the XMLDecl encoding of this document as a String.
+ # If no XMLDecl has been set, returns the default encoding.
+ def encoding
+ xml_decl().encoding
+ end
+
+ # @return the XMLDecl standalone value of this document as a String.
+ # If no XMLDecl has been set, returns the default setting.
+ def stand_alone?
+ xml_decl().stand_alone?
+ end
+
+ # Write the XML tree out, optionally with indent. This writes out the
+ # entire XML document, including XML declarations, doctype declarations,
+ # and processing instructions (if any are given).
+ # A controversial point is whether Document should always write the XML
+ # declaration (<?xml version='1.0'?>) whether or not one is given by the
+ # user (or source document). REXML does not write one if one was not
+ # specified, because it adds unneccessary bandwidth to applications such
+ # as XML-RPC.
+ #
+ #
+ # output::
+ # output an object which supports '<< string'; this is where the
+ # document will be written.
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount. Defaults to -1
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
+ # ie_hack::
+ # Internet Explorer is the worst piece of crap to have ever been
+ # written, with the possible exception of Windows itself. Since IE is
+ # unable to parse proper XML, we have to provide a hack to generate XML
+ # that IE's limited abilities can handle. This hack inserts a space
+ # before the /> on empty tags. Defaults to false
+ def write( output=$stdout, indent_level=-1, transitive=false, ie_hack=false )
+ output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
+ @children.each { |node|
+ indent( output, indent_level ) if node.node_type == :element
+ if node.write( output, indent_level, transitive, ie_hack )
+ output << "\n" unless indent_level<0 or node == @children[-1]
+ end
+ }
+ end
+
+
+ def Document::parse_stream( source, listener )
+ Parsers::StreamParser.new( source, listener ).parse
+ end
+
+ private
+ def build( source )
+ Parsers::TreeParser.new( source, self ).parse
+ end
+ end
+end
Added: trunk/lib/rexml/dtd/attlistdecl.rb
===================================================================
--- trunk/lib/rexml/dtd/attlistdecl.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/dtd/attlistdecl.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,10 @@
+require "rexml/child"
+module REXML
+ module DTD
+ class AttlistDecl < Child
+ START = "<!ATTLIST"
+ START_RE = /^\s*#{START}/um
+ PATTERN_RE = /\s*(#{START}.*?>)/um
+ end
+ end
+end
Added: trunk/lib/rexml/dtd/dtd.rb
===================================================================
--- trunk/lib/rexml/dtd/dtd.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/dtd/dtd.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,51 @@
+require "rexml/dtd/elementdecl"
+require "rexml/dtd/entitydecl"
+require "rexml/comment"
+require "rexml/dtd/notationdecl"
+require "rexml/dtd/attlistdecl"
+require "rexml/parent"
+
+module REXML
+ module DTD
+ class Parser
+ def Parser.parse( input )
+ case input
+ when String
+ parse_helper input
+ when File
+ parse_helper input.read
+ end
+ end
+
+ # Takes a String and parses it out
+ def Parser.parse_helper( input )
+ contents = Parent.new
+ while input.size > 0
+ case input
+ when ElementDecl.PATTERN_RE
+ match = $&
+ source = $'
+ contents << EleemntDecl.new( match )
+ when AttlistDecl.PATTERN_RE
+ matchdata = $~
+ source = $'
+ contents << AttlistDecl.new( matchdata )
+ when EntityDecl.PATTERN_RE
+ matchdata = $~
+ source = $'
+ contents << EntityDecl.new( matchdata )
+ when Comment.PATTERN_RE
+ matchdata = $~
+ source = $'
+ contents << Comment.new( matchdata )
+ when NotationDecl.PATTERN_RE
+ matchdata = $~
+ source = $'
+ contents << NotationDecl.new( matchdata )
+ end
+ end
+ contents
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/dtd/elementdecl.rb
===================================================================
--- trunk/lib/rexml/dtd/elementdecl.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/dtd/elementdecl.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,17 @@
+require "rexml/child"
+module REXML
+ module DTD
+ class ElementDecl < Child
+ START = "<!ELEMENT"
+ START_RE = /^\s*#{START}/um
+ PATTERN_RE = /^\s*(#{START}.*?)>/um
+ PATTERN_RE = /^\s*#{START}\s+((?:[:\w_][-\.\w_]*:)?[-!\*\.\w_]*)(.*?)>/
+ #\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)
+
+ def initialize match
+ @name = match[1]
+ @rest = match[2]
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/dtd/entitydecl.rb
===================================================================
--- trunk/lib/rexml/dtd/entitydecl.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/dtd/entitydecl.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,56 @@
+require "rexml/child"
+module REXML
+ module DTD
+ class EntityDecl < Child
+ START = "<!ENTITY"
+ START_RE = /^\s*#{START}/um
+ PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
+ SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
+ PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
+ PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um
+ # <!ENTITY name SYSTEM "...">
+ # <!ENTITY name "...">
+ def initialize src
+ super()
+ md = nil
+ if src.match( PUBLIC )
+ md = src.match( PUBLIC, true )
+ @middle = "PUBLIC"
+ @content = "#{md[2]} #{md[4]}"
+ elsif src.match( SYSTEM )
+ md = src.match( SYSTEM, true )
+ @middle = "SYSTEM"
+ @content = md[2]
+ elsif src.match( PLAIN )
+ md = src.match( PLAIN, true )
+ @middle = ""
+ @content = md[2]
+ elsif src.match( PERCENT )
+ md = src.match( PERCENT, true )
+ @middle = ""
+ @content = md[2]
+ end
+ raise ParseException.new("failed Entity match", src) if md.nil?
+ @name = md[1]
+ end
+
+ def to_s
+ rv = "<!ENTITY #@name "
+ rv << "#@middle " if @middle.size > 0
+ rv << @content
+ rv
+ end
+
+ def write( output, indent )
+ indent( output, indent )
+ output << to_s
+ end
+
+ def EntityDecl.parse_source source, listener
+ md = source.match( PATTERN_RE, true )
+ thing = md[0].squeeze(" \t\n\r")
+ listener.send inspect.downcase, thing
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/dtd/notationdecl.rb
===================================================================
--- trunk/lib/rexml/dtd/notationdecl.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/dtd/notationdecl.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,39 @@
+require "rexml/child"
+module REXML
+ module DTD
+ class NotationDecl < Child
+ START = "<!NOTATION"
+ START_RE = /^\s*#{START}/um
+ PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
+ SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+ def initialize src
+ super()
+ if src.match( PUBLIC )
+ md = src.match( PUBLIC, true )
+ elsif src.match( SYSTEM )
+ md = src.match( SYSTEM, true )
+ else
+ raise ParseException.new( "error parsing notation: no matching pattern", src )
+ end
+ @name = md[1]
+ @middle = md[2]
+ @rest = md[3]
+ end
+
+ def to_s
+ "<!NOTATION #@name #@middle #@rest>"
+ end
+
+ def write( output, indent )
+ indent( output, indent )
+ output << to_s
+ end
+
+ def NotationDecl.parse_source source, listener
+ md = source.match( PATTERN_RE, true )
+ thing = md[0].squeeze(" \t\n\r")
+ listener.send inspect.downcase, thing
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/element.rb
===================================================================
--- trunk/lib/rexml/element.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/element.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,1230 @@
+require "rexml/parent"
+require "rexml/namespace"
+require "rexml/attribute"
+require "rexml/cdata"
+require "rexml/xpath"
+require "rexml/parseexception"
+
+module REXML
+ # An implementation note about namespaces:
+ # As we parse, when we find namespaces we put them in a hash and assign
+ # them a unique ID. We then convert the namespace prefix for the node
+ # to the unique ID. This makes namespace lookup much faster for the
+ # cost of extra memory use. We save the namespace prefix for the
+ # context node and convert it back when we write it.
+ @@namespaces = {}
+
+ # Represents a tagged XML element. Elements are characterized by
+ # having children, attributes, and names, and can themselves be
+ # children.
+ class Element < Parent
+ include Namespace
+
+ UNDEFINED = "UNDEFINED"; # The default name
+
+ # Mechanisms for accessing attributes and child elements of this
+ # element.
+ attr_reader :attributes, :elements
+ # The context holds information about the processing environment, such as
+ # whitespace handling.
+ attr_accessor :context
+
+ # Constructor
+ # arg::
+ # if not supplied, will be set to the default value.
+ # If a String, the name of this object will be set to the argument.
+ # If an Element, the object will be shallowly cloned; name,
+ # attributes, and namespaces will be copied. Children will +not+ be
+ # copied.
+ # If a Source, the source will be scanned and parsed for an Element,
+ # and all child elements will be recursively parsed as well.
+ # parent::
+ # if supplied, must be a Parent, and will be used as
+ # the parent of this object.
+ # context::
+ # If supplied, must be a hash containing context items. Context items
+ # include:
+ # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
+ # strings being the names of the elements to respect
+ # whitespace for. Defaults to :+all+.
+ # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
+ # strings being the names of the elements to ignore whitespace on.
+ # Overrides :+respect_whitespace+.
+ # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
+ # of strings being the names of the elements in which to ignore
+ # whitespace-only nodes. If this is set, Text nodes which contain only
+ # whitespace will not be added to the document tree.
+ # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
+ # the elements to process in raw mode. In raw mode, special
+ # characters in text is not converted to or from entities.
+ def initialize( arg = UNDEFINED, parent=nil, context=nil )
+ super(parent)
+
+ @elements = Elements.new(self)
+ @attributes = Attributes.new(self)
+ @context = context
+
+ if arg.kind_of? String
+ self.name = arg
+ elsif arg.kind_of? Element
+ self.name = arg.expanded_name
+ arg.attributes.each_attribute{ |attribute|
+ @attributes << Attribute.new( attribute )
+ }
+ @context = arg.context
+ end
+ end
+
+ def inspect
+ rv = "<#@expanded_name"
+
+ @attributes.each_attribute do |attr|
+ rv << " "
+ attr.write( rv, 0 )
+ end
+
+ if children.size > 0
+ rv << "> ... </>"
+ else
+ rv << "/>"
+ end
+ end
+
+
+ # Creates a shallow copy of self.
+ # d = Document.new "<a><b/><b/><c><d/></c></a>"
+ # new_a = d.root.clone
+ # puts new_a # => "<a/>"
+ def clone
+ Element.new self
+ end
+
+ # Evaluates to the root node of the document that this element
+ # belongs to. If this element doesn't belong to a document, but does
+ # belong to another Element, the parent's root will be returned, until the
+ # earliest ancestor is found.
+ #
+ # Note that this is not the same as the document element.
+ # In the following example, <a> is the document element, and the root
+ # node is the parent node of the document element. You may ask yourself
+ # why the root node is useful: consider the doctype and XML declaration,
+ # and any processing instructions before the document element... they
+ # are children of the root node, or siblings of the document element.
+ # The only time this isn't true is when an Element is created that is
+ # not part of any Document. In this case, the ancestor that has no
+ # parent acts as the root node.
+ # d = Document.new '<a><b><c/></b></a>'
+ # a = d[1] ; c = a[1][1]
+ # d.root_node == d # TRUE
+ # a.root_node # namely, d
+ # c.root_node # again, d
+ def root_node
+ parent.nil? ? self : parent.root_node
+ end
+
+ def root
+ return elements[1] if self.kind_of? Document
+ return self if parent.kind_of? Document or parent.nil?
+ return parent.root
+ end
+
+ # Evaluates to the document to which this element belongs, or nil if this
+ # element doesn't belong to a document.
+ def document
+ rt = root
+ rt.parent if rt
+ end
+
+ # Evaluates to +true+ if whitespace is respected for this element. This
+ # is the case if:
+ # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
+ # 2. The context has :+respect_whitespace+ set to :+all+ or
+ # an array containing the name of this element, and
+ # :+compress_whitespace+ isn't set to :+all+ or an array containing the
+ # name of this element.
+ # The evaluation is tested against +expanded_name+, and so is namespace
+ # sensitive.
+ def whitespace
+ @whitespace = nil
+ if @context
+ if @context[:respect_whitespace]
+ @whitespace = (@context[:respect_whitespace] == :all or
+ @context[:respect_whitespace].include? expanded_name)
+ end
+ @whitespace = false if (@context[:compress_whitespace] and
+ (@context[:compress_whitespace] == :all or
+ @context[:compress_whitespace].include? expanded_name)
+ )
+ end
+ @whitespace = true unless @whitespace == false
+ @whitespace
+ end
+
+ def ignore_whitespace_nodes
+ @ignore_whitespace_nodes = false
+ if @context
+ if @context[:ignore_whitespace_nodes]
+ @ignore_whitespace_nodes =
+ (@context[:ignore_whitespace_nodes] == :all or
+ @context[:ignore_whitespace_nodes].include? expanded_name)
+ end
+ end
+ end
+
+ # Evaluates to +true+ if raw mode is set for this element. This
+ # is the case if the context has :+raw+ set to :+all+ or
+ # an array containing the name of this element.
+ #
+ # The evaluation is tested against +expanded_name+, and so is namespace
+ # sensitive.
+ def raw
+ @raw = (@context and @context[:raw] and
+ (@context[:raw] == :all or
+ @context[:raw].include? expanded_name))
+ @raw
+ end
+
+ #once :whitespace, :raw, :ignore_whitespace_nodes
+
+ #################################################
+ # Namespaces #
+ #################################################
+
+ # Evaluates to an +Array+ containing the prefixes (names) of all defined
+ # namespaces at this context node.
+ # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
+ # doc.elements['//b'].prefixes # -> ['x', 'y']
+ def prefixes
+ prefixes = []
+ prefixes = parent.prefixes if parent
+ prefixes |= attributes.prefixes
+ return prefixes
+ end
+
+ def namespaces
+ namespaces = []
+ namespaces = parent.namespaces if parent
+ namespaces |= attributes.namespaces
+ return namespaces
+ end
+
+ # Evalutas to the URI for a prefix, or the empty string if no such
+ # namespace is declared for this element. Evaluates recursively for
+ # ancestors. Returns the default namespace, if there is one.
+ # prefix::
+ # the prefix to search for. If not supplied, returns the default
+ # namespace if one exists
+ # Returns::
+ # the namespace URI as a String, or nil if no such namespace
+ # exists. If the namespace is undefined, returns an empty string
+ # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
+ # b = doc.elements['//b']
+ # b.namespace # -> '1'
+ # b.namespace("y") # -> '2'
+ def namespace(prefix=nil)
+ if prefix.nil?
+ prefix = self.prefix()
+ end
+ if prefix == ''
+ prefix = "xmlns"
+ else
+ prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
+ end
+ ns = attributes[ prefix ]
+ ns = parent.namespace(prefix) if ns.nil? and parent
+ ns = '' if ns.nil? and prefix == 'xmlns'
+ return ns
+ end
+
+ # Adds a namespace to this element.
+ # prefix::
+ # the prefix string, or the namespace URI if +uri+ is not
+ # supplied
+ # uri::
+ # the namespace URI. May be nil, in which +prefix+ is used as
+ # the URI
+ # Evaluates to: this Element
+ # a = Element.new("a")
+ # a.add_namespace("xmlns:foo", "bar" )
+ # a.add_namespace("foo", "bar") # shorthand for previous line
+ # a.add_namespace("twiddle")
+ # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/>
+ def add_namespace( prefix, uri=nil )
+ unless uri
+ @attributes["xmlns"] = prefix
+ else
+ prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/
+ @attributes[ prefix ] = uri
+ end
+ self
+ end
+
+ # Removes a namespace from this node. This only works if the namespace is
+ # actually declared in this node. If no argument is passed, deletes the
+ # default namespace.
+ #
+ # Evaluates to: this element
+ # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
+ # doc.root.delete_namespace
+ # puts doc # -> <a xmlns:foo='bar'/>
+ # doc.root.delete_namespace 'foo'
+ # puts doc # -> <a/>
+ def delete_namespace namespace="xmlns"
+ namespace = "xmlns:#{namespace}" unless namespace == 'xmlns'
+ attribute = attributes.get_attribute(namespace)
+ attribute.remove unless attribute.nil?
+ self
+ end
+
+ #################################################
+ # Elements #
+ #################################################
+
+ # Adds a child to this element, optionally setting attributes in
+ # the element.
+ # element::
+ # optional. If Element, the element is added.
+ # Otherwise, a new Element is constructed with the argument (see
+ # Element.initialize).
+ # attrs::
+ # If supplied, must be a Hash containing String name,value
+ # pairs, which will be used to set the attributes of the new Element.
+ # Returns:: the Element that was added
+ # el = doc.add_element 'my-tag'
+ # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
+ # el = Element.new 'my-tag'
+ # doc.add_element el
+ def add_element element, attrs=nil
+ raise "First argument must be either an element name, or an Element object" if element.nil?
+ el = @elements.add(element)
+ if attrs.kind_of? Hash
+ attrs.each do |key, value|
+ el.attributes[key]=value if key =~ /^xmlns:/
+ end
+ attrs.each do |key, value|
+ el.attributes[key]=value if key !~ /^xmlns:/
+ end
+ end
+ el
+ end
+
+ # Deletes a child element.
+ # element::
+ # Must be an +Element+, +String+, or +Integer+. If Element,
+ # the element is removed. If String, the element is found (via XPath)
+ # and removed. <em>This means that any parent can remove any
+ # descendant.<em> If Integer, the Element indexed by that number will be
+ # removed.
+ # Returns:: the element that was removed.
+ # doc.delete_element "/a/b/c[@id='4']"
+ # doc.delete_element doc.elements["//k"]
+ # doc.delete_element 1
+ def delete_element element
+ @elements.delete element
+ end
+
+ # Evaluates to +true+ if this element has at least one child Element
+ # doc = Document.new "<a><b/><c>Text</c></a>"
+ # doc.root.has_elements # -> true
+ # doc.elements["/a/b"].has_elements # -> false
+ # doc.elements["/a/c"].has_elements # -> false
+ def has_elements?
+ ! elements.empty?
+ end
+
+ # Iterates through the child elements, yielding for each Element that
+ # has a particular attribute set.
+ # key::
+ # the name of the attribute to search for
+ # value::
+ # the value of the attribute
+ # max::
+ # (optional) causes this method to return after yielding
+ # for this number of matching children
+ # name::
+ # (optional) if supplied, this is an XPath that filters
+ # the children to check.
+ #
+ # doc = Document.new "<a><b @id='1'/><c @id='2'/><d @id='1'/><e/></a>"
+ # # Yields b, c, d
+ # doc.root.each_element_with_attribute( 'id' ) {|e| p e}
+ # # Yields b, d
+ # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
+ # # Yields b
+ # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
+ # # Yields d
+ # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
+ def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
+ each_with_something( proc {|child|
+ if value.nil?
+ child.attributes[key] != nil
+ else
+ child.attributes[key]==value
+ end
+ }, max, name, &block )
+ end
+
+ # Iterates through the children, yielding for each Element that
+ # has a particular text set.
+ # text::
+ # the text to search for. If nil, or not supplied, will itterate
+ # over all +Element+ children that contain at least one +Text+ node.
+ # max::
+ # (optional) causes this method to return after yielding
+ # for this number of matching children
+ # name::
+ # (optional) if supplied, this is an XPath that filters
+ # the children to check.
+ #
+ # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
+ # # Yields b, c, d
+ # doc.each_element_with_text {|e|p e}
+ # # Yields b, c
+ # doc.each_element_with_text('b'){|e|p e}
+ # # Yields b
+ # doc.each_element_with_text('b', 1){|e|p e}
+ # # Yields d
+ # doc.each_element_with_text(nil, 0, 'd'){|e|p e}
+ def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
+ each_with_something( proc {|child|
+ if text.nil?
+ child.has_text?
+ else
+ child.text == text
+ end
+ }, max, name, &block )
+ end
+
+ # Synonym for Element.elements.each
+ def each_element( xpath=nil, &block ) # :yields: Element
+ @elements.each( xpath, &block )
+ end
+
+ # Synonym for Element.to_a
+ # This is a little slower than calling elements.each directly.
+ # xpath:: any XPath by which to search for elements in the tree
+ # Returns:: an array of Elements that match the supplied path
+ def get_elements( xpath )
+ @elements.to_a( xpath )
+ end
+
+ # Returns the next sibling that is an element, or nil if there is
+ # no Element sibling after this one
+ # doc = Document.new '<a><b/>text<c/></a>'
+ # doc.root.elements['b'].next_element #-> <c/>
+ # doc.root.elements['c'].next_element #-> nil
+ def next_element
+ element = next_sibling
+ element = element.next_sibling until element.nil? or element.kind_of? Element
+ return element
+ end
+
+ # Returns the previous sibling that is an element, or nil if there is
+ # no Element sibling prior to this one
+ # doc = Document.new '<a><b/>text<c/></a>'
+ # doc.root.elements['c'].previous_element #-> <b/>
+ # doc.root.elements['b'].previous_element #-> nil
+ def previous_element
+ element = previous_sibling
+ element = element.previous_sibling until element.nil? or element.kind_of? Element
+ return element
+ end
+
+
+ #################################################
+ # Text #
+ #################################################
+
+ # Evaluates to +true+ if this element has at least one Text child
+ def has_text?
+ not text().nil?
+ end
+
+ # A convenience method which returns the String value of the _first_
+ # child text element, if one exists, and +nil+ otherwise.
+ #
+ # <em>Note that an element may have multiple Text elements, perhaps
+ # separated by other children</em>. Be aware that this method only returns
+ # the first Text node.
+ #
+ # This method returns the +value+ of the first text child node, which
+ # ignores the +raw+ setting, so always returns normalized text. See
+ # the Text::value documentation.
+ #
+ # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
+ # # The element 'p' has two text elements, "some text " and " more text".
+ # doc.root.text #-> "some text "
+ def text( path = nil )
+ rv = get_text(path)
+ return rv.value unless rv.nil?
+ nil
+ end
+
+ # Returns the first child Text node, if any, or +nil+ otherwise.
+ # This method returns the actual +Text+ node, rather than the String content.
+ # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
+ # # The element 'p' has two text elements, "some text " and " more text".
+ # doc.root.get_text.value #-> "some text "
+ def get_text path = nil
+ rv = nil
+ if path
+ element = @elements[ path ]
+ rv = element.get_text unless element.nil?
+ else
+ rv = @children.find { |node| node.kind_of? Text }
+ end
+ return rv
+ end
+
+ # Sets the first Text child of this object. See text() for a
+ # discussion about Text children.
+ #
+ # If a Text child already exists, the child is replaced by this
+ # content. This means that Text content can be deleted by calling
+ # this method with a nil argument. In this case, the next Text
+ # child becomes the first Text child. In no case is the order of
+ # any siblings disturbed.
+ # text::
+ # If a String, a new Text child is created and added to
+ # this Element as the first Text child. If Text, the text is set
+ # as the first Child element. If nil, then any existing first Text
+ # child is removed.
+ # Returns:: this Element.
+ # doc = Document.new '<a><b/></a>'
+ # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>'
+ # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>'
+ # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>'
+ # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>'
+ # doc.root.text = nil #-> '<a><b/><c/></a>'
+ def text=( text )
+ if text.kind_of? String
+ text = Text.new( text, whitespace(), nil, raw() )
+ elsif text and !text.kind_of? Text
+ text = Text.new( text.to_s, whitespace(), nil, raw() )
+ end
+
+ old_text = get_text
+ if text.nil?
+ old_text.remove unless old_text.nil?
+ else
+ if old_text.nil?
+ self << text
+ else
+ old_text.replace_with( text )
+ end
+ end
+ return self
+ end
+
+ # A helper method to add a Text child. Actual Text instances can
+ # be added with regular Parent methods, such as add() and <<()
+ # text::
+ # if a String, a new Text instance is created and added
+ # to the parent. If Text, the object is added directly.
+ # Returns:: this Element
+ # e = Element.new('a') #-> <e/>
+ # e.add_text 'foo' #-> <e>foo</e>
+ # e.add_text Text.new(' bar') #-> <e>foo bar</e>
+ # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
+ # element and <b>2</b> Text node children.
+ def add_text( text )
+ if text.kind_of? String
+ if @children[-1].kind_of? Text
+ @children[-1] << text
+ return
+ end
+ text = Text.new( text, whitespace(), nil, raw() )
+ end
+ self << text unless text.nil?
+ return self
+ end
+
+ def node_type
+ :element
+ end
+
+ def xpath
+ path_elements = []
+ cur = self
+ path_elements << __to_xpath_helper( self )
+ while cur.parent
+ cur = cur.parent
+ path_elements << __to_xpath_helper( cur )
+ end
+ return path_elements.reverse.join( "/" )
+ end
+
+ #################################################
+ # Attributes #
+ #################################################
+
+ def attribute( name, namespace=nil )
+ prefix = ''
+ if namespace
+ prefix = attributes.prefixes.each { |prefix|
+ return "#{prefix}:" if namespace( prefix ) == namespace
+ } || ''
+ end
+ attributes.get_attribute( "#{prefix}#{name}" )
+ end
+
+ # Evaluates to +true+ if this element has any attributes set, false
+ # otherwise.
+ def has_attributes?
+ return ! attributes.empty?
+ end
+
+ # Adds an attribute to this element, overwriting any existing attribute
+ # by the same name.
+ # key::
+ # can be either an Attribute or a String. If an Attribute,
+ # the attribute is added to the list of Element attributes. If String,
+ # the argument is used as the name of the new attribute, and the value
+ # parameter must be supplied.
+ # value::
+ # Required if +key+ is a String, and ignored if the first argument is
+ # an Attribute. This is a String, and is used as the value
+ # of the new Attribute.
+ # Returns:: the Attribute added
+ # e = Element.new 'e'
+ # e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
+ # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/>
+ # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/>
+ def add_attribute( key, value=nil )
+ if key.kind_of? Attribute
+ @attributes << key
+ else
+ @attributes[key] = value
+ end
+ end
+
+ # Add multiple attributes to this element.
+ # hash:: is either a hash, or array of arrays
+ # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
+ # el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] )
+ def add_attributes hash
+ if hash.kind_of? Hash
+ hash.each_pair {|key, value| @attributes[key] = value }
+ elsif hash.kind_of? Array
+ hash.each { |value| @attributes[ value[0] ] = value[1] }
+ end
+ end
+
+ # Removes an attribute
+ # key::
+ # either an Attribute or a String. In either case, the
+ # attribute is found by matching the attribute name to the argument,
+ # and then removed. If no attribute is found, no action is taken.
+ # Returns::
+ # the attribute removed, or nil if this Element did not contain
+ # a matching attribute
+ # e = Element.new('E')
+ # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/>
+ # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/>
+ # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/>
+ # e.delete_attribute( r ) #-> <E/>
+ def delete_attribute(key)
+ attr = @attributes.get_attribute(key)
+ attr.remove unless attr.nil?
+ end
+
+ #################################################
+ # Other Utilities #
+ #################################################
+
+ # Get an array of all CData children.
+ # IMMUTABLE
+ def cdatas
+ find_all { |child| child.kind_of? CData }.freeze
+ end
+
+ # Get an array of all Comment children.
+ # IMMUTABLE
+ def comments
+ find_all { |child| child.kind_of? Comment }.freeze
+ end
+
+ # Get an array of all Instruction children.
+ # IMMUTABLE
+ def instructions
+ find_all { |child| child.kind_of? Instruction }.freeze
+ end
+
+ # Get an array of all Text children.
+ # IMMUTABLE
+ def texts
+ find_all { |child| child.kind_of? Text }.freeze
+ end
+
+ # Writes out this element, and recursively, all children.
+ # output::
+ # output an object which supports '<< string'; this is where the
+ # document will be written.
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount. Defaults to -1
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the parse tree of the document
+ # ie_hack::
+ # Internet Explorer is the worst piece of crap to have ever been
+ # written, with the possible exception of Windows itself. Since IE is
+ # unable to parse proper XML, we have to provide a hack to generate XML
+ # that IE's limited abilities can handle. This hack inserts a space
+ # before the /> on empty tags. Defaults to false
+ #
+ # out = ''
+ # doc.write( out ) #-> doc is written to the string 'out'
+ # doc.write( $stdout ) #-> doc written to the console
+ def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
+ #print "ID:#{indent}"
+ writer << "<#@expanded_name"
+
+ @attributes.each_attribute do |attr|
+ writer << " "
+ attr.write( writer, indent )
+ end unless @attributes.empty?
+
+ if @children.empty?
+ if transitive and indent>-1
+ writer << "\n"
+ indent( writer, indent )
+ elsif ie_hack
+ writer << " "
+ end
+ writer << "/"
+ else
+ if transitive and indent>-1 and ! children[0].kind_of? Text
+ writer << "\n"
+ indent writer, indent+1
+ end
+ writer << ">"
+ write_children( writer, indent, transitive, ie_hack )
+ writer << "</#{expanded_name}"
+ end
+ if transitive and indent>-1 and ! children.empty?
+ writer << "\n"
+ indent -= 1 if next_sibling.nil?
+ indent(writer, indent)
+ end
+ writer << ">"
+ end
+
+
+ private
+ def __to_xpath_helper node
+ rv = node.expanded_name
+ if node.parent
+ results = node.parent.find_all {|n|
+ n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name
+ }
+ if results.length > 1
+ idx = results.index( node )
+ rv << "[#{idx+1}]"
+ end
+ end
+ rv
+ end
+
+ # A private helper method
+ def each_with_something( test, max=0, name=nil )
+ num = 0
+ child=nil
+ @elements.each( name ){ |child|
+ yield child if test.call(child) and num += 1
+ return if max>0 and num == max
+ }
+ end
+
+ # A private helper method
+ def write_children( writer, indent, transitive, ie_hack )
+ cr = (indent < 0) ? '' : "\n"
+ if indent == -1
+ each { |child| child.write( writer, indent, transitive, ie_hack ) }
+ else
+ next_indent = indent+1
+ last_child=nil
+ each { |child|
+ unless child.kind_of? Text or last_child.kind_of? Text or transitive
+ writer << cr
+ indent(writer, next_indent)
+ end
+ child.write( writer, next_indent, transitive, ie_hack )
+ last_child = child
+ }
+ unless last_child.kind_of? Text or transitive
+ writer << cr
+ indent( writer, indent )
+ end
+ end
+ end
+ end
+
+ ########################################################################
+ # ELEMENTS #
+ ########################################################################
+
+ # A class which provides filtering of children for Elements, and
+ # XPath search support. You are expected to only encounter this class as
+ # the <tt>element.elements</tt> object. Therefore, you are
+ # _not_ expected to instantiate this yourself.
+ class Elements
+ include Enumerable
+ # Constructor
+ # parent:: the parent Element
+ def initialize parent
+ @element = parent
+ end
+
+ # Fetches a child element. Filters only Element children, regardless of
+ # the XPath match.
+ # index::
+ # the search parameter. This is either an Integer, which
+ # will be used to find the index'th child Element, or an XPath,
+ # which will be used to search for the Element. <em>Because
+ # of the nature of XPath searches, any element in the connected XML
+ # document can be fetched through any other element.</em> <b>The
+ # Integer index is 1-based, not 0-based.</b> This means that the first
+ # child element is at index 1, not 0, and the +n+th element is at index
+ # +n+, not <tt>n-1</tt>. This is because XPath indexes element children
+ # starting from 1, not 0, and the indexes should be the same.
+ # name::
+ # optional, and only used in the first argument is an
+ # Integer. In that case, the index'th child Element that has the
+ # supplied name will be returned. Note again that the indexes start at 1.
+ # Returns:: the first matching Element, or nil if no child matched
+ # doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
+ # doc.root.elements[1] #-> <b/>
+ # doc.root.elements['c'] #-> <c id="1"/>
+ # doc.root.elements[2,'c'] #-> <c id="2"/>
+ def []( index, name=nil)
+ if index.kind_of? Integer
+ raise "index (#{index}) must be >= 1" if index < 1
+ name = literalize(name) if name
+ num = 0
+ child = nil
+ @element.find { |child|
+ child.kind_of? Element and
+ (name.nil? ? true : child.has_name?( name )) and
+ (num += 1) == index
+ }
+ else
+ return XPath::first( @element, index )
+ #{ |element|
+ # return element if element.kind_of? Element
+ #}
+ #return nil
+ end
+ end
+
+ # Sets an element, replacing any previous matching element. If no
+ # existing element is found ,the element is added.
+ # index:: Used to find a matching element to replace. See []().
+ # element::
+ # The element to replace the existing element with
+ # the previous element
+ # Returns:: nil if no previous element was found.
+ #
+ # doc = Document.new '<a/>'
+ # doc.root.elements[10] = Element.new('b') #-> <a><b/></a>
+ # doc.root.elements[1] #-> <b/>
+ # doc.root.elements[1] = Element.new('c') #-> <a><c/></a>
+ # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a>
+ def []=( index, element )
+ previous = self[index]
+ if previous.nil?
+ @element.add element
+ else
+ previous.replace_with element
+ end
+ return previous
+ end
+
+ # Returns +true+ if there are no +Element+ children, +false+ otherwise
+ def empty?
+ @element.find{ |child| child.kind_of? Element}.nil?
+ end
+
+ # Returns the index of the supplied child (starting at 1), or -1 if
+ # the element is not a child
+ # element:: an +Element+ child
+ def index element
+ rv = 0
+ found = @element.find do |child|
+ child.kind_of? Element and
+ (rv += 1) and
+ child == element
+ end
+ return rv if found == element
+ return -1
+ end
+
+ # Deletes a child Element
+ # element::
+ # Either an Element, which is removed directly; an
+ # xpath, where the first matching child is removed; or an Integer,
+ # where the n'th Element is removed.
+ # Returns:: the removed child
+ # doc = Document.new '<a><b/><c/><c id="1"/></a>'
+ # b = doc.root.elements[1]
+ # doc.root.elements.delete b #-> <a><c/><c id="1"/></a>
+ # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a>
+ # doc.root.elements.delete 1 #-> <a/>
+ def delete element
+ if element.kind_of? Element
+ @element.delete element
+ else
+ el = self[element]
+ el.remove if el
+ end
+ end
+
+ # Removes multiple elements. Filters for Element children, regardless of
+ # XPath matching.
+ # xpath:: all elements matching this String path are removed.
+ # Returns:: an Array of Elements that have been removed
+ # doc = Document.new '<a><c/><c/><c/><c/></a>'
+ # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
+ def delete_all( xpath )
+ rv = []
+ XPath::each( @element, xpath) {|element|
+ rv << element if element.kind_of? Element
+ }
+ rv.each do |element|
+ @element.delete element
+ element.remove
+ end
+ return rv
+ end
+
+ # Adds an element
+ # element::
+ # if supplied, is either an Element, String, or
+ # Source (see Element.initialize). If not supplied or nil, a
+ # new, default Element will be constructed
+ # Returns:: the added Element
+ # a = Element.new 'a'
+ # a.elements.add Element.new 'b' #-> <a><b/></a>
+ # a.elements.add 'c' #-> <a><b/><c/></a>
+ def add element=nil
+ rv = nil
+ if element.nil?
+ Element.new "", self, @element.context
+ elsif not element.kind_of?(Element)
+ Element.new element, self, @element.context
+ else
+ @element << element
+ element.context = @element.context
+ element
+ end
+ end
+
+ alias :<< :add
+
+ # Iterates through all of the child Elements, optionally filtering
+ # them by a given XPath
+ # xpath::
+ # optional. If supplied, this is a String XPath, and is used to
+ # filter the children, so that only matching children are yielded. Note
+ # that XPaths are automatically filtered for Elements, so that
+ # non-Element children will not be yielded
+ # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
+ # doc.root.each {|e|p e} #-> Yields b, c, d, b, c, d elements
+ # doc.root.each('b') {|e|p e} #-> Yields b, b elements
+ # doc.root.each('child::node()') {|e|p e}
+ # #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
+ # XPath.each(doc.root, 'child::node()', &block)
+ # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
+ def each( xpath=nil, &block)
+ XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element }
+ end
+
+ # Returns the number of +Element+ children of the parent object.
+ # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
+ # doc.root.size #-> 6, 3 element and 3 text nodes
+ # doc.root.elements.size #-> 3
+ def size
+ count = 0
+ @element.each {|child| count+=1 if child.kind_of? Element }
+ count
+ end
+
+ # Returns an Array of Element children. An XPath may be supplied to
+ # filter the children. Only Element children are returned, even if the
+ # supplied XPath matches non-Element children.
+ # doc = Document.new '<a>sean<b/>elliott<c/></a>'
+ # doc.root.elements.to_a #-> [ <b/>, <c/> ]
+ # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
+ # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ]
+ def to_a( xpath=nil )
+ rv = XPath.match( @element, xpath )
+ return rv.find_all{|e| e.kind_of? Element} if xpath
+ rv
+ end
+
+ private
+ # Private helper class. Removes quotes from quoted strings
+ def literalize name
+ name = name[1..-2] if name[0] == ?' or name[0] == ?" #'
+ name
+ end
+ end
+
+ ########################################################################
+ # ATTRIBUTES #
+ ########################################################################
+
+ # A class that defines the set of Attributes of an Element and provides
+ # operations for accessing elements in that set.
+ class Attributes < Hash
+ # Constructor
+ # element:: the Element of which this is an Attribute
+ def initialize element
+ @element = element
+ end
+
+ # Fetches an attribute value. If you want to get the Attribute itself,
+ # use get_attribute()
+ # name:: an XPath attribute name. Namespaces are relevant here.
+ # Returns::
+ # the String value of the matching attribute, or +nil+ if no
+ # matching attribute was found.
+ #
+ # doc = Document.new "<a foo:att='1' bar:att='2' att='3'/>"
+ # doc.root.attributes['att'] #-> '3'
+ # doc.root.attributes['bar:att'] #-> '2'
+ def [](name)
+ attr = get_attribute(name)
+ return attr.value unless attr.nil?
+ return nil
+ end
+
+ def to_a
+ values.flatten
+ end
+
+ # Returns the number of attributes the owning Element contains.
+ # doc = Document "<a x='1' y='2' foo:x='3'/>"
+ # doc.root.attributes.length #-> 3
+ def length
+ c = 0
+ each_attribute { c+=1 }
+ c
+ end
+ alias :size :length
+
+ # Itterates over the attributes of an Element. Yields actual Attribute
+ # nodes, not String values.
+ #
+ # doc = Document.new '<a x="1" y="2"/>'
+ # doc.root.attributes.each_attribute {|attr|
+ # p attr.expanded_name+" => "+attr.value
+ # }
+ def each_attribute # :yields: attribute
+ each_value do |val|
+ if val.kind_of? Attribute
+ yield val
+ else
+ val.each_value { |atr| yield atr }
+ end
+ end
+ end
+
+ # Itterates over each attribute of an Element, yielding the expanded name
+ # and value as a pair of Strings.
+ #
+ # doc = Document.new '<a x="1" y="2"/>'
+ # doc.root.attributes.each {|name, value| p name+" => "+value }
+ def each
+ each_attribute do |attr|
+ yield attr.expanded_name, attr.value
+ end
+ end
+
+ # Fetches an attribute
+ # name::
+ # the name by which to search for the attribute. Can be a
+ # <tt>prefix:name</tt> namespace name.
+ # Returns:: The first matching attribute, or nil if there was none. This
+ # value is an Attribute node, not the String value of the attribute.
+ # doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
+ # doc.root.attributes.get_attribute("foo").value #-> "2"
+ # doc.root.attributes.get_attribute("x:foo").value #-> "1"
+ def get_attribute( name )
+ attr = fetch( name, nil )
+ if attr.nil?
+ return nil if name.nil?
+ # Look for prefix
+ name =~ Namespace::NAMESPLIT
+ prefix, n = $1, $2
+ if prefix
+ attr = fetch( n, nil )
+ # check prefix
+ if attr == nil
+ elsif attr.kind_of? Attribute
+ return attr if prefix == attr.prefix
+ else
+ attr = attr[ prefix ]
+ return attr
+ end
+ end
+ element_document = @element.document
+ if element_document and element_document.doctype
+ expn = @element.expanded_name
+ expn = element_document.doctype.name if expn.size == 0
+ attr_val = element_document.doctype.attribute_of(expn, name)
+ return Attribute.new( name, attr_val ) if attr_val
+ end
+ return nil
+ end
+ if attr.kind_of? Hash
+ attr = attr[ @element.prefix ]
+ end
+ return attr
+ end
+
+ # Sets an attribute, overwriting any existing attribute value by the
+ # same name. Namespace is significant.
+ # name:: the name of the attribute
+ # value::
+ # (optional) If supplied, the value of the attribute. If
+ # nil, any existing matching attribute is deleted.
+ # Returns::
+ # Owning element
+ # doc = Document.new "<a x:foo='1' foo='3'/>"
+ # doc.root.attributes['y:foo'] = '2'
+ # doc.root.attributes['foo'] = '4'
+ # doc.root.attributes['x:foo'] = nil
+ def []=( name, value )
+ if value.nil? # Delete the named attribute
+ attr = get_attribute(name)
+ delete attr
+ return
+ end
+ value = Attribute.new(name, value) unless value.kind_of? Attribute
+ value.element = @element
+ old_attr = fetch(value.name, nil)
+ if old_attr.nil?
+ store(value.name, value)
+ elsif old_attr.kind_of? Hash
+ old_attr[value.prefix] = value
+ elsif old_attr.prefix != value.prefix
+ # Check for conflicting namespaces
+ raise ParseException.new(
+ "Namespace conflict in adding attribute \"#{value.name}\": "+
+ "Prefix \"#{old_attr.prefix}\" = "+
+ "\"#{ element.namespace(old_attr.prefix)}\" and prefix "+
+ "\"#{value.prefix}\" = \"#{ element.namespace(value.prefix)}\"") if
+ value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
+ @element.namespace( old_attr.prefix ) ==
+ @element.namespace( value.prefix )
+ store value.name, { old_attr.prefix => old_attr,
+ value.prefix => value }
+ else
+ store value.name, value
+ end
+ return @element
+ end
+
+ # Returns an array of Strings containing all of the prefixes declared
+ # by this set of # attributes. The array does not include the default
+ # namespace declaration, if one exists.
+ # doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
+ # "z='glorp' p:k='gru'/>")
+ # prefixes = doc.root.attributes.prefixes #-> ['x', 'y']
+ def prefixes
+ ns = []
+ each_attribute do |attribute|
+ ns << attribute.name if attribute.prefix == 'xmlns'
+ end
+ if @element.document and @element.document.doctype
+ expn = @element.expanded_name
+ expn = @element.document.doctype.name if expn.size == 0
+ @element.document.doctype.attributes_of(expn).each {
+ |attribute|
+ ns << attribute.name if attribute.prefix == 'xmlns'
+ }
+ end
+ ns
+ end
+
+ def namespaces
+ namespaces = []
+ each_attribute do |attribute|
+ namespaces << attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
+ end
+ if @element.document and @element.document.doctype
+ expn = @element.expanded_name
+ expn = @element.document.doctype.name if expn.size == 0
+ @element.document.doctype.attributes_of(expn).each {
+ |attribute|
+ namespaces << attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
+ }
+ end
+ namespaces
+ end
+
+ # Removes an attribute
+ # attribute::
+ # either a String, which is the name of the attribute to remove --
+ # namespaces are significant here -- or the attribute to remove.
+ # Returns:: the owning element
+ # doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
+ # doc.root.attributes.delete 'foo' #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
+ # doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
+ # attr = doc.root.attributes.get_attribute('y:foo')
+ # doc.root.attributes.delete attr #-> <a z:foo='4'/>"
+ def delete( attribute )
+ name = nil
+ prefix = nil
+ if attribute.kind_of? Attribute
+ name = attribute.name
+ prefix = attribute.prefix
+ else
+ attribute =~ Namespace::NAMESPLIT
+ prefix, name = $1, $2
+ prefix = '' unless prefix
+ end
+ old = fetch(name, nil)
+ attr = nil
+ if old.kind_of? Hash # the supplied attribute is one of many
+ attr = old.delete(prefix)
+ if old.size == 1
+ repl = nil
+ old.each_value{|v| repl = v}
+ store name, repl
+ end
+ elsif old.nil?
+ return @element
+ else # the supplied attribute is a top-level one
+ attr = old
+ res = super(name)
+ end
+ @element
+ end
+
+ # Adds an attribute, overriding any existing attribute by the
+ # same name. Namespaces are significant.
+ # attribute:: An Attribute
+ def add( attribute )
+ self[attribute.name] = attribute
+ end
+
+ alias :<< :add
+
+ # Deletes all attributes matching a name. Namespaces are significant.
+ # name::
+ # A String; all attributes that match this path will be removed
+ # Returns:: an Array of the Attributes that were removed
+ def delete_all( name )
+ rv = []
+ each_attribute { |attribute|
+ rv << attribute if attribute.expanded_name == name
+ }
+ rv.each{ |attr| attr.remove }
+ return rv
+ end
+ end
+end
Added: trunk/lib/rexml/encoding.rb
===================================================================
--- trunk/lib/rexml/encoding.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encoding.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,58 @@
+# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
+module REXML
+ module Encoding
+ @encoding_methods = {}
+ def self.register(enc, &block)
+ @encoding_methods[enc] = block
+ end
+ def self.apply(obj, enc)
+ @encoding_methods[enc][obj]
+ end
+ def self.encoding_method(enc)
+ @encoding_methods[enc]
+ end
+
+ # Native, default format is UTF-8, so it is declared here rather than in
+ # an encodings/ definition.
+ UTF_8 = 'UTF-8'
+ UTF_16 = 'UTF-16'
+ UNILE = 'UNILE'
+
+ # ID ---> Encoding name
+ attr_reader :encoding
+ def encoding=( enc )
+ old_verbosity = $VERBOSE
+ begin
+ $VERBOSE = false
+ return if defined? @encoding and enc == @encoding
+ if enc
+ raise ArgumentError, "Bad encoding name #{enc}" unless /\A[\w-]+\z/n =~ enc
+ @encoding = enc.upcase.untaint
+ else
+ @encoding = UTF_8
+ end
+ err = nil
+ [@encoding, "ICONV"].each do |enc|
+ begin
+ require File.join("rexml", "encodings", "#{enc}.rb")
+ return Encoding.apply(self, enc)
+ rescue LoadError, Exception => err
+ end
+ end
+ puts err.message
+ raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
+ ensure
+ $VERBOSE = old_verbosity
+ end
+ end
+
+ def check_encoding str
+ # We have to recognize UTF-16, LSB UTF-16, and UTF-8
+ return UTF_16 if str[0] == 254 && str[1] == 255
+ return UNILE if str[0] == 255 && str[1] == 254
+ str =~ /^\s*<?xml\s*version=(['"]).*?\2\s*encoding=(["'])(.*?)\2/um
+ return $1.upcase if $1
+ return UTF_8
+ end
+ end
+end
Added: trunk/lib/rexml/encodings/CP-1252.rb
===================================================================
--- trunk/lib/rexml/encodings/CP-1252.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/CP-1252.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,98 @@
+#
+# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
+#
+module REXML
+ module Encoding
+ @@__REXML_encoding_methods = %q~
+ # Convert from UTF-8
+ def encode content
+ array_utf8 = content.unpack('U*')
+ array_enc = []
+ array_utf8.each do |num|
+ case num
+ # shortcut first bunch basic characters
+ when 0..0xFF: array_enc << num
+ # characters added compared to iso-8859-1
+ when 0x20AC: array_enc << 0x80 # 0xe2 0x82 0xac
+ when 0x201A: array_enc << 0x82 # 0xe2 0x82 0x9a
+ when 0x0192: array_enc << 0x83 # 0xc6 0x92
+ when 0x201E: array_enc << 0x84 # 0xe2 0x82 0x9e
+ when 0x2026: array_enc << 0x85 # 0xe2 0x80 0xa6
+ when 0x2020: array_enc << 0x86 # 0xe2 0x80 0xa0
+ when 0x2021: array_enc << 0x87 # 0xe2 0x80 0xa1
+ when 0x02C6: array_enc << 0x88 # 0xcb 0x86
+ when 0x2030: array_enc << 0x89 # 0xe2 0x80 0xb0
+ when 0x0160: array_enc << 0x8A # 0xc5 0xa0
+ when 0x2039: array_enc << 0x8B # 0xe2 0x80 0xb9
+ when 0x0152: array_enc << 0x8C # 0xc5 0x92
+ when 0x017D: array_enc << 0x8E # 0xc5 0xbd
+ when 0x2018: array_enc << 0x91 # 0xe2 0x80 0x98
+ when 0x2019: array_enc << 0x92 # 0xe2 0x80 0x99
+ when 0x201C: array_enc << 0x93 # 0xe2 0x80 0x9c
+ when 0x201D: array_enc << 0x94 # 0xe2 0x80 0x9d
+ when 0x2022: array_enc << 0x95 # 0xe2 0x80 0xa2
+ when 0x2013: array_enc << 0x96 # 0xe2 0x80 0x93
+ when 0x2014: array_enc << 0x97 # 0xe2 0x80 0x94
+ when 0x02DC: array_enc << 0x98 # 0xcb 0x9c
+ when 0x2122: array_enc << 0x99 # 0xe2 0x84 0xa2
+ when 0x0161: array_enc << 0x9A # 0xc5 0xa1
+ when 0x203A: array_enc << 0x9B # 0xe2 0x80 0xba
+ when 0x0152: array_enc << 0x9C # 0xc5 0x93
+ when 0x017E: array_enc << 0x9E # 0xc5 0xbe
+ when 0x0178: array_enc << 0x9F # 0xc5 0xb8
+ else
+ # all remaining basic characters can be used directly
+ if num <= 0xFF
+ array_enc << num
+ else
+ # Numeric entity (&#nnnn;); shard by Stefan Scholl
+ array_enc.concat "&\##{num};".unpack('C*')
+ end
+ end
+ end
+ array_enc.pack('C*')
+ end
+
+ # Convert to UTF-8
+ def decode(str)
+ array_latin9 = str.unpack('C*')
+ array_enc = []
+ array_latin9.each do |num|
+ case num
+ # characters that added compared to iso-8859-1
+ when 0x80: array_enc << 0x20AC # 0xe2 0x82 0xac
+ when 0x82: array_enc << 0x201A # 0xe2 0x82 0x9a
+ when 0x83: array_enc << 0x0192 # 0xc6 0x92
+ when 0x84: array_enc << 0x201E # 0xe2 0x82 0x9e
+ when 0x85: array_enc << 0x2026 # 0xe2 0x80 0xa6
+ when 0x86: array_enc << 0x2020 # 0xe2 0x80 0xa0
+ when 0x87: array_enc << 0x2021 # 0xe2 0x80 0xa1
+ when 0x88: array_enc << 0x02C6 # 0xcb 0x86
+ when 0x89: array_enc << 0x2030 # 0xe2 0x80 0xb0
+ when 0x8A: array_enc << 0x0160 # 0xc5 0xa0
+ when 0x8B: array_enc << 0x2039 # 0xe2 0x80 0xb9
+ when 0x8C: array_enc << 0x0152 # 0xc5 0x92
+ when 0x8E: array_enc << 0x017D # 0xc5 0xbd
+ when 0x91: array_enc << 0x2018 # 0xe2 0x80 0x98
+ when 0x92: array_enc << 0x2019 # 0xe2 0x80 0x99
+ when 0x93: array_enc << 0x201C # 0xe2 0x80 0x9c
+ when 0x94: array_enc << 0x201D # 0xe2 0x80 0x9d
+ when 0x95: array_enc << 0x2022 # 0xe2 0x80 0xa2
+ when 0x96: array_enc << 0x2013 # 0xe2 0x80 0x93
+ when 0x97: array_enc << 0x2014 # 0xe2 0x80 0x94
+ when 0x98: array_enc << 0x02DC # 0xcb 0x9c
+ when 0x99: array_enc << 0x2122 # 0xe2 0x84 0xa2
+ when 0x9A: array_enc << 0x0161 # 0xc5 0xa1
+ when 0x9B: array_enc << 0x203A # 0xe2 0x80 0xba
+ when 0x9C: array_enc << 0x0152 # 0xc5 0x93
+ when 0x9E: array_enc << 0x017E # 0xc5 0xbe
+ when 0x9F: array_enc << 0x0178 # 0xc5 0xb8
+ else
+ array_enc << num
+ end
+ end
+ array_enc.pack('U*')
+ end
+ ~
+ end
+end
Added: trunk/lib/rexml/encodings/EUC-JP.rb
===================================================================
--- trunk/lib/rexml/encodings/EUC-JP.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/EUC-JP.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,35 @@
+module REXML
+ module Encoding
+ begin
+ require 'uconv'
+
+ def decode_eucjp(str)
+ Uconv::euctou8(str)
+ end
+
+ def encode_eucjp content
+ Uconv::u8toeuc(content)
+ end
+ rescue LoadError
+ require 'nkf'
+
+ EUCTOU8 = '-Ewm0'
+ U8TOEUC = '-Wem0'
+
+ def decode_eucjp(str)
+ NKF.nkf(EUCTOU8, str)
+ end
+
+ def encode_eucjp content
+ NKF.nkf(U8TOEUC, content)
+ end
+ end
+
+ register("EUC-JP") do |obj|
+ class << obj
+ alias decode decode_eucjp
+ alias encode encode_eucjp
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/encodings/ICONV.rb
===================================================================
--- trunk/lib/rexml/encodings/ICONV.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/ICONV.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,22 @@
+require "iconv"
+raise LoadError unless defined? Iconv
+
+module REXML
+ module Encoding
+ def decode_iconv(str)
+ Iconv.conv(UTF_8, @encoding, str)
+ end
+
+ def encode_iconv(content)
+ Iconv.conv(@encoding, UTF_8, content)
+ end
+
+ register("ICONV") do |obj|
+ Iconv.conv(UTF_8, obj.encoding, nil)
+ class << obj
+ alias decode decode_iconv
+ alias encode encode_iconv
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/encodings/ISO-8859-1.rb
===================================================================
--- trunk/lib/rexml/encodings/ISO-8859-1.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/ISO-8859-1.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,7 @@
+require 'rexml/encodings/US-ASCII'
+
+module REXML
+ module Encoding
+ register("ISO-8859-1", &encoding_method("US-ASCII"))
+ end
+end
Added: trunk/lib/rexml/encodings/ISO-8859-15.rb
===================================================================
--- trunk/lib/rexml/encodings/ISO-8859-15.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/ISO-8859-15.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,69 @@
+#
+# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
+#
+module REXML
+ module Encoding
+ @@__REXML_encoding_methods = %q~
+ # Convert from UTF-8
+ def to_iso_8859_15 content
+ array_utf8 = content.unpack('U*')
+ array_enc = []
+ array_utf8.each do |num|
+ case num
+ # shortcut first bunch basic characters
+ when 0..0xA3: array_enc << num
+ # characters removed compared to iso-8859-1
+ when 0xA4: array_enc << '¤'
+ when 0xA6: array_enc << '¦'
+ when 0xA8: array_enc << '¨'
+ when 0xB4: array_enc << '´'
+ when 0xB8: array_enc << '¸'
+ when 0xBC: array_enc << '¼'
+ when 0xBD: array_enc << '½'
+ when 0xBE: array_enc << '¾'
+ # characters added compared to iso-8859-1
+ when 0x20AC: array_enc << 0xA4 # 0xe2 0x82 0xac
+ when 0x0160: array_enc << 0xA6 # 0xc5 0xa0
+ when 0x0161: array_enc << 0xA8 # 0xc5 0xa1
+ when 0x017D: array_enc << 0xB4 # 0xc5 0xbd
+ when 0x017E: array_enc << 0xB8 # 0xc5 0xbe
+ when 0x0152: array_enc << 0xBC # 0xc5 0x92
+ when 0x0153: array_enc << 0xBD # 0xc5 0x93
+ when 0x0178: array_enc << 0xBE # 0xc5 0xb8
+ else
+ # all remaining basic characters can be used directly
+ if num <= 0xFF
+ array_enc << num
+ else
+ # Numeric entity (&#nnnn;); shard by Stefan Scholl
+ array_enc.concat "&\##{num};".unpack('C*')
+ end
+ end
+ end
+ array_enc.pack('C*')
+ end
+
+ # Convert to UTF-8
+ def from_iso_8859_15(str)
+ array_latin9 = str.unpack('C*')
+ array_enc = []
+ array_latin9.each do |num|
+ case num
+ # characters that differ compared to iso-8859-1
+ when 0xA4: array_enc << 0x20AC
+ when 0xA6: array_enc << 0x0160
+ when 0xA8: array_enc << 0x0161
+ when 0xB4: array_enc << 0x017D
+ when 0xB8: array_enc << 0x017E
+ when 0xBC: array_enc << 0x0152
+ when 0xBD: array_enc << 0x0153
+ when 0xBE: array_enc << 0x0178
+ else
+ array_enc << num
+ end
+ end
+ array_enc.pack('U*')
+ end
+ ~
+ end
+end
Added: trunk/lib/rexml/encodings/SHIFT-JIS.rb
===================================================================
--- trunk/lib/rexml/encodings/SHIFT-JIS.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/SHIFT-JIS.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,37 @@
+module REXML
+ module Encoding
+ begin
+ require 'uconv'
+
+ def decode_sjis content
+ Uconv::sjistou8(content)
+ end
+
+ def encode_sjis(str)
+ Uconv::u8tosjis(str)
+ end
+ rescue LoadError
+ require 'nkf'
+
+ SJISTOU8 = '-Swm0'
+ U8TOSJIS = '-Wsm0'
+
+ def decode_sjis(str)
+ NKF.nkf(SJISTOU8, str)
+ end
+
+ def encode_sjis content
+ NKF.nkf(U8TOSJIS, content)
+ end
+ end
+
+ b = proc do |obj|
+ class << obj
+ alias decode decode_sjis
+ alias encode encode_sjis
+ end
+ end
+ register("SHIFT-JIS", &b)
+ register("SHIFT_JIS", &b)
+ end
+end
Added: trunk/lib/rexml/encodings/SHIFT_JIS.rb
===================================================================
--- trunk/lib/rexml/encodings/SHIFT_JIS.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/SHIFT_JIS.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1 @@
+require 'rexml/encodings/SHIFT-JIS'
Added: trunk/lib/rexml/encodings/UNILE.rb
===================================================================
--- trunk/lib/rexml/encodings/UNILE.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/UNILE.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,34 @@
+module REXML
+ module Encoding
+ def encode_unile content
+ array_utf8 = content.unpack("U*")
+ array_enc = []
+ array_utf8.each do |num|
+ if ((num>>16) > 0)
+ array_enc << ??
+ array_enc << 0
+ else
+ array_enc << (num & 0xFF)
+ array_enc << (num >> 8)
+ end
+ end
+ array_enc.pack('C*')
+ end
+
+ def decode_unile(str)
+ array_enc=str.unpack('C*')
+ array_utf8 = []
+ 2.step(array_enc.size-1, 2){|i|
+ array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100)
+ }
+ array_utf8.pack('U*')
+ end
+
+ register(UNILE) do |obj|
+ class << obj
+ alias decode decode_unile
+ alias encode encode_unile
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/encodings/US-ASCII.rb
===================================================================
--- trunk/lib/rexml/encodings/US-ASCII.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/US-ASCII.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,30 @@
+module REXML
+ module Encoding
+ # Convert from UTF-8
+ def encode_ascii content
+ array_utf8 = content.unpack('U*')
+ array_enc = []
+ array_utf8.each do |num|
+ if num <= 0x7F
+ array_enc << num
+ else
+ # Numeric entity (&#nnnn;); shard by Stefan Scholl
+ array_enc.concat "&\##{num};".unpack('C*')
+ end
+ end
+ array_enc.pack('C*')
+ end
+
+ # Convert to UTF-8
+ def decode_ascii(str)
+ str.unpack('C*').pack('U*')
+ end
+
+ register("US-ASCII") do |obj|
+ class << obj
+ alias decode decode_ascii
+ alias encode encode_ascii
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/encodings/UTF-16.rb
===================================================================
--- trunk/lib/rexml/encodings/UTF-16.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/UTF-16.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,34 @@
+module REXML
+ module Encoding
+ def encode_utf16 content
+ array_utf8 = content.unpack("U*")
+ array_enc = []
+ array_utf8.each do |num|
+ if ((num>>16) > 0)
+ array_enc << 0
+ array_enc << ??
+ else
+ array_enc << (num >> 8)
+ array_enc << (num & 0xFF)
+ end
+ end
+ array_enc.pack('C*')
+ end
+
+ def decode_utf16(str)
+ array_enc=str.unpack('C*')
+ array_utf8 = []
+ 2.step(array_enc.size-1, 2){|i|
+ array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
+ }
+ array_utf8.pack('U*')
+ end
+
+ register(UTF_16) do |obj|
+ class << obj
+ alias decode decode_utf16
+ alias encode encode_utf16
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/encodings/UTF-8.rb
===================================================================
--- trunk/lib/rexml/encodings/UTF-8.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/encodings/UTF-8.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,18 @@
+module REXML
+ module Encoding
+ def encode_utf8 content
+ content
+ end
+
+ def decode_utf8(str)
+ str
+ end
+
+ register(UTF_8) do |obj|
+ class << obj
+ alias decode decode_utf8
+ alias encode encode_utf8
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/entity.rb
===================================================================
--- trunk/lib/rexml/entity.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/entity.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,159 @@
+require 'rexml/child'
+require 'rexml/source'
+require 'rexml/xmltokens'
+
+module REXML
+ # God, I hate DTDs. I really do. Why this idiot standard still
+ # plagues us is beyond me.
+ class Entity < Child
+ include XMLTokens
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+ SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+ PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+ NDATADECL = "\\s+NDATA\\s+#{NAME}"
+ PEREFERENCE = "%#{NAME};"
+ ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+ ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+
+ attr_reader :name, :external, :ref, :ndata, :pubid
+
+ # Create a new entity. Simple entities can be constructed by passing a
+ # name, value to the constructor; this creates a generic, plain entity
+ # reference. For anything more complicated, you have to pass a Source to
+ # the constructor with the entity definiton, or use the accessor methods.
+ # +WARNING+: There is no validation of entity state except when the entity
+ # is read from a stream. If you start poking around with the accessors,
+ # you can easily create a non-conformant Entity. The best thing to do is
+ # dump the stupid DTDs and use XMLSchema instead.
+ #
+ # e = Entity.new( 'amp', '&' )
+ def initialize stream, value=nil, parent=nil, reference=false
+ super(parent)
+ @ndata = @pubid = @value = @external = nil
+ if stream.kind_of? Array
+ @name = stream[1]
+ if stream[-1] == '%'
+ @reference = true
+ stream.pop
+ else
+ @reference = false
+ end
+ if stream[2] =~ /SYSTEM|PUBLIC/
+ @external = stream[2]
+ if @external == 'SYSTEM'
+ @ref = stream[3]
+ @ndata = stream[4] if stream.size == 5
+ else
+ @pubid = stream[3]
+ @ref = stream[4]
+ end
+ else
+ @value = stream[2]
+ end
+ else
+ @reference = reference
+ @external = nil
+ @name = stream
+ @value = value
+ end
+ end
+
+ # Evaluates whether the given string matchs an entity definition,
+ # returning true if so, and false otherwise.
+ def Entity::matches? string
+ (ENTITYDECL =~ string) == 0
+ end
+
+ # Evaluates to the unnormalized value of this entity; that is, replacing
+ # all entities -- both %ent; and &ent; entities. This differs from
+ # +value()+ in that +value+ only replaces %ent; entities.
+ def unnormalized
+ v = value()
+ return nil if v.nil?
+ @unnormalized = Text::unnormalize(v, parent)
+ @unnormalized
+ end
+
+ #once :unnormalized
+
+ # Returns the value of this entity unprocessed -- raw. This is the
+ # normalized value; that is, with all %ent; and &ent; entities intact
+ def normalized
+ @value
+ end
+
+ # Write out a fully formed, correct entity definition (assuming the Entity
+ # object itself is valid.)
+ def write out, indent=-1
+ out << '<!ENTITY '
+ out << '% ' if @reference
+ out << @name
+ out << ' '
+ if @external
+ out << @external << ' '
+ if @pubid
+ q = @pubid.include?('"')?"'":'"'
+ out << q << @pubid << q << ' '
+ end
+ q = @ref.include?('"')?"'":'"'
+ out << q << @ref << q
+ out << ' NDATA ' << @ndata if @ndata
+ else
+ q = @value.include?('"')?"'":'"'
+ out << q << @value << q
+ end
+ out << '>'
+ end
+
+ # Returns this entity as a string. See write().
+ def to_s
+ rv = ''
+ write rv
+ rv
+ end
+
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
+ # Returns the value of this entity. At the moment, only internal entities
+ # are processed. If the value contains internal references (IE,
+ # %blah;), those are replaced with their values. IE, if the doctype
+ # contains:
+ # <!ENTITY % foo "bar">
+ # <!ENTITY yada "nanoo %foo; nanoo>
+ # then:
+ # doctype.entity('yada').value #-> "nanoo bar nanoo"
+ def value
+ if @value
+ matches = @value.scan(PEREFERENCE_RE)
+ rv = @value.clone
+ if @parent
+ matches.each do |entity_reference|
+ entity_value = @parent.entity( entity_reference[0] )
+ rv.gsub!( /%#{entity_reference};/um, entity_value )
+ end
+ end
+ return rv
+ end
+ nil
+ end
+ end
+
+ # This is a set of entity constants -- the ones defined in the XML
+ # specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
+ module EntityConst
+ # +>+
+ GT = Entity.new( 'gt', '>' )
+ # +<+
+ LT = Entity.new( 'lt', '<' )
+ # +&+
+ AMP = Entity.new( 'amp', '&' )
+ # +"+
+ QUOT = Entity.new( 'quot', '"' )
+ # +'+
+ APOS = Entity.new( 'apos', "'" )
+ end
+end
Added: trunk/lib/rexml/functions.rb
===================================================================
--- trunk/lib/rexml/functions.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/functions.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,369 @@
+module REXML
+ # If you add a method, keep in mind two things:
+ # (1) the first argument will always be a list of nodes from which to
+ # filter. In the case of context methods (such as position), the function
+ # should return an array with a value for each child in the array.
+ # (2) all method calls from XML will have "-" replaced with "_".
+ # Therefore, in XML, "local-name()" is identical (and actually becomes)
+ # "local_name()"
+ module Functions
+ @@context = nil
+ @@namespace_context = {}
+ @@variables = {}
+
+ def Functions::namespace_context=(x) ; @@namespace_context=x ; end
+ def Functions::variables=(x) ; @@variables=x ; end
+ def Functions::namespace_context ; @@namespace_context ; end
+ def Functions::variables ; @@variables ; end
+
+ def Functions::context=(value); @@context = value; end
+
+ def Functions::text( )
+ if context[:node].node_type == :element
+ return context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
+ elsif context[:node].node_type == :text
+ return context[:node].value
+ else
+ return false
+ end
+ end
+
+ def Functions::last( )
+ @@context[:size]
+ end
+
+ def Functions::position( )
+ @@context[:index]
+ end
+
+ def Functions::count( node_set )
+ node_set.size
+ end
+
+ # Since REXML is non-validating, this method is not implemented as it
+ # requires a DTD
+ def Functions::id( object )
+ end
+
+ # UNTESTED
+ def Functions::local_name( node_set=nil )
+ get_namespace( node_set ) do |node|
+ return node.local_name
+ end
+ end
+
+ def Functions::namespace_uri( node_set=nil )
+ get_namespace( node_set ) {|node| node.namespace}
+ end
+
+ def Functions::name( node_set=nil )
+ get_namespace( node_set ) do |node|
+ node.expanded_name
+ end
+ end
+
+ # Helper method.
+ def Functions::get_namespace( node_set = nil )
+ if node_set == nil
+ yield @@context[:node] if defined? context[:node].namespace
+ else
+ if node_set.namespace
+ yield node_set
+ else
+ return unless node_set.kind_of? Enumerable
+ node_set.each { |node| yield node if defined? node.namespace }
+ end
+ end
+ end
+
+ # A node-set is converted to a string by returning the string-value of the
+ # node in the node-set that is first in document order. If the node-set is
+ # empty, an empty string is returned.
+ #
+ # A number is converted to a string as follows
+ #
+ # NaN is converted to the string NaN
+ #
+ # positive zero is converted to the string 0
+ #
+ # negative zero is converted to the string 0
+ #
+ # positive infinity is converted to the string Infinity
+ #
+ # negative infinity is converted to the string -Infinity
+ #
+ # if the number is an integer, the number is represented in decimal form
+ # as a Number with no decimal point and no leading zeros, preceded by a
+ # minus sign (-) if the number is negative
+ #
+ # otherwise, the number is represented in decimal form as a Number
+ # including a decimal point with at least one digit before the decimal
+ # point and at least one digit after the decimal point, preceded by a
+ # minus sign (-) if the number is negative; there must be no leading zeros
+ # before the decimal point apart possibly from the one required digit
+ # immediately before the decimal point; beyond the one required digit
+ # after the decimal point there must be as many, but only as many, more
+ # digits as are needed to uniquely distinguish the number from all other
+ # IEEE 754 numeric values.
+ #
+ # The boolean false value is converted to the string false. The boolean
+ # true value is converted to the string true.
+ #
+ # An object of a type other than the four basic types is converted to a
+ # string in a way that is dependent on that type.
+ def Functions::string( object=nil )
+ #object = @context unless object
+ if object.instance_of? Array
+ string( object[0] )
+ elsif defined? object.node_type
+ if object.node_type == :attribute
+ object.value
+ elsif object.node_type == :element
+ object.text
+ else
+ object.to_s
+ end
+ else
+ object.to_s
+ end
+ end
+
+ # UNTESTED
+ def Functions::concat( *objects )
+ objects.join
+ end
+
+ # Fixed by Mike Stok
+ def Functions::starts_with( string, test )
+ string(string).index(string(test)) == 0
+ end
+
+ # Fixed by Mike Stok
+ def Functions::contains( string, test )
+ string(string).include? string(test)
+ end
+
+ # Kouhei fixed this
+ def Functions::substring_before( string, test )
+ ruby_string = string(string)
+ ruby_index = ruby_string.index(string(test))
+ if ruby_index.nil?
+ ""
+ else
+ ruby_string[ 0...ruby_index ]
+ end
+ end
+
+ # Kouhei fixed this too
+ def Functions::substring_after( string, test )
+ ruby_string = string(string)
+ ruby_index = ruby_string.index(string(test))
+ if ruby_index.nil?
+ ""
+ else
+ ruby_string[ ruby_index+1..-1 ]
+ end
+ end
+
+ # Take equal portions of Mike Stok and Sean Russell; mix
+ # vigorously, and pour into a tall, chilled glass. Serves 10,000.
+ def Functions::substring( string, start, length=nil )
+ ruby_string = string(string)
+ ruby_length = if length.nil?
+ ruby_string.length.to_f
+ else
+ number(length)
+ end
+ ruby_start = number(start)
+
+ # Handle the special cases
+ return '' if (
+ ruby_length.nan? or
+ ruby_start.nan? or
+ ruby_start.infinite?
+ )
+
+ infinite_length = ruby_length.infinite? == 1
+ ruby_length = ruby_string.length if infinite_length
+
+ # Now, get the bounds. The XPath bounds are 1..length; the ruby bounds
+ # are 0..length. Therefore, we have to offset the bounds by one.
+ ruby_start = ruby_start.round - 1
+ ruby_length = ruby_length.round
+
+ if ruby_start < 0
+ ruby_length += ruby_start unless infinite_length
+ ruby_start = 0
+ end
+ return '' if ruby_length <= 0
+ ruby_string[ruby_start,ruby_length]
+ end
+
+ # UNTESTED
+ def Functions::string_length( string )
+ string(string).length
+ end
+
+ # UNTESTED
+ def Functions::normalize_space( string=nil )
+ string = string(@@context[:node]) if string.nil?
+ if string.kind_of? Array
+ string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
+ else
+ string.to_s.strip.gsub(/\s+/um, ' ')
+ end
+ end
+
+ # This is entirely Mike Stok's beast
+ def Functions::translate( string, tr1, tr2 )
+ from = string(tr1)
+ to = string(tr2)
+
+ # the map is our translation table.
+ #
+ # if a character occurs more than once in the
+ # from string then we ignore the second &
+ # subsequent mappings
+ #
+ # if a charactcer maps to nil then we delete it
+ # in the output. This happens if the from
+ # string is longer than the to string
+ #
+ # there's nothing about - or ^ being special in
+ # http://www.w3.org/TR/xpath#function-translate
+ # so we don't build ranges or negated classes
+
+ map = Hash.new
+ 0.upto(from.length - 1) { |pos|
+ from_char = from[pos]
+ unless map.has_key? from_char
+ map[from_char] =
+ if pos < to.length
+ to[pos]
+ else
+ nil
+ end
+ end
+ }
+
+ string(string).unpack('U*').collect { |c|
+ if map.has_key? c then map[c] else c end
+ }.compact.pack('U*')
+ end
+
+ # UNTESTED
+ def Functions::boolean( object=nil )
+ if object.kind_of? String
+ if object =~ /\d+/u
+ return object.to_f != 0
+ else
+ return object.size > 0
+ end
+ elsif object.kind_of? Array
+ object = object.find{|x| x and true}
+ end
+ return object ? true : false
+ end
+
+ # UNTESTED
+ def Functions::not( object )
+ not boolean( object )
+ end
+
+ # UNTESTED
+ def Functions::true( )
+ true
+ end
+
+ # UNTESTED
+ def Functions::false( )
+ false
+ end
+
+ # UNTESTED
+ def Functions::lang( language )
+ lang = false
+ node = @@context[:node]
+ attr = nil
+ until node.nil?
+ if node.node_type == :element
+ attr = node.attributes["xml:lang"]
+ unless attr.nil?
+ lang = compare_language(string(language), attr)
+ break
+ else
+ end
+ end
+ node = node.parent
+ end
+ lang
+ end
+
+ def Functions::compare_language lang1, lang2
+ lang2.downcase.index(lang1.downcase) == 0
+ end
+
+ # a string that consists of optional whitespace followed by an optional
+ # minus sign followed by a Number followed by whitespace is converted to
+ # the IEEE 754 number that is nearest (according to the IEEE 754
+ # round-to-nearest rule) to the mathematical value represented by the
+ # string; any other string is converted to NaN
+ #
+ # boolean true is converted to 1; boolean false is converted to 0
+ #
+ # a node-set is first converted to a string as if by a call to the string
+ # function and then converted in the same way as a string argument
+ #
+ # an object of a type other than the four basic types is converted to a
+ # number in a way that is dependent on that type
+ def Functions::number( object=nil )
+ object = @@context[:node] unless object
+ case object
+ when true
+ Float(1)
+ when false
+ Float(0)
+ when Array
+ number(string( object ))
+ when Numeric
+ object.to_f
+ else
+ str = string( object )
+ #puts "STRING OF #{object.inspect} = #{str}"
+ if str =~ /^\d+/
+ object.to_s.to_f
+ else
+ (0.0 / 0.0)
+ end
+ end
+ end
+
+ def Functions::sum( nodes )
+ end
+
+ def Functions::floor( number )
+ number(number).floor
+ end
+
+ def Functions::ceiling( number )
+ number(number).ceil
+ end
+
+ def Functions::round( number )
+ begin
+ number(number).round
+ rescue FloatDomainError
+ number(number)
+ end
+ end
+
+ def Functions::processing_instruction( node )
+ node.node_type == :processing_instruction
+ end
+
+ def Functions::method_missing( id )
+ puts "METHOD MISSING #{id.id2name}"
+ XPath.match( @@context[:node], id.id2name )
+ end
+ end
+end
Added: trunk/lib/rexml/instruction.rb
===================================================================
--- trunk/lib/rexml/instruction.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/instruction.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,66 @@
+require "rexml/child"
+require "rexml/source"
+
+module REXML
+ # Represents an XML Instruction; IE, <? ... ?>
+ # TODO: Add parent arg (3rd arg) to constructor
+ class Instruction < Child
+ START = '<\?'
+ STOP = '\?>'
+
+ # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
+ # content is everything else.
+ attr_accessor :target, :content
+
+ # Constructs a new Instruction
+ # @param target can be one of a number of things. If String, then
+ # the target of this instruction is set to this. If an Instruction,
+ # then the Instruction is shallowly cloned (target and content are
+ # copied). If a Source, then the source is scanned and parsed for
+ # an Instruction declaration.
+ # @param content Must be either a String, or a Parent. Can only
+ # be a Parent if the target argument is a Source. Otherwise, this
+ # String is set as the content of this instruction.
+ def initialize(target, content=nil)
+ if target.kind_of? String
+ super()
+ @target = target
+ @content = content
+ elsif target.kind_of? Instruction
+ super(content)
+ @target = target.target
+ @content = target.content
+ end
+ @content.strip! if @content
+ end
+
+ def clone
+ Instruction.new self
+ end
+
+ def write writer, indent_level=-1, transitive=false, ie_hack=false
+ indent(writer, indent_level)
+ writer << START.sub(/\\/u, '')
+ writer << @target
+ writer << ' '
+ writer << @content
+ writer << STOP.sub(/\\/u, '')
+ end
+
+ # @return true if other is an Instruction, and the content and target
+ # of the other matches the target and content of this object.
+ def ==( other )
+ other.kind_of? Instruction and
+ other.target == @target and
+ other.content == @content
+ end
+
+ def node_type
+ :processing_instruction
+ end
+
+ def inspect
+ "<?p-i #{target} ...?>"
+ end
+ end
+end
Added: trunk/lib/rexml/light/node.rb
===================================================================
--- trunk/lib/rexml/light/node.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/light/node.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,196 @@
+require 'rexml/xmltokens'
+require 'rexml/light/node'
+
+# [ :element, parent, name, attributes, children* ]
+ # a = Node.new
+ # a << "B" # => <a>B</a>
+ # a.b # => <a>B<b/></a>
+ # a.b[1] # => <a>B<b/><b/><a>
+ # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
+ # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
+ # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
+module REXML
+ module Light
+ # Represents a tagged XML element. Elements are characterized by
+ # having children, attributes, and names, and can themselves be
+ # children.
+ class Node
+ NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
+ PARENTS = [ :element, :document, :doctype ]
+ # Create a new element.
+ def initialize node=nil
+ @node = node
+ if node.kind_of? String
+ node = [ :text, node ]
+ elsif node.nil?
+ node = [ :document, nil, nil ]
+ elsif node[0] == :start_element
+ node[0] = :element
+ elsif node[0] == :start_doctype
+ node[0] = :doctype
+ elsif node[0] == :start_document
+ node[0] = :document
+ end
+ end
+
+ def size
+ if PARENTS.include? @node[0]
+ @node[-1].size
+ else
+ 0
+ end
+ end
+
+ def each( &block )
+ size.times { |x| yield( at(x+4) ) }
+ end
+
+ def name
+ at(2)
+ end
+
+ def name=( name_str, ns=nil )
+ pfx = ''
+ pfx = "#{prefix(ns)}:" if ns
+ _old_put(2, "#{pfx}#{name_str}")
+ end
+
+ def parent=( node )
+ _old_put(1,node)
+ end
+
+ def local_name
+ namesplit
+ @name
+ end
+
+ def local_name=( name_str )
+ _old_put( 1, "#@prefix:#{name_str}" )
+ end
+
+ def prefix( namespace=nil )
+ prefix_of( self, namespace )
+ end
+
+ def namespace( prefix=prefix() )
+ namespace_of( self, prefix )
+ end
+
+ def namespace=( namespace )
+ @prefix = prefix( namespace )
+ pfx = ''
+ pfx = "#@prefix:" if @prefix.size > 0
+ _old_put(1, "#{pfx}#@name")
+ end
+
+ def []( reference, ns=nil )
+ if reference.kind_of? String
+ pfx = ''
+ pfx = "#{prefix(ns)}:" if ns
+ at(3)["#{pfx}#{reference}"]
+ elsif reference.kind_of? Range
+ _old_get( Range.new(4+reference.begin, reference.end, reference.exclude_end?) )
+ else
+ _old_get( 4+reference )
+ end
+ end
+
+ def =~( path )
+ XPath.match( self, path )
+ end
+
+ # Doesn't handle namespaces yet
+ def []=( reference, ns, value=nil )
+ if reference.kind_of? String
+ value = ns unless value
+ at( 3 )[reference] = value
+ elsif reference.kind_of? Range
+ _old_put( Range.new(3+reference.begin, reference.end, reference.exclude_end?), ns )
+ else
+ if value
+ _old_put( 4+reference, ns, value )
+ else
+ _old_put( 4+reference, ns )
+ end
+ end
+ end
+
+ # Append a child to this element, optionally under a provided namespace.
+ # The namespace argument is ignored if the element argument is an Element
+ # object. Otherwise, the element argument is a string, the namespace (if
+ # provided) is the namespace the element is created in.
+ def << element
+ if node_type() == :text
+ at(-1) << element
+ else
+ newnode = Node.new( element )
+ newnode.parent = self
+ self.push( newnode )
+ end
+ at(-1)
+ end
+
+ def node_type
+ _old_get(0)
+ end
+
+ def text=( foo )
+ replace = at(4).kind_of?(String)? 1 : 0
+ self._old_put(4,replace, normalizefoo)
+ end
+
+ def root
+ context = self
+ context = context.at(1) while context.at(1)
+ end
+
+ def has_name?( name, namespace = '' )
+ at(3) == name and namespace() == namespace
+ end
+
+ def children
+ self
+ end
+
+ def parent
+ at(1)
+ end
+
+ def to_s
+
+ end
+
+ private
+
+ def namesplit
+ return if @name.defined?
+ at(2) =~ NAMESPLIT
+ @prefix = '' || $1
+ @name = $2
+ end
+
+ def namespace_of( node, prefix=nil )
+ if not prefix
+ name = at(2)
+ name =~ NAMESPLIT
+ prefix = $1
+ end
+ to_find = 'xmlns'
+ to_find = "xmlns:#{prefix}" if not prefix.nil?
+ ns = at(3)[ to_find ]
+ ns ? ns : namespace_of( @node[0], prefix )
+ end
+
+ def prefix_of( node, namespace=nil )
+ if not namespace
+ name = node.name
+ name =~ NAMESPLIT
+ $1
+ else
+ ns = at(3).find { |k,v| v == namespace }
+ ns ? ns : prefix_of( node.parent, namespace )
+ end
+ end
+ end
+ end
+end
Added: trunk/lib/rexml/namespace.rb
===================================================================
--- trunk/lib/rexml/namespace.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/namespace.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,47 @@
+require 'rexml/xmltokens'
+
+module REXML
+ # Adds named attributes to an object.
+ module Namespace
+ # The name of the object, valid if set
+ attr_reader :name, :expanded_name
+ # The expanded name of the object, valid if name is set
+ attr_accessor :prefix
+ include XMLTokens
+ NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
+
+ # Sets the name and the expanded name
+ def name=( name )
+ @expanded_name = name
+ name =~ NAMESPLIT
+ if $1
+ @prefix = $1
+ else
+ @prefix = ""
+ @namespace = ""
+ end
+ @name = $2
+ end
+
+ # Compares names optionally WITH namespaces
+ def has_name?( other, ns=nil )
+ if ns
+ return (namespace() == ns and name() == other)
+ elsif other.include? ":"
+ return fully_expanded_name == other
+ else
+ return name == other
+ end
+ end
+
+ alias :local_name :name
+
+ # Fully expand the name, even if the prefix wasn't specified in the
+ # source file.
+ def fully_expanded_name
+ ns = prefix
+ return "#{ns}:#@name" if ns.size > 0
+ return @name
+ end
+ end
+end
Added: trunk/lib/rexml/node.rb
===================================================================
--- trunk/lib/rexml/node.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/node.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,66 @@
+require "rexml/parseexception"
+
+module REXML
+ # Represents a node in the tree. Nodes are never encountered except as
+ # superclasses of other objects. Nodes have siblings.
+ module Node
+ # @return the next sibling (nil if unset)
+ def next_sibling_node
+ return nil if @parent.nil?
+ @parent[ @parent.index(self) + 1 ]
+ end
+
+ # @return the previous sibling (nil if unset)
+ def previous_sibling_node
+ return nil if @parent.nil?
+ ind = @parent.index(self)
+ return nil if ind == 0
+ @parent[ ind - 1 ]
+ end
+
+ def to_s indent=-1
+ rv = ""
+ write rv,indent
+ rv
+ end
+
+ def indent to, ind
+ if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
+ indentstyle = @parent.context[:indentstyle]
+ else
+ indentstyle = ' '
+ end
+ to << indentstyle*ind unless ind<1
+ end
+
+ def parent?
+ false;
+ end
+
+
+ # Visit all subnodes of +self+ recursively
+ def each_recursive(&block) # :yields: node
+ self.elements.each {|node|
+ block.call(node)
+ node.each_recursive(&block)
+ }
+ end
+
+ # Find (and return) first subnode (recursively) for which the block
+ # evaluates to true. Returns +nil+ if none was found.
+ def find_first_recursive(&block) # :yields: node
+ each_recursive {|node|
+ return node if block.call(node)
+ }
+ return nil
+ end
+
+ # Returns the index that +self+ has in its parent's elements array, so that
+ # the following equation holds true:
+ #
+ # node == node.parent.elements[node.index_in_parent]
+ def index_in_parent
+ parent.index(self)+1
+ end
+ end
+end
Added: trunk/lib/rexml/output.rb
===================================================================
--- trunk/lib/rexml/output.rb 2006-02-13 21:32:53 UTC (rev 411)
+++ trunk/lib/rexml/output.rb 2006-02-13 23:05:37 UTC (rev 412)
@@ -0,0 +1,24 @@
+require 'rexml/encoding'
+
+module REXML
+ class Output
+ include Encoding
+
+ attr_reader :encoding
+
+ def initialize real_IO, encd="iso-8859-1"
+ @output = real_IO
+ self.encoding = encd
+
+ @to_utf = encd == UTF_8 ? false : true
+