neingeist
/
neinomaten
Archived
1
0
Fork 0

ALL YOUR LIBRARIES ARE BELONG TO US

master
neingeist 18 years ago
parent 85295f82e8
commit 6d05f8ef2d

@ -0,0 +1,3 @@
index.html
README.html
doc

@ -0,0 +1,21 @@
# Interpreter used to run the test suite and the installer.
RUBY=ruby
# Command line used to convert README.rd to HTML.
RD2HTML=rd2 -r rd/rd2html-lib.rb
# Default target: build the HTML README and the RDoc reference.
all: README.html doc/index.html
# Regenerate README.html whenever README.rd changes.
README.html: README.rd
$(RD2HTML) --html-title='htree - HTML/XML tree library' -o README README.rd
# "check" and "test" both run test-all.rb with the current directory on the load path.
check test:
$(RUBY) -I. test-all.rb
# Run the installer script.
install:
$(RUBY) install.rb
.PHONY: check test all install
# Sources handed to rdoc: htree.rb and htree/modules.rb are listed explicitly,
# followed by the htree/*.rb files whose names start with a-l and n-z.
RB = htree.rb htree/modules.rb $(wildcard htree/[a-l]*.rb) $(wildcard htree/[n-z]*.rb)
# Rebuild the RDoc output from scratch whenever any listed source changes.
doc/index.html: $(RB)
rm -rf doc
rdoc $(RB)

@ -0,0 +1,48 @@
= htree - HTML/XML tree library
htree provides a tree data structure which represents HTML and XML data.
== Features
* Permissive unified HTML/XML parser
* byte-to-byte round-tripping unparser
* XML namespace support
* Dedicated class for escaped strings. This eases sanitization.
* HTML/XHTML/XML generator
* template engine
* recursive template expansion
* converter to REXML document
== Home Page
((<URL:http://cvs.m17n.org/~akr/htree/>))
== Download
* ((<URL:http://cvs.m17n.org/viewcvs/ruby/htree.tar.gz>))
== Install
% ruby install.rb
== Reference Manual
((<URL:doc/index.html>))
== Usage Example
The following two-line script converts HTML to XHTML.
require 'htree'
HTree(STDIN).display_xml
The conversion method to REXML is provided as to_rexml.
HTree(...).to_rexml
== License
Ruby's
== Author
Tanaka Akira <akr@m17n.org>

@ -0,0 +1,97 @@
#
# = htree.rb
#
# HTML/XML document tree
#
# Author:: Tanaka Akira <akr@m17n.org>
#
# == Features
#
# - Permissive unified HTML/XML parser
# - byte-to-byte round-tripping unparser
# - XML namespace support
# - Dedicated class for escaped strings. This eases sanitization.
# - XHTML/XML generator
# - template engine: link:files/htree/template_rb.html
# - recursive template expansion
# - REXML tree generator: link:files/htree/rexml_rb.html
#
# == Example
#
# The following one-liner prints the parsed tree object.
#
# % ruby -rhtree -e 'pp HTree(ARGF)' html-file
#
# The following two-line script converts HTML to XHTML.
#
# require 'htree'
# HTree(STDIN).display_xml
#
# The conversion method to REXML is provided as to_rexml.
#
# HTree(...).to_rexml
#
# == Module/Class Hierarchy
#
# * HTree
# * HTree::Name
# * HTree::Context
# * HTree::Location
# * HTree::Node
# * HTree::Doc
# * HTree::Elem
# * HTree::Text
# * HTree::XMLDecl
# * HTree::DocType
# * HTree::ProcIns
# * HTree::Comment
# * HTree::BogusETag
# * HTree::Error
#
# == Method Summary
#
# HTree provides the following methods.
#
# - Parsing Methods
# - HTree(<i>html_string</i>) -> HTree::Doc
# - HTree.parse(<i>input</i>) -> HTree::Doc
#
# - Generation Methods
# - HTree::Node#display_xml -> STDOUT
# - HTree::Node#display_xml(<i>out</i>) -> <i>out</i>
# - HTree::Node#display_xml(<i>out</i>, <i>encoding</i>) -> <i>out</i>
# - HTree::Text#to_s -> String
#
# - Template Methods
# - HTree.expand_template{<i>template_string</i>} -> STDOUT
# - HTree.expand_template(<i>out</i>){<i>template_string</i>} -> <i>out</i>
# - HTree.expand_template(<i>out</i>, <i>encoding</i>){<i>template_string</i>} -> <i>out</i>
# - HTree.compile_template(<i>template_string</i>) -> Module
# - HTree{<i>template_string</i>} -> HTree::Doc
#
# - Traverse Methods
# - HTree::Elem#attributes -> Hash[HTree::Name -> HTree::Text]
# - HTree::Elem::Location#attributes -> Hash[HTree::Name -> HTree::Location]
#
# - Predicate Methods
# - HTree::Traverse#doc? -> true or false
# - HTree::Traverse#elem? -> true or false
# - HTree::Traverse#text? -> true or false
# - HTree::Traverse#xmldecl? -> true or false
# - HTree::Traverse#doctype? -> true or false
# - HTree::Traverse#procins? -> true or false
# - HTree::Traverse#comment? -> true or false
# - HTree::Traverse#bogusetag? -> true or false
#
# - REXML Tree Generator
# - HTree::Node#to_rexml -> REXML::Child
require 'htree/parse'
require 'htree/extract_text'
require 'htree/equality'
require 'htree/inspect'
require 'htree/display'
require 'htree/loc'
require 'htree/traverse'
require 'htree/template'
require 'htree/rexml'

@ -0,0 +1,8 @@
require 'htree/modules'
# Mixin giving read access to the child nodes stored in @children.
module HTree::Container
# +children+ returns children nodes as an array.
# The result is a copy made with dup, so modifying the returned array
# does not modify the array held in @children.
def children
@children.dup
end
end

@ -0,0 +1,69 @@
module HTree
  # A Context wraps a frozen hash which maps XML namespace prefixes to
  # namespace URIs.
  class Context
    # :stopdoc:
    DefaultNamespaces = {'xml'=>'http://www.w3.org/XML/1998/namespace'}
    DefaultNamespaces.default = ""
    DefaultNamespaces.freeze
    # :startdoc:

    # Creates a context from _namespaces_, which should be a hash or nil.
    # HTree::Context::DefaultNamespaces is used if nil is given.
    #
    # Each key of the hash must be nil (the default namespace) or a
    # non-empty string (a prefix).
    #
    # Each value must be a string.
    # The empty string "" means an unbound namespace.
    #
    # ArgumentError is raised for any other key or value.
    # A hash which is not frozen yet is copied and the copy is frozen;
    # an already frozen hash is stored as is.
    def initialize(namespaces=nil)
      namespaces = DefaultNamespaces unless namespaces
      namespaces.each_pair do |prefix, uri|
        check_namespace_prefix(prefix)
        check_namespace_uri(uri)
      end
      @namespaces = namespaces.frozen? ? namespaces : namespaces.dup.freeze
    end

    attr_reader :namespaces

    # Looks _prefix_ up in the namespace hash and returns the result.
    # For a prefix which is not defined this is the default value of the
    # hash: nil for a plain hash, "" for the tables derived from
    # DefaultNamespaces.
    def namespace_uri(prefix)
      @namespaces[prefix]
    end

    # Returns a Context whose namespaces are those of self overridden by
    # the hash _declared_namespaces_.
    # self is returned when the overrides change nothing.
    def subst_namespaces(declared_namespaces)
      merged = @namespaces.dup
      declared_namespaces.each do |prefix, uri|
        check_namespace_prefix(prefix)
        check_namespace_uri(uri)
        merged[prefix] = uri
      end
      return self if merged == @namespaces
      Context.new(merged)
    end

    private

    # A prefix is valid if it is nil or a non-empty String.
    def check_namespace_prefix(k)
      return if k.nil? || (String === k && !k.empty?)
      raise ArgumentError, "invalid namespace prefix: #{k.inspect}"
    end

    # A namespace URI is valid if it is a String.
    def check_namespace_uri(v)
      return if String === v
      raise ArgumentError, "invalid namespace URI: #{v.inspect}"
    end
  end

  # :stopdoc:
  DefaultContext = Context.new
  HTMLContext = DefaultContext.subst_namespaces(nil=>"http://www.w3.org/1999/xhtml")
  # :startdoc:
end

@ -0,0 +1,46 @@
require 'htree/output'
module HTree
  module Node
    # HTree::Node#display_xml writes the node as XML.
    #
    # <i>out</i> is the output target.
    # It should respond to <tt><<</tt>.
    # $stdout is used if it is omitted.
    #
    # <i>encoding</i> is the MIME charset (character encoding) of the output.
    # HTree::Encoder.internal_charset is used if it is omitted.
    #
    # The node is output with HTree::DefaultContext.
    #
    # HTree::Node#display_xml returns <i>out</i>.
    def display_xml(out=$stdout, encoding=HTree::Encoder.internal_charset)
      xml_encoder = HTree::Encoder.new(encoding)
      self.output(xml_encoder, HTree::DefaultContext)
      # finish is used instead of finish_with_xmldecl
      # because self already has a xml decl.
      out << xml_encoder.finish
      out
    end

    # HTree::Node#display_html writes the node as HTML.
    #
    # <i>out</i> is the output target.
    # It should respond to <tt><<</tt>.
    # $stdout is used if it is omitted.
    #
    # <i>encoding</i> is the MIME charset (character encoding) of the output.
    # HTree::Encoder.internal_charset is used if it is omitted.
    #
    # The encoder is switched to HTML output and the node is output
    # with HTree::HTMLContext.
    #
    # HTree::Node#display_html returns <i>out</i>.
    def display_html(out=$stdout, encoding=HTree::Encoder.internal_charset)
      html_encoder = HTree::Encoder.new(encoding)
      html_encoder.html_output = true
      self.output(html_encoder, HTree::HTMLContext)
      out << html_encoder.finish
      out
    end
  end
end

@ -0,0 +1,149 @@
require 'htree/modules'
require 'htree/container'
module HTree
  class Doc
    # :stopdoc:
    class << self
      alias new! new
    end
    # :startdoc:

    # Builds a document from a sequence of the following arguments.
    # [String object] converted to HTree::Text.
    # [HTree::Node object] used as a child.
    # [HTree::Doc object]
    #   its children are used as children,
    #   except HTree::XMLDecl and HTree::DocType objects.
    # [Array of String, HTree::Node and HTree::Doc] used as children.
    #
    # A HTree::Location object is converted with to_node first.
    # TypeError is raised for any other argument.
    def Doc.new(*args)
      children = []
      args.each do |arg|
        arg = arg.to_node if HTree::Location === arg
        case arg
        when Array
          arg.each do |item|
            item = item.to_node if HTree::Location === item
            case item
            when HTree::Doc
              children.concat(item.children.reject {|c|
                HTree::XMLDecl === c || HTree::DocType === c
              })
            when HTree::Node then children << item
            when String then children << Text.new(item)
            else
              # the message shows the whole array argument.
              raise TypeError, "unexpected argument: #{arg.inspect}"
            end
          end
        when HTree::Doc
          children.concat(arg.children.reject {|c|
            HTree::XMLDecl === c || HTree::DocType === c
          })
        when HTree::Node then children << arg
        when String then children << Text.new(arg)
        else
          raise TypeError, "unexpected argument: #{arg.inspect}"
        end
      end
      new!(children)
    end

    # Stores a frozen copy of _children_.
    # Every child must be a HTree::Node other than a HTree::Doc;
    # otherwise TypeError is raised listing the offending children.
    def initialize(children=[]) # :notnew:
      @children = children.dup.freeze
      rejected = @children.reject {|c| c.kind_of?(HTree::Node) and !c.kind_of?(HTree::Doc) }
      unless rejected.empty?
        listing = rejected.map {|uc| uc.inspect }.join(', ')
        raise TypeError, "Unacceptable document child: #{listing}"
      end
    end

    # Returns the child at the integer _index_, or nil if the index is
    # negative or beyond the last child.
    def get_subnode_internal(index) # :nodoc:
      raise TypeError, "invalid index: #{index.inspect}" unless Integer === index
      return nil if index < 0 || @children.length <= index
      @children[index]
    end

    # doc.subst_subnode(pairs) -> doc
    #
    # The argument _pairs_ should be a hash or an assocs.
    # Each key should be an integer which is an index into the children.
    # Values for a negative index are put before the first child and
    # values for an index past the end are put after the last child.
    #
    # Each value should be one of the following.
    # [HTree::Node object] used as is.
    # [String object] converted to HTree::Text.
    # [Array of above] the HTree::Node and String objects are used in that order.
    # [nil] the corresponding node is deleted.
    #
    #   d = HTree('<a/><b/><c/>')
    #   p d.subst_subnode({0=>HTree('<x/>'), 2=>HTree('<z/>')})
    #   p d.subst_subnode([[0,HTree('<x/>')], [2,HTree('<z/>')]])
    #   # =>
    #   #<HTree::Doc {emptyelem <x>} {emptyelem <b>} {emptyelem <z>}>
    #   #<HTree::Doc {emptyelem <x>} {emptyelem <b>} {emptyelem <z>}>
    #
    def subst_subnode(pairs)
      replacements = {}
      pairs.each do |index, value|
        unless Integer === index
          raise TypeError, "invalid index: #{index.inspect}"
        end
        value = value.to_node if HTree::Location === value
        nodes =
          case value
          when Node, String then [value]
          when Array then value.dup
          when nil then []
          else
            raise TypeError, "invalid value: #{value.inspect}"
          end
        nodes.map! do |v|
          v = v.to_node if HTree::Location === v
          case v
          when Node then v
          when String then Text.new(v)
          else
            raise TypeError, "invalid value: #{v.inspect}"
          end
        end
        # values given for the same index are accumulated in order.
        (replacements[index] ||= []).concat(nodes)
      end

      before = []
      middle = @children.dup
      after = []
      replacements.keys.sort.each do |index|
        nodes = replacements[index]
        if index < 0
          before << nodes
        elsif middle.length <= index
          after << nodes
        else
          middle[index] = nodes
        end
      end
      Doc.new([before, middle, after].flatten.compact)
    end
  end
end

@ -0,0 +1,262 @@
require 'htree/modules'
require 'htree/tag'
require 'htree/context'
require 'htree/container'
module HTree
  class Elem
    # :stopdoc:
    class << self
      alias new! new
    end
    # :startdoc:

    # The first argument _name_ should be an instance of String or HTree::Name.
    #
    # The rest of the arguments should be a sequence of the following.
    # [Hash object] used as attributes.
    # [String object] converted to HTree::Text.
    # [HTree::Node object] used as a child.
    # [HTree::Doc object]
    #   its children are used as children,
    #   except HTree::XMLDecl and HTree::DocType objects.
    # [Array of String, HTree::Node, HTree::Doc] used as children.
    # [HTree::Context object]
    #   used as the context which represents XML namespaces.
    #   This should appear once at most; ArgumentError is raised otherwise.
    #   DefaultContext is used if no context is given.
    #
    # A HTree::Location object is accepted just as a HTree::Node.
    # TypeError is raised for any other argument.
    #
    # If the rest of the arguments consists only of
    # Hash and HTree::Context objects, an empty element is created.
    #
    #   p HTree::Elem.new("e").empty_element?     # => true
    #   p HTree::Elem.new("e", []).empty_element? # => false
    def Elem.new(name, *args)
      attrs = []
      children = []
      context = nil
      args.each do |arg|
        arg = arg.to_node if HTree::Location === arg
        case arg
        when Context
          raise ArgumentError, "multiple context" if context
          context = arg
        when Hash
          arg.each {|attr_name, attr_value| attrs << [attr_name, attr_value] }
        when Array
          arg.each do |item|
            item = item.to_node if HTree::Location === item
            case item
            when HTree::Doc
              children.concat(item.children.reject {|c|
                HTree::XMLDecl === c || HTree::DocType === c
              })
            when HTree::Node then children << item
            when String then children << Text.new(item)
            else
              # the message shows the whole array argument.
              raise TypeError, "unexpected argument: #{arg.inspect}"
            end
          end
        when HTree::Doc
          children.concat(arg.children.reject {|c|
            HTree::XMLDecl === c || HTree::DocType === c
          })
        when HTree::Node then children << arg
        when String then children << Text.new(arg)
        else
          raise TypeError, "unexpected argument: #{arg.inspect}"
        end
      end
      context ||= DefaultContext
      # nil children (as opposed to an empty array) marks an empty element.
      children = nil if children.empty? && args.all? {|arg| Hash === arg || Context === arg }
      new!(STag.new(name, attrs, context), children)
    end

    # _stag_ must be a STag, _etag_ nil or an ETag, and every child a
    # HTree::Node other than a HTree::Doc; TypeError is raised otherwise.
    # The element is an empty element when both _children_ and _etag_ are nil.
    def initialize(stag, children=nil, etag=nil) # :notnew:
      raise TypeError, "HTree::STag expected: #{stag.inspect}" unless stag.class == STag
      if children
        rejected = children.reject {|c| c.kind_of?(HTree::Node) and !c.kind_of?(HTree::Doc) }
        unless rejected.empty?
          listing = rejected.map {|uc| uc.inspect }.join(', ')
          raise TypeError, "Unacceptable element child: #{listing}"
        end
      end
      if etag && !(etag.class == ETag)
        raise TypeError, "HTree::ETag expected: #{etag.inspect}"
      end
      @stag = stag
      @children = (children ? children.dup : []).freeze
      @empty = children.nil? && etag.nil?
      @etag = etag
    end

    # +context+ returns the context of the start tag.
    def context
      @stag.context
    end

    # +element_name+ returns the name of the element as reported by the start tag.
    def element_name
      @stag.element_name
    end

    # +empty_element?+ is true if the element was built without children
    # and without an end tag.
    def empty_element?
      @empty
    end

    def each_attribute(&block) # :yields: attr_name, attr_text
      @stag.each_attribute(&block)
    end

    # A String or Name index selects an attribute value,
    # an Integer index selects a child (nil if out of range).
    def get_subnode_internal(index) # :nodoc:
      case index
      when String
        attr_name = Name.parse_attribute_name(index, DefaultContext)
        update_attribute_hash[attr_name.universal_name]
      when Name
        update_attribute_hash[index.universal_name]
      when Integer
        (index < 0 || @children.length <= index) ? nil : @children[index]
      else
        raise TypeError, "invalid index: #{index.inspect}"
      end
    end

    # call-seq:
    #   elem.subst_subnode(pairs) -> elem
    #
    # The argument _pairs_ should be a hash or an assocs.
    #
    # Each key should be one of the following.
    # [HTree::Name or String object] attribute name.
    # [Integer object] child index.
    #
    # Each value should be one of the following.
    # [HTree::Node object] used as is.
    # [String object] converted to HTree::Text.
    # [Array of above] the HTree::Node and String objects are used in that order.
    # [nil] the corresponding node is deleted.
    #
    #   e = HTree('<r><a/><b/><c/></r>').root
    #   p e.subst_subnode({0=>HTree('<x/>'), 2=>HTree('<z/>')})
    #   p e.subst_subnode([[0, HTree('<x/>')], [2,HTree('<z/>')]])
    #   # =>
    #   {elem <r> {emptyelem <x>} {emptyelem <b>} {emptyelem <z>}}
    #   {elem <r> {emptyelem <x>} {emptyelem <b>} {emptyelem <z>}}
    #
    def subst_subnode(pairs)
      replacements = {}
      pairs.each do |index, value|
        case index
        when Name, Integer
          # usable as is.
        when String
          index = Name.parse_attribute_name(index, DefaultContext)
        else
          raise TypeError, "invalid index: #{index.inspect}"
        end
        value = value.to_node if HTree::Location === value
        nodes =
          case value
          when Node, String then [value]
          when Array then value.dup
          when nil then []
          else
            raise TypeError, "invalid value: #{value.inspect}"
          end
        nodes.map! do |v|
          v = v.to_node if HTree::Location === v
          case v
          when Node then v
          when String then Text.new(v)
          else
            raise TypeError, "invalid value: #{v.inspect}"
          end
        end
        # values given for the same index are accumulated in order.
        (replacements[index] ||= []).concat(nodes)
      end

      # existing attributes: replaced, dropped (empty replacement) or kept.
      attrs = []
      @stag.attributes.each do |attr_name, attr_text|
        if replacements.include?(attr_name)
          texts = replacements.delete(attr_name)
          attrs << {attr_name=>Text.concat(*texts)} unless texts.empty?
        else
          attrs << {attr_name=>attr_text}
        end
      end
      # remaining Name keys are new attributes.
      replacements.keys.each do |key|
        next unless Name === key
        texts = replacements.delete(key)
        attrs << {key=>Text.concat(*texts)} unless texts.empty?
      end

      # only Integer keys are left: they address children.
      before = []
      middle = @children.dup
      after = []
      replacements.keys.sort.each do |index|
        nodes = replacements[index]
        if index < 0
          before << nodes
        elsif middle.length <= index
          after << nodes
        else
          middle[index] = nodes
        end
      end
      children = [before, middle, after].flatten

      new_args = [@stag.element_name, @stag.context]
      # an empty element stays empty unless it gained children.
      new_args << children unless children.empty? && @empty
      Elem.new(*(new_args + attrs))
    end
  end

  module Elem::Trav
    private

    # Returns a hash from universal attribute names to attribute texts.
    # The hash is built on first use and cached in @attribute_hash.
    def update_attribute_hash
      return @attribute_hash if defined?(@attribute_hash)
      table = {}
      each_attribute do |name, text|
        table[name.universal_name] = text
      end
      @attribute_hash = table
    end
  end
end

@ -0,0 +1,212 @@
require 'iconv'
module HTree
# HTree::Encoder accumulates output in a buffer while converting it from
# the internal encoding to the output encoding with Iconv.
# It also keeps a converter for each sub-charset listed in SubCharset for
# the output encoding, and drops a converter as soon as it fails or yields
# a result different from the main conversion (see minimal_charset).
class Encoder
# HTree::Encoder.internal_charset returns the MIME charset corresponding to $KCODE.
#
# - 'ISO-8859-1' when $KCODE=='NONE'
# - 'UTF-8' when $KCODE=='UTF8'
# - 'EUC-JP' when $KCODE=='EUC'
# - 'Shift_JIS' when $KCODE=='SJIS'
#
# This mapping ignores EUC-KR and various single byte charset other than ISO-8859-1 at least.
# This should be fixed when Ruby is m17nized.
def Encoder.internal_charset
KcodeCharset[$KCODE]
end
# _output_encoding_ is the charset of the generated output.
# _internal_encoding_ is the charset of the strings given to the output
# methods; HTree::Encoder.internal_charset is used if it is omitted.
# HTML output mode is off initially.
def initialize(output_encoding, internal_encoding=HTree::Encoder.internal_charset)
@buf = ''
@internal_encoding = internal_encoding
@output_encoding = output_encoding
@ic = Iconv.new(output_encoding, @internal_encoding)
# pattern matching the first character of a string in the internal encoding.
@charpat = FirstCharPattern[internal_encoding]
@subcharset_list = SubCharset[output_encoding] || []
# one converter per candidate sub-charset; entries are removed by
# output_string and finish once a sub-charset turns out to be unusable.
@subcharset_ic = {}
@subcharset_list.each {|subcharset|
@subcharset_ic[subcharset] = Iconv.new(subcharset, @internal_encoding)
}
@html_output = false
end
# :stopdoc:
# true if the encoder is in HTML output mode.
def html_output?
@html_output
end
# switches HTML output mode on or off.
def html_output=(flag)
@html_output = flag
end
# In HTML output mode this calls _pre_, _body_ and then _post_ with _out_;
# otherwise only _body_ is called.  It returns _out_.
def output_cdata_content_do(out, pre, body, post)
if @html_output
pre.call
body.call
post.call(out)
else
body.call
end
return out
end
# outputs '/' unless the encoder is in HTML output mode.
def output_slash_if_xml
if !@html_output
output_string('/')
end
end
# In HTML output mode the HTree::Text nodes of _content_ are concatenated
# and output with output_cdata; other nodes are silently skipped.
# Otherwise each node of _content_ is output with _context_.
def output_cdata_content(content, context)
if @html_output
# xxx: should raise an error for non-text node?
texts = content.grep(HTree::Text)
text = HTree::Text.concat(*texts)
text.output_cdata(self)
else
content.each {|n| n.output(self, context) }
end
end
# joins _args_ and outputs the result.
# ArgumentError is raised if the joined string contains '</'.
def output_cdata_for_html(*args)
str = args.join('')
if %r{</} =~ str
raise ArgumentError, "cdata contains '</' : #{str.inspect}"
end
output_string str
end
# appends _external_str_ to the buffer.
# _external_str_ defaults to _internal_str_ converted to the output encoding.
# A sub-charset converter is dropped if it raises Iconv::Failure for
# _internal_str_ or yields something different from _external_str_.
# It returns nil.
def output_string(internal_str, external_str=@ic.iconv(internal_str))
@buf << external_str
@subcharset_ic.reject! {|subcharset, ic|
begin
ic.iconv(internal_str) != external_str
rescue Iconv::Failure
true
end
}
nil
end
# outputs _string_ converted to the output encoding.
# When the conversion fails, the part converted so far is output, the
# first character of the unconverted rest is replaced and the remainder
# is retried:
# - if @charpat does not match the rest, one element is skipped and '?' is output.
# - otherwise the character is output as a numeric character reference
#   built from its UTF-8 conversion, or as '?' if that conversion fails.
def output_text(string)
begin
output_string string, @ic.iconv(string)
rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => e
# e.success is the converted prefix, e.failed the unconverted rest.
output_string string[0, string.length - e.failed.length], e.success
unless @charpat =~ e.failed
# xxx: should be configurable?
#raise ArgumentError, "cannot extract first character: #{e.failed.dump}"
string = e.failed[1, e.failed.length-1]
output_string '?'
retry
end
char = $&
rest = $'
begin
ucode = Iconv.conv("UTF-8", @internal_encoding, char).unpack("U")[0]
char = "&##{ucode};"
rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
# xxx: should be configurable?
char = '?'
end
output_string char
string = rest
retry
end
end
# character references used to escape dynamic text and attribute values.
ChRef = {
'&' => '&amp;',
'<' => '&lt;',
'>' => '&gt;',
'"' => '&quot;',
}
# outputs _string_ as text content.
# If _string_ responds to rcdata, only '<' and '>' of its rcdata are escaped;
# otherwise '&', '<' and '>' of string.to_s are escaped.
def output_dynamic_text(string)
if string.respond_to? :rcdata
output_text(string.rcdata.gsub(/[<>]/) { ChRef[$&] })
else
output_text(string.to_s.gsub(/[&<>]/) { ChRef[$&] })
end
end
# same as output_dynamic_text but '"' is escaped too.
def output_dynamic_attvalue(string)
if string.respond_to? :rcdata
output_text(string.rcdata.gsub(/[<>"]/) { ChRef[$&] })
else
output_text(string.to_s.gsub(/[&<>"]/) { ChRef[$&] })
end
end
# :startdoc:
# closes the main converter, appends what it returns to the buffer and
# returns the buffer.
# The sub-charset converters are closed as well; one is dropped if closing
# it fails or yields a different result.
def finish
external_str = @ic.close
@buf << external_str
@subcharset_ic.reject! {|subcharset, ic|
begin
ic.close != external_str
rescue Iconv::Failure
true
end
}
@buf
end
# same as finish, but the result is prefixed with an XML declaration
# whose encoding is minimal_charset.
# The declaration is converted from US-ASCII to the output encoding.
def finish_with_xmldecl
content = finish
xmldecl = Iconv.conv(@output_encoding, 'US-ASCII',
"<?xml version=\"1.0\" encoding=\"#{minimal_charset}\"?>")
xmldecl + content
end
# returns the first sub-charset of the output encoding whose converter
# has not been dropped, or the output encoding itself if there is none.
def minimal_charset
@subcharset_list.each {|subcharset|
if @subcharset_ic.include? subcharset
return subcharset
end
}
@output_encoding
end
# :stopdoc:
# $KCODE value => MIME charset.
KcodeCharset = {
'EUC' => 'EUC-JP',
'SJIS' => 'Shift_JIS',
'UTF8' => 'UTF-8',
'NONE' => 'ISO-8859-1',
}
# MIME charset => pattern matching one character at the beginning of a string.
FirstCharPattern = {
'EUC-JP' => /\A(?:
[\x00-\x7f]
|[\xa1-\xfe][\xa1-\xfe]
|\x8e[\xa1-\xfe]
|\x8f[\xa1-\xfe][\xa1-\xfe])/nx,
'Shift_JIS' => /\A(?:
[\x00-\x7f]
|[\x81-\x9f][\x40-\x7e\x80-\xfc]
|[\xa1-\xdf]
|[\xe0-\xfc][\x40-\x7e\x80-\xfc])/nx,
'UTF-8' => /\A(?:
[\x00-\x7f]
|[\xc0-\xdf][\x80-\xbf]
|[\xe0-\xef][\x80-\xbf][\x80-\xbf]
|[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]
|[\xf8-\xfb][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf]
|[\xfc-\xfd][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf])/nx,
'ISO-8859-1' => /\A[\x00-\xff]/n
}
# output charset => sub-charsets tried by minimal_charset, in order.
# Charsets not listed here use ['US-ASCII'].
SubCharset = {
'ISO-2022-JP-2' => ['US-ASCII', 'ISO-2022-JP'],
'ISO-2022-JP-3' => ['US-ASCII', 'ISO-2022-JP'],
'UTF-16BE' => [],
'UTF-16LE' => [],
'UTF-16' => [],
}
SubCharset.default = ['US-ASCII']
# :startdoc:
end
end

@ -0,0 +1,219 @@
require 'htree/doc'
require 'htree/elem'
require 'htree/leaf'
require 'htree/tag'
require 'htree/raw_string'
require 'htree/context'
module HTree
  # compare tree structures.
  # The comparison uses the "usual" equal objects, which leave out the
  # raw strings kept by the nodes.
  def ==(other)
    check_equality(self, other, :usual_equal_object)
  end
  alias eql? ==

  # hash value for the tree structure.
  # It is computed from usual_equal_object once and cached.
  def hash
    @hash_code = usual_equal_object.hash unless defined? @hash_code
    @hash_code
  end

  # :stopdoc:

  # cached result of make_usual_equal_object.
  def usual_equal_object
    @usual_equal_object = make_usual_equal_object unless defined? @usual_equal_object
    @usual_equal_object
  end

  def make_usual_equal_object
    raise NotImplementedError
  end

  # cached result of make_exact_equal_object.
  def exact_equal_object
    @exact_equal_object = make_exact_equal_object unless defined? @exact_equal_object
    @exact_equal_object
  end

  def make_exact_equal_object
    raise NotImplementedError
  end

  # compare tree structures using the "exact" equal objects.
  def exact_equal?(other)
    check_equality(self, other, :exact_equal_object)
  end

  # Recursively compares _obj1_ and _obj2_.
  # Objects of different classes are never equal.
  # Arrays are compared element by element.
  # Objects responding to _equal_object_method_ are compared through the
  # objects that method returns; anything else is compared with ==.
  def check_equality(obj1, obj2, equal_object_method)
    return false if obj1.class != obj2.class
    if obj1.class == Array
      return false if obj1.length != obj2.length
      # first pass: compare only the classes of the elements.
      obj1.each_with_index do |item, idx|
        return false if item.class != obj2[idx].class
      end
      # second pass: compare the elements recursively.
      obj1.each_with_index do |item, idx|
        return false unless check_equality(item, obj2[idx], equal_object_method)
      end
      return true
    end
    return obj1 == obj2 unless obj1.respond_to? equal_object_method
    check_equality(obj1.send(equal_object_method),
                   obj2.send(equal_object_method),
                   equal_object_method)
  end

  class Doc
    alias exact_equal_object children
    alias usual_equal_object children
  end

  class Elem
    def make_exact_equal_object
      [@stag, @children, @empty, @etag]
    end

    def make_usual_equal_object
      [@stag, @children]
    end
  end

  class Name
    def make_exact_equal_object
      [@namespace_prefix, @namespace_uri, @local_name]
    end

    def make_usual_equal_object
      if xmlns?
        @local_name
      else
        [@namespace_uri, @local_name]
      end
    end
  end

  module Util
    module_function

    # <=> which also accepts nil; nil sorts before everything else.
    def cmp_with_nil(a, b)
      return(b.nil? ? 0 : -1) if a.nil?
      return 1 if b.nil?
      a <=> b
    end
  end

  class Context
    # [prefix, uri] pairs sorted by prefix.
    def make_exact_equal_object
      sorted_prefixes = @namespaces.keys.sort {|a, b| Util.cmp_with_nil(a, b) }
      sorted_prefixes.map {|prefix| [prefix, @namespaces[prefix]] }
    end

    # make_usual_equal_object is not used through STag#make_usual_equal_object
    # NotImplementedError is suitable?
    alias make_usual_equal_object make_exact_equal_object
  end

  class STag
    def make_exact_equal_object
      sorted_attrs = @attributes.sort {|(n1, t1), (n2, t2)|
        Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? ||
        Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? ||
        Util.cmp_with_nil(n1.local_name, n2.local_name)
      }
      [@raw_string, @name, sorted_attrs, @inherited_context]
    end

    # attributes for which xmlns? is true are left out.
    def make_usual_equal_object
      plain_attrs = @attributes.find_all {|n, t| !n.xmlns? }
      sorted_attrs = plain_attrs.sort {|(n1, t1), (n2, t2)|
        Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? ||
        Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? ||
        Util.cmp_with_nil(n1.local_name, n2.local_name)
      }
      [@name, sorted_attrs]
    end
  end

  class ETag
    def make_exact_equal_object
      [@raw_string, @qualified_name]
    end

    alias usual_equal_object qualified_name
  end

  class Text
    def make_exact_equal_object
      [@raw_string, @rcdata]
    end

    def make_usual_equal_object
      @normalized_rcdata
    end
  end

  class XMLDecl
    def make_exact_equal_object
      [@raw_string, @version, @encoding, @standalone]
    end

    def make_usual_equal_object
      [@version, @encoding, @standalone]
    end
  end

  class DocType
    def make_exact_equal_object
      [@raw_string, @root_element_name, @system_identifier, @public_identifier]
    end

    def make_usual_equal_object
      [@root_element_name, @system_identifier, @public_identifier]
    end
  end

  class ProcIns
    def make_exact_equal_object
      [@raw_string, @target, @content]
    end

    def make_usual_equal_object
      [@target, @content]
    end
  end

  class Comment
    def make_exact_equal_object
      [@raw_string, @content]
    end

    alias usual_equal_object content
  end

  class BogusETag
    def make_exact_equal_object
      [@etag]
    end

    alias usual_equal_object make_exact_equal_object
  end

  class Location
    def make_exact_equal_object
      [@parent, @index, @node]
    end

    alias usual_equal_object make_exact_equal_object
  end
  # :startdoc:
end

@ -0,0 +1,37 @@
require 'htree/text'
require 'htree/doc'
require 'htree/elem'
module HTree
  module Node
    # +extract_text+ is to be overridden;
    # this definition raises NotImplementedError.
    def extract_text
      raise NotImplementedError
    end
  end

  class Location
    # +extract_text+ returns the text extracted from the node of the location.
    def extract_text
      to_node.extract_text
    end
  end

  # :stopdoc:
  module Container
    # concatenation of the texts extracted from the children.
    def extract_text
      pieces = @children.map {|child| child.extract_text }
      Text.concat(*pieces)
    end
  end

  module Leaf
    # a leaf yields an empty text.
    def extract_text
      Text.new('')
    end
  end

  class Text
    # a text is its own extracted text.
    def extract_text
      self
    end
  end
  # :startdoc:
end

@ -0,0 +1,32 @@
require 'htree/modules'
module HTree
  # :stopdoc:

  # Runs the block with a per-thread table of frozen strings available to
  # HTree.frozen_string and returns the value of the block.
  # A nested call reuses the table of the outer call;
  # the outermost call removes the table when the block finishes.
  def HTree.with_frozen_string_hash
    return yield if Thread.current[:htree_frozen_string_hash]
    begin
      Thread.current[:htree_frozen_string_hash] = {}
      yield
    ensure
      Thread.current[:htree_frozen_string_hash] = nil
    end
  end

  # Returns a frozen string with the content of _str_.
  # _str_ itself is returned if it is already frozen, a frozen copy otherwise.
  # Inside HTree.with_frozen_string_hash, the string registered first for
  # a given content is returned for every later request with that content.
  def HTree.frozen_string(str)
    pool = Thread.current[:htree_frozen_string_hash]
    if pool
      cached = pool[str]
      return cached if cached
    end
    str = str.dup.freeze unless str.frozen?
    pool[str] = str if pool
    str
  end
  # :startdoc:
end

@ -0,0 +1,193 @@
require 'htree/encoder'
require 'htree/output'
# :stopdoc:
module HTree
  module Node
    # Returns Ruby source code which outputs this node.
    # _outvar_ and _contextvar_ are the names of the variables the
    # generated code uses for the output target and the context.
    # The node is output into a GenCode with a context made from a copy of
    # HTree::Context::DefaultNamespaces whose default value is nil.
    def generate_xml_output_code(outvar='out', contextvar='top_context')
      table = HTree::Context::DefaultNamespaces.dup
      table.default = nil
      generator = HTree::GenCode.new(outvar, contextvar)
      output(generator, Context.new(table))
      generator.finish
    end
  end

  # GenCode accepts the same output calls as an encoder, but instead of
  # producing output it collects Ruby code which replays those calls on
  # the variable named _outvar_.
  # Consecutive strings and texts are merged in a buffer before a line of
  # code is generated for them.
  class GenCode
    def initialize(outvar, contextvar, internal_encoding=Encoder.internal_charset)
      @outvar = outvar
      @contextvar = contextvar
      @internal_encoding = internal_encoding
      @html_output = nil
      # @state tells what @buffer holds: :none, :string or :text.
      @state = :none
      @buffer = ''
      @code = ''
    end

    attr_reader :outvar, :contextvar

    def html_output?
      @html_output
    end

    def html_output=(flag)
      @html_output = flag
    end

    # Output target collecting the content of a CDATA element for HTML.
    # Only strings and texts are accepted.
    class CDATABuffer
      def initialize
        @buf = ''
      end

      def html_output?
        true
      end

      def not_valid_for_html_cdata(*args)
        raise ArgumentError, "CDATA content only accept texts."
      end
      alias output_slash_if_xml not_valid_for_html_cdata
      alias output_cdata_content not_valid_for_html_cdata
      alias output_dynamic_attvalue not_valid_for_html_cdata

      def output_string(string)
        @buf << string
      end

      def output_text(string)
        @buf << string
      end

      ChRef = {
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
      }

      # appends _string_ escaped in the same way as the encoder does
      # for dynamic text.
      def output_dynamic_text(string)
        escaped =
          if string.respond_to? :rcdata
            string.rcdata.gsub(/[<>]/) { ChRef[$&] }
          else
            string.to_s.gsub(/[&<>]/) { ChRef[$&] }
          end
        @buf << escaped
      end

      # Returns the collected content as a string made by
      # HTree::Text.parse_pcdata from the buffer.
      # ArgumentError is raised if the buffer contains '<' or '>',
      # or if the resulting string contains '</'.
      def result
        raise ArgumentError, "cdata contains non-text : #{@buf.inspect}" if %r{[<>]} =~ @buf
        str = HTree::Text.parse_pcdata(@buf).to_s
        raise ArgumentError, "cdata contains '</' : #{str.inspect}" if %r{</} =~ str
        str
      end
    end

    # generates a call of output_cdata_content_do whose body outputs the
    # nodes of _content_.
    def output_cdata_content(content, context)
      tmp_outvar = @outvar + '_tmp'
      output_logic_line "#{@outvar} = #{@outvar}.output_cdata_content_do(#{@outvar},"
      output_logic_line "lambda { #{@outvar} = HTree::GenCode::CDATABuffer.new },"
      output_logic_line "lambda {"
      content.each do |node|
        node.output(self, context)
      end
      output_logic_line "},"
      output_logic_line "lambda {|#{tmp_outvar}| #{tmp_outvar}.output_string(#{@outvar}.result) })"
    end

    def output_slash_if_xml
      output_logic_line "#{@outvar}.output_slash_if_xml"
    end

    # _expr_ is a Ruby expression evaluated by the generated code.
    def output_dynamic_text(expr)
      flush_buffer
      @code << "#{@outvar}.output_dynamic_text((#{expr}))\n"
    end

    # _expr_ and _context_expr_ are Ruby expressions evaluated by the
    # generated code.
    def output_dynamic_tree(expr, context_expr)
      flush_buffer
      @code << "(#{expr}).output(#{@outvar}, #{context_expr})\n"
    end

    # _expr_ is a Ruby expression evaluated by the generated code.
    def output_dynamic_attvalue(expr)
      flush_buffer
      @code << "#{@outvar}.output_dynamic_attvalue((#{expr}))\n"
    end

    # appends _line_ to the generated code as is.
    def output_logic_line(line)
      flush_buffer
      @code << line << "\n"
    end

    def output_string(str)
      return if str.empty?
      unless @state == :string
        flush_buffer
        @state = :string
      end
      @buffer << str
    end

    def output_text(str)
      return if str.empty?
      if /\A[\s\x21-\x7e]+\z/ =~ str && @state == :string
        # Assumption: external charset can represent white spaces and
        # ASCII printable.
        # So such a text can be merged into the pending string.
        output_string(str)
        return
      end
      unless @state == :text
        flush_buffer
        @state = :text
      end
      @buffer << str
    end

    ChRef = {
      '&' => '&amp;',
      '>' => '&gt;',
      '<' => '&lt;',
      '"' => '&quot;',
    }

    # generates, for each namespace declaration, code which outputs the
    # xmlns attribute only if the context of the generated code does not
    # already bind the prefix to the same URI.
    def output_xmlns(namespaces)
      return if namespaces.empty?
      flush_buffer
      namespaces.each do |prefix, uri|
        prefix_code, aname = prefix ? [prefix.dump, "xmlns:#{prefix}"] : ["nil", "xmlns"]
        @code << "if #{@contextvar}.namespace_uri(#{prefix_code}) != #{uri.dump}\n"
        output_string " #{aname}=\""
        output_text uri.gsub(/[&<>"]/) {|s| ChRef[s] }
        output_string '"'
        flush_buffer
        @code << "end\n"
      end
    end

    # turns the pending buffer into a line of generated code.
    def flush_buffer
      return if @buffer.empty?
      if @state == :string
        @code << "#{@outvar}.output_string #{@buffer.dump}\n"
        @buffer = ''
      elsif @state == :text
        @code << "#{@outvar}.output_text #{@buffer.dump}\n"
        @buffer = ''
      end
    end

    # returns the generated code.
    def finish
      flush_buffer
      @code
    end
  end
end
# :startdoc:

@ -0,0 +1,672 @@
module HTree
# The code below is auto-generated. Don't edit manually.
# :stopdoc:
NamedCharacters =
{"AElig"=>198, "Aacute"=>193, "Acirc"=>194, "Agrave"=>192, "Alpha"=>913,
"Aring"=>197, "Atilde"=>195, "Auml"=>196, "Beta"=>914, "Ccedil"=>199,
"Chi"=>935, "Dagger"=>8225, "Delta"=>916, "ETH"=>208, "Eacute"=>201,
"Ecirc"=>202, "Egrave"=>200, "Epsilon"=>917, "Eta"=>919, "Euml"=>203,
"Gamma"=>915, "Iacute"=>205, "Icirc"=>206, "Igrave"=>204, "Iota"=>921,
"Iuml"=>207, "Kappa"=>922, "Lambda"=>923, "Mu"=>924, "Ntilde"=>209, "Nu"=>925,
"OElig"=>338, "Oacute"=>211, "Ocirc"=>212, "Ograve"=>210, "Omega"=>937,
"Omicron"=>927, "Oslash"=>216, "Otilde"=>213, "Ouml"=>214, "Phi"=>934,
"Pi"=>928, "Prime"=>8243, "Psi"=>936, "Rho"=>929, "Scaron"=>352, "Sigma"=>931,
"THORN"=>222, "Tau"=>932, "Theta"=>920, "Uacute"=>218, "Ucirc"=>219,
"Ugrave"=>217, "Upsilon"=>933, "Uuml"=>220, "Xi"=>926, "Yacute"=>221,
"Yuml"=>376, "Zeta"=>918, "aacute"=>225, "acirc"=>226, "acute"=>180,
"aelig"=>230, "agrave"=>224, "alefsym"=>8501, "alpha"=>945, "amp"=>38,
"and"=>8743, "ang"=>8736, "apos"=>39, "aring"=>229, "asymp"=>8776,
"atilde"=>227, "auml"=>228, "bdquo"=>8222, "beta"=>946, "brvbar"=>166,
"bull"=>8226, "cap"=>8745, "ccedil"=>231, "cedil"=>184, "cent"=>162,
"chi"=>967, "circ"=>710, "clubs"=>9827, "cong"=>8773, "copy"=>169,
"crarr"=>8629, "cup"=>8746, "curren"=>164, "dArr"=>8659, "dagger"=>8224,
"darr"=>8595, "deg"=>176, "delta"=>948, "diams"=>9830, "divide"=>247,
"eacute"=>233, "ecirc"=>234, "egrave"=>232, "empty"=>8709, "emsp"=>8195,
"ensp"=>8194, "epsilon"=>949, "equiv"=>8801, "eta"=>951, "eth"=>240,
"euml"=>235, "euro"=>8364, "exist"=>8707, "fnof"=>402, "forall"=>8704,
"frac12"=>189, "frac14"=>188, "frac34"=>190, "frasl"=>8260, "gamma"=>947,
"ge"=>8805, "gt"=>62, "hArr"=>8660, "harr"=>8596, "hearts"=>9829,
"hellip"=>8230, "iacute"=>237, "icirc"=>238, "iexcl"=>161, "igrave"=>236,
"image"=>8465, "infin"=>8734, "int"=>8747, "iota"=>953, "iquest"=>191,
"isin"=>8712, "iuml"=>239, "kappa"=>954, "lArr"=>8656, "lambda"=>955,
"lang"=>9001, "laquo"=>171, "larr"=>8592, "lceil"=>8968, "ldquo"=>8220,
"le"=>8804, "lfloor"=>8970, "lowast"=>8727, "loz"=>9674, "lrm"=>8206,
"lsaquo"=>8249, "lsquo"=>8216, "lt"=>60, "macr"=>175, "mdash"=>8212,
"micro"=>181, "middot"=>183, "minus"=>8722, "mu"=>956, "nabla"=>8711,
"nbsp"=>160, "ndash"=>8211, "ne"=>8800, "ni"=>8715, "not"=>172, "notin"=>8713,
"nsub"=>8836, "ntilde"=>241, "nu"=>957, "oacute"=>243, "ocirc"=>244,
"oelig"=>339, "ograve"=>242, "oline"=>8254, "omega"=>969, "omicron"=>959,
"oplus"=>8853, "or"=>8744, "ordf"=>170, "ordm"=>186, "oslash"=>248,
"otilde"=>245, "otimes"=>8855, "ouml"=>246, "para"=>182, "part"=>8706,
"permil"=>8240, "perp"=>8869, "phi"=>966, "pi"=>960, "piv"=>982,
"plusmn"=>177, "pound"=>163, "prime"=>8242, "prod"=>8719, "prop"=>8733,
"psi"=>968, "quot"=>34, "rArr"=>8658, "radic"=>8730, "rang"=>9002,
"raquo"=>187, "rarr"=>8594, "rceil"=>8969, "rdquo"=>8221, "real"=>8476,
"reg"=>174, "rfloor"=>8971, "rho"=>961, "rlm"=>8207, "rsaquo"=>8250,
"rsquo"=>8217, "sbquo"=>8218, "scaron"=>353, "sdot"=>8901, "sect"=>167,
"shy"=>173, "sigma"=>963, "sigmaf"=>962, "sim"=>8764, "spades"=>9824,
"sub"=>8834, "sube"=>8838, "sum"=>8721, "sup"=>8835, "sup1"=>185, "sup2"=>178,
"sup3"=>179, "supe"=>8839, "szlig"=>223, "tau"=>964, "there4"=>8756,
"theta"=>952, "thetasym"=>977, "thinsp"=>8201, "thorn"=>254, "tilde"=>732,
"times"=>215, "trade"=>8482, "uArr"=>8657, "uacute"=>250, "uarr"=>8593,
"ucirc"=>251, "ugrave"=>249, "uml"=>168, "upsih"=>978, "upsilon"=>965,
"uuml"=>252, "weierp"=>8472, "xi"=>958, "yacute"=>253, "yen"=>165,
"yuml"=>255, "zeta"=>950, "zwj"=>8205, "zwnj"=>8204}
NamedCharactersPattern = /\A(?-mix:AElig|Aacute|Acirc|Agrave|Alpha|Aring|Atilde|Auml|Beta|Ccedil|Chi|Dagger|Delta|ETH|Eacute|Ecirc|Egrave|Epsilon|Eta|Euml|Gamma|Iacute|Icirc|Igrave|Iota|Iuml|Kappa|Lambda|Mu|Ntilde|Nu|OElig|Oacute|Ocirc|Ograve|Omega|Omicron|Oslash|Otilde|Ouml|Phi|Pi|Prime|Psi|Rho|Scaron|Sigma|THORN|Tau|Theta|Uacute|Ucirc|Ugrave|Upsilon|Uuml|Xi|Yacute|Yuml|Zeta|aacute|acirc|acute|aelig|agrave|alefsym|alpha|amp|and|ang|apos|aring|asymp|atilde|auml|bdquo|beta|brvbar|bull|cap|ccedil|cedil|cent|chi|circ|clubs|cong|copy|crarr|cup|curren|dArr|dagger|darr|deg|delta|diams|divide|eacute|ecirc|egrave|empty|emsp|ensp|epsilon|equiv|eta|eth|euml|euro|exist|fnof|forall|frac12|frac14|frac34|frasl|gamma|ge|gt|hArr|harr|hearts|hellip|iacute|icirc|iexcl|igrave|image|infin|int|iota|iquest|isin|iuml|kappa|lArr|lambda|lang|laquo|larr|lceil|ldquo|le|lfloor|lowast|loz|lrm|lsaquo|lsquo|lt|macr|mdash|micro|middot|minus|mu|nabla|nbsp|ndash|ne|ni|not|notin|nsub|ntilde|nu|oacute|ocirc|oelig|ograve|oline|omega|omicron|oplus|or|ordf|ordm|oslash|otilde|otimes|ouml|para|part|permil|perp|phi|pi|piv|plusmn|pound|prime|prod|prop|psi|quot|rArr|radic|rang|raquo|rarr|rceil|rdquo|real|reg|rfloor|rho|rlm|rsaquo|rsquo|sbquo|scaron|sdot|sect|shy|sigma|sigmaf|sim|spades|sub|sube|sum|sup|sup1|sup2|sup3|supe|szlig|tau|there4|theta|thetasym|thinsp|thorn|tilde|times|trade|uArr|uacute|uarr|ucirc|ugrave|uml|upsih|upsilon|uuml|weierp|xi|yacute|yen|yuml|zeta|zwj|zwnj)\z/
ElementContent =
{"h6"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"object"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "param", "pre", "q",
"s", "samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"dl"=>["dd", "dt"],
"p"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"acronym"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"code"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"ul"=>["li"],
"tt"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"label"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"form"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"q"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"thead"=>["tr"],
"area"=>:EMPTY,
"td"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"title"=>[],
"dir"=>["li"],
"s"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"ol"=>["li"],
"hr"=>:EMPTY,
"applet"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "param", "pre", "q",
"s", "samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"table"=>["caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"],
"legend"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"cite"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"a"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"html"=>
["a", "abbr", "acronym", "address", "applet", "b", "base", "basefont", "bdo",
"big", "blockquote", "body", "br", "button", "center", "cite", "code",
"dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2",
"h3", "h4", "h5", "h6", "head", "hr", "i", "iframe", "img", "input",
"isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object",
"ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span",
"strike", "strong", "sub", "sup", "table", "textarea", "title", "tt", "u",
"ul", "var"],
"u"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"blockquote"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"center"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"b"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"base"=>:EMPTY,
"th"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"link"=>:EMPTY,
"var"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"samp"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"div"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"textarea"=>[],
"pre"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"head"=>["base", "isindex", "title"],
"span"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"br"=>:EMPTY,
"script"=>:CDATA,
"noframes"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"style"=>:CDATA,
"meta"=>:EMPTY,
"dt"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"option"=>[],
"kbd"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"big"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"tfoot"=>["tr"],
"sup"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"bdo"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"isindex"=>:EMPTY,
"dfn"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"fieldset"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "legend",
"map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"em"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"font"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"tbody"=>["tr"],
"noscript"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"li"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"col"=>:EMPTY,
"small"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"dd"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"i"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"menu"=>["li"],
"strong"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"basefont"=>:EMPTY,
"img"=>:EMPTY,
"optgroup"=>["option"],
"map"=>
["address", "area", "blockquote", "center", "dir", "div", "dl", "fieldset",
"form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu",
"noframes", "noscript", "ol", "p", "pre", "table", "ul"],
"h1"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"address"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "p", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"sub"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"param"=>:EMPTY,
"input"=>:EMPTY,
"h2"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"abbr"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"h3"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"strike"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"body"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"ins"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"button"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"h4"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"select"=>["optgroup", "option"],
"caption"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"colgroup"=>["col"],
"tr"=>["td", "th"],
"del"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"],
"h5"=>
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
"tt", "u", "var"],
"iframe"=>
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
"sup", "table", "textarea", "tt", "u", "ul", "var"]}
ElementInclusions =
{"head"=>["link", "meta", "object", "script", "style"], "body"=>["del", "ins"]}
ElementExclusions =
{"button"=>
["a", "button", "fieldset", "form", "iframe", "input", "isindex", "label",
"select", "textarea"],
"a"=>["a"],
"dir"=>
["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form",
"h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", "noframes",
"noscript", "ol", "p", "pre", "table", "ul"],
"title"=>["link", "meta", "object", "script", "style"],
"pre"=>
["applet", "basefont", "big", "font", "img", "object", "small", "sub",
"sup"],
"form"=>["form"],
"menu"=>
["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form",
"h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", "noframes",
"noscript", "ol", "p", "pre", "table", "ul"],
"label"=>["label"]}
OmittedAttrName =
{"h6"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"object"=>
{"bottom"=>"align", "declare"=>"declare", "left"=>"align", "ltr"=>"dir",
"middle"=>"align", "right"=>"align", "rtl"=>"dir", "top"=>"align"},
"dl"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
"p"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"acronym"=>{"ltr"=>"dir", "rtl"=>"dir"},
"code"=>{"ltr"=>"dir", "rtl"=>"dir"},
"ul"=>
{"circle"=>"type", "compact"=>"compact", "disc"=>"type", "ltr"=>"dir",
"rtl"=>"dir", "square"=>"type"},
"tt"=>{"ltr"=>"dir", "rtl"=>"dir"},
"label"=>{"ltr"=>"dir", "rtl"=>"dir"},
"form"=>{"get"=>"method", "ltr"=>"dir", "post"=>"method", "rtl"=>"dir"},
"q"=>{"ltr"=>"dir", "rtl"=>"dir"},
"thead"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"area"=>
{"circle"=>"shape", "default"=>"shape", "ltr"=>"dir", "nohref"=>"nohref",
"poly"=>"shape", "rect"=>"shape", "rtl"=>"dir"},
"td"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "col"=>"scope", "colgroup"=>"scope", "justify"=>"align",
"left"=>"align", "ltr"=>"dir", "middle"=>"valign", "nowrap"=>"nowrap",
"right"=>"align", "row"=>"scope", "rowgroup"=>"scope", "rtl"=>"dir",
"top"=>"valign"},
"title"=>{"ltr"=>"dir", "rtl"=>"dir"},
"dir"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
"s"=>{"ltr"=>"dir", "rtl"=>"dir"},
"ol"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
"hr"=>
{"center"=>"align", "left"=>"align", "ltr"=>"dir", "noshade"=>"noshade",
"right"=>"align", "rtl"=>"dir"},
"applet"=>
{"bottom"=>"align", "left"=>"align", "middle"=>"align", "right"=>"align",
"top"=>"align"},
"table"=>
{"above"=>"frame", "all"=>"rules", "below"=>"frame", "border"=>"frame",
"box"=>"frame", "center"=>"align", "cols"=>"rules", "groups"=>"rules",
"hsides"=>"frame", "left"=>"align", "lhs"=>"frame", "ltr"=>"dir",
"none"=>"rules", "rhs"=>"frame", "right"=>"align", "rows"=>"rules",
"rtl"=>"dir", "void"=>"frame", "vsides"=>"frame"},
"legend"=>
{"bottom"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align",
"rtl"=>"dir", "top"=>"align"},
"cite"=>{"ltr"=>"dir", "rtl"=>"dir"},
"a"=>
{"circle"=>"shape", "default"=>"shape", "ltr"=>"dir", "poly"=>"shape",
"rect"=>"shape", "rtl"=>"dir"},
"html"=>{"ltr"=>"dir", "rtl"=>"dir"},
"u"=>{"ltr"=>"dir", "rtl"=>"dir"},
"blockquote"=>{"ltr"=>"dir", "rtl"=>"dir"},
"center"=>{"ltr"=>"dir", "rtl"=>"dir"},
"b"=>{"ltr"=>"dir", "rtl"=>"dir"},
"th"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "col"=>"scope", "colgroup"=>"scope", "justify"=>"align",
"left"=>"align", "ltr"=>"dir", "middle"=>"valign", "nowrap"=>"nowrap",
"right"=>"align", "row"=>"scope", "rowgroup"=>"scope", "rtl"=>"dir",
"top"=>"valign"},
"link"=>{"ltr"=>"dir", "rtl"=>"dir"},
"var"=>{"ltr"=>"dir", "rtl"=>"dir"},
"samp"=>{"ltr"=>"dir", "rtl"=>"dir"},
"div"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"textarea"=>
{"disabled"=>"disabled", "ltr"=>"dir", "readonly"=>"readonly", "rtl"=>"dir"},
"pre"=>{"ltr"=>"dir", "rtl"=>"dir"},
"head"=>{"ltr"=>"dir", "rtl"=>"dir"},
"span"=>{"ltr"=>"dir", "rtl"=>"dir"},
"br"=>{"all"=>"clear", "left"=>"clear", "none"=>"clear", "right"=>"clear"},
"script"=>{"defer"=>"defer"},
"noframes"=>{"ltr"=>"dir", "rtl"=>"dir"},
"style"=>{"ltr"=>"dir", "rtl"=>"dir"},
"meta"=>{"ltr"=>"dir", "rtl"=>"dir"},
"dt"=>{"ltr"=>"dir", "rtl"=>"dir"},
"option"=>
{"disabled"=>"disabled", "ltr"=>"dir", "rtl"=>"dir", "selected"=>"selected"},
"kbd"=>{"ltr"=>"dir", "rtl"=>"dir"},
"big"=>{"ltr"=>"dir", "rtl"=>"dir"},
"tfoot"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"sup"=>{"ltr"=>"dir", "rtl"=>"dir"},
"bdo"=>{"ltr"=>"dir", "rtl"=>"dir"},
"isindex"=>{"ltr"=>"dir", "rtl"=>"dir"},
"dfn"=>{"ltr"=>"dir", "rtl"=>"dir"},
"fieldset"=>{"ltr"=>"dir", "rtl"=>"dir"},
"em"=>{"ltr"=>"dir", "rtl"=>"dir"},
"font"=>{"ltr"=>"dir", "rtl"=>"dir"},
"tbody"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"noscript"=>{"ltr"=>"dir", "rtl"=>"dir"},
"li"=>{"ltr"=>"dir", "rtl"=>"dir"},
"col"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"small"=>{"ltr"=>"dir", "rtl"=>"dir"},
"dd"=>{"ltr"=>"dir", "rtl"=>"dir"},
"i"=>{"ltr"=>"dir", "rtl"=>"dir"},
"menu"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
"strong"=>{"ltr"=>"dir", "rtl"=>"dir"},
"img"=>
{"bottom"=>"align", "ismap"=>"ismap", "left"=>"align", "ltr"=>"dir",
"middle"=>"align", "right"=>"align", "rtl"=>"dir", "top"=>"align"},
"optgroup"=>{"disabled"=>"disabled", "ltr"=>"dir", "rtl"=>"dir"},
"map"=>{"ltr"=>"dir", "rtl"=>"dir"},
"address"=>{"ltr"=>"dir", "rtl"=>"dir"},
"h1"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"sub"=>{"ltr"=>"dir", "rtl"=>"dir"},
"param"=>{"data"=>"valuetype", "object"=>"valuetype", "ref"=>"valuetype"},
"input"=>
{"bottom"=>"align", "button"=>"type", "checkbox"=>"type",
"checked"=>"checked", "disabled"=>"disabled", "file"=>"type",
"hidden"=>"type", "image"=>"type", "ismap"=>"ismap", "left"=>"align",
"ltr"=>"dir", "middle"=>"align", "password"=>"type", "radio"=>"type",
"readonly"=>"readonly", "reset"=>"type", "right"=>"align", "rtl"=>"dir",
"submit"=>"type", "text"=>"type", "top"=>"align"},
"h2"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"abbr"=>{"ltr"=>"dir", "rtl"=>"dir"},
"h3"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"strike"=>{"ltr"=>"dir", "rtl"=>"dir"},
"body"=>{"ltr"=>"dir", "rtl"=>"dir"},
"ins"=>{"ltr"=>"dir", "rtl"=>"dir"},
"button"=>
{"button"=>"type", "disabled"=>"disabled", "ltr"=>"dir", "reset"=>"type",
"rtl"=>"dir", "submit"=>"type"},
"h4"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"select"=>
{"disabled"=>"disabled", "ltr"=>"dir", "multiple"=>"multiple", "rtl"=>"dir"},
"caption"=>
{"bottom"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align",
"rtl"=>"dir", "top"=>"align"},
"colgroup"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"tr"=>
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
"del"=>{"ltr"=>"dir", "rtl"=>"dir"},
"h5"=>
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
"right"=>"align", "rtl"=>"dir"},
"iframe"=>
{"0"=>"frameborder", "1"=>"frameborder", "auto"=>"scrolling",
"bottom"=>"align", "left"=>"align", "middle"=>"align", "no"=>"scrolling",
"right"=>"align", "top"=>"align", "yes"=>"scrolling"}}
# :startdoc:
# The code above is auto-generated. Don't edit manually.
end

@ -0,0 +1,108 @@
require 'pp'
require 'htree/doc'
require 'htree/elem'
require 'htree/leaf'
require 'htree/tag'
require 'htree/output'
require 'htree/raw_string'
module HTree
# :stopdoc:
class Doc
  # Pretty-prints the document as an object group ("#<Class ...>") whose
  # members are the children, each preceded by a breakable separator.
  def pretty_print(q)
    q.object_group(self) do
      @children.each do |child|
        q.breakable
        q.pp child
      end
    end
  end
  alias inspect pretty_print_inspect
end
class Elem
  # Pretty-prints the element as "{elem stag children... etag}", or as
  # "{emptyelem stag}" when @empty is set (children and end tag are then
  # not printed).
  def pretty_print(q)
    opener = @empty ? '{emptyelem' : "{elem"
    q.group(1, opener, '}') do
      q.breakable
      q.pp @stag
      unless @empty
        @children.each do |child|
          q.breakable
          q.pp child
        end
        if @etag
          q.breakable
          q.pp @etag
        end
      end
    end
  end
  alias inspect pretty_print_inspect
end
module Leaf
  # Pretty-prints a leaf as "{kind ...}" where kind is the downcased class
  # name without its namespace.  When a raw source string is recorded it is
  # shown line by line; otherwise, if the object responds to display_xml,
  # its XML rendering is shown.
  def pretty_print(q)
    q.group(1, '{', '}') do
      q.text self.class.name.sub(/.*::/,'').downcase
      source = @raw_string
      if source
        source.scan(/[^\r\n]*(?:\r\n?|\n|[^\r\n]\z)/) do |line|
          q.breakable
          q.pp line
        end
      elsif self.respond_to? :display_xml
        q.breakable
        q.text self.display_xml('')
      end
    end
  end
  alias inspect pretty_print_inspect
end
class Name
  # Short textual form used in inspection output:
  #   xmlns / xmlns:local  for namespace declarations,
  #   local                when there is no (or an empty) namespace URI,
  #   prefix{uri}local     when a prefix is present,
  #   -{uri}local          when the prefix is explicitly false,
  #   {uri}local           otherwise.
  def inspect
    if xmlns?
      return @local_name ? "xmlns:#{@local_name}" : "xmlns"
    end
    return @local_name if !@namespace_uri || @namespace_uri.empty?
    braced = "{#{@namespace_uri}}#{@local_name}"
    if @namespace_prefix
      "#{@namespace_prefix}#{braced}"
    elsif @namespace_prefix == false
      "-#{braced}"
    else
      braced
    end
  end
end
class STag
  # Pretty-prints the start tag as <name attr="value" ...>, using the
  # inspect form of names and the escaped attribute value content.
  def pretty_print(q)
    q.group(1, '<', '>') do
      q.text @name.inspect
      @attributes.each do |attr_name, attr_text|
        q.breakable
        q.text "#{attr_name.inspect}=\"#{attr_text.to_attvalue_content}\""
      end
    end
  end
  alias inspect pretty_print_inspect
end
class ETag
  # Pretty-prints the end tag as </qualified-name>.
  def pretty_print(q)
    q.group(1, '</', '>') do
      q.text(@qualified_name)
    end
  end
  alias inspect pretty_print_inspect
end
class BogusETag
  # Pretty-prints a stray end tag as "{bogusetag ...}": the recorded source
  # text when there is one, otherwise a tag built from @qualified_name.
  # NOTE(review): the BogusETag#initialize in the leaf definitions assigns
  # @etag, not @qualified_name -- confirm the fallback branch prints a name.
  def pretty_print(q)
    q.group(1, '{', '}') do
      q.text self.class.name.sub(/.*::/,'').downcase
      source = @raw_string
      if source
        q.breakable
        q.text source
      else
        q.text "</#{@qualified_name}>"
      end
    end
  end
end
# :startdoc:
end

@ -0,0 +1,92 @@
require 'htree/modules'
require 'htree/raw_string'
module HTree
class XMLDecl
  # Builds an XML declaration node.
  #
  # version::    required version string (letters, digits and "_.:-").
  # encoding::   optional encoding name; must start with a letter.
  # standalone:: nil (unspecified), true or false.
  #
  # Raises HTree::Error when any of the three values is malformed.
  def initialize(version, encoding=nil, standalone=nil)
    init_raw_string
    unless /\A[a-zA-Z0-9_.:-]+\z/ =~ version
      raise HTree::Error, "invalid version in XML declaration: #{version.inspect}"
    end
    if encoding
      unless /\A[A-Za-z][A-Za-z0-9._-]*\z/ =~ encoding
        raise HTree::Error, "invalid encoding in XML declaration: #{encoding.inspect}"
      end
    end
    if standalone != nil && standalone != true && standalone != false
      raise HTree::Error, "invalid standalone document declaration in XML declaration: #{standalone.inspect}"
    end
    @version = version
    @encoding = encoding
    @standalone = standalone
  end
  attr_reader :version, :encoding, :standalone
end
class DocType
  # Builds a document type declaration node.
  #
  # The public identifier, when given, may only contain the characters
  # accepted by the pattern below.  The system identifier is rejected only
  # when it contains both a double and a single quote, because then neither
  # quoting style can delimit it.
  #
  # Raises HTree::Error for an invalid identifier.
  def initialize(root_element_name, public_identifier=nil, system_identifier=nil)
    init_raw_string
    if public_identifier
      unless /\A[ \x0d\x0aa-zA-Z0-9\-'()+,.\/:=?;!*\#@$_%]*\z/ =~ public_identifier
        raise HTree::Error, "invalid public identifier in document type declaration: #{public_identifier.inspect}"
      end
    end
    if system_identifier
      if /"/ =~ system_identifier && /'/ =~ system_identifier
        raise HTree::Error, "invalid system identifier in document type declaration: #{system_identifier.inspect}"
      end
    end
    @root_element_name = root_element_name
    @public_identifier = public_identifier
    @system_identifier = system_identifier
  end
  attr_reader :root_element_name, :public_identifier, :system_identifier
end
class ProcIns
  # :stopdoc:
  class << self
    alias new! new
  end
  # :startdoc:

  # Lenient constructor: any "?>" inside the content is rewritten to "? >"
  # so the result is always a valid processing instruction.  The strict
  # constructor remains reachable as new!.
  def self.new(target, content)
    sanitized = content ? content.gsub(/\?>/, '? >') : content
    new!(target, sanitized)
  end

  # Strict initializer; raises HTree::Error when the content contains "?>".
  def initialize(target, content) # :notnew:
    init_raw_string
    if content
      raise HTree::Error, "invalid processing instruction content: #{content.inspect}" if /\?>/ =~ content
    end
    @target = target
    @content = content
  end
  attr_reader :target, :content
end
class Comment
  # :stopdoc:
  class << self
    alias new! new
  end
  # :startdoc:

  # Lenient constructor: runs of hyphens are spread out with spaces and a
  # trailing hyphen gets a space appended, so the content never contains
  # "--" and never ends in "-".  The strict constructor is available as new!.
  def self.new(content)
    # run is the whole hyphen run; its first hyphen is kept as is and each
    # following one is preceded by a space.
    spaced = content.gsub(/-(-+)/) {|run| '-' + ' -' * (run.length - 1) }
    new!(spaced.sub(/-\z/, '- '))
  end

  # Strict initializer; raises HTree::Error when the content contains "--"
  # or ends with "-".
  def initialize(content) # :notnew:
    init_raw_string
    invalid = /--/ =~ content || /-\z/ =~ content
    if invalid
      raise HTree::Error, "invalid comment content: #{content.inspect}"
    end
    @content = content
  end
  attr_reader :content
end
class BogusETag
  # Wraps an end tag that has no matching start tag: clears the raw string
  # and stores an ETag built from the given qualified name in @etag.
  def initialize(qualified_name)
    init_raw_string()
    wrapped = ETag.new(qualified_name)
    @etag = wrapped
  end
end
end

@ -0,0 +1,369 @@
require 'htree/modules'
require 'htree/elem'
require 'htree/inspect'
module HTree
module Node
# Creates a root location object (no parent, no index) pointing to self.
# The location class is the Loc constant of the receiver's class.
def make_loc
  loc_class = self.class::Loc
  loc_class.new(nil, nil, self)
end
# Returns self; a node is already a node.
def to_node
  self
end
# +subst+ substitutes several subtrees at once.
#
# t = HTree('<r><x/><y/><z/></r>')
# l = t.make_loc
# t2 = t.subst({
# l.get_subnode(0, 'k') => 'v',
# l.get_subnode(0, -1) => HTree('<a/>'),
# l.get_subnode(0, 1) => nil,
# l.get_subnode(0, 2, 0) => HTree('<b/>'),
# })
# pp t2
# # =>
# #<HTree::Doc
# {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
def subst(pairs)
  # Normalize each pair to [index path from self, array of replacements].
  pairs = pairs.map {|key, val|
    key = key.index_list(self)
    val = [val] unless Array === val
    [key, val]
  }

  pairs_empty_key, pairs_nonempty_key =
    pairs.partition {|key, val| key.empty? }
  if !pairs_empty_key.empty?
    # An empty path designates self: the whole tree is being replaced, so
    # replacing something inside it at the same time makes no sense.
    if !pairs_nonempty_key.empty?
      raise ArgumentError, "cannot substitute a node under substituting tree."
    end
    result = []
    pairs_empty_key.each {|key, val| result.concat val }
    result.compact!
    if result.length == 1
      return result[0]
    else
      # The message must interpolate +result+; the former reference to an
      # undefined local raised NameError instead of this ArgumentError.
      raise ArgumentError, "cannot substitute top node by multiple nodes: #{result.inspect}"
    end
  end
  # Nothing to substitute.
  return self if pairs_nonempty_key.empty?

  subst_internal(pairs)
end
def subst_internal(pairs) # :nodoc:
  # Group the pairs by the index of the direct subnode they go through.
  # Popping consumes the last entry of each index path.
  grouped = {}
  pairs.each do |path, replacement|
    head = path.pop
    grouped[head] = [] unless grouped[head]
    grouped[head] << [path, replacement]
  end

  replacements = grouped.map do |head, subpairs|
    current = get_subnode(head)
    here, below = subpairs.partition {|path, _| path.empty? }
    if here.empty?
      if below.empty?
        [head, current]
      else
        # Every remaining path goes deeper: recurse into the subnode.
        [head, current.subst_internal(subpairs)]
      end
    else
      unless below.empty?
        raise ArgumentError, "cannot substitute a node under substituting tree."
      end
      merged = []
      here.each {|pair| merged.concat pair[1] }
      [head, merged.compact]
    end
  end

  subst_subnode(replacements)
end
end
# :stopdoc:
# node_test_string returns the node test used for this kind of node when
# building location paths.
class Doc
  def node_test_string
    'doc()'
  end
end
class Elem
  # For elements the node test is the qualified element name.
  def node_test_string
    @stag.element_name.qualified_name
  end
end
class Text
  def node_test_string
    'text()'
  end
end
class BogusETag
  def node_test_string
    'bogus-etag()'
  end
end
class XMLDecl
  def node_test_string
    'xml-declaration()'
  end
end
class DocType
  def node_test_string
    'doctype()'
  end
end
class ProcIns
  def node_test_string
    'processing-instruction()'
  end
end
class Comment
  def node_test_string
    'comment()'
  end
end
module Container
  # Returns the location path step for the child at +index+.
  #
  # Out-of-range indexes give "*[index]".  Otherwise the step is the child's
  # node test, followed by "[n]" (1-based position among the children with
  # the same node test) when that node test is not unique.  The steps are
  # computed once and cached in @loc_step_children; a copy is returned.
  def find_loc_step(index)
    if index < 0 || @children.length <= index
      return "*[#{index}]"
    end

    unless defined? @loc_step_children
      totals = Hash.new(0)
      numbered = @children.map do |child|
        test = child.node_test_string
        totals[test] += 1
        [test, totals[test]]
      end
      @loc_step_children = numbered.map do |test, ordinal|
        totals[test] == 1 ? test : "#{test}[#{ordinal}]"
      end
    end

    @loc_step_children[index].dup
  end
end
class Elem
  # Integer indexes are child positions and are handled by the inherited
  # implementation.  A String is parsed as an attribute name in the default
  # context; a Name is used directly.  The step for an attribute is
  # "@qualified-name".  Anything else raises TypeError.
  def find_loc_step(index)
    case index
    when Integer
      return super
    when String
      index = Name.parse_attribute_name(index, DefaultContext)
    end
    raise TypeError, "invalid index: #{index.inspect}" unless Name === index
    "@#{index.qualified_name}"
  end
end
# :startdoc:
end
class HTree::Location
def initialize(parent, index, node) # :nodoc:
  if !parent
    # Root location: no parent and no index; the node is taken as given.
    @parent = nil
    @index = nil
    @node = node
  else
    @parent = parent
    @index = index
    # The node is looked up through the parent and must be the very object
    # the caller passed in.
    @node = parent.node.get_subnode(index)
    raise ArgumentError, "unexpected node" unless @node.equal?(node)
  end
  if @node
    expected_class = @node.class::Loc
    if self.class != expected_class
      raise ArgumentError, "invalid location class: #{self.class} should be #{node.class::Loc}"
    end
  end
  # Cache of sub-locations, filled on demand.
  @subloc = {}
end
attr_reader :parent, :index, :node
alias to_node node
# Returns self: the receiver already is a location object, so no new
# location has to be created.
def make_loc
self
end
# +top+ returns the originator location.
#
# t = HTree('<a><b><c><d>')
# l = t.make_loc.get_subnode(0, 0, 0, 0)
# p l, l.top
# # =>
# #<HTree::Location: doc()/a/b/c/d>
# #<HTree::Location: doc()>
def top
  # Walk up the parent chain until a location without a parent is reached.
  loc = self
  loc = loc.parent while loc.parent
  loc
end
# +subst_itself+ substitutes the node pointed by the location.
# It returns the location of substituted node.
#
# t1 = HTree('<a><b><c><d>')
# p t1
# l1 = t1.make_loc.get_subnode(0, 0, 0, 0)
# p l1
# l2 = l1.subst_itself(HTree('<z/>'))
# p l2
# t2 = l2.top.to_node
# p t2
# # =>
# #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <d>}}}}>
# #<HTree::Location: doc()/a/b/c/d>
# #<HTree::Location: doc()/a/b/c/z>
# #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <z>}}}}>
#
def subst_itself(node)
  # A root location is simply replaced by a fresh location for the new node.
  return node.make_loc unless @parent

  new_index = @index
  if !@node && Integer === @index
    # This location pointed at a non-existent child: clamp the index used to
    # find the new node to the beginning or to the current child count.
    if @index < 0
      new_index = 0
    else
      sibling_count = @parent.to_node.children.length
      new_index = sibling_count if sibling_count < @index
    end
  end

  # Rebuild the parent node with the substitution applied, substitute the
  # parent location recursively, then descend to the new node.
  new_parent_node = @parent.to_node.subst_subnode({@index=>node})
  @parent.subst_itself(new_parent_node).get_subnode(new_index)
end
# +subst+ substitutes several subtrees at once.
#
# t = HTree('<r><x/><y/><z/></r>')
# l = t.make_loc
# l2 = l.subst({
# l.root.get_subnode('k') => 'v',
# l.root.get_subnode(-1) => HTree('<a/>'),
# l.find_element('y') => nil,
# l.find_element('z').get_subnode(0) => HTree('<b/>'),
# })
# pp l2, l2.to_node
# # =>
# #<HTree::Doc::Loc: doc()>
# #<HTree::Doc
# {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
def subst(pairs)
  # Substitute inside the node first, then replace this location's node
  # with the result.
  substituted = @node.subst(pairs)
  subst_itself(substituted)
end
# +loc_list+ returns an array of locations from the root location down to this one.
#
# t = HTree('<a><b><c>')
# l = t.make_loc.get_subnode(0, 0, 0)
# pp l, l.loc_list
# # =>
# #<HTree::Location: doc()/a/b/c>
# [#<HTree::Location: doc()>,
# #<HTree::Location: doc()/a>,
# #<HTree::Location: doc()/a/b>,
# #<HTree::Location: doc()/a/b/c>]
#
def loc_list
  # Climb from self to the root, prepending each location so the result is
  # ordered root first.
  chain = []
  loc = self
  while loc
    chain.unshift loc
    loc = loc.parent
  end
  chain
end
# +path+ returns the path of the location.
#
# l = HTree.parse("<a><b>x</b><b/><a/>").make_loc
# l = l.get_subnode(0, 0, 0)
# p l.path # => "doc()/a/b[1]/text()"
def path
  # The root contributes its node test; every other location contributes
  # "/" followed by the step its parent's node computes for its index.
  steps = loc_list.map do |loc|
    parent = loc.parent
    if parent
      '/' + parent.node.find_loc_step(loc.index)
    else
      loc.node.node_test_string
    end
  end
  steps.join
end
def index_list(node) # :nodoc:
  # Collect indexes while climbing from self towards the root, stopping at
  # the location whose node is +node+.  The list is therefore ordered from
  # self upwards.
  indexes = []
  here = self
  loop do
    return indexes if here.to_node.equal?(node)
    up = here.parent
    break unless up
    indexes << here.index
    here = up
  end
  raise ArgumentError, "the location is not under the node: #{self.path}"
end
# :stopdoc:
def pretty_print(q)
  # Prints "#<Class: path>", where path is built like #path but with an
  # optional line break allowed after every "/".
  q.group(1, "#<#{self.class.name}", '>') do
    q.text ':'
    q.breakable
    loc_list.each do |loc|
      parent = loc.parent
      if parent
        q.text '/'
        q.group { q.breakable '' }
        q.text parent.node.find_loc_step(loc.index)
      else
        q.text loc.node.node_test_string
      end
    end
  end
end
alias inspect pretty_print_inspect
# :startdoc:
end
module HTree::Container::Loc
# +get_subnode+ returns a location object which points to a subnode
# indexed by _index_.
def get_subnode_internal(index) # :nodoc:
  # Sub-locations are created once per index and cached in @subloc.
  if @subloc.include? index
    return @subloc[index]
  end
  node = @node.get_subnode(index)
  # An existing subnode gets the location class of its own node class; a
  # missing one is represented by a plain HTree::Location.
  loc_class = node ? node.class::Loc : HTree::Location
  @subloc[index] = loc_class.new(self, index, node)
end
# +subst_subnode+ returns the location which refers the substituted tree.
# loc.subst_subnode(pairs) -> loc
#
# t = HTree('<a><b><c>')
# l = t.make_loc.get_subnode(0, 0)
# l = l.subst_subnode({0=>HTree('<z/>')})
# pp t, l.top.to_node
# # =>
# #<HTree::Doc {elem <a> {elem <b> {emptyelem <c>}}}>
# #<HTree::Doc {elem <a> {elem <b> {emptyelem <z>}}}>
#
def subst_subnode(pairs)
  # Build the node with its subnodes substituted, then put that node in
  # place of the one this location points at.
  replaced = @node.subst_subnode(pairs)
  self.subst_itself(replaced)
end
# +children+ returns an array of child locations.
def children
  # One location per child of the node, in child order.
  Array.new(@node.children.length) {|i| get_subnode(i) }
end
end
class HTree::Elem::Loc
# +context+ returns the context of the element this location points at.
def context
  @node.context
end
# +element_name+ returns the name of the element as reported by the node.
def element_name
  @node.element_name
end
# +empty_element?+ tells whether the node reports itself as an empty element.
def empty_element?
  @node.empty_element?
end
# +each_attribute+ iterates over each attributes.
def each_attribute
  # Yields each attribute name together with the location of the attribute
  # (not its text), obtained through get_subnode.
  @node.each_attribute do |attr_name, _attr_text|
    yield attr_name, get_subnode(attr_name)
  end
end
end
class HTree::Text::Loc
  # The methods below forward to the node this location points at.
  def to_s
    @node.to_s
  end

  def strip
    @node.strip
  end

  def empty?
    @node.empty?
  end
end

@ -0,0 +1,49 @@
# Declares the whole HTree class/module hierarchy in one place so that the
# other files can reopen these classes in any order.
module HTree
# Names and contexts are not nodes but still mix in HTree.
class Name; include HTree end
class Context; include HTree end
# :stopdoc:
# Start and end tags share the Tag module.
module Tag; include HTree end
class STag; include Tag end
class ETag; include Tag end
# :startdoc:
# Node is the common module of everything in a tree.  Containers (Doc,
# Elem) and leaves (the remaining node classes) refine it.
module Node; include HTree end
module Container; include Node end
class Doc; include Container end
class Elem; include Container end
module Leaf; include Node end
class Text; include Leaf end
class XMLDecl; include Leaf end
class DocType; include Leaf end
class ProcIns; include Leaf end
class Comment; include Leaf end
class BogusETag; include Leaf end
# Traverse is the common module of the Trav modules.  Each node class gets
# its own Trav module, which includes the container or leaf variant, and
# the node class itself includes that Trav module.
module Traverse end
module Container::Trav; include Traverse end
module Leaf::Trav; include Traverse end
class Doc; module Trav; include Container::Trav end; include Trav end
class Elem; module Trav; include Container::Trav end; include Trav end
class Text; module Trav; include Leaf::Trav end; include Trav end
class XMLDecl; module Trav; include Leaf::Trav end; include Trav end
class DocType; module Trav; include Leaf::Trav end; include Trav end
class ProcIns; module Trav; include Leaf::Trav end; include Trav end
class Comment; module Trav; include Leaf::Trav end; include Trav end
class BogusETag; module Trav; include Leaf::Trav end; include Trav end
# Location is the base class of the per-node-class Loc classes.  Each Loc
# includes the Trav module of its node class plus the container or leaf
# Loc module.
class Location; include HTree end
module Container::Loc end
module Leaf::Loc end
class Doc; class Loc < Location; include Trav, Container::Loc end end
class Elem; class Loc < Location; include Trav, Container::Loc end end
class Text; class Loc < Location; include Trav, Leaf::Loc end end
class XMLDecl; class Loc < Location; include Trav, Leaf::Loc end end
class DocType; class Loc < Location; include Trav, Leaf::Loc end end
class ProcIns; class Loc < Location; include Trav, Leaf::Loc end end
class Comment; class Loc < Location; include Trav, Leaf::Loc end end
class BogusETag; class Loc < Location; include Trav, Leaf::Loc end end
# Error raised by the library for invalid names, declarations and so on.
class Error < StandardError; end
end

@ -0,0 +1,122 @@
require 'htree/scan' # for Pat::Nmtoken
require 'htree/context'
module HTree
# Name represents an element name or an attribute name.
# It consists of a namespace prefix, a namespace URI and a local name.
class Name
=begin
element name prefix uri localname
{u}n, n with xmlns=u nil 'u' 'n'
p{u}n, p:n with xmlns:p=u 'p' 'u' 'n'
n with xmlns='' nil '' 'n'
attribute name
xmlns= 'xmlns' nil nil
xmlns:n= 'xmlns' nil 'n'
p{u}n=, p:n= with xmlns:p=u 'p' 'u' 'n'
n= nil '' 'n'
=end

  # Parses an element name written as "{uri}local", "prefix{uri}local",
  # "prefix:local" or "local".  Prefixes and the default namespace are
  # resolved through +context+.
  def Name.parse_element_name(name, context)
    braced = /\{(.*)\}/.match(name)
    if braced
      # "{u}n" means "use default namespace",
      # "p{u}n" means "use the specified prefix p"
      prefix = braced.pre_match
      return Name.new(prefix == '' ? nil : prefix, braced[1], braced.post_match)
    end
    colon = /:/.match(name)
    if colon
      prefix_uri = context.namespace_uri(colon.pre_match)
      return Name.new(colon.pre_match, prefix_uri, colon.post_match) unless prefix_uri.empty?
    end
    default_uri = context.namespace_uri(nil)
    if default_uri.empty?
      Name.new(nil, '', name)
    else
      Name.new(nil, default_uri, name)
    end
  end

  # Parses an attribute name.  "xmlns" and "xmlns:p" become namespace
  # declaration names; unlike element names, an unprefixed attribute never
  # picks up the default namespace.
  def Name.parse_attribute_name(name, context)
    return Name.new('xmlns', nil, nil) if name == 'xmlns'
    declaration = /\Axmlns:/.match(name)
    return Name.new('xmlns', nil, declaration.post_match) if declaration
    braced = /\{(.*)\}/.match(name)
    if braced
      prefix = braced.pre_match
      return Name.new(prefix == '' ? nil : prefix, braced[1], braced.post_match)
    end
    colon = /:/.match(name)
    if colon
      prefix_uri = context.namespace_uri(colon.pre_match)
      return Name.new(colon.pre_match, prefix_uri, colon.post_match) unless prefix_uri.empty?
    end
    Name.new(nil, '', name)
  end

  NameCache = {}

  # Interning constructor: equal (prefix, uri, local name, class) tuples
  # share one Name object.  The strings held by a new Name are frozen copies.
  def Name.new(namespace_prefix, namespace_uri, local_name)
    key = [namespace_prefix, namespace_uri, local_name, self]
    return NameCache[key] if NameCache.key?(key)
    parts = key[0, 3].map {|part| part ? part.dup.freeze : part }
    NameCache[parts + [self]] = super(parts[0], parts[1], parts[2])
  end

  # Raises HTree::Error when the prefix or local name is not a name token,
  # when an xmlns name carries a namespace URI, or when any other name has a
  # non-String namespace URI.
  def initialize(namespace_prefix, namespace_uri, local_name)
    @namespace_prefix = namespace_prefix
    @namespace_uri = namespace_uri
    @local_name = local_name
    if @namespace_prefix
      unless /\A#{Pat::Nmtoken}\z/o =~ @namespace_prefix
        raise HTree::Error, "invalid namespace prefix: #{@namespace_prefix.inspect}"
      end
    end
    if @local_name
      unless /\A#{Pat::Nmtoken}\z/o =~ @local_name
        raise HTree::Error, "invalid local name: #{@local_name.inspect}"
      end
    end
    if @namespace_prefix == 'xmlns'
      if @namespace_uri != nil
        raise HTree::Error, "Name object for xmlns:* must not have namespace URI: #{@namespace_uri.inspect}"
      end
    elsif !(String === @namespace_uri)
      raise HTree::Error, "invalid namespace URI: #{@namespace_uri.inspect}"
    end
  end
  attr_reader :namespace_prefix, :namespace_uri, :local_name

  # True for the names of namespace declarations (xmlns, xmlns:p).
  def xmlns?
    @namespace_prefix == 'xmlns' && @namespace_uri == nil
  end

  # "{uri}local" when a non-empty namespace URI is present, else a copy of
  # the local name.
  def universal_name
    return @local_name.dup unless @namespace_uri && !@namespace_uri.empty?
    "{#{@namespace_uri}}#{@local_name}"
  end

  # "prefix:local" when both a non-empty namespace URI and a prefix are
  # present; a copy of the local name when there is one; "xmlns" otherwise.
  def qualified_name
    has_uri = @namespace_uri && !@namespace_uri.empty?
    if has_uri && @namespace_prefix
      "#{@namespace_prefix}:#{@local_name}"
    elsif has_uri || @local_name
      @local_name.dup
    else
      "xmlns"
    end
  end

  # "prefix{uri}local" or "{uri}local" when a non-empty namespace URI is
  # present; a copy of the local name when there is one; "xmlns" otherwise.
  def to_s
    if @namespace_uri && !@namespace_uri.empty?
      head = @namespace_prefix ? "#{@namespace_prefix}" : ''
      head + "{#{@namespace_uri}}#{@local_name}"
    elsif @local_name
      @local_name.dup
    else
      "xmlns"
    end
  end
end
end

@ -0,0 +1,212 @@
require 'htree/encoder'
require 'htree/doc'
require 'htree/elem'
require 'htree/leaf'
require 'htree/text'
module HTree
# :stopdoc:
class Text
  # Character references for the characters escaped on output.
  ChRef = {
    '>' => '&gt;',
    '<' => '&lt;',
    '"' => '&quot;',
  }

  # Emits the text with "<" and ">" replaced by character references.
  def output(out, context=nil)
    escaped = @rcdata.gsub(/[<>]/) {|ch| ChRef[ch] }
    out.output_text(escaped)
  end

  # Like the text emitted by #output, but double quotes are escaped too, so
  # the result can sit inside a double-quoted attribute value.
  def to_attvalue_content
    @rcdata.gsub(/[<>"]/) {|ch| ChRef[ch] }
  end

  # Emits the text as a double-quoted attribute value.
  def output_attvalue(out, context)
    out.output_string('"')
    out.output_text(to_attvalue_content)
    out.output_string('"')
  end

  # Emits the plain string form unescaped.  Raises ArgumentError when it
  # contains "</".
  def output_cdata(out)
    plain = self.to_s
    raise ArgumentError, "CDATA cannot contain '</': #{plain.inspect}" if %r{</} =~ plain
    out.output_string(plain)
  end
end
class Name
  # Emits the name as it appears in markup: "xmlns" or "xmlns:local" for
  # namespace declarations, the qualified name otherwise.
  def output(out, context)
    # xxx: validate namespace prefix
    markup =
      if !xmlns?
        qualified_name
      elsif @local_name
        "xmlns:#{@local_name}"
      else
        "xmlns"
      end
    out.output_string markup
  end

  # Emits name="value" for the attribute value +text+.
  def output_attribute(text, out, context)
    output(out, context)
    out.output_string('=')
    text.output_attvalue(out, context)
  end
end
class Doc
  # Emits every child in order.  Children that can produce a prolog XML
  # declaration are emitted through output_prolog_xmldecl, and only the
  # first such child is emitted; later ones are dropped.
  def output(out, context)
    xmldecl_seen = false
    @children.each do |child|
      unless child.respond_to? :output_prolog_xmldecl
        child.output(out, context)
        next
      end
      child.output_prolog_xmldecl(out, context) unless xmldecl_seen # xxx: encoding?
      xmldecl_seen = true
    end
  end
end
class Elem
  # Emits the element.
  #
  # XHTML <script> and <style> elements are emitted with CDATA content
  # through out.output_cdata_content; empty elements are emitted as an empty
  # tag; every other element is emitted as start tag, children, end tag.
  # Children are emitted in the context returned by the start tag output.
  def output(out, context)
    # The pattern is anchored at both ends and the dots are escaped so that
    # only the exact names "script" and "style" in the XHTML namespace take
    # the CDATA path (an unanchored pattern also matched e.g. "stylesheet").
    if %r{\A\{http://www\.w3\.org/1999/xhtml\}(?:script|style)\z} =~ @stag.element_name.universal_name
      children_context = @stag.output_stag(out, context)
      out.output_cdata_content(@children, children_context)
      @stag.output_etag(out, context)
    elsif @empty
      @stag.output_emptytag(out, context)
    else
      children_context = @stag.output_stag(out, context)
      @children.each {|n| n.output(out, children_context) }
      @stag.output_etag(out, context)
    end
  end
end
class STag
  # Emits every attribute except namespace declarations, each preceded by a
  # space, then lets the tag's own context emit the namespace declarations.
  # Returns what @context.output_namespaces returns, which the tag output
  # methods hand back as the context for the children.
  def output_attributes(out, context)
    @attributes.each do |attr_name, attr_text|
      unless attr_name.xmlns?
        out.output_string(' ')
        attr_name.output_attribute(attr_text, out, context)
      end
    end
    @context.output_namespaces(out, context)
  end

  # Emits the tag in empty-element form.  The newline is placed inside the
  # tag, before the optional slash and the closing ">".
  def output_emptytag(out, context)
    out.output_string('<')
    @name.output(out, context)
    inner_context = output_attributes(out, context)
    out.output_string("\n")
    out.output_slash_if_xml
    out.output_string(">")
    inner_context
  end

  # Emits the start tag and returns the context for the children.
  def output_stag(out, context)
    out.output_string('<')
    @name.output(out, context)
    inner_context = output_attributes(out, context)
    out.output_string("\n>")
    inner_context
  end

  # Emits the end tag matching this start tag.
  def output_etag(out, context)
    out.output_string('</')
    @name.output(out, context)
    out.output_string("\n>")
  end
end
class Context
  # Emits the namespace declarations that differ from +outer_context+.
  #
  # A binding whose prefix the outer context does not know (nil URI) is
  # collected and handed to out.output_xmlns in one call.  A binding whose
  # URI differs from the outer one is emitted as an xmlns attribute.
  # Identical bindings are skipped.
  # Returns outer_context.subst_namespaces(@namespaces).
  def output_namespaces(out, outer_context)
    unknown = {}
    @namespaces.each do |prefix, uri|
      outer_uri = outer_context.namespace_uri(prefix)
      if outer_uri == nil
        unknown[prefix] = uri
        next
      end
      next if outer_uri == uri
      out.output_string(prefix ? " xmlns:#{prefix}=" : " xmlns=")
      Text.new(uri).output_attvalue(out, outer_context)
    end
    out.output_xmlns(unknown) unless unknown.empty?
    outer_context.subst_namespaces(@namespaces)
  end
end
class BogusETag
  # A stray end tag produces no output at all.
  def output(out, context)
    nil
  end
end
class XMLDecl
  # Emits nothing: the declaration is not written by the ordinary per-node
  # output method, only by output_prolog_xmldecl.
  def output(out, context)
    nil
  end

  # Emits <?xml version="..."?> with the encoding and standalone pseudo
  # attributes when they are set.
  def output_prolog_xmldecl(out, context)
    out.output_string("<?xml version=\"#{@version}\"")
    out.output_string(" encoding=\"#{@encoding}\"") if @encoding
    if @standalone != nil
      flag = @standalone ? 'yes' : 'no'
      out.output_string(" standalone=\"#{flag}\"")
    end
    out.output_string("?>")
  end
end
class DocType
  # Emits the document type declaration.
  #
  # When neither a public nor a system identifier is set, only the root
  # element name is written (<!DOCTYPE name>).  Writing the bare keyword
  # "SYSTEM" without a system literal is not a well-formed declaration.
  def output(out, context)
    if @public_identifier || @system_identifier
      out.output_string "<!DOCTYPE #{@root_element_name} #{generate_content}>"
    else
      out.output_string "<!DOCTYPE #{@root_element_name}>"
    end
  end

  # Builds the external identifier part: PUBLIC "pubid" or SYSTEM, followed
  # by the system identifier when there is one.  The system identifier is
  # wrapped in double quotes unless it contains a double quote, in which
  # case single quotes are used.
  def generate_content # :nodoc:
    result = ''
    if @public_identifier
      result << "PUBLIC \"#{@public_identifier}\""
    else
      result << "SYSTEM"
    end
    # Although a system identifier is not omissible in XML,
    # we cannot output it if it is not given.
    if @system_identifier
      if /"/ !~ @system_identifier
        result << " \"#{@system_identifier}\""
      else
        result << " '#{@system_identifier}'"
      end
    end
    result
  end
end
class ProcIns
  # Emits <?target content?>; the space and content are left out when the
  # instruction has no content.
  def output(out, context)
    out.output_string("<?#{@target}")
    if @content
      out.output_string(" #{@content}")
    end
    out.output_string("?>")
  end
end
class Comment
  # Emits the comment delimiters around the stored content.
  def output(out, context)
    markup = "<!--#{@content}-->"
    out.output_string(markup)
  end
end
# :startdoc:
end

@ -0,0 +1,410 @@
require 'htree/scan'
require 'htree/htmlinfo'
require 'htree/text'
require 'htree/tag'
require 'htree/leaf'
require 'htree/doc'
require 'htree/elem'
require 'htree/raw_string'
require 'htree/context'
require 'htree/encoder'
require 'htree/fstr'
module HTree
# HTree.parse parses <i>input</i> and returns a document tree
# represented by HTree::Doc.
#
# <i>input</i> should be a String or
# an object which responds to the read or open method.
# For example, IO, StringIO, Pathname, URI::HTTP and URI::FTP are acceptable.
# Note that the URIs need open-uri.
#
# HTree.parse guesses whether <i>input</i> is HTML or not and XML or not.
#
# If it is guessed as HTML, the default namespace in the result is set to
# http://www.w3.org/1999/xhtml regardless of whether <i>input</i> has an
# XML namespace declaration, and even if it is pre-XML HTML.
#
# If it is guessed as HTML and not XML, all element and attribute names are
# downcased.
#
# If the opened file or the read content has a charset method,
# HTree.parse decodes it according to $KCODE before parsing.
# Otherwise HTree.parse assumes the character encoding of the content is
# compatible with $KCODE.
# Note that the charset method is provided by URI::HTTP with open-uri.
def HTree.parse(input)
  HTree.with_frozen_string_hash do
    parse_as(input, false)
  end
end
# HTree.parse_xml parses <i>input</i> as XML and
# returns a document tree represented by HTree::Doc.
#
# It behaves almost the same as HTree.parse, but it assumes <i>input</i> is
# XML even if there is no XML declaration.
# The assumption causes the following differences.
# * Element names are not downcased.
# * The content of <script> and <style> elements is PCDATA, not CDATA.
def HTree.parse_xml(input)
  HTree.with_frozen_string_hash do
    parse_as(input, true)
  end
end
# :stopdoc:
# Shared implementation of HTree.parse and HTree.parse_xml.
#
# input::  a String, or an object responding to read or open.
# is_xml:: true to force XML treatment; false lets the scanner decide.
#
# Returns a Doc built from the parsed nodes.
# Raises SecurityError for tainted input when $SAFE is 1 or higher.
def HTree.parse_as(input, is_xml)
input_charset = nil
if input.tainted? && 1 <= $SAFE
raise SecurityError, "input tainted"
end
# Reduce the input to a string, remembering a charset when one is reported.
if input.respond_to? :read # IO, StringIO
input = input.read.untaint
# Note: +input+ is now the value returned by read, so the charset is asked
# of the content that was read, not of the original object.
input_charset = input.charset if input.respond_to? :charset
elsif input.respond_to? :open # Pathname, URI with open-uri
input.open {|f|
input = f.read.untaint
input_charset = f.charset if f.respond_to? :charset
}
end
# Convert to the internal charset when the reported charset differs.
# NOTE(review): Iconv is not required by this file directly -- presumably it
# is loaded through htree/encoder; confirm.
if input_charset && input_charset != Encoder.internal_charset
input = Iconv.conv(Encoder.internal_charset, input_charset, input)
end
# Tokenize; the scanner also reports its final XML/HTML guess.
tokens = []
is_xml, is_html = HTree.scan(input, is_xml) {|token|
tokens << token
}
context = is_html ? HTMLContext : DefaultContext
# Pair tags into element structures, repair the nesting, then build nodes.
structure_list = parse_pairs(tokens, is_xml, is_html)
structure_list = fix_structure_list(structure_list, is_xml, is_html)
nodes = structure_list.map {|s| build_node(s, is_xml, is_html, context) }
Doc.new(nodes)
end
# Pairs start and end tag tokens into nested element structures.
#
# Returns the list of top-level structures.  An element is
# [:elem, start tag source, children] with the end tag source appended as a
# fourth entry when an end tag was found; an unmatched end tag becomes
# [:bogus_etag, end tag source]; every other token is kept as is.
def HTree.parse_pairs(tokens, is_xml, is_html)
  # Tag names are compared case-insensitively only for non-XML HTML.
  fold_case = !is_xml && is_html
  # Each frame is [tag name, start tag source, children collected so far];
  # the bottom frame (nil name) collects the top-level structures.
  stack = [[nil, nil, []]]
  tokens.each do |token|
    case token[0]
    when :stag
      open_source = token[1]
      open_name = open_source[Pat::Name]
      open_name = open_name.downcase if fold_case
      stack.push [HTree.frozen_string(open_name), open_source, []]
    when :etag
      close_source = token[1]
      close_name = close_source[Pat::Name]
      close_name = close_name.downcase if fold_case
      close_name = HTree.frozen_string(close_name)
      # Innermost open element with the same name, if any.
      opener = stack.reverse.find {|frame| frame[0] == close_name }
      if opener
        # Elements opened after the match are closed without an end tag.
        until opener.equal? stack.last
          _, pending_source, kids = stack.pop
          stack.last[2] << [:elem, pending_source, kids]
        end
        _, pending_source, kids = stack.pop
        stack.last[2] << [:elem, pending_source, kids, close_source]
      else
        stack.last[2] << [:bogus_etag, close_source]
      end
    else
      stack.last[2] << token
    end
  end
  # Elements still open at the end of input are closed without an end tag.
  until stack.length <= 1
    _, pending_source, kids = stack.pop
    stack.last[2] << [:elem, pending_source, kids]
  end
  stack[0][2]
end
# Repairs the nesting of every element in +structure_list+.  Structures that
# fix_element pushes out of an element are processed next, at this level.
def HTree.fix_structure_list(structure_list, is_xml, is_html)
  fixed = []
  pending = structure_list.dup
  until pending.empty?
    structure = pending.shift
    if structure[0] != :elem
      fixed << structure
      next
    end
    elem, pushed_out = fix_element(structure, [], [], is_xml, is_html)
    fixed << elem
    pending = pushed_out + pending
  end
  fixed
end
# Repairs the content of one element structure.
#
# Returns a pair: the fixed [:elem, ...] structure and the list of
# structures that could not stay inside it and must follow it instead.
#
# excluded_tags / included_tags accumulate the exclusions and inclusions of
# the enclosing elements.
def HTree.fix_element(elem, excluded_tags, included_tags, is_xml, is_html)
stag_raw_string = elem[1]
children = elem[2]
if etag_raw_string = elem[3]
# An explicit end tag settles the extent: keep all children, fixed.
return [:elem, stag_raw_string, fix_structure_list(children, is_xml, is_html), etag_raw_string], []
else
tagname = stag_raw_string[Pat::Name]
tagname = tagname.downcase if !is_xml && is_html
if ElementContent[tagname] == :EMPTY
# An element declared EMPTY keeps nothing; everything follows it.
return [:elem, stag_raw_string, []], children
else
if ElementContent[tagname] == :CDATA
possible_tags = []
else
possible_tags = ElementContent[tagname]
end
if possible_tags
excluded_tags2 = ElementExclusions[tagname]
included_tags2 = ElementInclusions[tagname]
excluded_tags |= excluded_tags2 if excluded_tags2
included_tags |= included_tags2 if included_tags2
containable_tags = (possible_tags | included_tags) - excluded_tags
uncontainable_tags = ElementContent.keys - containable_tags
else
# If the tagname is unknown, it is assumed that any element
# except excluded can be contained.
uncontainable_tags = excluded_tags
end
fixed_children = []
# Note: +rest+ starts as the same array object as +children+, so the
# shift calls below consume the array stored in +elem+.
rest = children
until rest.empty?
if rest[0][0] == :elem
elem = rest.shift
elem_tagname = elem[1][Pat::Name]
elem_tagname = elem_tagname.downcase if !is_xml && is_html
if uncontainable_tags.include? elem_tagname
# The first child that cannot be contained ends this element; it and
# everything after it are handed back to the caller.
rest.unshift elem
break
else
fixed_elem, rest2 = fix_element(elem, excluded_tags, included_tags, is_xml, is_html)
fixed_children << fixed_elem
# What the child pushed out is reconsidered as content of this element.
rest = rest2 + rest
end
else
fixed_children << rest.shift
end
end
return [:elem, stag_raw_string, fixed_children], rest
end
end
end
# Turns one structure (as produced by the pairing and fixing steps) into a
# node object by dispatching on its first entry.  Elements parse their tags
# and build their children recursively in the context of the start tag.
def HTree.build_node(structure, is_xml, is_html, inherited_context=DefaultContext)
  source = structure[1]
  case structure[0]
  when :text_pcdata then Text.parse_pcdata(source)
  when :elem
    children = structure[2]
    etag_source = structure[3]
    etag = etag_source && ETag.parse(etag_source, is_xml, is_html)
    stag = STag.parse(source, true, is_xml, is_html, inherited_context)
    # An XHTML element declared EMPTY, with no children and no end tag, is
    # built as an empty element; everything else gets a children list.
    bare_empty = children.empty? && !etag &&
      stag.element_name.namespace_uri == 'http://www.w3.org/1999/xhtml' &&
      HTree::ElementContent[stag.element_name.local_name] == :EMPTY
    if bare_empty
      Elem.new!(stag)
    else
      Elem.new!(stag,
        children.map {|child| build_node(child, is_xml, is_html, stag.context) },
        etag)
    end
  when :emptytag
    Elem.new!(STag.parse(source, false, is_xml, is_html, inherited_context))
  when :bogus_etag then BogusETag.parse(source, is_xml, is_html)
  when :xmldecl then XMLDecl.parse(source)
  when :doctype then DocType.parse(source, is_xml, is_html)
  when :procins then ProcIns.parse(source)
  when :comment then Comment.parse(source)
  when :text_cdata_content then Text.parse_cdata_content(source)
  when :text_cdata_section then Text.parse_cdata_section(source)
  else
    raise Exception, "[bug] unknown structure: #{structure.inspect}"
  end
end
# Parses the source text of a start tag (is_stag true) or an empty-element
# tag (is_stag false) into an STag and records the source as its raw string.
#
# The strict patterns are tried first, then the permissive ones.
# Raises HTree::Error when neither matches.
def STag.parse(raw_string, is_stag, is_xml, is_html, inherited_context=DefaultContext)
attrs = []
if (is_stag ? /\A#{Pat::ValidStartTag_C}\z/o : /\A#{Pat::ValidEmptyTag_C}\z/o) =~ raw_string
qname = $1
# Inside the scan block $1..$5 refer to the attribute pattern: an entry is
# [nil, value] when only $5 matched (no attribute name), else [name, value].
$2.scan(Pat::ValidAttr_C) {
attrs << ($5 ? [nil, $5] : [$1, $2 || $3 || $4])
}
elsif (is_stag ? /\A#{Pat::InvalidStartTag_C}\z/o : /\A#{Pat::InvalidEmptyTag_C}\z/o) =~ raw_string
qname = $1
# $3 must be saved now; the scan below overwrites the match variables.
last_attr = $3
$2.scan(Pat::InvalidAttr1_C) {
attrs << ($5 ? [nil, $5] : [$1, $2 || $3 || $4])
}
if last_attr
/#{Pat::InvalidAttr1End_C}/o =~ last_attr
attrs << [$1, $2 || $3]
end
else
raise HTree::Error, "cannot recognize as start tag or empty tag: #{raw_string.inspect}"
end
# Names are downcased only for non-XML HTML.
qname = qname.downcase if !is_xml && is_html
attrs.map! {|aname, aval|
if aname
aname = (!is_xml && is_html) ? aname.downcase : aname
[aname, Text.parse_pcdata(aval)]
else
# Attribute given by value only: look the name up in the per-element
# OmittedAttrName table, falling back to the downcased value; without a
# table entry for the element the value itself is used as the name.
if val2name = OmittedAttrName[qname]
aval_downcase = aval.downcase
aname = val2name.fetch(aval_downcase, aval_downcase)
else
aname = aval
end
[aname, Text.new(aval)]
end
}
result = STag.new(qname, attrs, inherited_context)
result.raw_string = raw_string
result
end
# Parses the source text of an end tag into an ETag and records the source
# as its raw string.  The name is downcased for non-XML HTML.
# Raises HTree::Error when the text is not an end tag.
def ETag.parse(raw_string, is_xml, is_html)
  unless /\A#{Pat::EndTag_C}\z/o =~ raw_string
    raise HTree::Error, "cannot recognize as end tag: #{raw_string.inspect}"
  end
  name = Regexp.last_match(1)
  name = name.downcase if !is_xml && is_html
  tag = self.new(name)
  tag.raw_string = raw_string
  tag
end
# Same as ETag.parse, for an end tag that has no matching start tag.
def BogusETag.parse(raw_string, is_xml, is_html)
  unless /\A#{Pat::EndTag_C}\z/o =~ raw_string
    raise HTree::Error, "cannot recognize as end tag: #{raw_string.inspect}"
  end
  name = Regexp.last_match(1)
  name = name.downcase if !is_xml && is_html
  tag = self.new(name)
  tag.raw_string = raw_string
  tag
end
# Builds a Text from parsed character data, repairing sloppy references:
# - a reference already ending in ";" is kept,
# - a numeric reference without ";" gets one appended,
# - a lone "&" becomes "&amp;",
# - a name without ";" gets one appended when NamedCharactersPattern matches
#   it; otherwise its "&" is escaped as "&amp;".
# The original source is recorded as the raw string.
def Text.parse_pcdata(raw_string)
fixed = raw_string.gsub(/&(?:(?:#[0-9]+|#x[0-9a-fA-F]+|([A-Za-z][A-Za-z0-9]*));?)?/o) {|s|
# $1 must be read before the case below runs its own pattern matches.
name = $1
case s
when /;\z/
s
when /\A&#/
"#{s};"
when '&'
'&amp;'
else
if NamedCharactersPattern =~ name
"&#{name};"
else
"&amp;#{name}"
end
end
}
# When nothing changed, reuse the raw_string object itself instead of the
# equal copy produced by gsub.
fixed = raw_string if fixed == raw_string
result = Text.new_internal(fixed)
result.raw_string = raw_string
result
end
# Builds a Text from the content of a CDATA element: the source is taken
# literally and also recorded as the raw string.
def Text.parse_cdata_content(raw_string)
  text = Text.new(raw_string)
  text.raw_string = raw_string
  text
end
# Builds a Text from a CDATA section: the text is the part captured by the
# CDATA pattern, the raw string is the whole section.
# Raises HTree::Error when the source is not a CDATA section.
def Text.parse_cdata_section(raw_string)
  unless /\A#{Pat::CDATA_C}\z/o =~ raw_string
    raise HTree::Error, "cannot recognize as CDATA section: #{raw_string.inspect}"
  end
  text = Text.new(Regexp.last_match(1))
  text.raw_string = raw_string
  text
end
# Parses the source text of an XML declaration and records it as the raw
# string.  The version, encoding and standalone values each come from one of
# two alternative capture groups; standalone maps "yes" to true, "no" to
# false and anything else to nil.
# Raises HTree::Error when the text is not an XML declaration.
def XMLDecl.parse(raw_string)
  unless /\A#{Pat::XmlDecl_C}\z/o =~ raw_string
    raise HTree::Error, "cannot recognize as XML declaration: #{raw_string.inspect}"
  end
  match = Regexp.last_match
  version = match[1] || match[2]
  encoding = match[3] || match[4]
  standalone = {'yes' => true, 'no' => false}[match[5] || match[6]]
  decl = XMLDecl.new(version, encoding, standalone)
  decl.raw_string = raw_string
  decl
end
# Parses the source text of a document type declaration and records it as
# the raw string.  The public and system identifiers each come from one of
# two alternative capture groups.  The root element name is downcased for
# non-XML HTML.
# Raises HTree::Error when the text is not a document type declaration.
def DocType.parse(raw_string, is_xml, is_html)
  unless /\A#{Pat::DocType_C}\z/o =~ raw_string
    # The message names the construct being parsed here; it used to say
    # "XML declaration".
    raise HTree::Error, "cannot recognize as document type declaration: #{raw_string.inspect}"
  end
  root_element_name = $1
  public_identifier = $2 || $3
  system_identifier = $4 || $5
  root_element_name = root_element_name.downcase if !is_xml && is_html
  result = DocType.new(root_element_name, public_identifier, system_identifier)
  result.raw_string = raw_string
  result
end
# Parses the source text of a processing instruction (target and content
# captures) and records it as the raw string.
# Raises HTree::Error when the text is not a processing instruction.
def ProcIns.parse(raw_string)
  unless /\A#{Pat::XmlProcIns_C}\z/o =~ raw_string
    raise HTree::Error, "cannot recognize as processing instruction: #{raw_string.inspect}"
  end
  match = Regexp.last_match
  instruction = ProcIns.new(match[1], match[2])
  instruction.raw_string = raw_string
  instruction
end
# Parses the source text of a comment and records it as the raw string.
# Raises HTree::Error when the text is not a comment.
def Comment.parse(raw_string)
  unless /\A#{Pat::Comment_C}\z/o =~ raw_string
    raise HTree::Error, "cannot recognize as comment: #{raw_string.inspect}"
  end
  comment = Comment.new(Regexp.last_match(1))
  comment.raw_string = raw_string
  comment
end
# :startdoc:
end

@ -0,0 +1,127 @@
require 'htree/modules'
require 'htree/fstr'
module HTree
module Node
  # raw_string returns a source string recorded by parsing.
  # It returns +nil+ if the node is constructed not via parsing.
  #
  # The string is accumulated by raw_string_internal, which appends to the
  # buffer it is given and throws :raw_string_tag as soon as a part without
  # recorded source is met.
  def raw_string
    result = ''
    catch(:raw_string_tag) {
      raw_string_internal(result)
      # Return the buffer itself, not the value of raw_string_internal: that
      # value is the children array for a Doc and nil for an Elem without
      # an end tag.
      return result
    }
    nil
  end
end
# :stopdoc:
class Doc
def raw_string_internal(result)
@children.each {|n|
n.raw_string_internal(result)
}
end
end
class Elem
def raw_string_internal(result)
@stag.raw_string_internal(result)
@children.each {|n| n.raw_string_internal(result) }
@etag.raw_string_internal(result) if @etag
end
end
module Tag
def init_raw_string() @raw_string = nil end
def raw_string=(arg) @raw_string = HTree.frozen_string(arg) end
def raw_string_internal(result)
throw :raw_string_tag if !@raw_string
result << @raw_string
end
end
module Leaf
def init_raw_string() @raw_string = nil end
def raw_string=(arg) @raw_string = HTree.frozen_string(arg) end
def raw_string_internal(result)
throw :raw_string_tag if !@raw_string
result << @raw_string
end
end
class Text
def raw_string=(arg)
if arg == @rcdata then
@raw_string = @rcdata
else
super
end
end
end
# :startdoc:
module Node
def eliminate_raw_string
raise NotImplementedError
end
end
# :stopdoc:
class Doc
def eliminate_raw_string
Doc.new(@children.map {|c| c.eliminate_raw_string })
end
end
class Elem
def eliminate_raw_string
Elem.new!(
@stag.eliminate_raw_string,
@empty ? nil : @children.map {|c| c.eliminate_raw_string },
@etag && @etag.eliminate_raw_string)
end
end
class Text
def eliminate_raw_string
Text.new_internal(@rcdata)
end
end
class STag
def eliminate_raw_string
STag.new(@qualified_name, @attributes, @inherited_context)
end
end
class ETag
def eliminate_raw_string
self.class.new(@qualified_name)
end
end
class XMLDecl
def eliminate_raw_string
XMLDecl.new(@version, @encoding, @standalone)
end
end
class DocType
def eliminate_raw_string
DocType.new(@root_element_name, @public_identifier, @system_identifier)
end
end
class ProcIns
def eliminate_raw_string
ProcIns.new(@target, @content)
end
end
class Comment
def eliminate_raw_string
Comment.new(@content)
end
end
# :startdoc:
end

@ -0,0 +1,14 @@
class Regexp
  # Returns a new Regexp equivalent to this one in which every capturing
  # group "(" has been turned into a non-capturing group "(?:".
  #
  # The source is split into escape sequences ("\\x"), "(?" group openers,
  # bare "(" and runs of anything else; only the bare "(" tokens are
  # rewritten.  The options of the receiver are preserved.
  #
  # NOTE: a literal "(" inside a character class is also rewritten, which adds
  # "?" and ":" to that class.
  def disable_capture
    re = self.source.scan(/\\.|[^\\\(]+|\(\?|\(/m).map {|s|
      s == '(' ? '(?:' : s
    }.join
    if respond_to?(:kcode)
      # Ruby 1.8: carry the kcode over as before.
      Regexp.new(re, self.options, self.kcode)
    else
      # Regexp#kcode was removed in Ruby 1.9 and newer Rubies reject the
      # third argument of Regexp.new, so pass the options only.
      Regexp.new(re, self.options)
    end
  end
end

@ -0,0 +1,131 @@
# = REXML Tree Generator
#
# HTree::Node#to_rexml is used for converting HTree to REXML.
#
# == Method Summary
#
# - HTree::Node#to_rexml -> REXML::Child
#
# == Example
#
# HTree.parse(...).to_rexml #=> REXML::Document
#
# == Comparison between HTree and REXML.
#
# - HTree parser is permissive HTML/XML parser.
# REXML parser is strict XML parser.
# HTree is recommended if you need to parse realworld HTML.
# REXML is recommended if you need strict error checking.
# - HTree object is immutable.
# REXML object is mutable.
# REXML should be used if you need modification.
#
require 'htree/modules'
require 'htree/output' # HTree::DocType#generate_content
module HTree
  module Node
    # Converts this node into the corresponding REXML object.
    # REXML itself is loaded on first use.
    def to_rexml
      require 'rexml/document'
      to_rexml_internal(nil, DefaultContext)
    end
  end

  # :stopdoc:

  class Doc
    # A document is always the root: +parent+ must be nil.
    def to_rexml_internal(parent, context)
      raise ArgumentError, "parent must be nil" unless parent == nil
      document = REXML::Document.new
      self.children.each {|child| child.to_rexml_internal(document, context) }
      document
    end
  end

  class Elem
    # Builds a REXML::Element under +parent+, declaring every namespace
    # binding used by the element name or its attributes that differs from
    # what +context+ already provides, then converts the children in the
    # extended context.
    def to_rexml_internal(parent, context)
      ename = self.element_name
      pending_ns = {}
      prefix = ename.namespace_prefix
      if context.namespace_uri(prefix) != ename.namespace_uri
        pending_ns[prefix] = ename.namespace_uri
      end
      qname = prefix ? "#{prefix}:#{ename.local_name}" : ename.local_name
      element = REXML::Element.new(qname, parent)

      self.each_attribute {|aname, atext|
        attr_prefix = aname.namespace_prefix
        if attr_prefix
          if context.namespace_uri(attr_prefix) != aname.namespace_uri
            pending_ns[attr_prefix] = aname.namespace_uri
          end
          attr_qname = "#{attr_prefix}:#{aname.local_name}"
        else
          attr_qname = aname.local_name
        end
        element.add_attribute(attr_qname, atext.to_s)
      }

      pending_ns.each {|ns_prefix, uri|
        # A nil prefix stands for the default namespace.
        ns_prefix ? element.add_namespace(ns_prefix, uri) : element.add_namespace(uri)
      }

      inner_context = context.subst_namespaces(pending_ns)
      self.children.each {|child| child.to_rexml_internal(element, inner_context) }
      element
    end
  end

  class Text
    # "<" and ">" are replaced through Encoder::ChRef before the rcdata is
    # handed to REXML::Text.
    def to_rexml_internal(parent, context)
      escaped = self.rcdata.gsub(/[<>]/) {|ch| Encoder::ChRef[ch] }
      REXML::Text.new(escaped, true, parent, true)
    end
  end

  class XMLDecl
    def to_rexml_internal(parent, context)
      decl = REXML::XMLDecl.new(self.version, self.encoding, self.standalone)
      parent << decl if parent
      decl
    end
  end

  class DocType
    def to_rexml_internal(parent, context)
      REXML::DocType.new([self.root_element_name, self.generate_content], parent)
    end
  end

  class ProcIns
    def to_rexml_internal(parent, context)
      instruction = REXML::Instruction.new(self.target, self.content)
      parent << instruction if parent
      instruction
    end
  end

  class Comment
    def to_rexml_internal(parent, context)
      REXML::Comment.new(self.content, parent)
    end
  end

  class BogusETag
    # A stray end tag has no REXML counterpart.
    def to_rexml_internal(parent, context)
      nil
    end
  end

  # :startdoc:
end

@ -0,0 +1,166 @@
require 'htree/htmlinfo'
require 'htree/regexp-util'
require 'htree/fstr'
module HTree
# :stopdoc:
# Regular expressions used by the scanner and the node parsers.
#
# Naming convention visible below: a constant ending in _C keeps its capture
# groups (the parsers read them via $1, $2, ...); the constant of the same
# name without _C is produced with Regexp#disable_capture so that it can be
# embedded in a larger pattern without shifting that pattern's group numbers.
module Pat
# --- names ---
NameChar = /[-A-Za-z0-9._:]/
Name = /[A-Za-z_:]#{NameChar}*/
Nmtoken = /#{NameChar}+/
# --- comments and CDATA sections (group 1: the content) ---
Comment_C = /<!--(.*?)-->/m
Comment = Comment_C.disable_capture
CDATA_C = /<!\[CDATA\[(.*?)\]\]>/m
CDATA = CDATA_C.disable_capture
# --- attributes, from strictest to most permissive ---
# QuotedAttr: name = "value" or name = 'value' only.
QuotedAttr_C = /(#{Name})\s*=\s*(?:"([^"]*)"|'([^']*)')/
QuotedAttr = QuotedAttr_C.disable_capture
# ValidAttr: additionally an unquoted value made of name characters, or a
# bare name token with no value.
ValidAttr_C = /(#{Name})\s*=\s*(?:"([^"]*)"|'([^']*)'|(#{NameChar}*))|(#{Nmtoken})/
ValidAttr = ValidAttr_C.disable_capture
# InvalidAttr1: unquoted values may contain anything except whitespace,
# angle brackets and quotes.
InvalidAttr1_C = /(#{Name})\s*=\s*(?:'([^'<>]*)'|"([^"<>]*)"|([^\s<>"']*(?![^\s<>"'])))|(#{Nmtoken})/
InvalidAttr1 = InvalidAttr1_C.disable_capture
# InvalidAttr1End: a final attribute whose opening quote is never closed.
InvalidAttr1End_C = /(#{Name})(?:\s*=\s*(?:'([^'<>]*)|"([^"<>]*)))/
InvalidAttr1End = InvalidAttr1End_C.disable_capture
# --- start tags (group 1: tag name, group 2: attribute list) ---
QuotedStartTag_C = /<(#{Name})((?:\s+#{QuotedAttr})*)\s*>/
QuotedStartTag = QuotedStartTag_C.disable_capture
ValidStartTag_C = /<(#{Name})((?:\s+#{ValidAttr})*)\s*>/
ValidStartTag = ValidStartTag_C.disable_capture
InvalidStartTag_C = /<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*>/
InvalidStartTag = InvalidStartTag_C.disable_capture
StartTag = /#{QuotedStartTag}|#{ValidStartTag}|#{InvalidStartTag}/
# --- empty-element tags: same shapes as the start tags, ending in "/>" ---
QuotedEmptyTag_C = %r{<(#{Name})((?:\s+#{QuotedAttr})*)\s*/>}
QuotedEmptyTag = QuotedEmptyTag_C.disable_capture
ValidEmptyTag_C = %r{<(#{Name})((?:\s+#{ValidAttr})*)\s*/>}
ValidEmptyTag = ValidEmptyTag_C.disable_capture
InvalidEmptyTag_C = %r{<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*/>}
InvalidEmptyTag = InvalidEmptyTag_C.disable_capture
EmptyTag = /#{QuotedEmptyTag}|#{ValidEmptyTag}|#{InvalidEmptyTag}/
# --- end tags (group 1: tag name) ---
EndTag_C = %r{</(#{Name})\s*>}
EndTag = EndTag_C.disable_capture
# --- XML declaration; every pseudo-attribute value has one capture group
# per quoting style ---
XmlVersionNum = /[a-zA-Z0-9_.:-]+/
XmlVersionInfo_C = /\s+version\s*=\s*(?:'(#{XmlVersionNum})'|"(#{XmlVersionNum})")/
XmlVersionInfo = XmlVersionInfo_C.disable_capture
XmlEncName = /[A-Za-z][A-Za-z0-9._-]*/
XmlEncodingDecl_C = /\s+encoding\s*=\s*(?:"(#{XmlEncName})"|'(#{XmlEncName})')/
XmlEncodingDecl = XmlEncodingDecl_C.disable_capture
XmlSDDecl_C = /\s+standalone\s*=\s*(?:'(yes|no)'|"(yes|no)")/
XmlSDDecl = XmlSDDecl_C.disable_capture
# Groups 1-2: version, 3-4: encoding, 5-6: standalone.
XmlDecl_C = /<\?xml#{XmlVersionInfo_C}#{XmlEncodingDecl_C}?#{XmlSDDecl_C}?\s*\?>/
XmlDecl = /<\?xml#{XmlVersionInfo}#{XmlEncodingDecl}?#{XmlSDDecl}?\s*\?>/
# --- document type declaration ---
# xxx: internal DTD subset is not recognized: '[' (markupdecl | DeclSep)* ']' S?)?
SystemLiteral_C = /"([^"]*)"|'([^']*)'/
PubidLiteral_C = %r{"([\sa-zA-Z0-9\-'()+,./:=?;!*\#@$_%]*)"|'([\sa-zA-Z0-9\-()+,./:=?;!*\#@$_%]*)'}
ExternalID_C = /(?:SYSTEM|PUBLIC\s+#{PubidLiteral_C})(?:\s+#{SystemLiteral_C})?/
# Group 1: root element name, 2-3: public identifier, 4-5: system identifier.
DocType_C = /<!DOCTYPE\s+(#{Name})(?:\s+#{ExternalID_C})?\s*(?:\[.*?\]\s*)?>/m
DocType = DocType_C.disable_capture
# --- processing instruction (group 1: target, group 2: content) ---
XmlProcIns_C = /<\?(#{Name})(?:\s+(.*?))?\?>/m
XmlProcIns = XmlProcIns_C.disable_capture
#ProcIns = /<\?([^>]*)>/m
end
# Splits +input+ into markup and text tokens and yields each one to the block
# as a two-element array [kind, frozen_source_string].
#
# Kinds yielded: :xmldecl, :doctype, :procins, :stag, :etag, :emptytag,
# :comment, :text_pcdata, :text_cdata_content and :text_cdata_section.
#
# While scanning, the method guesses whether the input is XML and/or HTML
# (from an XML declaration, the doctype and the first start tag) and returns
# the pair [is_xml, is_html].  +is_xml+ may be preset by the caller.
def HTree.scan(input, is_xml=false)
is_html = false
# Name of the element whose raw (CDATA) content is currently being skipped,
# or nil when scanning ordinary markup.
cdata_content = nil
# Offset in +input+ where the not-yet-yielded text begins.
text_start = 0
first_element = true
# Positions of the capture groups in the alternation below, in the order the
# alternatives are written.
index_xmldecl = 1
index_doctype = 2
index_xmlprocins = 3
index_quotedstarttag = 4
index_quotedemptytag = 5
index_starttag = 6
index_endtag = 7
index_emptytag = 8
index_comment = 9
index_cdata = 10
input.scan(/(#{Pat::XmlDecl})
|(#{Pat::DocType})
|(#{Pat::XmlProcIns})
|(#{Pat::QuotedStartTag})
|(#{Pat::QuotedEmptyTag})
|(#{Pat::StartTag})
|(#{Pat::EndTag})
|(#{Pat::EmptyTag})
|(#{Pat::Comment})
|(#{Pat::CDATA})
/ox) {
match = $~
if cdata_content
# Inside a CDATA-content element every match is ignored except the end tag
# with the same name; everything before it is yielded as one text token.
str = $&
if match.begin(index_endtag) && str[Pat::Name] == cdata_content
text_end = match.begin(0)
if text_start < text_end
yield [:text_cdata_content, HTree.frozen_string(input[text_start...text_end])]
end
yield [:etag, HTree.frozen_string(str)]
text_start = match.end(0)
cdata_content = nil
end
else
str = match[0]
# Text between the previous token and this match.
text_end = match.begin(0)
if text_start < text_end
yield [:text_pcdata, HTree.frozen_string(input[text_start...text_end])]
end
text_start = match.end(0)
if match.begin(index_xmldecl)
yield [:xmldecl, HTree.frozen_string(str)]
is_xml = true
elsif match.begin(index_doctype)
# Re-match with the capturing pattern to inspect the doctype's parts.
Pat::DocType_C =~ str
root_element_name = $1
public_identifier = $2 || $3
system_identifier = $4 || $5
is_html = true if /\Ahtml\z/i =~ root_element_name
is_xml = true if public_identifier && %r{\A-//W3C//DTD XHTML } =~ public_identifier
yield [:doctype, HTree.frozen_string(str)]
elsif match.begin(index_xmlprocins)
yield [:procins, HTree.frozen_string(str)]
elsif match.begin(index_starttag) || match.begin(index_quotedstarttag)
yield stag = [:stag, HTree.frozen_string(str)]
tagname = str[Pat::Name]
if first_element
# The first start tag decides: a typical HTML top-level/head element
# means HTML, anything else is taken as XML.
if /\A(?:html|head|title|isindex|base|script|style|meta|link|object)\z/i =~ tagname
is_html = true
else
is_xml = true
end
first_element = false
end
# In non-XML input, elements declared as CDATA in ElementContent have raw
# content up to their end tag.
if !is_xml && ElementContent[tagname] == :CDATA
cdata_content = tagname
end
elsif match.begin(index_endtag)
yield [:etag, HTree.frozen_string(str)]
elsif match.begin(index_emptytag) || match.begin(index_quotedemptytag)
yield [:emptytag, HTree.frozen_string(str)]
first_element = false
#is_xml = true
elsif match.begin(index_comment)
yield [:comment, HTree.frozen_string(str)]
elsif match.begin(index_cdata)
yield [:text_cdata_section, HTree.frozen_string(str)]
else
# Every alternative of the pattern is handled above.
raise Exception, "unknown match [bug]"
end
end
}
# Trailing text after the last token.
text_end = input.length
if text_start < text_end
if cdata_content
yield [:text_cdata_content, HTree.frozen_string(input[text_start...text_end])]
else
yield [:text_pcdata, HTree.frozen_string(input[text_start...text_end])]
end
end
return is_xml, is_html
end
# :startdoc:
end

@ -0,0 +1,113 @@
require 'htree/raw_string'
require 'htree/text'
require 'htree/scan' # for Pat::Name and Pat::Nmtoken
require 'htree/context'
require 'htree/name'
require 'htree/fstr'
module HTree
# :stopdoc:
# Start tag of an element: the element name, its attributes and the namespace
# context in effect for the element.
class STag
# name::              element name; a Name object is used as is, anything
#                     else is parsed with Name.parse_element_name
# attributes::        enumerable of [attribute_name, value] pairs
# inherited_context:: namespace context of the enclosing element
#
# Raises HTree::Error for an invalid attribute name or for an attribute that
# has a namespace URI but no prefix, and ArgumentError when Name objects use
# one prefix for two different namespace URIs.
def initialize(name, attributes=[], inherited_context=DefaultContext)
init_raw_string
# normalize xml declaration name and attribute value.
attributes = attributes.map {|aname, val|
if !(Name === aname) && /\A(?:#{Pat::Name}?\{.*\})?#{Pat::Nmtoken}\z/o !~ aname
raise HTree::Error, "invalid attribute name: #{aname.inspect}"
end
# xmlns / xmlns:prefix attributes are parsed right away (without a context);
# all other string names are parsed later, once the context is known.
if !(Name === aname) && /\Axmlns(?:\z|:)/ =~ aname
aname = Name.parse_attribute_name(aname, nil)
end
val = val.to_node if HTree::Location === val
val = Text.new(val) unless Text === val
[aname, val]
}
@inherited_context = inherited_context
# prefix (nil for the default namespace) => namespace URI declared or
# implied by this tag.
@xmlns_decls = {}
# validate namespace consistency of given Name objects.
if Name === name
@xmlns_decls[name.namespace_prefix] = name.namespace_uri
end
# First pass: bindings implied by already-parsed, prefixed attribute names.
attributes.each {|aname, text|
next unless Name === aname
next if aname.xmlns?
if aname.namespace_prefix && aname.namespace_uri
if @xmlns_decls.include? aname.namespace_prefix
if @xmlns_decls[aname.namespace_prefix] != aname.namespace_uri
raise ArgumentError, "inconsistent namespace use: #{aname.namespace_prefix} is used as #{@xmlns_decls[aname.namespace_prefix]} and #{aname.namespace_uri}"
end
else
@xmlns_decls[aname.namespace_prefix] = aname.namespace_uri
end
end
}
# Second pass: explicit xmlns declarations, which do not override a binding
# recorded above.
attributes.each {|aname, text|
next unless Name === aname
next unless aname.xmlns?
next if @xmlns_decls.include? aname.local_name
if aname.local_name
@xmlns_decls[aname.local_name] = text.to_s
else
uri = text.to_s
@xmlns_decls[nil] = uri
end
}
@context = make_context(@inherited_context)
if Name === name
@name = name
else
@name = Name.parse_element_name(name, @context)
end
# Parse the remaining string attribute names in the final context.
@attributes = attributes.map {|aname, text|
aname = Name.parse_attribute_name(aname, @context) unless Name === aname
if !aname.namespace_prefix && !aname.namespace_uri.empty?
# xxx: should recover error?
raise HTree::Error, "global attribute without namespace prefix: #{aname.inspect}"
end
[aname, text]
}
@attributes.freeze
end
attr_reader :attributes, :inherited_context, :context
# Returns the element name as a Name object.
def element_name
@name
end
# Returns +inherited_context+ extended with the bindings of this tag.
def make_context(inherited_context)
inherited_context.subst_namespaces(@xmlns_decls)
end
# Yields each namespace binding of this tag as (prefix, uri); returns nil.
def each_namespace_attribute
@xmlns_decls.each {|name, uri|
yield name, uri
}
nil
end
# Yields each attribute as (name, text), skipping xmlns declarations;
# returns nil.
def each_attribute
@attributes.each {|name, text|
next if name.xmlns?
yield name, text
}
nil
end
end
# End tag of an element; it only remembers the qualified name as written.
class ETag
  attr_reader :qualified_name

  # qualified_name:: the tag name; stored via HTree.frozen_string
  def initialize(qualified_name)
    init_raw_string
    @qualified_name = HTree.frozen_string(qualified_name)
  end
end
# :startdoc:
end

@ -0,0 +1,961 @@
# = Template Engine
#
# The htree template engine converts HTML and some data to HTML or XML.
#
# == Template Method Summary
#
# - HTree.expand_template(<i>template_pathname</i>) -> $stdout
# - HTree.expand_template(<i>template_pathname</i>, <i>obj</i>) -> $stdout
# - HTree.expand_template(<i>template_pathname</i>, <i>obj</i>, <i>out</i>) -> <i>out</i>
# - HTree.expand_template(<i>template_pathname</i>, <i>obj</i>, <i>out</i>, <i>encoding</i>) -> <i>out</i>
#
# - HTree.expand_template{<i>template_string</i>} -> $stdout
# - HTree.expand_template(<i>out</i>) {<i>template_string</i>} -> <i>out</i>
# - HTree.expand_template(<i>out</i>, <i>encoding</i>) {<i>template_string</i>} -> <i>out</i>
#
# - HTree.compile_template(<i>template_string</i>) -> Module
# - HTree{<i>template_string</i>} -> HTree::Doc
#
# Note that the following method, HTree(), is not a template method.
#
# - HTree(<i>html_string</i>) -> HTree::Doc
#
# == Template Directives.
#
# A template directive is described as a special HTML attribute which name
# begins with underscore.
#
# The template directives are listed as follows.
#
# - <elem \_attr_<i>name</i>="<i>expr</i>">content</elem>
# - <elem _text="<i>expr</i>">dummy-content</elem>
# - <elem _text><i>expr</i></elem>
# - <elem _tree="<i>expr</i>">dummy-content</elem>
# - <elem _tree><i>expr</i></elem>
# - <elem _if="<i>expr</i>" _else="<i>mod.name(args)</i>">then-content</elem>
# - <elem _iter="<i>expr.meth(args)//vars</i>">content</elem>
# - <elem _iter_content="<i>expr.meth(args)//vars</i>">content</elem>
# - <elem _call="<i>mod.name(args)</i>">dummy-content</elem>
# - <elem _template="<i>name(vars)</i>">body</elem>
#
# === Template Semantics
#
# - attribute substitution
# - <elem \_attr_<i>name</i>="<i>expr</i>">content</elem>
#
# \_attr_<i>name</i> is used for a dynamic attribute.
#
# <elem _attr_xxx="..."/>
# -> <elem xxx="..."/>
#
# It is expanded to <i>name</i>="content".
# The content is generated by evaluating _expr_.
# Usually you don't need to care escaping: &, <, > and " are automatically escaped.
# If you need to output character references,
# the value of _expr_ should be an object which have a +rcdata+ method such as an HTree::Text.
# If the value has a +rcdata+ method,
# it is called and the result is used as the content with escaping <, > and ".
#
# \_attr_<i>name</i> can be used multiple times in single element.
#
# - text substitution
# - <elem _text="<i>expr</i>">dummy-content</elem>
# - <elem _text><i>expr</i></elem>
#
# _text substitutes the content of the element by the string
# evaluated from _expr_.
# _expr_ is described in the attribute value or the content of the element.
#
# If a result of _expr_ have &, < and/or >, they are automatically escaped.
# If you need to output character references,
# the value of _expr_ should be an object which have a +rcdata+ method such as an HTree::Text.
# If the value has a +rcdata+ method,
# it is called and the result is used as the content with escaping < and >.
#
# If the element is span or div, and there is no other attributes,
# no tags are produced.
#
# <elem _text="...">dummy-content</elem>
# -> <elem>...</elem>
#
# - tree substitution
# - <elem _tree="<i>expr</i>">dummy-content</elem>
# - <elem _tree><i>expr</i></elem>
#
# _tree substitutes the content of the element by the htree object
# evaluated from _expr_.
# _expr_ is described in the attribute value or the content of the element.
#
# If the element is span or div, and there is no other attributes,
# no tags are produced.
#
# <elem _tree="...">dummy-content</elem>
# -> <elem>...</elem>
#
# - conditional
# - <elem _if="<i>expr</i>">then-content</elem>
# - <elem _if="<i>expr</i>" _else="<i>name(args)</i>">then-content</elem>
#
# _if is used for conditional.
#
# If <i>expr</i> is evaluated to true, it expands as follows
# regardless of existence of _else.
#
# <elem _if="<i>expr</i>">then-content</elem>
# -> <elem>then-content</elem>
#
# If <i>expr</i> is evaluated to false, it expands using _else.
# If _else is not given, it expands to empty.
# If _else is given, it expands as follows.
#
# <elem _if="<i>expr</i>" _else="<i>name(args)</i>">then-content</elem>
# -> <elem _call="<i>name(args)</i>">then-content</elem>
# -> see _call for further expansion.
#
# It is expanded to <elem>then-content</elem> if _expr_ is evaluated to
# a true value.
# Otherwise, it is replaced by other template specified by _else attribute.
# If _else attribute is not given, it just replaced by empty.
#
# - iteration
# - <elem _iter="<i>expr.meth(args)//vars</i>">content</elem>
# - <elem _iter_content="<i>expr.meth(args)//vars</i>">content</elem>
#
# _iter and _iter_content are used for iteration.
# _iter iterates the element itself but _iter_content iterates the content.
#
# <outer _iter="..."><inner/></outer>
# -> <outer><inner/></outer><outer><inner/></outer>...
#
# <outer _iter_content="..."><inner/></outer>
# -> <outer><inner/><inner/>...</outer>
#
# <i>expr.meth(args)</i> specifies iterator method call.
# It is actually called with a block.
# The block have block parameters <i>vars</i>.
# <i>vars</i> must be variables separated by comma.
#
# - template call
# - <elem _call="<i>name(args)</i>">dummy-content</elem>
# - <elem _call="<i>mod.name(args)</i>">dummy-content</elem>
#
# _call is used to expand a template function.
# The template function is defined by _template.
#
# <d _template="m">...</d>
# <c _call="m">...</c>
# -> <d>...</d>
#
# A local template can be called as follows:
#
# HTree.expand_template{<<'End'}
# <a _template=ruby_talk(num)
# _attr_href='"http://ruby-talk.org/#{num}"'
# >[ruby-talk:<span _text=num>nnn</span>]</a>
# Ruby 1.8.0 is released at <span _call=ruby_talk(77946) />.
# Ruby 1.8.1 is released at <span _call=ruby_talk(88814) />.
# End
#
# <i>mod</i> should be the result of HTree.compile_template.
#
# M = HTree.compile_template(<<'End')
# <a _template=ruby_talk(num)
# _attr_href='"http://ruby-talk.org/#{num}"'
# >[ruby-talk:<span _text=num>nnn</span>]</a>
# End
# HTree.expand_template{<<'End'}
# <html>
# Ruby 1.8.0 is released at <span _call=M.ruby_talk(77946) />.
# Ruby 1.8.1 is released at <span _call=M.ruby_talk(88814) />.
# </html>
# End
#
# The module can be included.
# In such case, the template function can be called without <i>mod.</i>
# prefix.
#
# include HTree.compile_template(<<'End')
# <a _template=ruby_talk(num)
# _attr_href='"http://ruby-talk.org/#{num}"'
# >[ruby-talk:<span _text=num>nnn</span>]</a>
# End
# HTree.expand_template{<<'End'}
# <html>
# Ruby 1.8.0 is released at <span _call=ruby_talk(77946) />.
# Ruby 1.8.1 is released at <span _call=ruby_talk(88814) />.
# </html>
# End
#
# - template definition
# - <elem _template="<i>name(vars)</i>">body</elem>
#
# _template defines a template function which is usable by _call.
#
# When a template is compiled to a module by HTree.compile_template,
# the module have a module function for each template function
# defined by outermost _template attribute.
#
# === White Space Handling
#
# The htree template engine strips whitespace text nodes in a template
# except under HTML pre element.
#
# For example the white space text node between two spans in following template is stripped.
#
# <span _text="'a'"/> <span _text="'b'"/> -> "ab"
#
# Character entity references are not stripped.
#
# <span _text="'a'"/>&#32;<span _text="'b'"/> -> "a&#32;b"
#
# Text nodes generated by _text is not stripped.
#
# <span _text="'a'"/><span _text="' '"> </span><span _text="'b'"/> -> "a b"
#
# == HTML and XML
#
# The htree template engine outputs HTML or XML.
#
# If a template has no XML declaration and the top element is HTML,
# the result is HTML.
# Otherwise the result is XML.
#
# They differ as follows.
#
# - XML declaration is (re-)generated for XML.
# - empty elements ends with a slash for XML.
# - script and style element is escaped for XML.
#
# == Design Decision on Design/Logic Separation
#
# HTree template engine doesn't force you to separate design and logic.
# Any logic (Ruby code) can be embedded in design (HTML).
#
# However the template engine cares the separation by logic refactorings.
# The logic is easy to move between a template and an application.
# For example, following tangled template
#
# tmpl.html:
# <html>
# <head>
# <title _text="very-complex-ruby-code">dummy</title>
# </head>
# ...
# </html>
#
# app.rb:
# HTree.expand_template('tmpl.html', obj)
#
# can be refactored as follows.
#
# tmpl.html:
# <html>
# <head>
# <title _text="title">dummy</title>
# </head>
# ...
# </html>
#
# app.rb:
# def obj.title
# very-complex-ruby-code
# end
# HTree.expand_template('tmpl.html', obj)
#
# In general, any expression in a template can be refactored to an application
# by extracting it as a method.
# In JSP, this is difficult especially for a code fragment of an iteration.
#
# Also HTree encourages to separate business logic (Ruby code in an application)
# and presentation logic (Ruby code in a template).
# For example, presentation logic to color table rows stripe
# can be embedded in a template.
# It doesn't need to tangle an application.
#
module HTree
# :stopdoc:
# Plain object on which the singleton method empty_binding is defined; that
# method supplies the binding used by the template code's eval calls.
EmptyBindingObject = Object.new
# :startdoc:
end
# :stopdoc:
# Returns the binding of this method body.  The method takes no parameters
# and defines no local variables, so code evaluated in the returned binding
# sees only HTree::EmptyBindingObject as self.
def (HTree::EmptyBindingObject).empty_binding
binding
end
# :startdoc:
require 'htree/parse'
require 'htree/gencode'
require 'htree/equality'
require 'htree/traverse'
# call-seq:
# HTree.expand_template(template_pathname, obj=Object.new, out=$stdout, encoding=internal_encoding) -> out
# HTree.expand_template(out=$stdout, encoding=internal_encoding) { template_string } -> out
#
# <code>HTree.expand_template</code> expands a template.
#
# The arguments should be specified as follows.
# All argument except <i>pathname</i> are optional.
#
# - HTree.expand_template(<i>pathname</i>, <i>obj</i>, <i>out</i>, <i>encoding</i>) -> <i>out</i>
# - HTree.expand_template(<i>out</i>, <i>encoding</i>) {<i>template_string</i>} -> <i>out</i>
#
# The template is specified by a file or a string.
# If a block is not given, the first argument represent a template pathname.
# Otherwise, the block is yielded and its value is interpreted as a template
# string.
# So it can be called as follows in simplest case.
#
# - HTree.expand_template(<i>template_pathname</i>)
# - HTree.expand_template{<i>template_string</i>}
#
# Ruby expressions in the template file specified by _template_pathname_ are
# evaluated in the context of the optional second argument <i>obj</i> as follows.
# I.e. the pseudo variable self in the expressions is bound to <i>obj</i>.
#
# HTree.expand_template(template_pathname, obj)
#
# Ruby expressions in the template_string are evaluated
# in the context of the caller of HTree.expand_template.
# (binding information is specified by the block.)
# I.e. they can access local variables etc.
# We recommend to specify template_string as a literal string without
# interpolation because dynamically generated string may break lexical scope.
#
# HTree.expand_template has two more optional arguments:
# <i>out</i>, <i>encoding</i>.
#
# <i>out</i> specifies output target.
# It should have <tt><<</tt> method: IO and String for example.
# If it is not specified, $stdout is used.
# If it has a method <tt>charset=</tt>, it is called to set the minimal charset
# of the result before <tt><<</tt> is called.
#
# <i>encoding</i> specifies output character encoding.
# If it is not specified, internal encoding is used.
#
# HTree.expand_template returns <i>out</i> or $stdout if <i>out</i> is not
# specified.
#
def HTree.expand_template(*args, &block)
if block
# Block form: the block's value is the template and the block itself is
# used as the evaluation context.
template = block.call
binding = block
else
# Pathname form: consume pathname and the optional obj from args.
pathname = args.fetch(0) { raise ArgumentError, "pathname not given" }
args.shift
obj = args.fetch(0) { Object.new }
args.shift
if pathname.respond_to? :read
template = pathname.read.untaint
# Convert to the internal charset when the read result reports a charset.
if template.respond_to? :charset
template = Iconv.conv(HTree::Encoder.internal_charset, template.charset, template)
end
else
template = File.read(pathname).untaint
end
# obj is handed to the eval'ed snippet through a thread-local because the
# snippet runs in the empty binding and cannot see this method's locals.
# The snippet returns a binding whose self is obj, created inside a
# class_eval of obj's class.
Thread.current[:htree_expand_template_obj] = obj
binding = eval(<<-'End',
Thread.current[:htree_expand_template_obj].class.class_eval <<-'EE'
Thread.current[:htree_expand_template_obj].instance_eval { binding }
EE
End
HTree::EmptyBindingObject.empty_binding)
Thread.current[:htree_expand_template_obj] = nil
end
# Remaining optional arguments: out, then encoding.
out = args.shift || $stdout
encoding = args.shift || HTree::Encoder.internal_charset
if !args.empty?
raise ArgumentError, "wrong number of arguments"
end
HTree::TemplateCompiler.new.expand_template(template, out, encoding, binding)
end
# call-seq:
# HTree(html_string) -> doc
# HTree{template_string} -> doc
#
# <code>HTree(<i>html_string</i>)</code> parses <i>html_string</i>.
# <code>HTree{<i>template_string</i>}</code> parses <i>template_string</i> and expand it as a template.
# Ruby expressions in <i>template_string</i> is evaluated in the scope of the caller.
#
# <code>HTree()</code> and <code>HTree{}</code> returns a tree as an instance of HTree::Doc.
def HTree(html_string=nil, &block)
  # Without a block this is a plain parse of the given string.
  return HTree.parse(html_string) unless block_given?

  raise ArgumentError, "both argument and block given." if html_string
  # With a block: the block's value is the template; it is expanded into a
  # string (evaluated in the block's scope) and the result is parsed.
  template = block.call
  expanded = HTree::TemplateCompiler.new.expand_template(template, '', HTree::Encoder.internal_charset, block)
  HTree.parse(expanded)
end
# call-seq:
# HTree.compile_template(template_string) -> module
#
# <code>HTree.compile_template(<i>template_string</i>)</code> compiles
# <i>template_string</i> as a template.
#
# HTree.compile_template returns a module.
# The module has module functions for each templates defined in
# <i>template_string</i>.
# The returned module can be used for +include+.
#
# M = HTree.compile_template(<<'End')
# <p _template=birthday(subj,t)>
# <span _text=subj />'s birthday is <span _text="t.strftime('%B %dth %Y')"/>.
# </p>
# End
# M.birthday('Ruby', Time.utc(1993, 2, 24)).display_xml
# # <p>Ruby's birthday is February 24th 1993.</p>
#
# The module function takes arguments specifies by a <code>_template</code>
# attribute and returns a tree represented as HTree::Node.
#
def HTree.compile_template(template_string)
# Ruby source text that evaluates to the template module.
code = HTree::TemplateCompiler.new.compile_template(template_string)
# The source is passed through a thread-local and evaluated in the empty
# binding, so the generated code does not see this method's local variables.
Thread.current[:htree_compile_template_code] = code
mod = eval(<<-'End',
eval(Thread.current[:htree_compile_template_code])
End
HTree::EmptyBindingObject.empty_binding)
Thread.current[:htree_compile_template_code] = nil
mod
end
# :stopdoc:
class HTree::TemplateCompiler
# Element names (plain and XHTML-namespaced span/div) whose tags are left out
# of the output when the element has no attributes left; consulted by
# compile_body and compile_node.
IGNORABLE_ELEMENTS = {
'span' => true,
'div' => true,
'{http://www.w3.org/1999/xhtml}span' => true,
'{http://www.w3.org/1999/xhtml}div' => true,
}
# Starts the counter used by gensym to number generated variable names.
def initialize
@gensym_id = 0
end
# Returns a fresh identifier of the form "g<N><suffix>", where N is a counter
# that is advanced on every call.
def gensym(suffix='')
  @gensym_id = @gensym_id + 1
  'g' + @gensym_id.to_s + suffix.to_s
end
# Parses the template source with HTree.parse and removes its
# whitespace-only text nodes (see strip_whitespaces).
def parse_template(template)
  tree = HTree.parse(template)
  strip_whitespaces(tree)
end
# Elements whose content is kept verbatim, whitespace included.
WhiteSpacePreservingElements = {
  '{http://www.w3.org/1999/xhtml}pre' => true
}

# Returns +template+ with every text node that consists only of spaces,
# tabs, CRs and LFs removed.
#
# * Doc:  rebuilt from the stripped, non-nil children.
# * Elem: returned untouched when listed in WhiteSpacePreservingElements;
#         otherwise each child is replaced by its stripped form (nil for a
#         dropped text node) through subst_subnode.
# * Text: nil when whitespace-only, otherwise the node itself.
# * anything else is returned unchanged.
def strip_whitespaces(template)
  if HTree::Doc === template
    kept = []
    template.children.each {|child|
      stripped = strip_whitespaces(child)
      kept << stripped unless stripped.nil?
    }
    HTree::Doc.new(*kept)
  elsif HTree::Elem === template
    return template if WhiteSpacePreservingElements[template.name]
    replacements = {}
    template.children.each_with_index {|child, index|
      replacements[index] = strip_whitespaces(child)
    }
    template.subst_subnode(replacements)
  elsif HTree::Text === template
    /\A[ \t\r\n]*\z/ =~ template.rcdata ? nil : template
  else
    template
  end
end
# Decides whether the template is to be rendered as HTML.
#
# The top-level children are inspected in order: an XML declaration seen
# first means "not HTML" (false); an element in the XHTML namespace seen
# first means HTML (true).  Without either, the answer is false.
def template_is_html(template)
  xhtml_namespace = 'http://www.w3.org/1999/xhtml'
  template.each_child {|child|
    if child.xmldecl?
      return false
    elsif child.elem? && child.element_name.namespace_uri == xhtml_namespace
      return true
    end
  }
  false
end
# Expands +template+ and appends the result to +out+.
#
# template:: template source text
# out::      output target; must respond to <<.  If it responds to
#            charset=, that is called with the minimal charset first.
# encoding:: output charset name, embedded into the generated code
# binding::  evaluation context for the generated code
#
# Returns +out+.
def expand_template(template, out, encoding, binding)
  template = parse_template(template)
  is_html = template_is_html(template)
  outvar = gensym('out')
  contextvar = gensym('top_context')
  context_name = is_html ? "HTree::HTMLContext" : "HTree::DefaultContext"
  finisher = is_html ? "finish" : "finish_with_xmldecl"

  # Generated program: create the encoder, pick the top-level context, emit
  # the body, and finally evaluate to [output, minimal_charset].
  pieces = []
  pieces << "#{outvar} = HTree::Encoder.new(#{encoding.dump})\n"
  pieces << "#{outvar}.html_output = true\n" if is_html
  pieces << "#{contextvar} = #{context_name}\n"
  pieces << compile_body(outvar, contextvar, template, false)
  pieces << "[#{outvar}.#{finisher}, #{outvar}.minimal_charset]\n"
  code = pieces.join('')
  #puts code; STDOUT.flush

  result, minimal_charset = eval(code, binding)
  out.charset = minimal_charset if out.respond_to? :charset=
  out << result
  out
end
# Compiles the template source +src+ into Ruby source text.
#
# Every element carrying a _template attribute is extracted and compiled
# into method definitions; the returned text, when evaluated, requires the
# encoder and context files and yields an anonymous module that has those
# methods as module functions.
def compile_template(src)
srcdoc = parse_template(src)
templates = []
# The remaining document body is not used; only the collected templates are.
body = extract_templates(srcdoc, templates, true)
methods = []
templates.each {|name_args, node|
methods << compile_global_template(name_args, node)
}
<<"End"
require 'htree/encoder'
require 'htree/context'
Module.new.module_eval <<'EE'
module_function
#{methods.join('').chomp}
self
EE
End
end
# True-ish (the match position 0) when the attribute's local name starts
# with an underscore, i.e. when it is a template directive; nil otherwise.
def template_attribute?(name)
  local = name.local_name
  /\A_/ =~ local
end
# Removes template definitions from the tree below +node+ and collects them.
#
# An element with a _template attribute is appended to +templates+ as
# [declaration_text, element_without_the_attribute] and replaced by nil.
# An element with other directive attributes is returned as is, unless
# +is_toplevel+ is true, in which case HTree::Error is raised.  Documents and
# directive-free elements are rebuilt from their processed children; any
# other node is returned unchanged.
def extract_templates(node, templates, is_toplevel)
  unless HTree::Doc === node
    return node unless HTree::Elem === node

    directive_attrs = node.attributes.partition {|name, text| template_attribute? name }.first
    unless directive_attrs.empty?
      directive_attrs.each {|htname, text|
        next unless htname.universal_name == '_template'
        templates << [text.to_s, node.subst_subnode('_template' => nil)]
        return nil
      }
      if is_toplevel
        raise HTree::Error, "unexpected template attributes in toplevel: #{directive_attrs.inspect}"
      end
      return node
    end
  end

  # A document, or an element without directive attributes: recurse.
  replacements = {}
  node.children.each_with_index {|child, index|
    replacements[index] = extract_templates(child, templates, is_toplevel)
  }
  node.subst_subnode(replacements)
end
# Identifier accepted for template names and parameters: a lowercase letter
# followed by lowercase letters, digits or underscores.
ID_PAT = /[a-z][a-z0-9_]*/
# Template declaration "name" or "name(arg, ...)".
# Group 1: the name; group 2: the text between the parentheses, or nil when
# there is no parameter list.
NAME_FARGS_PAT = /(#{ID_PAT})(?:\(\s*(|#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)\))?/
# Generates Ruby source for one template declared at the top level.
#
# name_fargs:: declaration text, e.g. "name(a, b)" (see NAME_FARGS_PAT)
# node::       the template's element
#
# Three methods are generated: "name", which parses the output of
# "_xml_name"; "_xml_name", which runs "_ht_name" against a fresh encoder and
# returns the finished output; and the public "_ht_name", which holds the
# compiled body and takes the output and context variables before the
# declared parameters.
#
# Raises HTree::Error when the declaration does not match NAME_FARGS_PAT.
def compile_global_template(name_fargs, node)
unless /\A#{NAME_FARGS_PAT}\z/o =~ name_fargs
raise HTree::Error, "invalid template declaration: #{name_fargs}"
end
name = $1
fargs = $2 ? $2.scan(ID_PAT) : []
outvar = gensym('out')
contextvar = gensym('top_context')
args2 = [outvar, contextvar, *fargs]
<<"End"
def #{name}(#{fargs.join(',')})
HTree.parse(_xml_#{name}(#{fargs.join(',')}))
end
def _xml_#{name}(#{fargs.join(',')})
#{outvar} = HTree::Encoder.new(HTree::Encoder.internal_charset)
#{contextvar} = HTree::DefaultContext
_ht_#{name}(#{args2.join(',')})
#{outvar}.finish
end
def _ht_#{name}(#{args2.join(',')})
#{compile_body(outvar, contextvar, node, false)}\
end
public :_ht_#{name}
End
end
# Generates Ruby source that assigns a lambda for a locally defined template
# to a local variable named after the template.  The lambda takes the output
# and context variables followed by the declared parameters, and its body is
# the compiled template body.
#
# Raises HTree::Error when the declaration does not match NAME_FARGS_PAT.
def compile_local_template(name_fargs, node, local_templates)
unless /\A#{NAME_FARGS_PAT}\z/o =~ name_fargs
raise HTree::Error, "invalid template declaration: #{name_fargs}"
end
name = $1
fargs = $2 ? $2.scan(ID_PAT) : []
outvar = gensym('out')
contextvar = gensym('top_context')
args2 = [outvar, contextvar, *fargs]
<<"End"
#{name} = lambda {|#{args2.join(',')}|
#{compile_body(outvar, contextvar, node, false, local_templates)}\
}
End
end
# Generates the output code for the body of +node+.
#
# When +node+ is an attribute-less element listed in IGNORABLE_ELEMENTS, only
# its children are compiled, so the element's own tags are not emitted;
# otherwise the node itself is compiled.  +is_toplevel+ is accepted but not
# used here.
def compile_body(outvar, contextvar, node, is_toplevel, local_templates={})
  drop_wrapper = node.elem? && IGNORABLE_ELEMENTS[node.name] && node.attributes.empty?
  wrapped = TemplateNode.new(drop_wrapper ? node.children : node)
  logic = generate_logic_node([:content], wrapped, local_templates)
  logic.generate_xml_output_code(outvar, contextvar)
end
# Compiles one tree node of a template.
#
# - HTree::Doc: every child is compiled and the results are wrapped in a
#   TemplateNode.
# - HTree::Elem: the element's template attributes select the logic
#   (_text, _tree, _if, _if with _else, _call, _iter, _iter_content) that
#   is handed to generate_logic_node.  Any other combination of template
#   attributes raises HTree::Error.
# - any other node is returned unchanged.
def compile_node(node, local_templates)
case node
when HTree::Doc
TemplateNode.new(node.children.map {|n| compile_node(n, local_templates) })
when HTree::Elem
ht_attrs = node.attributes.find_all {|name, text| template_attribute? name }
# Sorting by universal name makes the `case ht_names` patterns below
# independent of attribute order (hence ['_else', '_if']).
ht_attrs = ht_attrs.sort_by {|htname, text| htname.universal_name }
ignore_tag = false
unless ht_attrs.empty?
# attr_mod maps each template attribute to nil (removing it from the
# element).  A _attr_NAME attribute is additionally re-added as NAME
# under a TemplateAttrName key, keeping the original text as its value.
attr_mod = {}
ht_attrs.each {|htname, text|
attr_mod[htname] = nil
if /\A_attr_/ =~ htname.local_name
# $' is the local name with the "_attr_" prefix removed.
attr_mod[TemplateAttrName.new(htname.namespace_prefix, htname.namespace_uri, $')] = text
end
}
# _attr_* attributes are fully handled above; they do not take part in
# the logic selection below.
ht_attrs.reject! {|htname, text| /\A_attr_/ =~ htname.local_name }
node = node.subst_subnode(attr_mod)
# The tag itself is dropped from the output when the element is ignorable
# and has no attributes left after the substitution.
ignore_tag = IGNORABLE_ELEMENTS[node.name] && node.attributes.empty?
end
ht_names = ht_attrs.map {|htname, text| htname.universal_name }
ht_vals = ht_attrs.map {|htname, text| text.to_s }
case ht_names
when []
generate_logic_node([:tag, [:content]], node, local_templates)
when ['_text'] # <n _text="expr" /> or <n _text>expr</n>
# A value equal to the attribute name is treated as "value omitted";
# the expression is then taken from the single text child.
if ht_vals[0] != '_text' # xxx: attribute value is really omitted?
expr = ht_vals[0]
else
children = node.children
if children.length != 1
raise HTree::Error, "_text expression has #{children.length} nodes"
end
if !children[0].text?
raise HTree::Error, "_text expression is not text: #{children[0].class}"
end
expr = children[0].to_s
end
if /\A\s*'((?:[^'\\]|\\[\0-\377])*)'\s*\z/ =~ expr
# if expr is just a constant string literal, use it as a literal text.
# This saves dynamic evaluation of <span _text="' '"/>
# xxx: handle "..." as well if it has no #{}.
HTree::Text.new($1.gsub(/\\([\0-\377])/, '\1'))
else
generate_logic_node(compile_dynamic_text(ignore_tag, expr), node, local_templates)
end
when ['_tree'] # <n _tree="expr" /> or <n _tree>expr</n>
# Same "value omitted" convention as for _text above.
if ht_vals[0] != '_tree' # xxx: attribute value is really omitted?
expr = ht_vals[0]
else
children = node.children
if children.length != 1
raise HTree::Error, "_tree expression has #{children.length} nodes"
end
if !children[0].text?
raise HTree::Error, "_tree expression is not text: #{children[0].class}"
end
expr = children[0].to_s
end
generate_logic_node(compile_dynamic_tree(ignore_tag, expr), node, local_templates)
when ['_if'] # <n _if="expr" >...</n>
generate_logic_node(compile_if(ignore_tag, ht_vals[0], nil), node, local_templates)
when ['_else', '_if'] # <n _if="expr" _else="expr.meth(args)" >...</n>
# Sorted order: ht_vals[0] is the _else value, ht_vals[1] the _if value.
generate_logic_node(compile_if(ignore_tag, ht_vals[1], ht_vals[0]), node, local_templates)
when ['_call'] # <n _call="recv.meth(args)" />
generate_logic_node(compile_call(ignore_tag, ht_vals[0]), node, local_templates)
when ['_iter'] # <n _iter="expr.meth(args)//fargs" >...</n>
generate_logic_node(compile_iter(ignore_tag, ht_vals[0]), node, local_templates)
when ['_iter_content'] # <n _iter_content="expr.meth(args)//fargs" >...</n>
generate_logic_node(compile_iter_content(ignore_tag, ht_vals[0]), node, local_templates)
else
raise HTree::Error, "unexpected template attributes: #{ht_attrs.inspect}"
end
else
return node
end
end
# Syntax-checks a Ruby code fragment without running it.
#
# The fragment is evaluated behind a BEGIN block that returns true from
# this method, so the fragment itself is only parsed.  A SyntaxError from
# the parse is re-raised as SyntaxError with the message
# "invalid code: <code>"; this method does not return false itself.
def valid_syntax?(code)
  eval("BEGIN {return true}\n#{code.untaint}")
rescue SyntaxError
  raise SyntaxError, "invalid code: #{code}"
end
# Raises HTree::Error ("invalid ruby code: <code>") when valid_syntax?
# returns a false value for code; otherwise returns nil.
def check_syntax(code)
  return if valid_syntax?(code)
  raise HTree::Error, "invalid ruby code: #{code}"
end
# Builds the logic for a _text attribute.
#
# expr is syntax-checked first.  The result is [:text, expr], wrapped as
# [:tag, [:text, expr]] unless ignore_tag is true.
def compile_dynamic_text(ignore_tag, expr)
  check_syntax(expr)
  text_logic = [:text, expr]
  ignore_tag ? text_logic : [:tag, text_logic]
end
# Builds the logic for a _tree attribute.
#
# expr is syntax-checked first.  The result is [:tree, expr], wrapped as
# [:tag, [:tree, expr]] unless ignore_tag is true.
def compile_dynamic_tree(ignore_tag, expr)
  check_syntax(expr)
  tree_logic = [:tree, expr]
  ignore_tag ? tree_logic : [:tag, tree_logic]
end
# Builds the logic for _if, optionally combined with _else.
#
# expr::      condition; syntax-checked.
# else_call:: call spec of the _else attribute, or nil.  When given it is
#             compiled with compile_call(true, else_call).
#
# Returns [:if, expr, then_logic, else_logic] where then_logic is
# [:content], wrapped in [:tag, ...] unless ignore_tag is true, and
# else_logic is nil when there is no else_call.
def compile_if(ignore_tag, expr, else_call)
  check_syntax(expr)
  then_logic = ignore_tag ? [:content] : [:tag, [:content]]
  else_logic = else_call ? compile_call(true, else_call) : nil
  [:if, expr, then_logic, else_logic]
end
# Splits a call spec into the part before its trailing parenthesized
# argument list and the text inside those parentheses.
#
#   split_args("recv.meth(a, f(b))")  # => ["recv.meth", "a, f(b)"]
#   split_args("meth")                # => ["meth", ""]
#
# A spec that does not end with ")" is returned unchanged with '' as the
# arguments.  Parentheses are matched by counting only (string literals
# are not recognized).  Raises HTree::Error when the closing parenthesis
# has no matching opening parenthesis.
def split_args(spec)
  return spec, '' unless /\)\z/ =~ spec
  depth = 0
  pos = spec.length
  # Scan backwards from the final ")" until the nesting depth is 0 again;
  # pos then indexes the matching "(".
  loop do
    pos -= 1
    raise HTree::Error, "unmatched paren: #{spec}" if pos < 0
    case spec[pos]
    when ?\)
      depth += 1
    when ?\(
      depth -= 1
    end
    break if depth == 0
  end
  return spec[0, pos], spec[(pos + 1)...-1]
end
# Builds the logic for a _call attribute.
#
# spec has the form [recv.]meth[(args)].  After stripping, the argument
# list is split off with split_args, the trailing identifier becomes the
# method name and what precedes it (minus the final ".") the receiver.
# Receiver and the resulting call expressions are syntax-checked.
#
# Returns [:call, recv, meth, args]; recv is nil when no receiver is given.
# Raises HTree::Error ("invalid _call: ...") for a malformed spec.
# ignore_tag is not read by this method.
def compile_call(ignore_tag, spec)
  target, args = split_args(spec.strip)
  name_match = /#{ID_PAT}\z/o.match(target)
  raise HTree::Error, "invalid _call: #{target}" unless name_match
  meth = name_match[0]
  prefix = name_match.pre_match
  recv =
    case prefix
    when /\A\s*\z/
      nil
    when /\A\s*(.*)\.\z/
      $1
    else
      raise HTree::Error, "invalid _call: #{prefix}"
    end
  if recv
    check_syntax(recv)
    check_syntax("#{recv}.#{meth}(#{args})")
  end
  check_syntax("#{meth}(#{args})")
  [:call, recv, meth, args]
end
# Builds the logic for an _iter attribute:
#   <n _iter="expr.meth[(args)]//fargs" >...</n>
#
# The text before "//" is the iterator call, the identifier list after it
# the block parameters (may be empty).  "call {|fargs| }" is
# syntax-checked.
#
# Returns [:iter, call, fargs, logic] where logic is [:content], wrapped
# in [:tag, ...] unless ignore_tag is true, so the element is repeated
# for every iteration.  Raises HTree::Error when the "//fargs" part is
# missing or malformed.
def compile_iter(ignore_tag, spec)
  spec = spec.strip
  parts = %r{\s*//\s*(#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)?\z}o.match(spec)
  raise HTree::Error, "invalid block arguments for _iter: #{spec}" unless parts
  call = parts.pre_match.strip
  fargs = parts[1] ? parts[1].strip : ''
  check_syntax("#{call} {|#{fargs}| }")
  body_logic = ignore_tag ? [:content] : [:tag, [:content]]
  [:iter, call, fargs, body_logic]
end
# Builds the logic for an _iter_content attribute:
#   <n _iter_content="expr.meth[(args)]//fargs" >...</n>
#
# The text before "//" is the iterator call, the identifier list after it
# the block parameters (may be empty).  "call {|fargs| }" is
# syntax-checked.
#
# Returns [:iter, call, fargs, [:content]], wrapped in [:tag, ...] unless
# ignore_tag is true: the tag is emitted once and only its content is
# repeated.  Raises HTree::Error when the "//fargs" part is missing or
# malformed; the message names _iter_content (it used to say _iter, which
# pointed at the wrong attribute).
def compile_iter_content(ignore_tag, spec)
  spec = spec.strip
  unless %r{\s*//\s*(#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)?\z}o =~ spec
    raise HTree::Error, "invalid block arguments for _iter_content: #{spec}"
  end
  call = $`.strip
  fargs = $1 ? $1.strip : ''
  check_syntax("#{call} {|#{fargs}| }")
  logic = [:iter, call, fargs, [:content]]
  logic = [:tag, logic] unless ignore_tag
  logic
end
# Turns a logic description (built by the compile_* methods) plus the
# element it applies to into a node tree: TemplateNode objects whose
# children are ordinary nodes and lambdas.  The lambdas take (out, context)
# and emit logic lines or dynamic output through +out+.
#
# Returns nil for [:empty]; returns +node+ itself for [:tag, [:content]] on
# an empty element.  An unknown logic head raises Exception ("[bug] ...").
def generate_logic_node(logic, node, local_templates)
# logic ::= [:if, expr, then_logic, else_logic]
# | [:iter, call, fargs, logic]
# | [:tag, logic]
# | [:text, expr]
# | [:tree, expr]
# | [:call, expr, meth, args]
# | [:content]
# | [:empty]
case logic.first
when :empty
nil
when :content
# Pull nested template declarations out of the children first;
# subtemplates receives what extract_templates collects.
subtemplates = []
children = []
node.children.each {|c|
children << extract_templates(c, subtemplates, false)
}
if subtemplates.empty?
TemplateNode.new(node.children.map {|n|
compile_node(n, local_templates)
})
else
# Work on a copy so the caller's local_templates hash is not modified.
local_templates = local_templates.dup
# decl becomes "name1 = name2 = ... nil\n": every local template
# variable is declared before any of the lambdas is defined.
decl = ''
subtemplates.each {|sub_name_args, sub_node|
sub_name = sub_name_args[ID_PAT]
local_templates[sub_name] = sub_name
decl << "#{sub_name} = "
}
decl << "nil\n"
defs = []
subtemplates.each {|sub_name_args, sub_node|
defs << lambda {|out, context|
out.output_logic_line compile_local_template(sub_name_args, sub_node, local_templates)
}
}
# Order matters: declaration, then lambda definitions, then the
# remaining (template-free) children.
TemplateNode.new(
lambda {|out, context| out.output_logic_line decl },
defs,
children.map {|n| compile_node(n, local_templates) }
)
end
when :text
_, expr = logic
TemplateNode.new(lambda {|out, context| out.output_dynamic_text expr })
when :tree
_, expr = logic
TemplateNode.new(lambda {|out, context| out.output_dynamic_tree expr, make_context_expr(out, context) })
when :tag
_, rest_logic = logic
if rest_logic == [:content] && node.empty_element?
node
else
# Remove all existing children and put the generated logic in slot 0,
# keeping the element's own tag around it.
subst = {}
node.children.each_index {|i| subst[i] = nil }
subst[0] = TemplateNode.new(generate_logic_node(rest_logic, node, local_templates))
node.subst_subnode(subst)
end
when :if
_, expr, then_logic, else_logic = logic
children = [
lambda {|out, context| out.output_logic_line "if (#{expr})" },
generate_logic_node(then_logic, node, local_templates)
]
if else_logic
children.concat [
lambda {|out, context| out.output_logic_line "else" },
generate_logic_node(else_logic, node, local_templates)
]
end
children <<
lambda {|out, context| out.output_logic_line "end" }
TemplateNode.new(*children)
when :iter
_, call, fargs, rest_logic = logic
TemplateNode.new(
lambda {|out, context| out.output_logic_line "#{call} {|#{fargs}|" },
generate_logic_node(rest_logic, node, local_templates),
lambda {|out, context| out.output_logic_line "}" }
)
when :call
_, recv, meth, args = logic
TemplateNode.new(
lambda {|out, context|
# Every template call receives the output variable and a context
# expression ahead of the user-supplied arguments.
as = [out.outvar, ", ", make_context_expr(out, context)]
unless args.empty?
as << ", " << args
end
# With a receiver: call its _ht_ method.  Without: a known local
# template is invoked through its lambda, anything else as _ht_meth.
if recv
out.output_logic_line "(#{recv})._ht_#{meth}(#{as.join('')})"
elsif local_templates.include? meth
out.output_logic_line "#{meth}.call(#{as.join('')})"
else
out.output_logic_line "_ht_#{meth}(#{as.join('')})"
end
}
)
else
raise Exception, "[bug] invalid logic: #{logic.inspect}"
end
end
# Returns a Ruby expression (as a string) that evaluates to the context
# to pass to a called template.
#
# Namespace bindings of +context+ that equal the entry for the same prefix
# in HTree::Context::DefaultNamespaces are dropped.  If none remain the
# result is out.contextvar; otherwise it is
# "<contextvar>.subst_namespaces(prefix=>uri, ...)" with prefixes and URIs
# written via String#dump (a nil prefix is written as nil).
def make_context_expr(out, context)
  extra = context.namespaces.reject {|prefix, uri|
    HTree::Context::DefaultNamespaces[prefix] == uri
  }
  return out.contextvar if extra.empty?
  bindings = extra.map {|prefix, uri|
    "#{prefix ? prefix.dump : 'nil'}=>#{uri.dump}"
  }
  "#{out.contextvar}.subst_namespaces(#{bindings.join(', ')})"
end
# Container used while compiling templates.  Its children are either
# callables taking (out, context) or objects responding to
# output(out, context).
class TemplateNode
  include HTree::Node

  attr_reader :children

  # Accepts any nesting of arrays; the list is flattened and nil entries
  # are removed.
  def initialize(*children)
    @children = children.flatten.compact
  end

  # Emits every child in order: callables are called, everything else is
  # asked to output itself.
  def output(out, context)
    @children.each do |child|
      if child.respond_to?(:call)
        child.call(out, context)
      else
        child.output(out, context)
      end
    end
  end
end
# Attribute name created for _attr_NAME template attributes.
class TemplateAttrName < HTree::Name
  # Writes the attribute as name="...": the name through +output+, the
  # quotes as literal strings, and the value (text.to_s) through
  # out.output_dynamic_attvalue.
  def output_attribute(text, out, context)
    output(out, context)
    out.output_string('="')
    out.output_dynamic_attvalue(text.to_s)
    out.output_string('"')
  end
end
end
# :startdoc:

@ -0,0 +1,115 @@
require 'htree/modules'
require 'htree/raw_string'
require 'htree/htmlinfo'
require 'htree/encoder'
require 'htree/fstr'
require 'iconv'
module HTree
class Text
# :stopdoc:
class << self
  # Keep the built-in constructor reachable as new_internal, because
  # Text.new is redefined below.
  alias_method :new_internal, :new
end
# :startdoc:
# Creates a text node from another Text, a Location (converted with
# to_node first) or a String.
#
# A Text argument is copied (same rcdata and normalized rcdata).  In a
# String argument every "&" is escaped as "&amp;".  Anything else raises
# TypeError.
def Text.new(arg)
  arg = arg.to_node if HTree::Location === arg
  case arg
  when Text
    new_internal arg.rcdata, arg.normalized_rcdata
  when String
    escaped = arg.gsub(/&/, '&amp;')
    # Keep the caller's string object when nothing had to be escaped.
    arg = escaped.freeze if arg != escaped
    new_internal arg
  else
    raise TypeError, "cannot initialize Text with #{arg.inspect}"
  end
end
# rcdata::            escaped text as given; stored through
#                     HTree.frozen_string unless it is nil/false.
# normalized_rcdata:: normalized form, by default computed with
#                     internal_normalize.  When it equals the stored
#                     rcdata, the same string object is used for both.
def initialize(rcdata, normalized_rcdata=internal_normalize(rcdata)) # :notnew:
  init_raw_string
  @rcdata = rcdata ? HTree.frozen_string(rcdata) : rcdata
  if @rcdata == normalized_rcdata
    @normalized_rcdata = @rcdata
  else
    @normalized_rcdata = normalized_rcdata
  end
end
attr_reader :rcdata, :normalized_rcdata
# Returns the normalized form of +rcdata+ (passed through
# HTree.frozen_string).  Only character references of the forms &#N;,
# &#xH; and &name; are rewritten; all other text is left as is.
def internal_normalize(rcdata)
# - character references are decoded as much as possible.
# - undecodable character references are converted to decimal numeric character references.
result = rcdata.gsub(/&(?:#([0-9]+)|#x([0-9a-fA-F]+)|([A-Za-z][A-Za-z0-9]*));/o) {|s|
# u is the referenced code point; it stays nil for an unknown entity name.
u = nil
if $1
u = $1.to_i
elsif $2
u = $2.hex
elsif $3
u = NamedCharacters[$3]
end
if !u || u < 0 || 0x7fffffff < u
# Unknown name or out-of-range number.
'?'
elsif u == 38 # '&' character.
# Keep the ampersand escaped so the result remains valid rcdata.
'&#38;'
elsif u <= 0x7f
[u].pack("C")
else
begin
# Convert the UTF-8 encoded character to the internal charset.
Iconv.conv(Encoder.internal_charset, 'UTF-8', [u].pack("U"))
rescue Iconv::Failure
# Conversion failed: fall back to a decimal character reference.
"&##{u};"
end
end
}
HTree.frozen_string(result)
end
private :internal_normalize
# HTree::Text#to_s converts the text to a string.
# - decimal character references left in the normalized text are decoded
#   when they denote a code in 0..0x7f.
# - every other decimal character reference becomes a `?' character.
def to_s
  @normalized_rcdata.gsub(/&(?:#([0-9]+));/o) do |ref|
    code = $1.to_i
    (0 <= code && code <= 0x7f) ? [code].pack("C") : '?'
  end
end
# True when the normalized text has no characters.
def empty?() @normalized_rcdata.empty? end
# Returns a text without leading and trailing runs of whitespace and
# "&nbsp;" references.  Returns self when there is nothing to remove;
# otherwise a new Text whose rcdata and normalized rcdata are the same
# frozen string.
def strip
  trimmed = @normalized_rcdata.sub(/\A(?:\s|&nbsp;)+/, '').sub(/(?:\s|&nbsp;)+\z/, '')
  return self if trimmed == @normalized_rcdata
  trimmed.freeze
  Text.new_internal(trimmed, trimmed)
end
# HTree::Text.concat returns a text which is the concatenation of its
# arguments.
#
# Each argument should be one of the following.
# - String (every "&" in it is escaped as "&amp;")
# - HTree::Text (its rcdata is appended as is)
# - HTree::Location which points to an HTree::Text
def Text.concat(*args)
  rcdata = args.inject('') do |buffer, arg|
    arg = arg.to_node if HTree::Location === arg
    buffer << (Text === arg ? arg.rcdata : arg.gsub(/&/, '&amp;'))
  end
  new_internal rcdata
end
end
end

@ -0,0 +1,497 @@
require 'htree/doc'
require 'htree/elem'
require 'htree/loc'
require 'htree/extract_text'
require 'uri'
module HTree
module Traverse
  # Node type predicates: each one tests self against the Trav module of
  # the corresponding node class.
  def doc?
    Doc::Trav === self
  end

  def elem?
    Elem::Trav === self
  end

  def text?
    Text::Trav === self
  end

  def xmldecl?
    XMLDecl::Trav === self
  end

  def doctype?
    DocType::Trav === self
  end

  def procins?
    ProcIns::Trav === self
  end

  def comment?
    Comment::Trav === self
  end

  def bogusetag?
    BogusETag::Trav === self
  end

  # Follows +indexes+ one step at a time, starting at self, using
  # get_subnode_internal for each step.  Returns self when no index is
  # given.
  def get_subnode(*indexes)
    indexes.inject(self) {|node, index| node.get_subnode_internal(index) }
  end
end
module Container::Trav
# +each_child+ iterates over each child.
# The block is handed directly to children.each; the method itself
# always returns nil.
def each_child(&block) # :yields: child_node
children.each(&block)
nil
end
# +each_child_with_index+ iterates over each child together with its
# position.  The block is handed directly to children.each_with_index;
# the method itself always returns nil.
def each_child_with_index(&block) # :yields: child_node, index
children.each_with_index(&block)
nil
end
# +find_element+ searches for an element whose universal name is one of
# the arguments (any element when no name is given) and returns the first
# one yielded by +traverse_element+.
# It returns nil if not found.
def find_element(*names)
# The return inside the block leaves find_element at the first match.
traverse_element(*names) {|e| return e }
nil
end
# +traverse_element+ traverses elements in the tree.
# It yields elements in depth first order.
#
# If _names_ are empty, it yields all elements.
# If non-empty _names_ are given, it should be list of universal names.
#
# A nested element is yielded in depth first order as follows.
#
# t = HTree('<a id=0><b><a id=1 /></b><c id=2 /></a>')
# t.traverse_element("a", "c") {|e| p e}
# # =>
# {elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>}
# {emptyelem <a id="1">}
# {emptyelem <c id="2">}
#
# Universal names are specified as follows.
#
# t = HTree(<<'End')
# <html>
# <meta name="robots" content="index,nofollow">
# <meta name="author" content="Who am I?">
# </html>
# End
# t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e}
# # =>
# {emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">}
# {emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">}
#
def traverse_element(*names, &block) # :yields: element
  if names.empty?
    # No filter: visit every element.
    traverse_all_element(&block)
    return nil
  end
  # Hash used as a set of the wanted universal names.
  wanted = names.inject({}) {|set, uname| set[uname] = true; set }
  traverse_some_element(wanted, &block)
  nil
end
# Yields (element, attribute name, attribute value) for every hyperlink
# attribute present on the XHTML elements listed below.  The table maps an
# element's universal name to the attributes examined on it, in the order
# they are yielded.  Attributes that are absent are skipped.
def each_hyperlink_attribute
  link_attrs = {
    '{http://www.w3.org/1999/xhtml}a' => ['href'],
    '{http://www.w3.org/1999/xhtml}area' => ['href'],
    '{http://www.w3.org/1999/xhtml}link' => ['href'],
    '{http://www.w3.org/1999/xhtml}img' => ['src', 'longdesc', 'usemap'],
    '{http://www.w3.org/1999/xhtml}object' => ['classid', 'codebase', 'data', 'usemap'],
    '{http://www.w3.org/1999/xhtml}q' => ['cite'],
    '{http://www.w3.org/1999/xhtml}blockquote' => ['cite'],
    '{http://www.w3.org/1999/xhtml}ins' => ['cite'],
    '{http://www.w3.org/1999/xhtml}del' => ['cite'],
    '{http://www.w3.org/1999/xhtml}form' => ['action'],
    '{http://www.w3.org/1999/xhtml}input' => ['src', 'usemap'],
    '{http://www.w3.org/1999/xhtml}head' => ['profile'],
    '{http://www.w3.org/1999/xhtml}base' => ['href'],
    '{http://www.w3.org/1999/xhtml}script' => ['src', 'for']
  }
  # traverse_element only yields elements whose name is one of the keys,
  # so the table lookup below always finds an entry.
  traverse_element(*link_attrs.keys) do |elem|
    link_attrs[elem.name].each do |attr|
      hyperlink = elem.get_attribute(attr)
      yield elem, attr, hyperlink if hyperlink
    end
  end
end
private :each_hyperlink_attribute
# +each_hyperlink_uri+ traverses hyperlinks such as HTML href attribute
# of A element.
#
# It yields HTree::Text (or HTree::Loc) and URI for each hyperlink.
#
# The URI objects are created with a base URI which is given by
# HTML BASE element or the argument ((|base_uri|)).
# +each_hyperlink_uri+ doesn't yield the href of the BASE element.
def each_hyperlink_uri(base_uri=nil) # :yields: hyperlink, uri
  base_uri = URI.parse(base_uri) if String === base_uri
  # First pass: collect the links; an href on a BASE element replaces the
  # base URI instead of being collected.
  links = []
  each_hyperlink_attribute do |elem, attr, hyperlink|
    if %r{\{http://www.w3.org/1999/xhtml\}(?:base)\z}i =~ elem.name
      base_uri = URI.parse(hyperlink.to_s)
    else
      links << hyperlink
    end
  end
  # Second pass: resolve every link against the final base URI, or parse
  # it on its own when there is no base.
  links.each do |hyperlink|
    target = hyperlink.to_s
    yield hyperlink, (base_uri ? base_uri + target : URI.parse(target))
  end
end
# +each_hyperlink+ traverses hyperlinks such as HTML href attribute
# of A element.
#
# It yields HTree::Text or HTree::Loc.
#
# Note that +each_hyperlink+ yields HTML href attribute of BASE element.
def each_hyperlink # :yields: text
  # Every hyperlink attribute value is passed on, including the href of a
  # BASE element.  (An unused local array was removed here.)
  each_hyperlink_attribute {|elem, attr, hyperlink|
    yield hyperlink
  }
end
# +each_uri+ traverses hyperlinks such as HTML href attribute
# of A element.
#
# It yields URI for each hyperlink.
#
# The URI objects are created with a base URI which is given by
# HTML BASE element or the argument ((|base_uri|)).
# Delegates to each_hyperlink_uri and passes on only the URI of each pair.
def each_uri(base_uri=nil) # :yields: URI
each_hyperlink_uri(base_uri) {|hyperlink, uri| yield uri }
end
end
# :stopdoc:
module Doc::Trav
  # A document is not an element itself: only descend into the children.
  def traverse_all_element(&block)
    children.each do |child|
      child.traverse_all_element(&block)
    end
  end
end
module Elem::Trav
  # Yield this element first, then descend into the children.
  def traverse_all_element(&block)
    yield self
    children.each do |child|
      child.traverse_all_element(&block)
    end
  end
end
module Leaf::Trav
  # Leaves yield nothing.
  def traverse_all_element
  end
end
module Doc::Trav
  # A document is not an element itself: only descend into the children.
  def traverse_some_element(name_set, &block)
    children.each do |child|
      child.traverse_some_element(name_set, &block)
    end
  end
end
module Elem::Trav
  # Yield this element when its universal name is in name_set, then
  # descend into the children regardless.
  def traverse_some_element(name_set, &block)
    yield self if name_set.include?(self.name)
    children.each do |child|
      child.traverse_some_element(name_set, &block)
    end
  end
end
module Leaf::Trav
  # Leaves yield nothing.
  def traverse_some_element(name_set)
  end
end
# :startdoc:
module Traverse
# +traverse_text+ traverses texts in the tree
# The work is done by traverse_text_internal, which each node type
# implements; this wrapper always returns nil.
def traverse_text(&block) # :yields: text
traverse_text_internal(&block)
nil
end
end
# :stopdoc:
module Container::Trav
  # Containers pass the traversal on to every child.
  def traverse_text_internal(&block)
    each_child do |child|
      child.traverse_text_internal(&block)
    end
  end
end
module Leaf::Trav
  # Leaves other than text yield nothing.
  def traverse_text_internal
  end
end
module Text::Trav
  # A text yields itself.
  def traverse_text_internal
    yield self
  end
end
# :startdoc:
module Container::Trav
# +filter+ rebuilds the tree without some components.
#
# node.filter {|descendant_node| predicate } -> node
# loc.filter {|descendant_loc| predicate } -> node
#
# +filter+ yields each node except top node.
# If given block returns false, corresponding node is dropped.
# If given block returns true, corresponding node is retained and
# inner nodes are examined.
#
# +filter+ returns a node.
# It doesn't return location object even if self is location object.
#
def filter(&block)
  # subst maps each child index to its replacement: nil drops the child,
  # an element is replaced by its own filtered copy, and any other
  # retained node is kept as is.
  subst = {}
  each_child_with_index do |descendant, index|
    keep = yield(descendant)
    subst[index] =
      if !keep
        nil
      elsif descendant.elem?
        descendant.filter(&block)
      else
        descendant
      end
  end
  to_node.subst_subnode(subst)
end
end
module Doc::Trav
# +title+ searches title and return it as a text.
# It returns nil if not found.
#
# +title+ searches the following information.
#
# - <title>...</title> in HTML
# - <title>...</title> in RSS
# - <title>...</title> in Atom
def title
  # Universal names under which a title element is recognized: without
  # namespace, XHTML, RSS 1.0, RSS 0.9 and the two Atom namespaces.
  title_names = [
    'title',
    '{http://www.w3.org/1999/xhtml}title',
    '{http://purl.org/rss/1.0/}title',
    '{http://my.netscape.com/rdf/simple/0.9/}title',
    '{http://www.w3.org/2005/Atom}title',
    '{http://purl.org/atom/ns#}title'
  ]
  found = find_element(*title_names)
  found && found.extract_text
end
# +author+ searches author and return it as a text.
# It returns nil if not found.
#
# +author+ searches the following information.
#
# - <meta name="author" content="author-name"> in HTML
# - <link rev="made" title="author-name"> in HTML
# - <dc:creator>author-name</dc:creator> in RSS
# - <dc:publisher>author-name</dc:publisher> in RSS
# - <author><name>author-name</name></author> in Atom
# Tries the sources below in order and returns the first non-empty,
# stripped result; returns nil when none of them yields an author.
def author
# 1. <meta name="author" content="..."> (with or without XHTML namespace).
traverse_element('meta',
'{http://www.w3.org/1999/xhtml}meta') {|e|
begin
next unless e.fetch_attr('name').downcase == 'author'
author = e.fetch_attribute('content').strip
return author if !author.empty?
rescue IndexError
# A missing attribute raises IndexError: skip this element.
end
}
# 2. <link rev="made" title="...">.
traverse_element('link',
'{http://www.w3.org/1999/xhtml}link') {|e|
begin
next unless e.fetch_attr('rev').downcase == 'made'
author = e.fetch_attribute('title').strip
return author if !author.empty?
rescue IndexError
# A missing attribute raises IndexError: skip this element.
end
}
# 3. RSS 1.0: dc:creator, then dc:publisher, inside the channel element.
if channel = find_element('{http://purl.org/rss/1.0/}channel')
channel.traverse_element('{http://purl.org/dc/elements/1.1/}creator') {|e|
begin
author = e.extract_text.strip
return author if !author.empty?
rescue IndexError
end
}
channel.traverse_element('{http://purl.org/dc/elements/1.1/}publisher') {|e|
begin
author = e.extract_text.strip
return author if !author.empty?
rescue IndexError
end
}
end
# 4. Atom (both namespaces): name inside an author element, when a
# top-level element is the feed element.
['http://www.w3.org/2005/Atom', 'http://purl.org/atom/ns#'].each {|xmlns|
each_child {|top|
next unless top.elem?
if top.name == "{#{xmlns}}feed"
# NOTE(review): the author element is searched from the document
# (self), not from +top+ -- confirm that this is intended.
if feed_author = find_element("{#{xmlns}}author")
feed_author.traverse_element("{#{xmlns}}name") {|e|
begin
author = e.extract_text.strip
return author if !author.empty?
rescue IndexError
end
}
end
end
}
}
nil
end
end
module Doc::Trav
# +root+ searches root element.
# If there is no element on top level, it raises HTree::Error.
# If there are two or more elements on top level, it raises HTree::Error.
def root
  top_elements = children.find_all {|child| child.elem? }
  raise HTree::Error, "no element" if top_elements.empty?
  raise HTree::Error, "multiple top elements" if 1 < top_elements.length
  top_elements[0]
end
# +has_xmldecl?+ returns true if there is an XML declaration on top level.
def has_xmldecl?
  # Only the direct children of the document are examined.
  children.any? {|child| child.xmldecl? }
end
end
module Elem::Trav
# +name+ returns the universal name of the element as a string.
#
# p HTree('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>').root.name
# # =>
# "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF"
#
def name
  element_name.universal_name
end
# +qualified_name+ returns the qualified name of the element as a string.
#
# p HTree('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>').root.qualified_name
# # =>
# "rdf:RDF"
def qualified_name
  element_name.qualified_name
end
# +attributes+ returns attributes as a hash.
# The hash keys are HTree::Name objects.
# The hash values are HTree::Text or HTree::Location objects.
#
# p HTree('<a name="xx" href="uu">').root.attributes
# # =>
# {href=>{text "uu"}, name=>{text "xx"}}
#
# p HTree('<a name="xx" href="uu">').make_loc.root.attributes
# # =>
# {href=>#<HTree::Location: doc()/a/@href>, name=>#<HTree::Location: doc()/a/@name>}
#
def attributes
  # A fresh hash is built on every call from what each_attribute yields.
  table = {}
  each_attribute do |attr_name, attr_text|
    table[attr_name] = attr_text
  end
  table
end
# Like each_attribute, but yields plain strings: the attribute's
# universal name and the attribute text converted with to_s.
def each_attr
  each_attribute do |attr_name, attr_text|
    yield attr_name.universal_name, attr_text.to_s
  end
end
# call-seq:
# elem.fetch_attribute(name) -> text or raise IndexError
# elem.fetch_attribute(name, default) -> text or default
# elem.fetch_attribute(name) {|uname| default } -> text or default
#
# +fetch_attribute+ returns an attribute value as a text.
#
# elem may be an instance of HTree::Elem or a location points to it.
def fetch_attribute(uname, *rest, &block)
  raise ArgumentError, "wrong number of arguments (#{1+rest.length} for 2)" if 1 < rest.length
  raise ArgumentError, "block supersedes default value argument" if block_given? && !rest.empty?
  # Accept a name object as well as a universal-name string.
  uname = uname.universal_name if uname.respond_to?(:universal_name)
  update_attribute_hash.fetch(uname) do
    # Attribute missing: block result, then default argument, then error.
    next yield(uname) if block_given?
    next rest[0] unless rest.empty?
    raise IndexError, "attribute not found: #{uname.inspect}"
  end
end
# call-seq:
# elem.fetch_attr(name) -> string or raise IndexError
# elem.fetch_attr(name, default) -> string or default
# elem.fetch_attr(name) {|uname| default } -> string or default
#
# +fetch_attr+ returns an attribute value as a string.
#
# elem may be an instance of HTree::Elem or a location points to it.
def fetch_attr(uname, *rest, &block)
  raise ArgumentError, "wrong number of arguments (#{1+rest.length} for 2)" if 1 < rest.length
  raise ArgumentError, "block supersedes default value argument" if block_given? && !rest.empty?
  # Accept a name object as well as a universal-name string.
  uname = uname.universal_name if uname.respond_to?(:universal_name)
  found = update_attribute_hash.fetch(uname) do
    # Attribute missing: the block result or the default argument is
    # returned from the method as is (no to_s conversion).
    return yield(uname) if block_given?
    return rest[0] unless rest.empty?
    raise IndexError, "attribute not found: #{uname.inspect}"
  end
  found.to_s
end
# Returns the attribute stored under the given universal name (a string
# or an object responding to universal_name); returns whatever the
# attribute hash gives for a missing key.
def get_attribute(uname)
  key = uname.respond_to?(:universal_name) ? uname.universal_name : uname
  update_attribute_hash[key]
end
# Returns the attribute value as a string (via to_s), or nil when the
# attribute hash has no true value for the name.
#
# uname may be a universal-name string or an object responding to
# universal_name, as in get_attribute and fetch_attr.  Previously a name
# object was used as the hash key directly and therefore never found.
def get_attr(uname)
  uname = uname.universal_name if uname.respond_to? :universal_name
  if text = update_attribute_hash[uname]
    text.to_s
  else
    nil
  end
end
end
end

@ -0,0 +1,109 @@
#!/usr/bin/env ruby
# usage: ruby install.rb [-n]
# options:
# -n : don't install
#
# Author: Tanaka Akira <akr@m17n.org>
require 'optparse'
require 'fileutils'
# Returns the first load-path entry that ends in /site_ruby/<version>.
# Raises RuntimeError when the load path has no such entry.
def target_directory
  site_dir = $:.find {|loc| %r{/site_ruby/[\d.]+\z} =~ loc }
  return site_dir if site_dir
  raise "could not find target install directory"
end
# Cache of cvs_files results, keyed by directory.
CVS_FILES = {}

# Returns the names registered in <dir>/CVS/Entries as a hash used as a
# set (name => true), or nil when <dir>/CVS is not a directory.  Both file
# lines ("/name/...") and directory lines ("D/name/...") are recognized.
# Results, including nil, are cached in CVS_FILES.
def cvs_files(dir)
  return CVS_FILES[dir] if CVS_FILES.include? dir
  entries = nil
  if File.directory?("#{dir}/CVS")
    entries = {}
    File.foreach("#{dir}/CVS/Entries") do |line|
      if %r{\A/([^/]+)/} =~ line
        entries[$1] = true
      elsif %r{\AD/([^/]+)/} =~ line
        entries[$1] = true
      end
    end
  end
  CVS_FILES[dir] = entries
  entries
end
# Yields every file to install: each *.rb file in the current directory
# that is neither a test (test-*) nor an installer (install*) and, when
# the directory is under CVS, is registered there -- followed by the
# files it requires, found through each_require.
def each_target(&block)
  seen = {}
  tracked = cvs_files('.')
  Dir.glob("*.rb") do |filename|
    excluded = /\Atest-/ =~ filename ||
               /\Ainstall/ =~ filename ||
               (tracked && !tracked.include?(filename))
    next if excluded
    seen[filename] = true
    yield filename
    each_require(filename, seen, &block)
  end
end
# Scans +file+ for lines of the form  require 'feature'  and yields
# "feature.rb" for every such file that exists (relative to the current
# directory) and is not yet in target_set, recursing into it.
# target_set is updated in place, which also stops require cycles.
def each_require(file, target_set, &block)
  File.foreach(file) do |line|
    next unless /\A\s*require\s+['"]([^'"]+)['"]/ =~ line
    filename = "#{$1}.rb"
    next if target_set.include?(filename) || !File.exist?(filename)
    target_set[filename] = true
    yield filename
    each_require(filename, target_set, &block)
  end
end
# Returns all file names yielded by each_target as a sorted array.
def collect_target
  targets = []
  each_target {|filename| targets.push(filename) }
  targets.sort!
  targets
end
# Copies src to dst unless both already have the same content.
# The destination directory is created (mode 0755) when missing, an
# existing dst is removed first, and the copy gets mode 0644.
def install_file(src, dst)
  begin
    return if FileUtils.compare_file(src, dst)
  rescue Errno::ENOENT
    # A file to compare is missing: fall through and install.
  end
  # check shadow
  begin
    File.unlink(dst)
  rescue Errno::ENOENT
    # Nothing to remove.
  end
  FileUtils.mkdir_p(File.dirname(dst), :mode=>0755)
  FileUtils.cp(src, dst, :verbose => true)
  File.chmod(0644, dst)
end
# Runs the block and returns its value; an exception matching +exc+ is
# swallowed and nil is returned instead.  Other exceptions propagate.
def ignore_exc(exc)
  yield
rescue exc
  nil
end
# Command line handling and main program.
# $opt_n is set by -n and selects the dry-run mode.
$opt_n = false
ARGV.options {|q|
q.banner = 'ruby install.rb [opts]'
q.def_option('--help', 'show this message') {puts q; exit(0)}
q.def_option('-n', "don't install") { $opt_n = true }
q.parse!
}
if $opt_n
# Dry run: only print the destination of each file that would be installed.
dir = target_directory
collect_target.each {|filename|
puts "-> #{dir}/#{filename}"
}
exit
else
# Real installation; the umask is set to 022 before any file is created.
File.umask 022
dir = target_directory
collect_target.each {|filename|
install_file filename, "#{dir}/#{filename}"
}
end

@ -0,0 +1,5 @@
# Runs the whole test suite: enables Ruby warnings, then loads every
# test/test-*.rb file (paths are relative to the current directory).
$VERBOSE = true
Dir.glob('test/test-*.rb') {|filename|
load filename
}

@ -0,0 +1 @@
<span _text="htree_test_toplevel_local_variable = :modified" />

@ -0,0 +1,4 @@
<?xml version="1.0"?>
<html>
<title _text="self">dummy_title</title>
</html>

@ -0,0 +1,67 @@
require 'test/unit'
require 'htree/tag'
require 'htree/elem'
require 'htree/traverse'
# Tests for the attribute accessors of HTree::Elem.
class TestAttr < Test::Unit::TestCase
  # Element <ename p:n="a&b"> with the prefix p bound to namespace u.
  def sample_elem
    context = HTree::DefaultContext.subst_namespaces({'p'=>'u'})
    HTree::Elem.new!(HTree::STag.new('ename', [['p:n', 'a&b']], context))
  end
  private :sample_elem

  def test_each_attribute
    sample_elem.each_attribute do |name, value|
      assert_instance_of(HTree::Name, name)
      assert_instance_of(HTree::Text, value)
      assert_equal('{u}n', name.universal_name)
      assert_equal('a&amp;b', value.rcdata)
    end
  end

  def test_each_attr
    sample_elem.each_attr do |name, value|
      assert_instance_of(String, name)
      assert_instance_of(String, value)
      assert_equal('{u}n', name)
      assert_equal('a&b', value)
    end
  end

  def test_fetch_attribute
    elem = sample_elem
    value = elem.fetch_attribute('{u}n')
    assert_instance_of(HTree::Text, value)
    assert_equal('a&amp;b', value.rcdata)
    assert_equal('y', elem.fetch_attribute('x', 'y'))
    assert_raises(IndexError) { elem.fetch_attribute('x') }
  end

  def test_get_attribute
    elem = sample_elem
    value = elem.get_attribute('{u}n')
    assert_instance_of(HTree::Text, value)
    assert_equal('a&amp;b', value.rcdata)
    assert_equal(nil, elem.get_attribute('x'))
  end

  def test_get_attr
    elem = sample_elem
    value = elem.get_attr('{u}n')
    assert_instance_of(String, value)
    assert_equal('a&b', value)
    assert_equal(nil, elem.get_attr('x'))
  end

  # The string accessors must also work through a location object.
  def test_loc_get_attr
    elem = HTree::Elem.new('e', {'k'=>'v'})
    [elem.make_loc.get_attr('k'), elem.make_loc.fetch_attr('k')].each do |value|
      assert_instance_of(String, value)
      assert_equal('v', value)
    end
  end
end

@ -0,0 +1,53 @@
require 'test/unit'
require 'htree/parse'
# Checks that parsed text comes out in the charset selected by $KCODE,
# whatever charset the input declares.
class TestCharset < Test::Unit::TestCase
  # $KCODE is global: save it before and restore it after every test.
  def setup
    @old_kcode = $KCODE
  end

  def teardown
    $KCODE = @old_kcode
  end

  # Gives +str+ a +read+ method returning itself and a +charset+ method
  # returning the given charset name, then returns +str+.
  def self.mark_string(str, charset)
    def str.read() self end
    singleton = class << str; self end
    singleton.__send__(:define_method, :charset) { charset }
    str
  end

  # HIRAGANA LETTER A in various charset
  UTF8 = mark_string("\343\201\202", 'UTF-8')
  EUCKR = mark_string("\252\242", 'EUC-KR')
  EUCJP = mark_string("\244\242", 'EUC-JP')
  SJIS = mark_string("\202\240", 'Shift_JIS')
  ISO2022JP = mark_string("\e$B$\"\e(B", 'ISO-2022-JP')

  # All inputs, in the order they are checked.
  SOURCES = [UTF8, EUCKR, EUCJP, SJIS, ISO2022JP]

  # Every input must parse to a single text whose string form is +expected+.
  def assert_all_parse_to(expected)
    SOURCES.each do |source|
      assert_equal(expected, HTree.parse(source).children[0].to_s)
    end
  end
  private :assert_all_parse_to

  def test_u
    $KCODE = 'u'
    assert_all_parse_to(UTF8)
  end

  def test_e
    $KCODE = 'e'
    assert_all_parse_to(EUCJP)
  end

  def test_s
    $KCODE = 's'
    assert_all_parse_to(SJIS)
  end
end

@ -0,0 +1,29 @@
require 'test/unit'
require 'htree/context'
# Tests for HTree::Context namespace handling.
class TestContext < Test::Unit::TestCase
  # Rejected: non-string prefix, empty prefix, nil URI.
  # Accepted: nil prefix.
  def test_namespaces_validation
    [{1=>'u'}, {''=>'u'}, {'p'=>nil}].each do |invalid|
      assert_raise(ArgumentError) { HTree::Context.new(invalid) }
    end
    assert_nothing_raised { HTree::Context.new({nil=>'u'}) }
  end

  def test_namespace_uri
    assert_equal('http://www.w3.org/XML/1998/namespace',
      HTree::Context.new.namespace_uri('xml'))
    default_bound = HTree::Context.new({nil=>'u'})
    assert_equal('u', default_bound.namespace_uri(nil))
    prefix_bound = HTree::Context.new({'p'=>'u'})
    assert_equal('u', prefix_bound.namespace_uri('p'))
    assert_equal(nil, HTree::Context.new({'p'=>'u'}).namespace_uri('q'))
  end

  # subst_namespaces returns an extended context and leaves the receiver
  # unchanged.
  def test_subst_namespaces
    original = HTree::Context.new({'p'=>'u'})
    extended = original.subst_namespaces({'q'=>'v'})
    assert_equal('u', original.namespace_uri('p'))
    assert_equal(nil, original.namespace_uri('q'))
    assert_equal('u', extended.namespace_uri('p'))
    assert_equal('v', extended.namespace_uri('q'))
  end
end

@ -0,0 +1,45 @@
require 'test/unit'
require 'htree/elem'
require 'htree/display'
# Tests for the xmlns declarations emitted by display_xml.
class TestXMLNS < Test::Unit::TestCase
  # Compares the US-ASCII XML serialization of +node+ with +expected+.
  def assert_xml(expected, node)
    assert_equal(expected, node.display_xml('', 'US-ASCII'))
  end

  def test_update_xmlns_empty
    assert_xml("<n\n/>", HTree::Elem.new('n'))
  end

  # A declaration already in effect is not repeated on a child; a
  # different binding of the same prefix is kept.
  def test_reduce_xmlns
    assert_xml(
      "<p:n xmlns:p=\"u\"\n/>",
      HTree::Elem.new('p:n', {'xmlns:p'=>'u'}))
    same_binding = HTree::Elem.new('p:n', {'xmlns:p'=>'u'})
    assert_xml(
      "<n xmlns:p=\"u\"\n><p:n\n/></n\n>",
      HTree::Elem.new('n', {'xmlns:p'=>'u'}, same_binding))
    other_binding = HTree::Elem.new('p:n', {'xmlns:p'=>'v'})
    assert_xml(
      "<n xmlns:p=\"u\"\n><p:n xmlns:p=\"v\"\n/></n\n>",
      HTree::Elem.new('n', {'xmlns:p'=>'u'}, other_binding))
  end

  # An element built from a Name object gets the declaration it needs
  # unless an enclosing element already provides the same binding.
  def test_extra_xmlns
    assert_xml(
      "<p:n xmlns:p=\"u\"\n/>",
      HTree::Elem.new(HTree::Name.new('p', 'u', 'n')))
    assert_xml(
      "<nn\n><p:n xmlns:p=\"u\"\n/></nn\n>",
      HTree::Elem.new('nn', HTree::Elem.new(HTree::Name.new('p', 'u', 'n'))))
    [
      ['u', "<nn xmlns:p=\"u\"\n><p:n\n/></nn\n>"],
      ['v', "<nn xmlns:p=\"v\"\n><p:n xmlns:p=\"u\"\n/></nn\n>"]
    ].each do |outer_uri, expected|
      inner = HTree::Elem.new(HTree::Name.new('p', 'u', 'n'))
      assert_xml(expected, HTree::Elem.new('nn', {'xmlns:p'=>outer_uri}, inner))
    end
  end
end

@ -0,0 +1,101 @@
require 'test/unit'
require 'htree/doc'
require 'htree/elem'
require 'htree/equality'
require 'htree/traverse'
class TestElemNew < Test::Unit::TestCase
# Only a name: empty element without attributes, children or end tag.
def test_empty
  elem = HTree::Elem.new('a')
  stag = elem.instance_variable_get(:@stag)
  etag = elem.instance_variable_get(:@etag)
  assert_equal('a', elem.qualified_name)
  assert_equal({}, elem.attributes)
  assert_equal(HTree::DefaultContext, stag.inherited_context)
  assert_equal([], elem.children)
  assert_equal(true, elem.empty_element?)
  assert_nil(etag)
end
def test_empty_array
e = HTree::Elem.new('a', [])
assert_equal('a', e.qualified_name)
assert_equal({}, e.attributes)
assert_equal(HTree::DefaultContext, e.instance_variable_get(:@stag).inherited_context)
assert_equal([], e.children)
assert_equal(false, e.empty_element?)
assert_equal(nil, e.instance_variable_get(:@etag))
end
def test_empty_attr
e = HTree::Elem.new('a', {'href'=>'xxx'})
assert_equal('a', e.qualified_name)
assert_equal({HTree::Name.parse_attribute_name('href', HTree::DefaultContext)=>HTree::Text.new('xxx')}, e.attributes)
assert_equal(HTree::DefaultContext, e.instance_variable_get(:@stag).inherited_context)
assert_equal([], e.children)
assert_equal(true, e.empty_element?)
assert_equal(nil, e.instance_variable_get(:@etag))
end
def test_node
t = HTree::Text.new('t')
e = HTree::Elem.new('a', t)
assert_equal({}, e.attributes)
assert_equal([t], e.children)
end
def test_hash
t = HTree::Text.new('t')
e = HTree::Elem.new('a', {'b' => t})
assert_equal([['b', t]], e.attributes.map {|n,v| [n.universal_name, v] })
assert_equal([], e.children)
end
def test_string
t = HTree::Text.new('s')
e = HTree::Elem.new('a', "s")
assert_equal({}, e.attributes)
assert_equal([t], e.children)
end
def test_interleave
t = HTree::Text.new('t')
e = HTree::Elem.new('a', t, {'b' => t}, t, {'c' => 'd'}, t)
assert_equal([['b', t], ['c', HTree::Text.new('d')]],
e.attributes.map {|n,v| [n.universal_name, v] }.sort)
assert_equal([t, t, t], e.children)
end
def test_nest
t = HTree::Text.new('t')
b = HTree::BogusETag.new('a')
x = HTree::Elem.new('e', HTree::XMLDecl.new('1.0'))
d = HTree::Elem.new('e', HTree::DocType.new('html'))
e = HTree::Elem.new('a', [t, t, t, b, x, d])
assert_equal({}, e.attributes)
assert_equal([t, t, t, b, x, d], e.children)
end
def test_err
assert_raises(TypeError) { HTree::Elem.new('e', HTree::STag.new('a')) }
assert_raises(TypeError) { HTree::Elem.new('e', HTree::ETag.new('a')) }
end
def test_context
context = HTree::DefaultContext.subst_namespaces({'p'=>'u'})
elem = HTree::Elem.new('p:n', {'p:a'=>'t'}, context)
assert_equal('{u}n', elem.name)
assert_equal('t', elem.get_attr('{u}a'))
assert_same(context, elem.instance_variable_get(:@stag).inherited_context)
assert_raises(ArgumentError) { HTree::Elem.new('e', context, context) }
end
def test_hash_in_array
attrs = [{'a'=>'1'}, {'a'=>'2'}]
assert_raises(TypeError) { HTree::Elem.new('e', attrs) }
attrs.pop
assert_raises(TypeError) { HTree::Elem.new('e', attrs) }
attrs.pop
assert_equal([], attrs)
assert_equal(false, HTree::Elem.new('e', attrs).empty_element?)
end
end

@ -0,0 +1,49 @@
require 'test/unit'
require 'htree/encoder'
# Tests HTree::Encoder: the charset it reports as minimal while text is
# being written, and the bytes returned by finish.
class TestEncoder < Test::Unit::TestCase
  EUC_JISX0212_CH = "\217\260\241" # cannot encode with Shift_JIS.
  EUC_JISX0208_CH = "\260\241"

  # A character that Shift_JIS cannot represent is written as a numeric
  # character reference, so the minimal charset stays US-ASCII.
  def test_minimal_charset
    encoder = HTree::Encoder.new('Shift_JIS', 'EUC-JP')
    assert_equal("US-ASCII", encoder.minimal_charset)

    encoder.output_text("a")
    assert_equal("US-ASCII", encoder.minimal_charset)

    encoder.output_text(EUC_JISX0212_CH)
    assert_equal("US-ASCII", encoder.minimal_charset)

    encoder.output_text("b")
    assert_equal("US-ASCII", encoder.minimal_charset)

    assert_equal("a&#19970;b", encoder.finish)
  end

  # With ISO-2022-JP-2 as the target the reported minimal charset grows
  # as JIS X 0208 and then JIS X 0212 characters are written.
  def test_minimal_charset_2
    encoder = HTree::Encoder.new('ISO-2022-JP-2', 'EUC-JP')
    assert_equal("US-ASCII", encoder.minimal_charset)

    encoder.output_text("a")
    assert_equal("US-ASCII", encoder.minimal_charset)

    encoder.output_text(EUC_JISX0208_CH)
    assert_equal("ISO-2022-JP", encoder.minimal_charset)

    encoder.output_text("b")
    assert_equal("ISO-2022-JP", encoder.minimal_charset)

    encoder.output_text(EUC_JISX0212_CH)
    assert_equal("ISO-2022-JP-2", encoder.minimal_charset)

    assert_equal("a\e$B0!\e(Bb\e$(D0!\e(B", encoder.finish)
  end

  # For UTF-16BE the minimal charset is UTF-16BE from the start.
  def test_minimal_charset_u
    encoder = HTree::Encoder.new('UTF-16BE', 'EUC-JP')
    assert_equal("UTF-16BE", encoder.minimal_charset)

    encoder.output_text("a")
    assert_equal("UTF-16BE", encoder.minimal_charset)

    assert_equal("\000a", encoder.finish)
  end

  # finish returns the output ending with the escape back to ASCII.
  def test_close
    encoder = HTree::Encoder.new('ISO-2022-JP', 'EUC-JP')
    encoder.output_string(EUC_JISX0208_CH)
    assert_equal("ISO-2022-JP", encoder.minimal_charset)
    assert_equal("\e$B0!\e(B", encoder.finish)
  end
end

@ -0,0 +1,55 @@
require 'test/unit'
require 'htree/equality'
# Tests node equality: == does not take namespace prefixes into account,
# while exact_equal? does.
class TestEQQ < Test::Unit::TestCase
  # Asserts that +expected+.exact_equal?(+actual+) holds, reporting both
  # values on failure.
  def assert_exact_equal(expected, actual, message=nil)
    template = <<EOT
<?> expected but was
<?>.
EOT
    full_message = build_message(message, template, expected, actual)
    assert_block(full_message) { expected.exact_equal? actual }
  end

  # Start tags naming the same universal name are all == to each other,
  # however the prefix was supplied.
  def test_tag_name_prefix
    tags = [
      HTree::STag.new('{u}n'),
      HTree::STag.new('p1{u}n'),
      HTree::STag.new('p2{u}n'),
      HTree::STag.new('p1:n', [], HTree::DefaultContext.subst_namespaces({ 'p1' => 'u' })),
      HTree::STag.new('p2:n', [], HTree::DefaultContext.subst_namespaces({ 'p2' => 'u' })),
    ]
    tags.each do |left|
      tags.each do |right|
        assert_equal(left, right)
      end
    end
  end

  # The same holds for the prefixes of attribute names.
  def test_tag_attribute_name_prefix
    tags = [
      HTree::STag.new('n', [['p1{u}a', 'v']]),
      HTree::STag.new('n', [['p2{u}a', 'v']]),
      HTree::STag.new('n', [['p1:a', 'v']], HTree::DefaultContext.subst_namespaces({ 'p1' => 'u' })),
      HTree::STag.new('n', [['p2:a', 'v']], HTree::DefaultContext.subst_namespaces({ 'p2' => 'u' })),
    ]
    tags.each do |left|
      tags.each do |right|
        assert_equal(left, right)
      end
    end
  end

  # Elements differing only in prefix are ==, but not exact_equal?.
  def test_element
    assert_equal(HTree::Elem.new('p1{u}n'), HTree::Elem.new('p2{u}n'))

    with_p1 = HTree::Elem.new('n', { 'p1{u}a' => 'v' })
    with_p2 = HTree::Elem.new('n', { 'p2{u}a' => 'v' })
    assert_equal(with_p1, with_p2)

    assert(!HTree::Elem.new('n', { 'p1{u}a' => 'v' }).exact_equal?(HTree::Elem.new('n', { 'p2{u}a' => 'v' })))
  end

  # A context holding both a default (nil) and a prefixed namespace must
  # not make make_exact_equal_object raise.
  def test_tag_namespaces
    assert_nothing_raised do
      context = HTree::DefaultContext.subst_namespaces({ nil => "u1", "p" => "u2" })
      HTree::STag.new("n", [], context).make_exact_equal_object
    end
  end
end

@ -0,0 +1,18 @@
require 'test/unit'
require 'htree/extract_text'
require 'htree/equality'
# Tests extract_text on a bare text node and on an element wrapping one.
class TestExtractText < Test::Unit::TestCase
  # A text node extracts to something equal to itself.
  def test_single
    text = HTree::Text.new('abc')
    assert_equal(text, text.extract_text)
  end

  # An element extracts to the text it contains.
  def test_elem
    text = HTree::Text.new('abc')
    wrapper = HTree::Elem.new('e', text)
    assert_equal(text, wrapper.extract_text)
  end
end

@ -0,0 +1,27 @@
require 'test/unit'
require 'htree/gencode'
require 'htree/parse'
# Tests the Ruby source returned by generate_xml_output_code by evaluating
# it and comparing what ends up in the encoder.
class TestGenCode < Test::Unit::TestCase
# Evaluates +code+ inside this method and returns the finished output of
# the encoder created here.
# NOTE(review): the generated code presumably refers to the locals +out+
# and +top_context+ by name (neither is otherwise used here, yet the
# result depends on the context passed in) -- do not rename them without
# checking htree/gencode.
def run_code(code, top_context)
out = HTree::Encoder.new(HTree::Encoder.internal_charset, HTree::Encoder.internal_charset)
eval(code)
out.finish
end
# Code generated for an inner prefixed element emits its own xmlns:p
# declaration under the default context, and omits it when the supplied
# context already binds p to the same URI.
def test_xmlns
t = HTree.parse_xml('<p:n xmlns:p=z><p:m>bb').children[0].children[0] # <p:m>bb</p:m>
code = t.generate_xml_output_code
assert_equal("<p:m xmlns:p=\"z\"\n>bb</p:m\n>", run_code(code, HTree::DefaultContext))
assert_equal("<p:m\n>bb</p:m\n>", run_code(code, HTree::DefaultContext.subst_namespaces("p"=>"z")))
end
# Special characters in a namespace URI come out escaped in the generated
# xmlns attribute value.
def test_xmlns_chref
t = HTree.parse_xml('<p:n xmlns:p="a&amp;<>&quot;b"/>').children[0]
code = t.generate_xml_output_code
assert_equal("<p:n xmlns:p=\"a&amp;&lt;&gt;&quot;b\"\n/>", run_code(code, HTree::DefaultContext))
end
end

@ -0,0 +1,25 @@
require 'test/unit'
require 'htree/leaf'
# Tests the two constructors of HTree::ProcIns with content containing
# the "?>" terminator.
class TestProcIns < Test::Unit::TestCase
  # new! raises when the content contains "?>".
  def test_initialize
    assert_raises(HTree::ProcIns::Error) do
      HTree::ProcIns.new!('target', "?>")
    end
  end

  # new rewrites "?>" to "? >" and accepts nil content.
  def test_new
    repaired = HTree::ProcIns.new('target', "?>")
    assert_equal('? >', repaired.content)

    without_content = HTree::ProcIns.new('target', nil)
    assert_equal(nil, without_content.content)
  end
end
# Tests the two constructors of HTree::Comment with content that has a
# double hyphen or a trailing hyphen.
class TestComment < Test::Unit::TestCase
  # new! raises for "--" inside the content and for a trailing "-".
  def test_initialize
    assert_raises(HTree::Comment::Error) do
      HTree::Comment.new!("a--b")
    end
    assert_raises(HTree::Comment::Error) do
      HTree::Comment.new!("a-")
    end
  end

  # new inserts a space so the content becomes acceptable.
  def test_new
    double_hyphen = HTree::Comment.new("a--b")
    assert_equal('a- -b', double_hyphen.content)

    trailing_hyphen = HTree::Comment.new("a-")
    assert_equal('a- ', trailing_hyphen.content)
  end
end

@ -0,0 +1,60 @@
require 'test/unit'
require 'htree/loc'
require 'htree/parse'
require 'htree/traverse'
# Tests location objects: make_loc, get_subnode, find_loc_step and path.
class TestLoc < Test::Unit::TestCase
  # make_loc returns the Loc class matching each node type; the location
  # made from the document itself has neither parent nor index.
  def test_make_loc
    doc = HTree.parse('<?xml version="1.0"?><!DOCTYPE root><root>a<?x y?><!-- c --></boo>')
    assert_instance_of(HTree::Doc::Loc, doc.make_loc)
    assert_instance_of(HTree::XMLDecl::Loc, doc.children[0].make_loc)
    assert_instance_of(HTree::DocType::Loc, doc.children[1].make_loc)
    assert_instance_of(HTree::Elem::Loc, doc.children[2].make_loc)

    # Children of <root>; the unmatched </boo> is the fourth one.
    assert_instance_of(HTree::Text::Loc, doc.children[2].children[0].make_loc)
    assert_instance_of(HTree::ProcIns::Loc, doc.children[2].children[1].make_loc)
    assert_instance_of(HTree::Comment::Loc, doc.children[2].children[2].make_loc)
    assert_instance_of(HTree::BogusETag::Loc, doc.children[2].children[3].make_loc)

    assert_equal(nil, doc.make_loc.parent)
    assert_equal(nil, doc.make_loc.index)
  end

  # get_subnode gives typed locations for existing children and a plain
  # HTree::Location for out-of-range indexes and missing attributes.
  def test_get_subnode
    doc = HTree.parse('<?xml version="1.0"?><!DOCTYPE root><root>a<?x y?><!-- c --></boo>')
    doc_loc = doc.make_loc
    assert_instance_of(HTree::Doc::Loc, doc_loc)
    assert_instance_of(HTree::Location, doc_loc.get_subnode(-1))
    assert_instance_of(HTree::XMLDecl::Loc, doc_loc.get_subnode(0))
    assert_instance_of(HTree::DocType::Loc, doc_loc.get_subnode(1))
    root_loc = doc_loc.get_subnode(2)
    assert_instance_of(HTree::Elem::Loc, root_loc)
    assert_instance_of(HTree::Location, doc_loc.get_subnode(3))

    assert_instance_of(HTree::Location, root_loc.get_subnode(-1))
    assert_instance_of(HTree::Location, root_loc.get_subnode('attr'))
    assert_instance_of(HTree::Text::Loc, root_loc.get_subnode(0))
    assert_instance_of(HTree::ProcIns::Loc, root_loc.get_subnode(1))
    assert_instance_of(HTree::Comment::Loc, root_loc.get_subnode(2))
    assert_instance_of(HTree::BogusETag::Loc, root_loc.get_subnode(3))
    assert_instance_of(HTree::Location, root_loc.get_subnode(4))

    # Asking twice for the same child returns the identical object.
    assert_same(doc_loc.get_subnode(0), doc_loc.get_subnode(0))
  end

  # find_loc_step numbers a step only when siblings of the same kind exist.
  def test_find_loc_step
    doc = HTree.parse('<a><b>x<!---->y</a><c/><a/>')
    assert_equal('a[1]', doc.find_loc_step(0))
    assert_equal('c', doc.find_loc_step(1))
    assert_equal('a[2]', doc.find_loc_step(2))

    first_a = doc.children[0]
    assert_equal('b', first_a.find_loc_step(0))

    b_elem = first_a.children[0]
    assert_equal('text()[1]', b_elem.find_loc_step(0))
    assert_equal('comment()', b_elem.find_loc_step(1))
    assert_equal('text()[2]', b_elem.find_loc_step(2))
  end

  # path joins the steps from the document down to the location.
  def test_path
    doc_loc = HTree.parse('<a><b>x</b><b/><a/>').make_loc
    text_loc = doc_loc.get_subnode(0, 0, 0)
    assert_equal('doc()', doc_loc.path)
    assert_equal('doc()/a/b[1]/text()', text_loc.path)
  end
end

@ -0,0 +1,147 @@
require 'test/unit'
require 'htree/tag'
# Tests how HTree::STag resolves element and attribute names against
# xmlns attributes, the inherited context and explicit "prefix{uri}local"
# universal names.
class TestNamespace < Test::Unit::TestCase
  # Attribute tuples [namespace_uri, namespace_prefix, local_name, value]
  # expected for the attribute list
  # [["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]] when the inherited
  # context binds prefix "q" to "u".
  COMMON_ATTRIBUTES = [
    ['', nil, "a", "x"],
    ["u", "q", "b", "y"],
    ["uu", "pp", "c", "z"],
  ].freeze

  # Asserts that +expected+.exact_equal?(+actual+) holds, reporting both
  # values on failure.
  #
  # The predicate used to be spelled equal_exact?, which no other code in
  # this test suite uses; the sibling helper in the equality tests calls
  # exact_equal?, so this one now does too.
  # NOTE(review): exact_equal? is presumably provided by htree/equality,
  # which this file does not require itself -- load it before using this
  # helper.
  def assert_equal_exact(expected, actual, message=nil)
    full_message = build_message(message, <<EOT, expected, actual)
<?> expected but was
<?>.
EOT
    assert_block(full_message) { expected.exact_equal? actual }
  end

  # <ppp:nnn xmlns:ppp="uuu">
  def test_prefixed
    stag = HTree::STag.new("ppp:nnn",
      [["xmlns:ppp", "uuu"], ["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]],
      HTree::DefaultContext.subst_namespaces({"q"=>"u"}))
    assert_equal("ppp:nnn", stag.element_name.qualified_name)
    assert_equal("{uuu}nnn", stag.element_name.universal_name)
    assert_equal("nnn", stag.element_name.local_name)
    assert_equal("uuu", stag.element_name.namespace_uri)
    assert_equal("ppp", stag.element_name.namespace_prefix)
    assert_equal([["ppp", "uuu"]], namespace_attributes_of(stag))
    assert_equal(COMMON_ATTRIBUTES, attributes_of(stag))
  end

  # <nnn xmlns="uuu">
  def test_default_ns
    stag = HTree::STag.new("nnn",
      [["xmlns", "uuu"],
       ["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]],
      HTree::DefaultContext.subst_namespaces({"q"=>"u"}))
    assert_equal("nnn", stag.element_name.qualified_name)
    assert_equal("{uuu}nnn", stag.element_name.universal_name)
    assert_equal("nnn", stag.element_name.local_name)
    assert_equal("uuu", stag.element_name.namespace_uri)
    assert_equal(nil, stag.element_name.namespace_prefix)
    assert_equal([[nil, "uuu"]], namespace_attributes_of(stag))
    assert_equal(COMMON_ATTRIBUTES, attributes_of(stag))
  end

  # <nnn xmlns="">
  # Checked both without and with an inherited default namespace.
  def test_no_default_ns
    [{"q"=>"u"}, {nil=>"uu", "q"=>"u"}].each {|inh|
      stag = HTree::STag.new("nnn",
        [["xmlns", ""], ["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]],
        HTree::DefaultContext.subst_namespaces(inh))
      assert_equal("nnn", stag.element_name.qualified_name)
      assert_equal("nnn", stag.element_name.universal_name)
      assert_equal("nnn", stag.element_name.local_name)
      assert_equal('', stag.element_name.namespace_uri)
      assert_equal(nil, stag.element_name.namespace_prefix)
      assert_equal([[nil, ""]], namespace_attributes_of(stag))
      assert_equal(COMMON_ATTRIBUTES, attributes_of(stag))
    }
  end

  # <nnn>
  def test_no_ns
    stag = HTree::STag.new("nnn",
      [["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]],
      HTree::DefaultContext.subst_namespaces({"q"=>"u"}))
    assert_equal("nnn", stag.element_name.qualified_name)
    assert_equal("nnn", stag.element_name.universal_name)
    assert_equal("nnn", stag.element_name.local_name)
    assert_equal('', stag.element_name.namespace_uri)
    assert_equal(nil, stag.element_name.namespace_prefix)
    assert_equal([], namespace_attributes_of(stag))
    assert_equal(COMMON_ATTRIBUTES, attributes_of(stag))
  end

  # internally allocated element without prefix
  def test_universal_name_to_be_default_namespace
    stag = HTree::STag.new("{uuu}nnn",
      [["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]],
      HTree::DefaultContext.subst_namespaces({"q"=>"u"}))
    assert_equal("nnn", stag.element_name.qualified_name)
    assert_equal("{uuu}nnn", stag.element_name.universal_name)
    assert_equal("nnn", stag.element_name.local_name)
    assert_equal("uuu", stag.element_name.namespace_uri)
    assert_equal(nil, stag.element_name.namespace_prefix)
    assert_equal([], namespace_attributes_of(stag))
    assert_equal(COMMON_ATTRIBUTES, attributes_of(stag))
  end

  # Element given as "prefix{uri}local"; the attribute "q{uu}d" carries
  # its own URI even though the context binds "q" to "u".
  def test_prefixed_universal_name
    stag = HTree::STag.new("ppp{uuu}nnn",
      [["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"], ["q{uu}d", "w"]],
      HTree::DefaultContext.subst_namespaces({"q"=>"u"}))
    assert_equal("ppp:nnn", stag.element_name.qualified_name)
    assert_equal("{uuu}nnn", stag.element_name.universal_name)
    assert_equal("nnn", stag.element_name.local_name)
    assert_equal("uuu", stag.element_name.namespace_uri)
    assert_equal("ppp", stag.element_name.namespace_prefix)
    assert_equal([], namespace_attributes_of(stag))
    assert_equal(COMMON_ATTRIBUTES + [["uu", "q", "d", "w"]], attributes_of(stag))
  end

  private

  # Collects the [prefix, uri] pairs yielded by each_namespace_attribute,
  # in iteration order.
  def namespace_attributes_of(stag)
    pairs = []
    stag.each_namespace_attribute {|prefix, uri| pairs << [prefix, uri] }
    pairs
  end

  # Collects [namespace_uri, namespace_prefix, local_name, value string]
  # for every attribute yielded by each_attribute, in iteration order.
  def attributes_of(stag)
    tuples = []
    stag.each_attribute {|name, text|
      tuples << [name.namespace_uri, name.namespace_prefix, name.local_name, text.to_s]
    }
    tuples
  end
end

@ -0,0 +1,133 @@
require 'test/unit'
require 'htree'
# Tests the low-level output methods of the node classes by writing into
# a US-ASCII encoder and inspecting the produced string.
class TestOutput < Test::Unit::TestCase
  # Calls +meth+ on +t+ with +rest+ followed by a fresh US-ASCII encoder
  # and the default context, then returns what the encoder collected.
  def gen(t, meth=:output, *rest)
    encoder = HTree::Encoder.new('US-ASCII', 'US-ASCII')
    arguments = rest + [encoder, HTree::DefaultContext]
    t.__send__(meth, *arguments)
    encoder.finish
  end

  # In text, &, < and > are escaped; quotes are left alone.
  def test_text
    assert_equal('a&amp;&lt;&gt;"b', gen(HTree::Text.new('a&<>"b')))
    assert_equal("abc&amp;def", gen(HTree::Text.new("abc&def")))
    assert_equal('"\'&amp;', gen(HTree::Text.new('"\'&')))
    assert_equal('"\'&lt;&amp;&gt;', gen(HTree::Text.new('"\'<&>')))
  end

  # Attribute values are wrapped in double quotes and " is escaped too.
  def test_text_attvalue
    assert_equal('"a&amp;&lt;&gt;&quot;b"', gen(HTree::Text.new('a&<>"b'), :output_attvalue))
    assert_equal('"abc"', gen(HTree::Text.new("abc"), :output_attvalue))
    assert_equal('"&quot;"', gen(HTree::Text.new('"'), :output_attvalue))
  end

  # Names are written as prefix:local, or just local without a prefix.
  def test_name
    parsed = HTree::Name.parse_element_name('abc', HTree::DefaultContext)
    assert_equal('abc', gen(parsed))
    assert_equal('n', gen(HTree::Name.new(nil, 'u', 'n')))
    assert_equal('p:n', gen(HTree::Name.new('p', 'u', 'n')))
    assert_equal('n', gen(HTree::Name.new(nil, '', 'n')))
    assert_equal('xmlns', gen(HTree::Name.new('xmlns', nil, nil)))
    assert_equal('xmlns:n', gen(HTree::Name.new('xmlns', nil, 'n')))
  end

  # output_attribute writes name="escaped value".
  def test_name_attribute
    name = HTree::Name.parse_element_name('abc', HTree::DefaultContext)
    value = HTree::Text.new('a&<>"b')
    assert_equal('abc="a&amp;&lt;&gt;&quot;b"', gen(name, :output_attribute, value))
  end

  # A document writes its children one after another.
  def test_doc
    doc = HTree::Doc.new(HTree::Elem.new('a'), HTree::Elem.new('b'))
    assert_equal("<a\n/><b\n/>", gen(doc))
  end

  # Elements with a (possibly empty) child list get start and end tags;
  # elements without one are written as empty-element tags.
  def test_elem
    with_empty_list = HTree::Elem.new('a', [])
    assert_equal("<a\n></a\n>", gen(with_empty_list))

    assert_equal("<b\n/>", gen(HTree::Elem.new!(HTree::STag.new('b'))))
    assert_equal("<b\n></b\n>", gen(HTree::Elem.new!(HTree::STag.new('b'), [])))

    kids = [
      HTree::Elem.new!(HTree::STag.new('b')),
      HTree::Elem.new!(HTree::STag.new('c')),
      HTree::Elem.new!(HTree::STag.new('d'))
    ]
    parent = HTree::Elem.new!(HTree::STag.new('a'), kids)
    assert_equal("<a\n><b\n/><c\n/><d\n/></a\n>", gen(parent))
  end

  # An element created from a name alone is an empty element.
  def test_elem_empty
    elem = HTree::Elem.new('a')
    assert_equal("<a\n/>", gen(elem))
  end

  # A start tag can be written as start tag, empty-element tag or end
  # tag; attribute values are escaped.
  def test_stag
    assert_equal("<name\n>", gen(HTree::STag.new("name"), :output_stag))
    assert_equal("<name\n/>", gen(HTree::STag.new("name"), :output_emptytag))
    assert_equal("</name\n>", gen(HTree::STag.new("name"), :output_etag))

    with_attr = HTree::STag.new("name", [["a", "b"]])
    assert_equal("<name a=\"b\"\n/>", gen(with_attr, :output_emptytag))

    with_special_chars = HTree::STag.new("name", [['a', '<"\'>']])
    assert_equal("<name a=\"&lt;&quot;\'&gt;\"\n/>", gen(with_special_chars, :output_emptytag))

    with_xmlns = HTree::STag.new("ppp:nnn", [["xmlns", "uuu\"b"]])
    assert_equal("<ppp:nnn xmlns=\"uuu&quot;b\"\n/>", gen(with_xmlns, :output_emptytag))
  end

  # output writes nothing for an XML declaration; output_prolog_xmldecl
  # writes it.
  def test_xmldecl
    decl = HTree::XMLDecl.new('1.0', 'US-ASCII')
    assert_equal('', gen(decl))
    assert_equal('<?xml version="1.0" encoding="US-ASCII"?>', gen(decl, :output_prolog_xmldecl))
  end

  # A DOCTYPE with public and system identifiers.
  def test_doctype
    doctype = HTree::DocType.new('html',
                                 '-//W3C//DTD HTML 4.01//EN',
                                 'http://www.w3.org/TR/html4/strict.dtd')
    assert_equal('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">', gen(doctype))
  end

  # Processing instructions with and without content.
  def test_procins
    stylesheet = HTree::ProcIns.new('xml-stylesheet', 'type="text/xml" href="#style1"')
    assert_equal('<?xml-stylesheet type="text/xml" href="#style1"?>', gen(stylesheet))

    bare = HTree::ProcIns.new('x', nil)
    assert_equal('<?x?>', gen(bare))
  end

  # A comment is wrapped in <!-- and -->.
  def test_comment
    comment = HTree::Comment.new('xxx')
    assert_equal('<!--xxx-->', gen(comment))
  end
end
# Tests display_html on trees parsed from HTML fragments.
class TestHTMLOutput < Test::Unit::TestCase
  # The top-level html element is written without an xmlns attribute.
  def test_top_xmlns
    html = HTree("<html>aaa").display_html("")
    assert_equal("<html\n>aaa</html\n>", html)
  end

  # Script content is written without escaping "<".
  def test_script
    html = HTree("<html><script>a < b").display_html("")
    assert_equal("<html\n><script\n>a < b</script\n></html\n>", html)
  end

  # Script content containing "</" raises ArgumentError.
  def test_script_invalid_content
    assert_raise(ArgumentError) do
      HTree("<html><script>a </ b").display_html("")
    end
  end

  # br is written as a start tag only, with no end tag.
  def test_br
    html = HTree("<html>a<br>b<br>c").display_html("")
    assert_equal("<html\n>a<br\n>b<br\n>c</html\n>", html)
  end
end

@ -0,0 +1,115 @@
require 'test/unit'
require 'htree/parse'
require 'htree/equality'
require 'htree/traverse'
# Tests HTree.parse and HTree.parse_xml on small inputs.
class TestParse < Test::Unit::TestCase
  # Empty input parses to a document without children.
  def test_empty
    parsed = HTree.parse_xml("").eliminate_raw_string
    assert_equal(HTree::Doc.new([]), parsed)
  end

  # A default namespace declared on the parent is inherited by the child.
  def test_xmlns_default
    outer_context = HTree::DefaultContext.subst_namespaces(
      { 'xml' => 'http://www.w3.org/XML/1998/namespace' })
    outer_stag = HTree::STag.new('x1', [['xmlns', 'bb']], outer_context)

    inner_context = HTree::DefaultContext.subst_namespaces(
      { nil => 'bb', 'xml' => 'http://www.w3.org/XML/1998/namespace' })
    inner_stag = HTree::STag.new('x2', [], inner_context)

    expected = HTree::Doc.new([
      HTree::Elem.new!(outer_stag, [HTree::Elem.new!(inner_stag, nil)])
    ])
    parsed = HTree.parse_xml('<x1 xmlns="bb"><x2>')
    assert_equal(expected, parsed)
  end

  # The DOCTYPE root name is downcased for HTML input and kept as
  # written when an XML declaration precedes it.
  def test_doctype_root_element_name
    html_doc = HTree.parse('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"><html>')
    assert_equal('html', html_doc.children[0].root_element_name)

    # xxx: should be downcased?
    xml_doc = HTree.parse('<?xml version="1.0"?><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"><HTML>')
    assert_equal('HTML', xml_doc.children[1].root_element_name)
  end

  # The system identifier is found in both SYSTEM and PUBLIC forms.
  def test_doctype_system_identifier
    system_form = HTree.parse("<!DOCTYPE HTML SYSTEM 'http://www.w3.org/TR/html4/loose.dtd'>")
    assert_equal('http://www.w3.org/TR/html4/loose.dtd', system_form.children[0].system_identifier)

    public_form = HTree.parse("<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN' 'http://www.w3.org/TR/html4/loose.dtd'>")
    assert_equal('http://www.w3.org/TR/html4/loose.dtd', public_form.children[0].system_identifier)
  end

  # A processing instruction without content has nil content.
  def test_procins
    procins = HTree.parse_xml("<?x?>").children[0]
    assert_equal('x', procins.target)
    assert_equal(nil, procins.content)
  end

  # Line breaks next to tags are kept as text, and the raw string is
  # preserved.
  def test_eol_html
    expected = HTree::Elem.new('a', "\nb\n")
    source = "<a>\nb\n</a>"
    parsed = HTree.parse_xml(source).root
    assert_equal(expected, parsed)
    assert_equal(source, parsed.raw_string)
  end

  # HTML input puts the root element into the XHTML namespace.
  def test_parse_html
    doc = HTree.parse("<html>a</html>")
    assert_equal("{http://www.w3.org/1999/xhtml}html", doc.root.element_name.universal_name)
  end

  # An unquoted URL ending in "/" is taken entirely as attribute value.
  def test_bare_url
    expected = HTree::Elem.new('a', { 'href' => 'http://host/' })
    source = "<a href=http://host/>"
    parsed = HTree.parse(source).root
    assert_equal(expected, parsed)
  end

  # A trailing "/" in an unquoted value belongs to the value, so the tag
  # is a start tag and the following text becomes its child.
  def test_bare_slash
    expected = HTree::Elem.new('n', { 'a' => 'v/' }, 'x')
    source = "<n a=v/>x"
    parsed = HTree.parse(source).root
    assert_equal(expected, parsed)
  end

  # Same as above, without following content.
  def test_bare_slash_empty
    expected = HTree::Elem.new('n', { 'a' => 'v/' })
    source = "<n a=v/>"
    parsed = HTree.parse(source).root
    assert_equal(expected, parsed)
  end

  # Non-HTML element names keep their case.
  def test_downcase
    doc = HTree.parse('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>')
    assert_equal("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF", doc.root.name)
  end

  # Case handling of names for the HTML/XML combinations.
  def test_downcase_name
    # HTML && !XML
    assert_equal('html', HTree.parse('<HTML>').root.element_name.local_name)
    assert_equal('html', HTree.parse('<html>').root.element_name.local_name)

    # HTML && XML
    assert_equal('html', HTree.parse('<?xml version="1.0"?><html>').root.element_name.local_name)
    assert_equal('v', HTree.parse('<?xml version="1.0"?><html X:Y=v xmlns:X=u>').root.get_attr('{u}Y'))

    # !HTML && XML
    rdf_doc = HTree.parse('<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>')
    assert_equal('RDF', rdf_doc.children[1].element_name.local_name)
  end

  # An explicit </script> yields a script element with an empty child list.
  def test_script_etag
    expected = HTree::Doc.new(HTree::Elem.new('{http://www.w3.org/1999/xhtml}script', []))
    assert_equal(expected, HTree.parse('<script></script>'))
  end

  # <html> without an end tag is not an empty element.
  def test_html_emptyelem
    doc = HTree.parse('<html>')
    expected = HTree::Doc.new(HTree::Elem.new('{http://www.w3.org/1999/xhtml}html'))
    assert_equal(expected, doc)
    assert(!doc.children[0].empty_element?)
  end

  # <hr> is an empty element.
  def test_hr_emptyelem
    doc = HTree.parse('<html><hr>')
    expected = HTree::Doc.new(
      HTree::Elem.new('{http://www.w3.org/1999/xhtml}html',
                      HTree::Elem.new('{http://www.w3.org/1999/xhtml}hr')))
    assert_equal(expected, doc)
    assert(doc.children[0].children[0].empty_element?)
  end
end

@ -0,0 +1,17 @@
require 'test/unit'
require 'htree'
# Tests raw_string, the original source text kept for parsed nodes.
class TestRawString < Test::Unit::TestCase
  # A parsed element returns its source text, also when asked again.
  def test_elem
    doc = HTree.parse("<a>x</a>")
    assert_equal("<a>x</a>", doc.root.raw_string)
    assert_equal("<a>x</a>", doc.root.raw_string) # raw_string shouldn't have side effect.
  end

  # Elements built with Elem.new have no raw string, even when one of
  # their children was parsed.
  def test_no_raw_string
    built = HTree::Elem.new('a')
    assert_equal(nil, built.raw_string)

    wrapping_parsed = HTree::Elem.new('a', HTree.parse("<a>x</a>").root)
    assert_equal(nil, wrapping_parsed.raw_string)
  end
end

@ -0,0 +1,70 @@
require 'test/unit'
require 'htree/parse'
require 'htree/rexml'
begin
require 'rexml/document'
rescue LoadError
end
# Tests to_rexml, the conversion of HTree nodes into REXML objects.
# The whole class is defined only when REXML is available.
class TestREXML < Test::Unit::TestCase
  # A document converts to a REXML::Document.
  def test_doc
    converted = HTree.parse('<root/>').to_rexml
    assert_instance_of(REXML::Document, converted)
  end

  # Element name and attributes survive the conversion.
  def test_elem
    converted = HTree.parse('<root a="b"/>').to_rexml
    root = converted.root
    assert_instance_of(REXML::Element, root)
    assert_equal('root', root.name)
    assert_equal('b', root.attribute('a').to_s)
  end

  # Text children become REXML::Text.
  def test_text
    converted = HTree.parse('<root>aaa</root>').to_rexml
    text = converted.root.children[0]
    assert_instance_of(REXML::Text, text)
    assert_equal('aaa', text.to_s)
  end

  # The XML declaration converts both within a document and on its own.
  def test_xmldecl
    decl_source = '<?xml version="1.0"?>'
    converted = HTree.parse(decl_source + '<root>aaa</root>').to_rexml
    decl = converted.children[0]
    assert_instance_of(REXML::XMLDecl, decl)
    assert_equal('1.0', decl.version)
    assert_equal(nil, decl.standalone)
    assert_instance_of(REXML::XMLDecl, HTree.parse(decl_source).children[0].to_rexml)
  end

  # The DOCTYPE converts both within a document and on its own.
  def test_doctype
    doctype_source = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">'
    converted = HTree.parse(doctype_source + '<html><title>xxx</title></html>').to_rexml
    doctype = converted.children[0]
    assert_instance_of(REXML::DocType, doctype)
    assert_equal('html', doctype.name)
    assert_equal('PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"', doctype.external_id)
    assert_instance_of(REXML::DocType, HTree.parse(doctype_source).children[0].to_rexml)
  end

  # Processing instructions become REXML::Instruction.
  def test_procins
    converted = HTree.parse('<root><?xxx yyy?></root>').to_rexml
    instruction = converted.root.children[0]
    assert_instance_of(REXML::Instruction, instruction)
    assert_equal('xxx', instruction.target)
    assert_equal('yyy', instruction.content)
    assert_instance_of(REXML::Instruction, HTree.parse('<?xxx yyy?>').children[0].to_rexml)
  end

  # Comments become REXML::Comment with the same content.
  def test_comment
    converted = HTree.parse('<root><!-- zzz --></root>').to_rexml
    comment = converted.root.children[0]
    assert_instance_of(REXML::Comment, comment)
    assert_equal(' zzz ', comment.to_s)
  end

  # An unmatched end tag converts to nil.
  def test_bogusetag
    bogus = HTree.parse('</e>').children[0]
    assert_equal(nil, bogus.to_rexml)
  end

  # "<" inside style content is escaped in the REXML output.
  def test_style
    serialized = HTree.parse('<html><style>a<b</style></html>').to_rexml.to_s
    assert_equal('<style>a&lt;b</style>', serialized[/<style.*style>/])
  end
end if defined? REXML

@ -0,0 +1,153 @@
require 'test/unit'
require 'htree/scan'
# Tests HTree.scan, the tokenizer that yields [kind, source] pairs.
class TestScan < Test::Unit::TestCase
  # Returns every token HTree.scan yields for +str+, in order.
  def scan(str)
    tokens = []
    HTree.scan(str) { |token| tokens << token }
    tokens
  end

  # Empty input yields no token.
  def test_empty
    assert_equal([], scan(''))
  end

  # Asserts that scanning +s+ yields exactly the one token returned by
  # the given block.
  def t_single(s)
    expected = yield
    assert_equal([expected], scan(s))
  end

  # Each kind of markup on its own is scanned as a single token whose
  # text is the whole input.
  def test_single
    samples = [
      [:xmldecl, '<?xml version="1.0"?>'],
      [:doctype, '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">'],
      [:procins, '<?xxx yyy?>'],
      [:stag, '<a>'],
      [:etag, '</a>'],
      [:emptytag, '<a/>'],
      [:comment, '<!-- abc -->'],
      [:text_cdata_section, '<![CDATA[abc]]>'],
      [:text_pcdata, 'abc'],
    ]
    samples.each do |kind, source|
      assert_equal([[kind, source]], scan(source))
    end
  end

  # A start tag is scanned the same way with or without a preceding XML
  # declaration.
  def test_xmldecl_seen
    decl = '<?xml version="1.0"?>'
    tag = '<A>'
    assert_equal([[:stag, tag]], scan(tag))
    assert_equal([[:xmldecl, decl], [:stag, tag]], scan(decl + tag))
  end

  # Markup inside <script> is CDATA content up to </script> or the end
  # of the input.
  def test_cdata_content
    closed = '<html><script><a></script><a>'
    assert_equal([
      [:stag, '<html>'],
      [:stag, '<script>'],
      [:text_cdata_content, '<a>'],
      [:etag, '</script>'],
      [:stag, '<a>'],
    ], scan(closed))

    unclosed = '<html><script><a>'
    assert_equal([
      [:stag, '<html>'],
      [:stag, '<script>'],
      [:text_cdata_content, '<a>'],
    ], scan(unclosed))
  end

  # Text and tags alternate.
  def test_text
    source = 'a<e>b<e>c<e>d'
    assert_equal([
      [:text_pcdata, 'a'],
      [:stag, '<e>'],
      [:text_pcdata, 'b'],
      [:stag, '<e>'],
      [:text_pcdata, 'c'],
      [:stag, '<e>'],
      [:text_pcdata, 'd'],
    ], scan(source))
  end

  def test_eol_html
    # In SGML, a line break just after start tag and
    # a line break just before end tag is ignored.
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#notes-line-breaks
    #
    # But usual browser including mozilla doesn't.
    # So HTree doesn't ignore them and treat as usual text.
    with_plain_element = "<html>a\n<e>\nb\n<f>\nc\n</f>\nd\n</e>\ne"
    assert_equal([
      [:stag, "<html>"],
      [:text_pcdata, "a\n"],
      [:stag, "<e>"],
      [:text_pcdata, "\nb\n"],
      [:stag, "<f>"],
      [:text_pcdata, "\nc\n"],
      [:etag, "</f>"],
      [:text_pcdata, "\nd\n"],
      [:etag, "</e>"],
      [:text_pcdata, "\ne"],
    ], scan(with_plain_element))

    with_script = "<html>a\n<e>\nb\n<script>\nc\n</script>\nd\n</e>\ne"
    assert_equal([
      [:stag, "<html>"],
      [:text_pcdata, "a\n"],
      [:stag, "<e>"],
      [:text_pcdata, "\nb\n"],
      [:stag, "<script>"],
      [:text_cdata_content, "\nc\n"],
      [:etag, "</script>"],
      [:text_pcdata, "\nd\n"],
      [:etag, "</e>"],
      [:text_pcdata, "\ne"],
    ], scan(with_script))
  end

  def test_eol_xml
    # In XML, line breaks are treated as part of content.
    # It's because KEEPRSRE is yes in XML.
    # http://www.satoshii.org/markup/websgml/valid-xml#keeprsre
    source = "<?xml version='1.0'?>a\n<e>\nb\n<f>\nc\n</f>\nd\n</e>\ne"
    assert_equal([
      [:xmldecl, "<?xml version='1.0'?>"],
      [:text_pcdata, "a\n"],
      [:stag, "<e>"],
      [:text_pcdata, "\nb\n"],
      [:stag, "<f>"],
      [:text_pcdata, "\nc\n"],
      [:etag, "</f>"],
      [:text_pcdata, "\nd\n"],
      [:etag, "</e>"],
      [:text_pcdata, "\ne"],
    ], scan(source))
  end

  # HTree.scan returns a pair of booleans; judging by the inputs these
  # are [looks like XML, looks like HTML].
  def test_xml_html_detection
    assert_equal([false, true], HTree.scan("<html></html>") {})
    assert_equal([true, false], HTree.scan("<rss></rss>") {})
    assert_equal([true, true], HTree.scan('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">') {})
  end

  # A ">" inside a quoted attribute value does not end the tag.
  def test_quoted_attr
    source = '<e a=">"/>'
    assert_equal([[:emptytag, '<e a=">"/>']], scan(source))
  end

  # A "/" ending an unquoted attribute value does not make the tag an
  # empty-element tag.
  def test_bare_slash
    assert_equal([[:stag, '<n dir=/foo/bar/>']], scan('<n dir=/foo/bar/>'))
    assert_equal([[:stag, '<n a=v/>']], scan('<n a=v/>'))
  end
end

@ -0,0 +1,37 @@
require 'test/unit'
require 'htree/parse'
require 'htree/template'
require 'pathname'
# Tests that parsing and template expansion reject tainted input when
# running at $SAFE level 1 and accept it at level 0.
class TestSecurity < Test::Unit::TestCase
  # Runs the given block in a new thread after setting $SAFE to +n+
  # there, checking that $SAFE is 0 in the caller before and afterwards.
  def safe(n)
    assert_equal(0, $SAFE)
    worker = Thread.new do
      $SAFE = n
      assert_equal(n, $SAFE)
      yield
    end
    worker.join
    assert_equal(0, $SAFE)
  end

  # HTree.parse raises SecurityError for a tainted string at level 1.
  def test_parse
    safe(1) do
      assert_equal(1, $SAFE)
      assert_nothing_raised { HTree.parse("") }
      assert_raise(SecurityError) { HTree.parse("".taint) }
    end

    # At level 0 both untainted and tainted input are accepted.
    assert_nothing_raised { HTree.parse("") }
    assert_nothing_raised { HTree.parse("".taint) }
  end

  # HTree.expand_template raises SecurityError for a tainted path at
  # level 1.
  def test_template
    safe(1) do
      assert_nothing_raised { HTree.expand_template("/dev/null", nil, '') }
      assert_raise(SecurityError) { HTree.expand_template("/dev/null".taint, nil, '') }
    end

    # At level 0 both untainted and tainted paths are accepted.
    assert_nothing_raised { HTree.expand_template("/dev/null", nil, '') }
    assert_nothing_raised { HTree.expand_template("/dev/null".taint, nil, '') }
  end
end

@ -0,0 +1,142 @@
require 'test/unit'
require 'htree'
class TestSubnode < Test::Unit::TestCase
def test_elem_get
e1 = HTree.parse("<a href=x>abc</a>").root
assert_equal(HTree::Text.new("x"), e1.get_subnode("href"))
assert_equal(HTree::Text.new("abc"), e1.get_subnode(0))
end
def test_elem_subst
e1 = HTree.parse_xml("<a href=x>abc</a>").root
e2 = e1.subst_subnode("href"=>"xxx", 0=>"def")
assert_equal("a", e2.name)
assert_equal("xxx", e2.fetch_attr("href"))
assert_equal([HTree::Text.new("def")], e2.children)
assert_equal([], e1.subst_subnode(0=>nil).children)
end
def test_elem_subst_empty
e1 = HTree.parse("<img />").root
assert_equal(true, e1.empty_element?)
assert_equal(true, e1.subst_subnode("src"=>"xxx").empty_element?)
assert_equal(false, e1.subst_subnode(0=>"xxx").empty_element?)
end
def test_elem_multiple_attr_value
h = {"b"=>"c", HTree::Name.new(nil, "", "b")=>"d"}
assert_match(/\A(cd|dc)\z/,
HTree::Elem.new("a").subst_subnode(h).get_subnode('b').to_s)
a = [["b","c"], [HTree::Name.new(nil, "", "b"),"d"]]
assert_equal('cd',
HTree::Elem.new("a").subst_subnode(a).get_subnode('b').to_s)
assert_equal('dc',
HTree::Elem.new("a").subst_subnode(a.reverse).get_subnode('b').to_s)
end
def test_elem_subst_outrange
e1 = HTree("<r>abc</r>").root
e2 = e1.subst_subnode(-1=>HTree('<x/>'), 1=>HTree('<y/>'))
assert_equal(HTree('<r><x/>abc<y/></r>').root, e2)
end
def test_doc_subst_outrange
d1 = HTree("<r>abc</r>")
d2 = d1.subst_subnode(-1=>HTree('<x/>'), 1=>HTree('<y/>'))
assert_equal(HTree('<x/><r>abc</r><y/>'), d2)
end
def test_doc_get
doc = HTree.parse("<?xml?><a href=x>abc</a> ")
assert_equal(doc.root, doc.get_subnode(1))
end
def test_doc_subst
doc1 = HTree.parse("<?xml?><a href=x>abc</a> ")
doc2 = doc1.subst_subnode(1=>"yy")
assert_equal(HTree::Text.new("yy"), doc2.children[1])
assert_equal([], doc1.subst_subnode(0=>nil, 1=>nil, 2=>nil).children)
end
def test_doc_loc
d1 = HTree.parse("<r>a</r>")
d2 = HTree.parse("<q/>")
assert_equal(d2, d1.subst_subnode(0=>d2.make_loc))
end
def test_doc
e = HTree.parse("<r>a</r>").root
d = HTree.parse("<?xml version='1.0'?><!DOCTYPE q><q/>")
r = HTree('<r><q/></r>').root
assert_equal(r, e.subst_subnode(0=>d))
assert_equal(r, e.subst_subnode(0=>d.make_loc))
assert_equal(r, e.subst_subnode(0=>[d]))
assert_equal(r, e.subst_subnode(0=>[d.make_loc]))
end
def test_doc2
e = HTree.parse("<r>a</r>")
d = HTree.parse("<?xml version='1.0'?><!DOCTYPE q><q/>")
r = HTree('<q/>')
assert_equal(r, e.subst_subnode(0=>d))
assert_equal(r, e.subst_subnode(0=>d.make_loc))
assert_equal(r, e.subst_subnode(0=>[d]))
assert_equal(r, e.subst_subnode(0=>[d.make_loc]))
end
# subst_itself on a location replaces the node at that location; the
# returned location exposes both the new node (to_node) and the rebuilt
# tree (top.to_node).
def test_change_by_subst_itself
  top = HTree("<r>a</r>").make_loc

  # Replace the text inside <r>.
  text_loc = top.get_subnode(0, 0).subst_itself('x')
  assert_equal(HTree::Text.new('x'), text_loc.to_node)
  assert_equal(HTree('<r>x</r>'), text_loc.top.to_node)

  # Replace the <r> element itself.
  root_loc = top.get_subnode(0).subst_itself('xxx')
  assert_equal(HTree::Text.new('xxx'), root_loc.to_node)
  assert_equal(HTree('xxx'), root_loc.top.to_node)
end
# subst_itself on locations obtained via get_subnode for an attribute that
# is not yet present, an existing child, and out-of-range indices.
def test_add_by_subst_itself
  top = HTree("<r>a</r>").make_loc

  # Attribute x does not exist on <r> yet; it is expected to be added.
  attr_loc = top.get_subnode(0, 'x').subst_itself('y')
  assert_equal(HTree::Text.new('y'), attr_loc.to_node)
  assert_equal(HTree('<r x="y">a</r>'), attr_loc.top.to_node)

  # Child 0 of <r> is the existing text "a"; it is expected to become "b".
  child_loc = top.get_subnode(0, 0).subst_itself('b')
  assert_equal(HTree::Text.new('b'), child_loc.to_node)
  assert_equal(HTree('<r>b</r>'), child_loc.top.to_node)

  # Index -1 at document level: the new node is expected at index 0.
  xmldecl = HTree('<?xml version="1.0"?>').get_subnode(0)
  front_loc = top.get_subnode(-1).subst_itself(xmldecl)
  assert_equal(0, front_loc.index)
  assert_equal(xmldecl, front_loc.to_node)
  assert_equal(HTree('<?xml version="1.0"?><r>a</r>'), front_loc.top.to_node)

  # Index 10 is far past the end: the new node is expected at index 1.
  procins = HTree('<?xxx yyy?>').get_subnode(0)
  back_loc = top.get_subnode(10).subst_itself(procins)
  assert_equal(1, back_loc.index)
  assert_equal(procins, back_loc.to_node)
  assert_equal(HTree('<r>a</r><?xxx yyy?>'), back_loc.top.to_node)
end
# Passing nil to subst_itself is expected to delete the node at the
# location: an attribute, a text child, and an XML declaration are tried.
def test_del_by_subst_itself
  top = HTree("<r x='y'><x/>y<z/></r>").make_loc

  # Attribute x of <r>.
  gone_attr = top.get_subnode(0, 'x').subst_itself(nil)
  assert_equal(nil, gone_attr.to_node)
  assert_equal(HTree('<r><x/>y<z/></r>'), gone_attr.top.to_node)

  # The text "y" between <x/> and <z/>.
  gone_text = top.get_subnode(0, 1).subst_itself(nil)
  assert_equal(HTree('<r x="y"><x/><z/></r>'), gone_text.top.to_node)

  # The XML declaration of a second document.
  decl_top = HTree('<?xml version="1.0"?><r/>').make_loc
  gone_decl = decl_top.get_subnode(0).subst_itself(nil)
  assert_equal(HTree('<r/>'), gone_decl.top.to_node)
end
# subst takes a Hash keyed by locations and applies all replacements at
# once: here a deletion, two text insertions and one attribute addition.
def test_subst
  top = HTree('<?xml version="1.0"?><r><x/><y/><z/></r>').make_loc
  expected = HTree("<r>x<y>a</y><z k=v /></r>")

  edits = {
    top.get_subnode(0) => nil,
    top.get_subnode(1, 0) => 'x',
    top.get_subnode(1, 1, 0) => 'a',
    top.get_subnode(1, 2, 'k') => 'v'
  }
  assert_equal(expected, top.to_node.subst(edits))
end
end

@ -0,0 +1,287 @@
require 'test/unit'
require 'htree/template'
require 'stringio'
# Expansion tests for HTree templates.  Most cases go through assert_xhtml,
# which wraps a template fragment in an XML-declared <html> element and
# compares what is produced between the <html ...> and </html> tags.
class TestTemplate < Test::Unit::TestCase
  Decl = '<?xml version="1.0" encoding="US-ASCII"?>'

  # Expands +template+ inside '<?xml version="1.0"?><html>...</html>' and
  # asserts that the text between the expected opening and closing <html>
  # tags equals +expected+.  +message+ is forwarded to assert_equal.
  #
  # The template block is a literal block of this method, so local variables
  # of the calling test are not in its binding.
  def assert_xhtml(expected, template, message=nil)
    opening = Decl + "<html xmlns=\"http://www.w3.org/1999/xhtml\"\n>"
    closing = "</html\n>"
    expansion = HTree.expand_template(''){"<?xml version=\"1.0\"?><html>#{template}</html>"}
    assert_match(/\A#{Regexp.quote opening}/, expansion)
    assert_match(/#{Regexp.quote closing}\z/, expansion)
    inner = expansion.slice(opening.length..(-closing.length-1))
    assert_equal(expected, inner, message)
  end

  # Runs assert_xhtml over [expected, template] pairs, in list order.
  def assert_xhtml_table(pairs)
    pairs.each {|expected, template| assert_xhtml(expected, template) }
  end
  private :assert_xhtml_table

  def test_text
    assert_xhtml_table([
      ["<e\n>1</e\n>", '<e _text=1>d</e>'],
      ['1', '<span _text=1>d</span>'],
      ["<span x=\"2\"\n>1</span\n>", '<span x=2 _text=1>d</span>'],
      ["abc", %q{a<span _text="'b'"/>c}]
    ])
  end

  def test_tree
    assert_xhtml_table([
      ["<e\n><z\n>x</z\n></e\n>", '<e _tree="HTree(&quot;<z>x</z>&quot;)">d</e>'],
      ["<n:e xmlns:n=\"a\"\n><n:z\n>x</n:z\n></n:e\n>", '<n:e xmlns:n=a _tree="HTree(&quot;<n:z xmlns:n=a>x</n:z>&quot;)">d</n:e>']
    ])
  end

  def test_attr
    assert_xhtml_table([
      ["<e x=\"1\"\n>d</e\n>", '<e _attr_x=1>d</e>'],
      ["<span x=\"1\"\n>d</span\n>", '<span _attr_x=1>d</span>'],
      ["<span x=\"&quot;\"\n>d</span\n>", '<span _attr_x=\'"\x22"\'>d</span>']
    ])
  end

  def test_if
    assert_xhtml_table([
      ["<e\n>d</e\n>", '<e _if=true>d</e>'],
      ['', '<e _if=false>d</e>'],
      ["<f\n>dd</f\n>", '<e _if=false _else=m>d</e><f _template=m>dd</f>'],
      ['d', '<span _if=true>d</span>']
    ])
  end

  def test_iter
    assert_xhtml_table([
      ["<o\n><i\n>1</i\n></o\n><o\n><i\n>2</i\n></o\n><o\n><i\n>3</i\n></o\n>",
       '<o _iter=[1,2,3].each//v><i _text=v /></o>'],
      ["<i\n>1</i\n><i\n>2</i\n><i\n>3</i\n>",
       '<span _iter=[1,2,3].each//v><i _text=v /></span>']
    ])
  end

  def test_iter_content
    assert_xhtml_table([
      ["<o\n><i\n>1</i\n><i\n>2</i\n><i\n>3</i\n></o\n>",
       '<o _iter_content=[1,2,3].each//v><i _text=v /></o>'],
      ["<i\n>1</i\n><i\n>2</i\n><i\n>3</i\n>",
       '<span _iter_content=[1,2,3].each//v><i _text=v /></span>']
    ])
  end

  def test_iter_local_template
    wanted = "<o\n><i\n>1</i\n></o\n><o\n><i\n>2</i\n></o\n><o\n><i\n>3</i\n></o\n>"
    source = '<o _iter=[1,2,3].each//v><i _call=m /><i _template=m _text=v></i></o>'
    assert_xhtml(wanted, source)
  end

  def test_call
    wanted = "<f\n>1</f\n>"
    source = '<e _call=m(1) /><f _template=m(v) _text=v></f>'
    assert_xhtml(wanted, source)
  end

  def test_template
    wanted = 'd'
    source = '<span _template="span()">d</span><e _call="span()"></e>'
    assert_xhtml(wanted, source)
  end

  # Expands template.html, located next to this test file, with "aaa" as the
  # object argument.
  def test_file
    wanted = <<'End'.chop
<?xml version="1.0" encoding="US-ASCII"?><html xmlns="http://www.w3.org/1999/xhtml"
><title
>aaa</title
></html
>
End
    template_path = "#{File.dirname __FILE__}/template.html"
    assert_equal(wanted, HTree.expand_template(template_path, "aaa", ''))
  end

  def test_whitespace
    assert_xhtml_table([
      ["<x\n></x\n>", '<x> </x>'],
      ["<x\n>&#32;</x\n>", '<x>&#32;</x>'],
      ["<pre\n> </pre\n>", '<pre> </pre>'],
      [" ", %q{<span _text="' '"> </span>}],
      [" ", %q{<span _text="' '"/>}]
    ])
  end

  def test_ignorable
    assert_xhtml_table([
      ["<div\n>a</div\n>", '<div>a</div>'],
      ["<span\n>a</span\n>", '<span>a</span>']
    ])
  end

  def test_template_in_attr
    wanted = "<a x=\"1\"\n></a\n>"
    source = '<a _attr_x=1><b _template=m></b></a>'
    assert_xhtml(wanted, source)
  end

  def test_empty_block_argument
    wanted = "vv"
    source = '<span _iter="2.times//">v</span>'
    assert_xhtml(wanted, source)
  end

  def test_empty_element
    assert_xhtml_table([
      ["<elem\n/>", '<elem />'], # 2004-06-10: reported by Takuo KITAME
      ["<elem x=\"1\"\n/>", '<elem _attr_x=1 />'],
      ["<elem\n></elem\n>", '<elem _text=\'""\' />'],
      ["<elem\n/>", '<elem _if="true" />'],
      ["", '<elem _if="false" />'],
      ["<foo\n/>", '<elem _if="false" _else="foo"/><foo _template="foo"/>'],
      ["<elem\n/><elem\n/>", '<elem _iter="2.times//" />'],
      ["<elem\n></elem\n>", '<elem _iter_content="2.times//" />']
    ])
  end

  def test_empty_element_start_end_tag
    assert_xhtml_table([
      ["<elem\n></elem\n>", '<elem></elem>'],
      ["<elem x=\"1\"\n></elem\n>", '<elem _attr_x=1 ></elem>'],
      ["<elem\n></elem\n>", '<elem _text=\'""\' ></elem>'],
      ["<elem\n></elem\n>", '<elem _if="true" ></elem>'],
      ["", '<elem _if="false" ></elem>'],
      ["<foo\n></foo\n>", '<elem _if="false" _else="foo"></elem><foo _template="foo"></foo>'],
      ["<elem\n></elem\n><elem\n></elem\n>", '<elem _iter="2.times//" ></elem>'],
      ["<elem\n></elem\n>", '<elem _iter_content="2.times//" ></elem>']
    ])
  end

  # Expanding assign.html must leave the top-level local variable untouched.
  # NOTE(review): assign.html is not visible here; presumably it assigns to a
  # variable of the same name -- confirm against the fixture.
  def test_toplevel_local_variable
    eval("htree_test_toplevel_local_variable = :non_modified_value", TOPLEVEL_BINDING)
    HTree.expand_template("#{File.dirname __FILE__}/assign.html", "aaa", '')
    seen_afterwards = eval("htree_test_toplevel_local_variable", TOPLEVEL_BINDING)
    assert_equal(:non_modified_value, seen_afterwards)
    eval("htree_test_toplevel_local_variable = 1", TOPLEVEL_BINDING)
  end

  # A compiled template module can be mixed into an arbitrary object; the
  # template then sees that object as self.
  def test_extend_compiled_template
    m = HTree.compile_template('<div _template="m">self is <span _text="inspect"></span></div>')
    # The local must be called +o+: the template below refers to it by name
    # through the block's binding.
    o = "zzz"
    o.extend(m)
    rendered = HTree.expand_template(''){'<div _call="o.m"></div>'}
    assert_equal('<?xml version="1.0" encoding="US-ASCII"?>self is "zzz"', rendered)
  end

  # @t is an instance variable because the template is evaluated inside
  # assert_xhtml, where this method's locals are not visible.
  def test_attr_nbsp
    @t = HTree::Text.parse_pcdata('&nbsp;')
    wanted = "<span x=\"&nbsp;\"\n>d</span\n>"
    assert_xhtml(wanted, '<span _attr_x="@t">d</span>')
  end

  def test_text_nbsp
    @t = HTree::Text.parse_pcdata('&nbsp;')
    wanted = "&nbsp;"
    assert_xhtml(wanted, '<span _text="@t">d</span>')
  end

  # A bare _text attribute takes the expression from the element content.
  def test_content_text
    assert_xhtml_table([
      ["<e\n>ab</e\n>", '<e _text>"a"+"b"</e>'],
      ["<e\n>2</e\n>", '<e _text>1+1</e>']
    ])
  end
end
# Minimal in-memory template source: wraps a String and hands the very same
# object back from #read.
class MemFile
  # str:: the text later returned by #read
  def initialize(str)
    @contents = str
  end

  # Returns the object given to the constructor (not a copy).
  def read
    @contents
  end
end
# Object passed as the second argument of HTree.expand_template in
# TestTemplateScope#test_expand_template.  It carries the 'good_*' values
# that the scope tests expect template expressions to resolve to.
# The names Const, @@cvar and @ivar are referenced literally from template
# source strings, so they must not be renamed.
class TestTemplateScopeObj
# Expected result of the template expression "Const".
Const = 'good_const'
# Expected result of the template expression "@@cvar".
@@cvar = 'good_cvar'
def initialize
# Expected result of the template expression "@ivar".
@ivar = 'good_ivar'
end
end
# Checks which constants, class variables, instance variables and local
# variables are visible to Ruby expressions embedded in templates.
# This test class deliberately defines 'bad_*' values under the same names
# as TestTemplateScopeObj; an assertion seeing a 'bad_*' value would mean the
# template was evaluated in the test's scope.
class TestTemplateScope < Test::Unit::TestCase
# Decoy values: none of these should be what a template sees.
Const = 'bad_const'
@@cvar = 'bad_cvar'
# Sets a decoy instance variable and a decoy top-level local variable
# (via TOPLEVEL_BINDING) before each test.
def setup
@ivar = 'bad_ivar'
eval("test_local_variable = 'bad_lvar'", TOPLEVEL_BINDING)
end
# Prefix expected at the start of every expand_template result below.
XMLDeclStr = '<?xml version="1.0" encoding="US-ASCII"?>'
# Expands one-line templates with a TestTemplateScopeObj as the object
# argument and expects Module.nesting, @ivar, @@cvar and Const to resolve
# to that object's class and 'good_*' values.
def test_expand_template
obj = TestTemplateScopeObj.new
assert_equal("#{XMLDeclStr}[TestTemplateScopeObj]",
HTree.expand_template(MemFile.new('<span _text="Module.nesting.inspect"/>'), obj, ''))
assert_equal("#{XMLDeclStr}good_ivar",
HTree.expand_template(MemFile.new('<span _text="@ivar"/>'), obj, ''))
assert_equal("#{XMLDeclStr}good_cvar",
HTree.expand_template(MemFile.new('<span _text="@@cvar"/>'), obj, ''))
assert_equal("#{XMLDeclStr}good_const",
HTree.expand_template(MemFile.new('<span _text="Const"/>'), obj, ''))
# Decoy local: intentionally never read by Ruby code in this method.  The
# template below yields 'good_lvar' only when referencing
# test_local_variable raises NameError, i.e. when neither this local nor the
# top-level one assigned in setup is visible to the template.
test_local_variable = 'bad_lvar'
assert_equal("#{XMLDeclStr}good_lvar",
HTree.expand_template(MemFile.new('<span _text="begin test_local_variable rescue NameError; \'good_lvar\' end"/>'), obj, ''))
end
# Compiles four named templates into a module, gives the module its own
# Const / @@cvar / @ivar, and checks what each template resolves to when
# called on the module itself and on an object extended with the module.
def test_compile_template
# NOTE(review): this instance is not used before obj is reassigned further
# down; it looks like a leftover.
obj = TestTemplateScopeObj.new
mod = HTree.compile_template(MemFile.new(<<-'End'))
<span _template=test_nesting _text="Module.nesting.inspect"/>
<span _template=test_const _text="Const"/>
<span _template=test_cvar _text="@@cvar"/>
<span _template=test_ivar _text="@ivar"/>
End
mod.module_eval <<-'End'
Const = 'mod_const'
@@cvar = 'mod_cvar'
@ivar = 'mod_ivar'
End
# Called on the module: everything resolves to the module's own values.
assert_equal("[#{mod.inspect}]", mod.test_nesting.extract_text.to_s)
assert_equal("mod_const", mod.test_const.extract_text.to_s)
assert_equal("mod_cvar", mod.test_cvar.extract_text.to_s)
assert_equal("mod_ivar", mod.test_ivar.extract_text.to_s)
# Called on an extended object: nesting, constant and class variable are
# still expected from the module, but @ivar is expected from the object.
# __send__ is used to invoke the template methods on the object.
obj = Object.new
obj.instance_variable_set :@ivar, 'obj_ivar'
obj.extend mod
assert_equal("[#{mod.inspect}]", obj.__send__(:test_nesting).extract_text.to_s)
assert_equal("mod_const", obj.__send__(:test_const).extract_text.to_s)
assert_equal("mod_cvar", obj.__send__(:test_cvar).extract_text.to_s)
assert_equal("obj_ivar", obj.__send__(:test_ivar).extract_text.to_s)
end
end
# Text placed into <script> via _text: written as-is for an HTML template,
# escaped for an XML template, and rejected when it cannot be written as-is.
#
# In every test the local variable must be named +v+: the template refers to
# it by that name through the binding of the block given to expand_template.
class TestCDATA < Test::Unit::TestCase
  # HTML output: "<" inside <script> is expected to stay unescaped.
  def test_html_script
    v = "x<y"
    wanted = "<html><script>x<y</script></html>"
    expanded = HTree.expand_template('') {"<html><script _text=\"v\">ab</script>"}
    assert_equal(wanted, expanded.gsub(/\n/, ''))
  end

  # XML output (template starts with an XML declaration): "<" is expected
  # to be written as &lt;.
  def test_xml_script
    v = "x<y"
    wanted = "<?xml version=\"1.0\" encoding=\"US-ASCII\"?><html xmlns=\"http://www.w3.org/1999/xhtml\"><script>x&lt;y</script></html>"
    expanded = HTree.expand_template('') {"<?xml version=\"1.0\"?><html><script _text=\"v\">ab</script>"}
    assert_equal(wanted, expanded.gsub(/\n/, ''))
  end

  # HTML output with "</" in the script text is expected to raise
  # ArgumentError.
  def test_html_script_invalid_content
    v = "x</y"
    assert_raise(ArgumentError) do
      HTree.expand_template('') {"<html><script _text=\"v\">ab</script>"}
    end
  end
end
# Checks the charset that template expansion reports on its output object.
class TestCharset < Test::Unit::TestCase
  # String with a writable +charset+ attribute, used as the output object
  # handed to HTree.expand_template so the reported charset can be read
  # back afterwards.
  class CharsetString < String
    attr_accessor :charset
  end

  # Runs the given block with $KCODE set to +kcode+ and restores the
  # previous value afterwards, even if the block raises.
  # NOTE(review): $KCODE is a Ruby 1.8 facility; later interpreters are
  # understood to ignore assignments to it -- confirm the target version.
  def with_kcode(kcode)
    old_kcode = $KCODE
    begin
      $KCODE = kcode
      yield
    ensure
      $KCODE = old_kcode
    end
  end

  # Pure ASCII content is expected to be reported as US-ASCII even though
  # $KCODE selects EUC.
  def test_us_ascii
    with_kcode('E') {
      out = HTree.expand_template(CharsetString.new) { "<html>abc" }
      # expected value first, so a failure report labels the values correctly
      assert_equal('US-ASCII', out.charset)
    }
  end

  # Content containing the bytes \xa1\xa1 is expected to be reported as
  # EUC-JP under $KCODE 'E'.
  def test_euc_jp
    with_kcode('E') {
      out = HTree.expand_template(CharsetString.new) { "<html>\xa1\xa1" }
      # expected value first, so a failure report labels the values correctly
      assert_equal('EUC-JP', out.charset)
    }
  end
end
# A DOCTYPE in front of an HTML template is expected to survive expansion.
class TestTemplateDOCTYPE < Test::Unit::TestCase
  # The template spells the DOCTYPE name "HTML" and leaves <html> unclosed;
  # the expected output has the name as "html" and an explicit </html>.
  # Newlines in the output are removed before comparing.
  def test_html
    wanted = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html></html>'
    expanded = HTree.expand_template('') {'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html>'}
    assert_equal(wanted, expanded.gsub(/\n/, ''))
  end
end

@ -0,0 +1,35 @@
require 'test/unit'
require 'htree/text'
# Tests for HTree::Text construction and normalization.
class TestText < Test::Unit::TestCase
  # Text.new treats its argument as plain text, so the "&" of an existing
  # "&amp;" is itself escaped in rcdata.
  def test_new
    escaped = HTree::Text.new("abc&amp;def").rcdata
    assert_equal("abc&amp;amp;def", escaped)
  end

# The two tests below are disabled in the original file and kept that way.
=begin
def test_parse
assert_equal("abc&amp;def", HTree::Text.parse("abc&amp;def").rcdata)
end
def test_to_s
assert_equal("abc&def", HTree::Text.parse("abc&amp;def").to_s)
end
=end

  # Yields with $KCODE temporarily set to +kc+; the previous value is put
  # back when the block finishes or raises.
  def kcode(kc)
    saved = $KCODE
    $KCODE = kc
    yield
  ensure
    $KCODE = saved
  end

  # Under EUC, character references are expected to be resolved except for
  # "&" (kept as &#38;) and nbsp (kept as &#160;); &alpha; is expected as the
  # two bytes \xa6\xc1.
  def test_normalize
    kcode('EUC') do
      source = HTree::Text.new_internal("&lt;&#65;&#x42;C&amp;&#38;&nbsp;&alpha;")
      assert_equal("<ABC&#38;&#38;&#160;\xa6\xc1", source.normalized_rcdata)
    end
  end
end

@ -0,0 +1,69 @@
require 'test/unit'
require 'htree/traverse'
require 'htree/parse'
require 'htree/equality'
# Tests for traversal helpers: filter, title and author.
class TestTraverse < Test::Unit::TestCase
  # filter keeps the nodes for which the block is true; here only the node
  # whose path is 'doc()/a/b[1]' is dropped.
  def test_filter
    top = HTree.parse('<a><b>x</b><b/><a/>').make_loc
    kept = top.filter {|node| node.path != 'doc()/a/b[1]' }
    assert_equal(HTree.parse('<a><b/><a/>'), kept)
  end

  # title is expected to work for an HTML <title> and for an RSS 1.0
  # channel title, both on trees and on their locations.
  def test_title
    html_doc = HTree.parse('<html><title>aaa</title></html>')
    rss_doc = HTree.parse(<<'End')
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns="http://purl.org/rss/1.0/">
<channel>
<title>aaa</title>
</channel>
</rdf:RDF>
End
    documents = [html_doc, rss_doc]
    wanted = HTree::Text.new('aaa')

    documents.each do |document|
      assert_equal(wanted, document.title)
    end
    documents.each do |document|
      assert_equal(wanted, document.make_loc.title)
    end
  end

  # author is expected to be found in <meta name=author>, <link rev=made>,
  # dc:creator and dc:publisher.
  def test_author
    meta_doc = HTree.parse('<html><meta name=author content=xxx></html>')
    link_doc = HTree.parse('<html><link rev=made title=xxx></html>')
    creator_doc = HTree.parse(<<'End')
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns="http://purl.org/rss/1.0/">
<channel>
<dc:creator>xxx</dc:creator>
</channel>
</rdf:RDF>
End
    publisher_doc = HTree.parse(<<'End')
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns="http://purl.org/rss/1.0/">
<channel>
<dc:publisher>xxx</dc:publisher>
</channel>
</rdf:RDF>
End
    documents = [meta_doc, link_doc, creator_doc, publisher_doc]
    wanted = HTree::Text.new('xxx')

    # The tree-level check is disabled in the original; only the
    # location-level check below actually asserts anything.
    documents.each do |document|
      #assert_equal(wanted, document.author)
    end
    documents.each do |document|
      assert_equal(wanted, document.make_loc.author)
    end
  end
end