ALL YOUR LIBRARIES ARE BELONG TO US
parent
85295f82e8
commit
6d05f8ef2d
@ -0,0 +1,3 @@
|
||||
index.html
|
||||
README.html
|
||||
doc
|
@ -0,0 +1,21 @@
|
||||
# Tools used by the rules below.
RUBY=ruby
RD2HTML=rd2 -r rd/rd2html-lib.rb

# Default target: build the HTML README and the reference manual.
all: README.html doc/index.html

# Render the RD-format README as HTML.
README.html: README.rd
	$(RD2HTML) --html-title='htree - HTML/XML tree library' -o README README.rd

# Run the test suite with the current directory on the load path.
check test:
	$(RUBY) -I. test-all.rb

install:
	$(RUBY) install.rb

.PHONY: check test all install

# Sources passed to rdoc.  htree/modules.rb is listed explicitly; the two
# wildcards cover htree/[a-l]*.rb and htree/[n-z]*.rb, i.e. they skip files
# whose name starts with "m".
RB = htree.rb htree/modules.rb $(wildcard htree/[a-l]*.rb) $(wildcard htree/[n-z]*.rb)

# Regenerate the reference manual from scratch.
doc/index.html: $(RB)
	rm -rf doc
	rdoc $(RB)
|
||||
|
@ -0,0 +1,48 @@
|
||||
= htree - HTML/XML tree library
|
||||
|
||||
htree provides a tree data structure which represents HTML and XML data.
|
||||
|
||||
== Features
|
||||
|
||||
* Permissive unified HTML/XML parser
|
||||
* byte-to-byte round-tripping unparser
|
||||
* XML namespace support
|
||||
* Dedicated class for escaped strings. This eases sanitization.
|
||||
* HTML/XHTML/XML generator
|
||||
* template engine
|
||||
* recursive template expansion
|
||||
* converter to REXML document
|
||||
|
||||
== Home Page
|
||||
|
||||
((<URL:http://cvs.m17n.org/~akr/htree/>))
|
||||
|
||||
== Download
|
||||
|
||||
* ((<URL:http://cvs.m17n.org/viewcvs/ruby/htree.tar.gz>))
|
||||
|
||||
== Install
|
||||
|
||||
% ruby install.rb
|
||||
|
||||
== Reference Manual
|
||||
|
||||
((<URL:doc/index.html>))
|
||||
|
||||
== Usage Example
|
||||
|
||||
The following two-line script converts HTML to XHTML.
|
||||
|
||||
require 'htree'
|
||||
HTree(STDIN).display_xml
|
||||
|
||||
The conversion method to REXML is provided as to_rexml.
|
||||
|
||||
HTree(...).to_rexml
|
||||
|
||||
== License
|
||||
|
||||
Ruby's
|
||||
|
||||
== Author
|
||||
Tanaka Akira <akr@m17n.org>
|
@ -0,0 +1,97 @@
|
||||
#
|
||||
# = htree.rb
|
||||
#
|
||||
# HTML/XML document tree
|
||||
#
|
||||
# Author:: Tanaka Akira <akr@m17n.org>
|
||||
#
|
||||
# == Features
|
||||
#
|
||||
# - Permissive unified HTML/XML parser
|
||||
# - byte-to-byte round-tripping unparser
|
||||
# - XML namespace support
|
||||
# - Dedicated class for escaped strings. This eases sanitization.
|
||||
# - XHTML/XML generator
|
||||
# - template engine: link:files/htree/template_rb.html
|
||||
# - recursive template expansion
|
||||
# - REXML tree generator: link:files/htree/rexml_rb.html
|
||||
#
|
||||
# == Example
|
||||
#
|
||||
# The following one-liner prints parsed tree object.
|
||||
#
|
||||
# % ruby -rhtree -e 'pp HTree(ARGF)' html-file
|
||||
#
|
||||
# The following two-line script converts HTML to XHTML.
|
||||
#
|
||||
# require 'htree'
|
||||
# HTree(STDIN).display_xml
|
||||
#
|
||||
# The conversion method to REXML is provided as to_rexml.
|
||||
#
|
||||
# HTree(...).to_rexml
|
||||
#
|
||||
# == Module/Class Hierarchy
|
||||
#
|
||||
# * HTree
|
||||
# * HTree::Name
|
||||
# * HTree::Context
|
||||
# * HTree::Location
|
||||
# * HTree::Node
|
||||
# * HTree::Doc
|
||||
# * HTree::Elem
|
||||
# * HTree::Text
|
||||
# * HTree::XMLDecl
|
||||
# * HTree::DocType
|
||||
# * HTree::ProcIns
|
||||
# * HTree::Comment
|
||||
# * HTree::BogusETag
|
||||
# * HTree::Error
|
||||
#
|
||||
# == Method Summary
|
||||
#
|
||||
# HTree provides the following methods.
|
||||
#
|
||||
# - Parsing Methods
|
||||
# - HTree(<i>html_string</i>) -> HTree::Doc
|
||||
# - HTree.parse(<i>input</i>) -> HTree::Doc
|
||||
#
|
||||
# - Generation Methods
|
||||
# - HTree::Node#display_xml -> STDOUT
|
||||
# - HTree::Node#display_xml(<i>out</i>) -> <i>out</i>
|
||||
# - HTree::Node#display_xml(<i>out</i>, <i>encoding</i>) -> <i>out</i>
|
||||
# - HTree::Text#to_s -> String
|
||||
#
|
||||
# - Template Methods
|
||||
# - HTree.expand_template{<i>template_string</i>} -> STDOUT
|
||||
# - HTree.expand_template(<i>out</i>){<i>template_string</i>} -> <i>out</i>
|
||||
# - HTree.expand_template(<i>out</i>, <i>encoding</i>){<i>template_string</i>} -> <i>out</i>
|
||||
# - HTree.compile_template(<i>template_string</i>) -> Module
|
||||
# - HTree{<i>template_string</i>} -> HTree::Doc
|
||||
#
|
||||
# - Traverse Methods
|
||||
# - HTree::Elem#attributes -> Hash[HTree::Name -> HTree::Text]
|
||||
# - HTree::Elem::Location#attributes -> Hash[HTree::Name -> HTree::Location]
|
||||
#
|
||||
# - Predicate Methods
|
||||
# - HTree::Traverse#doc? -> true or false
|
||||
# - HTree::Traverse#elem? -> true or false
|
||||
# - HTree::Traverse#text? -> true or false
|
||||
# - HTree::Traverse#xmldecl? -> true or false
|
||||
# - HTree::Traverse#doctype? -> true or false
|
||||
# - HTree::Traverse#procins? -> true or false
|
||||
# - HTree::Traverse#comment? -> true or false
|
||||
# - HTree::Traverse#bogusetag? -> true or false
|
||||
#
|
||||
# - REXML Tree Generator
|
||||
# - HTree::Node#to_rexml -> REXML::Child
|
||||
|
||||
require 'htree/parse'
|
||||
require 'htree/extract_text'
|
||||
require 'htree/equality'
|
||||
require 'htree/inspect'
|
||||
require 'htree/display'
|
||||
require 'htree/loc'
|
||||
require 'htree/traverse'
|
||||
require 'htree/template'
|
||||
require 'htree/rexml'
|
@ -0,0 +1,8 @@
|
||||
require 'htree/modules'
|
||||
|
||||
module HTree::Container
  # +children+ returns the child nodes as an array.
  #
  # The result is a duplicate of the internal @children list, so changes
  # made to the returned array do not affect the receiver.
  def children
    @children.dup
  end
end
|
@ -0,0 +1,69 @@
|
||||
module HTree
  # HTree::Context holds a frozen table of XML namespace bindings
  # (namespace prefix => namespace URI).
  class Context
    # :stopdoc:
    DefaultNamespaces = {'xml'=>'http://www.w3.org/XML/1998/namespace'}
    DefaultNamespaces.default = ""
    DefaultNamespaces.freeze
    # :startdoc:

    # The optional argument `namespaces' should be a hash or nil.
    # HTree::Context::DefaultNamespaces is used if nil is specified.
    #
    # If it is a hash, its key should be nil or a string.
    # nil means default namespace.
    # The string means some prefix which must not be empty.
    #
    # The hash value should be a string.
    # The empty string "" means unbound namespace.
    #
    # ArgumentError is raised for an invalid prefix or URI.
    # A hash which is not frozen is duplicated and the copy is frozen;
    # an already frozen hash is stored as is.
    def initialize(namespaces=nil)
      bindings = namespaces || DefaultNamespaces
      bindings.each_pair do |prefix, uri|
        check_namespace_prefix(prefix)
        check_namespace_uri(uri)
      end
      @namespaces = bindings.frozen? ? bindings : bindings.dup.freeze
    end
    attr_reader :namespaces

    # return a namespace URI corresponding to _prefix_.
    #
    # For an unknown _prefix_ the default value of the underlying hash is
    # returned: "" when the table derives from DefaultNamespaces,
    # nil for a plain hash given to Context.new.
    def namespace_uri(prefix)
      @namespaces[prefix]
    end

    # generate a new Context object whose namespaces are substituted by
    # a hash _declared_namespaces_.
    #
    # The receiver itself is returned when the substitution changes nothing.
    def subst_namespaces(declared_namespaces)
      merged = @namespaces.dup
      declared_namespaces.each do |prefix, uri|
        check_namespace_prefix(prefix)
        check_namespace_uri(uri)
        merged[prefix] = uri
      end
      return self if merged == @namespaces
      Context.new(merged)
    end

    private

    # A prefix is valid when it is nil or a non-empty String.
    def check_namespace_prefix(k)
      return if (String === k && !k.empty?) || k == nil
      raise ArgumentError, "invalid namespace prefix: #{k.inspect}"
    end

    # A namespace URI is valid when it is a String.
    def check_namespace_uri(v)
      return if String === v
      raise ArgumentError, "invalid namespace URI: #{v.inspect}"
    end
  end

  # :stopdoc:
  DefaultContext = Context.new
  HTMLContext = DefaultContext.subst_namespaces({nil => "http://www.w3.org/1999/xhtml"})
  # :startdoc:
end
|
@ -0,0 +1,46 @@
|
||||
require 'htree/output'
|
||||
|
||||
module HTree
  module Node
    # HTree::Node#display_xml prints the node as XML.
    #
    # The first optional argument, <i>out</i>,
    # specifies the output target.
    # It should respond to <tt><<</tt>.
    # If it is not specified, $stdout is used.
    #
    # The second optional argument, <i>encoding</i>,
    # specifies the output MIME charset (character encoding).
    # If it is not specified, HTree::Encoder.internal_charset is used.
    #
    # HTree::Node#display_xml returns <i>out</i>.
    def display_xml(out=$stdout, encoding=HTree::Encoder.internal_charset)
      xml_encoder = HTree::Encoder.new(encoding)
      self.output(xml_encoder, HTree::DefaultContext)
      # don't call finish_with_xmldecl because self already has a xml decl.
      rendered = xml_encoder.finish
      out << rendered
      out
    end

    # HTree::Node#display_html prints the node as HTML.
    #
    # The first optional argument, <i>out</i>,
    # specifies the output target.
    # It should respond to <tt><<</tt>.
    # If it is not specified, $stdout is used.
    #
    # The second optional argument, <i>encoding</i>,
    # specifies the output MIME charset (character encoding).
    # If it is not specified, HTree::Encoder.internal_charset is used.
    #
    # HTree::Node#display_html returns <i>out</i>.
    def display_html(out=$stdout, encoding=HTree::Encoder.internal_charset)
      html_encoder = HTree::Encoder.new(encoding)
      # switch the encoder to HTML output before the tree is written.
      html_encoder.html_output = true
      self.output(html_encoder, HTree::HTMLContext)
      rendered = html_encoder.finish
      out << rendered
      out
    end

  end
end
|
@ -0,0 +1,149 @@
|
||||
require 'htree/modules'
|
||||
require 'htree/container'
|
||||
|
||||
module HTree
  class Doc
    # :stopdoc:
    class << self
      # keep the plain allocator reachable as new!, because Doc.new is
      # redefined below with argument normalization.
      alias new! new
    end
    # :startdoc:

    # The arguments should be a sequence of the following.
    # [String object] specified string is converted to HTree::Text.
    # [HTree::Node object] used as a child.
    # [HTree::Doc object]
    #   used as children.
    #   It is expanded except HTree::XMLDecl and HTree::DocType objects.
    # [Array of String, HTree::Node and HTree::Doc] used as children.
    #
    # HTree::Location objects are converted with to_node first.
    # TypeError is raised for any other kind of argument.
    def Doc.new(*args)
      children = []
      # append one item to children; _whole_ is the top-level argument the
      # item came from and is what the error message shows.
      absorb = lambda {|item, whole|
        case item
        when HTree::Doc
          children.concat(item.children.reject {|c|
            HTree::XMLDecl === c || HTree::DocType === c
          })
        when HTree::Node
          children << item
        when String
          children << Text.new(item)
        else
          raise TypeError, "unexpected argument: #{whole.inspect}"
        end
      }
      args.each {|arg|
        arg = arg.to_node if HTree::Location === arg
        if Array === arg
          arg.each {|a|
            a = a.to_node if HTree::Location === a
            absorb.call(a, arg)
          }
        else
          absorb.call(arg, arg)
        end
      }
      new!(children)
    end

    # _children_ must consist of HTree::Node objects other than HTree::Doc;
    # TypeError is raised otherwise.  A frozen copy of the array is stored.
    def initialize(children=[]) # :notnew:
      @children = children.dup.freeze
      rejected = @children.reject {|c| c.kind_of?(HTree::Node) && !c.kind_of?(HTree::Doc) }
      unless rejected.empty?
        listing = rejected.map {|uc| uc.inspect }.join(', ')
        raise TypeError, "Unacceptable document child: #{listing}"
      end
    end

    # returns the child at the integer _index_, or nil when _index_ is
    # negative or beyond the last child.
    def get_subnode_internal(index) # :nodoc:
      raise TypeError, "invalid index: #{index.inspect}" unless Integer === index
      return nil if index < 0 || @children.length <= index
      @children[index]
    end

    #   doc.subst_subnode(pairs) -> doc
    #
    # The argument _pairs_ should be a hash or an assocs.
    # Its key should be an integer which means an index for children.
    #
    # Its value should be one of the following.
    # [HTree::Node object] specified object is used as is.
    # [String object] specified string is converted to HTree::Text
    # [Array of above] specified HTree::Node and String is used in that order.
    # [nil] delete corresponding node.
    #
    # Values for a negative index are placed before the existing children,
    # values for an index beyond the last child are placed after them.
    #
    #   d = HTree('<a/><b/><c/>')
    #   p d.subst_subnode({0=>HTree('<x/>'), 2=>HTree('<z/>')})
    #   p d.subst_subnode([[0,HTree('<x/>')], [2,HTree('<z/>')]])
    #   # =>
    #   #<HTree::Doc {emptyelem <x>} {emptyelem <b>} {emptyelem <z>}>
    #   #<HTree::Doc {emptyelem <x>} {emptyelem <b>} {emptyelem <z>}>
    #
    def subst_subnode(pairs)
      replacements = {}
      pairs.each {|index, value|
        raise TypeError, "invalid index: #{index.inspect}" unless Integer === index
        value = value.to_node if HTree::Location === value
        nodes =
          case value
          when Node, String
            [value]
          when Array
            value.dup
          when nil
            []
          else
            raise TypeError, "invalid value: #{value.inspect}"
          end
        nodes.map! {|v|
          v = v.to_node if HTree::Location === v
          case v
          when Node
            v
          when String
            Text.new(v)
          else
            raise TypeError, "invalid value: #{v.inspect}"
          end
        }
        (replacements[index] ||= []).concat(nodes)
      }

      before = []
      middle = @children.dup
      after = []

      replacements.keys.sort.each {|index|
        nodes = replacements[index]
        if index < 0
          before << nodes
        elsif middle.length <= index
          after << nodes
        else
          middle[index] = nodes
        end
      }

      Doc.new([before, middle, after].flatten.compact)
    end
  end
end
|
@ -0,0 +1,262 @@
|
||||
require 'htree/modules'
|
||||
require 'htree/tag'
|
||||
require 'htree/context'
|
||||
require 'htree/container'
|
||||
|
||||
module HTree
  class Elem
    # :stopdoc:
    class << self
      # keep the plain allocator reachable as new!, because Elem.new is
      # redefined below with argument normalization.
      alias new! new
    end
    # :startdoc:

    # The first argument _name_ should be an instance of String or HTree::Name.
    #
    # The rest of the arguments should be a sequence of the following.
    # [Hash object] used as attributes.
    # [String object] specified string is converted to HTree::Text.
    # [HTree::Node object] used as a child.
    # [HTree::Doc object]
    #   used as children.
    #   It is expanded except HTree::XMLDecl and HTree::DocType objects.
    # [Array of String, HTree::Node, HTree::Doc] used as children.
    # [HTree::Context object]
    #   used as a context which represents XML namespaces.
    #   This should appear once at most.
    #
    # HTree::Location object is accepted just as HTree::Node.
    #
    # If the rest of the arguments consists only of
    # Hash and HTree::Context, an empty element is created.
    #
    #   p HTree::Elem.new("e").empty_element?     # => true
    #   p HTree::Elem.new("e", []).empty_element? # => false
    def Elem.new(name, *args)
      attrs = []
      children = []
      context = nil
      # append one child item to children; _whole_ is the top-level argument
      # the item came from and is what the error message shows.
      absorb = lambda {|item, whole|
        case item
        when HTree::Doc
          children.concat(item.children.reject {|c|
            HTree::XMLDecl === c || HTree::DocType === c
          })
        when HTree::Node
          children << item
        when String
          children << Text.new(item)
        else
          raise TypeError, "unexpected argument: #{whole.inspect}"
        end
      }
      args.each {|arg|
        arg = arg.to_node if HTree::Location === arg
        case arg
        when Context
          raise ArgumentError, "multiple context" if context
          context = arg
        when Hash
          arg.each {|key, val| attrs.push([key, val]) }
        when Array
          arg.each {|a|
            a = a.to_node if HTree::Location === a
            absorb.call(a, arg)
          }
        else
          absorb.call(arg, arg)
        end
      }
      context ||= DefaultContext
      # only attributes and/or a context were given: make an empty element.
      only_meta = args.all? {|arg| Hash === arg || Context === arg }
      children = nil if children.empty? && only_meta
      new!(STag.new(name, attrs, context), children)
    end

    # _stag_ must be an HTree::STag, _children_ nil or an array of
    # HTree::Node objects other than HTree::Doc, and _etag_ nil or an
    # HTree::ETag.  TypeError is raised otherwise.
    # The element is an empty element when both _children_ and _etag_ are nil.
    def initialize(stag, children=nil, etag=nil) # :notnew:
      raise TypeError, "HTree::STag expected: #{stag.inspect}" unless stag.class == STag
      if children
        rejected = children.reject {|c| c.kind_of?(HTree::Node) && !c.kind_of?(HTree::Doc) }
        unless rejected.empty?
          listing = rejected.map {|uc| uc.inspect }.join(', ')
          raise TypeError, "Unacceptable element child: #{listing}"
        end
      end
      if etag
        raise TypeError, "HTree::ETag expected: #{etag.inspect}" unless etag.class == ETag
      end
      @stag = stag
      @children = (children ? children.dup : []).freeze
      @empty = children == nil && etag == nil
      @etag = etag
    end

    # the context of the start tag.
    def context; @stag.context end

    # +element_name+ returns the name of the element as given by the start tag.
    def element_name() @stag.element_name end

    # true when the element was built without children and without an end tag.
    def empty_element?
      @empty
    end

    # delegates to the start tag's each_attribute.
    def each_attribute(&block) # :yields: attr_name, attr_text
      @stag.each_attribute(&block)
    end

    # An Integer _index_ selects a child (nil when out of range);
    # a String or HTree::Name _index_ selects an attribute value.
    def get_subnode_internal(index) # :nodoc:
      case index
      when Integer
        return nil if index < 0 || @children.length <= index
        @children[index]
      when String, Name
        name = String === index ? Name.parse_attribute_name(index, DefaultContext) : index
        update_attribute_hash[name.universal_name]
      else
        raise TypeError, "invalid index: #{index.inspect}"
      end
    end

    # call-seq:
    #   elem.subst_subnode(pairs) -> elem
    #
    # The argument _pairs_ should be a hash or an assocs.
    #
    # The key of pairs should be one of the following.
    # [HTree::Name or String object] attribute name.
    # [Integer object] child index.
    #
    # The value of pairs should be one of the following.
    # [HTree::Node object] specified object is used as is.
    # [String object] specified string is converted to HTree::Text
    # [Array of above] specified HTree::Node and String is used in that order.
    # [nil] delete corresponding node.
    #
    #   e = HTree('<r><a/><b/><c/></r>').root
    #   p e.subst_subnode({0=>HTree('<x/>'), 2=>HTree('<z/>')})
    #   p e.subst_subnode([[0, HTree('<x/>')], [2,HTree('<z/>')]])
    #   # =>
    #   {elem <r> {emptyelem <x>} {emptyelem <b>} {emptyelem <z>}}
    #   {elem <r> {emptyelem <x>} {emptyelem <b>} {emptyelem <z>}}
    #
    def subst_subnode(pairs)
      replacements = {}
      pairs.each {|index, value|
        case index
        when Name, Integer
          # usable as is.
        when String
          index = Name.parse_attribute_name(index, DefaultContext)
        else
          raise TypeError, "invalid index: #{index.inspect}"
        end
        value = value.to_node if HTree::Location === value
        nodes =
          case value
          when Node, String
            [value]
          when Array
            value.dup
          when nil
            []
          else
            raise TypeError, "invalid value: #{value.inspect}"
          end
        nodes.map! {|v|
          v = v.to_node if HTree::Location === v
          case v
          when Node
            v
          when String
            Text.new(v)
          else
            raise TypeError, "invalid value: #{v.inspect}"
          end
        }
        (replacements[index] ||= []).concat(nodes)
      }

      # existing attributes keep their position; a replaced attribute with an
      # empty value list is dropped.
      attrs = []
      @stag.attributes.each {|attr_name, attr_text|
        unless replacements.include?(attr_name)
          attrs << {attr_name=>attr_text}
          next
        end
        texts = replacements.delete(attr_name)
        attrs << {attr_name=>Text.concat(*texts)} unless texts.empty?
      }
      # remaining Name keys are new attributes, appended after existing ones.
      replacements.keys.grep(Name).each {|attr_name|
        texts = replacements.delete(attr_name)
        attrs << {attr_name=>Text.concat(*texts)} unless texts.empty?
      }

      # only Integer keys are left at this point.
      before = []
      middle = @children.dup
      after = []

      replacements.keys.sort.each {|index|
        nodes = replacements[index]
        if index < 0
          before << nodes
        elsif middle.length <= index
          after << nodes
        else
          middle[index] = nodes
        end
      }

      new_children = [before, middle, after].flatten

      ctor_args = [@stag.element_name, @stag.context]
      # an empty element stays empty unless children were added.
      ctor_args << new_children unless new_children.empty? && @empty
      Elem.new(*(ctor_args + attrs))
    end
  end

  module Elem::Trav
    private
    # returns a hash from attribute universal name to attribute text,
    # built once from each_attribute and cached in @attribute_hash.
    def update_attribute_hash
      return @attribute_hash if defined?(@attribute_hash)
      table = {}
      each_attribute {|name, text|
        table[name.universal_name] = text
      }
      @attribute_hash = table
    end
  end
end
|
@ -0,0 +1,212 @@
|
||||
require 'iconv'
|
||||
|
||||
module HTree
  # HTree::Encoder collects output text in a buffer, converting it from an
  # internal charset to an output charset with Iconv.  While doing so it
  # tracks which sub-charsets of the output charset would have produced
  # exactly the same bytes (see minimal_charset).
  class Encoder
    # HTree::Encoder.internal_charset returns the MIME charset corresponding to $KCODE.
    #
    # - 'ISO-8859-1' when $KCODE=='NONE'
    # - 'UTF-8' when $KCODE=='UTF8'
    # - 'EUC-JP' when $KCODE=='EUC'
    # - 'Shift_JIS' when $KCODE=='SJIS'
    #
    # This mapping ignores EUC-KR and various single byte charset other than ISO-8859-1 at least.
    # This should be fixed when Ruby is m17nized.
    def Encoder.internal_charset
      KcodeCharset[$KCODE]
    end

    # _output_encoding_ is the charset of the generated text,
    # _internal_encoding_ the charset of the strings given to the output_*
    # methods.  One Iconv converter is created for the output charset and one
    # for each candidate sub-charset listed in SubCharset.
    def initialize(output_encoding, internal_encoding=HTree::Encoder.internal_charset)
      @buf = ''
      @internal_encoding = internal_encoding
      @output_encoding = output_encoding
      @ic = Iconv.new(output_encoding, @internal_encoding)
      @charpat = FirstCharPattern[internal_encoding]

      @subcharset_list = SubCharset[output_encoding] || []
      @subcharset_ic = {}
      @subcharset_list.each {|subcharset|
        @subcharset_ic[subcharset] = Iconv.new(subcharset, @internal_encoding)
      }
      @html_output = false
    end

    # :stopdoc:
    def html_output?
      @html_output
    end

    def html_output=(flag)
      @html_output = flag
    end

    # calls _pre_, _body_ and _post_ (the last one with _out_) in HTML output
    # mode, only _body_ otherwise.  Returns _out_.
    def output_cdata_content_do(out, pre, body, post)
      if @html_output
        pre.call
        body.call
        post.call(out)
      else
        body.call
      end
      return out
    end

    # outputs "/" unless HTML output mode is active.
    def output_slash_if_xml
      if !@html_output
        output_string('/')
      end
    end

    # In HTML output mode the HTree::Text nodes of _content_ are concatenated
    # and written with Text#output_cdata; otherwise every node of _content_
    # is written with its own output method.
    def output_cdata_content(content, context)
      if @html_output
        # xxx: should raise an error for non-text node?
        texts = content.grep(HTree::Text)
        text = HTree::Text.concat(*texts)
        text.output_cdata(self)
      else
        content.each {|n| n.output(self, context) }
      end
    end

    # joins _args_ and outputs the result unescaped.
    # ArgumentError is raised when the joined string contains "</".
    def output_cdata_for_html(*args)
      str = args.join('')
      if %r{</} =~ str
        raise ArgumentError, "cdata contains '</' : #{str.inspect}"
      end
      output_string str
    end

    # appends _external_str_ (by default _internal_str_ converted to the
    # output charset) to the buffer.  Sub-charset converters which fail on
    # _internal_str_ or produce different bytes are dropped.
    def output_string(internal_str, external_str=@ic.iconv(internal_str))
      @buf << external_str
      @subcharset_ic.reject! {|subcharset, ic|
        begin
          ic.iconv(internal_str) != external_str
        rescue Iconv::Failure
          true
        end
      }
      nil
    end

    # converts and outputs _string_.  When the conversion fails, the part
    # converted so far is output, the first character of the failing rest is
    # replaced by a numeric character reference (or by "?" when no character
    # can be extracted or its code point cannot be determined), and the
    # conversion is retried with the remainder.
    def output_text(string)
      begin
        output_string string, @ic.iconv(string)
      rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => e
        output_string string[0, string.length - e.failed.length], e.success
        unless @charpat =~ e.failed
          # xxx: should be configulable?
          #raise ArgumentError, "cannot extract first character: #{e.failed.dump}"
          # skip one byte and continue with the rest.
          string = e.failed[1, e.failed.length-1]
          output_string '?'
          retry
        end
        char = $&
        rest = $'
        begin
          ucode = Iconv.conv("UTF-8", @internal_encoding, char).unpack("U")[0]
          char = "&##{ucode};"
        rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
          # xxx: should be configulable?
          char = '?'
        end
        output_string char
        string = rest
        retry
      end
    end

    # character => character reference used by output_dynamic_text and
    # output_dynamic_attvalue.  The values must be the references, not the
    # characters themselves; otherwise nothing is escaped.
    ChRef = {
      '&' => '&amp;',
      '<' => '&lt;',
      '>' => '&gt;',
      '"' => '&quot;',
    }

    # outputs _string_ as text content.
    # For an object responding to +rcdata+, only "<" and ">" of its rcdata
    # are replaced; otherwise "&", "<" and ">" of +to_s+ are replaced.
    def output_dynamic_text(string)
      if string.respond_to? :rcdata
        output_text(string.rcdata.gsub(/[<>]/) { ChRef[$&] })
      else
        output_text(string.to_s.gsub(/[&<>]/) { ChRef[$&] })
      end
    end

    # same as output_dynamic_text, but the double quote is replaced too.
    def output_dynamic_attvalue(string)
      if string.respond_to? :rcdata
        output_text(string.rcdata.gsub(/[<>"]/) { ChRef[$&] })
      else
        output_text(string.to_s.gsub(/[&<>"]/) { ChRef[$&] })
      end
    end

    # :startdoc:

    # closes the converters, appends what the output converter returns on
    # close, and returns the buffer.
    def finish
      external_str = @ic.close
      @buf << external_str
      @subcharset_ic.reject! {|subcharset, ic|
        begin
          ic.close != external_str
        rescue Iconv::Failure
          true
        end
      }
      @buf
    end

    # like finish, but the result is prefixed with an XML declaration whose
    # encoding is minimal_charset.
    def finish_with_xmldecl
      content = finish
      xmldecl = Iconv.conv(@output_encoding, 'US-ASCII',
        "<?xml version=\"1.0\" encoding=\"#{minimal_charset}\"?>")
      xmldecl + content
    end

    # returns the first sub-charset whose converter is still registered,
    # or the output encoding when none is left.
    def minimal_charset
      @subcharset_list.each {|subcharset|
        if @subcharset_ic.include? subcharset
          return subcharset
        end
      }
      @output_encoding
    end

    # :stopdoc:

    # $KCODE value => MIME charset.
    KcodeCharset = {
      'EUC' => 'EUC-JP',
      'SJIS' => 'Shift_JIS',
      'UTF8' => 'UTF-8',
      'NONE' => 'ISO-8859-1',
    }

    # charset => pattern matching the first character of a string.
    FirstCharPattern = {
      'EUC-JP' => /\A(?:
         [\x00-\x7f]
        |[\xa1-\xfe][\xa1-\xfe]
        |\x8e[\xa1-\xfe]
        |\x8f[\xa1-\xfe][\xa1-\xfe])/nx,
      'Shift_JIS' => /\A(?:
         [\x00-\x7f]
        |[\x81-\x9f][\x40-\x7e\x80-\xfc]
        |[\xa1-\xdf]
        |[\xe0-\xfc][\x40-\x7e\x80-\xfc])/nx,
      'UTF-8' => /\A(?:
         [\x00-\x7f]
        |[\xc0-\xdf][\x80-\xbf]
        |[\xe0-\xef][\x80-\xbf][\x80-\xbf]
        |[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]
        |[\xf8-\xfb][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf]
        |[\xfc-\xfd][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf])/nx,
      'ISO-8859-1' => /\A[\x00-\xff]/n
    }

    # output charset => candidate sub-charsets, tried in this order.
    SubCharset = {
      'ISO-2022-JP-2' => ['US-ASCII', 'ISO-2022-JP'],
      'ISO-2022-JP-3' => ['US-ASCII', 'ISO-2022-JP'],
      'UTF-16BE' => [],
      'UTF-16LE' => [],
      'UTF-16' => [],
    }
    SubCharset.default = ['US-ASCII']

    # :startdoc:
  end
end
|
@ -0,0 +1,219 @@
|
||||
require 'htree/doc'
|
||||
require 'htree/elem'
|
||||
require 'htree/leaf'
|
||||
require 'htree/tag'
|
||||
require 'htree/raw_string'
|
||||
require 'htree/context'
|
||||
|
||||
module HTree
  # compare tree structures.
  #
  # The comparison is based on the objects returned by usual_equal_object.
  def ==(other)
    check_equality(self, other, :usual_equal_object)
  end
  alias eql? ==

  # hash value for the tree structure.
  #
  # It is computed from usual_equal_object once and cached in @hash_code.
  def hash
    @hash_code = usual_equal_object.hash unless defined?(@hash_code)
    @hash_code
  end

  # :stopdoc:

  # cached result of make_usual_equal_object.
  def usual_equal_object
    unless defined?(@usual_equal_object)
      @usual_equal_object = make_usual_equal_object
    end
    @usual_equal_object
  end

  # to be overridden; the default raises NotImplementedError.
  def make_usual_equal_object
    raise NotImplementedError
  end

  # cached result of make_exact_equal_object.
  def exact_equal_object
    unless defined?(@exact_equal_object)
      @exact_equal_object = make_exact_equal_object
    end
    @exact_equal_object
  end

  # to be overridden; the default raises NotImplementedError.
  def make_exact_equal_object
    raise NotImplementedError
  end

  # compare using the objects returned by exact_equal_object.
  def exact_equal?(other)
    check_equality(self, other, :exact_equal_object)
  end

  # Objects of different classes are never equal.
  # Arrays are compared element by element (classes of all elements first,
  # then recursively).  Objects responding to _equal_object_method_ are
  # compared through the objects that method returns.  Anything else is
  # compared with ==.
  def check_equality(obj1, obj2, equal_object_method)
    return false unless obj1.class == obj2.class
    if obj1.class == Array
      return false unless obj1.length == obj2.length
      pairs = obj1.zip(obj2)
      return false unless pairs.all? {|c1, c2| c1.class == c2.class }
      pairs.all? {|c1, c2| check_equality(c1, c2, equal_object_method) }
    elsif obj1.respond_to? equal_object_method
      check_equality(obj1.send(equal_object_method),
                     obj2.send(equal_object_method),
                     equal_object_method)
    else
      obj1 == obj2
    end
  end

  class Doc
    # a document is compared by its children in both modes.
    alias exact_equal_object children
    alias usual_equal_object children
  end

  class Elem
    def make_exact_equal_object
      [@stag, @children, @empty, @etag]
    end

    # the empty-element flag and the end tag are ignored.
    def make_usual_equal_object
      [@stag, @children]
    end
  end

  class Name
    def make_exact_equal_object
      [@namespace_prefix, @namespace_uri, @local_name]
    end

    # the namespace prefix is ignored.
    def make_usual_equal_object
      if xmlns?
        @local_name
      else
        [@namespace_uri, @local_name]
      end
    end
  end

  module Util
    module_function
    # <=> which also accepts nil; nil sorts before any other value.
    def cmp_with_nil(a, b)
      a_is_nil = (a == nil)
      b_is_nil = (b == nil)
      return(b_is_nil ? 0 : -1) if a_is_nil
      return 1 if b_is_nil
      a <=> b
    end
  end

  class Context
    # [prefix, URI] pairs ordered by prefix (nil first).
    def make_exact_equal_object
      ordered_prefixes = @namespaces.keys.sort {|prefix1, prefix2|
        Util.cmp_with_nil(prefix1, prefix2)
      }
      ordered_prefixes.map {|prefix| [prefix, @namespaces[prefix]] }
    end

    # make_usual_equal_object is not used through STag#make_usual_equal_object
    # NotImplementedError is suitable?
    alias make_usual_equal_object make_exact_equal_object
  end

  class STag
    # attributes are ordered by namespace prefix, namespace URI and local
    # name of the attribute name, so that the source order does not matter.
    def make_exact_equal_object
      ordered_attributes = @attributes.sort {|(n1,t1), (n2, t2)|
        Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? ||
        Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? ||
        Util.cmp_with_nil(n1.local_name, n2.local_name)
      }
      [@raw_string, @name, ordered_attributes, @inherited_context]
    end

    # same ordering as above; attributes whose name is xmlns?, the raw
    # string and the inherited context are ignored.
    def make_usual_equal_object
      plain_attributes = @attributes.find_all {|n,t| !n.xmlns? }
      ordered_attributes = plain_attributes.sort {|(n1,t1), (n2, t2)|
        Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? ||
        Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? ||
        Util.cmp_with_nil(n1.local_name, n2.local_name)
      }
      [@name, ordered_attributes]
    end

  end

  class ETag
    def make_exact_equal_object
      [@raw_string, @qualified_name]
    end

    alias usual_equal_object qualified_name
  end

  class Text
    def make_exact_equal_object
      [@raw_string, @rcdata]
    end

    def make_usual_equal_object
      @normalized_rcdata
    end
  end

  class XMLDecl
    def make_exact_equal_object
      [@raw_string, @version, @encoding, @standalone]
    end

    def make_usual_equal_object
      [@version, @encoding, @standalone]
    end
  end

  class DocType
    def make_exact_equal_object
      [@raw_string, @root_element_name, @system_identifier, @public_identifier]
    end

    def make_usual_equal_object
      [@root_element_name, @system_identifier, @public_identifier]
    end
  end

  class ProcIns
    def make_exact_equal_object
      [@raw_string, @target, @content]
    end

    def make_usual_equal_object
      [@target, @content]
    end
  end

  class Comment
    def make_exact_equal_object
      [@raw_string, @content]
    end

    alias usual_equal_object content
  end

  class BogusETag
    def make_exact_equal_object
      [@etag]
    end

    alias usual_equal_object make_exact_equal_object
  end

  class Location
    def make_exact_equal_object
      [@parent, @index, @node]
    end

    alias usual_equal_object make_exact_equal_object
  end

  # :startdoc:
end
|
@ -0,0 +1,37 @@
|
||||
require 'htree/text'
|
||||
require 'htree/doc'
|
||||
require 'htree/elem'
|
||||
|
||||
module HTree
|
||||
module Node
|
||||
def extract_text
|
||||
raise NotImplementedError
|
||||
end
|
||||
end
|
||||
|
||||
class Location
|
||||
def extract_text
|
||||
to_node.extract_text
|
||||
end
|
||||
end
|
||||
|
||||
# :stopdoc:
|
||||
module Container
|
||||
def extract_text
|
||||
Text.concat(*@children.map {|n| n.extract_text })
|
||||
end
|
||||
end
|
||||
|
||||
module Leaf
|
||||
def extract_text
|
||||
Text.new('')
|
||||
end
|
||||
end
|
||||
|
||||
class Text
|
||||
def extract_text
|
||||
self
|
||||
end
|
||||
end
|
||||
# :startdoc:
|
||||
end
|
@ -0,0 +1,32 @@
|
||||
require 'htree/modules'
|
||||
|
||||
module HTree
|
||||
# :stopdoc:
|
||||
def HTree.with_frozen_string_hash
|
||||
if Thread.current[:htree_frozen_string_hash]
|
||||
yield
|
||||
else
|
||||
begin
|
||||
Thread.current[:htree_frozen_string_hash] = {}
|
||||
yield
|
||||
ensure
|
||||
Thread.current[:htree_frozen_string_hash] = nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def HTree.frozen_string(str)
|
||||
if h = Thread.current[:htree_frozen_string_hash]
|
||||
if s = h[str]
|
||||
s
|
||||
else
|
||||
str = str.dup.freeze unless str.frozen?
|
||||
h[str] = str
|
||||
end
|
||||
else
|
||||
str = str.dup.freeze unless str.frozen?
|
||||
str
|
||||
end
|
||||
end
|
||||
# :startdoc:
|
||||
end
|
@ -0,0 +1,193 @@
|
||||
require 'htree/encoder'
|
||||
require 'htree/output'
|
||||
|
||||
# :stopdoc:
|
||||
|
||||
module HTree
|
||||
module Node
|
||||
def generate_xml_output_code(outvar='out', contextvar='top_context')
|
||||
namespaces = HTree::Context::DefaultNamespaces.dup
|
||||
namespaces.default = nil
|
||||
context = Context.new(namespaces)
|
||||
gen = HTree::GenCode.new(outvar, contextvar)
|
||||
output(gen, context)
|
||||
gen.finish
|
||||
end
|
||||
end
|
||||
|
||||
class GenCode
|
||||
def initialize(outvar, contextvar, internal_encoding=Encoder.internal_charset)
|
||||
@outvar = outvar
|
||||
@contextvar = contextvar
|
||||
@state = :none
|
||||
@buffer = ''
|
||||
@internal_encoding = internal_encoding
|
||||
@code = ''
|
||||
@html_output = nil
|
||||
end
|
||||
attr_reader :outvar, :contextvar
|
||||
|
||||
def html_output?
|
||||
@html_output
|
||||
end
|
||||
|
||||
def html_output=(flag)
|
||||
@html_output = flag
|
||||
end
|
||||
|
||||
class CDATABuffer
|
||||
def initialize
|
||||
@buf = ''
|
||||
end
|
||||
|
||||
def html_output?
|
||||
true
|
||||
end
|
||||
|
||||
def not_valid_for_html_cdata(*args)
|
||||
raise ArgumentError, "CDATA content only accept texts."
|
||||
end
|
||||
alias output_slash_if_xml not_valid_for_html_cdata
|
||||
alias output_cdata_content not_valid_for_html_cdata
|
||||
alias output_dynamic_attvalue not_valid_for_html_cdata
|
||||
|
||||
def output_string(string)
|
||||
@buf << string
|
||||
end
|
||||
|
||||
def output_text(string)
|
||||
@buf << string
|
||||
end
|
||||
|
||||
ChRef = {
|
||||
'&' => '&',
|
||||
'<' => '<',
|
||||
'>' => '>',
|
||||
'"' => '"',
|
||||
}
|
||||
|
||||
def output_dynamic_text(string)
|
||||
if string.respond_to? :rcdata
|
||||
@buf << string.rcdata.gsub(/[<>]/) { ChRef[$&] }
|
||||
else
|
||||
@buf << string.to_s.gsub(/[&<>]/) { ChRef[$&] }
|
||||
end
|
||||
end
|
||||
|
||||
def result
|
||||
if %r{[<>]} =~ @buf
|
||||
raise ArgumentError, "cdata contains non-text : #{@buf.inspect}"
|
||||
end
|
||||
str = HTree::Text.parse_pcdata(@buf).to_s
|
||||
if %r{</} =~ str
|
||||
raise ArgumentError, "cdata contains '</' : #{str.inspect}"
|
||||
end
|
||||
str
|
||||
end
|
||||
end
|
||||
|
||||
def output_cdata_content(content, context)
|
||||
tmp_outvar = @outvar + '_tmp'
|
||||
output_logic_line "#{@outvar} = #{@outvar}.output_cdata_content_do(#{@outvar},"
|
||||
output_logic_line "lambda { #{@outvar} = HTree::GenCode::CDATABuffer.new },"
|
||||
output_logic_line "lambda {"
|
||||
content.each {|n| n.output(self, context) }
|
||||
output_logic_line "},"
|
||||
output_logic_line "lambda {|#{tmp_outvar}| #{tmp_outvar}.output_string(#{@outvar}.result) })"
|
||||
end
|
||||
|
||||
def output_slash_if_xml
|
||||
output_logic_line "#{@outvar}.output_slash_if_xml"
|
||||
end
|
||||
|
||||
def output_dynamic_text(expr)
|
||||
flush_buffer
|
||||
@code << "#{@outvar}.output_dynamic_text((#{expr}))\n"
|
||||
end
|
||||
|
||||
def output_dynamic_tree(expr, context_expr)
|
||||
flush_buffer
|
||||
@code << "(#{expr}).output(#{@outvar}, #{context_expr})\n"
|
||||
end
|
||||
|
||||
def output_dynamic_attvalue(expr)
|
||||
flush_buffer
|
||||
@code << "#{@outvar}.output_dynamic_attvalue((#{expr}))\n"
|
||||
end
|
||||
|
||||
def output_logic_line(line)
|
||||
flush_buffer
|
||||
@code << line << "\n"
|
||||
end
|
||||
|
||||
def output_string(str)
|
||||
return if str.empty?
|
||||
if @state != :string
|
||||
flush_buffer
|
||||
@state = :string
|
||||
end
|
||||
@buffer << str
|
||||
end
|
||||
|
||||
def output_text(str)
|
||||
return if str.empty?
|
||||
if /\A[\s\x21-\x7e]+\z/ =~ str && @state == :string
|
||||
# Assumption: external charset can represent white spaces and
|
||||
# ASCII printable.
|
||||
output_string(str)
|
||||
return
|
||||
end
|
||||
if @state != :text
|
||||
flush_buffer
|
||||
@state = :text
|
||||
end
|
||||
@buffer << str
|
||||
end
|
||||
|
||||
ChRef = {
|
||||
'&' => '&',
|
||||
'>' => '>',
|
||||
'<' => '<',
|
||||
'"' => '"',
|
||||
}
|
||||
def output_xmlns(namespaces)
|
||||
unless namespaces.empty?
|
||||
flush_buffer
|
||||
namespaces.each {|k, v|
|
||||
if k
|
||||
ks = k.dump
|
||||
aname = "xmlns:#{k}"
|
||||
else
|
||||
ks = "nil"
|
||||
aname = "xmlns"
|
||||
end
|
||||
@code << "if #{@contextvar}.namespace_uri(#{ks}) != #{v.dump}\n"
|
||||
output_string " #{aname}=\""
|
||||
output_text v.gsub(/[&<>"]/) {|s| ChRef[s] }
|
||||
output_string '"'
|
||||
flush_buffer
|
||||
@code << "end\n"
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
def flush_buffer
|
||||
return if @buffer.empty?
|
||||
case @state
|
||||
when :string
|
||||
@code << "#{@outvar}.output_string #{@buffer.dump}\n"
|
||||
@buffer = ''
|
||||
when :text
|
||||
@code << "#{@outvar}.output_text #{@buffer.dump}\n"
|
||||
@buffer = ''
|
||||
end
|
||||
end
|
||||
|
||||
def finish
|
||||
flush_buffer
|
||||
@code
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# :startdoc:
|
@ -0,0 +1,672 @@
|
||||
module HTree
|
||||
# The code below is auto-generated. Don't edit manually.
|
||||
# :stopdoc:
|
||||
NamedCharacters =
|
||||
{"AElig"=>198, "Aacute"=>193, "Acirc"=>194, "Agrave"=>192, "Alpha"=>913,
|
||||
"Aring"=>197, "Atilde"=>195, "Auml"=>196, "Beta"=>914, "Ccedil"=>199,
|
||||
"Chi"=>935, "Dagger"=>8225, "Delta"=>916, "ETH"=>208, "Eacute"=>201,
|
||||
"Ecirc"=>202, "Egrave"=>200, "Epsilon"=>917, "Eta"=>919, "Euml"=>203,
|
||||
"Gamma"=>915, "Iacute"=>205, "Icirc"=>206, "Igrave"=>204, "Iota"=>921,
|
||||
"Iuml"=>207, "Kappa"=>922, "Lambda"=>923, "Mu"=>924, "Ntilde"=>209, "Nu"=>925,
|
||||
"OElig"=>338, "Oacute"=>211, "Ocirc"=>212, "Ograve"=>210, "Omega"=>937,
|
||||
"Omicron"=>927, "Oslash"=>216, "Otilde"=>213, "Ouml"=>214, "Phi"=>934,
|
||||
"Pi"=>928, "Prime"=>8243, "Psi"=>936, "Rho"=>929, "Scaron"=>352, "Sigma"=>931,
|
||||
"THORN"=>222, "Tau"=>932, "Theta"=>920, "Uacute"=>218, "Ucirc"=>219,
|
||||
"Ugrave"=>217, "Upsilon"=>933, "Uuml"=>220, "Xi"=>926, "Yacute"=>221,
|
||||
"Yuml"=>376, "Zeta"=>918, "aacute"=>225, "acirc"=>226, "acute"=>180,
|
||||
"aelig"=>230, "agrave"=>224, "alefsym"=>8501, "alpha"=>945, "amp"=>38,
|
||||
"and"=>8743, "ang"=>8736, "apos"=>39, "aring"=>229, "asymp"=>8776,
|
||||
"atilde"=>227, "auml"=>228, "bdquo"=>8222, "beta"=>946, "brvbar"=>166,
|
||||
"bull"=>8226, "cap"=>8745, "ccedil"=>231, "cedil"=>184, "cent"=>162,
|
||||
"chi"=>967, "circ"=>710, "clubs"=>9827, "cong"=>8773, "copy"=>169,
|
||||
"crarr"=>8629, "cup"=>8746, "curren"=>164, "dArr"=>8659, "dagger"=>8224,
|
||||
"darr"=>8595, "deg"=>176, "delta"=>948, "diams"=>9830, "divide"=>247,
|
||||
"eacute"=>233, "ecirc"=>234, "egrave"=>232, "empty"=>8709, "emsp"=>8195,
|
||||
"ensp"=>8194, "epsilon"=>949, "equiv"=>8801, "eta"=>951, "eth"=>240,
|
||||
"euml"=>235, "euro"=>8364, "exist"=>8707, "fnof"=>402, "forall"=>8704,
|
||||
"frac12"=>189, "frac14"=>188, "frac34"=>190, "frasl"=>8260, "gamma"=>947,
|
||||
"ge"=>8805, "gt"=>62, "hArr"=>8660, "harr"=>8596, "hearts"=>9829,
|
||||
"hellip"=>8230, "iacute"=>237, "icirc"=>238, "iexcl"=>161, "igrave"=>236,
|
||||
"image"=>8465, "infin"=>8734, "int"=>8747, "iota"=>953, "iquest"=>191,
|
||||
"isin"=>8712, "iuml"=>239, "kappa"=>954, "lArr"=>8656, "lambda"=>955,
|
||||
"lang"=>9001, "laquo"=>171, "larr"=>8592, "lceil"=>8968, "ldquo"=>8220,
|
||||
"le"=>8804, "lfloor"=>8970, "lowast"=>8727, "loz"=>9674, "lrm"=>8206,
|
||||
"lsaquo"=>8249, "lsquo"=>8216, "lt"=>60, "macr"=>175, "mdash"=>8212,
|
||||
"micro"=>181, "middot"=>183, "minus"=>8722, "mu"=>956, "nabla"=>8711,
|
||||
"nbsp"=>160, "ndash"=>8211, "ne"=>8800, "ni"=>8715, "not"=>172, "notin"=>8713,
|
||||
"nsub"=>8836, "ntilde"=>241, "nu"=>957, "oacute"=>243, "ocirc"=>244,
|
||||
"oelig"=>339, "ograve"=>242, "oline"=>8254, "omega"=>969, "omicron"=>959,
|
||||
"oplus"=>8853, "or"=>8744, "ordf"=>170, "ordm"=>186, "oslash"=>248,
|
||||
"otilde"=>245, "otimes"=>8855, "ouml"=>246, "para"=>182, "part"=>8706,
|
||||
"permil"=>8240, "perp"=>8869, "phi"=>966, "pi"=>960, "piv"=>982,
|
||||
"plusmn"=>177, "pound"=>163, "prime"=>8242, "prod"=>8719, "prop"=>8733,
|
||||
"psi"=>968, "quot"=>34, "rArr"=>8658, "radic"=>8730, "rang"=>9002,
|
||||
"raquo"=>187, "rarr"=>8594, "rceil"=>8969, "rdquo"=>8221, "real"=>8476,
|
||||
"reg"=>174, "rfloor"=>8971, "rho"=>961, "rlm"=>8207, "rsaquo"=>8250,
|
||||
"rsquo"=>8217, "sbquo"=>8218, "scaron"=>353, "sdot"=>8901, "sect"=>167,
|
||||
"shy"=>173, "sigma"=>963, "sigmaf"=>962, "sim"=>8764, "spades"=>9824,
|
||||
"sub"=>8834, "sube"=>8838, "sum"=>8721, "sup"=>8835, "sup1"=>185, "sup2"=>178,
|
||||
"sup3"=>179, "supe"=>8839, "szlig"=>223, "tau"=>964, "there4"=>8756,
|
||||
"theta"=>952, "thetasym"=>977, "thinsp"=>8201, "thorn"=>254, "tilde"=>732,
|
||||
"times"=>215, "trade"=>8482, "uArr"=>8657, "uacute"=>250, "uarr"=>8593,
|
||||
"ucirc"=>251, "ugrave"=>249, "uml"=>168, "upsih"=>978, "upsilon"=>965,
|
||||
"uuml"=>252, "weierp"=>8472, "xi"=>958, "yacute"=>253, "yen"=>165,
|
||||
"yuml"=>255, "zeta"=>950, "zwj"=>8205, "zwnj"=>8204}
|
||||
|
||||
|
||||
NamedCharactersPattern = /\A(?-mix:AElig|Aacute|Acirc|Agrave|Alpha|Aring|Atilde|Auml|Beta|Ccedil|Chi|Dagger|Delta|ETH|Eacute|Ecirc|Egrave|Epsilon|Eta|Euml|Gamma|Iacute|Icirc|Igrave|Iota|Iuml|Kappa|Lambda|Mu|Ntilde|Nu|OElig|Oacute|Ocirc|Ograve|Omega|Omicron|Oslash|Otilde|Ouml|Phi|Pi|Prime|Psi|Rho|Scaron|Sigma|THORN|Tau|Theta|Uacute|Ucirc|Ugrave|Upsilon|Uuml|Xi|Yacute|Yuml|Zeta|aacute|acirc|acute|aelig|agrave|alefsym|alpha|amp|and|ang|apos|aring|asymp|atilde|auml|bdquo|beta|brvbar|bull|cap|ccedil|cedil|cent|chi|circ|clubs|cong|copy|crarr|cup|curren|dArr|dagger|darr|deg|delta|diams|divide|eacute|ecirc|egrave|empty|emsp|ensp|epsilon|equiv|eta|eth|euml|euro|exist|fnof|forall|frac12|frac14|frac34|frasl|gamma|ge|gt|hArr|harr|hearts|hellip|iacute|icirc|iexcl|igrave|image|infin|int|iota|iquest|isin|iuml|kappa|lArr|lambda|lang|laquo|larr|lceil|ldquo|le|lfloor|lowast|loz|lrm|lsaquo|lsquo|lt|macr|mdash|micro|middot|minus|mu|nabla|nbsp|ndash|ne|ni|not|notin|nsub|ntilde|nu|oacute|ocirc|oelig|ograve|oline|omega|omicron|oplus|or|ordf|ordm|oslash|otilde|otimes|ouml|para|part|permil|perp|phi|pi|piv|plusmn|pound|prime|prod|prop|psi|quot|rArr|radic|rang|raquo|rarr|rceil|rdquo|real|reg|rfloor|rho|rlm|rsaquo|rsquo|sbquo|scaron|sdot|sect|shy|sigma|sigmaf|sim|spades|sub|sube|sum|sup|sup1|sup2|sup3|supe|szlig|tau|there4|theta|thetasym|thinsp|thorn|tilde|times|trade|uArr|uacute|uarr|ucirc|ugrave|uml|upsih|upsilon|uuml|weierp|xi|yacute|yen|yuml|zeta|zwj|zwnj)\z/
|
||||
|
||||
ElementContent =
|
||||
{"h6"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"object"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "param", "pre", "q",
|
||||
"s", "samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"dl"=>["dd", "dt"],
|
||||
"p"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"acronym"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"code"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"ul"=>["li"],
|
||||
"tt"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"label"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"form"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"q"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"thead"=>["tr"],
|
||||
"area"=>:EMPTY,
|
||||
"td"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"title"=>[],
|
||||
"dir"=>["li"],
|
||||
"s"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"ol"=>["li"],
|
||||
"hr"=>:EMPTY,
|
||||
"applet"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "param", "pre", "q",
|
||||
"s", "samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"table"=>["caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"],
|
||||
"legend"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"cite"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"a"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"html"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "base", "basefont", "bdo",
|
||||
"big", "blockquote", "body", "br", "button", "center", "cite", "code",
|
||||
"dfn", "dir", "div", "dl", "em", "fieldset", "font", "form", "h1", "h2",
|
||||
"h3", "h4", "h5", "h6", "head", "hr", "i", "iframe", "img", "input",
|
||||
"isindex", "kbd", "label", "map", "menu", "noframes", "noscript", "object",
|
||||
"ol", "p", "pre", "q", "s", "samp", "script", "select", "small", "span",
|
||||
"strike", "strong", "sub", "sup", "table", "textarea", "title", "tt", "u",
|
||||
"ul", "var"],
|
||||
"u"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"blockquote"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"center"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"b"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"base"=>:EMPTY,
|
||||
"th"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"link"=>:EMPTY,
|
||||
"var"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"samp"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"div"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"textarea"=>[],
|
||||
"pre"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"head"=>["base", "isindex", "title"],
|
||||
"span"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"br"=>:EMPTY,
|
||||
"script"=>:CDATA,
|
||||
"noframes"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"style"=>:CDATA,
|
||||
"meta"=>:EMPTY,
|
||||
"dt"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"option"=>[],
|
||||
"kbd"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"big"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"tfoot"=>["tr"],
|
||||
"sup"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"bdo"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"isindex"=>:EMPTY,
|
||||
"dfn"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"fieldset"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "legend",
|
||||
"map", "menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"em"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"font"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"tbody"=>["tr"],
|
||||
"noscript"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"li"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"col"=>:EMPTY,
|
||||
"small"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"dd"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"i"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"menu"=>["li"],
|
||||
"strong"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"basefont"=>:EMPTY,
|
||||
"img"=>:EMPTY,
|
||||
"optgroup"=>["option"],
|
||||
"map"=>
|
||||
["address", "area", "blockquote", "center", "dir", "div", "dl", "fieldset",
|
||||
"form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu",
|
||||
"noframes", "noscript", "ol", "p", "pre", "table", "ul"],
|
||||
"h1"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"address"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "p", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"sub"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"param"=>:EMPTY,
|
||||
"input"=>:EMPTY,
|
||||
"h2"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"abbr"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"h3"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"strike"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"body"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"ins"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"button"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"h4"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"select"=>["optgroup", "option"],
|
||||
"caption"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"colgroup"=>["col"],
|
||||
"tr"=>["td", "th"],
|
||||
"del"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"],
|
||||
"h5"=>
|
||||
["a", "abbr", "acronym", "applet", "b", "basefont", "bdo", "big", "br",
|
||||
"button", "cite", "code", "dfn", "em", "font", "i", "iframe", "img",
|
||||
"input", "kbd", "label", "map", "object", "q", "s", "samp", "script",
|
||||
"select", "small", "span", "strike", "strong", "sub", "sup", "textarea",
|
||||
"tt", "u", "var"],
|
||||
"iframe"=>
|
||||
["a", "abbr", "acronym", "address", "applet", "b", "basefont", "bdo", "big",
|
||||
"blockquote", "br", "button", "center", "cite", "code", "dfn", "dir", "div",
|
||||
"dl", "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"hr", "i", "iframe", "img", "input", "isindex", "kbd", "label", "map",
|
||||
"menu", "noframes", "noscript", "object", "ol", "p", "pre", "q", "s",
|
||||
"samp", "script", "select", "small", "span", "strike", "strong", "sub",
|
||||
"sup", "table", "textarea", "tt", "u", "ul", "var"]}
|
||||
|
||||
ElementInclusions =
|
||||
{"head"=>["link", "meta", "object", "script", "style"], "body"=>["del", "ins"]}
|
||||
|
||||
ElementExclusions =
|
||||
{"button"=>
|
||||
["a", "button", "fieldset", "form", "iframe", "input", "isindex", "label",
|
||||
"select", "textarea"],
|
||||
"a"=>["a"],
|
||||
"dir"=>
|
||||
["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form",
|
||||
"h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", "noframes",
|
||||
"noscript", "ol", "p", "pre", "table", "ul"],
|
||||
"title"=>["link", "meta", "object", "script", "style"],
|
||||
"pre"=>
|
||||
["applet", "basefont", "big", "font", "img", "object", "small", "sub",
|
||||
"sup"],
|
||||
"form"=>["form"],
|
||||
"menu"=>
|
||||
["address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form",
|
||||
"h1", "h2", "h3", "h4", "h5", "h6", "hr", "isindex", "menu", "noframes",
|
||||
"noscript", "ol", "p", "pre", "table", "ul"],
|
||||
"label"=>["label"]}
|
||||
|
||||
OmittedAttrName =
|
||||
{"h6"=>
|
||||
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"right"=>"align", "rtl"=>"dir"},
|
||||
"object"=>
|
||||
{"bottom"=>"align", "declare"=>"declare", "left"=>"align", "ltr"=>"dir",
|
||||
"middle"=>"align", "right"=>"align", "rtl"=>"dir", "top"=>"align"},
|
||||
"dl"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
|
||||
"p"=>
|
||||
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"right"=>"align", "rtl"=>"dir"},
|
||||
"acronym"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"code"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"ul"=>
|
||||
{"circle"=>"type", "compact"=>"compact", "disc"=>"type", "ltr"=>"dir",
|
||||
"rtl"=>"dir", "square"=>"type"},
|
||||
"tt"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"label"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"form"=>{"get"=>"method", "ltr"=>"dir", "post"=>"method", "rtl"=>"dir"},
|
||||
"q"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"thead"=>
|
||||
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
|
||||
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
|
||||
"area"=>
|
||||
{"circle"=>"shape", "default"=>"shape", "ltr"=>"dir", "nohref"=>"nohref",
|
||||
"poly"=>"shape", "rect"=>"shape", "rtl"=>"dir"},
|
||||
"td"=>
|
||||
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
|
||||
"char"=>"align", "col"=>"scope", "colgroup"=>"scope", "justify"=>"align",
|
||||
"left"=>"align", "ltr"=>"dir", "middle"=>"valign", "nowrap"=>"nowrap",
|
||||
"right"=>"align", "row"=>"scope", "rowgroup"=>"scope", "rtl"=>"dir",
|
||||
"top"=>"valign"},
|
||||
"title"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"dir"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
|
||||
"s"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"ol"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
|
||||
"hr"=>
|
||||
{"center"=>"align", "left"=>"align", "ltr"=>"dir", "noshade"=>"noshade",
|
||||
"right"=>"align", "rtl"=>"dir"},
|
||||
"applet"=>
|
||||
{"bottom"=>"align", "left"=>"align", "middle"=>"align", "right"=>"align",
|
||||
"top"=>"align"},
|
||||
"table"=>
|
||||
{"above"=>"frame", "all"=>"rules", "below"=>"frame", "border"=>"frame",
|
||||
"box"=>"frame", "center"=>"align", "cols"=>"rules", "groups"=>"rules",
|
||||
"hsides"=>"frame", "left"=>"align", "lhs"=>"frame", "ltr"=>"dir",
|
||||
"none"=>"rules", "rhs"=>"frame", "right"=>"align", "rows"=>"rules",
|
||||
"rtl"=>"dir", "void"=>"frame", "vsides"=>"frame"},
|
||||
"legend"=>
|
||||
{"bottom"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align",
|
||||
"rtl"=>"dir", "top"=>"align"},
|
||||
"cite"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"a"=>
|
||||
{"circle"=>"shape", "default"=>"shape", "ltr"=>"dir", "poly"=>"shape",
|
||||
"rect"=>"shape", "rtl"=>"dir"},
|
||||
"html"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"u"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"blockquote"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"center"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"b"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"th"=>
|
||||
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
|
||||
"char"=>"align", "col"=>"scope", "colgroup"=>"scope", "justify"=>"align",
|
||||
"left"=>"align", "ltr"=>"dir", "middle"=>"valign", "nowrap"=>"nowrap",
|
||||
"right"=>"align", "row"=>"scope", "rowgroup"=>"scope", "rtl"=>"dir",
|
||||
"top"=>"valign"},
|
||||
"link"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"var"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"samp"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"div"=>
|
||||
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"right"=>"align", "rtl"=>"dir"},
|
||||
"textarea"=>
|
||||
{"disabled"=>"disabled", "ltr"=>"dir", "readonly"=>"readonly", "rtl"=>"dir"},
|
||||
"pre"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"head"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"span"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"br"=>{"all"=>"clear", "left"=>"clear", "none"=>"clear", "right"=>"clear"},
|
||||
"script"=>{"defer"=>"defer"},
|
||||
"noframes"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"style"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"meta"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"dt"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"option"=>
|
||||
{"disabled"=>"disabled", "ltr"=>"dir", "rtl"=>"dir", "selected"=>"selected"},
|
||||
"kbd"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"big"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"tfoot"=>
|
||||
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
|
||||
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
|
||||
"sup"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"bdo"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"isindex"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"dfn"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"fieldset"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"em"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"font"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"tbody"=>
|
||||
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
|
||||
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
|
||||
"noscript"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"li"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"col"=>
|
||||
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
|
||||
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
|
||||
"small"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"dd"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"i"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"menu"=>{"compact"=>"compact", "ltr"=>"dir", "rtl"=>"dir"},
|
||||
"strong"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"img"=>
|
||||
{"bottom"=>"align", "ismap"=>"ismap", "left"=>"align", "ltr"=>"dir",
|
||||
"middle"=>"align", "right"=>"align", "rtl"=>"dir", "top"=>"align"},
|
||||
"optgroup"=>{"disabled"=>"disabled", "ltr"=>"dir", "rtl"=>"dir"},
|
||||
"map"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"address"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"h1"=>
|
||||
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"right"=>"align", "rtl"=>"dir"},
|
||||
"sub"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"param"=>{"data"=>"valuetype", "object"=>"valuetype", "ref"=>"valuetype"},
|
||||
"input"=>
|
||||
{"bottom"=>"align", "button"=>"type", "checkbox"=>"type",
|
||||
"checked"=>"checked", "disabled"=>"disabled", "file"=>"type",
|
||||
"hidden"=>"type", "image"=>"type", "ismap"=>"ismap", "left"=>"align",
|
||||
"ltr"=>"dir", "middle"=>"align", "password"=>"type", "radio"=>"type",
|
||||
"readonly"=>"readonly", "reset"=>"type", "right"=>"align", "rtl"=>"dir",
|
||||
"submit"=>"type", "text"=>"type", "top"=>"align"},
|
||||
"h2"=>
|
||||
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"right"=>"align", "rtl"=>"dir"},
|
||||
"abbr"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"h3"=>
|
||||
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"right"=>"align", "rtl"=>"dir"},
|
||||
"strike"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"body"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"ins"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"button"=>
|
||||
{"button"=>"type", "disabled"=>"disabled", "ltr"=>"dir", "reset"=>"type",
|
||||
"rtl"=>"dir", "submit"=>"type"},
|
||||
"h4"=>
|
||||
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"right"=>"align", "rtl"=>"dir"},
|
||||
"select"=>
|
||||
{"disabled"=>"disabled", "ltr"=>"dir", "multiple"=>"multiple", "rtl"=>"dir"},
|
||||
"caption"=>
|
||||
{"bottom"=>"align", "left"=>"align", "ltr"=>"dir", "right"=>"align",
|
||||
"rtl"=>"dir", "top"=>"align"},
|
||||
"colgroup"=>
|
||||
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
|
||||
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
|
||||
"tr"=>
|
||||
{"baseline"=>"valign", "bottom"=>"valign", "center"=>"align",
|
||||
"char"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"middle"=>"valign", "right"=>"align", "rtl"=>"dir", "top"=>"valign"},
|
||||
"del"=>{"ltr"=>"dir", "rtl"=>"dir"},
|
||||
"h5"=>
|
||||
{"center"=>"align", "justify"=>"align", "left"=>"align", "ltr"=>"dir",
|
||||
"right"=>"align", "rtl"=>"dir"},
|
||||
"iframe"=>
|
||||
{"0"=>"frameborder", "1"=>"frameborder", "auto"=>"scrolling",
|
||||
"bottom"=>"align", "left"=>"align", "middle"=>"align", "no"=>"scrolling",
|
||||
"right"=>"align", "top"=>"align", "yes"=>"scrolling"}}
|
||||
|
||||
# :startdoc:
|
||||
# The code above is auto-generated. Don't edit manually.
|
||||
end
|
@ -0,0 +1,108 @@
|
||||
require 'pp'
|
||||
require 'htree/doc'
|
||||
require 'htree/elem'
|
||||
require 'htree/leaf'
|
||||
require 'htree/tag'
|
||||
require 'htree/output'
|
||||
require 'htree/raw_string'
|
||||
|
||||
module HTree
|
||||
# :stopdoc:
|
||||
class Doc
  # Pretty-printer hook used by +pp+.
  #
  # Emits an object group for the document and pretty-prints every child
  # node inside it, each one preceded by a breakable separator.
  #
  # q:: the pretty-printer object supplied by the +pp+ library.
  def pretty_print(q)
    q.object_group(self) {
      @children.each {|elt|
        q.breakable
        q.pp elt
      }
    }
  end
  # +inspect+ is built from +pretty_print+ above.
  alias inspect pretty_print_inspect
end
|
||||
|
||||
class Elem
  # Pretty-printer hook used by +pp+.
  #
  # An empty element is shown as <tt>{emptyelem STAG}</tt>.  Any other
  # element is shown as <tt>{elem STAG CHILD... ETAG}</tt>, where the end tag
  # is printed only when the element has one.
  #
  # q:: the pretty-printer object supplied by the +pp+ library.
  def pretty_print(q)
    if @empty
      q.group(1, '{emptyelem', '}') {
        q.breakable
        q.pp @stag
      }
    else
      q.group(1, "{elem", "}") {
        q.breakable
        q.pp @stag
        @children.each {|elt|
          q.breakable
          q.pp elt
        }
        # @etag is nil when the element has no end tag object.
        if @etag
          q.breakable
          q.pp @etag
        end
      }
    end
  end
  # +inspect+ is built from +pretty_print+ above.
  alias inspect pretty_print_inspect
end
|
||||
|
||||
module Leaf
  # Pretty-printer hook used by +pp+.
  #
  # Prints <tt>{kind ...}</tt> where _kind_ is the downcased class name with
  # any enclosing namespace removed.  When the leaf carries a raw string,
  # the raw string is shown line by line (each line keeps its own line
  # terminator); otherwise, if the object responds to +display_xml+, the
  # result of <tt>display_xml('')</tt> is shown.
  #
  # q:: the pretty-printer object supplied by the +pp+ library.
  def pretty_print(q)
    q.group(1, '{', '}') {
      q.text self.class.name.sub(/.*::/,'').downcase
      if rs = @raw_string
        # Each match is one line including its terminator (\r\n, \r or \n);
        # the last alternative picks up a final line without a terminator.
        rs.scan(/[^\r\n]*(?:\r\n?|\n|[^\r\n]\z)/) {|line|
          q.breakable
          q.pp line
        }
      elsif self.respond_to? :display_xml
        q.breakable
        q.text self.display_xml('')
      end
    }
  end
  # +inspect+ is built from +pretty_print+ above.
  alias inspect pretty_print_inspect
end
|
||||
|
||||
class Name
  # Returns a compact textual form of the name:
  #
  # * <tt>xmlns</tt> or <tt>xmlns:LOCAL</tt> when +xmlns?+ is true,
  # * the local name alone when there is no (or an empty) namespace URI,
  # * <tt>PREFIX{URI}LOCAL</tt> when a namespace prefix is set,
  # * <tt>-{URI}LOCAL</tt> when the namespace prefix is exactly +false+,
  # * <tt>{URI}LOCAL</tt> otherwise (prefix is +nil+).
  def inspect
    if xmlns?
      @local_name ? "xmlns:#{@local_name}" : "xmlns"
    elsif !@namespace_uri || @namespace_uri.empty?
      @local_name
    elsif @namespace_prefix
      "#{@namespace_prefix}{#{@namespace_uri}}#{@local_name}"
    elsif @namespace_prefix == false
      # false (as opposed to nil) is rendered with a leading "-".
      "-{#{@namespace_uri}}#{@local_name}"
    else
      "{#{@namespace_uri}}#{@local_name}"
    end
  end
end
|
||||
|
||||
class STag
  # Pretty-printer hook used by +pp+.
  #
  # Prints <tt><NAME ATTR="VALUE" ...></tt>: the inspected tag name followed
  # by every attribute as <tt>name="value"</tt>, where the value is the
  # attribute text's +to_attvalue_content+.
  #
  # q:: the pretty-printer object supplied by the +pp+ library.
  def pretty_print(q)
    q.group(1, '<', '>') {
      q.text @name.inspect

      @attributes.each {|n, t|
        q.breakable
        q.text "#{n.inspect}=\"#{t.to_attvalue_content}\""
      }
    }
  end
  # +inspect+ is built from +pretty_print+ above.
  alias inspect pretty_print_inspect
end
|
||||
|
||||
class ETag
  # Pretty-printer hook used by +pp+: prints the end tag as
  # <tt></QUALIFIED_NAME></tt>.
  #
  # q:: the pretty-printer object supplied by the +pp+ library.
  def pretty_print(q)
    q.group(1, '</', '>') {
      q.text @qualified_name
    }
  end
  # +inspect+ is built from +pretty_print+ above.
  alias inspect pretty_print_inspect
end
|
||||
|
||||
class BogusETag
  # Pretty-printer hook used by +pp+.
  #
  # Prints <tt>{bogusetag ...}</tt>.  When a raw string is available it is
  # shown verbatim; otherwise the end tag is reconstructed.
  #
  # BogusETag#initialize stores the tag in @etag and does not set
  # @qualified_name, so the reconstructed form is printed through @etag
  # when it is present.  The @qualified_name form is kept only as a
  # fallback for instances that carry no @etag.
  #
  # q:: the pretty-printer object supplied by the +pp+ library.
  def pretty_print(q)
    q.group(1, '{', '}') {
      q.text self.class.name.sub(/.*::/,'').downcase
      if rs = @raw_string
        q.breakable
        q.text rs
      elsif @etag
        # ETag#pretty_print renders "</name>".
        q.pp @etag
      else
        q.text "</#{@qualified_name}>"
      end
    }
  end
end
|
||||
# :startdoc:
|
||||
end
|
@ -0,0 +1,92 @@
|
||||
require 'htree/modules'
|
||||
require 'htree/raw_string'
|
||||
|
||||
module HTree
|
||||
class XMLDecl
  # Creates an XML declaration node.
  #
  # version::    version string; must consist only of letters, digits and
  #              the characters <tt>_ . : -</tt>, and must not be empty.
  # encoding::   optional encoding name; when given it must start with a
  #              letter, followed by letters, digits, <tt>.</tt>,
  #              <tt>_</tt> or <tt>-</tt>.
  # standalone:: optional standalone flag; must be +nil+, +true+ or +false+.
  #
  # Raises HTree::Error when any argument is not acceptable.
  def initialize(version, encoding=nil, standalone=nil)
    init_raw_string
    if /\A[a-zA-Z0-9_.:-]+\z/ !~ version
      raise HTree::Error, "invalid version in XML declaration: #{version.inspect}"
    end
    if encoding && /\A[A-Za-z][A-Za-z0-9._-]*\z/ !~ encoding
      raise HTree::Error, "invalid encoding in XML declaration: #{encoding.inspect}"
    end
    unless standalone == nil || standalone == true || standalone == false
      raise HTree::Error, "invalid standalone document declaration in XML declaration: #{standalone.inspect}"
    end
    @version = version
    @encoding = encoding
    @standalone = standalone
  end
  attr_reader :version, :encoding, :standalone
end
|
||||
|
||||
class DocType
  # Creates a document type declaration node.
  #
  # root_element_name:: name of the root element (stored as given).
  # public_identifier:: optional public identifier; when given it may only
  #                     contain the characters accepted by the check below.
  # system_identifier:: optional system identifier; when given it must not
  #                     contain both a double quote and a single quote.
  #
  # Raises HTree::Error when an identifier is not acceptable.
  def initialize(root_element_name, public_identifier=nil, system_identifier=nil)
    init_raw_string
    if public_identifier && /\A[ \x0d\x0aa-zA-Z0-9\-'()+,.\/:=?;!*\#@$_%]*\z/ !~ public_identifier
      raise HTree::Error, "invalid public identifier in document type declaration: #{public_identifier.inspect}"
    end
    # Only the combination of both quote characters is rejected; a value
    # containing just one kind of quote is accepted.
    if system_identifier && /"/ =~ system_identifier && /'/ =~ system_identifier
      raise HTree::Error, "invalid system identifier in document type declaration: #{system_identifier.inspect}"
    end

    @root_element_name = root_element_name
    @public_identifier = public_identifier
    @system_identifier = system_identifier
  end
  attr_reader :root_element_name, :public_identifier, :system_identifier
end
|
||||
|
||||
class ProcIns
  # :stopdoc:
  class << self
    # The raw constructor: no rewriting of the content, the check in
    # #initialize applies as is.
    alias new! new
  end
  # :startdoc:

  # Creates a processing instruction node.
  #
  # Every <tt>?></tt> in _content_ is rewritten to <tt>? ></tt> before the
  # node is built, so the content passed on never contains <tt>?></tt>.
  # _content_ may be +nil+.
  def ProcIns.new(target, content)
    content = content.gsub(/\?>/, '? >') if content
    new! target, content
  end

  # Stores _target_ and _content_.
  #
  # Raises HTree::Error if _content_ contains <tt>?></tt>.
  def initialize(target, content) # :notnew:
    init_raw_string
    if content && /\?>/ =~ content
      raise HTree::Error, "invalid processing instruction content: #{content.inspect}"
    end
    @target = target
    @content = content
  end
  attr_reader :target, :content
end
|
||||
|
||||
class Comment
  # :stopdoc:
  class << self
    # The raw constructor: no rewriting of the content, the check in
    # #initialize applies as is.
    alias new! new
  end
  # :startdoc:

  # Creates a comment node.
  #
  # The content is rewritten so that it passes the check in #initialize:
  # a space is inserted between consecutive hyphens (<tt>--</tt> becomes
  # <tt>- -</tt>) and a space is appended after a trailing hyphen.
  def Comment.new(content)
    content = content.gsub(/-(-+)/) { '-' + ' -' * $1.length }.sub(/-\z/, '- ')
    new! content
  end

  # Stores _content_.
  #
  # Raises HTree::Error if _content_ contains <tt>--</tt> or ends with
  # <tt>-</tt>.
  def initialize(content) # :notnew:
    init_raw_string
    if /--/ =~ content || /-\z/ =~ content
      raise HTree::Error, "invalid comment content: #{content.inspect}"
    end
    @content = content
  end
  attr_reader :content
end
|
||||
|
||||
class BogusETag
  # Creates a bogus end tag node for _qualified_name_.
  #
  # The name is not kept directly: it is wrapped in an ETag object which
  # is stored in @etag.
  def initialize(qualified_name)
    init_raw_string
    @etag = ETag.new(qualified_name)
  end
end
|
||||
end
|
@ -0,0 +1,369 @@
|
||||
require 'htree/modules'
|
||||
require 'htree/elem'
|
||||
require 'htree/inspect'
|
||||
|
||||
module HTree
|
||||
module Node
  # creates a location object which points to self.
  def make_loc
    self.class::Loc.new(nil, nil, self)
  end

  # return self.
  def to_node
    self
  end

  # +subst+ substitutes several subtrees at once.
  #
  #   t = HTree('<r><x/><y/><z/></r>')
  #   l = t.make_loc
  #   t2 = t.subst({
  #     l.get_subnode(0, 'k') => 'v',
  #     l.get_subnode(0, -1) => HTree('<a/>'),
  #     l.get_subnode(0, 1) => nil,
  #     l.get_subnode(0, 2, 0) => HTree('<b/>'),
  #   })
  #   pp t2
  #   # =>
  #   #<HTree::Doc
  #    {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
  #
  # _pairs_ maps locations (objects responding to +index_list+) to their
  # replacement.  A replacement is a single value or an array of values;
  # +nil+ entries are dropped.
  #
  # Returns self when _pairs_ is empty.  When a key points to self, the
  # single replacement value is returned.
  #
  # Raises ArgumentError when a key points to self together with keys that
  # point below it, or when self would be replaced by anything other than
  # exactly one value.
  def subst(pairs)
    # Turn every location key into its index path relative to self and
    # every value into an array.
    pairs = pairs.map {|key, val|
      key = key.index_list(self)
      unless Array === val
        val = [val]
      end
      [key, val]
    }

    # An empty index path means the key points to self.
    pairs_empty_key, pairs_nonempty_key =
      pairs.partition {|key, val| key.empty? }
    if !pairs_empty_key.empty?
      if !pairs_nonempty_key.empty?
        raise ArgumentError, "cannot substitute a node under substituting tree."
      end
      result = []
      pairs_empty_key.each {|key, val| result.concat val }
      result.compact!
      if result.length == 1
        return result[0]
      else
        raise ArgumentError, "cannot substitute top node by multiple nodes: #{result.inspect}"
      end
    end
    if pairs_nonempty_key.empty?
      return self
    end

    subst_internal(pairs)
  end

  # Performs the substitution for _pairs_ whose keys are non-empty index
  # paths.  The last element of each path is the index directly below
  # self; it is removed from the path (the arrays are modified) and the
  # rest is handled by the subnode at that index.
  def subst_internal(pairs) # :nodoc:
    # Group the remaining (path, value) pairs by the subnode index.
    subnode_pairs = {}
    pairs.each {|key, val|
      k = key.pop
      (subnode_pairs[k] ||= []) << [key, val]
    }
    subnode_pairs = subnode_pairs.map {|k, subpairs|
      s = get_subnode(k)
      subpairs_empty_key, subpairs_nonempty_key =
        subpairs.partition {|key, val| key.empty? }
      if !subpairs_empty_key.empty?
        # The subnode itself is replaced; nothing below it may be
        # substituted at the same time.
        if !subpairs_nonempty_key.empty?
          raise ArgumentError, "cannot substitute a node under substituting tree."
        end
        r = []
        subpairs_empty_key.each {|key, val| r.concat val }
        [k, r.compact]
      elsif subpairs_nonempty_key.empty?
        [k, s]
      else
        [k, s.subst_internal(subpairs)]
      end
    }
    subst_subnode(subnode_pairs)
  end
end
|
||||
|
||||
# :stopdoc:
|
||||
# node_test_string returns the node test used for the node in a location
# path: a fixed string per node class, or the qualified element name for
# an element.
class Doc
  def node_test_string() 'doc()' end
end

class Elem
  def node_test_string() @stag.element_name.qualified_name end
end

class Text
  def node_test_string() 'text()' end
end

class BogusETag
  def node_test_string() 'bogus-etag()' end
end

class XMLDecl
  def node_test_string() 'xml-declaration()' end
end

class DocType
  def node_test_string() 'doctype()' end
end

class ProcIns
  def node_test_string() 'processing-instruction()' end
end

class Comment
  def node_test_string() 'comment()' end
end
|
||||
|
||||
module Container
  # Returns the location step (a string) for the child at _index_.
  #
  # An index outside the children yields <tt>*[INDEX]</tt>.  Otherwise the
  # step is the child's +node_test_string+, followed by <tt>[N]</tt> (the
  # 1-based position among the children with the same node test) when
  # that node test occurs more than once.
  #
  # The steps for all children are computed on the first call and kept in
  # @loc_step_children; every call returns a copy of the stored string.
  def find_loc_step(index)
    if index < 0 || @children.length <= index
      return "*[#{index}]"
    end

    return @loc_step_children[index].dup if defined? @loc_step_children

    # count[t] ends up as the total number of children whose node test is t.
    count = Hash.new(0)

    # One [node_test, running position] pair per child.
    steps = @children.map {|c|
      node_test = c.node_test_string
      count[node_test] += 1
      [node_test, count[node_test]]
    }

    @loc_step_children = steps.map {|node_test, i|
      if count[node_test] == 1
        node_test
      else
        "#{node_test}[#{i}]"
      end
    }

    return @loc_step_children[index].dup
  end
end
|
||||
|
||||
class Elem
  # Returns the location step for _index_.
  #
  # An Integer index denotes a child and is handled by the inherited
  # implementation.  A String is parsed as an attribute name with
  # DefaultContext; a Name is used as is.  For an attribute the step is
  # <tt>@QUALIFIED_NAME</tt>.
  #
  # Raises TypeError for any other kind of index.
  def find_loc_step(index)
    return super if Integer === index
    if String === index
      index = Name.parse_attribute_name(index, DefaultContext)
    end
    unless Name === index
      raise TypeError, "invalid index: #{index.inspect}"
    end
    "@#{index.qualified_name}"
  end
end
|
||||
# :startdoc:
|
||||
end
|
||||
|
||||
class HTree::Location
  # Builds a location for _node_.
  #
  # When _parent_ (a location) is given, the node is fetched again from the
  # parent's node by _index_ and must be the very same object as _node_.
  # Without a parent the location is a top location: it has no parent and
  # no index.
  #
  # Raises ArgumentError when the fetched node is not _node_, or when the
  # class of this location is not the Loc class of the node's class.
  def initialize(parent, index, node) # :nodoc:
    if parent
      @parent = parent
      @index = index
      @node = parent.node.get_subnode(index)
      if !@node.equal?(node)
        raise ArgumentError, "unexpected node"
      end
    else
      @parent = nil
      @index = nil
      @node = node
    end
    # A location may point to no node at all (@node is nil); the class
    # check only applies when there is a node.
    if @node && self.class != @node.class::Loc
      raise ArgumentError, "invalid location class: #{self.class} should be #{node.class::Loc}"
    end
    @subloc = {}
  end
  attr_reader :parent, :index, :node
  alias to_node node

  # return self.
  def make_loc
    self
  end

  # +top+ returns the originator location.
  #
  #   t = HTree('<a><b><c><d>')
  #   l = t.make_loc.get_subnode(0, 0, 0, 0)
  #   p l, l.top
  #   # =>
  #   #<HTree::Location: doc()/a/b/c/d>
  #   #<HTree::Location: doc()>
  def top
    result = self
    while result.parent
      result = result.parent
    end
    result
  end

  # +subst_itself+ substitutes the node pointed by the location.
  # It returns the location of substituted node.
  #
  #   t1 = HTree('<a><b><c><d>')
  #   p t1
  #   l1 = t1.make_loc.get_subnode(0, 0, 0, 0)
  #   p l1
  #   l2 = l1.subst_itself(HTree('<z/>'))
  #   p l2
  #   t2 = l2.top.to_node
  #   p t2
  #   # =>
  #   #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <d>}}}}>
  #   #<HTree::Location: doc()/a/b/c/d>
  #   #<HTree::Location: doc()/a/b/c/z>
  #   #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <z>}}}}>
  #
  def subst_itself(node)
    if @parent
      new_index = @index
      # The location may point to a position which has no node.  For an
      # Integer index before the first child or beyond the last one, the
      # resulting location is looked up at index 0 or at the current
      # number of children instead -- presumably where subst_subnode
      # places the new node; confirm against the container's
      # subst_subnode.
      if !@node && Integer === @index
        if @index < 0
          new_index = 0
        elsif @parent.to_node.children.length < @index
          new_index = @parent.to_node.children.length
        end
      end
      # Substitute in the parent's node, then let the parent substitute
      # itself upwards; the result is the new parent location.
      @parent.subst_itself(@parent.to_node.subst_subnode({@index=>node})).get_subnode(new_index)
    else
      node.make_loc
    end
  end

  # +subst+ substitutes several subtrees at once.
  #
  #   t = HTree('<r><x/><y/><z/></r>')
  #   l = t.make_loc
  #   l2 = l.subst({
  #     l.root.get_subnode('k') => 'v',
  #     l.root.get_subnode(-1) => HTree('<a/>'),
  #     l.find_element('y') => nil,
  #     l.find_element('z').get_subnode(0) => HTree('<b/>'),
  #   })
  #   pp l2, l2.to_node
  #   # =>
  #   #<HTree::Doc::Loc: doc()>
  #   #<HTree::Doc
  #    {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
  def subst(pairs)
    subst_itself(@node.subst(pairs))
  end

  # +loc_list+ returns an array containing from location's root to itself.
  #
  #   t = HTree('<a><b><c>')
  #   l = t.make_loc.get_subnode(0, 0, 0)
  #   pp l, l.loc_list
  #   # =>
  #   #<HTree::Location: doc()/a/b/c>
  #   [#<HTree::Location: doc()>,
  #    #<HTree::Location: doc()/a>,
  #    #<HTree::Location: doc()/a/b>,
  #    #<HTree::Location: doc()/a/b/c>]
  #
  def loc_list
    loc = self
    result = [self]
    # Collected from self upwards, then reversed to start at the top.
    while loc = loc.parent
      result << loc
    end
    result.reverse!
    result
  end

  # +path+ returns the path of the location.
  #
  #   l = HTree.parse("<a><b>x</b><b/><a/>").make_loc
  #   l = l.get_subnode(0, 0, 0)
  #   p l.path # => "doc()/a/b[1]/text()"
  def path
    result = ''
    loc_list.each {|loc|
      if parent = loc.parent
        result << '/' << parent.node.find_loc_step(loc.index)
      else
        # The top location contributes the node test of its node.
        result << loc.node.node_test_string
      end
    }
    result
  end

  # Returns the indexes which lead from _node_ down to this location,
  # innermost index first (the last element is the index directly below
  # _node_).  The list is empty when the location points to _node_ itself.
  #
  # Raises ArgumentError when _node_ is not the node of this location or
  # of one of its ancestors.
  def index_list(node) # :nodoc:
    result = []
    loc = self
    while parent = loc.parent
      return result if loc.to_node.equal? node
      result << loc.index
      loc = parent
    end
    return result if loc.to_node.equal? node
    raise ArgumentError, "the location is not under the node: #{self.path}"
  end

  # :stopdoc:
  # Pretty-printer hook used by +pp+: prints <tt>#<CLASS: PATH></tt>, where
  # the path is made of the same steps as #path and may be broken after
  # every <tt>/</tt>.
  def pretty_print(q)
    q.group(1, "#<#{self.class.name}", '>') {
      q.text ':'
      q.breakable
      loc_list.each {|loc|
        if parent = loc.parent
          q.text '/'
          q.group { q.breakable '' }
          q.text parent.node.find_loc_step(loc.index)
        else
          q.text loc.node.node_test_string
        end
      }
    }
  end
  alias inspect pretty_print_inspect
  # :startdoc:
end
|
||||
|
||||
module HTree::Container::Loc
  # +get_subnode+ returns a location object which points to a subnode
  # indexed by _index_.
  #
  # The location is created on first use and remembered in @subloc, so the
  # same index always yields the same location object.  When there is no
  # node at _index_, a plain HTree::Location pointing to no node is used.
  def get_subnode_internal(index) # :nodoc:
    return @subloc[index] if @subloc.include? index
    node = @node.get_subnode(index)
    if node
      @subloc[index] = node.class::Loc.new(self, index, node)
    else
      @subloc[index] = HTree::Location.new(self, index, node)
    end
  end

  # +subst_subnode+ returns the location which refers the substituted tree.
  #   loc.subst_subnode(pairs) -> loc
  #
  #   t = HTree('<a><b><c>')
  #   l = t.make_loc.get_subnode(0, 0)
  #   l = l.subst_subnode({0=>HTree('<z/>')})
  #   pp t, l.top.to_node
  #   # =>
  #   #<HTree::Doc {elem <a> {elem <b> {emptyelem <c>}}}>
  #   #<HTree::Doc {elem <a> {elem <b> {emptyelem <z>}}}>
  #
  def subst_subnode(pairs)
    self.subst_itself(@node.subst_subnode(pairs))
  end

  # +children+ returns an array of child locations.
  def children
    (0...@node.children.length).map {|i| get_subnode(i) }
  end
end
|
||||
|
||||
class HTree::Elem::Loc
  # +context+ returns the context of the element the location points to.
  def context() @node.context end

  # +element_name+ returns the name of the element name as a Name object.
  def element_name() @node.element_name end

  # +empty_element?+ returns the result of +empty_element?+ of the element
  # the location points to.
  def empty_element?() @node.empty_element? end

  # +each_attribute+ iterates over each attributes.
  #
  # The block receives the attribute name and the location of the
  # attribute (not the attribute text itself).
  def each_attribute
    @node.each_attribute {|attr_name, attr_text|
      attr_loc = get_subnode(attr_name)
      yield attr_name, attr_loc
    }
  end
end
|
||||
|
||||
class HTree::Text::Loc
  # The following methods are forwarded to the text node the location
  # points to.

  # +to_s+ returns the result of +to_s+ of the text node.
  def to_s() @node.to_s end

  # +strip+ returns the result of +strip+ of the text node.
  def strip() @node.strip end

  # +empty?+ returns the result of +empty?+ of the text node.
  def empty?() @node.empty? end
end
|
@ -0,0 +1,49 @@
|
||||
# Skeleton of the HTree class hierarchy.  Only the classes, the modules and
# their inclusion relationships are declared here; behaviour is added by
# reopening them elsewhere.
module HTree
  class Name
    include HTree
  end

  class Context
    include HTree
  end

  # :stopdoc:
  module Tag
    include HTree
  end

  class STag
    include Tag
  end

  class ETag
    include Tag
  end
  # :startdoc:

  # Node kinds: containers (Doc, Elem) and leaves (everything else).
  module Node
    include HTree
  end

  module Container
    include Node
  end

  module Leaf
    include Node
  end

  # Traversal mix-ins shared by nodes and locations.
  module Traverse
  end

  module Container::Trav
    include Traverse
  end

  module Leaf::Trav
    include Traverse
  end

  # Location base class and the mix-ins for container/leaf locations.
  class Location
    include HTree
  end

  module Container::Loc
  end

  module Leaf::Loc
  end

  # Each concrete node class gets its own Trav module (included into the
  # class itself) and its own Loc class, which mixes in that same Trav.

  class Doc
    include Container
    module Trav
      include Container::Trav
    end
    include Trav
    class Loc < Location
      include Trav, Container::Loc
    end
  end

  class Elem
    include Container
    module Trav
      include Container::Trav
    end
    include Trav
    class Loc < Location
      include Trav, Container::Loc
    end
  end

  class Text
    include Leaf
    module Trav
      include Leaf::Trav
    end
    include Trav
    class Loc < Location
      include Trav, Leaf::Loc
    end
  end

  class XMLDecl
    include Leaf
    module Trav
      include Leaf::Trav
    end
    include Trav
    class Loc < Location
      include Trav, Leaf::Loc
    end
  end

  class DocType
    include Leaf
    module Trav
      include Leaf::Trav
    end
    include Trav
    class Loc < Location
      include Trav, Leaf::Loc
    end
  end

  class ProcIns
    include Leaf
    module Trav
      include Leaf::Trav
    end
    include Trav
    class Loc < Location
      include Trav, Leaf::Loc
    end
  end

  class Comment
    include Leaf
    module Trav
      include Leaf::Trav
    end
    include Trav
    class Loc < Location
      include Trav, Leaf::Loc
    end
  end

  class BogusETag
    include Leaf
    module Trav
      include Leaf::Trav
    end
    include Trav
    class Loc < Location
      include Trav, Leaf::Loc
    end
  end

  # Raised for all HTree specific failures.
  class Error < StandardError; end
end
|
||||
|
@ -0,0 +1,122 @@
|
||||
require 'htree/scan' # for Pat::Nmtoken
|
||||
require 'htree/context'
|
||||
|
||||
module HTree
  # Name represents an element name or an attribute name.
  # It consists of a namespace prefix, a namespace URI and a local name.
  class Name
    #--
    # How the textual forms map to the three components:
    #
    #   element name                   prefix    uri   localname
    #   {u}n, n with xmlns=u           nil       'u'   'n'
    #   p{u}n, p:n with xmlns:p=u      'p'       'u'   'n'
    #   n with xmlns=''                nil       ''    'n'
    #
    #   attribute name
    #   xmlns=                         'xmlns'   nil   nil
    #   xmlns:n=                       'xmlns'   nil   'n'
    #   p{u}n=, p:n= with xmlns:p=u    'p'       'u'   'n'
    #   n=                             nil       ''    'n'
    #++

    # Builds the Name of an element from its textual form, resolving
    # prefixes (and the default namespace) through _context_.
    def Name.parse_element_name(name, context)
      if /\{(.*)\}/ =~ name
        # "{u}n" means "use default namespace",
        # "p{u}n" means "use the specified prefix p"
        prefix = $`
        return Name.new(prefix == '' ? nil : prefix, $1, $')
      end
      if /:/ =~ name && !context.namespace_uri($`).empty?
        return Name.new($`, context.namespace_uri($`), $')
      end
      default_uri_known = !context.namespace_uri(nil).empty?
      return Name.new(nil, context.namespace_uri(nil), name) if default_uri_known
      Name.new(nil, '', name)
    end

    # Builds the Name of an attribute from its textual form.  Namespace
    # declarations ("xmlns", "xmlns:p") get the prefix 'xmlns' and no URI;
    # unprefixed attributes never pick up the default namespace.
    def Name.parse_attribute_name(name, context)
      return Name.new('xmlns', nil, nil) if name == 'xmlns'
      return Name.new('xmlns', nil, $') if /\Axmlns:/ =~ name
      if /\{(.*)\}/ =~ name
        prefix = $`
        return Name.new(prefix == '' ? nil : prefix, $1, $')
      end
      if /:/ =~ name && !context.namespace_uri($`).empty?
        return Name.new($`, context.namespace_uri($`), $')
      end
      Name.new(nil, '', name)
    end

    NameCache = {}

    # Name objects are interned: asking twice for the same components (on
    # the same class) yields the same object.  The strings stored in a
    # freshly created Name are frozen copies of the arguments.
    def Name.new(namespace_prefix, namespace_uri, local_name)
      key = [namespace_prefix, namespace_uri, local_name, self]
      return NameCache[key] if NameCache.key?(key)
      frozen_parts = key[0, 3].map {|part| part && part.dup.freeze }
      NameCache[frozen_parts + [self]] = super(*frozen_parts)
    end

    # Validates the components; raises HTree::Error when the prefix or the
    # local name is not a name token, when an xmlns name carries a URI, or
    # when any other name has a non-String URI.
    def initialize(namespace_prefix, namespace_uri, local_name)
      @namespace_prefix = namespace_prefix
      @namespace_uri = namespace_uri
      @local_name = local_name
      if @namespace_prefix && !(/\A#{Pat::Nmtoken}\z/o =~ @namespace_prefix)
        raise HTree::Error, "invalid namespace prefix: #{@namespace_prefix.inspect}"
      end
      if @local_name && !(/\A#{Pat::Nmtoken}\z/o =~ @local_name)
        raise HTree::Error, "invalid local name: #{@local_name.inspect}"
      end
      if @namespace_prefix == 'xmlns'
        raise HTree::Error, "Name object for xmlns:* must not have namespace URI: #{@namespace_uri.inspect}" unless @namespace_uri == nil
      elsif !(String === @namespace_uri)
        raise HTree::Error, "invalid namespace URI: #{@namespace_uri.inspect}"
      end
    end
    attr_reader :namespace_prefix, :namespace_uri, :local_name

    # True for names of namespace declarations (xmlns, xmlns:p).
    def xmlns?
      @namespace_uri == nil && @namespace_prefix == 'xmlns'
    end

    # "{uri}local" when a non-empty URI is present, otherwise a copy of the
    # local name.
    def universal_name
      return @local_name.dup unless @namespace_uri && !@namespace_uri.empty?
      "{#{@namespace_uri}}#{@local_name}"
    end

    # "prefix:local" when both a non-empty URI and a prefix are present;
    # otherwise a copy of the local name, or "xmlns" if there is none.
    def qualified_name
      has_uri = @namespace_uri && !@namespace_uri.empty?
      return "#{@namespace_prefix}:#{@local_name}" if has_uri && @namespace_prefix
      return @local_name.dup if has_uri || @local_name
      "xmlns"
    end

    # "prefix{uri}local" / "{uri}local" when a non-empty URI is present;
    # otherwise a copy of the local name, or "xmlns" if there is none.
    def to_s
      has_uri = @namespace_uri && !@namespace_uri.empty?
      unless has_uri
        return @local_name ? @local_name.dup : "xmlns"
      end
      return "{#{@namespace_uri}}#{@local_name}" unless @namespace_prefix
      "#{@namespace_prefix}{#{@namespace_uri}}#{@local_name}"
    end
  end
end
|
@ -0,0 +1,212 @@
|
||||
require 'htree/encoder'
|
||||
require 'htree/doc'
|
||||
require 'htree/elem'
|
||||
require 'htree/leaf'
|
||||
require 'htree/text'
|
||||
|
||||
module HTree
|
||||
# :stopdoc:
|
||||
|
||||
class Text
  # Character references used when serializing text.  The replacement
  # values must be the escaped forms; mapping each character to itself
  # would emit raw '<', '>' and '"' into the generated markup.
  ChRef = {
    '>' => '&gt;',
    '<' => '&lt;',
    '"' => '&quot;',
  }

  # Writes this text as element content: '<' and '>' in @rcdata are
  # replaced by character references before being handed to out.output_text.
  def output(out, context=nil)
    out.output_text @rcdata.gsub(/[<>]/) {|s| ChRef[s] }
  end

  # Returns @rcdata prepared for use inside a double-quoted attribute
  # value: '<', '>' and '"' are replaced by character references.
  def to_attvalue_content
    @rcdata.gsub(/[<>"]/) {|s| ChRef[s] }
  end

  # Writes this text as a double-quoted attribute value.
  def output_attvalue(out, context)
    out.output_string '"'
    out.output_text to_attvalue_content
    out.output_string '"'
  end

  # Writes this text verbatim, as the content of a CDATA element.
  # Raises ArgumentError when the text contains '</', because that sequence
  # cannot be represented there.
  def output_cdata(out)
    str = self.to_s
    if %r{</} =~ str
      raise ArgumentError, "CDATA cannot contain '</': #{str.inspect}"
    end
    out.output_string(str)
  end
end
|
||||
|
||||
class Name
  # Writes this name the way it appears in a tag: "xmlns" / "xmlns:local"
  # for namespace declarations, the qualified name otherwise.
  def output(out, context)
    # xxx: validate namespace prefix
    rendered =
      if !xmlns?
        qualified_name
      elsif @local_name
        "xmlns:#{@local_name}"
      else
        "xmlns"
      end
    out.output_string rendered
  end

  # Writes an attribute: this name, '=', then _text_ as a quoted value.
  def output_attribute(text, out, context)
    output(out, context)
    out.output_string '='
    text.output_attvalue(out, context)
  end
end
|
||||
|
||||
class Doc
  # Writes every child in order.  Children that know how to emit an XML
  # declaration (they respond to output_prolog_xmldecl) are written through
  # that method, but only the first one; later ones are dropped.
  def output(out, context)
    xmldecl_written = false
    @children.each do |child|
      unless child.respond_to? :output_prolog_xmldecl
        child.output(out, context)
        next
      end
      child.output_prolog_xmldecl(out, context) unless xmldecl_written # xxx: encoding?
      xmldecl_written = true
    end
  end
end
|
||||
|
||||
class Elem
  # Writes this element.
  #
  # * XHTML <script> and <style> elements have their children written as
  #   CDATA content (out.output_cdata_content).
  # * Empty elements are written as a single empty tag.
  # * Everything else is written as start tag, children, end tag; the
  #   children are written with the context returned by the start tag.
  #
  # The element name test is anchored at both ends and the dots of the
  # namespace URI are escaped, so that only exactly "script" and "style"
  # in the XHTML namespace are treated as CDATA elements (an unanchored
  # pattern would also catch names such as "stylesheet").
  def output(out, context)
    if %r{\A\{http://www\.w3\.org/1999/xhtml\}(?:script|style)\z} =~ @stag.element_name.universal_name
      children_context = @stag.output_stag(out, context)
      out.output_cdata_content(@children, children_context)
      @stag.output_etag(out, context)
    elsif @empty
      @stag.output_emptytag(out, context)
    else
      children_context = @stag.output_stag(out, context)
      @children.each {|n| n.output(out, children_context) }
      @stag.output_etag(out, context)
    end
  end
end
|
||||
|
||||
class STag
  # Writes every attribute except namespace declarations, each preceded by
  # a space, then lets the tag's own context write the namespace
  # declarations.  Returns whatever @context.output_namespaces returns,
  # which callers use as the context for the children.
  def output_attributes(out, context)
    @attributes.each do |attr_name, attr_text|
      unless attr_name.xmlns?
        out.output_string ' '
        attr_name.output_attribute(attr_text, out, context)
      end
    end
    @context.output_namespaces(out, context)
  end

  # Writes this tag as an empty-element tag.  The closing part is a
  # newline, an optional slash (decided by out) and '>'.
  # Returns the context for children.
  def output_emptytag(out, context)
    out.output_string '<'
    @name.output(out, context)
    inner_context = output_attributes(out, context)
    out.output_string "\n"
    out.output_slash_if_xml
    out.output_string ">"
    inner_context
  end

  # Writes this tag as a start tag and returns the context for children.
  def output_stag(out, context)
    out.output_string '<'
    @name.output(out, context)
    inner_context = output_attributes(out, context)
    out.output_string "\n>"
    inner_context
  end

  # Writes the end tag matching this start tag.
  def output_etag(out, context)
    out.output_string '</'
    @name.output(out, context)
    out.output_string "\n>"
  end
end
|
||||
|
||||
class Context
  # Writes the namespace declarations needed to switch from _outer_context_
  # to this context:
  #
  # * a prefix whose URI differs from the outer one gets an explicit
  #   xmlns / xmlns:prefix attribute;
  # * prefixes for which the outer context returns nil are collected and
  #   passed to out.output_xmlns in a single call;
  # * prefixes bound to the same URI in both contexts produce nothing.
  #
  # Returns the outer context with this context's namespaces substituted.
  def output_namespaces(out, outer_context)
    undeclared = {}
    @namespaces.each do |prefix, uri|
      inherited_uri = outer_context.namespace_uri(prefix)
      if inherited_uri == nil
        undeclared[prefix] = uri
        next
      end
      next if inherited_uri == uri
      out.output_string(prefix ? " xmlns:#{prefix}=" : " xmlns=")
      Text.new(uri).output_attvalue(out, outer_context)
    end
    out.output_xmlns(undeclared) unless undeclared.empty?
    outer_context.subst_namespaces(@namespaces)
  end
end
|
||||
|
||||
class BogusETag
  # A stray end tag is dropped from generated output: nothing is written
  # and nil is returned.
  def output(out, context)
    nil
  end
end
|
||||
|
||||
class XMLDecl
  # The generic output hook writes nothing; an XML declaration is only
  # emitted through output_prolog_xmldecl.
  def output(out, context)
    nil
  end

  # Writes the XML declaration: the version always, the encoding when one
  # is set, and the standalone flag ("yes"/"no") unless it is nil.
  def output_prolog_xmldecl(out, context)
    out.output_string "<?xml version=\"#{@version}\""
    out.output_string " encoding=\"#{@encoding}\"" if @encoding
    unless @standalone == nil
      flag = @standalone ? 'yes' : 'no'
      out.output_string " standalone=\"#{flag}\""
    end
    out.output_string "?>"
  end
end
|
||||
|
||||
class DocType
  # Writes the document type declaration.
  def output(out, context)
    declaration = "<!DOCTYPE #{@root_element_name} #{generate_content}>"
    out.output_string declaration
  end

  # Returns the external identifier part of the declaration:
  # 'PUBLIC "<public id>"' or 'SYSTEM', followed by the quoted system
  # identifier when one is present.
  def generate_content # :nodoc:
    pieces = [@public_identifier ? "PUBLIC \"#{@public_identifier}\"" : "SYSTEM"]
    # Although a system identifier is not omissible in XML,
    # we cannot output it if it is not given.
    if @system_identifier
      # Use single quotes only when the identifier itself contains '"'.
      quote = (/"/ =~ @system_identifier) ? "'" : '"'
      pieces << "#{quote}#{@system_identifier}#{quote}"
    end
    pieces.join(' ')
  end
end
|
||||
|
||||
class ProcIns
  # Writes the processing instruction: "<?target", then " content" when
  # content is present, then "?>".
  def output(out, context)
    out.output_string "<?#{@target}"
    if @content
      out.output_string " #{@content}"
    end
    out.output_string "?>"
  end
end
|
||||
|
||||
class Comment
  # Writes the comment, content included, in a single output_string call.
  def output(out, context)
    markup = "<!--#{@content}-->"
    out.output_string markup
  end
end
|
||||
|
||||
# :startdoc:
|
||||
end
|
@ -0,0 +1,410 @@
|
||||
require 'htree/scan'
|
||||
require 'htree/htmlinfo'
|
||||
require 'htree/text'
|
||||
require 'htree/tag'
|
||||
require 'htree/leaf'
|
||||
require 'htree/doc'
|
||||
require 'htree/elem'
|
||||
require 'htree/raw_string'
|
||||
require 'htree/context'
|
||||
require 'htree/encoder'
|
||||
require 'htree/fstr'
|
||||
|
||||
module HTree
|
||||
# HTree.parse parses <i>input</i> and returns a document tree
# represented by HTree::Doc.
#
# <i>input</i> should be a String or
# an object which responds to the read or open method.
# For example, IO, StringIO, Pathname, URI::HTTP and URI::FTP are acceptable.
# Note that the URIs need open-uri.
#
# HTree.parse guesses whether <i>input</i> is HTML or not, and XML or not.
#
# If it is guessed to be HTML, the default namespace in the result is set
# to http://www.w3.org/1999/xhtml, regardless of whether <i>input</i> has
# an XML namespace declaration or is even pre-XML HTML.
#
# If it is guessed to be HTML and not XML, all element and attribute names
# are downcased.
#
# If the opened file or the read content has a charset method,
# HTree.parse decodes it according to $KCODE before parsing.
# Otherwise HTree.parse assumes the character encoding of the content is
# compatible with $KCODE.
# Note that the charset method is provided by URI::HTTP with open-uri.
def HTree.parse(input)
  HTree.with_frozen_string_hash do
    parse_as(input, false)
  end
end
|
||||
|
||||
# HTree.parse_xml parses <i>input</i> as XML and
# returns a document tree represented by HTree::Doc.
#
# It behaves almost the same as HTree.parse, but it assumes <i>input</i>
# is XML even if there is no XML declaration.
# The assumption causes the following differences:
# * element names are not downcased.
# * The content of <script> and <style> elements is PCDATA, not CDATA.
def HTree.parse_xml(input)
  HTree.with_frozen_string_hash do
    parse_as(input, true)
  end
end
|
||||
|
||||
# :stopdoc:
|
||||
|
||||
# Common implementation of HTree.parse and HTree.parse_xml.
#
# Obtains the source text from _input_ (calling read, or open followed by
# read, when available), converts it to the internal charset when a
# different charset is reported, scans it into tokens and builds the
# document from them.  _is_xml_ is the caller's initial assumption; the
# scanner returns the flags actually used for the remaining steps.
#
# Raises SecurityError for tainted input when $SAFE is 1 or higher.
def HTree.parse_as(input, is_xml)
  raise SecurityError, "input tainted" if input.tainted? && 1 <= $SAFE

  source_charset = nil
  if input.respond_to? :read # IO, StringIO
    input = input.read.untaint
    # The charset is looked up on the value returned by read.
    source_charset = input.charset if input.respond_to? :charset
  elsif input.respond_to? :open # Pathname, URI with open-uri
    input.open do |f|
      input = f.read.untaint
      source_charset = f.charset if f.respond_to? :charset
    end
  end
  if source_charset && source_charset != Encoder.internal_charset
    input = Iconv.conv(Encoder.internal_charset, source_charset, input)
  end

  tokens = []
  is_xml, is_html = HTree.scan(input, is_xml) {|token| tokens << token }
  root_context = is_html ? HTMLContext : DefaultContext
  paired = parse_pairs(tokens, is_xml, is_html)
  repaired = fix_structure_list(paired, is_xml, is_html)
  Doc.new(repaired.map {|structure| build_node(structure, is_xml, is_html, root_context) })
end
|
||||
|
||||
# Groups a flat token list into nested structures by pairing start tags
# with end tags.
#
# The stack holds frames of the form [tag name, start tag source, children];
# the bottom frame (name nil) collects the top-level structures.
# * A start tag opens a new frame.
# * An end tag closes the nearest open frame with the same name; frames
#   opened after it are closed implicitly (without an end tag).  An end
#   tag matching no open frame becomes a [:bogus_etag, source] structure.
# * Any other token is appended to the innermost open frame.
# Frames still open at the end of input are closed implicitly.
#
# Element structures are [:elem, start tag source, children] with the end
# tag source appended as a fourth item when one was present.
def HTree.parse_pairs(tokens, is_xml, is_html)
  fold_case = !is_xml && is_html
  stack = [[nil, nil, []]]
  tokens.each do |token|
    case token[0]
    when :stag
      stag_source = token[1]
      opened_name = stag_source[Pat::Name]
      opened_name = opened_name.downcase if fold_case
      stack << [HTree.frozen_string(opened_name), stag_source, []]
    when :etag
      etag_source = token[1]
      closed_name = etag_source[Pat::Name]
      closed_name = closed_name.downcase if fold_case
      closed_name = HTree.frozen_string(closed_name)
      target = stack.reverse.find {|frame| frame[0] == closed_name }
      if target
        # Implicitly close everything opened inside the matching frame.
        until target.equal? stack.last
          _, open_source, kids = stack.pop
          stack.last[2] << [:elem, open_source, kids]
        end
        _, open_source, kids = stack.pop
        stack.last[2] << [:elem, open_source, kids, etag_source]
      else
        stack.last[2] << [:bogus_etag, etag_source]
      end
    else
      stack.last[2] << token
    end
  end
  while 1 < stack.length
    _, open_source, kids = stack.pop
    stack.last[2] << [:elem, open_source, kids]
  end
  stack[0][2]
end
|
||||
|
||||
# Repairs a list of sibling structures.  Every element structure is passed
# through fix_element; the structures it hands back as not belonging
# inside that element are re-queued right after it and processed as
# siblings.  Non-element structures are kept as they are.
# The given list itself is not modified (a copy is consumed).
def HTree.fix_structure_list(structure_list, is_xml, is_html)
  fixed = []
  pending = structure_list.dup
  until pending.empty?
    current = pending.shift
    unless current[0] == :elem
      fixed << current
      next
    end
    repaired, spilled = fix_element(current, [], [], is_xml, is_html)
    fixed << repaired
    pending = spilled + pending
  end
  fixed
end
|
||||
|
||||
# Repairs one element structure and returns a pair
# [fixed element structure, structures that must follow it as siblings].
#
# * An element that has an explicit end tag keeps all its children (they
#   are only repaired recursively); nothing is handed back.
# * An element whose content model (ElementContent) is :EMPTY keeps no
#   children; all of them are handed back.
# * Otherwise children are kept up to the first child element whose name
#   may not appear inside this element; that child and everything after it
#   is handed back.  Which names are refused is derived from
#   ElementContent, ElementInclusions and ElementExclusions; for a tag
#   unknown to ElementContent only the inherited exclusions are refused.
#
# _excluded_tags_ and _included_tags_ are the exclusions/inclusions
# inherited from enclosing elements.
def HTree.fix_element(elem, excluded_tags, included_tags, is_xml, is_html)
  stag_raw_string = elem[1]
  children = elem[2]
  etag_raw_string = elem[3]
  if etag_raw_string
    repaired_children = fix_structure_list(children, is_xml, is_html)
    return [:elem, stag_raw_string, repaired_children, etag_raw_string], []
  end

  fold_case = !is_xml && is_html
  tagname = stag_raw_string[Pat::Name]
  tagname = tagname.downcase if fold_case
  content_model = ElementContent[tagname]
  return [:elem, stag_raw_string, []], children if content_model == :EMPTY

  possible_tags = (content_model == :CDATA) ? [] : content_model
  if possible_tags
    own_exclusions = ElementExclusions[tagname]
    own_inclusions = ElementInclusions[tagname]
    excluded_tags |= own_exclusions if own_exclusions
    included_tags |= own_inclusions if own_inclusions
    containable_tags = (possible_tags | included_tags) - excluded_tags
    uncontainable_tags = ElementContent.keys - containable_tags
  else
    # If the tagname is unknown, it is assumed that any element
    # except excluded can be contained.
    uncontainable_tags = excluded_tags
  end

  fixed_children = []
  rest = children
  until rest.empty?
    candidate = rest[0]
    unless candidate[0] == :elem
      fixed_children << rest.shift
      next
    end
    candidate_name = candidate[1][Pat::Name]
    candidate_name = candidate_name.downcase if fold_case
    # The first refused child ends this element; it stays in rest.
    break if uncontainable_tags.include? candidate_name
    rest.shift
    fixed_child, spilled = fix_element(candidate, excluded_tags, included_tags, is_xml, is_html)
    fixed_children << fixed_child
    rest = spilled + rest
  end
  return [:elem, stag_raw_string, fixed_children], rest
end
|
||||
|
||||
# Converts one structure (as produced by parse_pairs / fix_structure_list)
# into the corresponding node object, dispatching on its first item.
#
# For :elem structures the children are built recursively with the context
# of the parsed start tag.  An element is created without a child list
# only when it has no children, no end tag, lives in the XHTML namespace
# and its content model is :EMPTY.
#
# Raises Exception for an unknown kind of structure (an internal bug).
def HTree.build_node(structure, is_xml, is_html, inherited_context=DefaultContext)
  raw = structure[1]
  case structure[0]
  when :text_pcdata
    Text.parse_pcdata(raw)
  when :elem
    _, stag_rawstring, children, etag_rawstring = structure
    etag = etag_rawstring && ETag.parse(etag_rawstring, is_xml, is_html)
    stag = STag.parse(stag_rawstring, true, is_xml, is_html, inherited_context)
    bare_empty_element =
      children.empty? && !etag &&
      stag.element_name.namespace_uri == 'http://www.w3.org/1999/xhtml' &&
      HTree::ElementContent[stag.element_name.local_name] == :EMPTY
    if bare_empty_element
      Elem.new!(stag)
    else
      child_nodes = children.map {|c| build_node(c, is_xml, is_html, stag.context) }
      Elem.new!(stag, child_nodes, etag)
    end
  when :emptytag
    Elem.new!(STag.parse(raw, false, is_xml, is_html, inherited_context))
  when :bogus_etag
    BogusETag.parse(raw, is_xml, is_html)
  when :xmldecl
    XMLDecl.parse(raw)
  when :doctype
    DocType.parse(raw, is_xml, is_html)
  when :procins
    ProcIns.parse(raw)
  when :comment
    Comment.parse(raw)
  when :text_cdata_content
    Text.parse_cdata_content(raw)
  when :text_cdata_section
    Text.parse_cdata_section(raw)
  else
    raise Exception, "[bug] unknown structure: #{structure.inspect}"
  end
end
|
||||
|
||||
# Parses the source text of a start tag (_is_stag_ true) or an empty
# element tag (_is_stag_ false) into an STag.
#
# The strict patterns are tried first; the permissive ones, which also
# accept a final attribute with an unterminated quoted value, are the
# fallback.  Raises HTree::Error when neither matches.
#
# For non-XML HTML the tag name and the attribute names are downcased.
# An attribute given as a bare value gets its name from OmittedAttrName
# (falling back to the downcased value) when the element is listed there,
# and the value itself otherwise.
def STag.parse(raw_string, is_stag, is_xml, is_html, inherited_context=DefaultContext)
  fold_case = !is_xml && is_html
  attrs = []
  if (is_stag ? /\A#{Pat::ValidStartTag_C}\z/o : /\A#{Pat::ValidEmptyTag_C}\z/o) =~ raw_string
    qname = $1
    $2.scan(Pat::ValidAttr_C) do
      # $5 is set for an attribute written as a bare value.
      attrs.push($5 ? [nil, $5] : [$1, $2 || $3 || $4])
    end
  elsif (is_stag ? /\A#{Pat::InvalidStartTag_C}\z/o : /\A#{Pat::InvalidEmptyTag_C}\z/o) =~ raw_string
    qname = $1
    unterminated_attr = $3
    $2.scan(Pat::InvalidAttr1_C) do
      attrs.push($5 ? [nil, $5] : [$1, $2 || $3 || $4])
    end
    if unterminated_attr
      /#{Pat::InvalidAttr1End_C}/o =~ unterminated_attr
      attrs.push [$1, $2 || $3]
    end
  else
    raise HTree::Error, "cannot recognize as start tag or empty tag: #{raw_string.inspect}"
  end

  qname = qname.downcase if fold_case

  attrs.map! do |aname, aval|
    if aname
      [fold_case ? aname.downcase : aname, Text.parse_pcdata(aval)]
    else
      val2name = OmittedAttrName[qname]
      if val2name
        folded_value = aval.downcase
        [val2name.fetch(folded_value, folded_value), Text.new(aval)]
      else
        [aval, Text.new(aval)]
      end
    end
  end

  stag = STag.new(qname, attrs, inherited_context)
  stag.raw_string = raw_string
  stag
end
|
||||
|
||||
# Parses the source text of an end tag.  The name is downcased for
# non-XML HTML; the source text is recorded on the result.
# Raises HTree::Error when the text is not an end tag.
def ETag.parse(raw_string, is_xml, is_html)
  if /\A#{Pat::EndTag_C}\z/o !~ raw_string
    raise HTree::Error, "cannot recognize as end tag: #{raw_string.inspect}"
  end

  qname = $1
  qname = qname.downcase if !is_xml && is_html

  etag = self.new(qname)
  etag.raw_string = raw_string
  etag
end
|
||||
|
||||
# Parses the source text of an end tag that matched no start tag.
# The name is downcased for non-XML HTML; the source text is recorded on
# the result.  Raises HTree::Error when the text is not an end tag.
def BogusETag.parse(raw_string, is_xml, is_html)
  if /\A#{Pat::EndTag_C}\z/o !~ raw_string
    raise HTree::Error, "cannot recognize as end tag: #{raw_string.inspect}"
  end

  qname = $1
  qname = qname.downcase if !is_xml && is_html

  bogus = self.new(qname)
  bogus.raw_string = raw_string
  bogus
end
|
||||
|
||||
# Builds a Text from PCDATA source, repairing sloppy character references
# so that the stored text contains only well-formed references:
#
# * a reference that already ends with ';' is kept;
# * a numeric reference without ';' gets the ';' appended;
# * a name without ';' that matches NamedCharactersPattern becomes
#   "&name;";
# * a bare '&', and an '&' followed by a name that does not match, have
#   the ampersand escaped as "&amp;" (leaving it raw would store an
#   unescaped ampersand in the text).
#
# The original source is recorded as the raw string of the result.  When
# nothing had to be repaired the very same string object is used for both.
def Text.parse_pcdata(raw_string)
  fixed = raw_string.gsub(/&(?:(?:#[0-9]+|#x[0-9a-fA-F]+|([A-Za-z][A-Za-z0-9]*));?)?/o) {|s|
    name = $1
    case s
    when /;\z/
      s
    when /\A&#/
      "#{s};"
    when '&'
      '&amp;'
    else
      if NamedCharactersPattern =~ name
        "&#{name};"
      else
        "&amp;#{name}"
      end
    end
  }
  # Share one string object when the source needed no repair.
  fixed = raw_string if fixed == raw_string
  result = Text.new_internal(fixed)
  result.raw_string = raw_string
  result
end
|
||||
|
||||
# Builds a Text from the content of a CDATA element.  The source is passed
# to Text.new as it is and is also recorded as the raw string.
def Text.parse_cdata_content(raw_string)
  text = Text.new(raw_string)
  text.raw_string = raw_string
  text
end
|
||||
|
||||
# Builds a Text from a whole CDATA section.  The part captured by
# Pat::CDATA_C becomes the text; the full source is recorded as the raw
# string.  Raises HTree::Error when the source is not a CDATA section.
def Text.parse_cdata_section(raw_string)
  if /\A#{Pat::CDATA_C}\z/o !~ raw_string
    raise HTree::Error, "cannot recognize as CDATA section: #{raw_string.inspect}"
  end

  text = Text.new($1)
  text.raw_string = raw_string
  text
end
|
||||
|
||||
# Parses the source text of an XML declaration.
# The standalone value is true for 'yes', false for 'no' and nil otherwise.
# Raises HTree::Error when the text is not an XML declaration.
def XMLDecl.parse(raw_string)
  if /\A#{Pat::XmlDecl_C}\z/o !~ raw_string
    raise HTree::Error, "cannot recognize as XML declaration: #{raw_string.inspect}"
  end
  matched = $~

  # Each field is captured by one of two alternative groups.
  version = matched[1] || matched[2]
  encoding = matched[3] || matched[4]
  standalone = {'yes' => true, 'no' => false}[matched[5] || matched[6]]

  decl = XMLDecl.new(version, encoding, standalone)
  decl.raw_string = raw_string
  decl
end
|
||||
|
||||
# Parses the source text of a document type declaration.
# The root element name is downcased for non-XML HTML; the source text is
# recorded on the result.
# Raises HTree::Error when the text is not a document type declaration.
def DocType.parse(raw_string, is_xml, is_html)
  unless /\A#{Pat::DocType_C}\z/o =~ raw_string
    # The message names the construct that was actually expected here.
    raise HTree::Error, "cannot recognize as document type declaration: #{raw_string.inspect}"
  end

  root_element_name = $1
  # Each identifier is captured by one of two alternative groups.
  public_identifier = $2 || $3
  system_identifier = $4 || $5

  root_element_name = root_element_name.downcase if !is_xml && is_html

  result = DocType.new(root_element_name, public_identifier, system_identifier)
  result.raw_string = raw_string
  result
end
|
||||
|
||||
# Parses the source text of a processing instruction into its target and
# content; the source text is recorded on the result.
# Raises HTree::Error when the text is not a processing instruction.
def ProcIns.parse(raw_string)
  if /\A#{Pat::XmlProcIns_C}\z/o !~ raw_string
    raise HTree::Error, "cannot recognize as processing instruction: #{raw_string.inspect}"
  end

  instruction = ProcIns.new($1, $2)
  instruction.raw_string = raw_string
  instruction
end
|
||||
|
||||
# Parses the source text of a comment; the part captured by Pat::Comment_C
# becomes the content and the source text is recorded on the result.
# Raises HTree::Error when the text is not a comment.
def Comment.parse(raw_string)
  if /\A#{Pat::Comment_C}\z/o !~ raw_string
    raise HTree::Error, "cannot recognize as comment: #{raw_string.inspect}"
  end

  comment = Comment.new($1)
  comment.raw_string = raw_string
  comment
end
|
||||
|
||||
# :startdoc:
|
||||
end
|
@ -0,0 +1,127 @@
|
||||
require 'htree/modules'
|
||||
require 'htree/fstr'
|
||||
|
||||
module HTree
|
||||
module Node
  # raw_string returns a source string recorded by parsing.
  # It returns +nil+ if the node is constructed not via parsing.
  #
  # The text is accumulated in a buffer owned by this method, and that
  # buffer is what gets returned.  The return value of raw_string_internal
  # is deliberately ignored: container implementations may end with an
  # iteration or a conditional and so return something other than the
  # buffer.  A node without a recorded source aborts the collection by
  # throwing :raw_string_tag, in which case the result is +nil+.
  def raw_string
    result = ''
    catch(:raw_string_tag) {
      raw_string_internal(result)
      return result
    }
    nil
  end
end
|
||||
|
||||
# :stopdoc:
|
||||
class Doc
  # Appends the recorded source of every child to _result_ and returns
  # _result_.  (Returning the value of the iteration would hand the
  # children array, not the collected text, back to the caller.)
  def raw_string_internal(result)
    @children.each {|n|
      n.raw_string_internal(result)
    }
    result
  end
end
|
||||
|
||||
class Elem
  # Appends the recorded source of the start tag, of every child and of
  # the end tag (when there is one) to _result_, and returns _result_.
  # (Without the explicit return value an element lacking an end tag would
  # return nil from the trailing conditional.)
  def raw_string_internal(result)
    @stag.raw_string_internal(result)
    @children.each {|n| n.raw_string_internal(result) }
    @etag.raw_string_internal(result) if @etag
    result
  end
end
|
||||
|
||||
module Tag
  # Marks the tag as having no recorded source.
  def init_raw_string
    @raw_string = nil
  end

  # Records the source text of the tag (stored via HTree.frozen_string).
  def raw_string=(arg)
    @raw_string = HTree.frozen_string(arg)
  end

  # Appends the recorded source to _result_.  Without a recorded source
  # the whole collection is aborted by throwing :raw_string_tag.
  def raw_string_internal(result)
    throw :raw_string_tag unless @raw_string
    result << @raw_string
  end
end
|
||||
|
||||
module Leaf
  # Marks the leaf as having no recorded source.
  def init_raw_string
    @raw_string = nil
  end

  # Records the source text of the leaf (stored via HTree.frozen_string).
  def raw_string=(arg)
    @raw_string = HTree.frozen_string(arg)
  end

  # Appends the recorded source to _result_.  Without a recorded source
  # the whole collection is aborted by throwing :raw_string_tag.
  def raw_string_internal(result)
    throw :raw_string_tag unless @raw_string
    result << @raw_string
  end
end
|
||||
|
||||
class Text
  # Records the source text.  When it equals the text already held in
  # @rcdata, that very string object is reused as the raw string;
  # otherwise the inherited setter stores it.
  def raw_string=(arg)
    return super unless arg == @rcdata
    @raw_string = @rcdata
  end
end
|
||||
# :startdoc:
|
||||
|
||||
module Node
  # Placeholder for the operation that rebuilds a node without its
  # recorded source; the concrete node classes provide the real
  # implementations.  Reaching this one raises NotImplementedError.
  def eliminate_raw_string
    raise(NotImplementedError)
  end
end
|
||||
|
||||
# :stopdoc:
|
||||
|
||||
class Doc
  # Returns a new Doc built from the children with their recorded source
  # eliminated.
  def eliminate_raw_string
    stripped_children = @children.map {|child| child.eliminate_raw_string }
    Doc.new(stripped_children)
  end
end
|
||||
|
||||
class Elem
  # Returns a new Elem built from the start tag, the children and the end
  # tag with their recorded source eliminated.  An empty element passes
  # nil instead of a child list; a missing end tag stays missing.
  def eliminate_raw_string
    stripped_stag = @stag.eliminate_raw_string
    stripped_children =
      if @empty
        nil
      else
        @children.map {|child| child.eliminate_raw_string }
      end
    stripped_etag = @etag && @etag.eliminate_raw_string
    Elem.new!(stripped_stag, stripped_children, stripped_etag)
  end
end
|
||||
|
||||
class Text
  # Returns a new Text holding the same @rcdata, created through
  # Text.new_internal.
  def eliminate_raw_string
    stripped = Text.new_internal(@rcdata)
    stripped
  end
end
|
||||
|
||||
class STag
  # Returns a new STag created from the same qualified name, attributes
  # and inherited context.
  def eliminate_raw_string
    stripped = STag.new(@qualified_name, @attributes, @inherited_context)
    stripped
  end
end
|
||||
|
||||
class ETag
  # Returns a new tag of the receiver's own class created from the same
  # qualified name.
  def eliminate_raw_string
    tag_class = self.class
    tag_class.new(@qualified_name)
  end
end
|
||||
|
||||
class XMLDecl
  # Returns a new XMLDecl created from the same version, encoding and
  # standalone flag.
  def eliminate_raw_string
    stripped = XMLDecl.new(@version, @encoding, @standalone)
    stripped
  end
end
|
||||
|
||||
class DocType
  # Returns a new DocType created from the same root element name, public
  # identifier and system identifier.
  def eliminate_raw_string
    stripped = DocType.new(@root_element_name, @public_identifier, @system_identifier)
    stripped
  end
end
|
||||
|
||||
class ProcIns
  # Returns a new ProcIns created from the same target and content.
  def eliminate_raw_string
    stripped = ProcIns.new(@target, @content)
    stripped
  end
end
|
||||
|
||||
class Comment
  # Returns a new Comment created from the same content.
  def eliminate_raw_string
    stripped = Comment.new(@content)
    stripped
  end
end
|
||||
# :startdoc:
|
||||
end
|
@ -0,0 +1,14 @@
|
||||
class Regexp
  # Returns a new regexp equivalent to this one except that every plain
  # capturing group "(...)" is turned into a non-capturing group
  # "(?:...)".  Escaped parentheses and groups that already start with
  # "(?" are left untouched.  The options of the receiver are preserved.
  #
  # NOTE(review): the scan is purely lexical, so a literal "(" inside a
  # character class (e.g. /[(]/) is rewritten as well - avoid that form in
  # patterns passed through this method.
  def disable_capture
    # Tokens: an escape sequence, a run of ordinary characters,
    # an extended group opener "(?", or a plain group opener "(".
    tokens = self.source.scan(/\\.|[^\\\(]+|\(\?|\(/m)
    re = tokens.map {|s| s == '(' ? '(?:' : s }.join
    if respond_to?(:kcode)
      # Ruby 1.8: carry the regexp's character code over explicitly.
      Regexp.new(re, self.options, self.kcode)
    else
      # Later Rubies have no Regexp#kcode.
      Regexp.new(re, self.options)
    end
  end
end
|
||||
|
@ -0,0 +1,131 @@
|
||||
# = REXML Tree Generator
|
||||
#
|
||||
# HTree::Node#to_rexml is used for converting HTree to REXML.
|
||||
#
|
||||
# == Method Summary
|
||||
#
|
||||
# - HTree::Node#to_rexml -> REXML::Child
|
||||
#
|
||||
# == Example
|
||||
#
|
||||
# HTree.parse(...).to_rexml #=> REXML::Document
|
||||
#
|
||||
# == Comparison between HTree and REXML.
|
||||
#
|
||||
# - The HTree parser is a permissive HTML/XML parser.
|
||||
#   The REXML parser is a strict XML parser.
|
||||
# HTree is recommended if you need to parse realworld HTML.
|
||||
# REXML is recommended if you need strict error checking.
|
||||
# - HTree object is immutable.
|
||||
# REXML object is mutable.
|
||||
# REXML should be used if you need modification.
|
||||
#
|
||||
require 'htree/modules'
|
||||
require 'htree/output' # HTree::DocType#generate_content
|
||||
|
||||
module HTree
|
||||
module Node
  # Converts this node to a REXML tree.  REXML is loaded on first use;
  # the conversion starts without a parent and with DefaultContext.
  def to_rexml
    require 'rexml/document'
    parent = nil
    to_rexml_internal(parent, DefaultContext)
  end
end
|
||||
|
||||
# :stopdoc:
|
||||
|
||||
class Doc
  # Creates a REXML::Document and converts every child into it.
  # A document cannot be nested: raises ArgumentError unless _parent_ is
  # nil.
  def to_rexml_internal(parent, context)
    raise ArgumentError, "parent must be nil" unless parent == nil
    document = REXML::Document.new
    self.children.each do |child|
      child.to_rexml_internal(document, context)
    end
    document
  end
end
|
||||
|
||||
class Elem
  # Creates a REXML::Element under _parent_ for this element.
  #
  # Attributes are copied with their prefixed or local names.  A namespace
  # declaration is added for the element's prefix, and for every prefixed
  # attribute's prefix, whenever _context_ does not already bind that
  # prefix to the same URI.  The children are converted with the context
  # extended by those declarations.
  def to_rexml_internal(parent, context)
    ename = self.element_name
    ns_decl = {}
    # Remember a prefix whose binding is not provided by the context.
    declare_unless_bound = lambda {|name|
      if context.namespace_uri(name.namespace_prefix) != name.namespace_uri
        ns_decl[name.namespace_prefix] = name.namespace_uri
      end
    }
    # "prefix:local" for prefixed names, the local name otherwise.
    rexml_name = lambda {|name|
      name.namespace_prefix ? "#{name.namespace_prefix}:#{name.local_name}" : name.local_name
    }

    declare_unless_bound.call(ename)
    result = REXML::Element.new(rexml_name.call(ename), parent)

    self.each_attribute do |aname, atext|
      declare_unless_bound.call(aname) if aname.namespace_prefix
      result.add_attribute(rexml_name.call(aname), atext.to_s)
    end

    ns_decl.each do |prefix, uri|
      if prefix
        result.add_namespace(prefix, uri)
      else
        # A nil prefix stands for the default namespace.
        result.add_namespace(uri)
      end
    end
    context = context.subst_namespaces(ns_decl)

    self.children.each do |child|
      child.to_rexml_internal(result, context)
    end
    result
  end
end
|
||||
|
||||
class Text
  # Creates a REXML::Text under _parent_ from this text's rcdata, with
  # '<' and '>' replaced through Encoder::ChRef.
  def to_rexml_internal(parent, context)
    escaped = self.rcdata.gsub(/[<>]/) {|ch| Encoder::ChRef[ch] }
    REXML::Text.new(escaped, true, parent, true)
  end
end
|
||||
|
||||
class XMLDecl
  # Creates a REXML::XMLDecl from the version, encoding and standalone
  # values, appends it to _parent_ when one is given, and returns it.
  def to_rexml_internal(parent, context)
    decl = REXML::XMLDecl.new(self.version, self.encoding, self.standalone)
    if parent
      parent << decl
    end
    decl
  end
end
|
||||
|
||||
class DocType
  # Creates a REXML::DocType under _parent_ from the root element name and
  # the generated external identifier.
  def to_rexml_internal(parent, context)
    description = [self.root_element_name, self.generate_content]
    REXML::DocType.new(description, parent)
  end
end
|
||||
|
||||
class ProcIns
  # Creates a REXML::Instruction from the target and content, appends it
  # to _parent_ when one is given, and returns it.
  def to_rexml_internal(parent, context)
    instruction = REXML::Instruction.new(self.target, self.content)
    if parent
      parent << instruction
    end
    instruction
  end
end
|
||||
|
||||
class Comment
  # Creates a REXML::Comment with this comment's content under _parent_.
  def to_rexml_internal(parent, context)
    text = self.content
    REXML::Comment.new(text, parent)
  end
end
|
||||
|
||||
class BogusETag
  # A stray end tag has no REXML counterpart: nothing is added to
  # _parent_ and nil is returned.
  def to_rexml_internal(parent, context)
    return nil
  end
end
|
||||
|
||||
# :startdoc:
|
||||
end
|
@ -0,0 +1,166 @@
|
||||
require 'htree/htmlinfo'
|
||||
require 'htree/regexp-util'
|
||||
require 'htree/fstr'
|
||||
|
||||
module HTree
|
||||
# :stopdoc:
|
||||
module Pat
  # Naming convention used throughout: a constant ending in _C keeps its
  # capture groups; the constant without the suffix is the result of calling
  # disable_capture on it (presumably the same pattern with capturing turned
  # off, so it can be embedded without shifting group numbers -- confirm in
  # htree/regexp-util).

  # -- names ---------------------------------------------------------------
  NameChar = /[-A-Za-z0-9._:]/
  Name = /[A-Za-z_:]#{NameChar}*/
  Nmtoken = /#{NameChar}+/

  # -- comments and CDATA sections -----------------------------------------
  Comment_C = /<!--(.*?)-->/m
  Comment = Comment_C.disable_capture
  CDATA_C = /<!\[CDATA\[(.*?)\]\]>/m
  CDATA = CDATA_C.disable_capture

  # -- attributes, from strictest to most permissive -----------------------
  QuotedAttr_C = /(#{Name})\s*=\s*(?:"([^"]*)"|'([^']*)')/
  QuotedAttr = QuotedAttr_C.disable_capture
  ValidAttr_C = /(#{Name})\s*=\s*(?:"([^"]*)"|'([^']*)'|(#{NameChar}*))|(#{Nmtoken})/
  ValidAttr = ValidAttr_C.disable_capture
  InvalidAttr1_C = /(#{Name})\s*=\s*(?:'([^'<>]*)'|"([^"<>]*)"|([^\s<>"']*(?![^\s<>"'])))|(#{Nmtoken})/
  InvalidAttr1 = InvalidAttr1_C.disable_capture
  # Attribute whose quoted value is opened but never closed.
  InvalidAttr1End_C = /(#{Name})(?:\s*=\s*(?:'([^'<>]*)|"([^"<>]*)))/
  InvalidAttr1End = InvalidAttr1End_C.disable_capture

  # -- start tags ----------------------------------------------------------
  QuotedStartTag_C = /<(#{Name})((?:\s+#{QuotedAttr})*)\s*>/
  QuotedStartTag = QuotedStartTag_C.disable_capture
  ValidStartTag_C = /<(#{Name})((?:\s+#{ValidAttr})*)\s*>/
  ValidStartTag = ValidStartTag_C.disable_capture
  InvalidStartTag_C = /<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*>/
  InvalidStartTag = InvalidStartTag_C.disable_capture
  StartTag = /#{QuotedStartTag}|#{ValidStartTag}|#{InvalidStartTag}/

  # -- empty-element tags (same three levels, closed with "/>") ------------
  QuotedEmptyTag_C = %r{<(#{Name})((?:\s+#{QuotedAttr})*)\s*/>}
  QuotedEmptyTag = QuotedEmptyTag_C.disable_capture
  ValidEmptyTag_C = %r{<(#{Name})((?:\s+#{ValidAttr})*)\s*/>}
  ValidEmptyTag = ValidEmptyTag_C.disable_capture
  InvalidEmptyTag_C = %r{<(#{Name})((?:(?:\b|\s+)#{InvalidAttr1})*)((?:\b|\s+)#{InvalidAttr1End})?\s*/>}
  InvalidEmptyTag = InvalidEmptyTag_C.disable_capture
  EmptyTag = /#{QuotedEmptyTag}|#{ValidEmptyTag}|#{InvalidEmptyTag}/

  # -- end tags ------------------------------------------------------------
  EndTag_C = %r{</(#{Name})\s*>}
  EndTag = EndTag_C.disable_capture

  # -- XML declaration -----------------------------------------------------
  XmlVersionNum = /[a-zA-Z0-9_.:-]+/
  XmlVersionInfo_C = /\s+version\s*=\s*(?:'(#{XmlVersionNum})'|"(#{XmlVersionNum})")/
  XmlVersionInfo = XmlVersionInfo_C.disable_capture
  XmlEncName = /[A-Za-z][A-Za-z0-9._-]*/
  XmlEncodingDecl_C = /\s+encoding\s*=\s*(?:"(#{XmlEncName})"|'(#{XmlEncName})')/
  XmlEncodingDecl = XmlEncodingDecl_C.disable_capture
  XmlSDDecl_C = /\s+standalone\s*=\s*(?:'(yes|no)'|"(yes|no)")/
  XmlSDDecl = XmlSDDecl_C.disable_capture
  XmlDecl_C = /<\?xml#{XmlVersionInfo_C}#{XmlEncodingDecl_C}?#{XmlSDDecl_C}?\s*\?>/
  XmlDecl = /<\?xml#{XmlVersionInfo}#{XmlEncodingDecl}?#{XmlSDDecl}?\s*\?>/

  # -- document type declaration -------------------------------------------
  # xxx: internal DTD subset is not recognized: '[' (markupdecl | DeclSep)* ']' S?)?
  SystemLiteral_C = /"([^"]*)"|'([^']*)'/
  PubidLiteral_C = %r{"([\sa-zA-Z0-9\-'()+,./:=?;!*\#@$_%]*)"|'([\sa-zA-Z0-9\-()+,./:=?;!*\#@$_%]*)'}
  ExternalID_C = /(?:SYSTEM|PUBLIC\s+#{PubidLiteral_C})(?:\s+#{SystemLiteral_C})?/
  # Groups: 1 = root element name, 2/3 = public identifier (double/single
  # quoted), 4/5 = system identifier (double/single quoted).
  DocType_C = /<!DOCTYPE\s+(#{Name})(?:\s+#{ExternalID_C})?\s*(?:\[.*?\]\s*)?>/m
  DocType = DocType_C.disable_capture

  # -- processing instructions ---------------------------------------------
  XmlProcIns_C = /<\?(#{Name})(?:\s+(.*?))?\?>/m
  XmlProcIns = XmlProcIns_C.disable_capture
  #ProcIns = /<\?([^>]*)>/m
end
|
||||
|
||||
# Tokenizes +input+ and yields one two-element array [type, string] per token,
# each string passed through HTree.frozen_string. Types yielded are
# :xmldecl, :doctype, :procins, :stag, :etag, :emptytag, :comment,
# :text_pcdata, :text_cdata_content and :text_cdata_section.
#
# +is_xml+ seeds the XML flag. Returns [is_xml, is_html] as detected while
# scanning.
def HTree.scan(input, is_xml=false)
  # Capture-group number of each alternative in the tokenizer pattern below.
  group = {
    :xmldecl => 1,
    :doctype => 2,
    :xmlprocins => 3,
    :quotedstarttag => 4,
    :quotedemptytag => 5,
    :starttag => 6,
    :endtag => 7,
    :emptytag => 8,
    :comment => 9,
    :cdata => 10,
  }

  is_html = false
  first_element = true
  cdata_content = nil # name of the CDATA-content element currently open, if any
  text_start = 0      # offset where the pending run of text begins

  input.scan(/(#{Pat::XmlDecl})
             |(#{Pat::DocType})
             |(#{Pat::XmlProcIns})
             |(#{Pat::QuotedStartTag})
             |(#{Pat::QuotedEmptyTag})
             |(#{Pat::StartTag})
             |(#{Pat::EndTag})
             |(#{Pat::EmptyTag})
             |(#{Pat::Comment})
             |(#{Pat::CDATA})
             /ox) do
    # Grab the MatchData first: later regexp operations overwrite $~.
    match = $~
    str = match[0]

    if cdata_content
      # While inside a CDATA-content element only its own end tag counts as
      # markup; every other match is left as part of the pending text.
      next unless match.begin(group[:endtag]) && str[Pat::Name] == cdata_content
      text_end = match.begin(0)
      if text_start < text_end
        yield [:text_cdata_content, HTree.frozen_string(input[text_start...text_end])]
      end
      yield [:etag, HTree.frozen_string(str)]
      text_start = match.end(0)
      cdata_content = nil
      next
    end

    # Flush the text lying between the previous token and this one.
    text_end = match.begin(0)
    if text_start < text_end
      yield [:text_pcdata, HTree.frozen_string(input[text_start...text_end])]
    end
    text_start = match.end(0)

    if match.begin(group[:xmldecl])
      yield [:xmldecl, HTree.frozen_string(str)]
      is_xml = true
    elsif match.begin(group[:doctype])
      Pat::DocType_C =~ str
      # Read the captures before any further regexp match replaces them.
      # (Groups 4 and 5, the system identifier, are not needed here.)
      root_element_name = $1
      public_identifier = $2 || $3
      is_html = true if /\Ahtml\z/i =~ root_element_name
      is_xml = true if public_identifier && %r{\A-//W3C//DTD XHTML } =~ public_identifier
      yield [:doctype, HTree.frozen_string(str)]
    elsif match.begin(group[:xmlprocins])
      yield [:procins, HTree.frozen_string(str)]
    elsif match.begin(group[:starttag]) || match.begin(group[:quotedstarttag])
      yield [:stag, HTree.frozen_string(str)]
      tagname = str[Pat::Name]
      if first_element
        # The first start tag decides the dialect: a known HTML head-level
        # element means HTML, anything else means XML.
        if /\A(?:html|head|title|isindex|base|script|style|meta|link|object)\z/i =~ tagname
          is_html = true
        else
          is_xml = true
        end
        first_element = false
      end
      cdata_content = tagname if !is_xml && ElementContent[tagname] == :CDATA
    elsif match.begin(group[:endtag])
      yield [:etag, HTree.frozen_string(str)]
    elsif match.begin(group[:emptytag]) || match.begin(group[:quotedemptytag])
      yield [:emptytag, HTree.frozen_string(str)]
      first_element = false
      #is_xml = true
    elsif match.begin(group[:comment])
      yield [:comment, HTree.frozen_string(str)]
    elsif match.begin(group[:cdata])
      yield [:text_cdata_section, HTree.frozen_string(str)]
    else
      raise Exception, "unknown match [bug]"
    end
  end

  # Whatever follows the last token is text of the kind currently open.
  text_end = input.length
  if text_start < text_end
    kind = cdata_content ? :text_cdata_content : :text_pcdata
    yield [kind, HTree.frozen_string(input[text_start...text_end])]
  end

  [is_xml, is_html]
end
|
||||
# :startdoc:
|
||||
end
|
@ -0,0 +1,113 @@
|
||||
require 'htree/raw_string'
|
||||
require 'htree/text'
|
||||
require 'htree/scan' # for Pat::Name and Pat::Nmtoken
|
||||
require 'htree/context'
|
||||
require 'htree/name'
|
||||
require 'htree/fstr'
|
||||
|
||||
module HTree
|
||||
# :stopdoc:
|
||||
|
||||
class STag
  attr_reader :attributes, :inherited_context, :context

  # Builds a start tag.
  #
  # name::              a Name, or a string parsed with Name.parse_element_name
  # attributes::        [attribute name, value] pairs; names are Name objects
  #                     or strings, values are wrapped in Text unless they
  #                     already are one
  # inherited_context:: the context whose namespaces this tag extends
  #
  # Raises HTree::Error for a malformed attribute name or an unprefixed
  # attribute carrying a namespace URI, and ArgumentError when one prefix is
  # used for two different namespace URIs.
  def initialize(name, attributes=[], inherited_context=DefaultContext)
    init_raw_string

    # normalize xml declaration name and attribute value.
    attributes = attributes.map {|aname, val|
      unless Name === aname
        if /\A(?:#{Pat::Name}?\{.*\})?#{Pat::Nmtoken}\z/o !~ aname
          raise HTree::Error, "invalid attribute name: #{aname.inspect}"
        end
        # xmlns declarations are parsed up front, without a context.
        aname = Name.parse_attribute_name(aname, nil) if /\Axmlns(?:\z|:)/ =~ aname
      end
      val = val.to_node if HTree::Location === val
      val = Text.new(val) unless Text === val
      [aname, val]
    }

    @inherited_context = inherited_context
    @xmlns_decls = {}

    # validate namespace consistency of given Name objects.
    @xmlns_decls[name.namespace_prefix] = name.namespace_uri if Name === name

    attributes.each {|aname, _text|
      next unless Name === aname
      next if aname.xmlns?
      prefix = aname.namespace_prefix
      next unless prefix
      uri = aname.namespace_uri
      next unless uri
      if !@xmlns_decls.include?(prefix)
        @xmlns_decls[prefix] = uri
      elsif @xmlns_decls[prefix] != uri
        raise ArgumentError, "inconsistent namespace use: #{prefix} is used as #{@xmlns_decls[prefix]} and #{uri}"
      end
    }

    # Explicit xmlns attributes only fill prefixes not already fixed above.
    # A nil local name is the default-namespace declaration.
    attributes.each {|aname, text|
      next unless Name === aname
      next unless aname.xmlns?
      declared = aname.local_name
      next if @xmlns_decls.include? declared
      @xmlns_decls[declared] = text.to_s
    }

    @context = make_context(@inherited_context)

    @name = Name === name ? name : Name.parse_element_name(name, @context)

    @attributes = attributes.map {|aname, text|
      aname = Name.parse_attribute_name(aname, @context) unless Name === aname
      if !aname.namespace_prefix && !aname.namespace_uri.empty?
        # xxx: should recover error?
        raise HTree::Error, "global attribute without namespace prefix: #{aname.inspect}"
      end
      [aname, text]
    }
    @attributes.freeze
  end

  # The resolved element name of this tag.
  def element_name
    @name
  end

  # Returns +inherited_context+ extended with this tag's namespace
  # declarations.
  def make_context(inherited_context)
    inherited_context.subst_namespaces(@xmlns_decls)
  end

  # Yields each declared namespace as (prefix, uri). Returns nil.
  def each_namespace_attribute
    @xmlns_decls.each {|prefix, uri| yield prefix, uri }
    nil
  end

  # Yields each attribute as (name, text), skipping xmlns declarations.
  # Returns nil.
  def each_attribute
    @attributes.each {|aname, text|
      yield aname, text unless aname.xmlns?
    }
    nil
  end
end
|
||||
|
||||
class ETag
  # The tag's qualified name, as returned by HTree.frozen_string.
  attr_reader :qualified_name

  # Creates an end tag for +qualified_name+.
  def initialize(qualified_name)
    init_raw_string
    @qualified_name = HTree.frozen_string(qualified_name)
  end
end
|
||||
|
||||
# :startdoc:
|
||||
end
|
@ -0,0 +1,961 @@
|
||||
# = Template Engine
|
||||
#
|
||||
# The htree template engine converts HTML and some data to HTML or XML.
|
||||
#
|
||||
# == Template Method Summary
|
||||
#
|
||||
# - HTree.expand_template(<i>template_pathname</i>) -> $stdout
|
||||
# - HTree.expand_template(<i>template_pathname</i>, <i>obj</i>) -> $stdout
|
||||
# - HTree.expand_template(<i>template_pathname</i>, <i>obj</i>, <i>out</i>) -> <i>out</i>
|
||||
# - HTree.expand_template(<i>template_pathname</i>, <i>obj</i>, <i>out</i>, <i>encoding</i>) -> <i>out</i>
|
||||
#
|
||||
# - HTree.expand_template{<i>template_string</i>} -> $stdout
|
||||
# - HTree.expand_template(<i>out</i>) {<i>template_string</i>} -> <i>out</i>
|
||||
# - HTree.expand_template(<i>out</i>, <i>encoding</i>) {<i>template_string</i>} -> <i>out</i>
|
||||
#
|
||||
# - HTree.compile_template(<i>template_string</i>) -> Module
|
||||
# - HTree{<i>template_string</i>} -> HTree::Doc
|
||||
#
|
||||
# Note that the following method, HTree(), is not a template method.
|
||||
#
|
||||
# - HTree(<i>html_string</i>) -> HTree::Doc
|
||||
#
|
||||
# == Template Directives.
|
||||
#
|
||||
# A template directive is described as a special HTML attribute whose name
|
||||
# begins with underscore.
|
||||
#
|
||||
# The template directives are listed as follows.
|
||||
#
|
||||
# - <elem \_attr_<i>name</i>="<i>expr</i>">content</elem>
|
||||
# - <elem _text="<i>expr</i>">dummy-content</elem>
|
||||
# - <elem _text><i>expr</i></elem>
|
||||
# - <elem _tree="<i>expr</i>">dummy-content</elem>
|
||||
# - <elem _tree><i>expr</i></elem>
|
||||
# - <elem _if="<i>expr</i>" _else="<i>mod.name(args)</i>">then-content</elem>
|
||||
# - <elem _iter="<i>expr.meth(args)//vars</i>">content</elem>
|
||||
# - <elem _iter_content="<i>expr.meth(args)//vars</i>">content</elem>
|
||||
# - <elem _call="<i>mod.name(args)</i>">dummy-content</elem>
|
||||
# - <elem _template="<i>name(vars)</i>">body</elem>
|
||||
#
|
||||
# === Template Semantics
|
||||
#
|
||||
# - attribute substitution
|
||||
# - <elem \_attr_<i>name</i>="<i>expr</i>">content</elem>
|
||||
#
|
||||
# \_attr_<i>name</i> is used for a dynamic attribute.
|
||||
#
|
||||
# <elem _attr_xxx="..."/>
|
||||
# -> <elem xxx="..."/>
|
||||
#
|
||||
# It is expanded to <i>name</i>="content".
|
||||
# The content is generated by evaluating _expr_.
|
||||
# Usually you don't need to care escaping: &, <, > and " are automatically escaped.
|
||||
# If you need to output character references,
|
||||
# the value of _expr_ should be an object which have a +rcdata+ method such as an HTree::Text.
|
||||
# If the value has a +rcdata+ method,
|
||||
# it is called and the result is used as the content with escaping <, > and ".
|
||||
#
|
||||
# \_attr_<i>name</i> can be used multiple times in single element.
|
||||
#
|
||||
# - text substitution
|
||||
# - <elem _text="<i>expr</i>">dummy-content</elem>
|
||||
# - <elem _text><i>expr</i></elem>
|
||||
#
|
||||
# _text substitutes the content of the element by the string
|
||||
# evaluated from _expr_.
|
||||
# _expr_ is described in the attribute value or the content of the element.
|
||||
#
|
||||
# If a result of _expr_ have &, < and/or >, they are automatically escaped.
|
||||
# If you need to output character references,
|
||||
# the value of _expr_ should be an object which have a +rcdata+ method such as an HTree::Text.
|
||||
# If the value has a +rcdata+ method,
|
||||
# it is called and the result is used as the content with escaping < and >.
|
||||
#
|
||||
# If the element is span or div, and there is no other attributes,
|
||||
# no tags are produced.
|
||||
#
|
||||
# <elem _text="...">dummy-content</elem>
|
||||
# -> <elem>...</elem>
|
||||
#
|
||||
# - tree substitution
|
||||
# - <elem _tree="<i>expr</i>">dummy-content</elem>
|
||||
# - <elem _tree><i>expr</i></elem>
|
||||
#
|
||||
# _tree substitutes the content of the element by the htree object
|
||||
# evaluated from _expr_.
|
||||
# _expr_ is described in the attribute value or the content of the element.
|
||||
#
|
||||
# If the element is span or div, and there is no other attributes,
|
||||
# no tags are produced.
|
||||
#
|
||||
# <elem _tree="...">dummy-content</elem>
|
||||
# -> <elem>...</elem>
|
||||
#
|
||||
# - conditional
|
||||
# - <elem _if="<i>expr</i>">then-content</elem>
|
||||
# - <elem _if="<i>expr</i>" _else="<i>name(args)</i>">then-content</elem>
|
||||
#
|
||||
# _if is used for conditional.
|
||||
#
|
||||
# If <i>expr</i> is evaluated to true, it expands as follows
|
||||
# regardless of existence of _else.
|
||||
#
|
||||
# <elem _if="<i>expr</i>">then-content</elem>
|
||||
# -> <elem>then-content</elem>
|
||||
#
|
||||
# If <i>expr</i> is evaluated to false, it expands using _else.
|
||||
# If _else is not given, it expands to empty.
|
||||
# If _else is given, it expands as follows.
|
||||
#
|
||||
# <elem _if="<i>expr</i>" _else="<i>name(args)</i>">then-content</elem>
|
||||
# -> <elem _call="<i>name(args)</i>">then-content</elem>
|
||||
# -> see _call for further expansion.
|
||||
#
|
||||
# It is expanded to <elem>then-content</elem> if _expr_ is evaluated to
|
||||
# a true value.
|
||||
# Otherwise, it is replaced by other template specified by _else attribute.
|
||||
# If _else attribute is not given, it just replaced by empty.
|
||||
#
|
||||
# - iteration
|
||||
# - <elem _iter="<i>expr.meth(args)//vars</i>">content</elem>
|
||||
# - <elem _iter_content="<i>expr.meth(args)//vars</i>">content</elem>
|
||||
#
|
||||
# _iter and _iter_content are used for iteration.
|
||||
# _iter iterates the element itself but _iter_content iterates the content.
|
||||
#
|
||||
# <outer _iter="..."><inner/></outer>
|
||||
# -> <outer><inner/></outer><outer><inner/></outer>...
|
||||
#
|
||||
# <outer _iter_content="..."><inner/></outer>
|
||||
# -> <outer><inner/><inner/>...</outer>
|
||||
#
|
||||
# <i>expr.meth(args)</i> specifies iterator method call.
|
||||
# It is actually called with a block.
|
||||
# The block has block parameters <i>vars</i>.
|
||||
# <i>vars</i> must be variables separated by comma.
|
||||
#
|
||||
# - template call
|
||||
# - <elem _call="<i>name(args)</i>">dummy-content</elem>
|
||||
# - <elem _call="<i>mod.name(args)</i>">dummy-content</elem>
|
||||
#
|
||||
# _call is used to expand a template function.
|
||||
# The template function is defined by _template.
|
||||
#
|
||||
# <d _template="m">...</d>
|
||||
# <c _call="m">...</c>
|
||||
# -> <d>...</d>
|
||||
#
|
||||
# A local template can be called as follows:
|
||||
#
|
||||
# HTree.expand_template{<<'End'}
|
||||
# <a _template=ruby_talk(num)
|
||||
# _attr_href='"http://ruby-talk.org/#{num}"'
|
||||
# >[ruby-talk:<span _text=num>nnn</span>]</a>
|
||||
# Ruby 1.8.0 is released at <span _call=ruby_talk(77946) />.
|
||||
# Ruby 1.8.1 is released at <span _call=ruby_talk(88814) />.
|
||||
# End
|
||||
#
|
||||
# <i>mod</i> should be the result of HTree.compile_template.
|
||||
#
|
||||
# M = HTree.compile_template(<<'End')
|
||||
# <a _template=ruby_talk(num)
|
||||
# _attr_href='"http://ruby-talk.org/#{num}"'
|
||||
# >[ruby-talk:<span _text=num>nnn</span>]</a>
|
||||
# End
|
||||
# HTree.expand_template{<<'End'}
|
||||
# <html>
|
||||
# Ruby 1.8.0 is released at <span _call=M.ruby_talk(77946) />.
|
||||
# Ruby 1.8.1 is released at <span _call=M.ruby_talk(88814) />.
|
||||
# </html>
|
||||
# End
|
||||
#
|
||||
# The module can be included.
|
||||
# In such case, the template function can be called without <i>mod.</i>
|
||||
# prefix.
|
||||
#
|
||||
# include HTree.compile_template(<<'End')
|
||||
# <a _template=ruby_talk(num)
|
||||
# _attr_href='"http://ruby-talk.org/#{num}"'
|
||||
# >[ruby-talk:<span _text=num>nnn</span>]</a>
|
||||
# End
|
||||
# HTree.expand_template{<<'End'}
|
||||
# <html>
|
||||
# Ruby 1.8.0 is released at <span _call=ruby_talk(77946) />.
|
||||
# Ruby 1.8.1 is released at <span _call=ruby_talk(88814) />.
|
||||
# </html>
|
||||
# End
|
||||
#
|
||||
# - template definition
|
||||
# - <elem _template="<i>name(vars)</i>">body</elem>
|
||||
#
|
||||
# _template defines a template function which is usable by _call.
|
||||
#
|
||||
# When a template is compiled to a module by HTree.compile_template,
|
||||
# the module have a module function for each template function
|
||||
# defined by outermost _template attribute.
|
||||
#
|
||||
# === White Space Handling
|
||||
#
|
||||
# The htree template engine strips whitespace text nodes in a template
|
||||
# except under HTML pre element.
|
||||
#
|
||||
# For example the white space text node between two spans in following template is stripped.
|
||||
#
|
||||
# <span _text="'a'"/> <span _text="'b'"/> -> "ab"
|
||||
#
|
||||
# Character entity references are not stripped.
|
||||
#
|
||||
# <span _text="'a'"/>&#32;<span _text="'b'"/> -> "a&#32;b"
|
||||
#
|
||||
# Text nodes generated by _text are not stripped.
|
||||
#
|
||||
# <span _text="'a'"/><span _text="' '"> </span><span _text="'b'"/> -> "a b"
|
||||
#
|
||||
# == HTML and XML
|
||||
#
|
||||
# The htree template engine outputs HTML or XML.
|
||||
#
|
||||
# If a template has no XML declaration and the top element is HTML,
|
||||
# the result is HTML.
|
||||
# Otherwise the result is XML.
|
||||
#
|
||||
# They differ as follows.
|
||||
#
|
||||
# - XML declaration is (re-)generated for XML.
|
||||
# - empty elements ends with a slash for XML.
|
||||
# - script and style element is escaped for XML.
|
||||
#
|
||||
# == Design Decision on Design/Logic Separation
|
||||
#
|
||||
# HTree template engine doesn't force you to separate design and logic.
|
||||
# Any logic (Ruby code) can be embedded in design (HTML).
|
||||
#
|
||||
# However the template engine cares the separation by logic refactorings.
|
||||
# The logic is easy to move between a template and an application.
|
||||
# For example, following tangled template
|
||||
#
|
||||
# tmpl.html:
|
||||
# <html>
|
||||
# <head>
|
||||
# <title _text="very-complex-ruby-code">dummy</title>
|
||||
# </head>
|
||||
# ...
|
||||
# </html>
|
||||
#
|
||||
# app.rb:
|
||||
# HTree.expand_template('tmpl.html', obj)
|
||||
#
|
||||
# can be refactored as follows.
|
||||
#
|
||||
# tmpl.html:
|
||||
# <html>
|
||||
# <head>
|
||||
# <title _text="title">dummy</title>
|
||||
# </head>
|
||||
# ...
|
||||
# </html>
|
||||
#
|
||||
# app.rb:
|
||||
# def obj.title
|
||||
# very-complex-ruby-code
|
||||
# end
|
||||
# HTree.expand_template('tmpl.html', obj)
|
||||
#
|
||||
# In general, any expression in a template can be refactored to an application
|
||||
# by extracting it as a method.
|
||||
# In JSP, this is difficult especially for a code fragment of an iteration.
|
||||
#
|
||||
# Also HTree encourages to separate business logic (Ruby code in an application)
|
||||
# and presentation logic (Ruby code in a template).
|
||||
# For example, presentation logic to color table rows stripe
|
||||
# can be embedded in a template.
|
||||
# It doesn't need to tangle an application.
|
||||
#
|
||||
|
||||
module HTree
  # :stopdoc:
  # A bare object whose only job is to hand out a binding in which no local
  # variables are defined and +self+ is this object.
  EmptyBindingObject = Object.new
  # :startdoc:
end

# :stopdoc:
# Defined outside the +module HTree+ body on purpose: moving it inside would
# change the lexical scope captured by the returned binding.
def (HTree::EmptyBindingObject).empty_binding
  binding
end
|
||||
# :startdoc:
|
||||
|
||||
require 'htree/parse'
|
||||
require 'htree/gencode'
|
||||
require 'htree/equality'
|
||||
require 'htree/traverse'
|
||||
|
||||
# call-seq:
|
||||
# HTree.expand_template(template_pathname, obj=Object.new, out=$stdout, encoding=internal_encoding) -> out
|
||||
# HTree.expand_template(out=$stdout, encoding=internal_encoding) { template_string } -> out
|
||||
#
|
||||
# <code>HTree.expand_template</code> expands a template.
|
||||
#
|
||||
# The arguments should be specified as follows.
|
||||
# All arguments except <i>pathname</i> are optional.
|
||||
#
|
||||
# - HTree.expand_template(<i>pathname</i>, <i>obj</i>, <i>out</i>, <i>encoding</i>) -> <i>out</i>
|
||||
# - HTree.expand_template(<i>out</i>, <i>encoding</i>) {<i>template_string</i>} -> <i>out</i>
|
||||
#
|
||||
# The template is specified by a file or a string.
|
||||
# If a block is not given, the first argument represent a template pathname.
|
||||
# Otherwise, the block is yielded and its value is interpreted as a template
|
||||
# string.
|
||||
# So it can be called as follows in simplest case.
|
||||
#
|
||||
# - HTree.expand_template(<i>template_pathname</i>)
|
||||
# - HTree.expand_template{<i>template_string</i>}
|
||||
#
|
||||
# Ruby expressions in the template file specified by _template_pathname_ are
|
||||
# evaluated in the context of the optional second argument <i>obj</i> as follows.
|
||||
# I.e. the pseudo variable self in the expressions is bound to <i>obj</i>.
|
||||
#
|
||||
# HTree.expand_template(template_pathname, obj)
|
||||
#
|
||||
# Ruby expressions in the template_string are evaluated
|
||||
# in the context of the caller of HTree.expand_template.
|
||||
# (binding information is specified by the block.)
|
||||
# I.e. they can access local variables etc.
|
||||
# We recommend to specify template_string as a literal string without
|
||||
# interpolation because dynamically generated string may break lexical scope.
|
||||
#
|
||||
# HTree.expand_template has two more optional arguments:
|
||||
# <i>out</i>, <i>encoding</i>.
|
||||
#
|
||||
# <i>out</i> specifies output target.
|
||||
# It should have <tt><<</tt> method: IO and String for example.
|
||||
# If it is not specified, $stdout is used.
|
||||
# If it has a method <tt>charset=</tt>, it is called to set the minimal charset
|
||||
# of the result before <tt><<</tt> is called.
|
||||
#
|
||||
# <i>encoding</i> specifies output character encoding.
|
||||
# If it is not specified, internal encoding is used.
|
||||
#
|
||||
# HTree.expand_template returns <i>out</i> or $stdout if <i>out</i> is not
|
||||
# specified.
|
||||
#
|
||||
def HTree.expand_template(*args, &block)
  if block
    # Block form: the block's value is the template text and the block
    # itself supplies the evaluation scope.
    template = block.call
    binding = block
  else
    pathname = args.fetch(0) { raise ArgumentError, "pathname not given" }
    args.shift
    obj = args.fetch(0) { Object.new }
    args.shift
    if pathname.respond_to? :read
      template = pathname.read.untaint
      if template.respond_to? :charset
        template = Iconv.conv(HTree::Encoder.internal_charset, template.charset, template)
      end
    else
      template = File.read(pathname).untaint
    end
    # The binding is created by code evaluated in an empty binding, which
    # cannot see this method's locals, so +obj+ is handed over through a
    # thread-local slot. The slot is cleared in +ensure+ so that a failure
    # while building the binding does not leave +obj+ referenced by the
    # thread.
    Thread.current[:htree_expand_template_obj] = obj
    begin
      binding = eval(<<-'End',
        Thread.current[:htree_expand_template_obj].class.class_eval <<-'EE'
          Thread.current[:htree_expand_template_obj].instance_eval { binding }
        EE
      End
        HTree::EmptyBindingObject.empty_binding)
    ensure
      Thread.current[:htree_expand_template_obj] = nil
    end
  end

  # Remaining optional arguments are the same in both forms.
  out = args.shift || $stdout
  encoding = args.shift || HTree::Encoder.internal_charset
  if !args.empty?
    raise ArgumentError, "wrong number of arguments"
  end
  HTree::TemplateCompiler.new.expand_template(template, out, encoding, binding)
end
|
||||
|
||||
# call-seq:
|
||||
# HTree(html_string) -> doc
|
||||
# HTree{template_string} -> doc
|
||||
#
|
||||
# <code>HTree(<i>html_string</i>)</code> parses <i>html_string</i>.
|
||||
# <code>HTree{<i>template_string</i>}</code> parses <i>template_string</i> and expand it as a template.
|
||||
# Ruby expressions in <i>template_string</i> are evaluated in the scope of the caller.
|
||||
#
|
||||
# <code>HTree()</code> and <code>HTree{}</code> returns a tree as an instance of HTree::Doc.
|
||||
def HTree(html_string=nil, &block)
  # Without a block this is plain parsing of the given string.
  return HTree.parse(html_string) unless block_given?

  raise ArgumentError, "both argument and block given." if html_string

  # With a block: its value is a template, expanded into a fresh string in
  # the block's own scope and then parsed.
  template = block.call
  compiler = HTree::TemplateCompiler.new
  expanded = compiler.expand_template(template, '', HTree::Encoder.internal_charset, block)
  HTree.parse(expanded)
end
|
||||
|
||||
# call-seq:
|
||||
# HTree.compile_template(template_string) -> module
|
||||
#
|
||||
# <code>HTree.compile_template(<i>template_string</i>)</code> compiles
|
||||
# <i>template_string</i> as a template.
|
||||
#
|
||||
# HTree.compile_template returns a module.
|
||||
# The module has a module function for each template defined in
|
||||
# <i>template_string</i>.
|
||||
# The returned module can be used for +include+.
|
||||
#
|
||||
# M = HTree.compile_template(<<'End')
|
||||
# <p _template=birthday(subj,t)>
|
||||
# <span _text=subj />'s birthday is <span _text="t.strftime('%B %dth %Y')"/>.
|
||||
# </p>
|
||||
# End
|
||||
# M.birthday('Ruby', Time.utc(1993, 2, 24)).display_xml
|
||||
# # <p>Ruby's birthday is February 24th 1993.</p>
|
||||
#
|
||||
# The module function takes the arguments specified by a <code>_template</code>
|
||||
# attribute and returns a tree represented as HTree::Node.
|
||||
#
|
||||
def HTree.compile_template(template_string)
  code = HTree::TemplateCompiler.new.compile_template(template_string)
  # The generated code is evaluated in an empty binding, which cannot see
  # this method's locals, so it is handed over through a thread-local slot.
  # The slot is cleared in +ensure+ so that an error raised by the generated
  # code does not leave the source text referenced by the thread.
  Thread.current[:htree_compile_template_code] = code
  begin
    mod = eval(<<-'End',
      eval(Thread.current[:htree_compile_template_code])
    End
      HTree::EmptyBindingObject.empty_binding)
  ensure
    Thread.current[:htree_compile_template_code] = nil
  end
  mod
end
|
||||
|
||||
# :stopdoc:
|
||||
|
||||
class HTree::TemplateCompiler
|
||||
# Element names (plain and XHTML-namespaced) whose tags compile_body drops
# when the element carries no attributes.
IGNORABLE_ELEMENTS = {}
[
  'span',
  'div',
  '{http://www.w3.org/1999/xhtml}span',
  '{http://www.w3.org/1999/xhtml}div',
].each {|element_name| IGNORABLE_ELEMENTS[element_name] = true }
|
||||
|
||||
# Starts the counter that gensym uses to number generated names.
def initialize
  @gensym_id = 0
end
|
||||
|
||||
# Returns a new name of the form "g<N><suffix>", where N is a counter that
# is incremented on every call.
def gensym(suffix='')
  id = (@gensym_id += 1)
  "g#{id}#{suffix}"
end
|
||||
|
||||
# Parses the template text with HTree.parse and returns the result of
# running strip_whitespaces over it.
def parse_template(template)
  parsed = HTree.parse(template)
  strip_whitespaces(parsed)
end
|
||||
|
||||
# Elements that strip_whitespaces returns untouched.
WhiteSpacePreservingElements = { '{http://www.w3.org/1999/xhtml}pre' => true }
|
||||
|
||||
# Returns +template+ with whitespace-only text nodes removed.
#
# - HTree::Doc:  a new Doc is built from the stripped children.
# - HTree::Elem: returned as is when its name is listed in
#   WhiteSpacePreservingElements; otherwise each child is replaced by its
#   stripped form through subst_subnode (nil for a dropped text node).
# - HTree::Text: nil when its rcdata is empty or consists only of space,
#   tab, CR and LF; otherwise the node itself.
# - anything else is returned unchanged.
def strip_whitespaces(template)
  case template
  when HTree::Doc
    HTree::Doc.new(*template.children.map {|c| strip_whitespaces(c) }.compact)
  when HTree::Elem
    # (HTree::Doc used to be listed here as well, but every Doc is already
    # taken by the branch above, so that alternative could never match.)
    return template if WhiteSpacePreservingElements[template.name]
    subst = {}
    template.children.each_with_index {|c, i|
      subst[i] = strip_whitespaces(c)
    }
    template.subst_subnode(subst)
  when HTree::Text
    if /\A[ \t\r\n]*\z/ =~ template.rcdata
      nil
    else
      template
    end
  else
    template
  end
end
|
||||
|
||||
# Decides whether +template+ should be output as HTML.
#
# Walks the children in order: an XML declaration answers false, an element
# in the XHTML namespace answers true, whichever is met first. Returns false
# when neither is found.
def template_is_html(template)
  xhtml_namespace = 'http://www.w3.org/1999/xhtml'
  template.each_child do |child|
    return false if child.xmldecl?
    next unless child.elem?
    return true if child.element_name.namespace_uri == xhtml_namespace
  end
  false
end
|
||||
|
||||
# Expands +template+ and appends the result to +out+.
#
# The template is parsed and turned into Ruby source, which is evaluated in
# +binding+. If +out+ responds to charset=, it receives the encoder's
# minimal charset before the text is appended with <<. Returns +out+.
def expand_template(template, out, encoding, binding)
  doc = parse_template(template)
  html = template_is_html(doc)
  outvar = gensym('out')
  contextvar = gensym('top_context')
  context_name = html ? "HTree::HTMLContext" : "HTree::DefaultContext"
  finisher = html ? "finish" : "finish_with_xmldecl"

  # Assemble the program: set up the encoder and context, run the compiled
  # body, then return the finished text together with its minimal charset.
  pieces = []
  pieces << "#{outvar} = HTree::Encoder.new(#{encoding.dump})\n"
  pieces << "#{outvar}.html_output = true\n" if html
  pieces << "#{contextvar} = #{context_name}\n"
  pieces << compile_body(outvar, contextvar, doc, false)
  pieces << "[#{outvar}.#{finisher}, #{outvar}.minimal_charset]\n"
  #puts pieces.join; STDOUT.flush

  result, minimal_charset = eval(pieces.join, binding)
  out.charset = minimal_charset if out.respond_to? :charset=
  out << result
  out
end
|
||||
|
||||
# Compiles +src+ into Ruby source text. Evaluating that text creates an
# anonymous module holding one group of module functions per template
# collected by extract_templates, and yields that module as its value.
def compile_template(src)
  srcdoc = parse_template(src)
  templates = []
  # Called for its side effect of filling +templates+; the document it
  # returns is not used here.
  extract_templates(srcdoc, templates, true)
  methods = templates.map {|name_args, node| compile_global_template(name_args, node) }
  <<"End"
require 'htree/encoder'
require 'htree/context'
Module.new.module_eval <<'EE'
module_function
#{methods.join('').chomp}
self
EE
End
end
|
||||
|
||||
# Tells whether +name+ is a template directive, i.e. its local name begins
# with an underscore. Returns the match position (0) or nil.
def template_attribute?(name)
  local = name.local_name
  /\A_/ =~ local
end
|
||||
|
||||
# Walks +node+, moving every element that carries a _template attribute into
# +templates+ as [declaration text, element without that attribute] and
# returning nil in its place.
#
# Documents and elements without template attributes are rebuilt from their
# processed children. An element with other template attributes raises
# HTree::Error when +is_toplevel+ is true and is returned as is otherwise.
# Any other node is returned unchanged.
def extract_templates(node, templates, is_toplevel)
  unless HTree::Doc === node
    return node unless HTree::Elem === node

    ht_attrs = node.attributes.find_all {|name, _text| template_attribute? name }
    unless ht_attrs.empty?
      definition = ht_attrs.find {|htname, _text| htname.universal_name == '_template' }
      if definition
        name_fargs = definition[1].to_s
        templates << [name_fargs, node.subst_subnode('_template' => nil)]
        return nil
      end
      if is_toplevel
        raise HTree::Error, "unexpected template attributes in toplevel: #{ht_attrs.inspect}"
      end
      return node
    end
  end

  # A document, or an element free of template attributes: recurse.
  subst = {}
  node.children.each_with_index {|child, i|
    subst[i] = extract_templates(child, templates, is_toplevel)
  }
  node.subst_subnode(subst)
end
|
||||
|
||||
# An identifier usable as a template name or parameter: a lowercase letter
# followed by lowercase letters, digits or underscores.
ID_PAT = /[a-z][a-z0-9_]*/

# A template declaration "name" or "name(a, b, ...)".
# Group 1 is the name; group 2 is the text between the parentheses, or nil
# when there are no parentheses.
NAME_FARGS_PAT = /(#{ID_PAT})(?:\(\s*(|#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)\))?/
|
||||
# Generates the Ruby source of the three methods that implement one
# module-level template declared as +name_fargs+ ("name" or "name(a, b)"):
#
# - name:      parses the output of _xml_name with HTree.parse
# - _xml_name: runs the template against a fresh encoder and returns the text
# - _ht_name:  the compiled body, taking the output and context variables
#              ahead of the declared parameters
#
# Raises HTree::Error when the declaration does not match NAME_FARGS_PAT.
def compile_global_template(name_fargs, node)
  unless /\A#{NAME_FARGS_PAT}\z/o =~ name_fargs
    raise HTree::Error, "invalid template declaration: #{name_fargs}"
  end
  # Take the captures before the scan below replaces them.
  name = $1
  arglist = $2
  fargs = arglist ? arglist.scan(ID_PAT) : []

  outvar = gensym('out')
  contextvar = gensym('top_context')
  params = fargs.join(',')
  inner_params = [outvar, contextvar, *fargs].join(',')
  body = compile_body(outvar, contextvar, node, false)

  <<"End"
def #{name}(#{params})
HTree.parse(_xml_#{name}(#{params}))
end
def _xml_#{name}(#{params})
#{outvar} = HTree::Encoder.new(HTree::Encoder.internal_charset)
#{contextvar} = HTree::DefaultContext
_ht_#{name}(#{inner_params})
#{outvar}.finish
end
def _ht_#{name}(#{inner_params})
#{body}\
end
public :_ht_#{name}
End
end
|
||||
|
||||
# Generates the Ruby source for a template declared inside another template.
#
# The result assigns a lambda to a local variable named after the template;
# the lambda takes the output variable, the context variable and then the
# declared formal arguments.  +local_templates+ is forwarded to compile_body.
# Raises HTree::Error when +name_fargs+ is malformed.
def compile_local_template(name_fargs, node, local_templates)
  declared = /\A#{NAME_FARGS_PAT}\z/o.match(name_fargs)
  unless declared
    raise HTree::Error, "invalid template declaration: #{name_fargs}"
  end
  name = declared[1]
  fargs = declared[2] ? declared[2].scan(ID_PAT) : []

  outvar = gensym('out')
  contextvar = gensym('top_context')
  lambda_params = [outvar, contextvar, *fargs].join(',')

  <<"End"
#{name} = lambda {|#{lambda_params}|
#{compile_body(outvar, contextvar, node, false, local_templates)}\
}
End
end
|
||||
|
||||
# Compiles +node+ into the code of a template body that writes to +outvar+
# using +contextvar+.
#
# When +node+ is an attribute-less element listed in IGNORABLE_ELEMENTS only
# its children are compiled; otherwise the node itself is.
def compile_body(outvar, contextvar, node, is_toplevel, local_templates={})
  drop_wrapper = node.elem? && IGNORABLE_ELEMENTS[node.name] && node.attributes.empty?
  wrapped = TemplateNode.new(drop_wrapper ? node.children : node)
  logic_tree = generate_logic_node([:content], wrapped, local_templates)
  logic_tree.generate_xml_output_code(outvar, contextvar)
end
|
||||
|
||||
# Compiles one node of a template body.
#
# Documents become a TemplateNode of their compiled children.  Elements have
# their template attributes (names starting with "_") stripped and turned
# into a logic node; <tt>_attr_xxx</tt> attributes are replaced by a
# TemplateAttrName "xxx" holding the original value.  Other nodes are
# returned unchanged.
# Raises HTree::Error for unsupported combinations of template attributes.
def compile_node(node, local_templates)
  if HTree::Doc === node
    return TemplateNode.new(node.children.map {|child| compile_node(child, local_templates) })
  end
  return node unless HTree::Elem === node

  directives = node.attributes.find_all {|name, _text| template_attribute? name }
  # Sorting makes the name list below independent of attribute order.
  directives = directives.sort_by {|htname, _text| htname.universal_name }
  ignore_tag = false
  unless directives.empty?
    attr_mod = {}
    directives.each {|htname, text|
      attr_mod[htname] = nil
      if /\A_attr_/ =~ htname.local_name
        attr_mod[TemplateAttrName.new(htname.namespace_prefix, htname.namespace_uri, $')] = text
      end
    }
    directives.reject! {|htname, _text| /\A_attr_/ =~ htname.local_name }
    node = node.subst_subnode(attr_mod)
    ignore_tag = IGNORABLE_ELEMENTS[node.name] && node.attributes.empty?
  end
  ht_names = directives.map {|htname, _text| htname.universal_name }
  ht_vals = directives.map {|_htname, text| text.to_s }

  logic =
    case ht_names
    when []
      [:tag, [:content]]
    when ['_text'] # <n _text="expr" /> or <n _text>expr</n>
      expr = template_directive_expr('_text', ht_vals[0], node)
      if /\A\s*'((?:[^'\\]|\\[\0-\377])*)'\s*\z/ =~ expr
        # if expr is just a constant string literal, use it as a literal text.
        # This saves dynamic evaluation of <span _text="' '"/>
        # xxx: handle "..." as well if it has no #{}.
        return HTree::Text.new($1.gsub(/\\([\0-\377])/, '\1'))
      end
      compile_dynamic_text(ignore_tag, expr)
    when ['_tree'] # <n _tree="expr" /> or <n _tree>expr</n>
      compile_dynamic_tree(ignore_tag, template_directive_expr('_tree', ht_vals[0], node))
    when ['_if'] # <n _if="expr" >...</n>
      compile_if(ignore_tag, ht_vals[0], nil)
    when ['_else', '_if'] # <n _if="expr" _else="expr.meth(args)" >...</n>
      compile_if(ignore_tag, ht_vals[1], ht_vals[0])
    when ['_call'] # <n _call="recv.meth(args)" />
      compile_call(ignore_tag, ht_vals[0])
    when ['_iter'] # <n _iter="expr.meth(args)//fargs" >...</n>
      compile_iter(ignore_tag, ht_vals[0])
    when ['_iter_content'] # <n _iter_content="expr.meth(args)//fargs" >...</n>
      compile_iter_content(ignore_tag, ht_vals[0])
    else
      raise HTree::Error, "unexpected template attributes: #{directives.inspect}"
    end
  generate_logic_node(logic, node, local_templates)
end

# Returns the Ruby expression of a +_text+ / +_tree+ directive.
# +value+ is the attribute value; when it equals the directive name itself
# (xxx: attribute value is really omitted?) the expression is taken from the
# element's single text child instead.
def template_directive_expr(directive, value, node)
  return value if value != directive
  children = node.children
  if children.length != 1
    raise HTree::Error, "#{directive} expression has #{children.length} nodes"
  end
  if !children[0].text?
    raise HTree::Error, "#{directive} expression is not text: #{children[0].class}"
  end
  children[0].to_s
end
|
||||
|
||||
# Syntax-checks +code+ by eval'ing it behind a BEGIN block that returns true
# from this method.
# A SyntaxError from eval is re-raised as SyntaxError with a message that
# quotes the offending code.
def valid_syntax?(code)
  eval("BEGIN {return true}\n#{code.untaint}")
rescue SyntaxError
  raise SyntaxError, "invalid code: #{code}"
end
|
||||
|
||||
# Raises HTree::Error unless valid_syntax? accepts +code+; returns nil otherwise.
def check_syntax(code)
  return if valid_syntax?(code)
  raise HTree::Error, "invalid ruby code: #{code}"
end
|
||||
|
||||
# Builds the logic for a +_text+ directive: <tt>[:text, expr]</tt>, wrapped
# in <tt>[:tag, ...]</tt> unless +ignore_tag+ is set.
# +expr+ is syntax-checked first.
def compile_dynamic_text(ignore_tag, expr)
  check_syntax(expr)
  text_logic = [:text, expr]
  ignore_tag ? text_logic : [:tag, text_logic]
end
|
||||
|
||||
# Builds the logic for a +_tree+ directive: <tt>[:tree, expr]</tt>, wrapped
# in <tt>[:tag, ...]</tt> unless +ignore_tag+ is set.
# +expr+ is syntax-checked first.
def compile_dynamic_tree(ignore_tag, expr)
  check_syntax(expr)
  tree_logic = [:tree, expr]
  ignore_tag ? tree_logic : [:tag, tree_logic]
end
|
||||
|
||||
# Builds the logic for an +_if+ directive (with optional +_else+).
#
# Returns <tt>[:if, expr, then_logic, else_logic]</tt>.  The then-branch is
# the element content, wrapped in its tag unless +ignore_tag+ is set; the
# else-branch is the compiled +else_call+ or nil when none was given.
def compile_if(ignore_tag, expr, else_call)
  check_syntax(expr)
  then_logic = ignore_tag ? [:content] : [:tag, [:content]]
  else_logic = else_call ? compile_call(true, else_call) : nil
  [:if, expr, then_logic, else_logic]
end
|
||||
|
||||
# Splits a call spec such as "recv.meth(a, (b))" into the part before the
# final parenthesized group and the text inside that group.
#
# Returns <tt>[spec, '']</tt> when +spec+ does not end with ")".
# Raises HTree::Error when the closing paren has no matching opener.
def split_args(spec)
  return spec, '' unless /\)\z/ =~ spec
  pos = spec.length - 1
  depth = 0
  # Walk backwards from the final ")" until its matching "(" is consumed.
  loop do
    raise HTree::Error, "unmatched paren: #{spec}" if pos < 0
    case spec[pos]
    when ?\) then depth += 1
    when ?\( then depth -= 1
    end
    pos -= 1
    break if depth == 0
  end
  open_at = pos + 1
  return spec[0, open_at], spec[(open_at + 1)...-1]
end
|
||||
|
||||
# Builds the logic for a +_call+ directive.
#
# +spec+ has the form <tt>[recv.]meth[(args)]</tt>.  Returns
# <tt>[:call, recv, meth, args]</tt> where +recv+ is nil when no receiver
# was given and +args+ is the raw argument text ('' when absent).
# Raises HTree::Error when the spec does not have that form.
def compile_call(ignore_tag, spec)
  spec, args = split_args(spec.strip)
  tail = /#{ID_PAT}\z/o.match(spec)
  unless tail
    raise HTree::Error, "invalid _call: #{spec}"
  end
  meth = tail[0]
  prefix = tail.pre_match
  recv =
    case prefix
    when /\A\s*\z/ then nil
    when /\A\s*(.*)\.\z/ then $1
    else raise HTree::Error, "invalid _call: #{prefix}"
    end
  if recv
    check_syntax(recv)
    check_syntax("#{recv}.#{meth}(#{args})")
  end
  check_syntax("#{meth}(#{args})")
  [:call, recv, meth, args]
end
|
||||
|
||||
# Builds the logic for an +_iter+ directive.
#
# spec: <n _iter="expr.meth[(args)]//fargs" >...</n>
# Returns <tt>[:iter, call, fargs, logic]</tt>; the iterated +logic+ is the
# element content, wrapped in its tag unless +ignore_tag+ is set, so the tag
# is repeated on every iteration.
# Raises HTree::Error when the "//fargs" part is missing or malformed.
def compile_iter(ignore_tag, spec)
  spec = spec.strip
  block_part = %r{\s*//\s*(#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)?\z}o.match(spec)
  unless block_part
    raise HTree::Error, "invalid block arguments for _iter: #{spec}"
  end
  call = block_part.pre_match.strip
  fargs = block_part[1] ? block_part[1].strip : ''
  check_syntax("#{call} {|#{fargs}| }")
  body_logic = ignore_tag ? [:content] : [:tag, [:content]]
  [:iter, call, fargs, body_logic]
end
|
||||
|
||||
# Builds the logic for an +_iter_content+ directive.
#
# spec: <n _iter_content="expr.meth[(args)]//fargs" >...</n>
# Returns <tt>[:iter, call, fargs, [:content]]</tt>, wrapped in
# <tt>[:tag, ...]</tt> unless +ignore_tag+ is set.  In contrast to
# compile_iter the tag encloses the iteration, so only the content is
# repeated.
# Raises HTree::Error when the "//fargs" part is missing or malformed.
def compile_iter_content(ignore_tag, spec)
  spec = spec.strip
  unless %r{\s*//\s*(#{ID_PAT}\s*(?:,\s*#{ID_PAT}\s*)*)?\z}o =~ spec
    # Name the directive actually being compiled (the message used to say "_iter").
    raise HTree::Error, "invalid block arguments for _iter_content: #{spec}"
  end
  call = $`.strip
  fargs = $1 ? $1.strip : ''
  check_syntax("#{call} {|#{fargs}| }")
  logic = [:iter, call, fargs, [:content]]
  logic = [:tag, logic] unless ignore_tag
  logic
end
|
||||
|
||||
# Turns a logic description into a node of the compiled template tree.
#
# logic ::= [:if, expr, then_logic, else_logic]
#         | [:iter, call, fargs, logic]
#         | [:tag, logic]
#         | [:text, expr]
#         | [:tree, expr]
#         | [:call, expr, meth, args]
#         | [:content]
#         | [:empty]
#
# +node+ is the element the logic applies to and +local_templates+ holds the
# names of the local templates visible at this point.  Lambdas placed in the
# resulting TemplateNode are run with (out, context) when the tree is output.
def generate_logic_node(logic, node, local_templates)
  kind, *operands = logic
  case kind
  when :empty
    nil
  when :content
    extracted = []
    remaining = node.children.map {|child| extract_templates(child, extracted, false) }
    if extracted.empty?
      TemplateNode.new(node.children.map {|child| compile_node(child, local_templates) })
    else
      # Local templates are visible to each other and to the siblings, so
      # all names are registered (and pre-declared as nil) before any body
      # is compiled.
      local_templates = local_templates.dup
      sub_names = extracted.map {|sub_name_args, _sub_node| sub_name_args[ID_PAT] }
      sub_names.each {|sub_name| local_templates[sub_name] = sub_name }
      decl = sub_names.map {|sub_name| "#{sub_name} = " }.join << "nil\n"
      defs = extracted.map {|sub_name_args, sub_node|
        lambda {|out, context|
          out.output_logic_line compile_local_template(sub_name_args, sub_node, local_templates)
        }
      }
      TemplateNode.new(
        lambda {|out, context| out.output_logic_line decl },
        defs,
        remaining.map {|child| compile_node(child, local_templates) }
      )
    end
  when :text
    expr = operands[0]
    TemplateNode.new(lambda {|out, context| out.output_dynamic_text expr })
  when :tree
    expr = operands[0]
    TemplateNode.new(lambda {|out, context| out.output_dynamic_tree expr, make_context_expr(out, context) })
  when :tag
    inner_logic = operands[0]
    return node if inner_logic == [:content] && node.empty_element?
    # Drop every child and put the generated logic in the first slot.
    subst = {}
    node.children.each_index {|i| subst[i] = nil }
    subst[0] = TemplateNode.new(generate_logic_node(inner_logic, node, local_templates))
    node.subst_subnode(subst)
  when :if
    expr, then_logic, else_logic = operands
    parts = [
      lambda {|out, context| out.output_logic_line "if (#{expr})" },
      generate_logic_node(then_logic, node, local_templates)
    ]
    if else_logic
      parts << lambda {|out, context| out.output_logic_line "else" }
      parts << generate_logic_node(else_logic, node, local_templates)
    end
    parts << lambda {|out, context| out.output_logic_line "end" }
    TemplateNode.new(*parts)
  when :iter
    call, fargs, inner_logic = operands
    TemplateNode.new(
      lambda {|out, context| out.output_logic_line "#{call} {|#{fargs}|" },
      generate_logic_node(inner_logic, node, local_templates),
      lambda {|out, context| out.output_logic_line "}" }
    )
  when :call
    recv, meth, args = operands
    TemplateNode.new(
      lambda {|out, context|
        call_args = "#{out.outvar}, #{make_context_expr(out, context)}"
        call_args << ", " << args unless args.empty?
        if recv
          out.output_logic_line "(#{recv})._ht_#{meth}(#{call_args})"
        elsif local_templates.include? meth
          out.output_logic_line "#{meth}.call(#{call_args})"
        else
          out.output_logic_line "_ht_#{meth}(#{call_args})"
        end
      }
    )
  else
    raise Exception, "[bug] invalid logic: #{logic.inspect}"
  end
end
|
||||
|
||||
# Returns Ruby source for the context to pass on at this point of the
# template: just <tt>out.contextvar</tt> when +context+ declares nothing
# beyond HTree::Context::DefaultNamespaces, otherwise a +subst_namespaces+
# call on it listing each non-default prefix=>URI pair (a nil prefix is
# written as +nil+).
def make_context_expr(out, context)
  extra = context.namespaces.reject {|prefix, uri| HTree::Context::DefaultNamespaces[prefix] == uri }
  return out.contextvar if extra.empty?
  pairs = extra.map {|prefix, uri| "#{prefix ? prefix.dump : 'nil'}=>#{uri.dump}" }
  "#{out.contextvar}.subst_namespaces(#{pairs.join(', ')})"
end
|
||||
|
||||
# A node of the compiled template tree.  Its children are either ordinary
# nodes (anything responding to +output+) or callables that are invoked
# with (out, context) when the tree is output.
class TemplateNode
  include HTree::Node

  attr_reader :children

  # Nested arrays among +children+ are flattened and nils are dropped.
  def initialize(*children)
    @children = children.flatten.compact
  end

  # Outputs every child in order: callables are called, nodes are asked to
  # output themselves.
  def output(out, context)
    @children.each do |child|
      child.respond_to?(:call) ? child.call(out, context) : child.output(out, context)
    end
  end
end
|
||||
|
||||
# Name of an attribute whose value is computed at template run time
# (created from a <tt>_attr_xxx</tt> template attribute).
class TemplateAttrName < HTree::Name
  # Emits <tt>name="..."</tt> where the value is passed to
  # +output_dynamic_attvalue+ as the string form of +text+.
  def output_attribute(text, out, context)
    output(out, context)
    out.output_string('="')
    out.output_dynamic_attvalue(text.to_s)
    out.output_string('"')
  end
end
|
||||
|
||||
end
|
||||
|
||||
# :startdoc:
|
@ -0,0 +1,115 @@
|
||||
require 'htree/modules'
|
||||
require 'htree/raw_string'
|
||||
require 'htree/htmlinfo'
|
||||
require 'htree/encoder'
|
||||
require 'htree/fstr'
|
||||
require 'iconv'
|
||||
|
||||
module HTree
  # A text node.  The content is kept as RCDATA (text in which "&" starts a
  # character reference) in two forms: +rcdata+ as given and
  # +normalized_rcdata+ with character references decoded where possible.
  class Text
    # :stopdoc:
    class << self
      alias new_internal new
    end
    # :startdoc:

    # Creates a text node from +arg+, which may be
    # - a String holding plain text ("&" is escaped to "&amp;"),
    # - an HTree::Text (its rcdata is reused), or
    # - an HTree::Location pointing to an HTree::Text.
    # Raises TypeError for anything else.
    def Text.new(arg)
      arg = arg.to_node if HTree::Location === arg
      if Text === arg
        new_internal arg.rcdata, arg.normalized_rcdata
      elsif String === arg
        # Plain text must not be mistaken for character references later on.
        arg2 = arg.gsub(/&/, '&amp;')
        arg = arg2.freeze if arg != arg2
        new_internal arg
      else
        raise TypeError, "cannot initialize Text with #{arg.inspect}"
      end
    end

    def initialize(rcdata, normalized_rcdata=internal_normalize(rcdata)) # :notnew:
      init_raw_string
      @rcdata = rcdata && HTree.frozen_string(rcdata)
      # Share one object when normalization changed nothing.
      @normalized_rcdata = @rcdata == normalized_rcdata ? @rcdata : normalized_rcdata
    end
    attr_reader :rcdata, :normalized_rcdata

    # Returns the normalized form of +rcdata+ as a frozen string.
    # - character references are decoded as much as possible.
    # - undecodable character references are converted to decimal numeric
    #   character references.
    # - references to "&" are kept as "&#38;" so the result is still RCDATA.
    # - unknown names and out-of-range code points become "?".
    def internal_normalize(rcdata)
      result = rcdata.gsub(/&(?:#([0-9]+)|#x([0-9a-fA-F]+)|([A-Za-z][A-Za-z0-9]*));/o) {|s|
        u = nil
        if $1
          u = $1.to_i
        elsif $2
          u = $2.hex
        elsif $3
          u = NamedCharacters[$3]
        end
        if !u || u < 0 || 0x7fffffff < u
          '?'
        elsif u == 38 # '&' character.
          '&#38;'
        elsif u <= 0x7f
          [u].pack("C")
        else
          begin
            Iconv.conv(Encoder.internal_charset, 'UTF-8', [u].pack("U"))
          rescue Iconv::Failure
            "&##{u};"
          end
        end
      }
      HTree.frozen_string(result)
    end
    private :internal_normalize

    # HTree::Text#to_s converts the text to a string.
    # - character references are decoded as much as possible.
    # - undecodable character reference are converted to `?' character.
    def to_s
      @normalized_rcdata.gsub(/&(?:#([0-9]+));/o) {|s|
        u = $1.to_i
        if 0 <= u && u <= 0x7f
          [u].pack("C")
        else
          '?'
        end
      }
    end

    # True when the normalized text is the empty string.
    def empty?
      @normalized_rcdata.empty?
    end

    # Returns the text with leading and trailing whitespace removed
    # (self when there is nothing to remove).
    # NOTE(review): the second alternative of both patterns is a plain space,
    # which \s already covers; it may originally have been a non-breaking
    # space or "&nbsp;" -- confirm against the upstream source.
    def strip
      rcdata = @normalized_rcdata.dup
      rcdata.sub!(/\A(?:\s| )+/, '')
      rcdata.sub!(/(?:\s| )+\z/, '')
      if rcdata == @normalized_rcdata
        self
      else
        rcdata.freeze
        Text.new_internal(rcdata, rcdata)
      end
    end

    # HTree::Text.concat returns a text which is concatenation of arguments.
    #
    # An argument should be one of follows.
    # - String
    # - HTree::Text
    # - HTree::Location which points HTree::Text
    def Text.concat(*args)
      rcdata = ''
      args.each {|arg|
        arg = arg.to_node if HTree::Location === arg
        if Text === arg
          rcdata << arg.rcdata
        else
          # Plain strings are escaped the same way Text.new escapes them.
          rcdata << arg.gsub(/&/, '&amp;')
        end
      }
      new_internal rcdata
    end
  end
end
|
@ -0,0 +1,497 @@
|
||||
require 'htree/doc'
|
||||
require 'htree/elem'
|
||||
require 'htree/loc'
|
||||
require 'htree/extract_text'
|
||||
require 'uri'
|
||||
|
||||
module HTree
|
||||
module Traverse
  # Node kind predicates: each one is true when self is an instance of the
  # corresponding Trav module.
  def doc?
    Doc::Trav === self
  end

  def elem?
    Elem::Trav === self
  end

  def text?
    Text::Trav === self
  end

  def xmldecl?
    XMLDecl::Trav === self
  end

  def doctype?
    DocType::Trav === self
  end

  def procins?
    ProcIns::Trav === self
  end

  def comment?
    Comment::Trav === self
  end

  def bogusetag?
    BogusETag::Trav === self
  end

  # Follows +indexes+ one level at a time, starting from self, calling
  # +get_subnode_internal+ at each step; returns the node reached
  # (self when no index is given).
  def get_subnode(*indexes)
    indexes.inject(self) {|current, index| current.get_subnode_internal(index) }
  end
end
|
||||
|
||||
module Container::Trav
|
||||
# +each_child+ iterates over each child.
|
||||
def each_child(&block) # :yields: child_node
  # Hand the block straight to the children collection; always answer nil.
  children.each(&block)
  return nil
end

# +each_child_with_index+ iterates over each child together with its
# position among the children.
def each_child_with_index(&block) # :yields: child_node, index
  children.each_with_index(&block)
  return nil
end
|
||||
|
||||
# +find_element+ searches an element which universal name is specified by
|
||||
# the arguments.
|
||||
# It returns nil if not found.
|
||||
def find_element(*names)
  # The first element yielded by the traversal ends the search.
  traverse_element(*names) do |found|
    return found
  end
  nil
end
|
||||
|
||||
# +traverse_element+ traverses elements in the tree.
|
||||
# It yields elements in depth first order.
|
||||
#
|
||||
# If _names_ are empty, it yields all elements.
|
||||
# If non-empty _names_ are given, it should be list of universal names.
|
||||
#
|
||||
# A nested element is yielded in depth first order as follows.
|
||||
#
|
||||
# t = HTree('<a id=0><b><a id=1 /></b><c id=2 /></a>')
|
||||
# t.traverse_element("a", "c") {|e| p e}
|
||||
# # =>
|
||||
# {elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>}
|
||||
# {emptyelem <a id="1">}
|
||||
# {emptyelem <c id="2">}
|
||||
#
|
||||
# Universal names are specified as follows.
|
||||
#
|
||||
# t = HTree(<<'End')
|
||||
# <html>
|
||||
# <meta name="robots" content="index,nofollow">
|
||||
# <meta name="author" content="Who am I?">
|
||||
# </html>
|
||||
# End
|
||||
# t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e}
|
||||
# # =>
|
||||
# {emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">}
|
||||
# {emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">}
|
||||
#
|
||||
def traverse_element(*names, &block) # :yields: element
  if names.empty?
    traverse_all_element(&block)
  else
    # The names are passed on as a hash used as a set.
    wanted = names.inject({}) {|set, n| set[n] = true; set }
    traverse_some_element(wanted, &block)
  end
  nil
end
|
||||
|
||||
# Yields <tt>element, attribute_name, attribute_value</tt> for every
# hyperlink-carrying attribute that is present on the XHTML elements
# a, area, link, img, object, q, blockquote, ins, del, form, input, head,
# base and script.
def each_hyperlink_attribute
  xhtml = 'http://www.w3.org/1999/xhtml'
  tags = %w[a area link img object q blockquote ins del form input head base script]
  traverse_element(*tags.map {|tag| "{#{xhtml}}#{tag}" }) {|elem|
    # Which attributes of this kind of element hold URIs.
    attrs =
      case elem.name
      when %r{\{http://www.w3.org/1999/xhtml\}(?:base|a|area|link)\z}i
        ['href']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:img)\z}i
        ['src', 'longdesc', 'usemap']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:object)\z}i
        ['classid', 'codebase', 'data', 'usemap']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:q|blockquote|ins|del)\z}i
        ['cite']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:form)\z}i
        ['action']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:input)\z}i
        ['src', 'usemap']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:head)\z}i
        ['profile']
      when %r{\{http://www.w3.org/1999/xhtml\}(?:script)\z}i
        ['src', 'for']
      end
    attrs.each {|attr|
      hyperlink = elem.get_attribute(attr)
      yield elem, attr, hyperlink if hyperlink
    }
  }
end
|
||||
private :each_hyperlink_attribute
|
||||
|
||||
# +each_hyperlink_uri+ traverses hyperlinks such as HTML href attribute
|
||||
# of A element.
|
||||
#
|
||||
# It yields HTree::Text (or HTree::Loc) and URI for each hyperlink.
|
||||
#
|
||||
# The URI objects are created with a base URI which is given by
|
||||
# HTML BASE element or the argument ((|base_uri|)).
|
||||
# +each_hyperlink_uri+ doesn't yield the href of the BASE element.
|
||||
def each_hyperlink_uri(base_uri=nil) # :yields: hyperlink, uri
  base_uri = URI.parse(base_uri) if String === base_uri
  pending = []
  # Links are collected first: a BASE element found anywhere during the
  # traversal replaces the base used for all of them.
  each_hyperlink_attribute {|elem, attr, hyperlink|
    if %r{\{http://www.w3.org/1999/xhtml\}(?:base)\z}i =~ elem.name
      base_uri = URI.parse(hyperlink.to_s)
    else
      pending << hyperlink
    end
  }
  pending.each {|hyperlink|
    target = hyperlink.to_s
    yield hyperlink, (base_uri ? base_uri + target : URI.parse(target))
  }
end
|
||||
|
||||
# +each_hyperlink+ traverses hyperlinks such as HTML href attribute
|
||||
# of A element.
|
||||
#
|
||||
# It yields HTree::Text or HTree::Loc.
|
||||
#
|
||||
# Note that +each_hyperlink+ yields HTML href attribute of BASE element.
|
||||
def each_hyperlink # :yields: text
  # Only the attribute value is passed on; the element and the attribute
  # name are dropped.  (An unused local array was removed here.)
  each_hyperlink_attribute {|_elem, _attr, hyperlink|
    yield hyperlink
  }
end
|
||||
|
||||
# +each_uri+ traverses hyperlinks such as HTML href attribute
|
||||
# of A element.
|
||||
#
|
||||
# It yields URI for each hyperlink.
|
||||
#
|
||||
# The URI objects are created with a base URI which is given by
|
||||
# HTML BASE element or the argument ((|base_uri|)).
|
||||
def each_uri(base_uri=nil) # :yields: URI
  # Same traversal as each_hyperlink_uri, keeping only the URI.
  each_hyperlink_uri(base_uri) do |_hyperlink, uri|
    yield uri
  end
end
|
||||
end
|
||||
|
||||
# :stopdoc:
|
||||
module Doc::Trav
  # A document is not an element itself: only descend into the children.
  def traverse_all_element(&block)
    children.each do |child|
      child.traverse_all_element(&block)
    end
  end
end

module Elem::Trav
  # Yields this element, then descends into its children.
  def traverse_all_element(&block)
    yield self
    children.each do |child|
      child.traverse_all_element(&block)
    end
  end
end

module Leaf::Trav
  # Leaves contain no elements: nothing to do.
  def traverse_all_element
  end
end
|
||||
|
||||
module Doc::Trav
  # A document is not an element itself: only descend into the children.
  def traverse_some_element(name_set, &block)
    children.each do |child|
      child.traverse_some_element(name_set, &block)
    end
  end
end

module Elem::Trav
  # Yields this element when its universal name is in +name_set+, then
  # descends into its children.
  def traverse_some_element(name_set, &block)
    yield self if name_set.include? self.name
    children.each do |child|
      child.traverse_some_element(name_set, &block)
    end
  end
end

module Leaf::Trav
  # Leaves contain no elements: nothing to do.
  def traverse_some_element(name_set)
  end
end
|
||||
# :startdoc:
|
||||
|
||||
module Traverse
  # +traverse_text+ traverses texts in the tree.
  # The work is delegated to +traverse_text_internal+; the result is always nil.
  def traverse_text(&block) # :yields: text
    traverse_text_internal(&block)
    return nil
  end
end
|
||||
|
||||
# :stopdoc:
|
||||
module Container::Trav
  # Containers pass the traversal on to every child.
  def traverse_text_internal(&block)
    each_child do |child|
      child.traverse_text_internal(&block)
    end
  end
end

module Leaf::Trav
  # Leaves yield nothing.
  def traverse_text_internal
  end
end

module Text::Trav
  # A text yields itself.
  def traverse_text_internal
    yield self
  end
end
|
||||
# :startdoc:
|
||||
|
||||
module Container::Trav
|
||||
# +filter+ rebuilds the tree without some components.
|
||||
#
|
||||
# node.filter {|descendant_node| predicate } -> node
|
||||
# loc.filter {|descendant_loc| predicate } -> node
|
||||
#
|
||||
# +filter+ yields each node except top node.
|
||||
# If given block returns false, corresponding node is dropped.
|
||||
# If given block returns true, corresponding node is retained and
|
||||
# inner nodes are examined.
|
||||
#
|
||||
# +filter+ returns an node.
|
||||
# It doesn't return location object even if self is location object.
|
||||
#
|
||||
def filter(&block)
  subst = {}
  each_child_with_index {|descendant, i|
    subst[i] =
      if !yield(descendant)
        nil                         # rejected: drop the whole subtree
      elsif descendant.elem?
        descendant.filter(&block)   # kept element: examine its inside too
      else
        descendant
      end
  }
  to_node.subst_subnode(subst)
end
|
||||
end
|
||||
|
||||
module Doc::Trav
|
||||
# +title+ searches title and return it as a text.
|
||||
# It returns nil if not found.
|
||||
#
|
||||
# +title+ searches the following information.
|
||||
#
|
||||
# - <title>...</title> in HTML
|
||||
# - <title>...</title> in RSS
|
||||
# - <title>...</title> in Atom
|
||||
def title
  # Plain, XHTML, RSS 1.0, RSS 0.9 and both Atom namespaces.
  candidates = [
    'title',
    '{http://www.w3.org/1999/xhtml}title',
    '{http://purl.org/rss/1.0/}title',
    '{http://my.netscape.com/rdf/simple/0.9/}title',
    '{http://www.w3.org/2005/Atom}title',
    '{http://purl.org/atom/ns#}title'
  ]
  found = find_element(*candidates)
  found && found.extract_text
end
|
||||
|
||||
# +author+ searches author and return it as a text.
|
||||
# It returns nil if not found.
|
||||
#
|
||||
# +author+ searchs following information.
|
||||
#
|
||||
# - <meta name="author" content="author-name"> in HTML
|
||||
# - <link rev="made" title="author-name"> in HTML
|
||||
# - <dc:creator>author-name</dc:creator> in RSS
|
||||
# - <dc:publisher>author-name</dc:publisher> in RSS
|
||||
# - <author><name>author-name</name></author> in Atom
|
||||
def author
  # HTML: <meta name="author" content=...>, then <link rev="made" title=...>.
  [['meta', 'name', 'author', 'content'],
   ['link', 'rev', 'made', 'title']].each {|tag, key_attr, key_value, value_attr|
    traverse_element(tag, "{http://www.w3.org/1999/xhtml}#{tag}") {|e|
      begin
        next unless e.fetch_attr(key_attr).downcase == key_value
        author = e.fetch_attribute(value_attr).strip
        return author if !author.empty?
      rescue IndexError
        # a missing attribute just means this element is not a candidate
      end
    }
  }

  # RSS 1.0: dc:creator, then dc:publisher, inside the channel.
  if channel = find_element('{http://purl.org/rss/1.0/}channel')
    ['creator', 'publisher'].each {|dc_name|
      channel.traverse_element("{http://purl.org/dc/elements/1.1/}#{dc_name}") {|e|
        begin
          author = e.extract_text.strip
          return author if !author.empty?
        rescue IndexError
        end
      }
    }
  end

  # Atom (both namespaces): <author><name>...</name></author> of a feed.
  ['http://www.w3.org/2005/Atom', 'http://purl.org/atom/ns#'].each {|xmlns|
    each_child {|top|
      next unless top.elem?
      next unless top.name == "{#{xmlns}}feed"
      feed_author = find_element("{#{xmlns}}author")
      next unless feed_author
      feed_author.traverse_element("{#{xmlns}}name") {|e|
        begin
          author = e.extract_text.strip
          return author if !author.empty?
        rescue IndexError
        end
      }
    }
  }

  nil
end
|
||||
|
||||
end
|
||||
|
||||
module Doc::Trav
|
||||
# +root+ searches root element.
|
||||
# If there is no element on top level, it raises HTree::Error.
|
||||
# If there is two or more elements on top level, it raise HTree::Error.
|
||||
def root
  top_elements = children.find_all {|child| child.elem? }
  raise HTree::Error, "no element" if top_elements.empty?
  raise HTree::Error, "multiple top elements" if top_elements.length > 1
  top_elements[0]
end
|
||||
|
||||
# +has_xmldecl?+ returns true if there is an XML declaration on top level.
|
||||
def has_xmldecl?
  # Only direct children are examined.
  children.any? {|child| child.xmldecl? }
end
|
||||
end
|
||||
|
||||
module Elem::Trav
|
||||
# +name+ returns the universal name of the element as a string.
|
||||
#
|
||||
# p HTree('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>').root.name
|
||||
# # =>
|
||||
# "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF"
|
||||
#
|
||||
def name
  element_name.universal_name
end
|
||||
|
||||
# +qualified_name+ returns the qualified name of the element as a string.
|
||||
#
|
||||
# p HTree('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>').root.qualified_name
|
||||
# # =>
|
||||
# "rdf:RDF"
|
||||
def qualified_name
  element_name.qualified_name
end
|
||||
|
||||
# +attributes+ returns attributes as a hash.
|
||||
# The hash keys are HTree::Name objects.
|
||||
# The hash values are HTree::Text or HTree::Location objects.
|
||||
#
|
||||
# p HTree('<a name="xx" href="uu">').root.attributes
|
||||
# # =>
|
||||
# {href=>{text "uu"}, name=>{text "xx"}}
|
||||
#
|
||||
# p HTree('<a name="xx" href="uu">').make_loc.root.attributes
|
||||
# # =>
|
||||
# {href=>#<HTree::Location: doc()/a/@href>, name=>#<HTree::Location: doc()/a/@name>}
|
||||
#
|
||||
def attributes
  # Collect the pairs yielded by each_attribute into a fresh hash.
  table = {}
  each_attribute do |attr_name, attr_text|
    table.store(attr_name, attr_text)
  end
  table
end
|
||||
|
||||
# Yields each attribute as a pair of plain strings: the universal name of
# the attribute and the string form of its value.
def each_attr
  each_attribute do |attr_name, attr_text|
    yield attr_name.universal_name, attr_text.to_s
  end
end
|
||||
|
||||
# call-seq:
|
||||
# elem.fetch_attribute(name) -> text or raise IndexError
|
||||
# elem.fetch_attribute(name, default) -> text or default
|
||||
# elem.fetch_attribute(name) {|uname| default } -> text or default
|
||||
#
|
||||
# +fetch_attribute+ returns an attribute value as a text.
|
||||
#
|
||||
# elem may be an instance of HTree::Elem or a location points to it.
|
||||
def fetch_attribute(uname, *rest, &block)
  raise ArgumentError, "wrong number of arguments (#{1+rest.length} for 2)" if rest.length > 1
  raise ArgumentError, "block supersedes default value argument" if block_given? && !rest.empty?
  # Accept name objects as well as universal-name strings.
  key = uname.respond_to?(:universal_name) ? uname.universal_name : uname
  update_attribute_hash.fetch(key) do
    # Missing attribute: the block wins over the default, the default over
    # raising.
    return yield(key) if block_given?
    return rest[0] unless rest.empty?
    raise IndexError, "attribute not found: #{key.inspect}"
  end
end
|
||||
|
||||
# call-seq:
|
||||
# elem.fetch_attr(name) -> string or raise IndexError
|
||||
# elem.fetch_attr(name, default) -> string or default
|
||||
# elem.fetch_attr(name) {|uname| default } -> string or default
|
||||
#
|
||||
# +fetch_attr+ returns an attribute value as a string.
|
||||
#
|
||||
# elem may be an instance of HTree::Elem or a location points to it.
|
||||
def fetch_attr(uname, *rest, &block)
  raise ArgumentError, "wrong number of arguments (#{1+rest.length} for 2)" if rest.length > 1
  raise ArgumentError, "block supersedes default value argument" if block_given? && !rest.empty?
  # Accept name objects as well as universal-name strings.
  key = uname.respond_to?(:universal_name) ? uname.universal_name : uname
  found = update_attribute_hash.fetch(key) {
    # Defaults leave the method directly and are therefore NOT converted
    # to a string.
    return yield(key) if block_given?
    return rest[0] unless rest.empty?
    raise IndexError, "attribute not found: #{key.inspect}"
  }
  found.to_s
end
|
||||
|
||||
# Returns the value stored for the attribute +uname+ (a universal name
# string or an object responding to +universal_name+), or nil when the
# element has no such attribute.
def get_attribute(uname)
  key = uname.respond_to?(:universal_name) ? uname.universal_name : uname
  update_attribute_hash[key]
end
|
||||
|
||||
# Returns the value of the attribute +uname+ as a string, or nil when the
# element has no such attribute.
#
# +uname+ may be a universal name string or, like in get_attribute and
# fetch_attr, an object responding to +universal_name+.
def get_attr(uname)
  uname = uname.universal_name if uname.respond_to? :universal_name
  text = update_attribute_hash[uname]
  text ? text.to_s : nil
end
|
||||
|
||||
end
|
||||
|
||||
end
|
@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
# usage: ruby install.rb [-n]
|
||||
# options:
|
||||
# -n : don't install
|
||||
#
|
||||
# Author: Tanaka Akira <akr@m17n.org>
|
||||
|
||||
require 'optparse'
|
||||
require 'fileutils'
|
||||
|
||||
# Returns the first load-path ($:) entry that ends in
# "/site_ruby/<version>", where <version> is made of digits and dots.
# Raises RuntimeError when no entry matches.
def target_directory
  site_dir = $:.find { |path| %r{/site_ruby/[\d.]+\z} =~ path }
  raise "could not find target install directory" unless site_dir
  site_dir
end
|
||||
|
||||
# Memo for cvs_files, keyed by directory.  A nil value records that the
# directory has no CVS subdirectory.
CVS_FILES = {}

# Returns a hash whose keys are the names listed in "#{dir}/CVS/Entries"
# (each mapped to true), or nil when "#{dir}/CVS" is not a directory.
# The answer is cached in CVS_FILES, nil included.
def cvs_files(dir)
  return CVS_FILES[dir] if CVS_FILES.include? dir
  entries = nil
  if File.directory? "#{dir}/CVS"
    entries = {}
    File.foreach("#{dir}/CVS/Entries") do |line|
      # Accepts both "/name/..." lines and "D/name/..." lines.
      entries[$1] = true if %r{\AD?/([^/]+)/} =~ line
    end
  end
  CVS_FILES[dir] = entries
end
|
||||
|
||||
# Yields the name of every file to install: each "*.rb" file in the current
# directory, plus the files they require (found through each_require).
#
# Files whose name starts with "test-" or "install" are skipped.  When
# cvs_files('.') returns a listing, files missing from it are skipped too.
# Every name is yielded at most once, even if a top-level file was already
# reached through another file's require.
def each_target(&block)
  target_set = {}
  cvs = cvs_files('.')
  Dir.glob("*.rb") {|filename|
    next if /\Atest-/ =~ filename
    next if /\Ainstall/ =~ filename
    next if cvs && !cvs.include?(filename)
    # Already yielded by each_require on an earlier file; do not repeat it.
    next if target_set.include?(filename)
    target_set[filename] = true
    yield filename
    each_require(filename, target_set, &block)
  }
end
|
||||
|
||||
# Scans +file+ for lines that start (after optional whitespace) with
# require followed by a quoted feature name, and yields "<feature>.rb" for
# each one that exists relative to the current directory and is not yet in
# +target_set+.  Each yielded file is recorded in +target_set+ and scanned
# recursively.
def each_require(file, target_set, &block)
  File.foreach(file) do |line|
    feature = line[/\A\s*require\s+['"]([^'"]+)['"]/, 1]
    next unless feature
    required = "#{feature}.rb"
    next if target_set.include?(required) || !File.exist?(required)
    target_set[required] = true
    yield required
    each_require(required, target_set, &block)
  end
end
|
||||
|
||||
# Returns every filename yielded by each_target as a sorted array.
def collect_target
  files = []
  each_target { |filename| files.push(filename) }
  files.sort
end
|
||||
|
||||
# Copies +src+ to +dst+ unless the two files already have the same content.
#
# Any existing +dst+ is unlinked first, the destination directory is created
# with mode 0755, the copy is done verbosely and the result is set to mode
# 0644.
def install_file(src, dst)
  begin
    return if FileUtils.compare_file(src, dst)
  rescue Errno::ENOENT
    # A missing file cannot be compared; carry on and copy.
  end
  # check shadow
  begin
    File.unlink(dst)
  rescue Errno::ENOENT
    # Nothing to remove.
  end
  FileUtils.mkdir_p(File.dirname(dst), :mode => 0755)
  FileUtils.cp(src, dst, :verbose => true)
  File.chmod(0644, dst)
end
|
||||
|
||||
# Runs the given block and returns its value.  An exception matching +exc+
# is swallowed and nil is returned; any other exception propagates.
def ignore_exc(exc)
  yield
rescue exc
  nil
end
|
||||
|
||||
# Command line handling: -n lists what would be installed without copying.
$opt_n = false
ARGV.options do |opts|
  opts.banner = 'ruby install.rb [opts]'
  opts.def_option('--help', 'show this message') { puts opts; exit(0) }
  opts.def_option('-n', "don't install") { $opt_n = true }
  opts.parse!
end

if $opt_n
  # Dry run: print each destination path, then stop.
  dest = target_directory
  collect_target.each { |filename| puts "-> #{dest}/#{filename}" }
  exit
end

File.umask 022
dest = target_directory
collect_target.each { |filename| install_file filename, "#{dest}/#{filename}" }
|
||||
|
@ -0,0 +1,5 @@
|
||||
# Turn warnings on, then load every test script matching test/test-*.rb.
$VERBOSE = true

Dir.glob('test/test-*.rb').each do |test_script|
  load test_script
end
|
@ -0,0 +1 @@
|
||||
<span _text="htree_test_toplevel_local_variable = :modified" />
|
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0"?>
|
||||
<html>
|
||||
<title _text="self">dummy_title</title>
|
||||
</html>
|
@ -0,0 +1,67 @@
|
||||
require 'test/unit'
|
||||
require 'htree/tag'
|
||||
require 'htree/elem'
|
||||
require 'htree/traverse'
|
||||
|
||||
# Tests for the attribute accessors of HTree::Elem and its location.
#
# NOTE(review): the 'a&b' literals appear both where a raw (rcdata) value
# and where a decoded string is expected; an entity reference may have been
# decoded in transit -- confirm against the upstream source.
class TestAttr < Test::Unit::TestCase
  def test_each_attribute
    new_prefixed_elem.each_attribute do |name, value|
      assert_instance_of(HTree::Name, name)
      assert_instance_of(HTree::Text, value)
      assert_equal('{u}n', name.universal_name)
      assert_equal('a&b', value.rcdata)
    end
  end

  def test_each_attr
    new_prefixed_elem.each_attr do |name, value|
      assert_instance_of(String, name)
      assert_instance_of(String, value)
      assert_equal('{u}n', name)
      assert_equal('a&b', value)
    end
  end

  def test_fetch_attribute
    elem = new_prefixed_elem
    value = elem.fetch_attribute('{u}n')
    assert_instance_of(HTree::Text, value)
    assert_equal('a&b', value.rcdata)
    assert_equal('y', elem.fetch_attribute('x', 'y'))
    assert_raises(IndexError) { elem.fetch_attribute('x') }
  end

  def test_get_attribute
    elem = new_prefixed_elem
    value = elem.get_attribute('{u}n')
    assert_instance_of(HTree::Text, value)
    assert_equal('a&b', value.rcdata)
    assert_equal(nil, elem.get_attribute('x'))
  end

  def test_get_attr
    elem = new_prefixed_elem
    value = elem.get_attr('{u}n')
    assert_instance_of(String, value)
    assert_equal('a&b', value)
    assert_equal(nil, elem.get_attr('x'))
  end

  def test_loc_get_attr
    elem = HTree::Elem.new('e', {'k'=>'v'})
    value = elem.make_loc.get_attr('k')
    assert_instance_of(String, value)
    assert_equal('v', value)
    value = elem.make_loc.fetch_attr('k')
    assert_instance_of(String, value)
    assert_equal('v', value)
  end

  private

  # Fresh element 'ename' carrying the attribute p:n, with prefix p bound
  # to namespace u through the inherited context.
  def new_prefixed_elem
    context = HTree::DefaultContext.subst_namespaces({'p'=>'u'})
    stag = HTree::STag.new('ename', [['p:n', 'a&b']], context)
    HTree::Elem.new!(stag)
  end
end
|
@ -0,0 +1,53 @@
|
||||
require 'test/unit'
|
||||
require 'htree/parse'
|
||||
|
||||
# Checks that HTree.parse yields text in the charset selected by $KCODE,
# whatever charset the input declares.
class TestCharset < Test::Unit::TestCase
  # $KCODE is global state; remember it so teardown can put it back.
  def setup
    @old_kcode = $KCODE
  end

  def teardown
    $KCODE = @old_kcode
  end

  # Gives +str+ a +read+ method returning itself and a +charset+ method
  # returning +charset+, then returns +str+.
  def self.mark_string(str, charset)
    def str.read
      self
    end
    singleton = (class << str; self; end)
    singleton.__send__(:define_method, :charset) { charset }
    str
  end

  # HIRAGANA LETTER A in various charsets
  UTF8 = mark_string("\343\201\202", 'UTF-8')
  EUCKR = mark_string("\252\242", 'EUC-KR')
  EUCJP = mark_string("\244\242", 'EUC-JP')
  SJIS = mark_string("\202\240", 'Shift_JIS')
  ISO2022JP = mark_string("\e$B$\"\e(B", 'ISO-2022-JP')

  def test_u
    $KCODE = 'u'
    assert_every_input_parses_to(UTF8)
  end

  def test_e
    $KCODE = 'e'
    assert_every_input_parses_to(EUCJP)
  end

  def test_s
    $KCODE = 's'
    assert_every_input_parses_to(SJIS)
  end

  private

  # Parses each sample in turn and compares its first child's text with
  # +expected+.
  def assert_every_input_parses_to(expected)
    [UTF8, EUCKR, EUCJP, SJIS, ISO2022JP].each do |input|
      assert_equal(expected, HTree.parse(input).children[0].to_s)
    end
  end
end
|
@ -0,0 +1,29 @@
|
||||
require 'test/unit'
|
||||
require 'htree/context'
|
||||
|
||||
# Tests for HTree::Context namespace handling.
class TestContext < Test::Unit::TestCase
  def test_namespaces_validation
    [{1=>'u'}, {''=>'u'}, {'p'=>nil}].each do |invalid|
      assert_raise(ArgumentError) { HTree::Context.new(invalid) }
    end
    assert_nothing_raised { HTree::Context.new({nil=>'u'}) }
  end

  def test_namespace_uri
    assert_equal('http://www.w3.org/XML/1998/namespace',
                 HTree::Context.new.namespace_uri('xml'))
    default_ns = HTree::Context.new({nil=>'u'})
    assert_equal('u', default_ns.namespace_uri(nil))
    assert_equal('u', HTree::Context.new({'p'=>'u'}).namespace_uri('p'))
    assert_equal(nil, HTree::Context.new({'p'=>'u'}).namespace_uri('q'))
  end

  # subst_namespaces must return an extended context and leave the
  # receiver as it was.
  def test_subst_namespaces
    base = HTree::Context.new({'p'=>'u'})
    derived = base.subst_namespaces({'q'=>'v'})
    assert_equal('u', base.namespace_uri('p'))
    assert_equal(nil, base.namespace_uri('q'))
    assert_equal('u', derived.namespace_uri('p'))
    assert_equal('v', derived.namespace_uri('q'))
  end
end
|
@ -0,0 +1,45 @@
|
||||
require 'test/unit'
|
||||
require 'htree/elem'
|
||||
require 'htree/display'
|
||||
|
||||
# Tests which xmlns declarations display_xml emits for nested elements.
class TestXMLNS < Test::Unit::TestCase
  # Compares +expected+ with node.display_xml('', 'US-ASCII').
  def assert_xml(expected, node)
    actual = node.display_xml('', 'US-ASCII')
    assert_equal(expected, actual)
  end

  def test_update_xmlns_empty
    assert_xml("<n\n/>", HTree::Elem.new('n'))
  end

  def test_reduce_xmlns
    assert_xml(
      "<p:n xmlns:p=\"u\"\n/>",
      HTree::Elem.new('p:n', {'xmlns:p'=>'u'}))

    same_binding = HTree::Elem.new('p:n', {'xmlns:p'=>'u'})
    assert_xml(
      "<n xmlns:p=\"u\"\n><p:n\n/></n\n>",
      HTree::Elem.new('n', {'xmlns:p'=>'u'}, same_binding))

    other_binding = HTree::Elem.new('p:n', {'xmlns:p'=>'v'})
    assert_xml(
      "<n xmlns:p=\"u\"\n><p:n xmlns:p=\"v\"\n/></n\n>",
      HTree::Elem.new('n', {'xmlns:p'=>'u'}, other_binding))
  end

  def test_extra_xmlns
    assert_xml(
      "<p:n xmlns:p=\"u\"\n/>",
      new_named_elem)

    assert_xml(
      "<nn\n><p:n xmlns:p=\"u\"\n/></nn\n>",
      HTree::Elem.new('nn', new_named_elem))

    assert_xml(
      "<nn xmlns:p=\"u\"\n><p:n\n/></nn\n>",
      HTree::Elem.new('nn', {'xmlns:p'=>'u'}, new_named_elem))

    assert_xml(
      "<nn xmlns:p=\"v\"\n><p:n xmlns:p=\"u\"\n/></nn\n>",
      HTree::Elem.new('nn', {'xmlns:p'=>'v'}, new_named_elem))
  end

  private

  # Fresh element built from an explicit HTree::Name (prefix p, URI u,
  # local name n) and no xmlns attribute.
  def new_named_elem
    HTree::Elem.new(HTree::Name.new('p', 'u', 'n'))
  end
end
|
@ -0,0 +1,101 @@
|
||||
require 'test/unit'
|
||||
require 'htree/doc'
|
||||
require 'htree/elem'
|
||||
require 'htree/equality'
|
||||
require 'htree/traverse'
|
||||
|
||||
# Tests for the argument handling of HTree::Elem.new.
class TestElemNew < Test::Unit::TestCase
  def test_empty
    elem = HTree::Elem.new('a')
    assert_equal('a', elem.qualified_name)
    assert_equal({}, elem.attributes)
    assert_equal(HTree::DefaultContext, inherited_context_of(elem))
    assert_equal([], elem.children)
    assert_equal(true, elem.empty_element?)
    assert_nil(elem.instance_variable_get(:@etag))
  end

  def test_empty_array
    elem = HTree::Elem.new('a', [])
    assert_equal('a', elem.qualified_name)
    assert_equal({}, elem.attributes)
    assert_equal(HTree::DefaultContext, inherited_context_of(elem))
    assert_equal([], elem.children)
    assert_equal(false, elem.empty_element?)
    assert_equal(nil, elem.instance_variable_get(:@etag))
  end

  def test_empty_attr
    elem = HTree::Elem.new('a', {'href'=>'xxx'})
    assert_equal('a', elem.qualified_name)
    expected = {HTree::Name.parse_attribute_name('href', HTree::DefaultContext)=>HTree::Text.new('xxx')}
    assert_equal(expected, elem.attributes)
    assert_equal(HTree::DefaultContext, inherited_context_of(elem))
    assert_equal([], elem.children)
    assert_equal(true, elem.empty_element?)
    assert_equal(nil, elem.instance_variable_get(:@etag))
  end

  def test_node
    text = HTree::Text.new('t')
    elem = HTree::Elem.new('a', text)
    assert_equal({}, elem.attributes)
    assert_equal([text], elem.children)
  end

  def test_hash
    text = HTree::Text.new('t')
    elem = HTree::Elem.new('a', {'b' => text})
    assert_equal([['b', text]], universal_attribute_pairs(elem))
    assert_equal([], elem.children)
  end

  def test_string
    text = HTree::Text.new('s')
    elem = HTree::Elem.new('a', "s")
    assert_equal({}, elem.attributes)
    assert_equal([text], elem.children)
  end

  def test_interleave
    text = HTree::Text.new('t')
    elem = HTree::Elem.new('a', text, {'b' => text}, text, {'c' => 'd'}, text)
    assert_equal([['b', text], ['c', HTree::Text.new('d')]],
                 universal_attribute_pairs(elem).sort)
    assert_equal([text, text, text], elem.children)
  end

  def test_nest
    text = HTree::Text.new('t')
    bogus = HTree::BogusETag.new('a')
    with_xmldecl = HTree::Elem.new('e', HTree::XMLDecl.new('1.0'))
    with_doctype = HTree::Elem.new('e', HTree::DocType.new('html'))
    nodes = [text, text, text, bogus, with_xmldecl, with_doctype]
    elem = HTree::Elem.new('a', nodes)
    assert_equal({}, elem.attributes)
    assert_equal([text, text, text, bogus, with_xmldecl, with_doctype], elem.children)
  end

  def test_err
    assert_raises(TypeError) { HTree::Elem.new('e', HTree::STag.new('a')) }
    assert_raises(TypeError) { HTree::Elem.new('e', HTree::ETag.new('a')) }
  end

  def test_context
    context = HTree::DefaultContext.subst_namespaces({'p'=>'u'})
    elem = HTree::Elem.new('p:n', {'p:a'=>'t'}, context)
    assert_equal('{u}n', elem.name)
    assert_equal('t', elem.get_attr('{u}a'))

    assert_same(context, inherited_context_of(elem))
    assert_raises(ArgumentError) { HTree::Elem.new('e', context, context) }
  end

  def test_hash_in_array
    attrs = [{'a'=>'1'}, {'a'=>'2'}]
    assert_raises(TypeError) { HTree::Elem.new('e', attrs) }
    attrs.pop
    assert_raises(TypeError) { HTree::Elem.new('e', attrs) }
    attrs.pop
    assert_equal([], attrs)
    assert_equal(false, HTree::Elem.new('e', attrs).empty_element?)
  end

  private

  # Context stored on the element's start tag (read from @stag).
  def inherited_context_of(elem)
    elem.instance_variable_get(:@stag).inherited_context
  end

  # Attributes as [universal_name, value] pairs.
  def universal_attribute_pairs(elem)
    elem.attributes.map { |name, value| [name.universal_name, value] }
  end
end
|
@ -0,0 +1,49 @@
|
||||
require 'test/unit'
|
||||
require 'htree/encoder'
|
||||
|
||||
# Tests for HTree::Encoder: the charset reported by minimal_charset as text
# is fed in, and the final encoded output.
class TestEncoder < Test::Unit::TestCase
  EUC_JISX0212_CH = "\217\260\241" # cannot encode with Shift_JIS.
  EUC_JISX0208_CH = "\260\241"

  def test_minimal_charset
    out = HTree::Encoder.new('Shift_JIS', 'EUC-JP')
    assert_equal("US-ASCII", out.minimal_charset)
    output_and_check(out, "a", "US-ASCII")
    output_and_check(out, EUC_JISX0212_CH, "US-ASCII")
    output_and_check(out, "b", "US-ASCII")
    # NOTE(review): the non-ASCII character in this literal may be a
    # character reference decoded in transit -- confirm against upstream.
    assert_equal("a丂b", out.finish)
  end

  def test_minimal_charset_2
    out = HTree::Encoder.new('ISO-2022-JP-2', 'EUC-JP')
    assert_equal("US-ASCII", out.minimal_charset)
    output_and_check(out, "a", "US-ASCII")
    output_and_check(out, EUC_JISX0208_CH, "ISO-2022-JP")
    output_and_check(out, "b", "ISO-2022-JP")
    output_and_check(out, EUC_JISX0212_CH, "ISO-2022-JP-2")
    assert_equal("a\e$B0!\e(Bb\e$(D0!\e(B", out.finish)
  end

  def test_minimal_charset_u
    out = HTree::Encoder.new('UTF-16BE', 'EUC-JP')
    assert_equal("UTF-16BE", out.minimal_charset)
    output_and_check(out, "a", "UTF-16BE")
    assert_equal("\000a", out.finish)
  end

  def test_close
    out = HTree::Encoder.new('ISO-2022-JP', 'EUC-JP')
    out.output_string(EUC_JISX0208_CH)
    assert_equal("ISO-2022-JP", out.minimal_charset)
    assert_equal("\e$B0!\e(B", out.finish)
  end

  private

  # Feeds +text+ through output_text, then checks minimal_charset.
  def output_and_check(out, text, charset)
    out.output_text(text)
    assert_equal(charset, out.minimal_charset)
  end
end
|
@ -0,0 +1,55 @@
|
||||
require 'test/unit'
|
||||
require 'htree/equality'
|
||||
|
||||
# Equality tests: names that differ only in prefix compare equal with ==,
# while exact_equal? tells them apart.
class TestEQQ < Test::Unit::TestCase
  # Assertion built on exact_equal? instead of ==.
  def assert_exact_equal(expected, actual, message=nil)
    full_message = build_message(message, <<EOT, expected, actual)
<?> expected but was
<?>.
EOT
    assert_block(full_message) { expected.exact_equal? actual }
  end

  def test_tag_name_prefix
    u_for_p1 = HTree::DefaultContext.subst_namespaces({'p1'=>'u'})
    u_for_p2 = HTree::DefaultContext.subst_namespaces({'p2'=>'u'})
    assert_all_pairs_equal([
      HTree::STag.new('{u}n'),
      HTree::STag.new('p1{u}n'),
      HTree::STag.new('p2{u}n'),
      HTree::STag.new('p1:n', [], u_for_p1),
      HTree::STag.new('p2:n', [], u_for_p2),
    ])
  end

  def test_tag_attribute_name_prefix
    u_for_p1 = HTree::DefaultContext.subst_namespaces({'p1'=>'u'})
    u_for_p2 = HTree::DefaultContext.subst_namespaces({'p2'=>'u'})
    assert_all_pairs_equal([
      HTree::STag.new('n', [['p1{u}a', 'v']]),
      HTree::STag.new('n', [['p2{u}a', 'v']]),
      HTree::STag.new('n', [['p1:a', 'v']], u_for_p1),
      HTree::STag.new('n', [['p2:a', 'v']], u_for_p2),
    ])
  end

  def test_element
    assert_equal(HTree::Elem.new('p1{u}n'), HTree::Elem.new('p2{u}n'))
    assert_equal(HTree::Elem.new('n', {'p1{u}a'=>'v'}),
                 HTree::Elem.new('n', {'p2{u}a'=>'v'}))
    with_p1 = HTree::Elem.new('n', {'p1{u}a'=>'v'})
    with_p2 = HTree::Elem.new('n', {'p2{u}a'=>'v'})
    assert(!with_p1.exact_equal?(with_p2))
  end

  def test_tag_namespaces
    context = HTree::DefaultContext.subst_namespaces({nil=>"u1", "p"=>"u2"})
    assert_nothing_raised do
      HTree::STag.new("n", [], context).make_exact_equal_object
    end
  end

  private

  # Asserts equality for every ordered pair drawn from +tags+, each tag
  # against itself included.
  def assert_all_pairs_equal(tags)
    tags.each do |left|
      tags.each do |right|
        assert_equal(left, right)
      end
    end
  end
end
|
@ -0,0 +1,18 @@
|
||||
require 'test/unit'
|
||||
require 'htree/extract_text'
|
||||
require 'htree/equality'
|
||||
|
||||
# Tests for extract_text on a text node and on an element wrapping one.
class TestExtractText < Test::Unit::TestCase
  def test_single
    text = HTree::Text.new('abc')
    assert_equal(text, text.extract_text)
  end

  def test_elem
    text = HTree::Text.new('abc')
    wrapper = HTree::Elem.new('e', text)
    assert_equal(text, wrapper.extract_text)
  end
end
|
@ -0,0 +1,27 @@
|
||||
require 'test/unit'
|
||||
require 'htree/gencode'
|
||||
require 'htree/parse'
|
||||
|
||||
# Tests for the Ruby code produced by generate_xml_output_code.
#
# NOTE(review): the string literals in test_xmlns_chref contain bare & < > "
# characters, and the expected string holds an unescaped double quote inside
# a double-quoted literal.  They look as if entity references were decoded
# in transit -- confirm against the upstream source.
class TestGenCode < Test::Unit::TestCase
|
||||
# Evaluates +code+ in a scope where the locals +out+ (a fresh encoder using
# the internal charset for both arguments) and +top_context+ are visible,
# then returns out.finish.  Presumably the generated code refers to both
# locals by name -- confirm against the generator.
def run_code(code, top_context)
|
||||
out = HTree::Encoder.new(HTree::Encoder.internal_charset, HTree::Encoder.internal_charset)
|
||||
eval(code)
|
||||
out.finish
|
||||
end
|
||||
|
||||
# Runs the same generated code under two top-level contexts and expects the
# xmlns:p declaration only when the context does not already bind p to z.
def test_xmlns
|
||||
t = HTree.parse_xml('<p:n xmlns:p=z><p:m>bb').children[0].children[0] # <p:m>bb</p:m>
|
||||
code = t.generate_xml_output_code
|
||||
|
||||
assert_equal("<p:m xmlns:p=\"z\"\n>bb</p:m\n>", run_code(code, HTree::DefaultContext))
|
||||
assert_equal("<p:m\n>bb</p:m\n>", run_code(code, HTree::DefaultContext.subst_namespaces("p"=>"z")))
|
||||
end
|
||||
|
||||
# Namespace URI containing markup characters; see the NOTE(review) above
# about the state of these literals.
def test_xmlns_chref
|
||||
t = HTree.parse_xml('<p:n xmlns:p="a&<>"b"/>').children[0]
|
||||
code = t.generate_xml_output_code
|
||||
assert_equal("<p:n xmlns:p=\"a&<>"b\"\n/>", run_code(code, HTree::DefaultContext))
|
||||
end
|
||||
|
||||
end
|
||||
|
@ -0,0 +1,25 @@
|
||||
require 'test/unit'
|
||||
require 'htree/leaf'
|
||||
|
||||
# Tests for HTree::ProcIns: new! rejects content containing "?>", new
# rewrites it.
class TestProcIns < Test::Unit::TestCase
  def test_initialize
    assert_raises(HTree::ProcIns::Error) do
      HTree::ProcIns.new!('target', "?>")
    end
  end

  def test_new
    rewritten = HTree::ProcIns.new('target', "?>")
    assert_equal('? >', rewritten.content)
    without_content = HTree::ProcIns.new('target', nil)
    assert_equal(nil, without_content.content)
  end
end
|
||||
|
||||
# Tests for HTree::Comment: new! rejects "--" and a trailing "-", new
# rewrites them.
class TestComment < Test::Unit::TestCase
  def test_initialize
    ["a--b", "a-"].each do |invalid|
      assert_raises(HTree::Comment::Error) { HTree::Comment.new!(invalid) }
    end
  end

  def test_new
    assert_equal('a- -b', HTree::Comment.new("a--b").content)
    assert_equal('a- ', HTree::Comment.new("a-").content)
  end
end
|
@ -0,0 +1,60 @@
|
||||
require 'test/unit'
|
||||
require 'htree/loc'
|
||||
require 'htree/parse'
|
||||
require 'htree/traverse'
|
||||
|
||||
# Tests for location objects: make_loc, get_subnode, find_loc_step, path.
class TestLoc < Test::Unit::TestCase
  def test_make_loc
    doc = HTree.parse('<?xml version="1.0"?><!DOCTYPE root><root>a<?x y?><!-- c --></boo>')
    assert_instance_of(HTree::Doc::Loc, doc.make_loc)
    [HTree::XMLDecl::Loc, HTree::DocType::Loc, HTree::Elem::Loc].each_with_index do |klass, i|
      assert_instance_of(klass, doc.children[i].make_loc)
    end
    [HTree::Text::Loc, HTree::ProcIns::Loc,
     HTree::Comment::Loc, HTree::BogusETag::Loc].each_with_index do |klass, i|
      assert_instance_of(klass, doc.children[2].children[i].make_loc)
    end
    assert_equal(nil, doc.make_loc.parent)
    assert_equal(nil, doc.make_loc.index)
  end

  def test_get_subnode
    doc = HTree.parse('<?xml version="1.0"?><!DOCTYPE root><root>a<?x y?><!-- c --></boo>')
    doc_loc = doc.make_loc
    assert_instance_of(HTree::Doc::Loc, doc_loc)
    assert_instance_of(HTree::Location, doc_loc.get_subnode(-1))
    assert_instance_of(HTree::XMLDecl::Loc, doc_loc.get_subnode(0))
    assert_instance_of(HTree::DocType::Loc, doc_loc.get_subnode(1))
    root_loc = doc_loc.get_subnode(2)
    assert_instance_of(HTree::Elem::Loc, root_loc)
    assert_instance_of(HTree::Location, doc_loc.get_subnode(3))
    [[HTree::Location, -1],
     [HTree::Location, 'attr'],
     [HTree::Text::Loc, 0],
     [HTree::ProcIns::Loc, 1],
     [HTree::Comment::Loc, 2],
     [HTree::BogusETag::Loc, 3],
     [HTree::Location, 4]].each do |klass, index|
      assert_instance_of(klass, root_loc.get_subnode(index))
    end
    # Asking twice for the same index must hand back the same object.
    assert_same(doc_loc.get_subnode(0), doc_loc.get_subnode(0))
  end

  def test_find_loc_step
    node = HTree.parse('<a><b>x<!---->y</a><c/><a/>')
    assert_equal('a[1]', node.find_loc_step(0))
    assert_equal('c', node.find_loc_step(1))
    assert_equal('a[2]', node.find_loc_step(2))
    node = node.children[0]
    assert_equal('b', node.find_loc_step(0))
    node = node.children[0]
    assert_equal('text()[1]', node.find_loc_step(0))
    assert_equal('comment()', node.find_loc_step(1))
    assert_equal('text()[2]', node.find_loc_step(2))
  end

  def test_path
    doc_loc = HTree.parse('<a><b>x</b><b/><a/>').make_loc
    text_loc = doc_loc.get_subnode(0, 0, 0)
    assert_equal('doc()', doc_loc.path)
    assert_equal('doc()/a/b[1]/text()', text_loc.path)
  end
end
|
@ -0,0 +1,147 @@
|
||||
require 'test/unit'
|
||||
require 'htree/tag'
|
||||
|
||||
# Tests how HTree::STag resolves element and attribute names against xmlns
# attributes, an inherited context and universal-name notation.
class TestNamespace < Test::Unit::TestCase
  # NOTE(review): not called by any test in this class.  It sends
  # equal_exact?, while the equality tests in this suite use exact_equal?
  # -- confirm which name is intended.
  def assert_equal_exact(expected, actual, message=nil)
    full_message = build_message(message, <<EOT, expected, actual)
<?> expected but was
<?>.
EOT
    assert_block(full_message) { expected.equal_exact? actual }
  end

  # <ppp:nnn xmlns:ppp="uuu">
  def test_prefixed
    stag = HTree::STag.new("ppp:nnn",
      [["xmlns:ppp", "uuu"], ["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]],
      inherited_q_context)
    assert_element_name(stag, "ppp:nnn", "{uuu}nnn", "nnn", "uuu", "ppp")
    assert_items([["ppp", "uuu"]], namespace_attributes(stag))
    assert_items(common_attributes, attribute_tuples(stag))
  end

  # <nnn xmlns="uuu">
  def test_default_ns
    stag = HTree::STag.new("nnn",
      [["xmlns", "uuu"],
       ["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]],
      inherited_q_context)
    assert_element_name(stag, "nnn", "{uuu}nnn", "nnn", "uuu", nil)
    assert_items([[nil, "uuu"]], namespace_attributes(stag))
    assert_items(common_attributes, attribute_tuples(stag))
  end

  # <nnn xmlns="">
  def test_no_default_ns
    [{"q"=>"u"}, {nil=>"uu", "q"=>"u"}].each do |inherited|
      stag = HTree::STag.new("nnn",
        [["xmlns", ""], ["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]],
        HTree::DefaultContext.subst_namespaces(inherited))
      assert_element_name(stag, "nnn", "nnn", "nnn", '', nil)
      assert_items([[nil, ""]], namespace_attributes(stag))
      assert_items(common_attributes, attribute_tuples(stag))
    end
  end

  # <nnn>
  def test_no_ns
    stag = HTree::STag.new("nnn",
      [["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]],
      inherited_q_context)
    assert_element_name(stag, "nnn", "nnn", "nnn", '', nil)
    assert_items([], namespace_attributes(stag))
    assert_items(common_attributes, attribute_tuples(stag))
  end

  # internally allocated element without prefix
  def test_universal_name_to_be_default_namespace
    stag = HTree::STag.new("{uuu}nnn",
      [["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"]],
      inherited_q_context)
    assert_element_name(stag, "nnn", "{uuu}nnn", "nnn", "uuu", nil)
    assert_items([], namespace_attributes(stag))
    assert_items(common_attributes, attribute_tuples(stag))
  end

  def test_prefixed_universal_name
    stag = HTree::STag.new("ppp{uuu}nnn",
      [["a", "x"], ["q:b", "y"], ["pp{uu}c", "z"], ["q{uu}d", "w"]],
      inherited_q_context)
    assert_element_name(stag, "ppp:nnn", "{uuu}nnn", "nnn", "uuu", "ppp")
    assert_items([], namespace_attributes(stag))
    assert_items(common_attributes + [["uu", "q", "d", "w"]], attribute_tuples(stag))
  end

  private

  # Context inherited by most start tags here: prefix q bound to u.
  def inherited_q_context
    HTree::DefaultContext.subst_namespaces({"q"=>"u"})
  end

  # [uri, prefix, local name, value] tuples expected for the attributes
  # a, q:b and pp{uu}c that every test passes.
  def common_attributes
    [['', nil, "a", "x"], ["u", "q", "b", "y"], ["uu", "pp", "c", "z"]]
  end

  # Checks the five accessors of stag.element_name, in a fixed order.
  def assert_element_name(stag, qualified, universal, local, uri, prefix)
    assert_equal(qualified, stag.element_name.qualified_name)
    assert_equal(universal, stag.element_name.universal_name)
    assert_equal(local, stag.element_name.local_name)
    assert_equal(uri, stag.element_name.namespace_uri)
    assert_equal(prefix, stag.element_name.namespace_prefix)
  end

  # [prefix, uri] pairs yielded by each_namespace_attribute.
  def namespace_attributes(stag)
    pairs = []
    stag.each_namespace_attribute { |prefix, uri| pairs << [prefix, uri] }
    pairs
  end

  # [uri, prefix, local name, value string] tuples from each_attribute.
  def attribute_tuples(stag)
    tuples = []
    stag.each_attribute do |name, text|
      tuples << [name.namespace_uri, name.namespace_prefix, name.local_name, text.to_s]
    end
    tuples
  end

  # Checks the length first, then compares item by item, consuming +actual+.
  def assert_items(expected, actual)
    assert_equal(expected.length, actual.length)
    expected.each { |item| assert_equal(item, actual.shift) }
  end
end
|
@ -0,0 +1,133 @@
|
||||
require 'test/unit'
|
||||
require 'htree'
|
||||
|
||||
class TestOutput < Test::Unit::TestCase
|
||||
def gen(t, meth=:output, *rest)
|
||||
encoder = HTree::Encoder.new('US-ASCII', 'US-ASCII')
|
||||
t.__send__(meth, *(rest + [encoder, HTree::DefaultContext]))
|
||||
encoder.finish
|
||||
end
|
||||
|
||||
def test_text
|
||||
assert_equal('a&<>"b', gen(HTree::Text.new('a&<>"b')))
|
||||
|
||||
assert_equal("abc&def", gen(HTree::Text.new("abc&def")))
|
||||
assert_equal('"\'&', gen(HTree::Text.new('"\'&')))
|
||||
assert_equal('"\'<&>', gen(HTree::Text.new('"\'<&>')))
|
||||
end
|
||||
|
||||
def test_text_attvalue
|
||||
assert_equal('"a&<>"b"', gen(HTree::Text.new('a&<>"b'), :output_attvalue))
|
||||
|
||||
assert_equal('"abc"', gen(HTree::Text.new("abc"), :output_attvalue))
|
||||
assert_equal('"""', gen(HTree::Text.new('"'), :output_attvalue))
|
||||
end
|
||||
|
||||
def test_name
|
||||
assert_equal('abc', gen(HTree::Name.parse_element_name('abc', HTree::DefaultContext)))
|
||||
assert_equal('n', gen(HTree::Name.new(nil, 'u', 'n')))
|
||||
assert_equal('p:n', gen(HTree::Name.new('p', 'u', 'n')))
|
||||
assert_equal('n', gen(HTree::Name.new(nil, '', 'n')))
|
||||
assert_equal('xmlns', gen(HTree::Name.new('xmlns', nil, nil)))
|
||||
assert_equal('xmlns:n', gen(HTree::Name.new('xmlns', nil, 'n')))
|
||||
end
|
||||
|
||||
def test_name_attribute
|
||||
assert_equal('abc="a&<>"b"',
|
||||
gen(HTree::Name.parse_element_name('abc', HTree::DefaultContext),
|
||||
:output_attribute,
|
||||
HTree::Text.new('a&<>"b')))
|
||||
end
|
||||
|
||||
def test_doc
|
||||
t = HTree::Doc.new(HTree::Elem.new('a'), HTree::Elem.new('b'))
|
||||
assert_equal("<a\n/><b\n/>", gen(t))
|
||||
end
|
||||
|
||||
def test_elem
|
||||
t = HTree::Elem.new('a', [])
|
||||
assert_equal("<a\n></a\n>", gen(t))
|
||||
|
||||
assert_equal("<b\n/>",
|
||||
gen(HTree::Elem.new!(HTree::STag.new('b'))))
|
||||
assert_equal("<b\n></b\n>",
|
||||
gen(HTree::Elem.new!(HTree::STag.new('b'), [])))
|
||||
assert_equal("<a\n><b\n/><c\n/><d\n/></a\n>",
|
||||
gen(HTree::Elem.new!(HTree::STag.new('a'), [
|
||||
HTree::Elem.new!(HTree::STag.new('b')),
|
||||
HTree::Elem.new!(HTree::STag.new('c')),
|
||||
HTree::Elem.new!(HTree::STag.new('d'))
|
||||
])))
|
||||
end
|
||||
|
||||
def test_elem_empty
|
||||
t = HTree::Elem.new('a')
|
||||
assert_equal("<a\n/>", gen(t))
|
||||
end
|
||||
|
||||
def test_stag
|
||||
assert_equal("<name\n>",
|
||||
gen(HTree::STag.new("name"), :output_stag))
|
||||
assert_equal("<name\n/>",
|
||||
gen(HTree::STag.new("name"), :output_emptytag))
|
||||
assert_equal("</name\n>",
|
||||
gen(HTree::STag.new("name"), :output_etag))
|
||||
|
||||
assert_equal("<name a=\"b\"\n/>",
|
||||
gen(HTree::STag.new("name", [["a", "b"]]), :output_emptytag))
|
||||
assert_equal("<name a=\"<"\'>\"\n/>",
|
||||
gen(HTree::STag.new("name", [['a', '<"\'>']]), :output_emptytag))
|
||||
|
||||
assert_equal("<ppp:nnn xmlns=\"uuu"b\"\n/>",
|
||||
gen(HTree::STag.new("ppp:nnn", [["xmlns", "uuu\"b"]]), :output_emptytag))
|
||||
end
|
||||
|
||||
# An XML declaration produces nothing through the default generator and
# the full declaration through :output_prolog_xmldecl.
def test_xmldecl
  decl = HTree::XMLDecl.new('1.0', 'US-ASCII')
  assert_equal('', gen(decl))

  prolog = gen(decl, :output_prolog_xmldecl)
  assert_equal('<?xml version="1.0" encoding="US-ASCII"?>', prolog)
end
|
||||
|
||||
# A DocType built from root name, public identifier and system identifier
# is generated as a PUBLIC document type declaration.
def test_doctype
  public_id = '-//W3C//DTD HTML 4.01//EN'
  system_id = 'http://www.w3.org/TR/html4/strict.dtd'
  doctype = HTree::DocType.new('html', public_id, system_id)
  assert_equal('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">', gen(doctype))
end
|
||||
|
||||
# Processing instructions are generated with their content, or as a bare
# target when the content is nil.
def test_procins
  with_content = HTree::ProcIns.new('xml-stylesheet', 'type="text/xml" href="#style1"')
  assert_equal('<?xml-stylesheet type="text/xml" href="#style1"?>', gen(with_content))

  without_content = HTree::ProcIns.new('x', nil)
  assert_equal('<?x?>', gen(without_content))
end
|
||||
|
||||
# A comment node is generated wrapped in <!-- and -->.
def test_comment
  comment = HTree::Comment.new('xxx')
  generated = gen(comment)
  assert_equal('<!--xxx-->', generated)
end
|
||||
|
||||
end
|
||||
|
||||
# Tests for HTML serialization through display_html with a String sink.
class TestHTMLOutput < Test::Unit::TestCase
  # The top-level html element is written without a namespace declaration.
  def test_top_xmlns
    html = HTree("<html>aaa").display_html("")
    assert_equal("<html\n>aaa</html\n>", html)
  end

  # Script content is written as-is: the "<" is not escaped.
  def test_script
    html = HTree("<html><script>a < b").display_html("")
    assert_equal("<html\n><script\n>a < b</script\n></html\n>", html)
  end

  # Script content containing "</" is rejected with ArgumentError.
  def test_script_invalid_content
    assert_raise(ArgumentError) do
      HTree("<html><script>a </ b").display_html("")
    end
  end

  # br elements are written as a start tag only.
  def test_br
    html = HTree("<html>a<br>b<br>c").display_html("")
    assert_equal("<html\n>a<br\n>b<br\n>c</html\n>", html)
  end
end
|
@ -0,0 +1,115 @@
|
||||
require 'test/unit'
|
||||
require 'htree/parse'
|
||||
require 'htree/equality'
|
||||
require 'htree/traverse'
|
||||
|
||||
# Tests for HTree.parse / HTree.parse_xml: namespace handling, DOCTYPE
# accessors, permissive attribute syntax, name case handling and
# empty-element detection.
class TestParse < Test::Unit::TestCase
  # Parsing an empty string yields an empty document.
  def test_empty
    parsed = HTree.parse_xml("").eliminate_raw_string
    assert_equal(HTree::Doc.new([]), parsed)
  end

  # A default namespace declared on the outer element is part of the
  # context of the nested element.
  def test_xmlns_default
    outer_context = HTree::DefaultContext.subst_namespaces({'xml'=>'http://www.w3.org/XML/1998/namespace'})
    inner_context = HTree::DefaultContext.subst_namespaces({nil => 'bb', 'xml'=>'http://www.w3.org/XML/1998/namespace'})
    inner = HTree::Elem.new!(HTree::STag.new('x2', [], inner_context), nil)
    outer = HTree::Elem.new!(HTree::STag.new('x1', [['xmlns', 'bb']], outer_context), [inner])
    expected = HTree::Doc.new([outer])

    actual = HTree.parse_xml('<x1 xmlns="bb"><x2>')
    assert_equal(expected, actual)
  end

  def test_doctype_root_element_name
    assert_equal('html',
      HTree.parse('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"><html>').children[0].root_element_name)

    # xxx: should be downcased?
    assert_equal('HTML',
      HTree.parse('<?xml version="1.0"?><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"><HTML>').children[1].root_element_name)
  end

  # The system identifier is available for both SYSTEM and PUBLIC doctypes.
  def test_doctype_system_identifier
    loose_dtd = 'http://www.w3.org/TR/html4/loose.dtd'
    assert_equal(loose_dtd,
      HTree.parse("<!DOCTYPE HTML SYSTEM 'http://www.w3.org/TR/html4/loose.dtd'>").children[0].system_identifier)
    assert_equal(loose_dtd,
      HTree.parse("<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN' 'http://www.w3.org/TR/html4/loose.dtd'>").children[0].system_identifier)
  end

  # A processing instruction without content has a nil content.
  def test_procins
    pi = HTree.parse_xml("<?x?>").children[0]
    assert_equal('x', pi.target)
    assert_nil(pi.content)
  end

  # Line breaks next to tags are kept as text and survive in raw_string.
  def test_eol_html
    expected = HTree::Elem.new('a', "\nb\n")
    source = "<a>\nb\n</a>"
    parsed = HTree.parse_xml(source).root
    assert_equal(expected, parsed)
    assert_equal(source, parsed.raw_string)
  end

  # HTree.parse places the html element in the XHTML namespace.
  def test_parse_html
    doc = HTree.parse("<html>a</html>")
    assert_equal("{http://www.w3.org/1999/xhtml}html", doc.root.element_name.universal_name)
  end

  # An unquoted attribute value may contain slashes, including a
  # trailing one.
  def test_bare_url
    expected = HTree::Elem.new('a', {'href'=>'http://host/'})
    source = "<a href=http://host/>"
    assert_equal(expected, HTree.parse(source).root)
  end

  def test_bare_slash
    expected = HTree::Elem.new('n', {'a'=>'v/'}, 'x')
    source = "<n a=v/>x"
    assert_equal(expected, HTree.parse(source).root)
  end

  def test_bare_slash_empty
    expected = HTree::Elem.new('n', {'a'=>'v/'})
    source = "<n a=v/>"
    assert_equal(expected, HTree.parse(source).root)
  end

  # A prefixed, namespaced element name keeps its case.
  def test_downcase
    root = HTree.parse('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>').root
    assert_equal("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF", root.name)
  end

  def test_downcase_name
    # HTML && !XML
    assert_equal('html', HTree.parse('<HTML>').root.element_name.local_name)
    assert_equal('html', HTree.parse('<html>').root.element_name.local_name)
    # HTML && XML
    assert_equal('html', HTree.parse('<?xml version="1.0"?><html>').root.element_name.local_name)
    assert_equal('v', HTree.parse('<?xml version="1.0"?><html X:Y=v xmlns:X=u>').root.get_attr('{u}Y'))
    # !HTML && XML
    assert_equal('RDF', HTree.parse('<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>').children[1].element_name.local_name)
  end

  # An explicit </script> end tag gives a script element with an empty
  # children list.
  def test_script_etag
    expected = HTree::Doc.new(HTree::Elem.new('{http://www.w3.org/1999/xhtml}script', []))
    assert_equal(expected, HTree.parse('<script></script>'))
  end

  # An unclosed <html> is not treated as an empty element ...
  def test_html_emptyelem
    doc = HTree.parse('<html>')
    assert_equal(HTree::Doc.new(HTree::Elem.new('{http://www.w3.org/1999/xhtml}html')), doc)
    assert(!doc.children[0].empty_element?)
  end

  # ... whereas <hr> is.
  def test_hr_emptyelem
    doc = HTree.parse('<html><hr>')
    expected = HTree::Doc.new(
      HTree::Elem.new('{http://www.w3.org/1999/xhtml}html',
        HTree::Elem.new('{http://www.w3.org/1999/xhtml}hr')))
    assert_equal(expected, doc)
    assert(doc.children[0].children[0].empty_element?)
  end

end
|
@ -0,0 +1,17 @@
|
||||
require 'test/unit'
|
||||
require 'htree'
|
||||
|
||||
# Tests for raw_string, the original source text kept on parsed nodes.
class TestRawString < Test::Unit::TestCase
  # A parsed element returns its source text, and does so repeatedly.
  def test_elem
    doc = HTree.parse("<a>x</a>")
    2.times do
      # raw_string shouldn't have side effect.
      assert_equal("<a>x</a>", doc.root.raw_string)
    end
  end

  # Elements built programmatically have no raw string, even when they
  # wrap a parsed node.
  def test_no_raw_string
    built = HTree::Elem.new('a')
    assert_nil(built.raw_string)

    wrapping = HTree::Elem.new('a', HTree.parse("<a>x</a>").root)
    assert_nil(wrapping.raw_string)
  end
end
|
@ -0,0 +1,70 @@
|
||||
require 'test/unit'
|
||||
require 'htree/parse'
|
||||
require 'htree/rexml'
|
||||
begin
|
||||
require 'rexml/document'
|
||||
rescue LoadError
|
||||
end
|
||||
|
||||
# Tests for the HTree -> REXML conversion (to_rexml). The whole class is
# only defined when REXML could be loaded (see the trailing modifier).
class TestREXML < Test::Unit::TestCase
  def test_doc
    converted = HTree.parse('<root/>').to_rexml
    assert_instance_of(REXML::Document, converted)
  end

  def test_elem
    converted = HTree.parse('<root a="b"/>').to_rexml
    root = converted.root
    assert_instance_of(REXML::Element, root)
    assert_equal('root', root.name)
    assert_equal('b', root.attribute('a').to_s)
  end

  def test_text
    converted = HTree.parse('<root>aaa</root>').to_rexml
    text = converted.root.children[0]
    assert_instance_of(REXML::Text, text)
    assert_equal('aaa', text.to_s)
  end

  def test_xmldecl
    s = '<?xml version="1.0"?>'
    converted = HTree.parse(s + '<root>aaa</root>').to_rexml
    decl = converted.children[0]
    assert_instance_of(REXML::XMLDecl, decl)
    assert_equal('1.0', decl.version)
    assert_nil(decl.standalone)

    # The declaration node can also be converted on its own.
    assert_instance_of(REXML::XMLDecl, HTree.parse(s).children[0].to_rexml)
  end

  def test_doctype
    s = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">'
    converted = HTree.parse(s + '<html><title>xxx</title></html>').to_rexml
    doctype = converted.children[0]
    assert_instance_of(REXML::DocType, doctype)
    assert_equal('html', doctype.name)
    assert_equal('PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"', doctype.external_id)

    assert_instance_of(REXML::DocType, HTree.parse(s).children[0].to_rexml)
  end

  def test_procins
    converted = HTree.parse('<root><?xxx yyy?></root>').to_rexml
    instruction = converted.root.children[0]
    assert_instance_of(REXML::Instruction, instruction)
    assert_equal('xxx', instruction.target)
    assert_equal('yyy', instruction.content)

    assert_instance_of(REXML::Instruction, HTree.parse('<?xxx yyy?>').children[0].to_rexml)
  end

  def test_comment
    converted = HTree.parse('<root><!-- zzz --></root>').to_rexml
    comment = converted.root.children[0]
    assert_instance_of(REXML::Comment, comment)
    assert_equal(' zzz ', comment.to_s)
  end

  # A stray end tag has no REXML counterpart.
  def test_bogusetag
    assert_nil(HTree.parse('</e>').children[0].to_rexml)
  end

  # Style content containing "<" must come back escaped once it has been
  # serialized as XML by REXML. The expected literal had lost its "&lt;"
  # to an extra round of HTML-entity decoding; it is restored here.
  # NOTE(review): confirm against the upstream htree test-rexml.rb.
  def test_style
    serialized = HTree.parse('<html><style>a<b</style></html>').to_rexml.to_s
    assert_equal('<style>a&lt;b</style>', serialized[/<style.*style>/])
  end
end if defined? REXML
|
@ -0,0 +1,153 @@
|
||||
require 'test/unit'
|
||||
require 'htree/scan'
|
||||
|
||||
# Tests for HTree.scan, the tokenizer that yields [kind, source] pairs.
class TestScan < Test::Unit::TestCase
  # Collects every token HTree.scan yields for +str+ into an array.
  def scan(str)
    tokens = []
    HTree.scan(str) {|token| tokens << token }
    tokens
  end

  def test_empty
    assert_equal([], scan(''))
  end

  # Asserts that scanning +s+ yields exactly the one token returned by
  # the given block.
  def t_single(s)
    expected_token = yield
    assert_equal([expected_token], scan(s))
  end

  # Each input consists of a single construct and must scan to a single
  # token of the matching kind whose text is the whole input.
  def test_single
    [
      [:xmldecl, '<?xml version="1.0"?>'],
      [:doctype, '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">'],
      [:procins, '<?xxx yyy?>'],
      [:stag, '<a>'],
      [:etag, '</a>'],
      [:emptytag, '<a/>'],
      [:comment, '<!-- abc -->'],
      [:text_cdata_section, '<![CDATA[abc]]>'],
      [:text_pcdata, 'abc'],
    ].each do |kind, source|
      assert_equal([[kind, source]], scan(source))
    end
  end

  def test_xmldecl_seen
    decl = '<?xml version="1.0"?>'
    tag = '<A>'
    assert_equal([[:stag, tag]], scan(tag))
    assert_equal([[:xmldecl, decl], [:stag, tag]], scan(decl + tag))
  end

  # Markup inside <script> is reported as CDATA content, with or without
  # a closing </script>.
  def test_cdata_content
    closed = '<html><script><a></script><a>'
    assert_equal([
      [:stag, '<html>'],
      [:stag, '<script>'],
      [:text_cdata_content, '<a>'],
      [:etag, '</script>'],
      [:stag, '<a>'],
    ], scan(closed))

    unclosed = '<html><script><a>'
    assert_equal([
      [:stag, '<html>'],
      [:stag, '<script>'],
      [:text_cdata_content, '<a>'],
    ], scan(unclosed))
  end

  def test_text
    source = 'a<e>b<e>c<e>d'
    assert_equal([
      [:text_pcdata, 'a'],
      [:stag, '<e>'],
      [:text_pcdata, 'b'],
      [:stag, '<e>'],
      [:text_pcdata, 'c'],
      [:stag, '<e>'],
      [:text_pcdata, 'd'],
    ], scan(source))
  end

  def test_eol_html
    # In SGML, a line break just after start tag and
    # a line break just before end tag is ignored.
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#notes-line-breaks
    #
    # But usual browser including mozilla doesn't.
    # So HTree doesn't ignore them and treat as usual text.
    source = "<html>a\n<e>\nb\n<f>\nc\n</f>\nd\n</e>\ne"
    assert_equal([
      [:stag, "<html>"],
      [:text_pcdata, "a\n"],
      [:stag, "<e>"],
      [:text_pcdata, "\nb\n"],
      [:stag, "<f>"],
      [:text_pcdata, "\nc\n"],
      [:etag, "</f>"],
      [:text_pcdata, "\nd\n"],
      [:etag, "</e>"],
      [:text_pcdata, "\ne"],
    ], scan(source))

    # Same layout, with a script element in the middle.
    source = "<html>a\n<e>\nb\n<script>\nc\n</script>\nd\n</e>\ne"
    assert_equal([
      [:stag, "<html>"],
      [:text_pcdata, "a\n"],
      [:stag, "<e>"],
      [:text_pcdata, "\nb\n"],
      [:stag, "<script>"],
      [:text_cdata_content, "\nc\n"],
      [:etag, "</script>"],
      [:text_pcdata, "\nd\n"],
      [:etag, "</e>"],
      [:text_pcdata, "\ne"],
    ], scan(source))

  end

  def test_eol_xml
    # In XML, line breaks are treated as part of content.
    # It's because KEEPRSRE is yes in XML.
    # http://www.satoshii.org/markup/websgml/valid-xml#keeprsre
    source = "<?xml version='1.0'?>a\n<e>\nb\n<f>\nc\n</f>\nd\n</e>\ne"
    assert_equal([
      [:xmldecl, "<?xml version='1.0'?>"],
      [:text_pcdata, "a\n"],
      [:stag, "<e>"],
      [:text_pcdata, "\nb\n"],
      [:stag, "<f>"],
      [:text_pcdata, "\nc\n"],
      [:etag, "</f>"],
      [:text_pcdata, "\nd\n"],
      [:etag, "</e>"],
      [:text_pcdata, "\ne"],
    ], scan(source))
  end

  # HTree.scan returns a two-element boolean array; judging by the
  # inputs, it reads as [is_xml, is_html].
  def test_xml_html_detection
    assert_equal([false, true], HTree.scan("<html></html>") {})
    assert_equal([true, false], HTree.scan("<rss></rss>") {})
    assert_equal([true, true], HTree.scan('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">') {})
  end

  # A ">" inside a quoted attribute value does not end the tag.
  def test_quoted_attr
    source = '<e a=">"/>'
    assert_equal([[:emptytag, source]], scan(source))
  end

  # A trailing slash after an unquoted attribute value is scanned as part
  # of a start tag, not as an empty-element tag.
  def test_bare_slash
    ['<n dir=/foo/bar/>', '<n a=v/>'].each do |source|
      assert_equal([[:stag, source]], scan(source))
    end
  end

end
|
@ -0,0 +1,37 @@
|
||||
require 'test/unit'
|
||||
require 'htree/parse'
|
||||
require 'htree/template'
|
||||
require 'pathname'
|
||||
|
||||
# Tests that parsing and template expansion refuse tainted input when
# running at an elevated $SAFE level, and accept it at level 0.
class TestSecurity < Test::Unit::TestCase
  # Runs the given block in a fresh thread whose $SAFE is set to +n+,
  # checking that the calling thread's level is 0 before and after.
  def safe(n)
    assert_equal(0, $SAFE)
    worker = Thread.new do
      $SAFE = n
      assert_equal(n, $SAFE)
      yield
    end
    worker.join
    assert_equal(0, $SAFE)
  end

  def test_parse
    safe(1) do
      assert_equal(1, $SAFE)
      assert_nothing_raised { HTree.parse("") }
      assert_raise(SecurityError) { HTree.parse("".taint) }
    end
    # Outside the elevated level both inputs are accepted.
    assert_nothing_raised { HTree.parse("") }
    assert_nothing_raised { HTree.parse("".taint) }
  end

  def test_template
    safe(1) do
      assert_nothing_raised { HTree.expand_template("/dev/null", nil, '') }
      assert_raise(SecurityError) { HTree.expand_template("/dev/null".taint, nil, '') }
    end
    # Outside the elevated level both paths are accepted.
    assert_nothing_raised { HTree.expand_template("/dev/null", nil, '') }
    assert_nothing_raised { HTree.expand_template("/dev/null".taint, nil, '') }
  end

end
|
||||
|
@ -0,0 +1,142 @@
|
||||
require 'test/unit'
|
||||
require 'htree'
|
||||
|
||||
# Tests for get_subnode / subst_subnode / subst_itself / subst on
# elements, documents and locations.
class TestSubnode < Test::Unit::TestCase
  # String indexes address attributes, integer indexes address children.
  def test_elem_get
    elem = HTree.parse("<a href=x>abc</a>").root
    assert_equal(HTree::Text.new("x"), elem.get_subnode("href"))
    assert_equal(HTree::Text.new("abc"), elem.get_subnode(0))
  end

  def test_elem_subst
    original = HTree.parse_xml("<a href=x>abc</a>").root
    replaced = original.subst_subnode("href"=>"xxx", 0=>"def")
    assert_equal("a", replaced.name)
    assert_equal("xxx", replaced.fetch_attr("href"))
    assert_equal([HTree::Text.new("def")], replaced.children)
    # Substituting nil removes the child.
    assert_equal([], original.subst_subnode(0=>nil).children)
  end

  # Changing an attribute keeps an empty element empty; giving it a
  # child does not.
  def test_elem_subst_empty
    img = HTree.parse("<img />").root
    assert_equal(true, img.empty_element?)
    assert_equal(true, img.subst_subnode("src"=>"xxx").empty_element?)
    assert_equal(false, img.subst_subnode(0=>"xxx").empty_element?)
  end

  # Two keys naming the same attribute have their values concatenated;
  # with an array of pairs the order is the array order.
  def test_elem_multiple_attr_value
    as_hash = {"b"=>"c", HTree::Name.new(nil, "", "b")=>"d"}
    assert_match(/\A(cd|dc)\z/,
      HTree::Elem.new("a").subst_subnode(as_hash).get_subnode('b').to_s)

    as_pairs = [["b","c"], [HTree::Name.new(nil, "", "b"),"d"]]
    assert_equal('cd',
      HTree::Elem.new("a").subst_subnode(as_pairs).get_subnode('b').to_s)
    assert_equal('dc',
      HTree::Elem.new("a").subst_subnode(as_pairs.reverse).get_subnode('b').to_s)
  end

  # Index -1 inserts before the first child, an index past the end
  # appends after the last one.
  def test_elem_subst_outrange
    elem = HTree("<r>abc</r>").root
    extended = elem.subst_subnode(-1=>HTree('<x/>'), 1=>HTree('<y/>'))
    assert_equal(HTree('<r><x/>abc<y/></r>').root, extended)
  end

  def test_doc_subst_outrange
    doc = HTree("<r>abc</r>")
    extended = doc.subst_subnode(-1=>HTree('<x/>'), 1=>HTree('<y/>'))
    assert_equal(HTree('<x/><r>abc</r><y/>'), extended)
  end

  def test_doc_get
    doc = HTree.parse("<?xml?><a href=x>abc</a> ")
    assert_equal(doc.root, doc.get_subnode(1))
  end

  def test_doc_subst
    doc = HTree.parse("<?xml?><a href=x>abc</a> ")
    replaced = doc.subst_subnode(1=>"yy")
    assert_equal(HTree::Text.new("yy"), replaced.children[1])
    assert_equal([], doc.subst_subnode(0=>nil, 1=>nil, 2=>nil).children)
  end

  # A location can be used as a replacement value.
  def test_doc_loc
    target = HTree.parse("<r>a</r>")
    replacement = HTree.parse("<q/>")
    assert_equal(replacement, target.subst_subnode(0=>replacement.make_loc))
  end

  # Substituting a whole document into an element: only <q/> ends up in
  # the expected result, not the XML declaration or DOCTYPE.
  def test_doc
    elem = HTree.parse("<r>a</r>").root
    inserted = HTree.parse("<?xml version='1.0'?><!DOCTYPE q><q/>")
    expected = HTree('<r><q/></r>').root
    [inserted, inserted.make_loc, [inserted], [inserted.make_loc]].each do |value|
      assert_equal(expected, elem.subst_subnode(0=>value))
    end
  end

  # Same as test_doc, but substituting into a document.
  def test_doc2
    doc = HTree.parse("<r>a</r>")
    inserted = HTree.parse("<?xml version='1.0'?><!DOCTYPE q><q/>")
    expected = HTree('<q/>')
    [inserted, inserted.make_loc, [inserted], [inserted.make_loc]].each do |value|
      assert_equal(expected, doc.subst_subnode(0=>value))
    end
  end

  def test_change_by_subst_itself
    loc = HTree("<r>a</r>").make_loc

    changed = loc.get_subnode(0, 0).subst_itself('x')
    assert_equal(HTree::Text.new('x'), changed.to_node)
    assert_equal(HTree('<r>x</r>'), changed.top.to_node)

    changed = loc.get_subnode(0).subst_itself('xxx')
    assert_equal(HTree::Text.new('xxx'), changed.to_node)
    assert_equal(HTree('xxx'), changed.top.to_node)
  end

  def test_add_by_subst_itself
    loc = HTree("<r>a</r>").make_loc

    # New attribute.
    added = loc.get_subnode(0, 'x').subst_itself('y')
    assert_equal(HTree::Text.new('y'), added.to_node)
    assert_equal(HTree('<r x="y">a</r>'), added.top.to_node)

    # Replaced child.
    added = loc.get_subnode(0, 0).subst_itself('b')
    assert_equal(HTree::Text.new('b'), added.to_node)
    assert_equal(HTree('<r>b</r>'), added.top.to_node)

    # Negative index: inserted at the front.
    xmldecl = HTree('<?xml version="1.0"?>').get_subnode(0)
    added = loc.get_subnode(-1).subst_itself(xmldecl)
    assert_equal(0, added.index)
    assert_equal(xmldecl, added.to_node)
    assert_equal(HTree('<?xml version="1.0"?><r>a</r>'), added.top.to_node)

    # Index past the end: appended.
    procins = HTree('<?xxx yyy?>').get_subnode(0)
    added = loc.get_subnode(10).subst_itself(procins)
    assert_equal(1, added.index)
    assert_equal(procins, added.to_node)
    assert_equal(HTree('<r>a</r><?xxx yyy?>'), added.top.to_node)
  end

  def test_del_by_subst_itself
    loc = HTree("<r x='y'><x/>y<z/></r>").make_loc

    removed = loc.get_subnode(0, 'x').subst_itself(nil)
    assert_nil(removed.to_node)
    assert_equal(HTree('<r><x/>y<z/></r>'), removed.top.to_node)

    removed = loc.get_subnode(0, 1).subst_itself(nil)
    assert_equal(HTree('<r x="y"><x/><z/></r>'), removed.top.to_node)

    loc = HTree('<?xml version="1.0"?><r/>').make_loc
    removed = loc.get_subnode(0).subst_itself(nil)
    assert_equal(HTree('<r/>'), removed.top.to_node)
  end

  # subst takes a hash keyed by locations and applies all replacements.
  def test_subst
    loc = HTree('<?xml version="1.0"?><r><x/><y/><z/></r>').make_loc
    replacements = {
      loc.get_subnode(0) => nil,
      loc.get_subnode(1, 0) => 'x',
      loc.get_subnode(1, 1, 0) => 'a',
      loc.get_subnode(1, 2, 'k') => 'v'
    }
    assert_equal(HTree("<r>x<y>a</y><z k=v /></r>"),
      loc.to_node.subst(replacements))
  end

end
|
@ -0,0 +1,287 @@
|
||||
require 'test/unit'
|
||||
require 'htree/template'
|
||||
require 'stringio'
|
||||
|
||||
# Tests for the template engine (HTree.expand_template and
# HTree.compile_template) and its _text, _tree, _attr_*, _if/_else,
# _iter, _iter_content, _call and _template attributes.
#
# Several literals in this class had lost character/entity references to
# an extra round of HTML-entity decoding. They are restored where the
# damage left invalid Ruby or self-contradictory assertions, and flagged
# where the original text cannot be determined.
class TestTemplate < Test::Unit::TestCase
  Decl = '<?xml version="1.0" encoding="US-ASCII"?>'

  # Expands +template+ wrapped in an XML declaration and an <html>
  # element, checks the expected XHTML wrapper around the result, strips
  # it, and compares what is left with +expected+.
  #
  # The local variable names are kept as they are on purpose: the
  # template is expanded from a block defined here, so its embedded
  # expressions may be evaluated against this method's scope.
  def assert_xhtml(expected, template, message=nil)
    prefix = Decl +
      "<html xmlns=\"http://www.w3.org/1999/xhtml\"\n>"
    suffix = "</html\n>"
    result = HTree.expand_template(''){"<?xml version=\"1.0\"?><html>#{template}</html>"}
    assert_match(/\A#{Regexp.quote prefix}/, result)
    assert_match(/#{Regexp.quote suffix}\z/, result)
    result = result[prefix.length..(-suffix.length-1)]
    assert_equal(expected, result, message)
  end

  def test_text
    assert_xhtml("<e\n>1</e\n>", '<e _text=1>d</e>')
    assert_xhtml('1', '<span _text=1>d</span>')
    assert_xhtml("<span x=\"2\"\n>1</span\n>", '<span x=2 _text=1>d</span>')
    assert_xhtml("abc", %q{a<span _text="'b'"/>c})
  end

  # The Ruby string literals inside the _tree attribute must be written
  # with &quot;, since a raw double quote would end the attribute value.
  def test_tree
    assert_xhtml("<e\n><z\n>x</z\n></e\n>", '<e _tree="HTree(&quot;<z>x</z>&quot;)">d</e>')
    assert_xhtml("<n:e xmlns:n=\"a\"\n><n:z\n>x</n:z\n></n:e\n>", '<n:e xmlns:n=a _tree="HTree(&quot;<n:z xmlns:n=a>x</n:z>&quot;)">d</n:e>')
  end

  def test_attr
    assert_xhtml("<e x=\"1\"\n>d</e\n>", '<e _attr_x=1>d</e>')
    assert_xhtml("<span x=\"1\"\n>d</span\n>", '<span _attr_x=1>d</span>')
    # The attribute expression evaluates to a double quote, which has to
    # be escaped in the generated attribute value.
    assert_xhtml("<span x=\"&quot;\"\n>d</span\n>", '<span _attr_x=\'"\x22"\'>d</span>')
  end

  def test_if
    assert_xhtml("<e\n>d</e\n>", '<e _if=true>d</e>')
    assert_xhtml('', '<e _if=false>d</e>')
    assert_xhtml("<f\n>dd</f\n>", '<e _if=false _else=m>d</e><f _template=m>dd</f>')

    assert_xhtml('d', '<span _if=true>d</span>')
  end

  def test_iter
    assert_xhtml("<o\n><i\n>1</i\n></o\n><o\n><i\n>2</i\n></o\n><o\n><i\n>3</i\n></o\n>",
      '<o _iter=[1,2,3].each//v><i _text=v /></o>')
    assert_xhtml("<i\n>1</i\n><i\n>2</i\n><i\n>3</i\n>",
      '<span _iter=[1,2,3].each//v><i _text=v /></span>')
  end

  def test_iter_content
    assert_xhtml("<o\n><i\n>1</i\n><i\n>2</i\n><i\n>3</i\n></o\n>",
      '<o _iter_content=[1,2,3].each//v><i _text=v /></o>')
    assert_xhtml("<i\n>1</i\n><i\n>2</i\n><i\n>3</i\n>",
      '<span _iter_content=[1,2,3].each//v><i _text=v /></span>')
  end

  def test_iter_local_template
    assert_xhtml("<o\n><i\n>1</i\n></o\n><o\n><i\n>2</i\n></o\n><o\n><i\n>3</i\n></o\n>",
      '<o _iter=[1,2,3].each//v><i _call=m /><i _template=m _text=v></i></o>')
  end

  def test_call
    assert_xhtml("<f\n>1</f\n>",
      '<e _call=m(1) /><f _template=m(v) _text=v></f>')
  end

  def test_template
    assert_xhtml('d',
      '<span _template="span()">d</span><e _call="span()"></e>')
  end

  # Expands the template.html file located next to this test file.
  def test_file
    assert_equal(<<'End'.chop,
<?xml version="1.0" encoding="US-ASCII"?><html xmlns="http://www.w3.org/1999/xhtml"
><title
>aaa</title
></html
>
End
      HTree.expand_template("#{File.dirname __FILE__}/template.html", "aaa", ''))
  end

  def test_whitespace
    assert_xhtml("<x\n></x\n>", '<x> </x>')
    # NOTE(review): as extracted, this assertion used the same template as
    # the previous one ('<x> </x>') while expecting different output, so
    # a character reference was evidently lost in extraction. "&#32;" is
    # the presumed original; confirm against the upstream test file.
    assert_xhtml("<x\n>&#32;</x\n>", '<x>&#32;</x>')
    assert_xhtml("<pre\n> </pre\n>", '<pre> </pre>')
    assert_xhtml(" ", %q{<span _text="' '"> </span>})
    assert_xhtml(" ", %q{<span _text="' '"/>})
  end

  def test_ignorable
    assert_xhtml("<div\n>a</div\n>", '<div>a</div>')
    assert_xhtml("<span\n>a</span\n>", '<span>a</span>')
  end

  def test_template_in_attr
    assert_xhtml("<a x=\"1\"\n></a\n>", '<a _attr_x=1><b _template=m></b></a>')
  end

  def test_empty_block_argument
    assert_xhtml("vv", '<span _iter="2.times//">v</span>')
  end

  # Empty-element tags stay empty-element tags unless content is
  # produced for them.
  def test_empty_element
    assert_xhtml("<elem\n/>", '<elem />') # 2004-06-10: reported by Takuo KITAME
    assert_xhtml("<elem x=\"1\"\n/>", '<elem _attr_x=1 />')
    assert_xhtml("<elem\n></elem\n>", '<elem _text=\'""\' />')
    assert_xhtml("<elem\n/>", '<elem _if="true" />')
    assert_xhtml("", '<elem _if="false" />')
    assert_xhtml("<foo\n/>", '<elem _if="false" _else="foo"/><foo _template="foo"/>')
    assert_xhtml("<elem\n/><elem\n/>", '<elem _iter="2.times//" />')
    assert_xhtml("<elem\n></elem\n>", '<elem _iter_content="2.times//" />')
  end

  # The same cases written with explicit start and end tags keep both tags.
  def test_empty_element_start_end_tag
    assert_xhtml("<elem\n></elem\n>", '<elem></elem>')
    assert_xhtml("<elem x=\"1\"\n></elem\n>", '<elem _attr_x=1 ></elem>')
    assert_xhtml("<elem\n></elem\n>", '<elem _text=\'""\' ></elem>')
    assert_xhtml("<elem\n></elem\n>", '<elem _if="true" ></elem>')
    assert_xhtml("", '<elem _if="false" ></elem>')
    assert_xhtml("<foo\n></foo\n>", '<elem _if="false" _else="foo"></elem><foo _template="foo"></foo>')
    assert_xhtml("<elem\n></elem\n><elem\n></elem\n>", '<elem _iter="2.times//" ></elem>')
    assert_xhtml("<elem\n></elem\n>", '<elem _iter_content="2.times//" ></elem>')
  end

  # Expanding assign.html must leave a top-level local variable untouched.
  def test_toplevel_local_variable
    eval("htree_test_toplevel_local_variable = :non_modified_value", TOPLEVEL_BINDING)
    HTree.expand_template("#{File.dirname __FILE__}/assign.html", "aaa", '')
    assert_equal(:non_modified_value, eval("htree_test_toplevel_local_variable", TOPLEVEL_BINDING))
    eval("htree_test_toplevel_local_variable = 1", TOPLEVEL_BINDING)
  end

  # A compiled template module can extend an arbitrary object; the
  # template body then runs with that object as self. The local must stay
  # named +o+ because the template text refers to it.
  def test_extend_compiled_template
    m = HTree.compile_template('<div _template="m">self is <span _text="inspect"></span></div>')
    o = "zzz"
    o.extend m
    assert_equal('<?xml version="1.0" encoding="US-ASCII"?>self is "zzz"',
      HTree.expand_template(''){'<div _call="o.m"></div>'})
  end

  # NOTE(review): given the test name, the blank inside the two literals
  # below presumably stood for a no-break-space reference (e.g. "&nbsp;")
  # before extraction decoded it; the literals are reproduced as found.
  # Confirm against the upstream test file.
  def test_attr_nbsp
    @t = HTree::Text.parse_pcdata(' ')
    assert_xhtml("<span x=\" \"\n>d</span\n>", '<span _attr_x="@t">d</span>')
  end

  # NOTE(review): same caveat as test_attr_nbsp about the blank literals.
  def test_text_nbsp
    @t = HTree::Text.parse_pcdata(' ')
    assert_xhtml(" ", '<span _text="@t">d</span>')
  end

  # A _text attribute without a value takes the expression from the
  # element content.
  def test_content_text
    assert_xhtml("<e\n>ab</e\n>", '<e _text>"a"+"b"</e>')
    assert_xhtml("<e\n>2</e\n>", '<e _text>1+1</e>')
  end

end
|
||||
|
||||
# Minimal in-memory stand-in for a file: wraps a string and hands it back
# from #read. The tests pass instances of it to HTree.expand_template and
# HTree.compile_template in place of a file name.
class MemFile
  # str:: the content later returned by #read.
  def initialize(str)
    @str = str
  end

  # Returns the wrapped content (the same object on every call).
  def read
    return @str
  end
end
|
||||
|
||||
# Fixture object for the template scope tests. It carries a constant, a
# class variable and an instance variable, each holding a 'good_*' value.
# The class variable is deliberate: the tests read @@cvar from a template.
class TestTemplateScopeObj
  Const = 'good_const'

  @@cvar = 'good_cvar'

  def initialize()
    @ivar = 'good_ivar'
  end
end
|
||||
|
||||
# Checks which scope template expressions are evaluated in. The test case
# itself defines 'bad_*' values for a constant, class variable, instance
# variable and local variable; the assertions expect templates to see the
# values of the supplied object or module instead.
class TestTemplateScope < Test::Unit::TestCase
  Const = 'bad_const'
  @@cvar = 'bad_cvar'

  def setup
    @ivar = 'bad_ivar'
    eval("test_local_variable = 'bad_lvar'", TOPLEVEL_BINDING)
  end

  XMLDeclStr = '<?xml version="1.0" encoding="US-ASCII"?>'

  # With expand_template(file, obj, out), expressions see obj's class
  # nesting, instance variables, class variables and constants, and do
  # not see the caller's local variables.
  def test_expand_template
    obj = TestTemplateScopeObj.new
    assert_equal("#{XMLDeclStr}[TestTemplateScopeObj]",
      HTree.expand_template(MemFile.new('<span _text="Module.nesting.inspect"/>'), obj, ''))
    assert_equal("#{XMLDeclStr}good_ivar",
      HTree.expand_template(MemFile.new('<span _text="@ivar"/>'), obj, ''))
    assert_equal("#{XMLDeclStr}good_cvar",
      HTree.expand_template(MemFile.new('<span _text="@@cvar"/>'), obj, ''))
    assert_equal("#{XMLDeclStr}good_const",
      HTree.expand_template(MemFile.new('<span _text="Const"/>'), obj, ''))
    # Deliberately assigned and never read here: the template below must
    # NOT be able to see this local and has to fall into its rescue branch.
    test_local_variable = 'bad_lvar'
    assert_equal("#{XMLDeclStr}good_lvar",
      HTree.expand_template(MemFile.new('<span _text="begin test_local_variable rescue NameError; \'good_lvar\' end"/>'), obj, ''))
  end

  # Templates compiled into a module resolve constants and class
  # variables against that module; instance variables come from the
  # receiver (the module itself, or an object extended with it).
  def test_compile_template
    mod = HTree.compile_template(MemFile.new(<<-'End'))
      <span _template=test_nesting _text="Module.nesting.inspect"/>
      <span _template=test_const _text="Const"/>
      <span _template=test_cvar _text="@@cvar"/>
      <span _template=test_ivar _text="@ivar"/>
    End
    mod.module_eval <<-'End'
      Const = 'mod_const'
      @@cvar = 'mod_cvar'
      @ivar = 'mod_ivar'
    End

    # Called on the module itself.
    assert_equal("[#{mod.inspect}]", mod.test_nesting.extract_text.to_s)
    assert_equal("mod_const", mod.test_const.extract_text.to_s)
    assert_equal("mod_cvar", mod.test_cvar.extract_text.to_s)
    assert_equal("mod_ivar", mod.test_ivar.extract_text.to_s)

    # Called on an object extended with the module (via __send__, as in
    # the original test).
    obj = Object.new
    obj.instance_variable_set :@ivar, 'obj_ivar'
    obj.extend mod
    assert_equal("[#{mod.inspect}]", obj.__send__(:test_nesting).extract_text.to_s)
    assert_equal("mod_const", obj.__send__(:test_const).extract_text.to_s)
    assert_equal("mod_cvar", obj.__send__(:test_cvar).extract_text.to_s)
    assert_equal("obj_ivar", obj.__send__(:test_ivar).extract_text.to_s)
  end
end
|
||||
|
||||
# Tests how text substituted into a <script> element is written in HTML
# output versus XML output.
#
# In every test the local must stay named +v+: the template text refers
# to it and is expanded from a block defined in the test method.
class TestCDATA < Test::Unit::TestCase
  # HTML output: script content is written raw.
  def test_html_script
    v = "x<y"
    assert_equal("<html><script>x<y</script></html>",
      HTree.expand_template('') {"<html><script _text=\"v\">ab</script>"}.gsub(/\n/, ''))
  end

  # XML output: "<" in character data has to be written as "&lt;". The
  # expected literal had lost that reference to an extra round of
  # HTML-entity decoding, making it identical to the HTML case.
  # NOTE(review): confirm against the upstream test file.
  def test_xml_script
    v = "x<y"
    assert_equal("<?xml version=\"1.0\" encoding=\"US-ASCII\"?><html xmlns=\"http://www.w3.org/1999/xhtml\"><script>x&lt;y</script></html>",
      HTree.expand_template('') {"<?xml version=\"1.0\"?><html><script _text=\"v\">ab</script>"}.gsub(/\n/, ''))
  end

  # HTML output: content containing "</" is rejected with ArgumentError.
  def test_html_script_invalid_content
    v = "x</y"
    assert_raise(ArgumentError) {
      HTree.expand_template('') {"<html><script _text=\"v\">ab</script>"}
    }
  end
end
|
||||
|
||||
# Tests that expand_template reports the output charset through the
# +charset=+ accessor of the output object.
class TestCharset < Test::Unit::TestCase
  # Output sink: a String that can additionally record a charset.
  class CharsetString < String
    attr_accessor :charset
  end

  # Runs the block with $KCODE set to +kcode+ and restores the previous
  # value afterwards, even if the block raises.
  def with_kcode(kcode)
    old_kcode = $KCODE
    begin
      $KCODE = kcode
      yield
    ensure
      $KCODE = old_kcode
    end
  end

  # Pure ASCII content is reported as US-ASCII even under an EUC $KCODE.
  def test_us_ascii
    with_kcode('E') {
      out = HTree.expand_template(CharsetString.new) { "<html>abc" }
      # assert_equal takes (expected, actual); the arguments used to be
      # swapped, which mislabels the values in a failure message.
      assert_equal('US-ASCII', out.charset)
    }
  end

  # Content with bytes outside ASCII is reported as EUC-JP.
  def test_euc_jp
    with_kcode('E') {
      out = HTree.expand_template(CharsetString.new) { "<html>\xa1\xa1" }
      assert_equal('EUC-JP', out.charset)
    }
  end
end
|
||||
|
||||
# Tests that a DOCTYPE in a template is carried into the expanded output.
class TestTemplateDOCTYPE < Test::Unit::TestCase
  # The root element name in the DOCTYPE comes out lower-cased ("html")
  # and the unclosed <html> gets an end tag; newlines are stripped before
  # comparing.
  def test_html
    expanded = HTree.expand_template('') {'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html>'}
    assert_equal(
      '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html></html>',
      expanded.gsub(/\n/, ''))
  end
end
|
@ -0,0 +1,35 @@
|
||||
require 'test/unit'
|
||||
require 'htree/text'
|
||||
|
||||
# Tests for HTree::Text construction and normalization.
class TestText < Test::Unit::TestCase
  # Text.new takes plain text, so an input that already looks like an
  # entity reference gets its ampersand escaped again in rcdata.
  def test_new
    text = HTree::Text.new("abc&def")
    assert_equal("abc&amp;def", text.rcdata)
  end

=begin
  def test_parse
    assert_equal("abc&def", HTree::Text.parse("abc&def").rcdata)
  end

  def test_to_s
    assert_equal("abc&def", HTree::Text.parse("abc&def").to_s)
  end
=end

  # Runs the block with $KCODE set to +kc+, restoring the old value
  # afterwards even if the block raises.
  def kcode(kc)
    saved = $KCODE
    begin
      $KCODE = kc
      yield
    ensure
      $KCODE = saved
    end
  end

  # NOTE(review): both literals below look as if character/entity
  # references were decoded during extraction (the input presumably
  # contained references rather than raw "<", "&" and a Greek letter).
  # They are reproduced exactly as found; confirm against the upstream
  # test file before relying on this test.
  def test_normalize
    kcode('EUC') do
      assert_equal("<ABC&& \xa6\xc1",
        HTree::Text.new_internal("<ABC&& α").normalized_rcdata)
    end
  end
end
|
@ -0,0 +1,69 @@
|
||||
require 'test/unit'
|
||||
require 'htree/traverse'
|
||||
require 'htree/parse'
|
||||
require 'htree/equality'
|
||||
|
||||
# Tests for the traversal helpers exercised here: filter on a location,
# and title / author lookup on HTML and RSS 1.0 (RDF) documents.
class TestTraverse < Test::Unit::TestCase
  # Filtering out the node whose path is 'doc()/a/b[1]' should leave a tree
  # equal to the same document parsed without that first <b> element.
  def test_filter
    loc = HTree.parse('<a><b>x</b><b/><a/>').make_loc
    remaining = loc.filter { |node| node.path != 'doc()/a/b[1]' }
    assert_equal(HTree.parse('<a><b/><a/>'), remaining)
  end

  # #title should give the Text 'aaa' for an HTML <title> and for an RSS
  # channel <title>, both on the parsed tree and on its location.
  def test_title
    html_doc = HTree.parse('<html><title>aaa</title></html>')
    rss_doc = HTree.parse(<<'End')
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns="http://purl.org/rss/1.0/">
<channel>
<title>aaa</title>
</channel>
</rdf:RDF>
End
    docs = [html_doc, rss_doc]
    expected = HTree::Text.new('aaa')

    # First pass: look the title up directly on each parsed tree.
    docs.each do |doc|
      assert_equal(expected, doc.title)
    end

    # Second pass: look it up through a location built with make_loc.
    docs.each do |doc|
      assert_equal(expected, doc.make_loc.title)
    end

  end

  # #author on a location should give the Text 'xxx' for each of four
  # sources: <meta name=author>, <link rev=made>, dc:creator and
  # dc:publisher inside an RSS channel.
  def test_author
    meta_doc = HTree.parse('<html><meta name=author content=xxx></html>')
    link_doc = HTree.parse('<html><link rev=made title=xxx></html>')
    creator_doc = HTree.parse(<<'End')
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns="http://purl.org/rss/1.0/">
<channel>
<dc:creator>xxx</dc:creator>
</channel>
</rdf:RDF>
End
    publisher_doc = HTree.parse(<<'End')
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns="http://purl.org/rss/1.0/">
<channel>
<dc:publisher>xxx</dc:publisher>
</channel>
</rdf:RDF>
End
    docs = [meta_doc, link_doc, creator_doc, publisher_doc]
    expected = HTree::Text.new('xxx')

    # The direct (non-location) check is commented out, so this pass only
    # iterates and asserts nothing.
    docs.each do |doc|
      #assert_equal(expected, doc.author)
    end

    # Active check: author lookup through a location.
    docs.each do |doc|
      assert_equal(expected, doc.make_loc.author)
    end
  end
end
|
Reference in New Issue