neingeist
/
neinomaten
Archived
1
0
Fork 0
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

154 lines
4.0 KiB
Ruby

require 'test/unit'
require 'htree/scan'
# Unit tests for HTree.scan. Each test feeds a markup string to the scanner
# and compares what it yields against an expected list of [type, text] pairs.
class TestScan < Test::Unit::TestCase
  # Runs HTree.scan over +str+ and returns, in order, every value the
  # scanner yielded to its block.
  def scan(str)
    [].tap do |tokens|
      HTree.scan(str) { |token| tokens << token }
    end
  end

  # Empty input yields nothing.
  def test_empty
    assert_equal([], scan(''))
  end

  # Helper: asserts that scanning +s+ yields exactly one value, namely the
  # value returned by the given block. The block is evaluated before the scan.
  def t_single(s)
    assert_equal([yield], scan(s))
  end

  # Each input below is a single construct and must come back as exactly one
  # entry whose text is the whole input.
  def test_single
    single_token_inputs = [
      [:xmldecl, '<?xml version="1.0"?>'],
      [:doctype, '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">'],
      [:procins, '<?xxx yyy?>'],
      [:stag, '<a>'],
      [:etag, '</a>'],
      [:emptytag, '<a/>'],
      [:comment, '<!-- abc -->'],
      [:text_cdata_section, '<![CDATA[abc]]>'],
      [:text_pcdata, 'abc']
    ]

    single_token_inputs.each do |type, source|
      assert_equal([[type, source]], scan(source))
    end
  end

  # A start tag is reported the same way with or without a preceding
  # XML declaration.
  def test_xmldecl_seen
    declaration = '<?xml version="1.0"?>'
    start_tag = '<A>'

    assert_equal([[:stag, start_tag]], scan(start_tag))
    assert_equal(
      [[:xmldecl, declaration], [:stag, start_tag]],
      scan(declaration + start_tag)
    )
  end

  # Markup-looking text inside <script> is reported as :text_cdata_content
  # rather than as a tag, whether or not the script element is closed.
  def test_cdata_content
    closed_script = [
      [:stag, '<html>'],
      [:stag, '<script>'],
      [:text_cdata_content, '<a>'],
      [:etag, '</script>'],
      [:stag, '<a>']
    ]
    assert_equal(closed_script, scan('<html><script><a></script><a>'))

    unclosed_script = [
      [:stag, '<html>'],
      [:stag, '<script>'],
      [:text_cdata_content, '<a>']
    ]
    assert_equal(unclosed_script, scan('<html><script><a>'))
  end

  # Text runs between tags are reported as separate :text_pcdata entries.
  def test_text
    expected = [
      [:text_pcdata, 'a'],
      [:stag, '<e>'],
      [:text_pcdata, 'b'],
      [:stag, '<e>'],
      [:text_pcdata, 'c'],
      [:stag, '<e>'],
      [:text_pcdata, 'd']
    ]
    assert_equal(expected, scan('a<e>b<e>c<e>d'))
  end

  def test_eol_html
    # SGML says that a line break immediately after a start tag, and one
    # immediately before an end tag, are to be ignored:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#notes-line-breaks
    #
    # Common browsers (Mozilla included) do not follow that rule, so HTree
    # does not either: such line breaks are kept and reported as ordinary text.
    with_plain_element = [
      [:stag, "<html>"],
      [:text_pcdata, "a\n"],
      [:stag, "<e>"],
      [:text_pcdata, "\nb\n"],
      [:stag, "<f>"],
      [:text_pcdata, "\nc\n"],
      [:etag, "</f>"],
      [:text_pcdata, "\nd\n"],
      [:etag, "</e>"],
      [:text_pcdata, "\ne"]
    ]
    assert_equal(
      with_plain_element,
      scan("<html>a\n<e>\nb\n<f>\nc\n</f>\nd\n</e>\ne")
    )

    # Same document shape, but the inner element is <script>, so its body
    # is reported as :text_cdata_content with the line breaks still intact.
    with_script_element = [
      [:stag, "<html>"],
      [:text_pcdata, "a\n"],
      [:stag, "<e>"],
      [:text_pcdata, "\nb\n"],
      [:stag, "<script>"],
      [:text_cdata_content, "\nc\n"],
      [:etag, "</script>"],
      [:text_pcdata, "\nd\n"],
      [:etag, "</e>"],
      [:text_pcdata, "\ne"]
    ]
    assert_equal(
      with_script_element,
      scan("<html>a\n<e>\nb\n<script>\nc\n</script>\nd\n</e>\ne")
    )
  end

  def test_eol_xml
    # In XML, line breaks are part of the content, because KEEPRSRE is YES
    # in XML:
    # http://www.satoshii.org/markup/websgml/valid-xml#keeprsre
    expected = [
      [:xmldecl, "<?xml version='1.0'?>"],
      [:text_pcdata, "a\n"],
      [:stag, "<e>"],
      [:text_pcdata, "\nb\n"],
      [:stag, "<f>"],
      [:text_pcdata, "\nc\n"],
      [:etag, "</f>"],
      [:text_pcdata, "\nd\n"],
      [:etag, "</e>"],
      [:text_pcdata, "\ne"]
    ]
    assert_equal(
      expected,
      scan("<?xml version='1.0'?>a\n<e>\nb\n<f>\nc\n</f>\nd\n</e>\ne")
    )
  end

  # Checks the return value of HTree.scan itself (the yielded values are
  # discarded by the empty block).
  # NOTE(review): the pair looks like [is_xml, is_html] - confirm against
  # the HTree.scan implementation.
  def test_xml_html_detection
    detection_cases = [
      [[false, true], "<html></html>"],
      [[true, false], "<rss></rss>"],
      [[true, true], '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">']
    ]

    detection_cases.each do |expected_flags, markup|
      detected = HTree.scan(markup) {}
      assert_equal(expected_flags, detected)
    end
  end

  # A '>' inside a quoted attribute value does not terminate the tag.
  def test_quoted_attr
    markup = '<e a=">"/>'
    assert_equal([[:emptytag, markup]], scan(markup))
  end

  # A '/' that ends an unquoted attribute value belongs to that value, so
  # the tag is reported as a start tag rather than an empty-element tag.
  def test_bare_slash
    ['<n dir=/foo/bar/>', '<n a=v/>'].each do |markup|
      assert_equal([[:stag, markup]], scan(markup))
    end
  end
end