|
|
|
@ -37,3 +37,21 @@ def test_Page_counts():
|
|
|
|
|
assert d['Layout_Page_TextBlock-count'] == 1
|
|
|
|
|
assert d['Layout_Page_TextLine-count'] == 3
|
|
|
|
|
assert d['Layout_Page_String-count'] == 6
|
|
|
|
|
|
|
|
|
|
def test_Tags_counts():
    """Check the NamedEntityTag count reported for an ALTO <Tags> section.

    Feeds ``dict_fromstring`` a minimal ALTO v2 document whose ``<Tags>``
    element holds nine ``<NamedEntityTag>`` children and asserts that the
    resulting mapping reports nine under ``'Tags_NamedEntityTag-count'``.
    """
    # The IDs are deliberately non-contiguous (no 3 or 6), so the expected
    # value of 9 can only come from counting elements, not from the highest
    # ID number.
    d = dict_fromstring("""
    <alto xmlns="http://www.loc.gov/standards/alto/ns-v2#">
      <Tags>
        <NamedEntityTag ID="PER0" LABEL="Pentlings"/>
        <NamedEntityTag ID="LOC1" LABEL="Pentling"/>
        <NamedEntityTag ID="LOC2" LABEL="Hamm"/>
        <NamedEntityTag ID="PER4" LABEL="Hofes Pentling"/>
        <NamedEntityTag ID="LOC5" LABEL="Hofs Pentling"/>
        <NamedEntityTag ID="LOC7" LABEL="Hilbeck"/>
        <NamedEntityTag ID="PER8" LABEL="Hoff"/>
        <NamedEntityTag ID="PER9" LABEL="L i b e r"/>
        <NamedEntityTag ID="PER10" LABEL="Jhesu Christi"/>
      </Tags>
    </alto>
    """)
    assert d['Tags_NamedEntityTag-count'] == 9
|
|
|
|
|