mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-09 03:40:01 +02:00
✨ ALTO: Count alto:Tags
This commit is contained in:
parent
de50f13043
commit
a40716a320
2 changed files with 20 additions and 1 deletion
|
@@ -86,7 +86,8 @@ def alto_to_dict(alto, raise_errors=True):
|
|||
elif localname == 'Styles':
|
||||
pass
|
||||
elif localname == 'Tags':
|
||||
pass
|
||||
value[localname] = {}
|
||||
value[localname].update(TagGroup(tag, group).subelement_counts())
|
||||
else:
|
||||
if raise_errors:
|
||||
print(value)
|
||||
|
|
|
@@ -37,3 +37,21 @@ def test_Page_counts():
|
|||
assert d['Layout_Page_TextBlock-count'] == 1
|
||||
assert d['Layout_Page_TextLine-count'] == 3
|
||||
assert d['Layout_Page_String-count'] == 6
|
||||
|
||||
def test_Tags_counts():
    """Check that the children of alto:Tags are counted per element name.

    The fixture holds nine NamedEntityTag elements directly under Tags, so
    the resulting dict is expected to report a count of nine under the
    'Tags_NamedEntityTag-count' key.
    """
    alto_xml = """
    <alto xmlns="http://www.loc.gov/standards/alto/ns-v2#">
      <Tags>
        <NamedEntityTag ID="PER0" LABEL="Pentlings"/>
        <NamedEntityTag ID="LOC1" LABEL="Pentling"/>
        <NamedEntityTag ID="LOC2" LABEL="Hamm"/>
        <NamedEntityTag ID="PER4" LABEL="Hofes Pentling"/>
        <NamedEntityTag ID="LOC5" LABEL="Hofs Pentling"/>
        <NamedEntityTag ID="LOC7" LABEL="Hilbeck"/>
        <NamedEntityTag ID="PER8" LABEL="Hoff"/>
        <NamedEntityTag ID="PER9" LABEL="L i b e r"/>
        <NamedEntityTag ID="PER10" LABEL="Jhesu Christi"/>
      </Tags>
    </alto>
    """
    expected_named_entity_tags = 9

    counts = dict_fromstring(alto_xml)

    assert counts['Tags_NamedEntityTag-count'] == expected_named_entity_tags
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue