mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-11-04 03:14:14 +01:00 
			
		
		
		
	✨ ALTO: Count alto:Tags
This commit is contained in:
		
							parent
							
								
									de50f13043
								
							
						
					
					
						commit
						a40716a320
					
				
					 2 changed files with 20 additions and 1 deletion
				
			
		| 
						 | 
				
			
			@ -86,7 +86,8 @@ def alto_to_dict(alto, raise_errors=True):
 | 
			
		|||
        elif localname == 'Styles':
 | 
			
		||||
            pass
 | 
			
		||||
        elif localname == 'Tags':
 | 
			
		||||
            pass
 | 
			
		||||
            value[localname] = {}
 | 
			
		||||
            value[localname].update(TagGroup(tag, group).subelement_counts())
 | 
			
		||||
        else:
 | 
			
		||||
            if raise_errors:
 | 
			
		||||
                print(value)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -37,3 +37,21 @@ def test_Page_counts():
 | 
			
		|||
    assert d['Layout_Page_TextBlock-count'] == 1
 | 
			
		||||
    assert d['Layout_Page_TextLine-count'] == 3
 | 
			
		||||
    assert d['Layout_Page_String-count'] == 6
 | 
			
		||||
 | 
			
		||||
def test_Tags_counts():
    """Check the NamedEntityTag count reported for an alto:Tags section.

    The fixture is a minimal ALTO v2 document whose only content is a
    ``Tags`` element holding nine ``NamedEntityTag`` children; the dict
    returned by ``dict_fromstring`` must report that number under the
    ``Tags_NamedEntityTag-count`` key.
    """
    alto_xml = """
    <alto xmlns="http://www.loc.gov/standards/alto/ns-v2#">
      <Tags>
        <NamedEntityTag ID="PER0" LABEL="Pentlings"/>
        <NamedEntityTag ID="LOC1" LABEL="Pentling"/>
        <NamedEntityTag ID="LOC2" LABEL="Hamm"/>
        <NamedEntityTag ID="PER4" LABEL="Hofes Pentling"/>
        <NamedEntityTag ID="LOC5" LABEL="Hofs Pentling"/>
        <NamedEntityTag ID="LOC7" LABEL="Hilbeck"/>
        <NamedEntityTag ID="PER8" LABEL="Hoff"/>
        <NamedEntityTag ID="PER9" LABEL="L i b e r"/>
        <NamedEntityTag ID="PER10" LABEL="Jhesu Christi"/>
      </Tags>
    </alto>
    """
    counts = dict_fromstring(alto_xml)

    # One count per NamedEntityTag element in the fixture above.
    expected_tag_count = 9
    assert counts['Tags_NamedEntityTag-count'] == expected_tag_count
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue