mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-10-24 23:14:15 +02:00 
			
		
		
		
	✨ ALTO: Count alto:Tags
This commit is contained in:
		
							parent
							
								
									de50f13043
								
							
						
					
					
						commit
						a40716a320
					
				
					 2 changed files with 20 additions and 1 deletion
				
			
		|  | @ -86,7 +86,8 @@ def alto_to_dict(alto, raise_errors=True): | |||
|         elif localname == 'Styles': | ||||
|             pass | ||||
|         elif localname == 'Tags': | ||||
|             pass | ||||
|             value[localname] = {} | ||||
|             value[localname].update(TagGroup(tag, group).subelement_counts()) | ||||
|         else: | ||||
|             if raise_errors: | ||||
|                 print(value) | ||||
|  |  | |||
|  | @ -37,3 +37,21 @@ def test_Page_counts(): | |||
|     assert d['Layout_Page_TextBlock-count'] == 1 | ||||
|     assert d['Layout_Page_TextLine-count'] == 3 | ||||
|     assert d['Layout_Page_String-count'] == 6 | ||||
| 
 | ||||
| def test_Tags_counts(): | ||||
|     # The <Tags> element in this fixture holds nine <NamedEntityTag> children; | ||||
|     # the 'Tags_NamedEntityTag-count' entry of the resulting dict must equal 9. | ||||
|     d = dict_fromstring(""" | ||||
|     <alto xmlns="http://www.loc.gov/standards/alto/ns-v2#"> | ||||
|       <Tags> | ||||
|         <NamedEntityTag ID="PER0" LABEL="Pentlings"/> | ||||
|         <NamedEntityTag ID="LOC1" LABEL="Pentling"/> | ||||
|         <NamedEntityTag ID="LOC2" LABEL="Hamm"/> | ||||
|         <NamedEntityTag ID="PER4" LABEL="Hofes Pentling"/> | ||||
|         <NamedEntityTag ID="LOC5" LABEL="Hofs Pentling"/> | ||||
|         <NamedEntityTag ID="LOC7" LABEL="Hilbeck"/> | ||||
|         <NamedEntityTag ID="PER8" LABEL="Hoff"/> | ||||
|         <NamedEntityTag ID="PER9" LABEL="L i b e r"/> | ||||
|         <NamedEntityTag ID="PER10" LABEL="Jhesu Christi"/> | ||||
|       </Tags> | ||||
|     </alto> | ||||
|     """) | ||||
|     assert d['Tags_NamedEntityTag-count'] == 9 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue