mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-10-31 09:24:13 +01:00 
			
		
		
		
	🚧 ALTO: Extract a function to calculate statistics on xpath expressions
This commit is contained in:
		
							parent
							
								
									e24a846ea2
								
							
						
					
					
						commit
						9246519162
					
				
					 1 changed files with 16 additions and 8 deletions
				
			
		|  | @ -81,14 +81,22 @@ def alto_to_dict(alto, raise_errors=True): | |||
|             value['Page'].update(TagGroup(tag, group).subelement_counts()) | ||||
| 
 | ||||
|             xpath_expr = "//alto:String/@WC" | ||||
|             alto_namespace = ET.QName(group[0]).namespace | ||||
|             namespaces={"alto": alto_namespace} | ||||
| 
 | ||||
|             def xpath_statistics(xpath_expr, namespaces): | ||||
|                 values = [] | ||||
|                 for e in group: | ||||
|                 # TODO need a smart way to always have the correct namespaces for a document | ||||
|                 alto_namespace = ET.QName(e).namespace | ||||
|                 r = e.xpath(xpath_expr, namespaces={"alto": alto_namespace}) | ||||
|                     r = e.xpath(xpath_expr, namespaces=namespaces) | ||||
|                     values += r | ||||
|                 values = np.array([float(v) for v in values]) | ||||
|             value['Page'][f'{xpath_expr}-mean'] = np.mean(values) | ||||
| 
 | ||||
|                 statistics = {} | ||||
|                 statistics[f'{xpath_expr}-mean'] = np.mean(values) | ||||
|                 return statistics | ||||
| 
 | ||||
|             value['Page'].update(xpath_statistics(xpath_expr, namespaces)) | ||||
| 
 | ||||
| 
 | ||||
|         elif localname == 'Styles': | ||||
|             pass | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue