mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-11-04 03:14:14 +01:00 
			
		
		
		
	🚧 ALTO: Extract a function to calculate statistics on xpath expressions
This commit is contained in:
		
							parent
							
								
									e24a846ea2
								
							
						
					
					
						commit
						9246519162
					
				
					 1 changed file with 16 additions and 8 deletions
				
			
		| 
						 | 
					@ -81,14 +81,22 @@ def alto_to_dict(alto, raise_errors=True):
 | 
				
			||||||
            value['Page'].update(TagGroup(tag, group).subelement_counts())
 | 
					            value['Page'].update(TagGroup(tag, group).subelement_counts())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            xpath_expr = "//alto:String/@WC"
 | 
					            xpath_expr = "//alto:String/@WC"
 | 
				
			||||||
            values = []
 | 
					            alto_namespace = ET.QName(group[0]).namespace
 | 
				
			||||||
            for e in group:
 | 
					            namespaces={"alto": alto_namespace}
 | 
				
			||||||
                # TODO need a smart way to always have the correct namespaces for a document
 | 
					
 | 
				
			||||||
                alto_namespace = ET.QName(e).namespace
 | 
					            def xpath_statistics(xpath_expr, namespaces):
 | 
				
			||||||
                r = e.xpath(xpath_expr, namespaces={"alto": alto_namespace})
 | 
					                values = []
 | 
				
			||||||
                values += r
 | 
					                for e in group:
 | 
				
			||||||
            values = np.array([float(v) for v in values])
 | 
					                    r = e.xpath(xpath_expr, namespaces=namespaces)
 | 
				
			||||||
            value['Page'][f'{xpath_expr}-mean'] = np.mean(values)
 | 
					                    values += r
 | 
				
			||||||
 | 
					                values = np.array([float(v) for v in values])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                statistics = {}
 | 
				
			||||||
 | 
					                statistics[f'{xpath_expr}-mean'] = np.mean(values)
 | 
				
			||||||
 | 
					                return statistics
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            value['Page'].update(xpath_statistics(xpath_expr, namespaces))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        elif localname == 'Styles':
 | 
					        elif localname == 'Styles':
 | 
				
			||||||
            pass
 | 
					            pass
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue