mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-10-31 01:14:14 +01:00 
			
		
		
		
	⚡ Make get_mets_file a lot faster by using find() instead of xpath()
This commit is contained in:
		
							parent
							
								
									16a3a3bcc8
								
							
						
					
					
						commit
						90c60ebb80
					
				
					 1 changed file with 7 additions and 5 deletions
				
			
		|  | @ -273,6 +273,12 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]: | ||||||
|     div_physSequence = structMap_PHYSICAL[0] |     div_physSequence = structMap_PHYSICAL[0] | ||||||
|     assert div_physSequence.attrib.get("TYPE") == "physSequence" |     assert div_physSequence.attrib.get("TYPE") == "physSequence" | ||||||
| 
 | 
 | ||||||
|  |     def get_mets_file(*, ID): | ||||||
|  |         if ID: | ||||||
|  |             file_ = mets.find(f'.//{{{ns["mets"]}}}file[@ID="{ID}"]') | ||||||
|  |             return file_ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|     for page in div_physSequence: |     for page in div_physSequence: | ||||||
| 
 | 
 | ||||||
|         # TODO sort by ORDER? |         # TODO sort by ORDER? | ||||||
|  | @ -285,12 +291,8 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]: | ||||||
|             file_id = fptr.attrib.get("FILEID") |             file_id = fptr.attrib.get("FILEID") | ||||||
|             assert file_id |             assert file_id | ||||||
| 
 | 
 | ||||||
|             def get_mets_file(*, ID): |  | ||||||
|                 if ID: |  | ||||||
|                     file_ = (mets.xpath(f'//mets:file[@ID="{ID}"]', namespaces=ns) or [None])[0] |  | ||||||
|                     return file_ |  | ||||||
| 
 |  | ||||||
|             file_ = get_mets_file(ID=file_id) |             file_ = get_mets_file(ID=file_id) | ||||||
|  |             assert file_ is not None | ||||||
|             fileGrp_USE = file_.getparent().attrib.get("USE") |             fileGrp_USE = file_.getparent().attrib.get("USE") | ||||||
|             file_FLocat_href = (file_.xpath('mets:FLocat/@xlink:href', namespaces=ns) or [None])[0] |             file_FLocat_href = (file_.xpath('mets:FLocat/@xlink:href', namespaces=ns) or [None])[0] | ||||||
|             page_dict[f"fileGrp_{fileGrp_USE}_file_FLocat_href"] = file_FLocat_href |             page_dict[f"fileGrp_{fileGrp_USE}_file_FLocat_href"] = file_FLocat_href | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue