mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-10-31 19:24:12 +01:00 
			
		
		
		
	🐛 ppn2ocr: Don't break now that we have IIIF URLs
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				continuous-integration/drone/push Build is failing
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	continuous-integration/drone/push Build is failing
				
			This commit is contained in:
		
							parent
							
								
									054a37a669
								
							
						
					
					
						commit
						b2e02dbf64
					
				
					 1 changed files with 32 additions and 1 deletions
				
			
		
							
								
								
									
										33
									
								
								ppn2ocr
									
										
									
									
									
								
							
							
						
						
									
										33
									
								
								ppn2ocr
									
										
									
									
									
								
							|  | @ -39,6 +39,19 @@ def oai_mets(ppn): | ||||||
|     return mets |     return mets | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def iiif_url_for_sbb_url(sbb_url, ppn, size, format):
    """
    Build an IIIF URL from an SBB URL, which may be a dms URL or an IIIF URL.

    URLs containing "/dms/" are handed to iiif_url_for_dms_url(); every other
    URL is handed to iiif_url_for_iiif_url(). This dispatcher is needed for as
    long as dms URLs exist (or for as long as IIIF URLs may have to be
    rewritten for a different size).
    """
    is_dms_url = "/dms/" in sbb_url
    converter = iiif_url_for_dms_url if is_dms_url else iiif_url_for_iiif_url
    return converter(sbb_url, ppn, size, format)
|  | 
 | ||||||
|  | 
 | ||||||
| def iiif_url_for_dms_url(dms_url, ppn, size, format): | def iiif_url_for_dms_url(dms_url, ppn, size, format): | ||||||
|     """ |     """ | ||||||
|     Construct an IIIF URL from a dms URL. |     Construct an IIIF URL from a dms URL. | ||||||
|  | @ -59,6 +72,24 @@ def iiif_url_for_dms_url(dms_url, ppn, size, format): | ||||||
|     return iiif_url |     return iiif_url | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def iiif_url_for_iiif_url(iiif_url, ppn, size, format):
    """
    Construct an IIIF URL from an already existing IIIF URL.

    The page number is taken from the "/dc/<ppn>-<digits>/" path segment of
    the given URL; a new URL with the requested size and format is then
    built for that page.

    :param iiif_url: existing IIIF URL containing "/dc/<ppn>-<page number>/"
    :param ppn: the PPN the URL is expected to belong to
    :param size: IIIF size path segment of the result (e.g. 'full')
    :param format: file extension of the result (e.g. 'tif')
    :return: the rewritten IIIF URL
    :raises ValueError: if the URL has no "/dc/<ppn>-<digits>/" segment
    """
    # Escape the PPN so it is matched literally: an unescaped PPN containing
    # regex metacharacters could reject valid URLs, match the wrong identifier
    # or raise re.error. A successful match also implies the PPN occurs in the
    # URL, so no separate containment check is needed.
    m = re.search(rf'/dc/{re.escape(ppn)}-([0-9]+)/', iiif_url)
    if m is None:
        raise ValueError(f"Unexpected URL {iiif_url}")
    page_num = m.group(1)

    iiif_identifier = f'{ppn}-{page_num}'
    iiif_quality = 'default'
    return f'https://content.staatsbibliothek-berlin.de/dc/{iiif_identifier}/full/{size}/0/{iiif_quality}.{format}'
|  | 
 | ||||||
|  | 
 | ||||||
| def remove_file_grp(mets, use): | def remove_file_grp(mets, use): | ||||||
|     for bad_fileid in mets.xpath(f'//mets:fileGrp[@USE="{use}"]/mets:file/@ID', namespaces=XMLNS): |     for bad_fileid in mets.xpath(f'//mets:fileGrp[@USE="{use}"]/mets:file/@ID', namespaces=XMLNS): | ||||||
|         for bad in mets.xpath(f'//mets:fptr[@FILEID="{bad_fileid}"]', namespaces=XMLNS): |         for bad in mets.xpath(f'//mets:fptr[@FILEID="{bad_fileid}"]', namespaces=XMLNS): | ||||||
|  | @ -134,7 +165,7 @@ def make_workspace(ppn, workspace): | ||||||
|         # XXX Need to fumble around with the URL for now |         # XXX Need to fumble around with the URL for now | ||||||
|         flocat = f.find(f".//{{{XMLNS['mets']}}}FLocat") |         flocat = f.find(f".//{{{XMLNS['mets']}}}FLocat") | ||||||
|         old_url = flocat.attrib[f"{{{XMLNS['xlink']}}}href"] |         old_url = flocat.attrib[f"{{{XMLNS['xlink']}}}href"] | ||||||
|         url_iiif_full = iiif_url_for_dms_url(old_url, ppn, 'full', format_) |         url_iiif_full = iiif_url_for_sbb_url(old_url, ppn, 'full', format_) | ||||||
|         flocat.attrib[f"{{{XMLNS['xlink']}}}href"] = url_iiif_full |         flocat.attrib[f"{{{XMLNS['xlink']}}}href"] = url_iiif_full | ||||||
| 
 | 
 | ||||||
|     mets.find('//mets:fileSec', namespaces=XMLNS).append(file_grp_best) |     mets.find('//mets:fileSec', namespaces=XMLNS).append(file_grp_best) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue