mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-10-30 18:54:14 +01:00 
			
		
		
		
	✨ ppn2ocr: Support TIFF in the BEST group
This commit is contained in:
		
							parent
							
								
									4e37a52899
								
							
						
					
					
						commit
						f7b43bbefa
					
				
					 1 changed files with 17 additions and 3 deletions
				
			
		
							
								
								
									
										20
									
								
								ppn2ocr
									
										
									
									
									
								
							
							
						
						
									
										20
									
								
								ppn2ocr
									
										
									
									
									
								
							|  | @ -39,7 +39,7 @@ def oai_mets(ppn): | |||
|     return mets | ||||
| 
 | ||||
| 
 | ||||
| def iiif_url_for_dms_url(dms_url, ppn, size): | ||||
| def iiif_url_for_dms_url(dms_url, ppn, size, format): | ||||
|     """ | ||||
|     Construct an IIIF URL from a dms URL. | ||||
| 
 | ||||
|  | @ -53,7 +53,8 @@ def iiif_url_for_dms_url(dms_url, ppn, size): | |||
|     else: | ||||
|         raise ValueError(f"Unexpected URL {dms_url}") | ||||
|     iiif_identifier = f'{ppn}-{page_num}' | ||||
|     iiif_url = f'https://content.staatsbibliothek-berlin.de/dc/{iiif_identifier}/full/{size}/0/default.jpg' | ||||
|     iiif_quality = 'default' | ||||
|     iiif_url = f'https://content.staatsbibliothek-berlin.de/dc/{iiif_identifier}/full/{size}/0/{iiif_quality}.{format}' | ||||
| 
 | ||||
|     return iiif_url | ||||
| 
 | ||||
|  | @ -66,6 +67,17 @@ def remove_file_grp(mets, use): | |||
|         bad.getparent().remove(bad) | ||||
| 
 | ||||
| 
 | ||||
def mime_type_for_format(format_):
    """
    Return the MIME type for an image format/file extension.

    format_ is the extension used at the end of the IIIF URL, either
    'tif' or 'jpg'. Raises ValueError for any other value.
    """
    # Note: the registered media type for JPEG is 'image/jpeg';
    # 'image/jpg' is not a registered type.
    mime_types = {
        'tif': 'image/tiff',
        'jpg': 'image/jpeg',
    }
    try:
        return mime_types[format_]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments, which the original
        # if/elif chain also rejected with a ValueError.
        raise ValueError(f"Unsupported image format {format_!r}") from None
| 
 | ||||
| 
 | ||||
| def make_workspace(ppn, workspace): | ||||
|     # Make workspace directory | ||||
|     os.mkdir(workspace) | ||||
|  | @ -81,6 +93,7 @@ def make_workspace(ppn, workspace): | |||
| 
 | ||||
| 
 | ||||
|     # Duplicate DEFAULT file group into a new file group BEST | ||||
|     format_ = 'tif' | ||||
|     file_grp_default = mets.find('//mets:fileGrp[@USE="DEFAULT"]', namespaces=XMLNS) | ||||
|     file_grp_best = deepcopy(file_grp_default) | ||||
| 
 | ||||
|  | @ -89,6 +102,7 @@ def make_workspace(ppn, workspace): | |||
|         old_id = f.attrib['ID'] | ||||
|         new_id = re.sub('DEFAULT', 'BEST', old_id) | ||||
|         f.attrib['ID'] = new_id | ||||
|         f.attrib['MIMETYPE'] = mime_type_for_format(format_) | ||||
| 
 | ||||
|         for fptr in mets.findall(f'//mets:fptr[@FILEID="{old_id}"]', namespaces=XMLNS): | ||||
|             new_fptr = deepcopy(fptr) | ||||
|  | @ -98,7 +112,7 @@ def make_workspace(ppn, workspace): | |||
|         # XXX Need to fumble around with the URL for now | ||||
|         flocat = f.find(f".//{{{XMLNS['mets']}}}FLocat") | ||||
|         old_url = flocat.attrib[f"{{{XMLNS['xlink']}}}href"] | ||||
|         url_iiif_full = iiif_url_for_dms_url(old_url, ppn, 'full') | ||||
|         url_iiif_full = iiif_url_for_dms_url(old_url, ppn, 'full', format_) | ||||
|         flocat.attrib[f"{{{XMLNS['xlink']}}}href"] = url_iiif_full | ||||
| 
 | ||||
|     mets.find('//mets:fileSec', namespaces=XMLNS).append(file_grp_best) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue