# NOTE: iiif_url_for_dms_url() is defined elsewhere in ppn2ocr. make_workspace()
# rewrites each FLocat xlink:href via
#     iiif_url_for_sbb_url(old_url, ppn, 'full', format_)
# instead of calling iiif_url_for_dms_url() directly.


def iiif_url_for_sbb_url(sbb_url, ppn, size, format):
    """
    Construct an IIIF URL from either a dms URL or an existing IIIF URL.

    This dispatcher is needed as long as dms URLs exist (or as long as IIIF
    URLs may have to be rewritten for a different size).

    :param sbb_url: source URL; treated as a dms URL if it contains "/dms/",
        otherwise as an IIIF URL
    :param ppn: PPN of the document the URL belongs to
    :param size: value for the size segment of the resulting IIIF URL
    :param format: file extension of the resulting IIIF URL
    :return: the IIIF URL as a string
    :raises ValueError: if an IIIF ``sbb_url`` does not have the expected
        shape (see iiif_url_for_iiif_url)
    """
    if "/dms/" in sbb_url:
        return iiif_url_for_dms_url(sbb_url, ppn, size, format)
    return iiif_url_for_iiif_url(sbb_url, ppn, size, format)


def iiif_url_for_iiif_url(iiif_url, ppn, size, format):
    """
    Construct an IIIF URL from an already existing IIIF URL.

    The page number is taken from the ``/dc/{ppn}-{page_num}/`` path segment
    of ``iiif_url``; the result always uses region ``full``, rotation ``0``
    and quality ``default``.

    :param iiif_url: existing IIIF URL containing ``/dc/{ppn}-{page_num}/``
    :param ppn: PPN of the document; matched literally in ``iiif_url``
    :param size: value for the size segment of the resulting IIIF URL
    :param format: file extension of the resulting IIIF URL
    :return: the rewritten IIIF URL as a string
    :raises ValueError: if ``iiif_url`` has no ``/dc/{ppn}-{page_num}/``
        segment for the given ``ppn``
    """
    # Escape the PPN so that regex metacharacters in it are matched literally.
    # A successful match also guarantees that the PPN occurs in the URL, so no
    # separate substring check is needed.
    m = re.search(rf'/dc/{re.escape(ppn)}-([0-9]+)/', iiif_url)
    if m is None:
        raise ValueError(f"Unexpected URL {iiif_url}")

    iiif_identifier = f'{ppn}-{m.group(1)}'
    iiif_quality = 'default'
    return (
        'https://content.staatsbibliothek-berlin.de/dc/'
        f'{iiif_identifier}/full/{size}/0/{iiif_quality}.{format}'
    )