mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-06-09 14:49:53 +02:00
🐛 ppn2ocr: Don't break now that we have IIIF URLs
Some checks failed
continuous-integration/drone/push Build is failing
Some checks failed
continuous-integration/drone/push Build is failing
This commit is contained in:
parent
054a37a669
commit
b2e02dbf64
1 changed files with 32 additions and 1 deletions
33
ppn2ocr
33
ppn2ocr
|
@ -39,6 +39,19 @@ def oai_mets(ppn):
|
||||||
return mets
|
return mets
|
||||||
|
|
||||||
|
|
||||||
|
def iiif_url_for_sbb_url(sbb_url, ppn, size, format):
    """
    Build an IIIF URL from an SBB image URL, which is either a dms URL or
    already an IIIF URL.

    A URL containing "/dms/" is handed to iiif_url_for_dms_url(); any other
    URL is handed to iiif_url_for_iiif_url(). The ppn, size and format
    arguments are passed through unchanged and the result of the chosen
    converter is returned.

    This dispatcher is needed for as long as dms URLs exist (or for as long
    as IIIF URLs may have to be rewritten for a different size).
    """
    is_dms_url = "/dms/" in sbb_url
    convert = iiif_url_for_dms_url if is_dms_url else iiif_url_for_iiif_url
    return convert(sbb_url, ppn, size, format)
|
||||||
|
|
||||||
|
|
||||||
def iiif_url_for_dms_url(dms_url, ppn, size, format):
|
def iiif_url_for_dms_url(dms_url, ppn, size, format):
|
||||||
"""
|
"""
|
||||||
Construct an IIIF URL from a dms URL.
|
Construct an IIIF URL from a dms URL.
|
||||||
|
@ -59,6 +72,24 @@ def iiif_url_for_dms_url(dms_url, ppn, size, format):
|
||||||
return iiif_url
|
return iiif_url
|
||||||
|
|
||||||
|
|
||||||
|
def iiif_url_for_iiif_url(iiif_url, ppn, size, format):
    """
    Construct an IIIF URL from an already existing IIIF URL.

    The page number is read from the "/dc/<ppn>-<digits>/" path segment of
    the given URL. A new URL on content.staatsbibliothek-berlin.de is then
    built for the same identifier with the full region, the requested size,
    rotation 0, quality "default" and the requested format.

    Parameters:
        iiif_url: the existing IIIF URL to rewrite
        ppn:      the PPN expected in the URL's identifier (matched literally)
        size:     value for the size segment of the new URL (e.g. 'full')
        format:   file extension for the new URL (e.g. 'tif')

    Returns the rewritten IIIF URL as a string.

    Raises ValueError if the URL has no "/dc/<ppn>-<digits>/" segment for the
    given PPN.
    """
    # Escape the PPN so characters such as "+", "." or "(" are matched
    # literally instead of being interpreted as regex syntax. A URL that
    # does not contain the PPN at all cannot match either, so this single
    # check also covers the "PPN missing from URL" case.
    m = re.search(rf'/dc/{re.escape(ppn)}-([0-9]+)/', iiif_url)
    if m is None:
        raise ValueError(f"Unexpected URL {iiif_url}")
    page_num = m.group(1)

    iiif_identifier = f'{ppn}-{page_num}'
    iiif_quality = 'default'
    return (
        'https://content.staatsbibliothek-berlin.de/dc/'
        f'{iiif_identifier}/full/{size}/0/{iiif_quality}.{format}'
    )
|
||||||
|
|
||||||
|
|
||||||
def remove_file_grp(mets, use):
|
def remove_file_grp(mets, use):
|
||||||
for bad_fileid in mets.xpath(f'//mets:fileGrp[@USE="{use}"]/mets:file/@ID', namespaces=XMLNS):
|
for bad_fileid in mets.xpath(f'//mets:fileGrp[@USE="{use}"]/mets:file/@ID', namespaces=XMLNS):
|
||||||
for bad in mets.xpath(f'//mets:fptr[@FILEID="{bad_fileid}"]', namespaces=XMLNS):
|
for bad in mets.xpath(f'//mets:fptr[@FILEID="{bad_fileid}"]', namespaces=XMLNS):
|
||||||
|
@ -134,7 +165,7 @@ def make_workspace(ppn, workspace):
|
||||||
# XXX Need to fumble around with the URL for now
|
# XXX Need to fumble around with the URL for now
|
||||||
flocat = f.find(f".//{{{XMLNS['mets']}}}FLocat")
|
flocat = f.find(f".//{{{XMLNS['mets']}}}FLocat")
|
||||||
old_url = flocat.attrib[f"{{{XMLNS['xlink']}}}href"]
|
old_url = flocat.attrib[f"{{{XMLNS['xlink']}}}href"]
|
||||||
url_iiif_full = iiif_url_for_dms_url(old_url, ppn, 'full', format_)
|
url_iiif_full = iiif_url_for_sbb_url(old_url, ppn, 'full', format_)
|
||||||
flocat.attrib[f"{{{XMLNS['xlink']}}}href"] = url_iiif_full
|
flocat.attrib[f"{{{XMLNS['xlink']}}}href"] = url_iiif_full
|
||||||
|
|
||||||
mets.find('//mets:fileSec', namespaces=XMLNS).append(file_grp_best)
|
mets.find('//mets:fileSec', namespaces=XMLNS).append(file_grp_best)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue