mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-06-09 06:39:53 +02:00
🚧 ppn2ocr: Extract a function to contain the IIIF hack
This commit is contained in:
parent
c7c8934e89
commit
74cb361723
1 changed file with 20 additions and 6 deletions
26
ppn2ocr
26
ppn2ocr
|
@ -50,6 +50,25 @@ def oai_mets(ppn):
|
||||||
return mets
|
return mets
|
||||||
|
|
||||||
|
|
||||||
|
def iiif_url_for_dms_url(dms_url, ppn, size):
    """
    Construct an IIIF URL from a dms URL.

    This function exists to contain the hack of rewriting the URL to get IIIF.

    The page number is taken from the trailing "<digits>.jpg" component of a
    URL matching "/dms/.../<digits>.jpg" and combined with the PPN into the
    IIIF identifier "<ppn>-<page_num>". The size is inserted verbatim into the
    size slot of the IIIF URL (e.g. 'full').

    Raises ValueError if ppn is empty, if ppn does not occur in dms_url, or if
    dms_url does not match the expected "/dms/.../<digits>.jpg" shape.
    """
    # An empty PPN would trivially pass the containment check below ('' is
    # contained in every string) and yield a broken identifier like
    # "-00000001", so reject it explicitly.
    if not ppn:
        raise ValueError(f"Empty PPN for URL {dms_url}")

    # Sanity check: the URL should belong to the PPN we are working on.
    if ppn not in dms_url:
        raise ValueError(f"Unexpected URL {dms_url}")

    # The page number is the numeric basename of the JPEG.
    m = re.search(r'/dms/.*/([0-9]+)\.jpg$', dms_url)
    if not m:
        raise ValueError(f"Unexpected URL {dms_url}")
    page_num = m.group(1)

    iiif_identifier = f'{ppn}-{page_num}'
    iiif_url = f'https://content.staatsbibliothek-berlin.de/dc/{iiif_identifier}/full/{size}/0/default.jpg'

    return iiif_url
|
||||||
|
|
||||||
|
|
||||||
def make_workspace(ppn, workspace):
|
def make_workspace(ppn, workspace):
|
||||||
# Make workspace directory
|
# Make workspace directory
|
||||||
os.mkdir(workspace)
|
os.mkdir(workspace)
|
||||||
|
@ -82,12 +101,7 @@ def make_workspace(ppn, workspace):
|
||||||
# XXX Need to fumble around with the URL for now
|
# XXX Need to fumble around with the URL for now
|
||||||
flocat = f.find(f".//{{{XMLNS['mets']}}}FLocat")
|
flocat = f.find(f".//{{{XMLNS['mets']}}}FLocat")
|
||||||
old_url = flocat.attrib[f"{{{XMLNS['xlink']}}}href"]
|
old_url = flocat.attrib[f"{{{XMLNS['xlink']}}}href"]
|
||||||
m = re.search(r'/dms/.*/([0-9]+)\.jpg$', old_url)
|
url_iiif_full = iiif_url_for_dms_url(old_url, ppn, 'full')
|
||||||
if m:
|
|
||||||
page_num = m.group(1)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unexpected DEFAULT URL {old_url}")
|
|
||||||
url_iiif_full = f'https://content.staatsbibliothek-berlin.de/dc/{ppn}-{page_num}/full/full/0/default.jpg'
|
|
||||||
flocat.attrib[f"{{{XMLNS['xlink']}}}href"] = url_iiif_full
|
flocat.attrib[f"{{{XMLNS['xlink']}}}href"] = url_iiif_full
|
||||||
|
|
||||||
mets.find('//mets:fileSec', namespaces=XMLNS).append(file_grp_best)
|
mets.find('//mets:fileSec', namespaces=XMLNS).append(file_grp_best)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue