mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-06-09 22:59:53 +02:00
🚧 Check image dimensions if FULLTEXT exists
This commit is contained in:
parent
9d42de5da4
commit
cae56e19db
1 changed file with 26 additions and 1 deletion
27
ppn2ocr
27
ppn2ocr
|
@ -8,6 +8,7 @@ import re
|
|||
import subprocess
|
||||
import click
|
||||
from copy import deepcopy
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
XMLNS = {
|
||||
|
@ -114,7 +115,31 @@ def make_workspace(ppn, workspace):
|
|||
url_iiif_full = iiif_url_for_dms_url(old_url, ppn, 'full', format_)
|
||||
flocat.attrib[f"{{{XMLNS['xlink']}}}href"] = url_iiif_full
|
||||
|
||||
mets.find('//mets:fileSec', namespaces=XMLNS).append(file_grp_best)
|
||||
mets.find("//mets:fileSec", namespaces=XMLNS).append(file_grp_best)
|
||||
|
||||
# Check image dimensions if FULLTEXT exists
|
||||
file_grp_fulltext = mets.find('//mets:fileGrp[@USE="FULLTEXT"]', namespaces=XMLNS)
|
||||
if file_grp_fulltext is not None:
|
||||
# Collect all file IDs for every file group
|
||||
file_ids_for_group = defaultdict(list)
|
||||
for file_grp in mets.findall("//mets:fileGrp", namespaces=XMLNS):
|
||||
for file_ in file_grp.findall("mets:file", namespaces=XMLNS):
|
||||
print(file_.attrib["ID"])
|
||||
file_ids_for_group[file_grp.attrib["USE"]].append(file_.attrib["ID"])
|
||||
|
||||
for page in mets.findall(
|
||||
'//mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]',
|
||||
namespaces=XMLNS,
|
||||
):
|
||||
print()
|
||||
for fptr in page.findall("mets:fptr", namespaces=XMLNS):
|
||||
print(fptr)
|
||||
# XXX
|
||||
# For every TYPE="page" in <mets:structMap TYPE="PHYSICAL">:
|
||||
# Check image dimensions of
|
||||
# <mets:fptr FILEID="FILE_0001_FULLTEXT"/>
|
||||
# <mets:fptr FILEID="FILE_0001_BEST"/>
|
||||
# (need to find FILEID for pageid in given file group)
|
||||
|
||||
# Write mets.xml
|
||||
mets.write('mets.xml', pretty_print=True)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue