🚧 Check image dimensions if FULLTEXT exists

2026-06-21 03:59:27 +02:00 · 2020-09-25 18:40:00 +02:00 · 2020-09-25 18:40:00 +02:00 · cae56e19db
commit cae56e19db
parent 9d42de5da4
1 changed files with 26 additions and 1 deletions
--- a/27
+++ b/27
@@ -8,6 +8,7 @@ import re
 import subprocess
 import click
 from copy import deepcopy
+from collections import defaultdict


 XMLNS = {
@@ -114,7 +115,31 @@ def make_workspace(ppn, workspace):
        url_iiif_full = iiif_url_for_dms_url(old_url, ppn, 'full', format_)
        flocat.attrib[f"{{{XMLNS['xlink']}}}href"] = url_iiif_full

-    mets.find('//mets:fileSec', namespaces=XMLNS).append(file_grp_best)
+    mets.find("//mets:fileSec", namespaces=XMLNS).append(file_grp_best)
+
+    # Check image dimensions if FULLTEXT exists
+    file_grp_fulltext = mets.find('//mets:fileGrp[@USE="FULLTEXT"]', namespaces=XMLNS)
+    if file_grp_fulltext is not None:
+        # Collect all file IDs for every file group
+        file_ids_for_group = defaultdict(list)
+        for file_grp in mets.findall("//mets:fileGrp", namespaces=XMLNS):
+            for file_ in file_grp.findall("mets:file", namespaces=XMLNS):
+                print(file_.attrib["ID"])
+                file_ids_for_group[file_grp.attrib["USE"]].append(file_.attrib["ID"])
+
+        for page in mets.findall(
+            '//mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]',
+            namespaces=XMLNS,
+        ):
+            print()
+            for fptr in page.findall("mets:fptr", namespaces=XMLNS):
+                print(fptr)
+        # XXX
+        # For every TYPE="page" in <mets:structMap TYPE="PHYSICAL">:
+        #   Check image dimensions of
+        #       <mets:fptr FILEID="FILE_0001_FULLTEXT"/>
+        #       <mets:fptr FILEID="FILE_0001_BEST"/>
+        #       (need to find FILEID for pageid in given file group)

    # Write mets.xml
    mets.write('mets.xml', pretty_print=True)