From 8c269b35a488244635eff26cc6728d13c45a1c66 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Fri, 8 Dec 2023 15:58:59 +0100
Subject: [PATCH] =?UTF-8?q?=E2=9C=94=20Test=20creation=20of=20page=5Finfo?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note: the leftover debug statement
`from pprint import pprint; pprint(page_info[0])` was dropped from the new
test, so the created file has 28 lines instead of 29. The blob id on the
"index" line below still names the original 29-line content.

 ...507109.xml => PPN821507109-1361-pages.xml} |  0
 qurator/mods4pandas/tests/test_page_info.py   | 28 ++++++++++++++++++
 2 files changed, 28 insertions(+)
 rename qurator/mods4pandas/tests/data/mets-mods/{sbb-mets-PPN821507109.xml => PPN821507109-1361-pages.xml} (100%)
 mode change 100755 => 100644
 create mode 100644 qurator/mods4pandas/tests/test_page_info.py

diff --git a/qurator/mods4pandas/tests/data/mets-mods/sbb-mets-PPN821507109.xml b/qurator/mods4pandas/tests/data/mets-mods/PPN821507109-1361-pages.xml
old mode 100755
new mode 100644
similarity index 100%
rename from qurator/mods4pandas/tests/data/mets-mods/sbb-mets-PPN821507109.xml
rename to qurator/mods4pandas/tests/data/mets-mods/PPN821507109-1361-pages.xml
diff --git a/qurator/mods4pandas/tests/test_page_info.py b/qurator/mods4pandas/tests/test_page_info.py
new file mode 100644
index 0000000..441230b
--- /dev/null
+++ b/qurator/mods4pandas/tests/test_page_info.py
@@ -0,0 +1,28 @@
+from pathlib import Path
+
+from lxml import etree as ET
+
+from qurator.mods4pandas.mods4pandas import pages_to_dict
+
+
+TESTS_DATA_DIR = Path(__file__).parent / "data"
+
+
+def test_page_info():
+    """Test creation of page_info"""
+    mets = ET.parse(TESTS_DATA_DIR / "mets-mods" / "PPN821507109-1361-pages.xml")
+    page_info = pages_to_dict(mets)
+
+    # We have 1361 pages for this one work.
+    assert len(page_info) == 1361
+    assert all(p["ppn"] == "PPN821507109" for p in page_info)
+
+    # Look closer at an interesting page
+    page_info_page = next(p for p in page_info if p["ID"] == "PHYS_0005")
+
+    assert page_info_page["fileGrp_PRESENTATION_file_FLocat_href"] == "file:///goobi/tiff001/sbb/PPN821507109/00000005.tif"
+
+    # This is a title page with an illustration, check that we correctly got this info from the
+    # structMap.
+    struct_types = sorted(k.removeprefix("structMap-LOGICAL_TYPE_") for k, v in page_info_page.items() if k.startswith("structMap-LOGICAL_TYPE_") and v == 1)
+    assert struct_types == ["illustration", "monograph", "title_page"]