mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-09 19:59:57 +02:00
✔ Test creation of page_info
This commit is contained in:
parent
f243dd204a
commit
8c269b35a4
2 changed files with 29 additions and 0 deletions
29
qurator/mods4pandas/tests/test_page_info.py
Normal file
29
qurator/mods4pandas/tests/test_page_info.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
from pathlib import Path
|
||||
|
||||
from lxml import etree as ET
|
||||
|
||||
from qurator.mods4pandas.mods4pandas import pages_to_dict
|
||||
|
||||
|
||||
# Directory holding the test fixtures, located next to this test module.
TESTS_DATA_DIR = Path(__file__).parent.joinpath("data")
|
||||
|
||||
|
||||
def test_page_info():
    """Test creation of page_info.

    Parses the METS fixture ``PPN821507109-1361-pages.xml`` from the test data
    directory, converts it with ``pages_to_dict`` and checks:

    * the number of page records and that each one carries the work's PPN,
    * the PRESENTATION file location recorded for page ``PHYS_0005``,
    * the logical structMap types flagged (value ``1``) for that page.
    """
    mets = ET.parse(TESTS_DATA_DIR / "mets-mods" / "PPN821507109-1361-pages.xml")
    page_info = pages_to_dict(mets)

    # We have 1361 pages for this one work.
    assert len(page_info) == 1361
    assert all(p["ppn"] == "PPN821507109" for p in page_info)

    # Look closer at an interesting page. Use a default so that a missing page
    # shows up as a readable assertion failure instead of a bare StopIteration.
    page_info_page = next((p for p in page_info if p["ID"] == "PHYS_0005"), None)
    assert page_info_page is not None, "no page with ID 'PHYS_0005' in page_info"

    assert (
        page_info_page["fileGrp_PRESENTATION_file_FLocat_href"]
        == "file:///goobi/tiff001/sbb/PPN821507109/00000005.tif"
    )

    # This is a title page with an illustration, check that we correctly got this info from the
    # structMap.
    prefix = "structMap-LOGICAL_TYPE_"
    struct_types = sorted(
        key.removeprefix(prefix)
        for key, value in page_info_page.items()
        if key.startswith(prefix) and value == 1
    )
    assert struct_types == ["illustration", "monograph", "title_page"]
|
Loading…
Add table
Add a link
Reference in a new issue