1
0
Fork 0
mirror of https://github.com/qurator-spk/modstool.git synced 2025-08-14 12:09:52 +02:00
This commit is contained in:
Gerber, Mike 2025-08-08 12:06:48 +02:00
commit f332f46e99
6 changed files with 43 additions and 2 deletions

View file

@ -449,6 +449,8 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
def get_mets_div(*, ID):
    """Look up ``mets:div`` elements in the logical structMap by their ID.

    ``ID`` is keyword-only. A falsy ``ID`` (e.g. ``None`` or ``""``) yields
    an empty list without running a query; otherwise the result of
    ``structMap_LOGICAL.findall`` for the matching ``@ID`` is returned.
    """
    # Guard clause: nothing to search for, so skip the XPath query.
    if not ID:
        return []
    return structMap_LOGICAL.findall(f'.//mets:div[@ID="{ID}"]', ns)
for page in div_physSequence:
# TODO sort by ORDER?
@ -591,6 +593,8 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
with contextlib.suppress(FileNotFoundError):
os.remove(output_page_info_sqlite3)
con_page_info = sqlite3.connect(output_page_info_sqlite3)
else:
con_page_info = None
# Process METS files
with open(output_file + ".warnings.csv", "w") as csvfile:

View file

@ -135,6 +135,7 @@ def test_dtypes(tmp_path):
assert dt == edt, f"Unexpected dtype {dt} for column {c} (expected {edt})"
if edt == "object":
assert einner_types is not None
inner_types = set(type(v).__name__ for v in df[c])
assert all(
it in einner_types for it in inner_types

View file

@ -153,6 +153,7 @@ def test_originInfo_no_event_type():
assert d == {} # empty
assert len(ws) == 1
assert isinstance(ws[0].message, Warning)
assert (
ws[0].message.args[0]
== "Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)"