mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-08-14 12:09:52 +02:00
Merge branch 'master' of https://github.com/qurator-spk/mods4pandas
This commit is contained in:
commit
f332f46e99
6 changed files with 43 additions and 2 deletions
|
@@ -449,6 +449,8 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
|
|||
def get_mets_div(*, ID):
|
||||
if ID:
|
||||
return structMap_LOGICAL.findall(f'.//mets:div[@ID="{ID}"]', ns)
|
||||
else:
|
||||
return []
|
||||
|
||||
for page in div_physSequence:
|
||||
# TODO sort by ORDER?
|
||||
|
@@ -591,6 +593,8 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
|
|||
with contextlib.suppress(FileNotFoundError):
|
||||
os.remove(output_page_info_sqlite3)
|
||||
con_page_info = sqlite3.connect(output_page_info_sqlite3)
|
||||
else:
|
||||
con_page_info = None
|
||||
|
||||
# Process METS files
|
||||
with open(output_file + ".warnings.csv", "w") as csvfile:
|
||||
|
|
|
@@ -135,6 +135,7 @@ def test_dtypes(tmp_path):
|
|||
assert dt == edt, f"Unexpected dtype {dt} for column {c} (expected {edt})"
|
||||
|
||||
if edt == "object":
|
||||
assert einner_types is not None
|
||||
inner_types = set(type(v).__name__ for v in df[c])
|
||||
assert all(
|
||||
it in einner_types for it in inner_types
|
||||
|
|
|
@@ -153,6 +153,7 @@ def test_originInfo_no_event_type():
|
|||
assert d == {} # empty
|
||||
|
||||
assert len(ws) == 1
|
||||
assert isinstance(ws[0].message, Warning)
|
||||
assert (
|
||||
ws[0].message.args[0]
|
||||
== "Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue