1
0
Fork 0
mirror of https://github.com/qurator-spk/modstool.git synced 2025-08-14 03:59:53 +02:00
This commit is contained in:
Gerber, Mike 2025-08-08 12:06:48 +02:00
commit f332f46e99
6 changed files with 43 additions and 2 deletions

View file

@ -1,2 +1,29 @@
root = true
[*]
charset = utf-8
end_of_line = lf
indent_size = 4
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true
max_line_length = 120
tab_width = 4
[{*.cfg, *.ini, *.html, *.yaml, *.yml}]
indent_size = 2
[*.json]
indent_size = 2
insert_final_newline = true
# trailing spaces in markdown indicate word wrap
[*.md]
trim_trailing_whitespace = false
[*.py]
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
use_parentheses = True
ensure_newline_before_comments = True

View file

@ -16,7 +16,7 @@ repos:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.13 rev: v0.12.7
hooks:
- args:
- --fix
@ -24,7 +24,7 @@ repos:
id: ruff-check
- repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update
rev: v0.7.0 rev: v0.8.0
hooks:
- id: pre-commit-update

View file

@ -16,6 +16,14 @@ To run a test with profiling:
pytest --profile-svg -k test_page_info
```
To directly test the CLIs using our test data, run:
```
mods4pandas src/mods4pandas/tests/data/mets-mods
alto4pandas src/mods4pandas/tests/data/alto
```
# How to use pre-commit
This project optionally uses [pre-commit](https://pre-commit.com) to check commits. To use it:

View file

@ -449,6 +449,8 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
def get_mets_div(*, ID): def get_mets_div(*, ID):
if ID: if ID:
return structMap_LOGICAL.findall(f'.//mets:div[@ID="{ID}"]', ns) return structMap_LOGICAL.findall(f'.//mets:div[@ID="{ID}"]', ns)
else:
return []
for page in div_physSequence: for page in div_physSequence:
# TODO sort by ORDER? # TODO sort by ORDER?
@ -591,6 +593,8 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
with contextlib.suppress(FileNotFoundError): with contextlib.suppress(FileNotFoundError):
os.remove(output_page_info_sqlite3) os.remove(output_page_info_sqlite3)
con_page_info = sqlite3.connect(output_page_info_sqlite3) con_page_info = sqlite3.connect(output_page_info_sqlite3)
else:
con_page_info = None
# Process METS files # Process METS files
with open(output_file + ".warnings.csv", "w") as csvfile: with open(output_file + ".warnings.csv", "w") as csvfile:

View file

@ -135,6 +135,7 @@ def test_dtypes(tmp_path):
assert dt == edt, f"Unexpected dtype {dt} for column {c} (expected {edt})" assert dt == edt, f"Unexpected dtype {dt} for column {c} (expected {edt})"
if edt == "object": if edt == "object":
assert einner_types is not None
inner_types = set(type(v).__name__ for v in df[c]) inner_types = set(type(v).__name__ for v in df[c])
assert all( assert all(
it in einner_types for it in inner_types it in einner_types for it in inner_types

View file

@ -153,6 +153,7 @@ def test_originInfo_no_event_type():
assert d == {} # empty assert d == {} # empty
assert len(ws) == 1 assert len(ws) == 1
assert isinstance(ws[0].message, Warning)
assert ( assert (
ws[0].message.args[0] ws[0].message.args[0]
== "Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)" == "Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)"