mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-08-14 03:59:53 +02:00
Merge branch 'master' of https://github.com/qurator-spk/mods4pandas
This commit is contained in:
commit
f332f46e99
6 changed files with 43 additions and 2 deletions
|
@ -1,2 +1,29 @@
|
||||||
|
root = true
|
||||||
|
|
||||||
[*]
|
[*]
|
||||||
|
charset = utf-8
|
||||||
|
end_of_line = lf
|
||||||
|
indent_size = 4
|
||||||
|
indent_style = space
|
||||||
|
insert_final_newline = true
|
||||||
|
trim_trailing_whitespace = true
|
||||||
max_line_length = 120
|
max_line_length = 120
|
||||||
|
tab_width = 4
|
||||||
|
|
||||||
|
[{*.cfg, *.ini, *.html, *.yaml, *.yml}]
|
||||||
|
indent_size = 2
|
||||||
|
|
||||||
|
[*.json]
|
||||||
|
indent_size = 2
|
||||||
|
insert_final_newline = true
|
||||||
|
|
||||||
|
# trailing spaces in markdown indicate word wrap
|
||||||
|
[*.md]
|
||||||
|
trim_trailing_whitespace = false
|
||||||
|
|
||||||
|
[*.py]
|
||||||
|
multi_line_output = 3
|
||||||
|
include_trailing_comma = True
|
||||||
|
force_grid_wrap = 0
|
||||||
|
use_parentheses = True
|
||||||
|
ensure_newline_before_comments = True
|
||||||
|
|
|
@ -16,7 +16,7 @@ repos:
|
||||||
- id: black
|
- id: black
|
||||||
|
|
||||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
rev: v0.11.13
|
rev: v0.12.7
|
||||||
hooks:
|
hooks:
|
||||||
- args:
|
- args:
|
||||||
- --fix
|
- --fix
|
||||||
|
@ -24,7 +24,7 @@ repos:
|
||||||
id: ruff-check
|
id: ruff-check
|
||||||
|
|
||||||
- repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update
|
- repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update
|
||||||
rev: v0.7.0
|
rev: v0.8.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: pre-commit-update
|
- id: pre-commit-update
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,14 @@ To run a test with profiling:
|
||||||
pytest --profile-svg -k test_page_info
|
pytest --profile-svg -k test_page_info
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To directly test the CLIs using our test data, run:
|
||||||
|
|
||||||
|
```
|
||||||
|
mods4pandas src/mods4pandas/tests/data/mets-mods
|
||||||
|
alto4pandas src/mods4pandas/tests/data/alto
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
# How to use pre-commit
|
# How to use pre-commit
|
||||||
|
|
||||||
This project optionally uses [pre-commit](https://pre-commit.com) to check commits. To use it:
|
This project optionally uses [pre-commit](https://pre-commit.com) to check commits. To use it:
|
||||||
|
|
|
@ -449,6 +449,8 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]:
|
||||||
def get_mets_div(*, ID):
|
def get_mets_div(*, ID):
|
||||||
if ID:
|
if ID:
|
||||||
return structMap_LOGICAL.findall(f'.//mets:div[@ID="{ID}"]', ns)
|
return structMap_LOGICAL.findall(f'.//mets:div[@ID="{ID}"]', ns)
|
||||||
|
else:
|
||||||
|
return []
|
||||||
|
|
||||||
for page in div_physSequence:
|
for page in div_physSequence:
|
||||||
# TODO sort by ORDER?
|
# TODO sort by ORDER?
|
||||||
|
@ -591,6 +593,8 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
|
||||||
with contextlib.suppress(FileNotFoundError):
|
with contextlib.suppress(FileNotFoundError):
|
||||||
os.remove(output_page_info_sqlite3)
|
os.remove(output_page_info_sqlite3)
|
||||||
con_page_info = sqlite3.connect(output_page_info_sqlite3)
|
con_page_info = sqlite3.connect(output_page_info_sqlite3)
|
||||||
|
else:
|
||||||
|
con_page_info = None
|
||||||
|
|
||||||
# Process METS files
|
# Process METS files
|
||||||
with open(output_file + ".warnings.csv", "w") as csvfile:
|
with open(output_file + ".warnings.csv", "w") as csvfile:
|
||||||
|
|
|
@ -135,6 +135,7 @@ def test_dtypes(tmp_path):
|
||||||
assert dt == edt, f"Unexpected dtype {dt} for column {c} (expected {edt})"
|
assert dt == edt, f"Unexpected dtype {dt} for column {c} (expected {edt})"
|
||||||
|
|
||||||
if edt == "object":
|
if edt == "object":
|
||||||
|
assert einner_types is not None
|
||||||
inner_types = set(type(v).__name__ for v in df[c])
|
inner_types = set(type(v).__name__ for v in df[c])
|
||||||
assert all(
|
assert all(
|
||||||
it in einner_types for it in inner_types
|
it in einner_types for it in inner_types
|
||||||
|
|
|
@ -153,6 +153,7 @@ def test_originInfo_no_event_type():
|
||||||
assert d == {} # empty
|
assert d == {} # empty
|
||||||
|
|
||||||
assert len(ws) == 1
|
assert len(ws) == 1
|
||||||
|
assert isinstance(ws[0].message, Warning)
|
||||||
assert (
|
assert (
|
||||||
ws[0].message.args[0]
|
ws[0].message.args[0]
|
||||||
== "Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)"
|
== "Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue