diff --git a/.editorconfig b/.editorconfig index fac7a92..a63f7f6 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,2 +1,29 @@ +root = true + [*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true max_line_length = 120 +tab_width = 4 + +[{*.cfg, *.ini, *.html, *.yaml, *.yml}] +indent_size = 2 + +[*.json] +indent_size = 2 +insert_final_newline = true + +# trailing spaces in markdown indicate a hard line break +[*.md] +trim_trailing_whitespace = false + +[*.py] +multi_line_output = 3 +include_trailing_comma = True +force_grid_wrap = 0 +use_parentheses = True +ensure_newline_before_comments = True diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 085ca09..26ea3e1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.13 + rev: v0.12.7 hooks: - args: - --fix @@ -24,7 +24,7 @@ repos: id: ruff-check - repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update - rev: v0.7.0 + rev: v0.8.0 hooks: - id: pre-commit-update diff --git a/README-DEV.md b/README-DEV.md index 0765657..b3a56c9 100644 --- a/README-DEV.md +++ b/README-DEV.md @@ -16,6 +16,14 @@ To run a test with profiling: pytest --profile-svg -k test_page_info ``` +To directly test the CLIs using our test data, run: + +``` +mods4pandas src/mods4pandas/tests/data/mets-mods +alto4pandas src/mods4pandas/tests/data/alto +``` + + # How to use pre-commit This project optionally uses [pre-commit](https://pre-commit.com) to check commits. 
To use it: diff --git a/src/mods4pandas/mods4pandas.py b/src/mods4pandas/mods4pandas.py index 60a3a60..e947657 100755 --- a/src/mods4pandas/mods4pandas.py +++ b/src/mods4pandas/mods4pandas.py @@ -449,6 +449,8 @@ def pages_to_dict(mets, raise_errors=True) -> List[Dict]: def get_mets_div(*, ID): if ID: return structMap_LOGICAL.findall(f'.//mets:div[@ID="{ID}"]', ns) + else: + return [] for page in div_physSequence: # TODO sort by ORDER? @@ -591,6 +593,8 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets with contextlib.suppress(FileNotFoundError): os.remove(output_page_info_sqlite3) con_page_info = sqlite3.connect(output_page_info_sqlite3) + else: + con_page_info = None # Process METS files with open(output_file + ".warnings.csv", "w") as csvfile: diff --git a/src/mods4pandas/tests/test_alto.py b/src/mods4pandas/tests/test_alto.py index 849e076..a215f89 100644 --- a/src/mods4pandas/tests/test_alto.py +++ b/src/mods4pandas/tests/test_alto.py @@ -135,6 +135,7 @@ def test_dtypes(tmp_path): assert dt == edt, f"Unexpected dtype {dt} for column {c} (expected {edt})" if edt == "object": + assert einner_types is not None inner_types = set(type(v).__name__ for v in df[c]) assert all( it in einner_types for it in inner_types diff --git a/src/mods4pandas/tests/test_mods4pandas.py b/src/mods4pandas/tests/test_mods4pandas.py index d7daf84..24f051f 100644 --- a/src/mods4pandas/tests/test_mods4pandas.py +++ b/src/mods4pandas/tests/test_mods4pandas.py @@ -153,6 +153,7 @@ def test_originInfo_no_event_type(): assert d == {} # empty assert len(ws) == 1 + assert isinstance(ws[0].message, Warning) assert ( ws[0].message.args[0] == "Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)"