diff --git a/requirements.txt b/requirements.txt index 6b18f99..aee014e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ tqdm lxml pyarrow XlsxWriter +loguru diff --git a/src/mods4pandas/alto4pandas.py b/src/mods4pandas/alto4pandas.py index 500ac31..7b39c3c 100755 --- a/src/mods4pandas/alto4pandas.py +++ b/src/mods4pandas/alto4pandas.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import contextlib import csv -import logging import os import sqlite3 import warnings @@ -9,6 +8,7 @@ from operator import attrgetter from typing import List import click +from loguru import logger from lxml import etree as ET from tqdm import tqdm @@ -21,8 +21,6 @@ from .lib import ( sorted_groupby, ) -logger = logging.getLogger("alto4pandas") - def alto_to_dict(alto, raise_errors=True): """Convert ALTO metadata to a nested dictionary""" @@ -151,8 +149,8 @@ def alto_to_dict(alto, raise_errors=True): def walk(m): # XXX do this in mods4pandas, too if os.path.isdir(m): - tqdm.write(f"Scanning directory {m}") - for f in tqdm(os.scandir(m), leave=False): + logger.info(f"Scanning directory {m}") + for f in os.scandir(m): if f.is_file() and not f.name.startswith("."): yield f.path elif f.is_dir(): @@ -247,8 +245,6 @@ def process(alto_files: List[str], output_file: str): def main(): - logging.basicConfig(level=logging.INFO) - for prefix, uri in ns.items(): ET.register_namespace(prefix, uri) diff --git a/src/mods4pandas/mods4pandas.py b/src/mods4pandas/mods4pandas.py index 4dde5fc..e9c87b9 100755 --- a/src/mods4pandas/mods4pandas.py +++ b/src/mods4pandas/mods4pandas.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import contextlib import csv -import logging import os import sqlite3 import warnings @@ -9,6 +8,7 @@ from operator import attrgetter from typing import Dict, List import click +from loguru import logger from lxml import etree as ET from tqdm import tqdm @@ -22,8 +22,6 @@ from .lib import ( sorted_groupby, ) -logger = logging.getLogger("mods4pandas") - def mods_to_dict(mods, raise_errors=True): """Convert MODS metadata to a nested dictionary""" @@ -603,7 +601,7 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets with open(output_file + ".warnings.csv", "w") as csvfile: csvwriter = csv.writer(csvfile) logger.info("Processing METS files") - for mets_file in tqdm(mets_files_real, leave=True): + for mets_file in tqdm(mets_files_real, leave=False): try: root = ET.parse(mets_file).getroot() mets = root # XXX .find('mets:mets', ns) does not work here @@ -665,8 +663,6 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets def main(): - logging.basicConfig(level=logging.INFO) - for prefix, uri in ns.items(): ET.register_namespace(prefix, uri)