From 46847c5000276098bf40a1a157f1d9682a724aca Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 8 Aug 2025 14:34:06 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=84=20Use=20loguru=20for=20logging/rem?= =?UTF-8?q?ove=20extra=20progress=20bars?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes gh-42. --- requirements.txt | 1 + src/mods4pandas/alto4pandas.py | 10 +++------- src/mods4pandas/mods4pandas.py | 8 ++------ 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6b18f99..aee014e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ tqdm lxml pyarrow XlsxWriter +loguru diff --git a/src/mods4pandas/alto4pandas.py b/src/mods4pandas/alto4pandas.py index 500ac31..7b39c3c 100755 --- a/src/mods4pandas/alto4pandas.py +++ b/src/mods4pandas/alto4pandas.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import contextlib import csv -import logging import os import sqlite3 import warnings @@ -9,6 +8,7 @@ from operator import attrgetter from typing import List import click +from loguru import logger from lxml import etree as ET from tqdm import tqdm @@ -21,8 +21,6 @@ from .lib import ( sorted_groupby, ) -logger = logging.getLogger("alto4pandas") - def alto_to_dict(alto, raise_errors=True): """Convert ALTO metadata to a nested dictionary""" @@ -151,8 +149,8 @@ def alto_to_dict(alto, raise_errors=True): def walk(m): # XXX do this in mods4pandas, too if os.path.isdir(m): - tqdm.write(f"Scanning directory {m}") - for f in tqdm(os.scandir(m), leave=False): + logger.info(f"Scanning directory {m}") + for f in os.scandir(m): if f.is_file() and not f.name.startswith("."): yield f.path elif f.is_dir(): @@ -247,8 +245,6 @@ def process(alto_files: List[str], output_file: str): def main(): - logging.basicConfig(level=logging.INFO) - for prefix, uri in ns.items(): ET.register_namespace(prefix, uri) diff --git a/src/mods4pandas/mods4pandas.py b/src/mods4pandas/mods4pandas.py index 4dde5fc..e9c87b9 100755 --- a/src/mods4pandas/mods4pandas.py +++ b/src/mods4pandas/mods4pandas.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import contextlib import csv -import logging import os import sqlite3 import warnings @@ -9,6 +8,7 @@ from operator import attrgetter from typing import Dict, List import click +from loguru import logger from lxml import etree as ET from tqdm import tqdm @@ -22,8 +22,6 @@ from .lib import ( sorted_groupby, ) -logger = logging.getLogger("mods4pandas") - def mods_to_dict(mods, raise_errors=True): """Convert MODS metadata to a nested dictionary""" @@ -603,7 +601,7 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets with open(output_file + ".warnings.csv", "w") as csvfile: csvwriter = csv.writer(csvfile) logger.info("Processing METS files") - for mets_file in tqdm(mets_files_real, leave=True): + for mets_file in tqdm(mets_files_real, leave=False): try: root = ET.parse(mets_file).getroot() mets = root # XXX .find('mets:mets', ns) does not work here @@ -665,8 +663,6 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets def main(): - logging.basicConfig(level=logging.INFO) - for prefix, uri in ns.items(): ET.register_namespace(prefix, uri)