1
0
Fork 0
mirror of https://github.com/qurator-spk/modstool.git synced 2025-08-13 03:29:53 +02:00

💄 Use loguru for logging/remove extra progress bars

Closes gh-42.
This commit is contained in:
Gerber, Mike 2025-08-08 14:34:06 +02:00
parent 308d2430a8
commit 46847c5000
3 changed files with 6 additions and 13 deletions

View file

@@ -5,3 +5,4 @@ tqdm
 lxml
 pyarrow
 XlsxWriter
+loguru

View file

@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 import contextlib
 import csv
-import logging
 import os
 import sqlite3
 import warnings
@@ -9,6 +8,7 @@ from operator import attrgetter
 from typing import List
 import click
+from loguru import logger
 from lxml import etree as ET
 from tqdm import tqdm
@@ -21,8 +21,6 @@ from .lib import (
 sorted_groupby,
 )
-logger = logging.getLogger("alto4pandas")
 def alto_to_dict(alto, raise_errors=True):
 """Convert ALTO metadata to a nested dictionary"""
@@ -151,8 +149,8 @@ def alto_to_dict(alto, raise_errors=True):
 def walk(m):
 # XXX do this in mods4pandas, too
 if os.path.isdir(m):
-tqdm.write(f"Scanning directory {m}")
-for f in tqdm(os.scandir(m), leave=False):
+logger.info(f"Scanning directory {m}")
+for f in os.scandir(m):
 if f.is_file() and not f.name.startswith("."):
 yield f.path
 elif f.is_dir():
@@ -247,8 +245,6 @@ def process(alto_files: List[str], output_file: str):
 def main():
-logging.basicConfig(level=logging.INFO)
 for prefix, uri in ns.items():
 ET.register_namespace(prefix, uri)

View file

@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 import contextlib
 import csv
-import logging
 import os
 import sqlite3
 import warnings
@@ -9,6 +8,7 @@ from operator import attrgetter
 from typing import Dict, List
 import click
+from loguru import logger
 from lxml import etree as ET
 from tqdm import tqdm
@@ -22,8 +22,6 @@ from .lib import (
 sorted_groupby,
 )
-logger = logging.getLogger("mods4pandas")
 def mods_to_dict(mods, raise_errors=True):
 """Convert MODS metadata to a nested dictionary"""
@@ -603,7 +601,7 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
 with open(output_file + ".warnings.csv", "w") as csvfile:
 csvwriter = csv.writer(csvfile)
 logger.info("Processing METS files")
-for mets_file in tqdm(mets_files_real, leave=True):
+for mets_file in tqdm(mets_files_real, leave=False):
 try:
 root = ET.parse(mets_file).getroot()
 mets = root # XXX .find('mets:mets', ns) does not work here
@@ -665,8 +663,6 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
 def main():
-logging.basicConfig(level=logging.INFO)
 for prefix, uri in ns.items():
 ET.register_namespace(prefix, uri)