1
0
Fork 0
mirror of https://github.com/qurator-spk/modstool.git synced 2025-08-13 03:29:53 +02:00

💄 Use loguru for logging/remove extra progress bars

Closes gh-42.
This commit is contained in:
Gerber, Mike 2025-08-08 14:34:06 +02:00
parent 308d2430a8
commit 46847c5000
3 changed files with 6 additions and 13 deletions

View file

@ -5,3 +5,4 @@ tqdm
lxml lxml
pyarrow pyarrow
XlsxWriter XlsxWriter
loguru

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import contextlib import contextlib
import csv import csv
import logging
import os import os
import sqlite3 import sqlite3
import warnings import warnings
@ -9,6 +8,7 @@ from operator import attrgetter
from typing import List from typing import List
import click import click
from loguru import logger
from lxml import etree as ET from lxml import etree as ET
from tqdm import tqdm from tqdm import tqdm
@ -21,8 +21,6 @@ from .lib import (
sorted_groupby, sorted_groupby,
) )
logger = logging.getLogger("alto4pandas")
def alto_to_dict(alto, raise_errors=True): def alto_to_dict(alto, raise_errors=True):
"""Convert ALTO metadata to a nested dictionary""" """Convert ALTO metadata to a nested dictionary"""
@ -151,8 +149,8 @@ def alto_to_dict(alto, raise_errors=True):
def walk(m): def walk(m):
# XXX do this in mods4pandas, too # XXX do this in mods4pandas, too
if os.path.isdir(m): if os.path.isdir(m):
tqdm.write(f"Scanning directory {m}") logger.info(f"Scanning directory {m}")
for f in tqdm(os.scandir(m), leave=False): for f in os.scandir(m):
if f.is_file() and not f.name.startswith("."): if f.is_file() and not f.name.startswith("."):
yield f.path yield f.path
elif f.is_dir(): elif f.is_dir():
@ -247,8 +245,6 @@ def process(alto_files: List[str], output_file: str):
def main(): def main():
logging.basicConfig(level=logging.INFO)
for prefix, uri in ns.items(): for prefix, uri in ns.items():
ET.register_namespace(prefix, uri) ET.register_namespace(prefix, uri)

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import contextlib import contextlib
import csv import csv
import logging
import os import os
import sqlite3 import sqlite3
import warnings import warnings
@ -9,6 +8,7 @@ from operator import attrgetter
from typing import Dict, List from typing import Dict, List
import click import click
from loguru import logger
from lxml import etree as ET from lxml import etree as ET
from tqdm import tqdm from tqdm import tqdm
@ -22,8 +22,6 @@ from .lib import (
sorted_groupby, sorted_groupby,
) )
logger = logging.getLogger("mods4pandas")
def mods_to_dict(mods, raise_errors=True): def mods_to_dict(mods, raise_errors=True):
"""Convert MODS metadata to a nested dictionary""" """Convert MODS metadata to a nested dictionary"""
@ -603,7 +601,7 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
with open(output_file + ".warnings.csv", "w") as csvfile: with open(output_file + ".warnings.csv", "w") as csvfile:
csvwriter = csv.writer(csvfile) csvwriter = csv.writer(csvfile)
logger.info("Processing METS files") logger.info("Processing METS files")
for mets_file in tqdm(mets_files_real, leave=True): for mets_file in tqdm(mets_files_real, leave=False):
try: try:
root = ET.parse(mets_file).getroot() root = ET.parse(mets_file).getroot()
mets = root # XXX .find('mets:mets', ns) does not work here mets = root # XXX .find('mets:mets', ns) does not work here
@ -665,8 +663,6 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
def main(): def main():
logging.basicConfig(level=logging.INFO)
for prefix, uri in ns.items(): for prefix, uri in ns.items():
ET.register_namespace(prefix, uri) ET.register_namespace(prefix, uri)