mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-08-13 03:29:53 +02:00
💄 Use loguru for logging/remove extra progress bars
Closes gh-42.
This commit is contained in:
parent
308d2430a8
commit
46847c5000
3 changed files with 6 additions and 13 deletions
|
@@ -5,3 +5,4 @@ tqdm
|
||||||
lxml
|
lxml
|
||||||
pyarrow
|
pyarrow
|
||||||
XlsxWriter
|
XlsxWriter
|
||||||
|
loguru
|
||||||
|
|
|
@@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import contextlib
|
import contextlib
|
||||||
import csv
|
import csv
|
||||||
import logging
|
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import warnings
|
import warnings
|
||||||
|
@@ -9,6 +8,7 @@ from operator import attrgetter
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
from loguru import logger
|
||||||
from lxml import etree as ET
|
from lxml import etree as ET
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
@@ -21,8 +21,6 @@ from .lib import (
|
||||||
sorted_groupby,
|
sorted_groupby,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger("alto4pandas")
|
|
||||||
|
|
||||||
|
|
||||||
def alto_to_dict(alto, raise_errors=True):
|
def alto_to_dict(alto, raise_errors=True):
|
||||||
"""Convert ALTO metadata to a nested dictionary"""
|
"""Convert ALTO metadata to a nested dictionary"""
|
||||||
|
@@ -151,8 +149,8 @@ def alto_to_dict(alto, raise_errors=True):
|
||||||
def walk(m):
|
def walk(m):
|
||||||
# XXX do this in mods4pandas, too
|
# XXX do this in mods4pandas, too
|
||||||
if os.path.isdir(m):
|
if os.path.isdir(m):
|
||||||
tqdm.write(f"Scanning directory {m}")
|
logger.info(f"Scanning directory {m}")
|
||||||
for f in tqdm(os.scandir(m), leave=False):
|
for f in os.scandir(m):
|
||||||
if f.is_file() and not f.name.startswith("."):
|
if f.is_file() and not f.name.startswith("."):
|
||||||
yield f.path
|
yield f.path
|
||||||
elif f.is_dir():
|
elif f.is_dir():
|
||||||
|
@@ -247,8 +245,6 @@ def process(alto_files: List[str], output_file: str):
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
|
|
||||||
for prefix, uri in ns.items():
|
for prefix, uri in ns.items():
|
||||||
ET.register_namespace(prefix, uri)
|
ET.register_namespace(prefix, uri)
|
||||||
|
|
||||||
|
|
|
@@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import contextlib
|
import contextlib
|
||||||
import csv
|
import csv
|
||||||
import logging
|
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import warnings
|
import warnings
|
||||||
|
@@ -9,6 +8,7 @@ from operator import attrgetter
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
from loguru import logger
|
||||||
from lxml import etree as ET
|
from lxml import etree as ET
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
@@ -22,8 +22,6 @@ from .lib import (
|
||||||
sorted_groupby,
|
sorted_groupby,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger("mods4pandas")
|
|
||||||
|
|
||||||
|
|
||||||
def mods_to_dict(mods, raise_errors=True):
|
def mods_to_dict(mods, raise_errors=True):
|
||||||
"""Convert MODS metadata to a nested dictionary"""
|
"""Convert MODS metadata to a nested dictionary"""
|
||||||
|
@@ -603,7 +601,7 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
|
||||||
with open(output_file + ".warnings.csv", "w") as csvfile:
|
with open(output_file + ".warnings.csv", "w") as csvfile:
|
||||||
csvwriter = csv.writer(csvfile)
|
csvwriter = csv.writer(csvfile)
|
||||||
logger.info("Processing METS files")
|
logger.info("Processing METS files")
|
||||||
for mets_file in tqdm(mets_files_real, leave=True):
|
for mets_file in tqdm(mets_files_real, leave=False):
|
||||||
try:
|
try:
|
||||||
root = ET.parse(mets_file).getroot()
|
root = ET.parse(mets_file).getroot()
|
||||||
mets = root # XXX .find('mets:mets', ns) does not work here
|
mets = root # XXX .find('mets:mets', ns) does not work here
|
||||||
|
@@ -665,8 +663,6 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
|
|
||||||
for prefix, uri in ns.items():
|
for prefix, uri in ns.items():
|
||||||
ET.register_namespace(prefix, uri)
|
ET.register_namespace(prefix, uri)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue