mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-08 11:20:07 +02:00
🎨 Improve log/output a bit
This commit is contained in:
parent
db79960ba1
commit
9227575555
1 changed files with 10 additions and 9 deletions
|
@ -20,6 +20,7 @@ ns = {
|
||||||
'mods': 'http://www.loc.gov/mods/v3'
|
'mods': 'http://www.loc.gov/mods/v3'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger = logging.getLogger('modstool')
|
||||||
|
|
||||||
class TagGroup:
|
class TagGroup:
|
||||||
"""Helper class to simplify the parsing and checking of MODS metadata"""
|
"""Helper class to simplify the parsing and checking of MODS metadata"""
|
||||||
|
@ -466,8 +467,8 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls
|
||||||
mets_files_real = []
|
mets_files_real = []
|
||||||
for m in mets_files:
|
for m in mets_files:
|
||||||
if os.path.isdir(m):
|
if os.path.isdir(m):
|
||||||
logging.info('Scanning directory {}'.format(m))
|
logger.info('Scanning directory {}'.format(m))
|
||||||
mets_files_real.extend(f.path for f in tqdm(os.scandir(m))
|
mets_files_real.extend(f.path for f in tqdm(os.scandir(m), leave=False)
|
||||||
if f.is_file() and not f.name.startswith('.'))
|
if f.is_file() and not f.name.startswith('.'))
|
||||||
else:
|
else:
|
||||||
mets_files_real.append(m)
|
mets_files_real.append(m)
|
||||||
|
@ -476,8 +477,8 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls
|
||||||
with open(output_file + '.warnings.csv', 'w') as csvfile:
|
with open(output_file + '.warnings.csv', 'w') as csvfile:
|
||||||
csvwriter = csv.writer(csvfile)
|
csvwriter = csv.writer(csvfile)
|
||||||
mods_info = []
|
mods_info = []
|
||||||
logging.info('Processing METS files')
|
logger.info('Processing METS files')
|
||||||
for mets_file in tqdm(mets_files_real):
|
for mets_file in tqdm(mets_files_real, leave=False):
|
||||||
try:
|
try:
|
||||||
root = ET.parse(mets_file).getroot()
|
root = ET.parse(mets_file).getroot()
|
||||||
mets = root # XXX .find('mets:mets', ns) does not work here
|
mets = root # XXX .find('mets:mets', ns) does not work here
|
||||||
|
@ -503,8 +504,8 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls
|
||||||
for caught_warning in caught_warnings:
|
for caught_warning in caught_warnings:
|
||||||
csvwriter.writerow([mets_file, caught_warning.message])
|
csvwriter.writerow([mets_file, caught_warning.message])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
warnings.warn('Exception in {}:\n{}'.format(mets_file, e))
|
logger.error('Exception in {}: {}'.format(mets_file, e))
|
||||||
import traceback; traceback.print_exc()
|
#import traceback; traceback.print_exc()
|
||||||
|
|
||||||
# Convert the mods_info List[Dict] to a pandas DataFrame
|
# Convert the mods_info List[Dict] to a pandas DataFrame
|
||||||
columns = []
|
columns = []
|
||||||
|
@ -517,13 +518,13 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls
|
||||||
mods_info_df = pd.DataFrame(data=data, index=index, columns=columns)
|
mods_info_df = pd.DataFrame(data=data, index=index, columns=columns)
|
||||||
|
|
||||||
# Pickle the DataFrame
|
# Pickle the DataFrame
|
||||||
logging.info('Writing DataFrame to {}'.format(output_file))
|
logger.info('Writing DataFrame to {}'.format(output_file))
|
||||||
mods_info_df.to_pickle(output_file)
|
mods_info_df.to_pickle(output_file)
|
||||||
if output_csv:
|
if output_csv:
|
||||||
logging.info('Writing CSV to {}'.format(output_csv))
|
logger.info('Writing CSV to {}'.format(output_csv))
|
||||||
mods_info_df.to_csv(output_csv)
|
mods_info_df.to_csv(output_csv)
|
||||||
if output_xlsx:
|
if output_xlsx:
|
||||||
logging.info('Writing Excel .xlsx to {}'.format(output_xlsx))
|
logger.info('Writing Excel .xlsx to {}'.format(output_xlsx))
|
||||||
mods_info_df.to_excel(output_xlsx)
|
mods_info_df.to_excel(output_xlsx)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue