1
0
Fork 0
mirror of https://github.com/qurator-spk/modstool.git synced 2025-06-26 12:09:55 +02:00

🚧 Write out page_info

This commit is contained in:
Mike Gerber 2023-11-23 16:37:30 +01:00
parent e51fa5750f
commit c5332ae80d
2 changed files with 15 additions and 2 deletions

View file

@@ -415,6 +415,13 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls
logger.info('Writing Excel .xlsx to {}'.format(output_xlsx))
mods_info_df.to_excel(output_xlsx)
# Convert page_info
# XXX hardcoded filenames + other formats
page_info_df = dicts_to_df(page_info, index_column=("ppn", "ID"))
# Pickle the DataFrame
logger.info('Writing DataFrame to {}'.format("page_info_df.pkl"))
page_info_df.to_pickle("page_info_df.pkl")
def main():
logging.basicConfig(level=logging.INFO)