From 6981efb87cf236f531bb72af026bd18d7c601b4a Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Thu, 28 Nov 2024 18:32:40 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Write=20page=5Finfo=20Parquet=20?= =?UTF-8?q?file=20again?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mods4pandas/mods4pandas.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/mods4pandas/mods4pandas.py b/src/mods4pandas/mods4pandas.py index 4fabb52..46ebfc8 100755 --- a/src/mods4pandas/mods4pandas.py +++ b/src/mods4pandas/mods4pandas.py @@ -461,18 +461,15 @@ def process(mets_files: List[str], output_file: str, output_page_info: str): # Convert the mods_info SQL to a pandas DataFrame mods_info_df = pd.read_sql_query("SELECT * FROM mods_info", con, index_col="recordInfo_recordIdentifier") - - # Save the DataFrame logger.info('Writing DataFrame to {}'.format(output_file)) mods_info_df.to_parquet(output_file) - # Convert page_info - # TODO - # if output_page_info: - # page_info_df = dicts_to_df(page_info, index_column=("ppn", "ID")) - # # Save the DataFrame - # logger.info('Writing DataFrame to {}'.format(output_page_info)) - # page_info_df.to_parquet(output_page_info) + if output_page_info: + # Convert page_info SQL to a pandas DataFrame + page_info_df = pd.read_sql_query("SELECT * FROM page_info", con_page_info, index_col=["ppn", "ID"]) + # Save the DataFrame + logger.info('Writing DataFrame to {}'.format(output_page_info)) + page_info_df.to_parquet(output_page_info) def main():