diff --git a/src/mods4pandas/mods4pandas.py b/src/mods4pandas/mods4pandas.py index 4fabb52..46ebfc8 100755 --- a/src/mods4pandas/mods4pandas.py +++ b/src/mods4pandas/mods4pandas.py @@ -461,18 +461,15 @@ def process(mets_files: List[str], output_file: str, output_page_info: str): # Convert the mods_info SQL to a pandas DataFrame mods_info_df = pd.read_sql_query("SELECT * FROM mods_info", con, index_col="recordInfo_recordIdentifier") - - # Save the DataFrame logger.info('Writing DataFrame to {}'.format(output_file)) mods_info_df.to_parquet(output_file) - # Convert page_info - # TODO - # if output_page_info: - # page_info_df = dicts_to_df(page_info, index_column=("ppn", "ID")) - # # Save the DataFrame - # logger.info('Writing DataFrame to {}'.format(output_page_info)) - # page_info_df.to_parquet(output_page_info) + if output_page_info: + # Convert page_info SQL to a pandas DataFrame + page_info_df = pd.read_sql_query("SELECT * FROM page_info", con_page_info, index_col=["ppn", "ID"]) + # Save the DataFrame + logger.info('Writing DataFrame to {}'.format(output_page_info)) + page_info_df.to_parquet(output_page_info) def main():