🐛 Write page_info Parquet file again

fix/use-temp-sqlite3
Mike Gerber 4 weeks ago
parent 11a04916f3
commit 6981efb87c

@@ -461,18 +461,15 @@ def process(mets_files: List[str], output_file: str, output_page_info: str):
# Convert the mods_info SQL to a pandas DataFrame # Convert the mods_info SQL to a pandas DataFrame
mods_info_df = pd.read_sql_query("SELECT * FROM mods_info", con, index_col="recordInfo_recordIdentifier") mods_info_df = pd.read_sql_query("SELECT * FROM mods_info", con, index_col="recordInfo_recordIdentifier")
# Save the DataFrame
logger.info('Writing DataFrame to {}'.format(output_file)) logger.info('Writing DataFrame to {}'.format(output_file))
mods_info_df.to_parquet(output_file) mods_info_df.to_parquet(output_file)
# Convert page_info if output_page_info:
# TODO # Convert page_info SQL to a pandas DataFrame
# if output_page_info: page_info_df = pd.read_sql_query("SELECT * FROM page_info", con_page_info, index_col=["ppn", "ID"])
# page_info_df = dicts_to_df(page_info, index_column=("ppn", "ID")) # Save the DataFrame
# # Save the DataFrame logger.info('Writing DataFrame to {}'.format(output_page_info))
# logger.info('Writing DataFrame to {}'.format(output_page_info)) page_info_df.to_parquet(output_page_info)
# page_info_df.to_parquet(output_page_info)
def main(): def main():

Loading…
Cancel
Save