mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-09 03:40:01 +02:00
🐛 Write page_info Parquet file again
This commit is contained in:
parent
11a04916f3
commit
6981efb87c
1 changed file with 6 additions and 9 deletions
|
@@ -461,18 +461,15 @@ def process(mets_files: List[str], output_file: str, output_page_info: str):
|
|||
|
||||
# Convert the mods_info SQL to a pandas DataFrame
|
||||
mods_info_df = pd.read_sql_query("SELECT * FROM mods_info", con, index_col="recordInfo_recordIdentifier")
|
||||
|
||||
# Save the DataFrame
|
||||
logger.info('Writing DataFrame to {}'.format(output_file))
|
||||
mods_info_df.to_parquet(output_file)
|
||||
|
||||
# Convert page_info
|
||||
# TODO
|
||||
# if output_page_info:
|
||||
# page_info_df = dicts_to_df(page_info, index_column=("ppn", "ID"))
|
||||
# # Save the DataFrame
|
||||
# logger.info('Writing DataFrame to {}'.format(output_page_info))
|
||||
# page_info_df.to_parquet(output_page_info)
|
||||
if output_page_info:
|
||||
# Convert page_info SQL to a pandas DataFrame
|
||||
page_info_df = pd.read_sql_query("SELECT * FROM page_info", con_page_info, index_col=["ppn", "ID"])
|
||||
# Save the DataFrame
|
||||
logger.info('Writing DataFrame to {}'.format(output_page_info))
|
||||
page_info_df.to_parquet(output_page_info)
|
||||
|
||||
|
||||
def main():
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue