mirror of
https://github.com/qurator-spk/modstool.git
synced 2025-06-09 19:59:57 +02:00
🐛 Write page_info Parquet file again
This commit is contained in:
parent
11a04916f3
commit
6981efb87c
1 changed file with 6 additions and 9 deletions
|
@ -461,18 +461,15 @@ def process(mets_files: List[str], output_file: str, output_page_info: str):
|
||||||
|
|
||||||
# Convert the mods_info SQL to a pandas DataFrame
|
# Convert the mods_info SQL to a pandas DataFrame
|
||||||
mods_info_df = pd.read_sql_query("SELECT * FROM mods_info", con, index_col="recordInfo_recordIdentifier")
|
mods_info_df = pd.read_sql_query("SELECT * FROM mods_info", con, index_col="recordInfo_recordIdentifier")
|
||||||
|
|
||||||
# Save the DataFrame
|
|
||||||
logger.info('Writing DataFrame to {}'.format(output_file))
|
logger.info('Writing DataFrame to {}'.format(output_file))
|
||||||
mods_info_df.to_parquet(output_file)
|
mods_info_df.to_parquet(output_file)
|
||||||
|
|
||||||
# Convert page_info
|
if output_page_info:
|
||||||
# TODO
|
# Convert page_info SQL to a pandas DataFrame
|
||||||
# if output_page_info:
|
page_info_df = pd.read_sql_query("SELECT * FROM page_info", con_page_info, index_col=["ppn", "ID"])
|
||||||
# page_info_df = dicts_to_df(page_info, index_column=("ppn", "ID"))
|
# Save the DataFrame
|
||||||
# # Save the DataFrame
|
logger.info('Writing DataFrame to {}'.format(output_page_info))
|
||||||
# logger.info('Writing DataFrame to {}'.format(output_page_info))
|
page_info_df.to_parquet(output_page_info)
|
||||||
# page_info_df.to_parquet(output_page_info)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue