mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-11-04 03:14:14 +01:00 
			
		
		
		
	🐛 Write page_info Parquet file again
This commit is contained in:
		
							parent
							
								
									11a04916f3
								
							
						
					
					
						commit
						6981efb87c
					
				
					 1 changed files with 6 additions and 9 deletions
				
			
		| 
						 | 
					@ -461,18 +461,15 @@ def process(mets_files: List[str], output_file: str, output_page_info: str):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Convert the mods_info SQL to a pandas DataFrame
 | 
					    # Convert the mods_info SQL to a pandas DataFrame
 | 
				
			||||||
    mods_info_df = pd.read_sql_query("SELECT * FROM mods_info", con, index_col="recordInfo_recordIdentifier")
 | 
					    mods_info_df = pd.read_sql_query("SELECT * FROM mods_info", con, index_col="recordInfo_recordIdentifier")
 | 
				
			||||||
 | 
					 | 
				
			||||||
    # Save the DataFrame
 | 
					 | 
				
			||||||
    logger.info('Writing DataFrame to {}'.format(output_file))
 | 
					    logger.info('Writing DataFrame to {}'.format(output_file))
 | 
				
			||||||
    mods_info_df.to_parquet(output_file)
 | 
					    mods_info_df.to_parquet(output_file)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Convert page_info
 | 
					    if output_page_info:
 | 
				
			||||||
    # TODO
 | 
					          # Convert page_info SQL to a pandas DataFrame
 | 
				
			||||||
    # if output_page_info:
 | 
					          page_info_df = pd.read_sql_query("SELECT * FROM page_info", con_page_info, index_col=["ppn", "ID"])
 | 
				
			||||||
    #     page_info_df = dicts_to_df(page_info, index_column=("ppn", "ID"))
 | 
					          # Save the DataFrame
 | 
				
			||||||
    #     # Save the DataFrame
 | 
					          logger.info('Writing DataFrame to {}'.format(output_page_info))
 | 
				
			||||||
    #     logger.info('Writing DataFrame to {}'.format(output_page_info))
 | 
					          page_info_df.to_parquet(output_page_info)
 | 
				
			||||||
    #     page_info_df.to_parquet(output_page_info)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def main():
 | 
					def main():
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue