mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-11-04 11:24:14 +01:00 
			
		
		
		
	🐛 Write mods_info Parquet file again
This commit is contained in:
		
							parent
							
								
									abb20b8ba9
								
							
						
					
					
						commit
						11a04916f3
					
				
					 1 changed file with 12 additions and 9 deletions
				
			
		| 
						 | 
					@ -399,19 +399,24 @@ def process(mets_files: List[str], output_file: str, output_page_info: str):
 | 
				
			||||||
            mets_files_real.append(m)
 | 
					            mets_files_real.append(m)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Prepare output files
 | 
				
			||||||
    # Process METS files
 | 
					    with contextlib.suppress(FileNotFoundError):
 | 
				
			||||||
 | 
					        os.remove(output_file)
 | 
				
			||||||
    output_file_sqlite3 = output_file + ".sqlite3"
 | 
					    output_file_sqlite3 = output_file + ".sqlite3"
 | 
				
			||||||
    with contextlib.suppress(FileNotFoundError):
 | 
					    with contextlib.suppress(FileNotFoundError):
 | 
				
			||||||
        os.remove(output_file_sqlite3)
 | 
					        os.remove(output_file_sqlite3)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    logger.info('Writing SQLite DB to {}'.format(output_file_sqlite3))
 | 
				
			||||||
    con = sqlite3.connect(output_file_sqlite3)
 | 
					    con = sqlite3.connect(output_file_sqlite3)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if output_page_info:
 | 
					    if output_page_info:
 | 
				
			||||||
        output_page_info_sqlite3 = output_page_info + ".sqlite3"
 | 
					        output_page_info_sqlite3 = output_page_info + ".sqlite3"
 | 
				
			||||||
 | 
					        logger.info('Writing SQLite DB to {}'.format(output_page_info_sqlite3))
 | 
				
			||||||
        with contextlib.suppress(FileNotFoundError):
 | 
					        with contextlib.suppress(FileNotFoundError):
 | 
				
			||||||
            os.remove(output_page_info_sqlite3)
 | 
					            os.remove(output_page_info_sqlite3)
 | 
				
			||||||
        con_page_info = sqlite3.connect(output_page_info_sqlite3)
 | 
					        con_page_info = sqlite3.connect(output_page_info_sqlite3)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Process METS files
 | 
				
			||||||
    with open(output_file + '.warnings.csv', 'w') as csvfile:
 | 
					    with open(output_file + '.warnings.csv', 'w') as csvfile:
 | 
				
			||||||
        csvwriter = csv.writer(csvfile)
 | 
					        csvwriter = csv.writer(csvfile)
 | 
				
			||||||
        mods_info = []
 | 
					        mods_info = []
 | 
				
			||||||
| 
						 | 
					@ -454,14 +459,12 @@ def process(mets_files: List[str], output_file: str, output_page_info: str):
 | 
				
			||||||
            except Exception as e:
 | 
					            except Exception as e:
 | 
				
			||||||
                logger.exception('Exception in {}'.format(mets_file))
 | 
					                logger.exception('Exception in {}'.format(mets_file))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Convert the mods_info List[Dict] to a pandas DataFrame
 | 
					    # Convert the mods_info SQL to a pandas DataFrame
 | 
				
			||||||
    # TODO
 | 
					    mods_info_df = pd.read_sql_query("SELECT * FROM mods_info", con, index_col="recordInfo_recordIdentifier")
 | 
				
			||||||
    # mods_info_df = dicts_to_df(mods_info, index_column="recordInfo_recordIdentifier")
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Save the DataFrame
 | 
					    # Save the DataFrame
 | 
				
			||||||
    # TODO
 | 
					    logger.info('Writing DataFrame to {}'.format(output_file))
 | 
				
			||||||
    #logger.info('Writing DataFrame to {}'.format(output_file))
 | 
					    mods_info_df.to_parquet(output_file)
 | 
				
			||||||
    #mods_info_df.to_parquet(output_file)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Convert page_info
 | 
					    # Convert page_info
 | 
				
			||||||
    # TODO
 | 
					    # TODO
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue