mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-11-04 03:14:14 +01:00 
			
		
		
		
	🚧 Write out page_info
This commit is contained in:
		
							parent
							
								
									e51fa5750f
								
							
						
					
					
						commit
						c5332ae80d
					
				
					 2 changed files with 15 additions and 2 deletions
				
			
		| 
						 | 
					@ -300,7 +300,7 @@ def flatten(d: MutableMapping, parent='', separator='_'):
 | 
				
			||||||
    return dict(items)
 | 
					    return dict(items)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def dicts_to_df(data_list: List[Dict], *, index_column: str) -> pd.DataFrame:
 | 
					def dicts_to_df(data_list: List[Dict], *, index_column) -> pd.DataFrame:
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Convert the given list of dicts to a Pandas DataFrame.
 | 
					    Convert the given list of dicts to a Pandas DataFrame.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -318,7 +318,13 @@ def dicts_to_df(data_list: List[Dict], *, index_column: str) -> pd.DataFrame:
 | 
				
			||||||
    data = [[m.get(c) for c in columns] for m in data_list]
 | 
					    data = [[m.get(c) for c in columns] for m in data_list]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Build index
 | 
					    # Build index
 | 
				
			||||||
    index = [m[index_column] for m in data_list]
 | 
					    if isinstance(index_column, str):
 | 
				
			||||||
 | 
					        index = [m[index_column] for m in data_list]
 | 
				
			||||||
 | 
					    elif isinstance(index_column, tuple):
 | 
				
			||||||
 | 
					        index = [[m[c] for m in data_list] for c in index_column]
 | 
				
			||||||
 | 
					        index = pd.MultiIndex.from_arrays(index, names=index_column)
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        raise ValueError(f"index_column must")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    df = pd.DataFrame(data=data, index=index, columns=columns)
 | 
					    df = pd.DataFrame(data=data, index=index, columns=columns)
 | 
				
			||||||
    return df
 | 
					    return df
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -415,6 +415,13 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls
 | 
				
			||||||
        logger.info('Writing Excel .xlsx to {}'.format(output_xlsx))
 | 
					        logger.info('Writing Excel .xlsx to {}'.format(output_xlsx))
 | 
				
			||||||
        mods_info_df.to_excel(output_xlsx)
 | 
					        mods_info_df.to_excel(output_xlsx)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Convert page_info
 | 
				
			||||||
 | 
					    # XXX hardcoded filenames + other formats
 | 
				
			||||||
 | 
					    page_info_df = dicts_to_df(page_info, index_column=("ppn", "ID"))
 | 
				
			||||||
 | 
					    # Pickle the DataFrame
 | 
				
			||||||
 | 
					    logger.info('Writing DataFrame to {}'.format("page_info_df.pkl"))
 | 
				
			||||||
 | 
					    page_info_df.to_pickle("page_info_df.pkl")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def main():
 | 
					def main():
 | 
				
			||||||
    logging.basicConfig(level=logging.INFO)
 | 
					    logging.basicConfig(level=logging.INFO)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue