mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-10-31 17:34:13 +01:00 
			
		
		
		
	🎨 Improve log/output a bit
This commit is contained in:
		
							parent
							
								
									db79960ba1
								
							
						
					
					
						commit
						9227575555
					
				
					 1 changed file with 10 additions and 9 deletions
				
			
		|  | @ -20,6 +20,7 @@ ns = { | |||
|     'mods': 'http://www.loc.gov/mods/v3' | ||||
| } | ||||
| 
 | ||||
| logger = logging.getLogger('modstool') | ||||
| 
 | ||||
| class TagGroup: | ||||
|     """Helper class to simplify the parsing and checking of MODS metadata""" | ||||
|  | @ -466,8 +467,8 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls | |||
|     mets_files_real = [] | ||||
|     for m in mets_files: | ||||
|         if os.path.isdir(m): | ||||
|             logging.info('Scanning directory {}'.format(m)) | ||||
|             mets_files_real.extend(f.path for f in tqdm(os.scandir(m)) | ||||
|             logger.info('Scanning directory {}'.format(m)) | ||||
|             mets_files_real.extend(f.path for f in tqdm(os.scandir(m), leave=False) | ||||
|                                    if f.is_file() and not f.name.startswith('.')) | ||||
|         else: | ||||
|             mets_files_real.append(m) | ||||
|  | @ -476,8 +477,8 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls | |||
|     with open(output_file + '.warnings.csv', 'w') as csvfile: | ||||
|         csvwriter = csv.writer(csvfile) | ||||
|         mods_info = [] | ||||
|         logging.info('Processing METS files') | ||||
|         for mets_file in tqdm(mets_files_real): | ||||
|         logger.info('Processing METS files') | ||||
|         for mets_file in tqdm(mets_files_real, leave=False): | ||||
|             try: | ||||
|                 root = ET.parse(mets_file).getroot() | ||||
|                 mets = root # XXX .find('mets:mets', ns) does not work here | ||||
|  | @ -503,8 +504,8 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls | |||
|                         for caught_warning in caught_warnings: | ||||
|                             csvwriter.writerow([mets_file, caught_warning.message]) | ||||
|             except Exception as e: | ||||
|                 warnings.warn('Exception in {}:\n{}'.format(mets_file, e)) | ||||
|                 import traceback; traceback.print_exc() | ||||
|                 logger.error('Exception in {}: {}'.format(mets_file, e)) | ||||
|                 #import traceback; traceback.print_exc() | ||||
| 
 | ||||
|     # Convert the mods_info List[Dict] to a pandas DataFrame | ||||
|     columns = [] | ||||
|  | @ -517,13 +518,13 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls | |||
|     mods_info_df = pd.DataFrame(data=data, index=index, columns=columns) | ||||
| 
 | ||||
|     # Pickle the DataFrame | ||||
|     logging.info('Writing DataFrame to {}'.format(output_file)) | ||||
|     logger.info('Writing DataFrame to {}'.format(output_file)) | ||||
|     mods_info_df.to_pickle(output_file) | ||||
|     if output_csv: | ||||
|         logging.info('Writing CSV to {}'.format(output_csv)) | ||||
|         logger.info('Writing CSV to {}'.format(output_csv)) | ||||
|         mods_info_df.to_csv(output_csv) | ||||
|     if output_xlsx: | ||||
|         logging.info('Writing Excel .xlsx to {}'.format(output_xlsx)) | ||||
|         logger.info('Writing Excel .xlsx to {}'.format(output_xlsx)) | ||||
|         mods_info_df.to_excel(output_xlsx) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue