mirror of
				https://github.com/qurator-spk/modstool.git
				synced 2025-10-31 17:34:13 +01:00 
			
		
		
		
	🎨 Improve log/output a bit
This commit is contained in:
		
							parent
							
								
									db79960ba1
								
							
						
					
					
						commit
						9227575555
					
				
					 1 changed file with 10 additions and 9 deletions
				
			
		|  | @ -20,6 +20,7 @@ ns = { | ||||||
|     'mods': 'http://www.loc.gov/mods/v3' |     'mods': 'http://www.loc.gov/mods/v3' | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | logger = logging.getLogger('modstool') | ||||||
| 
 | 
 | ||||||
| class TagGroup: | class TagGroup: | ||||||
|     """Helper class to simplify the parsing and checking of MODS metadata""" |     """Helper class to simplify the parsing and checking of MODS metadata""" | ||||||
|  | @ -466,8 +467,8 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls | ||||||
|     mets_files_real = [] |     mets_files_real = [] | ||||||
|     for m in mets_files: |     for m in mets_files: | ||||||
|         if os.path.isdir(m): |         if os.path.isdir(m): | ||||||
|             logging.info('Scanning directory {}'.format(m)) |             logger.info('Scanning directory {}'.format(m)) | ||||||
|             mets_files_real.extend(f.path for f in tqdm(os.scandir(m)) |             mets_files_real.extend(f.path for f in tqdm(os.scandir(m), leave=False) | ||||||
|                                    if f.is_file() and not f.name.startswith('.')) |                                    if f.is_file() and not f.name.startswith('.')) | ||||||
|         else: |         else: | ||||||
|             mets_files_real.append(m) |             mets_files_real.append(m) | ||||||
|  | @ -476,8 +477,8 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls | ||||||
|     with open(output_file + '.warnings.csv', 'w') as csvfile: |     with open(output_file + '.warnings.csv', 'w') as csvfile: | ||||||
|         csvwriter = csv.writer(csvfile) |         csvwriter = csv.writer(csvfile) | ||||||
|         mods_info = [] |         mods_info = [] | ||||||
|         logging.info('Processing METS files') |         logger.info('Processing METS files') | ||||||
|         for mets_file in tqdm(mets_files_real): |         for mets_file in tqdm(mets_files_real, leave=False): | ||||||
|             try: |             try: | ||||||
|                 root = ET.parse(mets_file).getroot() |                 root = ET.parse(mets_file).getroot() | ||||||
|                 mets = root # XXX .find('mets:mets', ns) does not work here |                 mets = root # XXX .find('mets:mets', ns) does not work here | ||||||
|  | @ -503,8 +504,8 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls | ||||||
|                         for caught_warning in caught_warnings: |                         for caught_warning in caught_warnings: | ||||||
|                             csvwriter.writerow([mets_file, caught_warning.message]) |                             csvwriter.writerow([mets_file, caught_warning.message]) | ||||||
|             except Exception as e: |             except Exception as e: | ||||||
|                 warnings.warn('Exception in {}:\n{}'.format(mets_file, e)) |                 logger.error('Exception in {}: {}'.format(mets_file, e)) | ||||||
|                 import traceback; traceback.print_exc() |                 #import traceback; traceback.print_exc() | ||||||
| 
 | 
 | ||||||
|     # Convert the mods_info List[Dict] to a pandas DataFrame |     # Convert the mods_info List[Dict] to a pandas DataFrame | ||||||
|     columns = [] |     columns = [] | ||||||
|  | @ -517,13 +518,13 @@ def process(mets_files: List[str], output_file: str, output_csv: str, output_xls | ||||||
|     mods_info_df = pd.DataFrame(data=data, index=index, columns=columns) |     mods_info_df = pd.DataFrame(data=data, index=index, columns=columns) | ||||||
| 
 | 
 | ||||||
|     # Pickle the DataFrame |     # Pickle the DataFrame | ||||||
|     logging.info('Writing DataFrame to {}'.format(output_file)) |     logger.info('Writing DataFrame to {}'.format(output_file)) | ||||||
|     mods_info_df.to_pickle(output_file) |     mods_info_df.to_pickle(output_file) | ||||||
|     if output_csv: |     if output_csv: | ||||||
|         logging.info('Writing CSV to {}'.format(output_csv)) |         logger.info('Writing CSV to {}'.format(output_csv)) | ||||||
|         mods_info_df.to_csv(output_csv) |         mods_info_df.to_csv(output_csv) | ||||||
|     if output_xlsx: |     if output_xlsx: | ||||||
|         logging.info('Writing Excel .xlsx to {}'.format(output_xlsx)) |         logger.info('Writing Excel .xlsx to {}'.format(output_xlsx)) | ||||||
|         mods_info_df.to_excel(output_xlsx) |         mods_info_df.to_excel(output_xlsx) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue