mirror of
				https://github.com/qurator-spk/page2tsv.git
				synced 2025-10-30 16:14:13 +01:00 
			
		
		
		
	Enable NED-only usage of find-entities
This commit is contained in:
		
							parent
							
								
									975487a233
								
							
						
					
					
						commit
						c12bea2cb0
					
				
					 1 changed file with 17 additions and 5 deletions
				
			
		
							
								
								
									
										22
									
								
								cli.py
									
										
									
									
									
								
							
							
						
						
									
										22
									
								
								cli.py
									
										
									
									
									
								
							|  | @ -329,14 +329,26 @@ def find_entities(tsv_file, tsv_out_file, ner_rest_endpoint, ned_rest_endpoint, | ||||||
| 
 | 
 | ||||||
|             tsv, ner_result = ner(tsv, ner_rest_endpoint) |             tsv, ner_result = ner(tsv, ner_rest_endpoint) | ||||||
| 
 | 
 | ||||||
|             if ned_rest_endpoint is not None: |         elif os.path.exists(tsv_file): | ||||||
| 
 | 
 | ||||||
|                 tsv, ned_result = ned(tsv, ner_result, ned_rest_endpoint, json_file=ned_json_file, threshold=ned_threshold) |             print('Using NER information that is already contained in file: {}'.format(tsv_file)) | ||||||
| 
 | 
 | ||||||
|                 if ned_json_file is not None and not os.path.exists(ned_json_file): |             tmp = tsv.copy() | ||||||
|  |             tmp['sen'] = (tmp['No.'] == 0).cumsum() | ||||||
| 
 | 
 | ||||||
|                     with open(ned_json_file, "w") as fp_json: |             ner_result = [[{'word': row.TOKEN, 'prediction': row['NE-TAG']} for _, row in sen.iterrows] | ||||||
|                         json.dump(ned_result, fp_json, indent=2, separators=(',', ': ')) |                           for _, sen in tmp.groupby('sen')] | ||||||
|  |         else: | ||||||
|  |             raise RuntimeError("Either NER rest endpoint or NER-TAG information within tsv_file required.") | ||||||
|  | 
 | ||||||
|  |         if ned_rest_endpoint is not None: | ||||||
|  | 
 | ||||||
|  |             tsv, ned_result = ned(tsv, ner_result, ned_rest_endpoint, json_file=ned_json_file, threshold=ned_threshold) | ||||||
|  | 
 | ||||||
|  |             if ned_json_file is not None and not os.path.exists(ned_json_file): | ||||||
|  | 
 | ||||||
|  |                 with open(ned_json_file, "w") as fp_json: | ||||||
|  |                     json.dump(ned_result, fp_json, indent=2, separators=(',', ': ')) | ||||||
| 
 | 
 | ||||||
|         print('Writing to {}...'.format(tsv_out_file)) |         print('Writing to {}...'.format(tsv_out_file)) | ||||||
|         tsv.to_csv(tsv_out_file, sep="\t", quoting=3, index=False) |         tsv.to_csv(tsv_out_file, sep="\t", quoting=3, index=False) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue