mirror of
				https://github.com/qurator-spk/eynollah.git
				synced 2025-10-27 15:54:13 +01:00 
			
		
		
		
	Allow adding a dataset abbreviation to extracted textline images and text
This commit is contained in:
		
							parent
							
								
									8c8fa461bb
								
							
						
					
					
						commit
						5d447abcc4
					
				
					 2 changed files with 71 additions and 37 deletions
				
			
		|  | @ -342,7 +342,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ | ||||||
|     "-m", |     "-m", | ||||||
|     help="directory of models", |     help="directory of models", | ||||||
|     type=click.Path(exists=True, file_okay=False), |     type=click.Path(exists=True, file_okay=False), | ||||||
|     required=True, |  | ||||||
| ) | ) | ||||||
| @click.option( | @click.option( | ||||||
|     "--tr_ocr", |     "--tr_ocr", | ||||||
|  | @ -379,6 +378,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ | ||||||
|     "-bs", |     "-bs", | ||||||
|     help="number of inference batch size. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively", |     help="number of inference batch size. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively", | ||||||
| ) | ) | ||||||
|  | @click.option( | ||||||
|  |     "--dataset_abbrevation", | ||||||
|  |     "-ds_pref", | ||||||
|  |     help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset", | ||||||
|  | ) | ||||||
| @click.option( | @click.option( | ||||||
|     "--log_level", |     "--log_level", | ||||||
|     "-l", |     "-l", | ||||||
|  | @ -386,10 +390,18 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ | ||||||
|     help="Override log level globally to this", |     help="Override log level globally to this", | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, log_level): | def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level): | ||||||
|     initLogging() |     initLogging() | ||||||
|     if log_level: |     if log_level: | ||||||
|         getLogger('eynollah').setLevel(getLevelName(log_level)) |         getLogger('eynollah').setLevel(getLevelName(log_level)) | ||||||
|  |     assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text  -etit can not be set alongside transformer ocr -tr_ocr" | ||||||
|  |     assert not export_textline_images_and_text or not model, "Exporting textline and text  -etit can not be set alongside model -m" | ||||||
|  |     assert not export_textline_images_and_text or not batch_size, "Exporting textline and text  -etit can not be set alongside batch size -bs" | ||||||
|  |     assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text  -etit can not be set alongside directory of bin images -dib" | ||||||
|  |     assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text  -etit can not be set alongside directory of images with predicted text -doit" | ||||||
|  |     assert not export_textline_images_and_text or not draw_texts_on_image, "Exporting textline and text  -etit can not be set alongside draw text on image -dtoi" | ||||||
|  |     assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text  -etit can not be set alongside prediction with both rgb and bin -brb" | ||||||
|  |      | ||||||
|     eynollah_ocr = Eynollah_ocr( |     eynollah_ocr = Eynollah_ocr( | ||||||
|         dir_xmls=dir_xmls, |         dir_xmls=dir_xmls, | ||||||
|         dir_out_image_text=dir_out_image_text, |         dir_out_image_text=dir_out_image_text, | ||||||
|  | @ -403,6 +415,7 @@ def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, ex | ||||||
|         draw_texts_on_image=draw_texts_on_image, |         draw_texts_on_image=draw_texts_on_image, | ||||||
|         prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin, |         prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin, | ||||||
|         batch_size=batch_size, |         batch_size=batch_size, | ||||||
|  |         pref_of_dataset=dataset_abbrevation, | ||||||
|     ) |     ) | ||||||
|     eynollah_ocr.run() |     eynollah_ocr.run() | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -4877,6 +4877,7 @@ class Eynollah_ocr: | ||||||
|         do_not_mask_with_textline_contour=False, |         do_not_mask_with_textline_contour=False, | ||||||
|         draw_texts_on_image=False, |         draw_texts_on_image=False, | ||||||
|         prediction_with_both_of_rgb_and_bin=False, |         prediction_with_both_of_rgb_and_bin=False, | ||||||
|  |         pref_of_dataset = None, | ||||||
|         logger=None, |         logger=None, | ||||||
|     ): |     ): | ||||||
|         self.dir_in = dir_in |         self.dir_in = dir_in | ||||||
|  | @ -4890,43 +4891,45 @@ class Eynollah_ocr: | ||||||
|         self.draw_texts_on_image = draw_texts_on_image |         self.draw_texts_on_image = draw_texts_on_image | ||||||
|         self.dir_out_image_text = dir_out_image_text |         self.dir_out_image_text = dir_out_image_text | ||||||
|         self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin |         self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin | ||||||
|         if tr_ocr: |         self.pref_of_dataset = pref_of_dataset | ||||||
|             self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") |         if not export_textline_images_and_text: | ||||||
|             self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") |             if tr_ocr: | ||||||
|             self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" |                 self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") | ||||||
|             self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) |                 self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") | ||||||
|             self.model_ocr.to(self.device) |                 self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" | ||||||
|             if not batch_size: |                 self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) | ||||||
|                 self.b_s = 2 |                 self.model_ocr.to(self.device) | ||||||
|  |                 if not batch_size: | ||||||
|  |                     self.b_s = 2 | ||||||
|  |                 else: | ||||||
|  |                     self.b_s = int(batch_size) | ||||||
|  | 
 | ||||||
|             else: |             else: | ||||||
|                 self.b_s = int(batch_size) |                 self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" | ||||||
|  |                 model_ocr = load_model(self.model_ocr_dir , compile=False) | ||||||
|                  |                  | ||||||
|         else: |                 self.prediction_model = tf.keras.models.Model( | ||||||
|             self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" |                                 model_ocr.get_layer(name = "image").input,  | ||||||
|             model_ocr = load_model(self.model_ocr_dir , compile=False) |                                 model_ocr.get_layer(name = "dense2").output) | ||||||
|              |                 if not batch_size: | ||||||
|             self.prediction_model = tf.keras.models.Model( |                     self.b_s = 8 | ||||||
|                             model_ocr.get_layer(name = "image").input,  |                 else: | ||||||
|                             model_ocr.get_layer(name = "dense2").output) |                     self.b_s = int(batch_size) | ||||||
|             if not batch_size: |  | ||||||
|                 self.b_s = 8 |  | ||||||
|             else: |  | ||||||
|                 self.b_s = int(batch_size) |  | ||||||
| 
 | 
 | ||||||
|                      |                      | ||||||
|             with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: |                 with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: | ||||||
|                 characters = json.load(config_file) |                     characters = json.load(config_file) | ||||||
| 
 | 
 | ||||||
|                      |                      | ||||||
|             AUTOTUNE = tf.data.AUTOTUNE |                 AUTOTUNE = tf.data.AUTOTUNE | ||||||
| 
 | 
 | ||||||
|             # Mapping characters to integers. |                 # Mapping characters to integers. | ||||||
|             char_to_num = StringLookup(vocabulary=list(characters), mask_token=None) |                 char_to_num = StringLookup(vocabulary=list(characters), mask_token=None) | ||||||
| 
 | 
 | ||||||
|             # Mapping integers back to original characters. |                 # Mapping integers back to original characters. | ||||||
|             self.num_to_char = StringLookup( |                 self.num_to_char = StringLookup( | ||||||
|                 vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True |                     vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True | ||||||
|             ) |                 ) | ||||||
| 
 | 
 | ||||||
|          |          | ||||||
|     def decode_batch_predictions(self, pred, max_len = 128): |     def decode_batch_predictions(self, pred, max_len = 128): | ||||||
|  | @ -5365,10 +5368,28 @@ class Eynollah_ocr: | ||||||
|                                             if cheild_text.tag.endswith("Unicode"): |                                             if cheild_text.tag.endswith("Unicode"): | ||||||
|                                                 textline_text = cheild_text.text |                                                 textline_text = cheild_text.text | ||||||
|                                                 if textline_text: |                                                 if textline_text: | ||||||
|                                                     with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file: |                                                     if self.do_not_mask_with_textline_contour: | ||||||
|                                                         text_file.write(textline_text) |                                                         if self.pref_of_dataset: | ||||||
|  |                                                             with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.txt'), 'w') as text_file: | ||||||
|  |                                                                 text_file.write(textline_text) | ||||||
| 
 | 
 | ||||||
|                                                     cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop ) |                                                             cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.png'), img_crop ) | ||||||
|  |                                                         else: | ||||||
|  |                                                             with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file: | ||||||
|  |                                                                 text_file.write(textline_text) | ||||||
|  | 
 | ||||||
|  |                                                             cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop ) | ||||||
|  |                                                     else: | ||||||
|  |                                                         if self.pref_of_dataset: | ||||||
|  |                                                             with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.txt'), 'w') as text_file: | ||||||
|  |                                                                 text_file.write(textline_text) | ||||||
|  | 
 | ||||||
|  |                                                             cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.png'), img_crop ) | ||||||
|  |                                                         else: | ||||||
|  |                                                             with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.txt'), 'w') as text_file: | ||||||
|  |                                                                 text_file.write(textline_text) | ||||||
|  | 
 | ||||||
|  |                                                             cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.png'), img_crop ) | ||||||
|                                                          |                                                          | ||||||
|                                                 indexer_textlines+=1 |                                                 indexer_textlines+=1 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue