Mirror of https://github.com/qurator-spk/eynollah.git
threshold for textline ocr + new ocr model

commit 0803881f36
parent d968a306e4

2 changed files with 76 additions and 49 deletions
File 1 of 2 (click-based CLI, the ocr command):

@@ -496,6 +496,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     "-ds_pref",
     help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset",
 )
+@click.option(
+    "--min_conf_value_of_textline_text",
+    "-min_conf",
+    help="minimum OCR confidence value. Text lines with a confidence value lower than this threshold will not be included in the output XML file.",
+)
 @click.option(
     "--log_level",
     "-l",
@@ -503,7 +508,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="Override log level globally to this",
 )
 
-def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
+def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
     initLogging()
     if log_level:
         getLogger('eynollah').setLevel(getLevelName(log_level))
@@ -530,6 +535,7 @@ def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text,
         prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin,
         batch_size=batch_size,
         pref_of_dataset=dataset_abbrevation,
+        min_conf_value_of_textline_text=min_conf_value_of_textline_text,
     )
     eynollah_ocr.run(overwrite=overwrite)
 
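Aside: a minimal, runnable sketch (not the repository's CLI module) of the pattern the hunk above follows, i.e. a click option whose value is forwarded untouched to the OCR worker. Only the option names --min_conf_value_of_textline_text / -min_conf come from the diff; everything else is illustrative.

# sketch.py -- illustrative only; mirrors how the new option is passed through.
import click

@click.command()
@click.option(
    "--min_conf_value_of_textline_text",
    "-min_conf",
    help="minimum OCR confidence value; text lines below this threshold are "
         "excluded from the output XML file.",
)
def ocr(min_conf_value_of_textline_text):
    # In the real command the value is forwarded as a keyword argument,
    # exactly like min_conf_value_of_textline_text=... in the hunk above.
    click.echo(f"threshold passed through as: {min_conf_value_of_textline_text!r}")

if __name__ == "__main__":
    ocr()  # e.g. `python sketch.py -min_conf 0.4` prints: threshold passed through as: '0.4'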
File 2 of 2 (class Eynollah and class Eynollah_ocr):

@@ -318,7 +318,7 @@ class Eynollah:
         if self.ocr and self.tr:
             self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
         elif self.ocr and not self.tr:
-            self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250716"
+            self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250725"
         if self.tables:
             if self.light_version:
                 self.model_table_dir = dir_models + "/modelens_table_0t4_201124"
@@ -4974,13 +4974,23 @@ class Eynollah:
                 gc.collect()
                 if len(all_found_textline_polygons)>0:
                     ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                else:
+                    ocr_all_textlines = None
+
                 if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0:
                     ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                else:
+                    ocr_all_textlines_marginals = None
 
                 if all_found_textline_polygons_h and len(all_found_textline_polygons)>0:
                     ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                else:
+                    ocr_all_textlines_h = None
+
                 if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0:
                     ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(image_page, polygons_of_drop_capitals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                else:
+                    ocr_all_textlines_drop = None
             else:
                 ocr_all_textlines = None
                 ocr_all_textlines_marginals = None
@@ -5098,7 +5108,8 @@ class Eynollah_ocr:
         do_not_mask_with_textline_contour=False,
         draw_texts_on_image=False,
         prediction_with_both_of_rgb_and_bin=False,
-        pref_of_dataset = None,
+        pref_of_dataset=None,
+        min_conf_value_of_textline_text : Optional[float]=None,
         logger=None,
     ):
         self.dir_in = dir_in
@@ -5117,6 +5128,10 @@ class Eynollah_ocr:
         self.logger = logger if logger else getLogger('eynollah')
 
         if not export_textline_images_and_text:
+            if min_conf_value_of_textline_text:
+                self.min_conf_value_of_textline_text = float(min_conf_value_of_textline_text)
+            else:
+                self.min_conf_value_of_textline_text = 0.3
             if tr_ocr:
                 self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
                 self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
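The constructor added above treats the option as optional: any falsy value (None, or an empty string from the CLI) falls back to a default threshold of 0.3, and string input is cast to float. A standalone sketch of that resolution step, using a hypothetical helper name:

from typing import Optional, Union

def resolve_min_conf(value: Optional[Union[str, float]] = None) -> float:
    # Illustrative helper (not in the repository); mirrors the branch added in __init__.
    if value:
        return float(value)   # the CLI passes the option through as a string
    return 0.3                # default used when the option is omitted

assert resolve_min_conf() == 0.3        # -min_conf not given
assert resolve_min_conf("0.5") == 0.5   # -min_conf 0.5
assert resolve_min_conf(0.8) == 0.8     # set programmatically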
@@ -5129,7 +5144,7 @@ class Eynollah_ocr:
                     self.b_s = int(batch_size)
 
             else:
-                self.model_ocr_dir = dir_models + "/model_ens_ocrcnn_new6"#"/model_eynollah_ocr_cnnrnn_20250716"#"/model_ens_ocrcnn_new6"#"/model_ens_ocrcnn_new2"#
+                self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250725"#"/model_step_1020000_ocr"#"/model_ens_ocrcnn_new10"#"/model_step_255000_ocr"#"/model_ens_ocrcnn_new9"#"/model_step_900000_ocr"#"/model_eynollah_ocr_cnnrnn_20250716"#"/model_ens_ocrcnn_new6"#"/model_ens_ocrcnn_new2"#
                 model_ocr = load_model(self.model_ocr_dir , compile=False)
 
                 self.prediction_model = tf.keras.models.Model(
@@ -5140,8 +5155,7 @@ class Eynollah_ocr:
                 else:
                     self.b_s = int(batch_size)
 
-
-                with open(os.path.join(self.model_ocr_dir, "characters_20250707_all_lang.txt"),"r") as config_file:
+                with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
                     characters = json.load(config_file)
 
                 AUTOTUNE = tf.data.AUTOTUNE
@@ -5442,7 +5456,7 @@ class Eynollah_ocr:
 
                                     else:
                                         #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(),  mask_poly[:,:,0].sum() /float(w*h) , 'didi')
-                                        if not self.do_not_mask_with_textline_contour:
+
                                         if angle_degrees > 3:
                                             better_des_slope = get_orientation_moments(textline_coords)
 
@@ -5460,10 +5474,12 @@ class Eynollah_ocr:
                                             mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :]
                                             img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :]
 
+                                            if not self.do_not_mask_with_textline_contour:
                                                 img_crop[mask_poly==0] = 255
 
                                             if self.prediction_with_both_of_rgb_and_bin:
                                                 img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                                                if not self.do_not_mask_with_textline_contour:
                                                     img_crop_bin[mask_poly==0] = 255
 
                                             if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90:
@@ -5475,8 +5491,10 @@ class Eynollah_ocr:
 
                                         else:
                                             better_des_slope = 0
+                                            if not self.do_not_mask_with_textline_contour:
                                                 img_crop[mask_poly==0] = 255
                                             if self.prediction_with_both_of_rgb_and_bin:
+                                                if not self.do_not_mask_with_textline_contour:
                                                     img_crop_bin[mask_poly==0] = 255
                                             if type_textregion=='drop-capital':
                                                 pass
@@ -5716,9 +5734,12 @@ class Eynollah_ocr:
 
                         for ib in range(imgs.shape[0]):
                             pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
+                            if masked_means[ib] >= self.min_conf_value_of_textline_text:
                                 extracted_texts.append(pred_texts_ib)
                                 extracted_conf_value.append(masked_means[ib])
-
+                            else:
+                                extracted_texts.append("")
+                                extracted_conf_value.append(0)
                     del cropped_lines
                     if self.prediction_with_both_of_rgb_and_bin:
                         del cropped_lines_bin
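For clarity, a self-contained sketch of the thresholding semantics introduced above, run on hypothetical data: a prediction whose confidence falls below the threshold is replaced by an empty string with confidence 0, so both lists stay aligned with the text lines they describe.

# Hypothetical inputs; the loop mirrors the logic added in the hunk above.
min_conf_value_of_textline_text = 0.3
pred_texts = ["Erste Zeile", "W#rr[UNK]r", "Dritte Zeile"]
masked_means = [0.95, 0.12, 0.81]   # per-line confidence values

extracted_texts = []
extracted_conf_value = []
for ib in range(len(pred_texts)):
    pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
    if masked_means[ib] >= min_conf_value_of_textline_text:
        extracted_texts.append(pred_texts_ib)
        extracted_conf_value.append(masked_means[ib])
    else:
        extracted_texts.append("")      # keep the slot, drop the low-confidence text
        extracted_conf_value.append(0)

print(extracted_texts)       # ['Erste Zeile', '', 'Dritte Zeile']
print(extracted_conf_value)  # [0.95, 0, 0.81]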
@@ -5790,14 +5811,14 @@ class Eynollah_ocr:
 
                     ###id_to_order = {tid: ro for tid, ro in zip(tot_region_ref, index_tot_regions)}
 
-                    id_textregions = []
-                    textregions_by_existing_ids = []
+                    #id_textregions = []
+                    #textregions_by_existing_ids = []
                     indexer = 0
                     indexer_textregion = 0
                     for nn in root1.iter(region_tags):
-                        id_textregion = nn.attrib['id']
-                        id_textregions.append(id_textregion)
-                        textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
+                        #id_textregion = nn.attrib['id']
+                        #id_textregions.append(id_textregion)
+                        #textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
 
                         is_textregion_text = False
                         for childtest in nn: