From b93fc112bf8c414186e64de6cc092b1839239128 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Tue, 27 May 2025 23:45:22 +0200
Subject: [PATCH] ocr: add --overwrite option, skip existing outputs, fix
 textline split and crop edge cases

---
 src/eynollah/cli.py             | 10 ++++--
 src/eynollah/eynollah.py        | 24 ++++++++++++--
 src/eynollah/utils/utils_ocr.py | 55 +++++++++++++++++----------------
 3 files changed, 58 insertions(+), 31 deletions(-)

diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py
index 0c18b2c..2d0d6f9 100644
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@@ -337,6 +337,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="image filename",
     type=click.Path(exists=True, dir_okay=False),
 )
+@click.option(
+    "--overwrite",
+    "-O",
+    help="overwrite (instead of skipping) if output xml exists",
+    is_flag=True,
+)
 @click.option(
     "--dir_in",
     "-di",
@@ -421,7 +427,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="Override log level globally to this",
 )
 
-def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
+def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
     initLogging()
     if log_level:
         getLogger('eynollah').setLevel(getLevelName(log_level))
@@ -449,7 +455,7 @@ def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_
         batch_size=batch_size,
         pref_of_dataset=dataset_abbrevation,
     )
-    eynollah_ocr.run()
+    eynollah_ocr.run(overwrite=overwrite)
 
 if __name__ == "__main__":
     main()
diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 1f79995..efa1dde 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -5338,6 +5338,8 @@ class Eynollah_ocr:
         self.dir_out_image_text = dir_out_image_text
         self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
         self.pref_of_dataset = pref_of_dataset
+        self.logger = logger if logger else getLogger('eynollah')
+
         if not export_textline_images_and_text:
             if tr_ocr:
                 self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
@@ -5351,7 +5353,7 @@
             self.b_s = int(batch_size)
 
         else:
-            self.model_ocr_dir = dir_models + "/model_step_750000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
+            self.model_ocr_dir = dir_models + "/model_step_1075000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
             model_ocr = load_model(self.model_ocr_dir , compile=False)
 
             self.prediction_model = tf.keras.models.Model(
@@ -5377,7 +5379,7 @@
             vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
         )
 
-    def run(self):
+    def run(self, overwrite: bool = False):
         if self.dir_in:
             ls_imgs = os.listdir(self.dir_in)
         else:
@@ -5394,6 +5396,14 @@
                 dir_img = self.image_filename
             dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
             out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
+
+            if os.path.exists(out_file_ocr):
+                if overwrite:
+                    self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
+                else:
+                    self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
+                continue
+
             img = cv2.imread(dir_img)
 
             if self.draw_texts_on_image:
@@ -5574,6 +5584,14 @@
             #dir_img = os.path.join(self.dir_in, ind_img)
             dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
             out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
+
+            if os.path.exists(out_file_ocr):
+                if overwrite:
+                    self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
+                else:
+                    self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
+                continue
+
             img = cv2.imread(dir_img)
             if self.prediction_with_both_of_rgb_and_bin:
                 cropped_lines_bin = []
@@ -5704,7 +5722,7 @@
                                 cropped_lines_bin.append(img_fin)
                         else:
                             if self.prediction_with_both_of_rgb_and_bin:
-                                splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin)
+                                splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin, prediction_with_both_of_rgb_and_bin=self.prediction_with_both_of_rgb_and_bin)
                             else:
                                 splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
                             if splited_images:
diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py
index 524e7ce..9ef344a 100644
--- a/src/eynollah/utils/utils_ocr.py
+++ b/src/eynollah/utils/utils_ocr.py
@@ -74,32 +74,24 @@ def distortion_free_resize(image, img_size):
 def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image):
     width = np.shape(textline_image)[1]
     height = np.shape(textline_image)[0]
-    common_window = int(0.22*width)
+    common_window = int(0.06*width)
 
     width1 = int ( width/2. - common_window )
     width2 = int ( width/2. + common_window )
-    
+
     img_sum = np.sum(textline_image[:,:,0], axis=0)
     sum_smoothed = gaussian_filter1d(img_sum, 3)
-    
+
     peaks_real, _ = find_peaks(sum_smoothed, height=0)
-    
-    if len(peaks_real)>35:
+    if len(peaks_real)>70:
 
-        #peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
-        argsort = np.argsort(sum_smoothed[peaks_real])[::-1]
-        peaks_real_top_six = peaks_real[argsort[:6]]
-        midpoint = textline_image.shape[1] / 2.
-        arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint))
+        peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
 
-        #arg_max = np.argmax(sum_smoothed[peaks_real])
-        
-        peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max]
-        
+        arg_max = np.argmax(sum_smoothed[peaks_real])
+        peaks_final = peaks_real[arg_max]
         return peaks_final
     else:
         return None
 
-    
 # Function to fit text inside the given area
 def fit_text_single_line(draw, text, font_path, max_width, max_height):
     initial_font_size = 50
@@ -305,17 +297,28 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved,
             #new bounding box
             x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_in_des[:,:,0])
-            mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-            img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-            if img_bin_curved:
-                img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-
-            w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
-            if w_relative==0:
-                w_relative = img_in_des.shape[1]
-            img_in_des = resize_image(img_in_des, 32, w_relative)
-            if img_bin_curved:
-                img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
+            if w_n==0 or h_n==0:
+                img_in_des = np.copy(img_in)
+                if img_bin_curved:
+                    img_bin_in_des = np.copy(img_bin_in)
+                w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
+                if w_relative==0:
+                    w_relative = img_in_des.shape[1]
+                img_in_des = resize_image(img_in_des, 32, w_relative)
+                if img_bin_curved:
+                    img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
+            else:
+                mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                if img_bin_curved:
+                    img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+
+                w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
+                if w_relative==0:
+                    w_relative = img_in_des.shape[1]
+                img_in_des = resize_image(img_in_des, 32, w_relative)
+                if img_bin_curved:
+                    img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
         else:
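
Reviewer note on the utils_ocr.py hunk above: the split-point search changes from "take the six strongest peaks and pick the one closest to the midpoint" to "restrict peaks to a narrow central window (6% of the width instead of 22%) and take the strongest one", and the trigger threshold rises from more than 35 peaks to more than 70. Below is a minimal standalone sketch of the new selection logic for trying the behaviour outside Eynollah; demo_split_point and the grayscale input are illustrative assumptions, not part of the patch (the patched function reads channel 0 of a colour crop).

import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

def demo_split_point(textline_gray):
    # Mirrors return_start_and_end_of_common_text_of_textline_ocr_without_common_section
    # after this patch: smooth the column sums, find peaks, keep only those in a
    # narrow central window, and split at the strongest of them.
    width = textline_gray.shape[1]
    common_window = int(0.06 * width)   # patch narrows this from 0.22*width
    width1 = int(width / 2. - common_window)
    width2 = int(width / 2. + common_window)
    sum_smoothed = gaussian_filter1d(np.sum(textline_gray, axis=0), 3)
    peaks, _ = find_peaks(sum_smoothed, height=0)
    if len(peaks) <= 70:                # patch raises this from 35
        return None
    central = peaks[(peaks < width2) & (peaks > width1)]
    if len(central) == 0:
        # guard added for this sketch only; the patched code assumes the window is non-empty
        return None
    return int(central[np.argmax(sum_smoothed[central])])

Note the last guard: with the much narrower window, the filtered peak array can come back empty, in which case the patched np.argmax call would raise a ValueError; that may be worth a follow-up.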