updating ocr

This commit is contained in:
vahidrezanezhad 2025-05-27 23:45:22 +02:00
parent 0f154c605a
commit b93fc112bf
3 changed files with 58 additions and 31 deletions

View file

@ -337,6 +337,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
help="image filename",
type=click.Path(exists=True, dir_okay=False),
)
@click.option(
"--overwrite",
"-O",
help="overwrite (instead of skipping) if output xml exists",
is_flag=True,
)
@click.option(
"--dir_in",
"-di",
@ -421,7 +427,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
help="Override log level globally to this",
)
def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
initLogging()
if log_level:
getLogger('eynollah').setLevel(getLevelName(log_level))
@ -449,7 +455,7 @@ def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_
batch_size=batch_size,
pref_of_dataset=dataset_abbrevation,
)
eynollah_ocr.run()
eynollah_ocr.run(overwrite=overwrite)
if __name__ == "__main__":
main()

View file

@ -5338,6 +5338,8 @@ class Eynollah_ocr:
self.dir_out_image_text = dir_out_image_text
self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
self.pref_of_dataset = pref_of_dataset
self.logger = logger if logger else getLogger('eynollah')
if not export_textline_images_and_text:
if tr_ocr:
self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
@ -5351,7 +5353,7 @@ class Eynollah_ocr:
self.b_s = int(batch_size)
else:
self.model_ocr_dir = dir_models + "/model_step_750000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
self.model_ocr_dir = dir_models + "/model_step_1075000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
model_ocr = load_model(self.model_ocr_dir , compile=False)
self.prediction_model = tf.keras.models.Model(
@ -5377,7 +5379,7 @@ class Eynollah_ocr:
vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)
def run(self):
def run(self, overwrite : bool = False):
if self.dir_in:
ls_imgs = os.listdir(self.dir_in)
else:
@ -5394,6 +5396,14 @@ class Eynollah_ocr:
dir_img = self.image_filename
dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
if os.path.exists(out_file_ocr):
if overwrite:
self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
else:
self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
continue
img = cv2.imread(dir_img)
if self.draw_texts_on_image:
@ -5574,6 +5584,14 @@ class Eynollah_ocr:
#dir_img = os.path.join(self.dir_in, ind_img)
dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
if os.path.exists(out_file_ocr):
if overwrite:
self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
else:
self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
continue
img = cv2.imread(dir_img)
if self.prediction_with_both_of_rgb_and_bin:
cropped_lines_bin = []
@ -5704,7 +5722,7 @@ class Eynollah_ocr:
cropped_lines_bin.append(img_fin)
else:
if self.prediction_with_both_of_rgb_and_bin:
splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin)
splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin, prediction_with_both_of_rgb_and_bin=self.prediction_with_both_of_rgb_and_bin)
else:
splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
if splited_images:

View file

@ -74,32 +74,24 @@ def distortion_free_resize(image, img_size):
def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image):
    """Return one column index near the horizontal centre of a text line image.

    The first channel of ``textline_image`` is summed over rows to obtain a
    per-column profile, the profile is smoothed with a 1-D Gaussian
    (sigma 3) and its local maxima are detected.  If more than 70 maxima are
    found, the maxima lying strictly inside a window of +/- 6 % of the image
    width around the horizontal midpoint are kept and the one with the
    largest smoothed value is returned.

    Despite the function name, a single index is returned, not a
    (start, end) pair.

    Args:
        textline_image: array indexed as ``[row, column, channel]``.

    Returns:
        The column index of the selected maximum, or ``None`` when 70 or
        fewer maxima were detected or when no maximum falls inside the
        central window.
    """
    width = np.shape(textline_image)[1]

    # Half-width of the search window centred on the horizontal midpoint.
    common_window = int(0.06 * width)
    width1 = int(width / 2. - common_window)
    width2 = int(width / 2. + common_window)

    # Per-column profile of the first channel, smoothed before peak detection.
    img_sum = np.sum(textline_image[:, :, 0], axis=0)
    sum_smoothed = gaussian_filter1d(img_sum, 3)
    peaks_real, _ = find_peaks(sum_smoothed, height=0)

    if len(peaks_real) <= 70:
        return None

    # Keep only the maxima strictly inside the central window.
    peaks_real = peaks_real[(peaks_real < width2) & (peaks_real > width1)]
    if len(peaks_real) == 0:
        # np.argmax raises ValueError on an empty array; report "no split
        # point" the same way as the few-peaks case instead of crashing.
        return None

    arg_max = np.argmax(sum_smoothed[peaks_real])
    return peaks_real[arg_max]
# Function to fit text inside the given area
def fit_text_single_line(draw, text, font_path, max_width, max_height):
initial_font_size = 50
@ -305,17 +297,28 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved,
#new bounding box
x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_in_des[:,:,0])
mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
if img_bin_curved:
img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
if w_relative==0:
w_relative = img_in_des.shape[1]
img_in_des = resize_image(img_in_des, 32, w_relative)
if img_bin_curved:
img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
if w_n==0 or h_n==0:
img_in_des = np.copy(img_in)
if img_bin_curved:
img_bin_in_des = np.copy(img_bin_in)
w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
if w_relative==0:
w_relative = img_in_des.shape[1]
img_in_des = resize_image(img_in_des, 32, w_relative)
if img_bin_curved:
img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
else:
mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
if img_bin_curved:
img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
if w_relative==0:
w_relative = img_in_des.shape[1]
img_in_des = resize_image(img_in_des, 32, w_relative)
if img_bin_curved:
img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
else: