strings alignment function is added + new changes needed for prediction with both bin and rgb inputs is implemented

This commit is contained in:
vahidrezanezhad 2025-05-25 21:44:36 +02:00
parent 097520bfd2
commit 0f154c605a
3 changed files with 107 additions and 19 deletions

View file

@ -6,3 +6,4 @@ tensorflow < 2.13
numba <= 0.58.1
scikit-image
loky
biopython

View file

@ -5647,6 +5647,10 @@ class Eynollah_ocr:
better_des_slope = get_orientation_moments(textline_coords)
img_crop = rotate_image_with_padding(img_crop, better_des_slope )
if self.prediction_with_both_of_rgb_and_bin:
img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope )
mask_poly = rotate_image_with_padding(mask_poly, better_des_slope )
mask_poly = mask_poly.astype('uint8')
@ -5658,24 +5662,33 @@ class Eynollah_ocr:
img_crop[mask_poly==0] = 255
if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 100:
img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
if self.prediction_with_both_of_rgb_and_bin:
img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
img_crop_bin[mask_poly==0] = 255
if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 100:
if self.prediction_with_both_of_rgb_and_bin:
img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin)
else:
img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
#print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii')
else:
img_crop[mask_poly==0] = 255
if self.prediction_with_both_of_rgb_and_bin:
img_crop_bin[mask_poly==0] = 255
if type_textregion=='drop-capital':
pass
else:
if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100:
img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
if self.prediction_with_both_of_rgb_and_bin:
img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin)
else:
img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
if self.prediction_with_both_of_rgb_and_bin:
img_crop_bin[mask_poly==0] = 255
if not self.export_textline_images_and_text:
if w_scaled < 640:#1.5*image_width:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
@ -5796,6 +5809,14 @@ class Eynollah_ocr:
imgs_bin = cropped_lines_bin[n_start:]
imgs_bin = np.array(imgs_bin)
imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
if len(indices_ver)>0:
imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
#print(imgs_ver_flipped, 'imgs_ver_flipped')
else:
imgs_bin_ver_flipped = None
else:
n_start = i*self.b_s
n_end = (i+1)*self.b_s
@ -5819,20 +5840,23 @@ class Eynollah_ocr:
imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
if len(indices_ver)>0:
imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
#print(imgs_ver_flipped, 'imgs_ver_flipped')
else:
imgs_bin_ver_flipped = None
preds = self.prediction_model.predict(imgs, verbose=0)
if len(indices_ver)>0:
#cv2.imwrite('flipped.png', (imgs_ver_flipped[0, :,:,:]*255).astype('uint8'))
#cv2.imwrite('original.png', (imgs[0, :,:,:]*255).astype('uint8'))
#sys.exit()
#print(imgs_ver_flipped.shape, 'imgs_ver_flipped.shape')
preds_flipped = self.prediction_model.predict(imgs_ver_flipped, verbose=0)
preds_max_fliped = np.max(preds_flipped, axis=2 )
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256
masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
masked_means_flipped[np.isnan(masked_means_flipped)] = 0
#print(masked_means_flipped, 'masked_means_flipped')
preds_max = np.max(preds, axis=2 )
preds_max_args = np.argmax(preds, axis=2 )
@ -5852,6 +5876,32 @@ class Eynollah_ocr:
preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
if self.prediction_with_both_of_rgb_and_bin:
preds_bin = self.prediction_model.predict(imgs_bin, verbose=0)
if len(indices_ver)>0:
preds_flipped = self.prediction_model.predict(imgs_bin_ver_flipped, verbose=0)
preds_max_fliped = np.max(preds_flipped, axis=2 )
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256
masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
masked_means_flipped[np.isnan(masked_means_flipped)] = 0
preds_max = np.max(preds, axis=2 )
preds_max_args = np.argmax(preds, axis=2 )
pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256
masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1)
masked_means[np.isnan(masked_means)] = 0
masked_means_ver = masked_means[indices_ver]
#print(masked_means_ver, 'pred_max_not_unk')
indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0]
#print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
if len(indices_where_flipped_conf_value_is_higher)>0:
indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
preds_bin[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
preds = (preds + preds_bin) / 2.
pred_texts = decode_batch_predictions(preds, self.num_to_char)

View file

@ -5,6 +5,7 @@ from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
import math
from PIL import Image, ImageDraw, ImageFont
from Bio import pairwise2
from .resize import resize_image
def decode_batch_predictions(pred, num_to_char, max_len = 128):
@ -252,7 +253,7 @@ def return_splitting_point_of_image(image_to_spliited):
return np.sort(peaks_sort_4)
def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved):
def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved, img_bin_curved=None):
peaks_4 = return_splitting_point_of_image(img_curved)
if len(peaks_4)>0:
imgs_tot = []
@ -260,29 +261,44 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved):
for ind in range(len(peaks_4)+1):
if ind==0:
img = img_curved[:, :peaks_4[ind], :]
if img_bin_curved:
img_bin = img_curved_bin[:, :peaks_4[ind], :]
mask = mask_curved[:, :peaks_4[ind], :]
elif ind==len(peaks_4):
img = img_curved[:, peaks_4[ind-1]:, :]
if img_bin_curved:
img_bin = img_curved_bin[:, peaks_4[ind-1]:, :]
mask = mask_curved[:, peaks_4[ind-1]:, :]
else:
img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :]
if img_bin_curved:
img_bin = img_curved_bin[:, peaks_4[ind-1]:peaks_4[ind], :]
mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :]
or_ma = get_orientation_moments_of_mask(mask)
imgs_tot.append([img, mask, or_ma] )
if img_bin_curved:
imgs_tot.append([img, mask, or_ma, img_bin] )
else:
imgs_tot.append([img, mask, or_ma] )
w_tot_des_list = []
w_tot_des = 0
imgs_deskewed_list = []
imgs_bin_deskewed_list = []
for ind in range(len(imgs_tot)):
img_in = imgs_tot[ind][0]
mask_in = imgs_tot[ind][1]
ori_in = imgs_tot[ind][2]
if img_bin_curved:
img_bin_in = imgs_tot[ind][3]
if abs(ori_in)<45:
img_in_des = rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) )
if img_bin_curved:
img_bin_in_des = rotate_image_with_padding(img_bin_in, ori_in, border_value=(255,255,255) )
mask_in_des = rotate_image_with_padding(mask_in, ori_in)
mask_in_des = mask_in_des.astype('uint8')
@ -291,36 +307,52 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved):
mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
if img_bin_curved:
img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
if w_relative==0:
w_relative = img_in_des.shape[1]
img_in_des = resize_image(img_in_des, 32, w_relative)
if img_bin_curved:
img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
else:
img_in_des = np.copy(img_in)
if img_bin_curved:
img_bin_in_des = np.copy(img_bin_in)
w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
if w_relative==0:
w_relative = img_in_des.shape[1]
img_in_des = resize_image(img_in_des, 32, w_relative)
if img_bin_curved:
img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
w_tot_des+=img_in_des.shape[1]
w_tot_des_list.append(img_in_des.shape[1])
imgs_deskewed_list.append(img_in_des)
if img_bin_curved:
imgs_bin_deskewed_list.append(img_bin_in_des)
img_final_deskewed = np.zeros((32, w_tot_des, 3))+255
if img_bin_curved:
img_bin_final_deskewed = np.zeros((32, w_tot_des, 3))+255
else:
img_bin_final_deskewed = None
w_indexer = 0
for ind in range(len(w_tot_des_list)):
img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:]
if img_bin_curved:
img_bin_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_bin_deskewed_list[ind][:,:,:]
w_indexer = w_indexer+w_tot_des_list[ind]
return img_final_deskewed
return img_final_deskewed, img_bin_final_deskewed
else:
return img_curved
return img_curved, img_bin_curved
def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind):
textline_contour[:,0] = textline_contour[:,0] + box_ind[2]
@ -434,3 +466,8 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
ocr_textline_in_textregion.append(text_textline)
ocr_all_textlines.append(ocr_textline_in_textregion)
return ocr_all_textlines
def biopython_align(str1, str2):
alignments = pairwise2.align.globalms(str1, str2, 2, -1, -2, -2)
best_alignment = alignments[0] # Get the best alignment
return best_alignment.seqA, best_alignment.seqB