mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-08-29 11:59:55 +02:00
A string alignment function is added + the new changes needed for prediction with both bin and rgb inputs are implemented
This commit is contained in:
parent
097520bfd2
commit
0f154c605a
3 changed files with 107 additions and 19 deletions
|
@ -6,3 +6,4 @@ tensorflow < 2.13
|
||||||
numba <= 0.58.1
|
numba <= 0.58.1
|
||||||
scikit-image
|
scikit-image
|
||||||
loky
|
loky
|
||||||
|
biopython
|
||||||
|
|
|
@ -5647,6 +5647,10 @@ class Eynollah_ocr:
|
||||||
better_des_slope = get_orientation_moments(textline_coords)
|
better_des_slope = get_orientation_moments(textline_coords)
|
||||||
|
|
||||||
img_crop = rotate_image_with_padding(img_crop, better_des_slope )
|
img_crop = rotate_image_with_padding(img_crop, better_des_slope )
|
||||||
|
|
||||||
|
if self.prediction_with_both_of_rgb_and_bin:
|
||||||
|
img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope )
|
||||||
|
|
||||||
mask_poly = rotate_image_with_padding(mask_poly, better_des_slope )
|
mask_poly = rotate_image_with_padding(mask_poly, better_des_slope )
|
||||||
mask_poly = mask_poly.astype('uint8')
|
mask_poly = mask_poly.astype('uint8')
|
||||||
|
|
||||||
|
@ -5658,24 +5662,33 @@ class Eynollah_ocr:
|
||||||
|
|
||||||
img_crop[mask_poly==0] = 255
|
img_crop[mask_poly==0] = 255
|
||||||
|
|
||||||
if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 100:
|
if self.prediction_with_both_of_rgb_and_bin:
|
||||||
img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
|
img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
||||||
|
img_crop_bin[mask_poly==0] = 255
|
||||||
|
|
||||||
|
if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 100:
|
||||||
|
if self.prediction_with_both_of_rgb_and_bin:
|
||||||
|
img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin)
|
||||||
|
else:
|
||||||
|
img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
|
||||||
|
|
||||||
|
|
||||||
#print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii')
|
|
||||||
else:
|
else:
|
||||||
img_crop[mask_poly==0] = 255
|
img_crop[mask_poly==0] = 255
|
||||||
|
if self.prediction_with_both_of_rgb_and_bin:
|
||||||
|
img_crop_bin[mask_poly==0] = 255
|
||||||
if type_textregion=='drop-capital':
|
if type_textregion=='drop-capital':
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100:
|
if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100:
|
||||||
img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
|
if self.prediction_with_both_of_rgb_and_bin:
|
||||||
|
img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin)
|
||||||
|
else:
|
||||||
|
img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if self.prediction_with_both_of_rgb_and_bin:
|
|
||||||
img_crop_bin[mask_poly==0] = 255
|
|
||||||
|
|
||||||
if not self.export_textline_images_and_text:
|
if not self.export_textline_images_and_text:
|
||||||
if w_scaled < 640:#1.5*image_width:
|
if w_scaled < 640:#1.5*image_width:
|
||||||
img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
|
img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
|
||||||
|
@ -5796,6 +5809,14 @@ class Eynollah_ocr:
|
||||||
imgs_bin = cropped_lines_bin[n_start:]
|
imgs_bin = cropped_lines_bin[n_start:]
|
||||||
imgs_bin = np.array(imgs_bin)
|
imgs_bin = np.array(imgs_bin)
|
||||||
imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
|
imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
|
||||||
|
|
||||||
|
if len(indices_ver)>0:
|
||||||
|
imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
|
||||||
|
imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
|
||||||
|
#print(imgs_ver_flipped, 'imgs_ver_flipped')
|
||||||
|
|
||||||
|
else:
|
||||||
|
imgs_bin_ver_flipped = None
|
||||||
else:
|
else:
|
||||||
n_start = i*self.b_s
|
n_start = i*self.b_s
|
||||||
n_end = (i+1)*self.b_s
|
n_end = (i+1)*self.b_s
|
||||||
|
@ -5819,20 +5840,23 @@ class Eynollah_ocr:
|
||||||
imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
|
imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
|
||||||
|
|
||||||
|
|
||||||
|
if len(indices_ver)>0:
|
||||||
|
imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
|
||||||
|
imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
|
||||||
|
#print(imgs_ver_flipped, 'imgs_ver_flipped')
|
||||||
|
else:
|
||||||
|
imgs_bin_ver_flipped = None
|
||||||
|
|
||||||
|
|
||||||
preds = self.prediction_model.predict(imgs, verbose=0)
|
preds = self.prediction_model.predict(imgs, verbose=0)
|
||||||
|
|
||||||
if len(indices_ver)>0:
|
if len(indices_ver)>0:
|
||||||
#cv2.imwrite('flipped.png', (imgs_ver_flipped[0, :,:,:]*255).astype('uint8'))
|
|
||||||
#cv2.imwrite('original.png', (imgs[0, :,:,:]*255).astype('uint8'))
|
|
||||||
#sys.exit()
|
|
||||||
#print(imgs_ver_flipped.shape, 'imgs_ver_flipped.shape')
|
|
||||||
preds_flipped = self.prediction_model.predict(imgs_ver_flipped, verbose=0)
|
preds_flipped = self.prediction_model.predict(imgs_ver_flipped, verbose=0)
|
||||||
preds_max_fliped = np.max(preds_flipped, axis=2 )
|
preds_max_fliped = np.max(preds_flipped, axis=2 )
|
||||||
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
|
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
|
||||||
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256
|
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256
|
||||||
masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
|
masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
|
||||||
masked_means_flipped[np.isnan(masked_means_flipped)] = 0
|
masked_means_flipped[np.isnan(masked_means_flipped)] = 0
|
||||||
#print(masked_means_flipped, 'masked_means_flipped')
|
|
||||||
|
|
||||||
preds_max = np.max(preds, axis=2 )
|
preds_max = np.max(preds, axis=2 )
|
||||||
preds_max_args = np.argmax(preds, axis=2 )
|
preds_max_args = np.argmax(preds, axis=2 )
|
||||||
|
@ -5852,6 +5876,32 @@ class Eynollah_ocr:
|
||||||
preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
|
preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
|
||||||
if self.prediction_with_both_of_rgb_and_bin:
|
if self.prediction_with_both_of_rgb_and_bin:
|
||||||
preds_bin = self.prediction_model.predict(imgs_bin, verbose=0)
|
preds_bin = self.prediction_model.predict(imgs_bin, verbose=0)
|
||||||
|
|
||||||
|
if len(indices_ver)>0:
|
||||||
|
preds_flipped = self.prediction_model.predict(imgs_bin_ver_flipped, verbose=0)
|
||||||
|
preds_max_fliped = np.max(preds_flipped, axis=2 )
|
||||||
|
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
|
||||||
|
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256
|
||||||
|
masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
|
||||||
|
masked_means_flipped[np.isnan(masked_means_flipped)] = 0
|
||||||
|
|
||||||
|
preds_max = np.max(preds, axis=2 )
|
||||||
|
preds_max_args = np.argmax(preds, axis=2 )
|
||||||
|
pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256
|
||||||
|
|
||||||
|
masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1)
|
||||||
|
masked_means[np.isnan(masked_means)] = 0
|
||||||
|
|
||||||
|
masked_means_ver = masked_means[indices_ver]
|
||||||
|
#print(masked_means_ver, 'pred_max_not_unk')
|
||||||
|
|
||||||
|
indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0]
|
||||||
|
|
||||||
|
#print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
|
||||||
|
if len(indices_where_flipped_conf_value_is_higher)>0:
|
||||||
|
indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
|
||||||
|
preds_bin[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
|
||||||
|
|
||||||
preds = (preds + preds_bin) / 2.
|
preds = (preds + preds_bin) / 2.
|
||||||
|
|
||||||
pred_texts = decode_batch_predictions(preds, self.num_to_char)
|
pred_texts = decode_batch_predictions(preds, self.num_to_char)
|
||||||
|
|
|
@ -5,6 +5,7 @@ from scipy.signal import find_peaks
|
||||||
from scipy.ndimage import gaussian_filter1d
|
from scipy.ndimage import gaussian_filter1d
|
||||||
import math
|
import math
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
from Bio import pairwise2
|
||||||
from .resize import resize_image
|
from .resize import resize_image
|
||||||
|
|
||||||
def decode_batch_predictions(pred, num_to_char, max_len = 128):
|
def decode_batch_predictions(pred, num_to_char, max_len = 128):
|
||||||
|
@ -252,7 +253,7 @@ def return_splitting_point_of_image(image_to_spliited):
|
||||||
|
|
||||||
return np.sort(peaks_sort_4)
|
return np.sort(peaks_sort_4)
|
||||||
|
|
||||||
def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved):
|
def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved, img_bin_curved=None):
|
||||||
peaks_4 = return_splitting_point_of_image(img_curved)
|
peaks_4 = return_splitting_point_of_image(img_curved)
|
||||||
if len(peaks_4)>0:
|
if len(peaks_4)>0:
|
||||||
imgs_tot = []
|
imgs_tot = []
|
||||||
|
@ -260,29 +261,44 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved):
|
||||||
for ind in range(len(peaks_4)+1):
|
for ind in range(len(peaks_4)+1):
|
||||||
if ind==0:
|
if ind==0:
|
||||||
img = img_curved[:, :peaks_4[ind], :]
|
img = img_curved[:, :peaks_4[ind], :]
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin = img_curved_bin[:, :peaks_4[ind], :]
|
||||||
mask = mask_curved[:, :peaks_4[ind], :]
|
mask = mask_curved[:, :peaks_4[ind], :]
|
||||||
elif ind==len(peaks_4):
|
elif ind==len(peaks_4):
|
||||||
img = img_curved[:, peaks_4[ind-1]:, :]
|
img = img_curved[:, peaks_4[ind-1]:, :]
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin = img_curved_bin[:, peaks_4[ind-1]:, :]
|
||||||
mask = mask_curved[:, peaks_4[ind-1]:, :]
|
mask = mask_curved[:, peaks_4[ind-1]:, :]
|
||||||
else:
|
else:
|
||||||
img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :]
|
img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :]
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin = img_curved_bin[:, peaks_4[ind-1]:peaks_4[ind], :]
|
||||||
mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :]
|
mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :]
|
||||||
|
|
||||||
or_ma = get_orientation_moments_of_mask(mask)
|
or_ma = get_orientation_moments_of_mask(mask)
|
||||||
|
|
||||||
imgs_tot.append([img, mask, or_ma] )
|
if img_bin_curved:
|
||||||
|
imgs_tot.append([img, mask, or_ma, img_bin] )
|
||||||
|
else:
|
||||||
|
imgs_tot.append([img, mask, or_ma] )
|
||||||
|
|
||||||
|
|
||||||
w_tot_des_list = []
|
w_tot_des_list = []
|
||||||
w_tot_des = 0
|
w_tot_des = 0
|
||||||
imgs_deskewed_list = []
|
imgs_deskewed_list = []
|
||||||
|
imgs_bin_deskewed_list = []
|
||||||
|
|
||||||
for ind in range(len(imgs_tot)):
|
for ind in range(len(imgs_tot)):
|
||||||
img_in = imgs_tot[ind][0]
|
img_in = imgs_tot[ind][0]
|
||||||
mask_in = imgs_tot[ind][1]
|
mask_in = imgs_tot[ind][1]
|
||||||
ori_in = imgs_tot[ind][2]
|
ori_in = imgs_tot[ind][2]
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin_in = imgs_tot[ind][3]
|
||||||
|
|
||||||
if abs(ori_in)<45:
|
if abs(ori_in)<45:
|
||||||
img_in_des = rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) )
|
img_in_des = rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) )
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin_in_des = rotate_image_with_padding(img_bin_in, ori_in, border_value=(255,255,255) )
|
||||||
mask_in_des = rotate_image_with_padding(mask_in, ori_in)
|
mask_in_des = rotate_image_with_padding(mask_in, ori_in)
|
||||||
mask_in_des = mask_in_des.astype('uint8')
|
mask_in_des = mask_in_des.astype('uint8')
|
||||||
|
|
||||||
|
@ -291,36 +307,52 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved):
|
||||||
|
|
||||||
mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
||||||
img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
|
||||||
|
|
||||||
w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
|
w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
|
||||||
if w_relative==0:
|
if w_relative==0:
|
||||||
w_relative = img_in_des.shape[1]
|
w_relative = img_in_des.shape[1]
|
||||||
img_in_des = resize_image(img_in_des, 32, w_relative)
|
img_in_des = resize_image(img_in_des, 32, w_relative)
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
img_in_des = np.copy(img_in)
|
img_in_des = np.copy(img_in)
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin_in_des = np.copy(img_bin_in)
|
||||||
w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
|
w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
|
||||||
if w_relative==0:
|
if w_relative==0:
|
||||||
w_relative = img_in_des.shape[1]
|
w_relative = img_in_des.shape[1]
|
||||||
img_in_des = resize_image(img_in_des, 32, w_relative)
|
img_in_des = resize_image(img_in_des, 32, w_relative)
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
|
||||||
|
|
||||||
w_tot_des+=img_in_des.shape[1]
|
w_tot_des+=img_in_des.shape[1]
|
||||||
w_tot_des_list.append(img_in_des.shape[1])
|
w_tot_des_list.append(img_in_des.shape[1])
|
||||||
imgs_deskewed_list.append(img_in_des)
|
imgs_deskewed_list.append(img_in_des)
|
||||||
|
if img_bin_curved:
|
||||||
|
imgs_bin_deskewed_list.append(img_bin_in_des)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
img_final_deskewed = np.zeros((32, w_tot_des, 3))+255
|
img_final_deskewed = np.zeros((32, w_tot_des, 3))+255
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin_final_deskewed = np.zeros((32, w_tot_des, 3))+255
|
||||||
|
else:
|
||||||
|
img_bin_final_deskewed = None
|
||||||
|
|
||||||
w_indexer = 0
|
w_indexer = 0
|
||||||
for ind in range(len(w_tot_des_list)):
|
for ind in range(len(w_tot_des_list)):
|
||||||
img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:]
|
img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:]
|
||||||
|
if img_bin_curved:
|
||||||
|
img_bin_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_bin_deskewed_list[ind][:,:,:]
|
||||||
w_indexer = w_indexer+w_tot_des_list[ind]
|
w_indexer = w_indexer+w_tot_des_list[ind]
|
||||||
return img_final_deskewed
|
return img_final_deskewed, img_bin_final_deskewed
|
||||||
else:
|
else:
|
||||||
return img_curved
|
return img_curved, img_bin_curved
|
||||||
|
|
||||||
def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind):
|
def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind):
|
||||||
textline_contour[:,0] = textline_contour[:,0] + box_ind[2]
|
textline_contour[:,0] = textline_contour[:,0] + box_ind[2]
|
||||||
|
@ -434,3 +466,8 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr
|
||||||
ocr_textline_in_textregion.append(text_textline)
|
ocr_textline_in_textregion.append(text_textline)
|
||||||
ocr_all_textlines.append(ocr_textline_in_textregion)
|
ocr_all_textlines.append(ocr_textline_in_textregion)
|
||||||
return ocr_all_textlines
|
return ocr_all_textlines
|
||||||
|
|
||||||
|
def biopython_align(str1, str2):
    """Globally align two strings and return them with gaps inserted.

    The alignment is computed with ``pairwise2.align.globalms`` using a
    match score of 2, a mismatch score of -1, and a penalty of -2 for both
    opening and extending a gap.

    Args:
        str1: First string to align.
        str2: Second string to align.

    Returns:
        A ``(aligned_str1, aligned_str2)`` tuple taken from the first
        alignment returned by ``globalms``. When ``globalms`` returns no
        alignment at all, both inputs are instead right-padded with ``-``
        to a common length so callers still receive two strings.
    """
    alignments = pairwise2.align.globalms(str1, str2, 2, -1, -2, -2)

    if not alignments:
        # Indexing an empty result would raise IndexError. This presumably
        # happens when one of the inputs is empty (e.g. an empty OCR
        # result) -- TODO confirm against the installed Biopython version.
        # Fall back to a trivial alignment padded with '-' characters.
        width = max(len(str1), len(str2))
        return str1.ljust(width, "-"), str2.ljust(width, "-")

    # Use the first of the returned alignments.
    best_alignment = alignments[0]
    return best_alignment.seqA, best_alignment.seqB
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue