diff --git a/requirements.txt b/requirements.txt index aeffd47..4bc0c6a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ tensorflow < 2.13 numba <= 0.58.1 scikit-image loky +biopython diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 0ee3d14..1f79995 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5647,6 +5647,10 @@ class Eynollah_ocr: better_des_slope = get_orientation_moments(textline_coords) img_crop = rotate_image_with_padding(img_crop, better_des_slope ) + + if self.prediction_with_both_of_rgb_and_bin: + img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope ) + mask_poly = rotate_image_with_padding(mask_poly, better_des_slope ) mask_poly = mask_poly.astype('uint8') @@ -5655,26 +5659,35 @@ class Eynollah_ocr: mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :] img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :] - + img_crop[mask_poly==0] = 255 + if self.prediction_with_both_of_rgb_and_bin: + img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_crop_bin[mask_poly==0] = 255 + if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 100: - img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) - - #print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii') + if self.prediction_with_both_of_rgb_and_bin: + img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + else: + img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + + else: img_crop[mask_poly==0] = 255 + if self.prediction_with_both_of_rgb_and_bin: + img_crop_bin[mask_poly==0] = 255 if type_textregion=='drop-capital': pass else: if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100: - img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + if self.prediction_with_both_of_rgb_and_bin: + img_crop, img_crop_bin = 
break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + else: + img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) - - - if self.prediction_with_both_of_rgb_and_bin: - img_crop_bin[mask_poly==0] = 255 + if not self.export_textline_images_and_text: if w_scaled < 640:#1.5*image_width: @@ -5796,6 +5809,14 @@ class Eynollah_ocr: imgs_bin = cropped_lines_bin[n_start:] imgs_bin = np.array(imgs_bin) imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3) + + if len(indices_ver)>0: + imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:] + imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:] + #print(imgs_ver_flipped, 'imgs_ver_flipped') + + else: + imgs_bin_ver_flipped = None else: n_start = i*self.b_s n_end = (i+1)*self.b_s @@ -5817,22 +5838,25 @@ class Eynollah_ocr: if self.prediction_with_both_of_rgb_and_bin: imgs_bin = cropped_lines_bin[n_start:n_end] imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3) + + + if len(indices_ver)>0: + imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:] + imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:] + #print(imgs_ver_flipped, 'imgs_ver_flipped') + else: + imgs_bin_ver_flipped = None preds = self.prediction_model.predict(imgs, verbose=0) if len(indices_ver)>0: - #cv2.imwrite('flipped.png', (imgs_ver_flipped[0, :,:,:]*255).astype('uint8')) - #cv2.imwrite('original.png', (imgs[0, :,:,:]*255).astype('uint8')) - #sys.exit() - #print(imgs_ver_flipped.shape, 'imgs_ver_flipped.shape') preds_flipped = self.prediction_model.predict(imgs_ver_flipped, verbose=0) preds_max_fliped = np.max(preds_flipped, axis=2 ) preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256 masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) 
masked_means_flipped[np.isnan(masked_means_flipped)] = 0 - #print(masked_means_flipped, 'masked_means_flipped') preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) @@ -5852,6 +5876,32 @@ class Eynollah_ocr: preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] if self.prediction_with_both_of_rgb_and_bin: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) + + if len(indices_ver)>0: + preds_flipped = self.prediction_model.predict(imgs_bin_ver_flipped, verbose=0) + preds_max_fliped = np.max(preds_flipped, axis=2 ) + preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) + pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256 + masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) + masked_means_flipped[np.isnan(masked_means_flipped)] = 0 + # Baseline confidence must come from the un-flipped binarized predictions (preds_bin), since preds_bin is what gets replaced below. + preds_max = np.max(preds_bin, axis=2 ) + preds_max_args = np.argmax(preds_bin, axis=2 ) + pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256 + + masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means[np.isnan(masked_means)] = 0 + + masked_means_ver = masked_means[indices_ver] + #print(masked_means_ver, 'pred_max_not_unk') + + indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0] + + #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher') + if len(indices_where_flipped_conf_value_is_higher)>0: + indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] + preds_bin[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] + preds = (preds + preds_bin) / 2. 
pred_texts = decode_batch_predictions(preds, self.num_to_char) diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 339b38a..524e7ce 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -5,6 +5,7 @@ from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d import math from PIL import Image, ImageDraw, ImageFont +from Bio import pairwise2 from .resize import resize_image def decode_batch_predictions(pred, num_to_char, max_len = 128): @@ -252,7 +253,7 @@ def return_splitting_point_of_image(image_to_spliited): return np.sort(peaks_sort_4) -def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved): +def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved, img_bin_curved=None): peaks_4 = return_splitting_point_of_image(img_curved) if len(peaks_4)>0: imgs_tot = [] @@ -260,29 +261,44 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved): for ind in range(len(peaks_4)+1): if ind==0: img = img_curved[:, :peaks_4[ind], :] + if img_bin_curved is not None: + img_bin = img_bin_curved[:, :peaks_4[ind], :] mask = mask_curved[:, :peaks_4[ind], :] elif ind==len(peaks_4): img = img_curved[:, peaks_4[ind-1]:, :] + if img_bin_curved is not None: + img_bin = img_bin_curved[:, peaks_4[ind-1]:, :] mask = mask_curved[:, peaks_4[ind-1]:, :] else: img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :] + if img_bin_curved is not None: + img_bin = img_bin_curved[:, peaks_4[ind-1]:peaks_4[ind], :] mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :] or_ma = get_orientation_moments_of_mask(mask) - - imgs_tot.append([img, mask, or_ma] ) + # Compare with None explicitly: truth-testing a multi-element ndarray raises ValueError. + if img_bin_curved is not None: + imgs_tot.append([img, mask, or_ma, img_bin] ) + else: + imgs_tot.append([img, mask, or_ma] ) w_tot_des_list = [] w_tot_des = 0 imgs_deskewed_list = [] + imgs_bin_deskewed_list = [] + for ind in range(len(imgs_tot)): img_in = imgs_tot[ind][0] mask_in = imgs_tot[ind][1] ori_in = imgs_tot[ind][2] + if img_bin_curved is not None: + 
img_bin_in = imgs_tot[ind][3] if abs(ori_in)<45: img_in_des = rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) ) + if img_bin_curved is not None: + img_bin_in_des = rotate_image_with_padding(img_bin_in, ori_in, border_value=(255,255,255) ) mask_in_des = rotate_image_with_padding(mask_in, ori_in) mask_in_des = mask_in_des.astype('uint8') @@ -291,36 +307,52 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved): mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + if img_bin_curved is not None: + img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) if w_relative==0: w_relative = img_in_des.shape[1] img_in_des = resize_image(img_in_des, 32, w_relative) + if img_bin_curved is not None: + img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative) else: img_in_des = np.copy(img_in) + if img_bin_curved is not None: + img_bin_in_des = np.copy(img_bin_in) w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) if w_relative==0: w_relative = img_in_des.shape[1] img_in_des = resize_image(img_in_des, 32, w_relative) + if img_bin_curved is not None: + img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative) w_tot_des+=img_in_des.shape[1] w_tot_des_list.append(img_in_des.shape[1]) imgs_deskewed_list.append(img_in_des) + if img_bin_curved is not None: + imgs_bin_deskewed_list.append(img_bin_in_des) img_final_deskewed = np.zeros((32, w_tot_des, 3))+255 + if img_bin_curved is not None: + img_bin_final_deskewed = np.zeros((32, w_tot_des, 3))+255 + else: + img_bin_final_deskewed = None w_indexer = 0 for ind in range(len(w_tot_des_list)): img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:] + if img_bin_curved is not None: + img_bin_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_bin_deskewed_list[ind][:,:,:] w_indexer = w_indexer+w_tot_des_list[ind] - return img_final_deskewed + return img_final_deskewed, 
img_bin_final_deskewed else: - return img_curved + return img_curved, img_bin_curved def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind): textline_contour[:,0] = textline_contour[:,0] + box_ind[2] @@ -434,3 +466,8 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr ocr_textline_in_textregion.append(text_textline) ocr_all_textlines.append(ocr_textline_in_textregion) return ocr_all_textlines + +def biopython_align(str1, str2): + alignments = pairwise2.align.globalms(str1, str2, 2, -1, -2, -2) + best_alignment = alignments[0] # Get the best alignment + return best_alignment.seqA, best_alignment.seqB