@@ -259,7 +259,7 @@ class Eynollah:
         self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
         self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
         self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18"
-        self.model_reading_order_dir = dir_models + "/model_ens_reading_order_machine_based"
+        self.model_reading_order_dir = dir_models + "/model_mb_ro_aug_3"#"/model_ens_reading_order_machine_based"
         #"/modelens_12sp_elay_0_3_4__3_6_n"
         #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"
         #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"
@@ -3321,12 +3321,22 @@ class Eynollah:
         y_len = text_regions_p.shape[0]
         x_len = text_regions_p.shape[1]

         img_poly = np.zeros((y_len,x_len), dtype='uint8')
         img_poly[text_regions_p[:,:]==1] = 1
         img_poly[text_regions_p[:,:]==2] = 2
         img_poly[text_regions_p[:,:]==3] = 4
         img_poly[text_regions_p[:,:]==6] = 5
+
+        #temp
+        sep_mask = (img_poly==5)*1
+        sep_mask = sep_mask.astype('uint8')
+        sep_mask = cv2.erode(sep_mask, kernel=KERNEL, iterations=2)
+        img_poly[img_poly==5] = 0
+        img_poly[sep_mask==1] = 5
+        #
+
+

         img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8')
         if contours_only_text_parent_h:
             _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(
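
Note on the block above: the added lines thin the separator class (label 5) before reading-order inference, so that specks and hairline separators vanish instead of steering the model. A minimal self-contained sketch of the same idea; the 5×5 ones kernel is an assumption standing in for eynollah's module-level `KERNEL`:

    import numpy as np
    import cv2

    KERNEL = np.ones((5, 5), np.uint8)  # assumption: eynollah's module-level kernel

    def thin_separators(img_poly, label=5, iterations=2):
        # Isolate the separator class as a binary uint8 mask.
        sep_mask = (img_poly == label).astype('uint8')
        # Erosion shrinks every separator component; components thinner than
        # the kernel disappear entirely after two iterations.
        sep_mask = cv2.erode(sep_mask, kernel=KERNEL, iterations=iterations)
        # Drop the old separator pixels, keep only the eroded cores.
        img_poly[img_poly == label] = 0
        img_poly[sep_mask == 1] = label
        return img_poly
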
@@ -3341,9 +3351,13 @@ class Eynollah:
         if not len(co_text_all):
             return [], []

-        labels_con = np.zeros((y_len, x_len, len(co_text_all)), dtype=bool)
+        labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool)
+
+        co_text_all = [(i/6).astype(int) for i in co_text_all]
+
         for i in range(len(co_text_all)):
             img = labels_con[:,:,i].astype(np.uint8)
+            #img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
             cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,))
             labels_con[:,:,i] = img

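
The change above rasterizes each region contour into a boolean plane at one sixth of the page size, dividing the contour coordinates once instead of resizing a full-resolution filled mask per region (the commented `cv2.resize` line is the abandoned alternative); for pages with hundreds of regions this cuts the label stack's memory roughly 36-fold. A sketch of the idea, with a hypothetical helper name:

    import numpy as np
    import cv2

    def rasterize_contours_downscaled(contours, y_len, x_len, scale=6):
        # One boolean plane per contour, at 1/scale of the page size.
        labels = np.zeros((int(y_len / scale), int(x_len / scale), len(contours)), dtype=bool)
        # Scale the polygon coordinates once; cheaper than filling at full
        # resolution and shrinking each plane afterwards.
        contours = [(cnt / scale).astype(int) for cnt in contours]
        for i, cnt in enumerate(contours):
            plane = labels[:, :, i].astype(np.uint8)
            cv2.fillPoly(plane, pts=[cnt], color=(1,))
            labels[:, :, i] = plane
        return labels
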
@@ -3360,6 +3374,7 @@ class Eynollah:
         img_header_and_sep = resize_image(img_header_and_sep, height1, width1)
         img_poly = resize_image(img_poly, height3, width3)

+
         inference_bs = 3
         input_1 = np.zeros((inference_bs, height1, width1, 3))
         ordered = [list(range(len(co_text_all)))]
@@ -4575,10 +4590,6 @@ class Eynollah:
             return pcgts


-
-        ## check the ro order
-
-

         #print("text region early 3 in %.1fs", time.time() - t0)
         if self.light_version:
@@ -4886,7 +4897,7 @@ class Eynollah_ocr:
             self.model_ocr.to(self.device)

         else:
-            self.model_ocr_dir = dir_models + "/model_step_75000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
+            self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
             model_ocr = load_model(self.model_ocr_dir , compile=False)

             self.prediction_model = tf.keras.models.Model(
@@ -4974,7 +4985,7 @@ class Eynollah_ocr:
     def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image):
         width = np.shape(textline_image)[1]
         height = np.shape(textline_image)[0]
-        common_window = int(0.06*width)
+        common_window = int(0.22*width)

         width1 = int ( width/2. - common_window )
         width2 = int ( width/2. + common_window )
@@ -4984,13 +4995,17 @@ class Eynollah_ocr:

         peaks_real, _ = find_peaks(sum_smoothed, height=0)

-        if len(peaks_real)>70:
+        if len(peaks_real)>35:

-            peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+            #peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+            argsort = np.argsort(sum_smoothed[peaks_real])[::-1]
+            peaks_real_top_six = peaks_real[argsort[:6]]
+            midpoint = textline_image.shape[1] / 2.
+            arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint))

-            arg_max = np.argmax(sum_smoothed[peaks_real])
+            #arg_max = np.argmax(sum_smoothed[peaks_real])

-            peaks_final = peaks_real[arg_max]
+            peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max]

             return peaks_final
         else:
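
Together with the `common_window` widening from 0.06 to 0.22 of the line width in the previous hunk, this changes how an overlong textline is split for OCR: rather than clamping peaks to a narrow central window and taking the global maximum, the code now takes the six strongest whitespace peaks of the smoothed vertical-projection profile and splits at the one closest to the midpoint, keeping the two halves balanced. A self-contained sketch under those assumptions, with `sum_smoothed` being the smoothed column-sum profile:

    import numpy as np
    from scipy.signal import find_peaks

    def choose_split_point(sum_smoothed, line_width, top_k=6):
        peaks, _ = find_peaks(sum_smoothed, height=0)
        if len(peaks) == 0:
            return line_width // 2  # fallback: no whitespace peak found
        # Strongest peaks first; keep the top_k candidates.
        order = np.argsort(sum_smoothed[peaks])[::-1]
        candidates = peaks[order[:top_k]]
        # Among the strongest peaks, split at the one nearest the midpoint.
        midpoint = line_width / 2.
        return candidates[np.argmin(np.abs(candidates - midpoint))]
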
@@ -5039,9 +5054,18 @@ class Eynollah_ocr:
         if width_new == 0:
             width_new = img.shape[1]

+        ##if width_new+32 >= image_width:
+            ##width_new = width_new - 32
+
+        ###patch_zero = np.zeros((32, 32, 3))#+255
+        ###patch_zero[9:19,8:18,:] = 0
+
         img = resize_image(img, image_height, width_new)
         img_fin = np.ones((image_height, image_width, 3))*255
-        img_fin[:,:+width_new,:] = img[:,:,:]
+        ###img_fin[:,:32,:] = patch_zero[:,:,:]
+        ###img_fin[:,32:32+width_new,:] = img[:,:,:]
+
+        img_fin[:,:width_new,:] = img[:,:,:]
         img_fin = img_fin / 255.
         return img_fin

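
For context, `preprocess_and_resize_image_for_ocrcnn_model` resizes a textline crop to the model's fixed input height, pads the remaining width with white, and normalizes to [0, 1]; the `###`-prefixed lines are disabled experiments with a 32-pixel marker patch. A sketch of the active path, using `cv2.resize` in place of eynollah's `resize_image` helper to stay self-contained:

    import numpy as np
    import cv2

    def preprocess_for_ocr_cnn(img, image_height, image_width):
        # Aspect-preserving width once the crop is scaled to the input height.
        width_new = int(img.shape[1] * image_height / float(img.shape[0]))
        width_new = min(max(width_new, 1), image_width)  # clamp to the input width
        img = cv2.resize(img, (width_new, image_height))
        # White canvas: the textline fills the left part, padding stays white.
        canvas = np.ones((image_height, image_width, 3)) * 255
        canvas[:, :width_new, :] = img[:, :, :]
        return canvas / 255.
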
@@ -5097,7 +5121,7 @@ class Eynollah_ocr:
                     img_crop = img_poly_on_img[y:y+h, x:x+w, :]
                     img_crop[mask_poly==0] = 255

-                    if h2w_ratio > 0.05:
+                    if h2w_ratio > 0.1:
                         cropped_lines.append(img_crop)
                         cropped_lines_meging_indexing.append(0)
                     else:
@@ -5234,7 +5258,7 @@ class Eynollah_ocr:
                 if self.draw_texts_on_image:
                     total_bb_coordinates.append([x,y,w,h])

-                h2w_ratio = h/float(w)
+                w_scaled = w * image_height/float(h)

                 img_poly_on_img = np.copy(img)
                 if self.prediction_with_both_of_rgb_and_bin:
@@ -5252,7 +5276,7 @@ class Eynollah_ocr:
                     img_crop_bin[mask_poly==0] = 255

                 if not self.export_textline_images_and_text:
-                    if h2w_ratio > 0.1:
+                    if w_scaled < 1.5*image_width:
                         img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
                         cropped_lines.append(img_fin)
                         cropped_lines_meging_indexing.append(0)
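
This pairs with the `h2w_ratio` → `w_scaled` change in the previous hunk: whether a line is OCRed whole is now decided by the width it will have after scaling to the model's input height, not by its raw aspect ratio. A tiny sketch of the criterion, names hypothetical:

    def fits_ocr_input_whole(w, h, image_height, image_width, slack=1.5):
        # Width of the crop once resized to the fixed input height.
        w_scaled = w * image_height / float(h)
        # Lines up to `slack` times the model width go in whole;
        # wider lines are split in two first.
        return w_scaled < slack * image_width
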
@@ -5338,7 +5362,7 @@ class Eynollah_ocr:
                     pred_texts = self.decode_batch_predictions(preds)

                     for ib in range(imgs.shape[0]):
-                        pred_texts_ib = pred_texts[ib].strip("[UNK]")
+                        pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
                         extracted_texts.append(pred_texts_ib)

             extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))]
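
The `strip` → `replace` fix above matters because `str.strip("[UNK]")` does not remove the token: it strips any of the characters `[`, `]`, `U`, `N`, `K` from both ends of the string, silently eating legitimate leading or trailing characters. A quick demonstration:

    pred = "Un[UNK]known[UNK]"
    pred.strip("[UNK]")        # 'n[UNK]known' -- drops the real leading 'U' too
    pred.replace("[UNK]", "")  # 'Unknown'     -- removes every token, nothing else
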
@@ -5378,7 +5402,7 @@ class Eynollah_ocr:
             text_by_textregion = []
             for ind in unique_cropped_lines_region_indexer:
                 extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind]
-                text_by_textregion.append(" ".join(extracted_texts_merged_un))
+                text_by_textregion.append("".join(extracted_texts_merged_un))

             indexer = 0
             indexer_textregion = 0
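
On the last hunk: joining with `""` instead of `" "` concatenates the per-line strings verbatim, so region text no longer gains an extra space at each textline boundary; presumably the decoded line texts already carry their own separators, otherwise words would fuse across line breaks. The behavioural difference in one line:

    lines = ["Handel ", "und Wandel"]
    " ".join(lines)  # 'Handel  und Wandel' -- old: extra space per line break
    "".join(lines)   # 'Handel und Wandel'  -- new: lines concatenated as-is
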