updating light version

pull/138/head^2
vahidrezanezhad 3 months ago
parent 543ed4bc38
commit 1da4b7f589

@ -252,7 +252,7 @@ class Eynollah:
self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based"
self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlyla_12_0_2_con_18_22"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans"
if self.textline_light: if self.textline_light:
@ -2189,7 +2189,7 @@ class Eynollah:
#print(num_col_classifier,'num_col_classifier') #print(num_col_classifier,'num_col_classifier')
if num_col_classifier == 1: if num_col_classifier == 1:
img_w_new = 800 img_w_new = 1000
img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new)
elif num_col_classifier == 2: elif num_col_classifier == 2:
@ -2299,9 +2299,9 @@ class Eynollah:
mask_texts_only = mask_texts_only.astype('uint8') mask_texts_only = mask_texts_only.astype('uint8')
#if num_col_classifier == 1 or num_col_classifier == 2: ##if num_col_classifier == 1 or num_col_classifier == 2:
#mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1)
#mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1)
mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1)
@ -4153,7 +4153,7 @@ class Eynollah:
if dilation_m1<6: if dilation_m1<6:
dilation_m1 = 6 dilation_m1 = 6
#print(dilation_m1, 'dilation_m1') #print(dilation_m1, 'dilation_m1')
dilation_m1 = 5 dilation_m1 = 6
dilation_m2 = int(dilation_m1/2.) +1 dilation_m2 = int(dilation_m1/2.) +1
for i in range(len(x_differential)): for i in range(len(x_differential)):
@ -4657,6 +4657,31 @@ class Eynollah:
all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0]
return all_found_textline_polygons return all_found_textline_polygons
def delete_regions_without_textlines(self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con):
    """Drop every region whose textline collection is empty.

    The six arguments are parallel, index-aligned sequences: entry ``i`` of
    each one is read for the same region. A region is kept when
    ``len(all_found_textline_polygons[i])`` is non-zero.

    Args:
        slopes: per-region values, filtered by position.
        all_found_textline_polygons: per-region textline collections; the
            length of each entry decides whether the region is kept.
        boxes_text: per-region values, filtered by position.
        txt_con_org: per-region values, filtered by position.
        contours_only_text_parent: per-region values, filtered by position.
        index_by_text_par_con: per-region sortable keys (presumably the
            reading-order index of each region -- confirm against caller).

    Returns:
        A 6-tuple of lists ``(slopes, textline_polygons, boxes_text,
        txt_con_org, contours_only_text_parent, ranks)``. The first five are
        the inputs restricted to the kept regions, in their original order.
        ``ranks[k]`` is the position that the k-th kept entry of
        ``index_by_text_par_con`` takes when the kept entries are sorted
        ascending, i.e. the kept keys renumbered to ``0..n-1``.

    Raises:
        IndexError: if any parallel sequence is shorter than required by a
            kept position.
    """
    # ``len(...)`` rather than plain truthiness: an entry may be a numpy
    # array, whose truth value is ambiguous when it has several elements.
    kept = [
        pos
        for pos, textlines in enumerate(all_found_textline_polygons)
        if len(textlines)
    ]

    slopes_rem = [slopes[pos] for pos in kept]
    all_found_textline_polygons_rem = [all_found_textline_polygons[pos] for pos in kept]
    boxes_text_rem = [boxes_text[pos] for pos in kept]
    txt_con_org_rem = [txt_con_org[pos] for pos in kept]
    contours_only_text_parent_rem = [contours_only_text_parent[pos] for pos in kept]
    index_by_text_par_con_rem = [index_by_text_par_con[pos] for pos in kept]

    # ``sort_order[p]`` is the kept entry that lands at sorted position ``p``.
    # Scattering the positions through it yields the inverse permutation
    # (each entry's rank) in O(n), instead of scanning a boolean mask over
    # the whole array once per entry.
    sort_order = np.argsort(index_by_text_par_con_rem)
    positions = np.arange(len(sort_order))
    ranks = np.empty_like(positions)
    ranks[sort_order] = positions
    index_by_text_par_con_rem_sort = list(ranks)

    return slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort
def run(self): def run(self):
""" """
Get image and scales, then extract the page of scanned image Get image and scales, then extract the page of scanned image
@ -4923,6 +4948,9 @@ class Eynollah:
slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew)
#slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con)
#slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
#all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
@ -5121,6 +5149,7 @@ class Eynollah:
all_found_textline_polygons=[ all_found_textline_polygons ] all_found_textline_polygons=[ all_found_textline_polygons ]
all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
order_text_new = [0] order_text_new = [0]

Loading…
Cancel
Save