@@ -547,24 +547,6 @@ class eynollah:
            # self.width_org = self.image.shape[1]
            is_image_enhanced = True
        else:
            """
            if img.shape[0]<=2530 and img.shape[0]>=img.shape[1]:
                img_h_new=3000
                img_w_new=int(img.shape[1]/float(img.shape[0]) * 3000)
                img_new=resize_image(img,img_h_new,img_w_new)
                image_res=self.predict_enhancement(img_new)
                #cv2.imwrite(os.path.join(self.dir_out, self.image_filename_stem) + ".tif",self.image)
                #self.image=self.image.astype(np.uint16)
                ##self.scale_x=1
                ##self.scale_y=1
                ##self.height_org = self.image.shape[0]
                ##self.width_org = self.image.shape[1]
                is_image_enhanced=True
            else:
                is_image_enhanced=False
                image_res=np.copy(img)

            """
            is_image_enhanced = False
            num_column_is_classified = True
            image_res = np.copy(img)
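
# A minimal sketch of the aspect-preserving resize used in the removed branch
# above (target height hard-coded to 3000 px there), assuming resize_image()
# wraps cv2.resize with (height, width) arguments; illustration only.
import cv2

def resize_to_height(img, target_h=3000):
    # scale the width by the same factor so the aspect ratio is preserved
    target_w = int(img.shape[1] / float(img.shape[0]) * target_h)
    return cv2.resize(img, (target_w, target_h), interpolation=cv2.INTER_CUBIC)
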
@@ -787,7 +769,6 @@ class eynollah:
        img = img.astype(np.uint8)
        patches = False
        model_page, session_page = self.start_new_session_and_model(self.model_page_dir)
        ###img = otsu_copy(self.image)
        for ii in range(1):
            img = cv2.GaussianBlur(img, (5, 5), 0)

@@ -831,7 +812,6 @@ class eynollah:
    def extract_page(self):
        patches = False
        model_page, session_page = self.start_new_session_and_model(self.model_page_dir)
        ###img = otsu_copy(self.image)
        for ii in range(1):
            img = cv2.GaussianBlur(self.image, (5, 5), 0)

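
# A hedged sketch of the page-extraction idea behind extract_page(): the hunk
# above only shows the Gaussian blur and the page-model setup, so the
# mask-to-crop step below is an illustration, assuming the model yields a
# binary page mask (OpenCV 4.x findContours signature).
import cv2
import numpy as np

def crop_page(image, page_mask):
    mask = (page_mask > 0).astype(np.uint8)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    biggest = max(contours, key=cv2.contourArea)      # largest blob = page area
    x, y, w, h = cv2.boundingRect(biggest)
    return image[y : y + h, x : x + w], (x, y, w, h)  # cropped page + coords
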
@@ -883,28 +863,6 @@ class eynollah:
        img_height_h = img.shape[0]
        img_width_h = img.shape[1]

        ###if patches and cols>=3 :
        ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully)
        ###if not patches:
        ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully_np)

        ###if patches and cols==2 :
        ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_2col)

        ###if patches and cols==1 :
        ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_2col)

        ###if patches and cols>=2:

        ###img = otsu_copy_binary(img)#otsu_copy(img)
        ###img = img.astype(np.uint8)

        ###if patches and cols==1:

        ###img = otsu_copy_binary(img)#otsu_copy(img)
        ###img = img.astype(np.uint8)
        ###img= resize_image(img, int(img_height_h*1), int(img_width_h*1) )

        if patches:
            model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully)
        if not patches:
@@ -1306,7 +1264,6 @@ class eynollah:
        slope_for_all = [slope_deskew][0]

        all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]
        ###cnt_clean_rot=textline_contours_postprocessing(all_text_region_raw,slopes[jj],contours_only_text_parent[jj],boxes_text[jj],slope_first)
        cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], 0)

        textlines_rectangles_per_each_subprocess.append(cnt_clean_rot)
@@ -1366,7 +1323,6 @@ class eynollah:
        ##plt.show()

        all_text_region_raw[mask_only_con_region == 0] = 0
        ###cnt_clean_rot=textline_contours_postprocessing(all_text_region_raw,slopes[jj],contours_only_text_parent[jj],boxes_text[jj],slope_first)
        cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], slope_first)

        textlines_rectangles_per_each_subprocess.append(cnt_clean_rot)
@@ -1829,10 +1785,8 @@ class eynollah:
        try:

            try:
                ###id_indexer=id_indexer
                id_indexer_l=id_indexer_l
            except:
                ###id_indexer=0
                id_indexer_l=0
            for mm in range(len(found_polygons_marginals)):
                textregion=ET.SubElement(page, 'TextRegion')
@@ -2191,10 +2145,8 @@ class eynollah:
            #id_indexer_l=0

            try:
                ###id_indexer=id_indexer
                id_indexer_l = id_indexer_l
            except:
                ###id_indexer=0
                id_indexer_l = 0

            for mm in range(len(found_polygons_marginals)):
@@ -2397,7 +2349,6 @@ class eynollah:
        del model_region
        del session_region
        gc.collect()
        ###K.clear_session()

        model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2)

@@ -2438,7 +2389,6 @@ class eynollah:
        del model_region
        del session_region
        gc.collect()
        ###K.clear_session()

        mask_zeros2=(prediction_regions_org2[:,:,0]==0)*1
        mask_lines2=(prediction_regions_org2[:,:,0]==3)*1
@@ -2469,67 +2419,6 @@ class eynollah:
        del mask_zeros2
        del prediction_regions_org2

        #if is_image_enhanced:
        #pass
        #else:
        #model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2)

        #gaussian_filter=False
        #patches=True
        #binary=False




        #ratio_x=1
        #ratio_y=1
        #median_blur=False

        #img= resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))

        #if binary:
        #img = self.otsu_copy_binary(img)#self.otsu_copy(img)
        #img = img.astype(np.uint16)

        #if median_blur:
        #img=cv2.medianBlur(img,5)
        #if gaussian_filter:
        #img= cv2.GaussianBlur(img,(5,5),0)
        #img = img.astype(np.uint16)
        #prediction_regions_org2=self.do_prediction(patches,img,model_region)

        #prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h )

        ##plt.imshow(prediction_regions_org2[:,:,0])
        ##plt.show()
        ##sys.exit()
        ###prediction_regions_org=prediction_regions_org[:,:,0]

        #session_region.close()
        #del model_region
        #del session_region
        #gc.collect()
        ####K.clear_session()

        #mask_zeros2=(prediction_regions_org2[:,:,0]==0)*1
        #mask_lines2=(prediction_regions_org2[:,:,0]==3)*1

        #text_sume_early=( (prediction_regions_org[:,:]==1)*1 ).sum()


        #prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros2[:,:]==1)]=0

        ###prediction_regions_org[mask_lines2[:,:]==1]=3
        #prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3

        #text_sume_second=( (prediction_regions_org[:,:]==1)*1 ).sum()

        #print(text_sume_second/float(text_sume_early)*100,'twomodelsratio')

        #del mask_lines2
        #del mask_zeros2
        #del prediction_regions_org2

        mask_lines_only=(prediction_regions_org[:,:]==3)*1

        prediction_regions_org = cv2.erode(prediction_regions_org[:,:], self.kernel, iterations=2)
@@ -2538,38 +2427,21 @@ class eynollah:
        #plt.show()

        prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], self.kernel, iterations=2)

        mask_texts_only=(prediction_regions_org[:,:]==1)*1

        mask_images_only=(prediction_regions_org[:,:]==2)*1



        pixel_img=1
        min_area_text=0.00001
        polygons_of_only_texts=return_contours_of_interested_region(mask_texts_only,pixel_img,min_area_text)

        polygons_of_only_images=return_contours_of_interested_region(mask_images_only,pixel_img)

        polygons_of_only_lines=return_contours_of_interested_region(mask_lines_only,pixel_img,min_area_text)


        text_regions_p_true=np.zeros(prediction_regions_org.shape)
        #text_regions_p_true[:,:]=text_regions_p_1[:,:]

        text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_lines, color=(3,3,3))

        ##text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_images, color=(2,2,2))
        text_regions_p_true[:,:][mask_images_only[:,:]==1]=2

        text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))

        ##print(np.unique(text_regions_p_true))


        #text_regions_p_true_3d=np.repeat(text_regions_p_1[:, :, np.newaxis], 3, axis=2)
        #text_regions_p_true_3d=text_regions_p_true_3d.astype(np.uint8)

        del polygons_of_only_texts
        del polygons_of_only_images
        del polygons_of_only_lines
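
# Compact restatement of the mask-to-region-map step in the hunk above:
# per-class masks are turned into contours and painted back into one labelled
# map (lines, then images, then texts), assuming return_contours_of_interested_region
# behaves like a cv2.findContours helper; this is a sketch, not the project code.
import cv2
import numpy as np

def build_region_map(pred):
    # pred: 2D label image from the region model (0 bg, 1 text, 2 image, 3 line)
    def contours_of(label):
        mask = ((pred == label) * 255).astype(np.uint8)
        cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        return list(cnts)

    region_map = np.zeros(pred.shape, dtype=np.uint8)
    lines, texts = contours_of(3), contours_of(1)
    if lines:
        cv2.fillPoly(region_map, pts=lines, color=(3, 3, 3))
    region_map[pred == 2] = 2              # image pixels copied straight over
    if texts:
        cv2.fillPoly(region_map, pts=texts, color=(1, 1, 1))
    return region_map
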
@@ -2588,9 +2460,6 @@ class eynollah:
    def write_images_into_directory(self, img_contoures, dir_of_cropped_imgs, image_page):
        index = 0
        for cont_ind in img_contoures:
            # cont_ind[:,0,0]=cont_ind[:,0,0]/self.scale_x
            # cont_ind[:,0,1]=cont_ind[:,0,1]/self.scale_y

            x, y, w, h = cv2.boundingRect(cont_ind)
            box = [x, y, w, h]
            croped_page, page_coord = crop_image_inside_box(box, image_page)
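
# Sketch of the per-contour cropping loop in write_images_into_directory,
# using plain slicing in place of the crop_image_inside_box helper; the file
# naming is illustrative only.
import os
import cv2

def write_crops(contours, out_dir, image_page, stem="region"):
    for index, cont in enumerate(contours):
        x, y, w, h = cv2.boundingRect(cont)
        crop = image_page[y : y + h, x : x + w]
        cv2.imwrite(os.path.join(out_dir, f"{stem}_{index}.png"), crop)
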
@@ -2710,7 +2579,6 @@ class eynollah:
        args_contours_h = np.array(range(len(arg_text_con_h)))

        order_by_con_head = np.zeros(len(arg_text_con_h))
        #####

        ref_point = 0
        order_of_texts_tot = []
@@ -3015,14 +2883,12 @@ class eynollah:
        img_g3[:, :, 1] = img_g[:, :]
        img_g3[:, :, 2] = img_g[:, :]

        ###self.produce_groundtruth_for_textline()
        image_page, page_coord = self.extract_page()

        # print(image_page.shape,'page')

        if self.dir_of_all is not None:
            cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_page.png"), image_page)
        ##########
        K.clear_session()
        gc.collect()

@@ -3272,7 +3138,6 @@ class eynollah:
        else:
            regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p)

        # regions_fully_np=filter_small_drop_capitals_from_no_patch_layout(regions_fully_np,text_regions_p)
        # plt.imshow(regions_fully_np[:,:,0])
        # plt.show()

@@ -3288,7 +3153,6 @@ class eynollah:
        # plt.show()

        text_regions_p[:, :][regions_fully[:, :, 0] == 4] = 4
        ##text_regions_p[:,:][(regions_fully[:,:,0]==7) & (text_regions_p[:,:]!=0)]=7

        text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4

@@ -3313,8 +3177,6 @@ class eynollah:

        print("full layout in: " + str(time.time() - t1))

        # sys.exit()

        pixel_img = 5
        polygons_of_images = return_contours_of_interested_region(img_revised_tab, pixel_img)

@@ -3322,10 +3184,9 @@ class eynollah:
        # plt.show()

        # print(img_revised_tab.shape,text_regions_p_1_n.shape)

        # text_regions_p_1_n=resize_image(text_regions_p_1_n,img_revised_tab.shape[0],img_revised_tab.shape[1])

        # print(np.unique(text_regions_p_1_n),'uni')

        text_only = ((img_revised_tab[:, :] == 1)) * 1
        if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
            text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1
@@ -3408,7 +3269,6 @@ class eynollah:
        for i in range(len(contours_only_text_parent)):
            # img1=np.zeros((text_only.shape[0],text_only.shape[1],3))
            # img1=cv2.fillPoly(img1,pts=[contours_only_text_parent[i]] ,color=(1,1,1))

            # plt.imshow(img1[:,:,0])
            # plt.show()

@@ -3425,7 +3285,6 @@ class eynollah:

            # img2=np.zeros((text_only.shape[0],text_only.shape[1],3))
            # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1))

            # plt.imshow(img2[:,:,0])
            # plt.show()

@@ -3448,21 +3307,13 @@ class eynollah:
        cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contoures([contours_biggest])
        cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contoures(contours_only_text_parent)
        # print(areas_cnt_text_parent,'areas_cnt_text_parent')

        ###index_con_parents_d=np.argsort(areas_cnt_text_parent_d)
        ##contours_only_text_parent_d=list(np.array(contours_only_text_parent_d)[index_con_parents_d])
        ###areas_cnt_text_parent_d=list(np.array(areas_cnt_text_parent_d)[index_con_parents_d])

        ##print(areas_cnt_text_parent_d,'areas_cnt_text_parent_d')

        # print(areas_cnt_text_parent_d,'areas_cnt_text_parent_d')
        # print(len(contours_only_text_parent),len(contours_only_text_parent_d),'vizzz')

        txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first)

        ###boxes_text,_= get_text_region_boxes_by_given_contours(contours_only_text_parent)
        boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
        boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
        ####boxes_text_h,_= get_text_region_boxes_by_given_contours(text_only_h,contours_only_text_parent_h,image_page)

        if not self.curved_line:
            slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
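
# The centroid computations above and the np.argmin(dists) lookup in the
# earlier hunk suggest deskewed contours are matched back to the originals by
# nearest centroid. A hedged standalone equivalent, assuming
# find_new_features_of_contoures returns centroids as its first two values:
import cv2
import numpy as np

def centroid(cnt):
    m = cv2.moments(cnt)
    return m["m10"] / (m["m00"] + 1e-9), m["m01"] / (m["m00"] + 1e-9)

def match_by_centroid(contours, contours_deskewed):
    cents_d = np.array([centroid(c) for c in contours_deskewed])
    matches = []
    for c in contours:
        dists = np.linalg.norm(cents_d - np.array(centroid(c)), axis=1)
        matches.append(int(np.argmin(dists)))   # index into contours_deskewed
    return matches
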
@@ -3473,14 +3324,10 @@ class eynollah:
        scale_param = 1
        all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=self.kernel, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew)

        # all_found_texline_polygons,boxes_text,txt_con_org,contours_only_text_parent,all_box_coord=self.get_slopes_and_deskew_new_curved(txt_con_org,contours_only_text_parent,textline_mask_tot_ea,image_page_rotated,boxes_text,text_only,num_col,scale_param)
        all_found_texline_polygons = small_textlines_to_parent_adherence2(all_found_texline_polygons, textline_mask_tot_ea, num_col_classifier)

        # slopes=list(np.zeros(len(contours_only_text_parent)))

        all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, index_by_text_par_con_marginal, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=self.kernel, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew)

        # all_found_texline_polygons,boxes_text,txt_con_org,contours_only_text_parent,all_box_coord=self.get_slopes_and_deskew_new_curved(txt_con_org,contours_only_text_parent,textline_mask_tot_ea,image_page_rotated,boxes_text,text_only,num_col,scale_param)
        all_found_texline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_texline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)

        index_of_vertical_text_contours = np.array(range(len(slopes)))[(abs(np.array(slopes)) > 60)]
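
# The index filter on the last line above keeps regions whose textline slope
# exceeds 60 degrees (treated as vertical text); a minimal standalone form:
import numpy as np

def vertical_text_indices(slopes, threshold_deg=60):
    slopes = np.asarray(slopes, dtype=float)
    return np.arange(len(slopes))[np.abs(slopes) > threshold_deg]
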
@@ -3490,23 +3337,9 @@ class eynollah:
        K.clear_session()
        gc.collect()

        # contours_only_text_parent_d_ordered=list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con])
        ###print(index_by_text_par_con,'index_by_text_par_con')
        # print(index_by_text_par_con,'index_by_text_par_con')

        if self.full_layout:
            ##for iii in range(len(contours_only_text_parent)):
            ##img1=np.zeros((text_only.shape[0],text_only.shape[1],3))
            ##img1=cv2.fillPoly(img1,pts=[contours_only_text_parent[iii]] ,color=(1,1,1))

            ##plt.imshow(img1[:,:,0])
            ##plt.show()

            ##img2=np.zeros((text_only.shape[0],text_only.shape[1],3))
            ##img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d_ordered[iii]] ,color=(1,1,1))

            ##plt.imshow(img2[:,:,0])
            ##plt.show()

            if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
                contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con])

@@ -3516,11 +3349,6 @@ class eynollah:

            text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered)

            ###text_regions_p,contours_only_text_parent,contours_only_text_parent_h,all_box_coord,all_box_coord_h,all_found_texline_polygons,all_found_texline_polygons_h=check_any_text_region_in_model_one_is_main_or_header(text_regions_p,regions_fully,contours_only_text_parent,all_box_coord,all_found_texline_polygons)
            # text_regions_p=self.return_region_segmentation_after_implementing_not_head_maintext_parallel(text_regions_p,boxes)

            # if you want to save the layout result just uncommet following plot

            if self.dir_of_layout is not None:
                self.save_plot_of_layout(text_regions_p, image_page)
            if self.dir_of_all is not None:
@@ -3535,7 +3363,6 @@ class eynollah:

            pixel_img = 4
            polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
            # polygons_of_drop_capitals=[]

            all_found_texline_polygons = adhere_drop_capital_region_into_cprresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, kernel=self.kernel, curved_line=self.curved_line)

@@ -3554,9 +3381,7 @@ class eynollah:
            num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, spliter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines)

            # print(peaks_neg_fin,peaks_neg_fin_d,'num_col2')

            # print(spliter_y_new,spliter_y_new_d,'num_col_classifier')

            # print(matrix_of_lines_ch.shape,matrix_of_lines_ch_d.shape,'matrix_of_lines_ch')

            if num_col_classifier >= 3: