more extraction of util functions

pull/8/head
Konstantin Baierer 4 years ago
parent bf209d1622
commit 86c809b58f

@ -41,6 +41,9 @@ from .utils import (
filter_contours_area_of_image_interiors,
rotatedRectWithMaxArea,
rotate_image,
rotate_max_area,
rotation_not_90_func,
rotation_not_90_func_full_layout,
rotate_max_area_new,
rotation_image_new,
crop_image_inside_box,
@ -60,9 +63,14 @@ from .utils import (
seperate_lines,
seperate_lines_new_inside_teils2,
filter_small_drop_capitals_from_no_patch_layout,
find_num_col_deskew,
return_hor_spliter_by_index_for_without_verticals,
find_new_features_of_contoures,
find_num_col,
find_num_col_deskew,
find_num_col_only_image,
find_num_col_by_vertical_lines,
find_contours_mean_y_diff,
contours_in_same_horizon,
)
@ -1303,7 +1311,7 @@ class eynollah:
try:
textline_con, hierachy = return_contours_of_image(img_int_p)
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierachy, max_area=1, min_area=0.0008)
y_diff_mean = self.find_contours_mean_y_diff(textline_con_fil)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
sigma_des = int(y_diff_mean * (4.0 / 40.0))
@ -1460,7 +1468,7 @@ class eynollah:
textline_con, hierachy = return_contours_of_image(img_int_p)
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierachy, max_area=1, min_area=0.00008)
y_diff_mean = self.find_contours_mean_y_diff(textline_con_fil)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
sigma_des = int(y_diff_mean * (4.0 / 40.0))
@ -2260,11 +2268,6 @@ class eynollah:
return slope
def find_contours_mean_y_diff(self, contours_main):
    """Return the mean gap between consecutive contour centroid y-coordinates.

    For every contour the centroid y is taken from the image moments as
    ``m01 / m00``; the values are sorted and the mean of the successive
    differences is returned.

    Args:
        contours_main: sequence of contours accepted by ``cv2.moments``.

    Returns:
        The mean of the sorted centroid-y differences. With fewer than two
        contours there is no difference to average and NaN is returned
        (the same value ``np.mean`` of an empty array would give, but
        without emitting a RuntimeWarning).
    """
    centroid_ys = []
    for contour in contours_main:
        moments = cv2.moments(contour)
        # 1e-32 guards against division by zero for degenerate contours (m00 == 0).
        centroid_ys.append(moments["m01"] / (moments["m00"] + 1e-32))
    if len(centroid_ys) < 2:
        return np.float64(np.nan)
    return np.mean(np.diff(np.sort(np.array(centroid_ys))))
def return_deskew_slop(self, img_patch_org, sigma_des, main_page=False):
@ -2826,7 +2829,7 @@ class eynollah:
try:
textline_con, hierachy = return_contours_of_image(crop_img)
textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierachy, max_area=1, min_area=0.0008)
y_diff_mean = self.find_contours_mean_y_diff(textline_con_fil)
y_diff_mean = find_contours_mean_y_diff(textline_con_fil)
sigma_des = int(y_diff_mean * (4.0 / 40.0))
@ -4100,400 +4103,6 @@ class eynollah:
image_back_zero[:, :][image_back_zero[:, :] == -255] = 255
return image_back_zero
def find_num_col_only_image(self, regions_without_seperators, multiplier=3.8):
# Purpose: locate candidate column separators as minima of the smoothed
# column-sum profile of `regions_without_seperators`.
# Returns a tuple (len(peaks_fin_true), peaks_fin_true): the number of retained
# minima and their indices along the profile.
# `multiplier` sets the depth threshold: a minimum is kept only when its smoothed
# value is below min_peaks_pos - min_peaks_pos / multiplier.
# NOTE(review): indentation is not visible in this view; the nesting described in
# the comments below is inferred from statement order - confirm against the file.
# Sum over axis 0: one value per index of axis 1.
regions_without_seperators_0 = regions_without_seperators[:, :].sum(axis=0)
##plt.plot(regions_without_seperators_0)
##plt.show()
# Standard deviation of the Gaussian used to smooth the profile.
sigma_ = 15
# Reversed copy of the profile, used to find the last non-zero entry.
meda_n_updown = regions_without_seperators_0[len(regions_without_seperators_0) :: -1]
# Index of the first non-zero entry (0 when the profile is all zero).
first_nonzero = next((i for i, x in enumerate(regions_without_seperators_0) if x), 0)
last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0)
# Convert the offset found in the reversed profile back to a forward position.
last_nonzero = len(regions_without_seperators_0) - last_nonzero
y = regions_without_seperators_0 # [first_nonzero:last_nonzero]
# Pad the profile with 10 zeros on each side.
y_help = np.zeros(len(y) + 20)
y_help[10 : len(y) + 10] = y
x = np.array(range(len(y)))
# Invert the padded profile so that its minima become maxima, then pad again by 10.
zneg_rev = -y_help + np.max(y_help)
zneg = np.zeros(len(zneg_rev) + 20)
zneg[10 : len(zneg_rev) + 10] = zneg_rev
# z: smoothed original profile; zneg: smoothed inverted (padded) profile.
z = gaussian_filter1d(y, sigma_)
zneg = gaussian_filter1d(zneg, sigma_)
peaks_neg, _ = find_peaks(zneg, height=0)
peaks, _ = find_peaks(z, height=0)
# Undo the two 10-sample paddings so indices refer to the unpadded profile.
peaks_neg = peaks_neg - 10 - 10
# Unfiltered minima, kept for the neighbourhood check near the end.
peaks_neg_org = np.copy(peaks_neg)
# Keep minima strictly between the first and last non-zero positions.
peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)]
# Keep maxima inside the central 9%..91% of the width (shape[1]).
peaks = peaks[(peaks > 0.09 * regions_without_seperators.shape[1]) & (peaks < 0.91 * regions_without_seperators.shape[1])]
# Keep minima more than 500 samples away from either edge.
peaks_neg = peaks_neg[(peaks_neg > 500) & (peaks_neg < (regions_without_seperators.shape[1] - 500))]
# print(peaks)
# Smoothed values at the maxima, ignoring those not above 10.
interest_pos = z[peaks]
interest_pos = interest_pos[interest_pos > 10]
# Smoothed values at the candidate minima.
interest_neg = z[peaks_neg]
# Reference height: the mean of the retained maxima.
min_peaks_pos = np.mean(interest_pos) # np.min(interest_pos)
min_peaks_neg = 0 # np.min(interest_neg)
# $print(min_peaks_pos)
dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier
# print(interest_pos)
# Depth threshold: only minima whose smoothed value is below it are kept.
grenze = min_peaks_pos - dis_talaei # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
interest_neg_fin = interest_neg[(interest_neg < grenze)]
peaks_neg_fin = peaks_neg[(interest_neg < grenze)]
# Provisional column count: one more than the number of retained minima.
num_col = (len(interest_neg_fin)) + 1
# Reference positions along the profile: start, end, middle and the outer thirds.
p_l = 0
p_u = len(y) - 1
p_m = int(len(y) / 2.0)
p_g_l = int(len(y) / 3.0)
p_g_u = len(y) - int(len(y) / 3.0)
# Two minima that both lie in the same outer third, or both on the same side of
# the middle, reset the count to 1 (peaks_neg_fin itself is not cleared here).
if num_col == 3:
if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or (peaks_neg_fin[0] < p_m and peaks_neg_fin[1] < p_m) or (peaks_neg_fin[0] > p_m and peaks_neg_fin[1] > p_m):
num_col = 1
else:
pass
# A single minimum lying in an outer third also resets the count to 1.
if num_col == 2:
if (peaks_neg_fin[0] > p_g_u) or (peaks_neg_fin[0] < p_g_l):
num_col = 1
else:
pass
# Distances between consecutive retained minima.
diff_peaks = np.abs(np.diff(peaks_neg_fin))
# Minima closer than cut_off are collected into one group ("forest"); from each
# group the member with the smallest smoothed value is kept in peaks_neg_true.
cut_off = 400
peaks_neg_true = []
forest = []
for i in range(len(peaks_neg_fin)):
if i == 0:
forest.append(peaks_neg_fin[i])
if i < (len(peaks_neg_fin) - 1):
if diff_peaks[i] <= cut_off:
forest.append(peaks_neg_fin[i + 1])
if diff_peaks[i] > cut_off:
# print(forest[np.argmin(z[forest]) ] )
# isNaN is defined outside this block; presumably a NaN test - verify.
if not isNaN(forest[np.argmin(z[forest])]):
peaks_neg_true.append(forest[np.argmin(z[forest])])
forest = []
forest.append(peaks_neg_fin[i + 1])
if i == (len(peaks_neg_fin) - 1):
# print(print(forest[np.argmin(z[forest]) ] ))
if not isNaN(forest[np.argmin(z[forest])]):
peaks_neg_true.append(forest[np.argmin(z[forest])])
# Recompute the count from the grouped minima and repeat the position checks,
# now also using quarter positions.
num_col = (len(peaks_neg_true)) + 1
p_l = 0
p_u = len(y) - 1
p_m = int(len(y) / 2.0)
p_quarter = int(len(y) / 4.0)
p_g_l = int(len(y) / 3.0)
p_g_u = len(y) - int(len(y) / 3.0)
p_u_quarter = len(y) - p_quarter
if num_col == 3:
if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or (peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or (peaks_neg_true[0] < p_m and peaks_neg_true[1] < p_m) or (peaks_neg_true[0] > p_m and peaks_neg_true[1] > p_m):
num_col = 1
peaks_neg_true = []
# First minimum is in the middle third and the second is in the last quarter:
# keep only the first.
elif (peaks_neg_true[0] < p_g_u and peaks_neg_true[0] > p_g_l) and (peaks_neg_true[1] > p_u_quarter):
peaks_neg_true = [peaks_neg_true[0]]
# Second minimum is in the middle third and the first is in the first quarter:
# keep only the second.
elif (peaks_neg_true[1] < p_g_u and peaks_neg_true[1] > p_g_l) and (peaks_neg_true[0] < p_quarter):
peaks_neg_true = [peaks_neg_true[1]]
else:
pass
if num_col == 2:
if (peaks_neg_true[0] > p_g_u) or (peaks_neg_true[0] < p_g_l):
num_col = 1
peaks_neg_true = []
# Three minima with exactly two of them in the same outer third: discard all.
if num_col == 4:
if len(np.array(peaks_neg_true)[np.array(peaks_neg_true) < p_g_l]) == 2 or len(np.array(peaks_neg_true)[np.array(peaks_neg_true) > (len(y) - p_g_l)]) == 2:
num_col = 1
peaks_neg_true = []
else:
pass
# no deeper hill around found hills
peaks_fin_true = []
for i in range(len(peaks_neg_true)):
hill_main = peaks_neg_true[i]
# deep_depth=z[peaks_neg]
# Unfiltered minima within 400 samples on either side of this candidate.
hills_around = peaks_neg_org[((peaks_neg_org > hill_main) & (peaks_neg_org <= hill_main + 400)) | ((peaks_neg_org < hill_main) & (peaks_neg_org >= hill_main - 400))]
deep_depth_around = z[hills_around]
# print(hill_main,z[hill_main],hills_around,deep_depth_around,'manoooo')
# Keep the candidate only when no neighbouring minimum has a smaller smoothed value.
# NOTE(review): np.min raises on an empty deep_depth_around and the bare except
# swallows it, so a candidate without any neighbouring minimum appears to be
# dropped rather than kept - confirm this is intended.
try:
if np.min(deep_depth_around) < z[hill_main]:
pass
else:
peaks_fin_true.append(hill_main)
except:
pass
# For pairs of minima closer than 360, keep the one with the smaller smoothed value.
# NOTE(review): peaks_neg_fin_new is not used by the return statement below.
diff_peaks_annormal = diff_peaks[diff_peaks < 360]
if len(diff_peaks_annormal) > 0:
arg_help = np.array(range(len(diff_peaks)))
arg_help_ann = arg_help[diff_peaks < 360]
peaks_neg_fin_new = []
for ii in range(len(peaks_neg_fin)):
if ii in arg_help_ann:
arg_min = np.argmin([interest_neg_fin[ii], interest_neg_fin[ii + 1]])
if arg_min == 0:
peaks_neg_fin_new.append(peaks_neg_fin[ii])
else:
peaks_neg_fin_new.append(peaks_neg_fin[ii + 1])
elif (ii - 1) in arg_help_ann:
pass
else:
peaks_neg_fin_new.append(peaks_neg_fin[ii])
else:
peaks_neg_fin_new = peaks_neg_fin
# sometime pages with one columns gives also some negative peaks. delete those peaks
# Depth of each minimum as a percentage of the reference height.
# NOTE(review): clearing peaks_neg_true here does not affect the returned
# peaks_fin_true, which was built earlier.
param = z[peaks_neg_true] / float(min_peaks_pos) * 100
if len(param[param <= 41]) == 0:
peaks_neg_true = []
return len(peaks_fin_true), peaks_fin_true
def find_num_col_by_vertical_lines(self, regions_without_seperators, multiplier=3.8):
    """Return the indices of the maxima of the smoothed column profile.

    The first channel of ``regions_without_seperators`` is summed over
    axis 0, the resulting profile is smoothed with a Gaussian of standard
    deviation 35, and every peak whose height is at least 0 is reported.
    ``multiplier`` is accepted for signature compatibility and is not used.
    """
    smoothing_sigma = 35  # 70#35
    column_profile = regions_without_seperators[:, :, 0].sum(axis=0)
    smoothed_profile = gaussian_filter1d(column_profile, smoothing_sigma)
    peak_positions = find_peaks(smoothed_profile, height=0)[0]
    return peak_positions
def find_num_col(self, regions_without_seperators, multiplier=3.8):
# Purpose: locate candidate column separators as minima of the smoothed
# column-sum profile of `regions_without_seperators`.
# Returns a tuple (len(peaks_neg_true), peaks_neg_true): the number of retained
# minima (not the internal num_col, which is that count + 1) and their indices.
# `multiplier` sets the depth threshold: a minimum is kept only when its smoothed
# value is below min_peaks_pos - min_peaks_pos / multiplier.
# NOTE(review): np.min/np.max below raise on an empty interest_pos; some visible
# call sites wrap this call in try/except, others do not - verify.
# NOTE(review): indentation is not visible in this view; the nesting described in
# the comments below is inferred from statement order - confirm against the file.
# Sum over axis 0: one value per index of axis 1.
regions_without_seperators_0 = regions_without_seperators[:, :].sum(axis=0)
##plt.plot(regions_without_seperators_0)
##plt.show()
# Standard deviation of the Gaussian used to smooth the profile.
sigma_ = 35 # 70#35
# Reversed copy of the profile, used to find the last non-zero entry.
meda_n_updown = regions_without_seperators_0[len(regions_without_seperators_0) :: -1]
# Index of the first non-zero entry (0 when the profile is all zero).
first_nonzero = next((i for i, x in enumerate(regions_without_seperators_0) if x), 0)
last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0)
# print(last_nonzero)
# print(isNaN(last_nonzero))
# last_nonzero=0#halalikh
# Convert the offset found in the reversed profile back to a forward position.
last_nonzero = len(regions_without_seperators_0) - last_nonzero
y = regions_without_seperators_0 # [first_nonzero:last_nonzero]
# Pad the profile with 10 zeros on each side.
y_help = np.zeros(len(y) + 20)
y_help[10 : len(y) + 10] = y
x = np.array(range(len(y)))
# Invert the padded profile so that its minima become maxima, then pad again by 10.
zneg_rev = -y_help + np.max(y_help)
zneg = np.zeros(len(zneg_rev) + 20)
zneg[10 : len(zneg_rev) + 10] = zneg_rev
# z: smoothed original profile; zneg: smoothed inverted (padded) profile.
z = gaussian_filter1d(y, sigma_)
zneg = gaussian_filter1d(zneg, sigma_)
peaks_neg, _ = find_peaks(zneg, height=0)
peaks, _ = find_peaks(z, height=0)
# Undo the two 10-sample paddings so indices refer to the unpadded profile.
peaks_neg = peaks_neg - 10 - 10
# Shrink the accepted range: 200 samples after the first non-zero position and
# 100 samples before the last one.
last_nonzero = last_nonzero - 100
first_nonzero = first_nonzero + 200
peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)]
# Keep maxima inside the central 6%..94% of the width (shape[1]).
peaks = peaks[(peaks > 0.06 * regions_without_seperators.shape[1]) & (peaks < 0.94 * regions_without_seperators.shape[1])]
# Keep minima more than 370 samples away from either edge.
peaks_neg = peaks_neg[(peaks_neg > 370) & (peaks_neg < (regions_without_seperators.shape[1] - 370))]
# print(peaks)
# Smoothed values at the maxima, ignoring those not above 10.
interest_pos = z[peaks]
interest_pos = interest_pos[interest_pos > 10]
# plt.plot(z)
# plt.show()
# Smoothed values at the candidate minima.
interest_neg = z[peaks_neg]
# Reference height: the smallest retained maximum, or the mean of the maxima when
# the largest is at least 35 times the smallest.
min_peaks_pos = np.min(interest_pos)
max_peaks_pos = np.max(interest_pos)
if max_peaks_pos / min_peaks_pos >= 35:
min_peaks_pos = np.mean(interest_pos)
min_peaks_neg = 0 # np.min(interest_neg)
# print(np.min(interest_pos),np.max(interest_pos),np.max(interest_pos)/np.min(interest_pos),'minmax')
# $print(min_peaks_pos)
dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier
# print(interest_pos)
# Depth threshold: only minima whose smoothed value is below it are kept.
grenze = min_peaks_pos - dis_talaei # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
# print(interest_neg,'interest_neg')
# print(grenze,'grenze')
# print(min_peaks_pos,'min_peaks_pos')
# print(dis_talaei,'dis_talaei')
# print(peaks_neg,'peaks_neg')
interest_neg_fin = interest_neg[(interest_neg < grenze)]
peaks_neg_fin = peaks_neg[(interest_neg < grenze)]
# interest_neg_fin=interest_neg[(interest_neg<grenze)]
# Provisional column count: one more than the number of retained minima.
num_col = (len(interest_neg_fin)) + 1
# print(peaks_neg_fin,'peaks_neg_fin')
# print(num_col,'diz')
# Reference positions along the profile: start, end, middle and the outer quarters.
p_l = 0
p_u = len(y) - 1
p_m = int(len(y) / 2.0)
p_g_l = int(len(y) / 4.0)
p_g_u = len(y) - int(len(y) / 4.0)
# Two minima that both lie in the same outer quarter, or both on the same side of
# the middle (with a 200-sample offset applied to the first one), are discarded.
if num_col == 3:
if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or ((peaks_neg_fin[0] + 200) < p_m and peaks_neg_fin[1] < p_m) or ((peaks_neg_fin[0] - 200) > p_m and peaks_neg_fin[1] > p_m):
num_col = 1
peaks_neg_fin = []
else:
pass
# A single minimum lying in an outer quarter is discarded as well.
if num_col == 2:
if (peaks_neg_fin[0] > p_g_u) or (peaks_neg_fin[0] < p_g_l):
num_col = 1
peaks_neg_fin = []
else:
pass
##print(len(peaks_neg_fin))
# Distances between consecutive retained minima.
diff_peaks = np.abs(np.diff(peaks_neg_fin))
# Minima closer than cut_off are collected into one group ("forest"); from each
# group the member with the smallest smoothed value is kept in peaks_neg_true.
cut_off = 400
peaks_neg_true = []
forest = []
# print(len(peaks_neg_fin),'len_')
for i in range(len(peaks_neg_fin)):
if i == 0:
forest.append(peaks_neg_fin[i])
if i < (len(peaks_neg_fin) - 1):
if diff_peaks[i] <= cut_off:
forest.append(peaks_neg_fin[i + 1])
if diff_peaks[i] > cut_off:
# print(forest[np.argmin(z[forest]) ] )
# isNaN is defined outside this block; presumably a NaN test - verify.
if not isNaN(forest[np.argmin(z[forest])]):
peaks_neg_true.append(forest[np.argmin(z[forest])])
forest = []
forest.append(peaks_neg_fin[i + 1])
if i == (len(peaks_neg_fin) - 1):
# print(print(forest[np.argmin(z[forest]) ] ))
if not isNaN(forest[np.argmin(z[forest])]):
peaks_neg_true.append(forest[np.argmin(z[forest])])
# Recompute the count from the grouped minima and repeat the position checks.
# Despite its name, p_quarter is one fifth of the profile length here.
num_col = (len(peaks_neg_true)) + 1
p_l = 0
p_u = len(y) - 1
p_m = int(len(y) / 2.0)
p_quarter = int(len(y) / 5.0)
p_g_l = int(len(y) / 4.0)
p_g_u = len(y) - int(len(y) / 4.0)
p_u_quarter = len(y) - p_quarter
##print(num_col,'early')
if num_col == 3:
if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or (peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or (peaks_neg_true[0] < p_m and (peaks_neg_true[1] + 200) < p_m) or ((peaks_neg_true[0] - 200) > p_m and peaks_neg_true[1] > p_m):
num_col = 1
peaks_neg_true = []
# First minimum is between the outer quarters and the second is in the last fifth:
# keep only the first.
elif (peaks_neg_true[0] < p_g_u and peaks_neg_true[0] > p_g_l) and (peaks_neg_true[1] > p_u_quarter):
peaks_neg_true = [peaks_neg_true[0]]
# Second minimum is between the outer quarters and the first is in the first fifth:
# keep only the second.
elif (peaks_neg_true[1] < p_g_u and peaks_neg_true[1] > p_g_l) and (peaks_neg_true[0] < p_quarter):
peaks_neg_true = [peaks_neg_true[1]]
else:
pass
if num_col == 2:
if (peaks_neg_true[0] > p_g_u) or (peaks_neg_true[0] < p_g_l):
num_col = 1
peaks_neg_true = []
else:
pass
# For pairs of minima closer than 360, keep the one with the smaller smoothed value.
# NOTE(review): peaks_neg_fin_new is not used by the return statement below.
diff_peaks_annormal = diff_peaks[diff_peaks < 360]
if len(diff_peaks_annormal) > 0:
arg_help = np.array(range(len(diff_peaks)))
arg_help_ann = arg_help[diff_peaks < 360]
peaks_neg_fin_new = []
for ii in range(len(peaks_neg_fin)):
if ii in arg_help_ann:
arg_min = np.argmin([interest_neg_fin[ii], interest_neg_fin[ii + 1]])
if arg_min == 0:
peaks_neg_fin_new.append(peaks_neg_fin[ii])
else:
peaks_neg_fin_new.append(peaks_neg_fin[ii + 1])
elif (ii - 1) in arg_help_ann:
pass
else:
peaks_neg_fin_new.append(peaks_neg_fin[ii])
else:
peaks_neg_fin_new = peaks_neg_fin
# plt.plot(gaussian_filter1d(y, sigma_))
# plt.plot(peaks_neg_true,z[peaks_neg_true],'*')
# plt.plot([0,len(y)], [grenze,grenze])
# plt.show()
##print(len(peaks_neg_true))
return len(peaks_neg_true), peaks_neg_true
def return_points_with_boundies(self, peaks_neg_fin, first_point, last_point):
peaks_neg_tot = []
peaks_neg_tot.append(first_point)
@ -4502,23 +4111,6 @@ class eynollah:
peaks_neg_tot.append(last_point)
return peaks_neg_tot
def contours_in_same_horizon(self, cy_main_hor):
    """Group entries whose values lie within 20 units of each other.

    For every index ``i`` the indices ``j`` with
    ``abs(cy_main_hor[i] - cy_main_hor[j]) <= 20`` are collected into one
    group (``i`` itself always belongs to its own group unless its value is
    NaN, in which case no group is produced for it).

    Args:
        cy_main_hor: one-dimensional sequence of numeric values.

    Returns:
        * When the groups have different lengths: a 1-D object array whose
          elements are ``list`` objects, sorted lexicographically with
          duplicates removed.
        * When all groups have the same length (or there are none): the
          result of ``np.unique`` on the rectangular list, i.e. a flat,
          sorted array of the distinct indices. NOTE(review): this
          flattening is kept from the previous implementation because
          visible callers branch on ``type(result[0]) is list``; confirm it
          is really what they expect.
    """
    centres = np.asarray(cy_main_hor, dtype=float)
    count = len(centres)
    # Pairwise absolute differences between all values.
    pairwise = np.abs(centres[:, None] - centres[None, :])
    indices = np.arange(count)

    groups = []
    for i in range(count):
        members = list(indices[pairwise[i, :] <= 20])
        members.append(i)
        if len(members) > 1:
            groups.append(list(set(members)))

    if len({len(group) for group in groups}) <= 1:
        # Rectangular (or empty) input: np.unique can convert it directly.
        return np.unique(groups)

    # Ragged groups: recent NumPy releases refuse to build an array from
    # them implicitly, so de-duplicate by hand and fill an object array
    # element by element.
    distinct = []
    for group in sorted(groups):
        if not distinct or group != distinct[-1]:
            distinct.append(group)
    result = np.empty(len(distinct), dtype=object)
    for position, group in enumerate(distinct):
        result[position] = group
    return result
def return_boxes_of_images_by_order_of_reading_without_seperators(self, spliter_y_new, image_p_rev, regions_without_seperators, matrix_of_lines_ch, seperators_closeup_n):
boxes = []
@ -4542,7 +4134,7 @@ class eynollah:
##plt.show()
###find_num_col_both_layout_and_org(regions_without_seperators,image_page[int(spliter_y_new[i]):int(spliter_y_new[i+1]),:,:],7.)
num_col, peaks_neg_fin = self.find_num_col_only_image(image_p_rev[int(spliter_y_new[i]) : int(spliter_y_new[i + 1]), :], multiplier=2.4)
num_col, peaks_neg_fin = find_num_col_only_image(image_p_rev[int(spliter_y_new[i]) : int(spliter_y_new[i + 1]), :], multiplier=2.4)
# num_col, peaks_neg_fin=find_num_col(regions_without_seperators[int(spliter_y_new[i]):int(spliter_y_new[i+1]),:],multiplier=7.0)
x_min_hor_some = matrix_new[:, 2][(matrix_new[:, 9] == 0)]
@ -4664,7 +4256,7 @@ class eynollah:
# print(matrix_new_new,newest_y_spliter[n],newest_y_spliter[n+1],newest_peaks[j],newest_peaks[j+1],'gada')
if 1 > 0: # len( matrix_new_new[:,9][matrix_new_new[:,9]==1] )>0 and np.max(matrix_new_new[:,8][matrix_new_new[:,9]==1])>=0.2*(np.abs(newest_y_spliter[n+1]-newest_y_spliter[n] )):
# num_col_sub, peaks_neg_fin_sub=find_num_col(regions_without_seperators[int(newest_y_spliter[n]):int(newest_y_spliter[n+1]),newest_peaks[j]:newest_peaks[j+1]],multiplier=2.3)
num_col_sub, peaks_neg_fin_sub = self.find_num_col_only_image(image_p_rev[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=2.4)
num_col_sub, peaks_neg_fin_sub = find_num_col_only_image(image_p_rev[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=2.4)
else:
peaks_neg_fin_sub = []
@ -4699,7 +4291,7 @@ class eynollah:
if len(cy_child_in) > 0:
###num_col_ch, peaks_neg_ch=find_num_col( regions_without_seperators[int(newest_y_spliter[n]):int(newest_y_spliter[n+1]),newest_peaks[j]:newest_peaks[j+1]],multiplier=2.3)
num_col_ch, peaks_neg_ch = self.find_num_col_only_image(image_p_rev[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=2.3)
num_col_ch, peaks_neg_ch = find_num_col_only_image(image_p_rev[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=2.3)
peaks_neg_ch = peaks_neg_ch[:] + newest_peaks[j]
@ -4731,7 +4323,7 @@ class eynollah:
if 1 > 0: # len( matrix_new_new2[:,9][matrix_new_new2[:,9]==1] )>0 and np.max(matrix_new_new2[:,8][matrix_new_new2[:,9]==1])>=0.2*(np.abs(newest_y_spliter_h[nd+1]-newest_y_spliter_h[nd] )):
# num_col_sub_ch, peaks_neg_fin_sub_ch=find_num_col(regions_without_seperators[int(newest_y_spliter_h[nd]):int(newest_y_spliter_h[nd+1]),nst_p_ch[jn]:nst_p_ch[jn+1]],multiplier=2.3)
num_col_sub_ch, peaks_neg_fin_sub_ch = self.find_num_col_only_image(image_p_rev[int(newest_y_spliter_h[nd]) : int(newest_y_spliter_h[nd + 1]), nst_p_ch[jn] : nst_p_ch[jn + 1]], multiplier=2.3)
num_col_sub_ch, peaks_neg_fin_sub_ch = find_num_col_only_image(image_p_rev[int(newest_y_spliter_h[nd]) : int(newest_y_spliter_h[nd + 1]), nst_p_ch[jn] : nst_p_ch[jn + 1]], multiplier=2.3)
# print(peaks_neg_fin_sub_ch,'gada kutullllllll')
else:
peaks_neg_fin_sub_ch = []
@ -4755,7 +4347,7 @@ class eynollah:
# print(matrix_new_new,newest_y_spliter[n],newest_y_spliter[n+1],newest_peaks[j],newest_peaks[j+1],'gada')
if 1 > 0: # len( matrix_new_new[:,9][matrix_new_new[:,9]==1] )>0 and np.max(matrix_new_new[:,8][matrix_new_new[:,9]==1])>=0.2*(np.abs(newest_y_spliter[n+1]-newest_y_spliter[n] )):
###num_col_sub, peaks_neg_fin_sub=find_num_col(regions_without_seperators[int(newest_y_spliter[n]):int(newest_y_spliter[n+1]),newest_peaks[j]:newest_peaks[j+1]],multiplier=2.3)
num_col_sub, peaks_neg_fin_sub = self.find_num_col_only_image(image_p_rev[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=2.3)
num_col_sub, peaks_neg_fin_sub = find_num_col_only_image(image_p_rev[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=2.3)
else:
peaks_neg_fin_sub = []
@ -4784,7 +4376,7 @@ class eynollah:
# print(matrix_new_new,newest_y_spliter[n],newest_y_spliter[n+1],newest_peaks[j],newest_peaks[j+1],'gada')
if 1 > 0: # len( matrix_new_new[:,9][matrix_new_new[:,9]==1] )>0 and np.max(matrix_new_new[:,8][matrix_new_new[:,9]==1])>=0.2*(np.abs(newest_y_spliter[n+1]-newest_y_spliter[n] )):
###num_col_sub, peaks_neg_fin_sub=find_num_col(regions_without_seperators[int(newest_y_spliter[n]):int(newest_y_spliter[n+1]),newest_peaks[j]:newest_peaks[j+1]],multiplier=5.0)
num_col_sub, peaks_neg_fin_sub = self.find_num_col_only_image(image_p_rev[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=2.3)
num_col_sub, peaks_neg_fin_sub = find_num_col_only_image(image_p_rev[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=2.3)
else:
peaks_neg_fin_sub = []
@ -4829,7 +4421,7 @@ class eynollah:
###find_num_col_both_layout_and_org(regions_without_seperators,image_page[int(spliter_y_new[i]):int(spliter_y_new[i+1]),:,:],7.)
try:
num_col, peaks_neg_fin = self.find_num_col_only_image(image_p_rev[int(spliter_y_new[i]) : int(spliter_y_new[i + 1]), :], multiplier=2.4)
num_col, peaks_neg_fin = find_num_col_only_image(image_p_rev[int(spliter_y_new[i]) : int(spliter_y_new[i + 1]), :], multiplier=2.4)
except:
peaks_neg_fin = []
num_col = 0
@ -4921,7 +4513,7 @@ class eynollah:
for jv in range(len(args_hor)):
img_p_in_hor = cv2.fillPoly(img_in_hor, pts=[contours_lines[args_hor[jv]]], color=(1, 1, 1))
all_args_uniq = self.contours_in_same_horizon(cy_main_hor)
all_args_uniq = contours_in_same_horizon(cy_main_hor)
# print(all_args_uniq,'all_args_uniq')
if len(all_args_uniq) > 0:
if type(all_args_uniq[0]) is list:
@ -5083,7 +4675,7 @@ class eynollah:
slope_lines_hor, dist_x_hor, x_min_main_hor, x_max_main_hor, cy_main_hor, slope_lines_org_hor, y_min_main_hor, y_max_main_hor, cx_main_hor = find_features_of_lines(contours_lines_hor)
args_hor = np.array(range(len(slope_lines_hor)))
all_args_uniq = self.contours_in_same_horizon(cy_main_hor)
all_args_uniq = contours_in_same_horizon(cy_main_hor)
# print(all_args_uniq,'all_args_uniq')
if len(all_args_uniq) > 0:
if type(all_args_uniq[0]) is list:
@ -5173,7 +4765,7 @@ class eynollah:
##plt.show()
###find_num_col_both_layout_and_org(regions_without_seperators,image_page[int(spliter_y_new[i]):int(spliter_y_new[i+1]),:,:],7.)
num_col, peaks_neg_fin = self.find_num_col(regions_without_seperators[int(spliter_y_new[i]) : int(spliter_y_new[i + 1]), :], multiplier=7.0)
num_col, peaks_neg_fin = find_num_col(regions_without_seperators[int(spliter_y_new[i]) : int(spliter_y_new[i + 1]), :], multiplier=7.0)
# num_col, peaks_neg_fin=find_num_col(regions_without_seperators[int(spliter_y_new[i]):int(spliter_y_new[i+1]),:],multiplier=7.0)
x_min_hor_some = matrix_new[:, 2][(matrix_new[:, 9] == 0)]
@ -5302,7 +4894,7 @@ class eynollah:
matrix_new_new = matrix_of_lines_ch[:, :][(matrix_of_lines_ch[:, 9] == 1) & (matrix_of_lines_ch[:, 6] > newest_y_spliter[n]) & (matrix_of_lines_ch[:, 7] < newest_y_spliter[n + 1]) & ((matrix_of_lines_ch[:, 1] + 500) < newest_peaks[j + 1]) & ((matrix_of_lines_ch[:, 1] - 500) > newest_peaks[j])]
# print(matrix_new_new,newest_y_spliter[n],newest_y_spliter[n+1],newest_peaks[j],newest_peaks[j+1],'gada')
if len(matrix_new_new[:, 9][matrix_new_new[:, 9] == 1]) > 0 and np.max(matrix_new_new[:, 8][matrix_new_new[:, 9] == 1]) >= 0.2 * (np.abs(newest_y_spliter[n + 1] - newest_y_spliter[n])):
num_col_sub, peaks_neg_fin_sub = self.find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=5.0)
num_col_sub, peaks_neg_fin_sub = find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=5.0)
else:
peaks_neg_fin_sub = []
@ -5337,7 +4929,7 @@ class eynollah:
cy_child_in = cy_hor_some_sort_child[(cy_hor_some_sort_child > newest_y_spliter[n]) & (cy_hor_some_sort_child < newest_y_spliter[n + 1])]
if len(cy_child_in) > 0:
num_col_ch, peaks_neg_ch = self.find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=5.0)
num_col_ch, peaks_neg_ch = find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=5.0)
# print(peaks_neg_ch,'mizzzz')
# peaks_neg_ch=[]
# for djh in range(len(peaks_neg_ch)):
@ -5369,7 +4961,7 @@ class eynollah:
matrix_new_new2 = matrix_of_lines_ch[:, :][(matrix_of_lines_ch[:, 9] == 1) & (matrix_of_lines_ch[:, 6] > newest_y_spliter_h[nd]) & (matrix_of_lines_ch[:, 7] < newest_y_spliter_h[nd + 1]) & ((matrix_of_lines_ch[:, 1] + 500) < nst_p_ch[jn + 1]) & ((matrix_of_lines_ch[:, 1] - 500) > nst_p_ch[jn])]
# print(matrix_new_new,newest_y_spliter[n],newest_y_spliter[n+1],newest_peaks[j],newest_peaks[j+1],'gada')
if len(matrix_new_new2[:, 9][matrix_new_new2[:, 9] == 1]) > 0 and np.max(matrix_new_new2[:, 8][matrix_new_new2[:, 9] == 1]) >= 0.2 * (np.abs(newest_y_spliter_h[nd + 1] - newest_y_spliter_h[nd])):
num_col_sub_ch, peaks_neg_fin_sub_ch = self.find_num_col(regions_without_seperators[int(newest_y_spliter_h[nd]) : int(newest_y_spliter_h[nd + 1]), nst_p_ch[jn] : nst_p_ch[jn + 1]], multiplier=5.0)
num_col_sub_ch, peaks_neg_fin_sub_ch = find_num_col(regions_without_seperators[int(newest_y_spliter_h[nd]) : int(newest_y_spliter_h[nd + 1]), nst_p_ch[jn] : nst_p_ch[jn + 1]], multiplier=5.0)
else:
peaks_neg_fin_sub_ch = []
@ -5392,7 +4984,7 @@ class eynollah:
matrix_new_new = matrix_of_lines_ch[:, :][(matrix_of_lines_ch[:, 9] == 1) & (matrix_of_lines_ch[:, 6] > newest_y_spliter[n]) & (matrix_of_lines_ch[:, 7] < newest_y_spliter[n + 1]) & ((matrix_of_lines_ch[:, 1] + 500) < newest_peaks[j + 1]) & ((matrix_of_lines_ch[:, 1] - 500) > newest_peaks[j])]
# print(matrix_new_new,newest_y_spliter[n],newest_y_spliter[n+1],newest_peaks[j],newest_peaks[j+1],'gada')
if len(matrix_new_new[:, 9][matrix_new_new[:, 9] == 1]) > 0 and np.max(matrix_new_new[:, 8][matrix_new_new[:, 9] == 1]) >= 0.2 * (np.abs(newest_y_spliter[n + 1] - newest_y_spliter[n])):
num_col_sub, peaks_neg_fin_sub = self.find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=5.0)
num_col_sub, peaks_neg_fin_sub = find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=5.0)
else:
peaks_neg_fin_sub = []
@ -5417,7 +5009,7 @@ class eynollah:
matrix_new_new = matrix_of_lines_ch[:, :][(matrix_of_lines_ch[:, 9] == 1) & (matrix_of_lines_ch[:, 6] > newest_y_spliter[n]) & (matrix_of_lines_ch[:, 7] < newest_y_spliter[n + 1]) & ((matrix_of_lines_ch[:, 1] + 500) < newest_peaks[j + 1]) & ((matrix_of_lines_ch[:, 1] - 500) > newest_peaks[j])]
# print(matrix_new_new,newest_y_spliter[n],newest_y_spliter[n+1],newest_peaks[j],newest_peaks[j+1],'gada')
if len(matrix_new_new[:, 9][matrix_new_new[:, 9] == 1]) > 0 and np.max(matrix_new_new[:, 8][matrix_new_new[:, 9] == 1]) >= 0.2 * (np.abs(newest_y_spliter[n + 1] - newest_y_spliter[n])):
num_col_sub, peaks_neg_fin_sub = self.find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=5.0)
num_col_sub, peaks_neg_fin_sub = find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=5.0)
else:
peaks_neg_fin_sub = []
@ -5466,7 +5058,7 @@ class eynollah:
# plt.imshow(regions_without_seperators[int(spliter_y_new[i]):int(spliter_y_new[i+1]),:])
# plt.show()
try:
num_col, peaks_neg_fin = self.find_num_col(regions_without_seperators[int(spliter_y_new[i]) : int(spliter_y_new[i + 1]), :], multiplier=7.0)
num_col, peaks_neg_fin = find_num_col(regions_without_seperators[int(spliter_y_new[i]) : int(spliter_y_new[i + 1]), :], multiplier=7.0)
except:
peaks_neg_fin = []
@ -5597,7 +5189,7 @@ class eynollah:
if 1 > 0: # len( matrix_new_new[:,9][matrix_new_new[:,9]==1] )>0 and np.max(matrix_new_new[:,8][matrix_new_new[:,9]==1])>=0.2*(np.abs(newest_y_spliter[n+1]-newest_y_spliter[n] )):
# print( int(newest_y_spliter[n]),int(newest_y_spliter[n+1]),newest_peaks[j],newest_peaks[j+1] )
try:
num_col_sub, peaks_neg_fin_sub = self.find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=7.0)
num_col_sub, peaks_neg_fin_sub = find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=7.0)
except:
peaks_neg_fin_sub = []
else:
@ -5634,7 +5226,7 @@ class eynollah:
if len(cy_child_in) > 0:
try:
num_col_ch, peaks_neg_ch = self.find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=7.0)
num_col_ch, peaks_neg_ch = find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=7.0)
except:
peaks_neg_ch = []
# print(peaks_neg_ch,'mizzzz')
@ -5669,7 +5261,7 @@ class eynollah:
# print(matrix_new_new,newest_y_spliter[n],newest_y_spliter[n+1],newest_peaks[j],newest_peaks[j+1],'gada')
if 1 > 0: # len( matrix_new_new2[:,9][matrix_new_new2[:,9]==1] )>0 and np.max(matrix_new_new2[:,8][matrix_new_new2[:,9]==1])>=0.2*(np.abs(newest_y_spliter_h[nd+1]-newest_y_spliter_h[nd] )):
try:
num_col_sub_ch, peaks_neg_fin_sub_ch = self.find_num_col(regions_without_seperators[int(newest_y_spliter_h[nd]) : int(newest_y_spliter_h[nd + 1]), nst_p_ch[jn] : nst_p_ch[jn + 1]], multiplier=7.0)
num_col_sub_ch, peaks_neg_fin_sub_ch = find_num_col(regions_without_seperators[int(newest_y_spliter_h[nd]) : int(newest_y_spliter_h[nd + 1]), nst_p_ch[jn] : nst_p_ch[jn + 1]], multiplier=7.0)
except:
peaks_neg_fin_sub_ch = []
@ -5695,7 +5287,7 @@ class eynollah:
# print(matrix_new_new,newest_y_spliter[n],newest_y_spliter[n+1],newest_peaks[j],newest_peaks[j+1],'gada')
if 1 > 0: # len( matrix_new_new[:,9][matrix_new_new[:,9]==1] )>0 and np.max(matrix_new_new[:,8][matrix_new_new[:,9]==1])>=0.2*(np.abs(newest_y_spliter[n+1]-newest_y_spliter[n] )):
try:
num_col_sub, peaks_neg_fin_sub = self.find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=7.0)
num_col_sub, peaks_neg_fin_sub = find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=7.0)
except:
peaks_neg_fin_sub = []
else:
@ -5723,7 +5315,7 @@ class eynollah:
# print(matrix_new_new,newest_y_spliter[n],newest_y_spliter[n+1],newest_peaks[j],newest_peaks[j+1],'gada')
if 1 > 0: # len( matrix_new_new[:,9][matrix_new_new[:,9]==1] )>0 and np.max(matrix_new_new[:,8][matrix_new_new[:,9]==1])>=0.2*(np.abs(newest_y_spliter[n+1]-newest_y_spliter[n] )):
try:
num_col_sub, peaks_neg_fin_sub = self.find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=5.0)
num_col_sub, peaks_neg_fin_sub = find_num_col(regions_without_seperators[int(newest_y_spliter[n]) : int(newest_y_spliter[n + 1]), newest_peaks[j] : newest_peaks[j + 1]], multiplier=5.0)
except:
peaks_neg_fin_sub = []
else:
@ -5770,7 +5362,7 @@ class eynollah:
###find_num_col_both_layout_and_org(regions_without_seperators,image_page[int(spliter_y_new[i]):int(spliter_y_new[i+1]),:,:],7.)
try:
num_col, peaks_neg_fin = self.find_num_col(regions_without_seperators[int(spliter_y_new[i]) : int(spliter_y_new[i + 1]), :], multiplier=7.0)
num_col, peaks_neg_fin = find_num_col(regions_without_seperators[int(spliter_y_new[i]) : int(spliter_y_new[i + 1]), :], multiplier=7.0)
except:
peaks_neg_fin = []
@ -6004,6 +5596,21 @@ class eynollah:
return y_min_main, y_max_main
def find_features_of_contours(self, contours_main):
    """Return the vertical extent and the area of every contour.

    Args:
        contours_main: sequence of contours; each one is indexed as
            ``contour[:, 0, 1]`` to read its y-coordinates and is passed to
            ``cv2.contourArea``. (Presumably the usual OpenCV (N, 1, 2)
            point layout - verify against the caller.)

    Returns:
        Tuple ``(y_min_main, y_max_main, areas_main)`` of NumPy arrays with
        one entry per contour: smallest y, largest y and contour area.
    """
    # Only the values that are actually returned are computed; the moments,
    # centroids and x-extents that used to be calculated here were discarded.
    areas_main = np.array([cv2.contourArea(contour) for contour in contours_main])
    y_min_main = np.array([np.min(contour[:, 0, 1]) for contour in contours_main])
    y_max_main = np.array([np.max(contour[:, 0, 1]) for contour in contours_main])
    return y_min_main, y_max_main, areas_main
def add_tables_heuristic_to_layout(self, image_regions_eraly_p, boxes, slope_mean_hor, spliter_y, peaks_neg_tot, image_revised):
image_revised_1 = self.delete_seperator_around(spliter_y, peaks_neg_tot, image_revised)
@ -6119,20 +5726,6 @@ class eynollah:
##plt.show()
return image_revised_last
def find_features_of_contours(self, contours_main):
    """Return the vertical extent and the area of every contour.

    Args:
        contours_main: sequence of contours; each one is indexed as
            ``contour[:, 0, 1]`` to read its second coordinate (named ``y``
            here) and is passed to ``cv2.contourArea``.

    Returns:
        Tuple ``(y_min_main, y_max_main, areas_main)`` of numpy arrays with
        one entry per contour, in input order.
    """
    # The moments, centroids and x-extents that used to be computed here were
    # never returned or read, so that per-contour work has been dropped.
    areas_main = np.array([cv2.contourArea(contour) for contour in contours_main])
    y_min_main = np.array([np.min(contour[:, 0, 1]) for contour in contours_main])
    y_max_main = np.array([np.max(contour[:, 0, 1]) for contour in contours_main])
    return y_min_main, y_max_main, areas_main
def remove_headers_and_mains_intersection(self, seperators_closeup_n, img_revised_tab, boxes):
for ind in range(len(boxes)):
asp = np.zeros((img_revised_tab[:, :, 0].shape[0], seperators_closeup_n[:, :, 0].shape[1]))
@ -6815,11 +6408,11 @@ class eynollah:
##plt.imshow(regions_without_seperators_teil)
##plt.show()
# num_col, peaks_neg_fin=self.find_num_col(regions_without_seperators_teil,multiplier=6.0)
# num_col, peaks_neg_fin=find_num_col(regions_without_seperators_teil,multiplier=6.0)
# regions_without_seperators_teil=cv2.erode(regions_without_seperators_teil,kernel,iterations = 3)
#
num_col, peaks_neg_fin = self.find_num_col(regions_without_seperators_teil, multiplier=7.0)
num_col, peaks_neg_fin = find_num_col(regions_without_seperators_teil, multiplier=7.0)
if num_col > num_col_fin:
num_col_fin = num_col
@ -6827,19 +6420,19 @@ class eynollah:
"""
#print(length_y_vertical_lines,length_y_threshold,'x_center_of_ver_linesx_center_of_ver_linesx_center_of_ver_lines')
if len(cx_main_ver)>0 and len( dist_y_ver[dist_y_ver>=length_y_threshold] ) >=1:
num_col, peaks_neg_fin=self.find_num_col(regions_without_seperators_teil,multiplier=6.0)
num_col, peaks_neg_fin=find_num_col(regions_without_seperators_teil,multiplier=6.0)
else:
#plt.imshow(image_page_background_zero_teil)
#plt.show()
#num_col, peaks_neg_fin=self.find_num_col_only_image(image_page_background_zero,multiplier=2.4)#2.3)
num_col, peaks_neg_fin=self.find_num_col_only_image(image_page_background_zero_teil,multiplier=3.4)#2.3)
#num_col, peaks_neg_fin=find_num_col_only_image(image_page_background_zero,multiplier=2.4)#2.3)
num_col, peaks_neg_fin=find_num_col_only_image(image_page_background_zero_teil,multiplier=3.4)#2.3)
print(num_col,'birda')
if num_col>0:
pass
elif num_col==0:
print(num_col,'birda2222')
num_col_regions, peaks_neg_fin_regions=self.find_num_col(regions_without_seperators_teil,multiplier=10.0)
num_col_regions, peaks_neg_fin_regions=find_num_col(regions_without_seperators_teil,multiplier=10.0)
if num_col_regions==0:
pass
else:
@ -6851,7 +6444,7 @@ class eynollah:
# print(num_col+1,'num colmsssssssss')
if len(args_big_parts) == 1 and (len(peaks_neg_fin_fin) + 1) < num_col_classifier:
peaks_neg_fin = self.find_num_col_by_vertical_lines(vertical)
peaks_neg_fin = find_num_col_by_vertical_lines(vertical)
peaks_neg_fin = peaks_neg_fin[peaks_neg_fin >= 500]
peaks_neg_fin = peaks_neg_fin[peaks_neg_fin <= (vertical.shape[1] - 500)]
peaks_neg_fin_fin = peaks_neg_fin[:]
@ -7285,37 +6878,6 @@ class eynollah:
##plt.show()
return text_region2_1st_channel
def rotation_not_90_func(self, img, textline, text_regions_p_1, thetha):
    """Rotate the page image, text-line mask and layout map by ``thetha``.

    Each of the three arrays is passed through ``imutils.rotate`` with the
    same angle; the results are then cropped by ``self.rotate_max_area``,
    whose return value is returned unchanged.
    """
    turned_img, turned_lines, turned_layout = (
        imutils.rotate(layer, thetha) for layer in (img, textline, text_regions_p_1)
    )
    return self.rotate_max_area(img, turned_img, turned_lines, turned_layout, thetha)
def rotate_max_area(self, image, rotated, rotated_textline, rotated_layout, angle):
    """Cut the same centred window out of three already-rotated arrays.

    The window size comes from ``rotatedRectWithMaxArea`` called with the
    width and height of ``image`` and ``angle`` converted by ``math.radians``.
    The window is centred on the middle of ``rotated``, which must unpack to
    three dimensions.

    Returns:
        ``(rotated, rotated_textline, rotated_layout)`` each sliced to the window.
    """
    crop_w, crop_h = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
    rot_h, rot_w, _ = rotated.shape
    top = rot_h // 2 - int(crop_h / 2)
    left = rot_w // 2 - int(crop_w / 2)
    rows = slice(top, top + int(crop_h))
    cols = slice(left, left + int(crop_w))
    return rotated[rows, cols], rotated_textline[rows, cols], rotated_layout[rows, cols]
def rotation_not_90_func_full_layout(self, img, textline, text_regions_p_1, text_regions_p_fully, thetha):
    """Rotate image, text-line mask and both layout maps by ``thetha``.

    All four arrays go through ``imutils.rotate`` with the same angle and
    are then cropped together by ``self.rotate_max_area_full_layout``.
    """
    layers = (img, textline, text_regions_p_1, text_regions_p_fully)
    turned = [imutils.rotate(layer, thetha) for layer in layers]
    return self.rotate_max_area_full_layout(img, turned[0], turned[1], turned[2], turned[3], thetha)
def rotate_max_area_full_layout(self, image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle):
    """Cut the same centred window out of four already-rotated arrays.

    The window size comes from ``rotatedRectWithMaxArea`` called with the
    width and height of ``image`` and ``angle`` converted by ``math.radians``.
    The window is centred on the middle of ``rotated``, which must unpack to
    three dimensions.

    Returns:
        The four rotated inputs, each sliced to the window, in argument order.
    """
    crop_w, crop_h = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
    rot_h, rot_w, _ = rotated.shape
    top = rot_h // 2 - int(crop_h / 2)
    left = rot_w // 2 - int(crop_w / 2)
    rows = slice(top, top + int(crop_h))
    cols = slice(left, left + int(crop_w))
    return (
        rotated[rows, cols],
        rotated_textline[rows, cols],
        rotated_layout[rows, cols],
        rotated_layout_full[rows, cols],
    )
def get_regions_from_xy_2models_ens(self, img):
img_org = np.copy(img)
@ -9087,7 +8649,7 @@ class eynollah:
img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], self.kernel, iterations=6)
try:
num_col, peaks_neg_fin = self.find_num_col(img_only_regions, multiplier=6.0)
num_col, peaks_neg_fin = find_num_col(img_only_regions, multiplier=6.0)
if not num_column_is_classified:
num_col_classifier = num_col + 1
except:
@ -9157,7 +8719,7 @@ class eynollah:
##sys.exit()
print("deskewing: " + str(time.time() - t1))
image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] # self.rotation_not_90_func(image_page,textline_mask_tot_ea,slope_first)
image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] # rotation_not_90_func(image_page,textline_mask_tot_ea,slope_first)
textline_mask_tot[mask_images[:, :] == 1] = 0
pixel_img = 1
@ -9201,7 +8763,7 @@ class eynollah:
if not self.full_layout:
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n = self.rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, slope_deskew)
image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, slope_deskew)
text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
@ -9332,7 +8894,7 @@ class eynollah:
# plt.show()
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = self.rotation_not_90_func_full_layout(image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew)
image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew)
text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])

@ -107,6 +107,38 @@ def rotyate_image_different( img, slope):
img_rotation = cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows))
return img_rotation
def rotate_max_area(image, rotated, rotated_textline, rotated_layout, angle):
    """Crop three rotated arrays to one shared, centred window.

    ``rotatedRectWithMaxArea`` is asked for a width/height pair using the
    width and height of ``image`` and ``angle`` converted with
    ``math.radians``. A window of that size, centred on the middle of
    ``rotated`` (which must unpack to three dimensions), is then sliced out
    of every rotated input.

    Returns:
        ``(rotated, rotated_textline, rotated_layout)`` restricted to the window.
    """
    win_w, win_h = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
    full_h, full_w, _ = rotated.shape
    y_start = full_h // 2 - int(win_h / 2)
    y_stop = y_start + int(win_h)
    x_start = full_w // 2 - int(win_w / 2)
    x_stop = x_start + int(win_w)
    cropped = [
        layer[y_start:y_stop, x_start:x_stop]
        for layer in (rotated, rotated_textline, rotated_layout)
    ]
    return cropped[0], cropped[1], cropped[2]
def rotation_not_90_func(img, textline, text_regions_p_1, thetha):
    """Rotate image, text-line mask and layout map, then crop them together.

    Every input is rotated with ``imutils.rotate(..., thetha)``; the three
    results are handed to ``rotate_max_area`` together with the unrotated
    ``img`` and the angle, and its result is returned as-is.
    """
    img_turned = imutils.rotate(img, thetha)
    lines_turned = imutils.rotate(textline, thetha)
    layout_turned = imutils.rotate(text_regions_p_1, thetha)
    cropped = rotate_max_area(img, img_turned, lines_turned, layout_turned, thetha)
    return cropped
def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regions_p_fully, thetha):
    """Rotate image, text-line mask and both layout maps, then crop them together.

    Every input is rotated with ``imutils.rotate(..., thetha)``; the four
    results are handed to ``rotate_max_area_full_layout`` together with the
    unrotated ``img`` and the angle, and its result is returned as-is.
    """
    layers = (img, textline, text_regions_p_1, text_regions_p_fully)
    turned = [imutils.rotate(layer, thetha) for layer in layers]
    return rotate_max_area_full_layout(img, *turned, thetha)
def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle):
    """Crop four rotated arrays to one shared, centred window.

    ``rotatedRectWithMaxArea`` is asked for a width/height pair using the
    width and height of ``image`` and ``angle`` converted with
    ``math.radians``. A window of that size, centred on the middle of
    ``rotated`` (which must unpack to three dimensions), is then sliced out
    of every rotated input.

    Returns:
        The four rotated inputs restricted to the window, in argument order.
    """
    win_w, win_h = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
    full_h, full_w, _ = rotated.shape
    y_start = full_h // 2 - int(win_h / 2)
    y_stop = y_start + int(win_h)
    x_start = full_w // 2 - int(win_w / 2)
    x_stop = x_start + int(win_w)
    cropped = [
        layer[y_start:y_stop, x_start:x_stop]
        for layer in (rotated, rotated_textline, rotated_layout, rotated_layout_full)
    ]
    return cropped[0], cropped[1], cropped[2], cropped[3]
def crop_image_inside_box(box, img_org_copy):
    """Slice the rectangle described by ``box`` out of ``img_org_copy``.

    Args:
        box: indexable with four entries; ``box[0]``/``box[2]`` give the start
            and length along the second axis, ``box[1]``/``box[3]`` the start
            and length along the first axis.
        img_org_copy: array supporting two-axis slicing.

    Returns:
        ``(crop, [first_start, first_stop, second_start, second_stop])``.
    """
    col_start, row_start = box[0], box[1]
    col_stop = col_start + box[2]
    row_stop = row_start + box[3]
    crop = img_org_copy[row_start:row_stop, col_start:col_stop]
    return crop, [row_start, row_stop, col_start, col_stop]
@ -1669,3 +1701,419 @@ def find_new_features_of_contoures(contours_main):
# dis_x=np.abs(x_max_main-x_min_main)
return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
def find_num_col(regions_without_seperators, multiplier=3.8):
# Locate minima of the smoothed per-column sum of `regions_without_seperators`
# and return them as candidate positions between columns.
#
# Parameters:
#   regions_without_seperators: array sliced as [:, :] and summed over axis 0;
#       shape[1] is used as the width (assumes a 2-D array -- TODO confirm).
#   multiplier: divides the reference peak height to obtain the distance
#       between that height and the acceptance threshold `grenze`.
# Returns:
#   (len(peaks_neg_true), peaks_neg_true): the number of retained minima and
#   their indices. Note the first value is the count of minima, not the local
#   `num_col`; a caller visible in this file adds 1 to it.
# Raises:
#   np.min/np.max on an empty `interest_pos` raise ValueError; the call sites
#   visible in this file wrap the call in try/except.
#
# NOTE(review): indentation was lost in this rendering, so the nesting
# described in the comments below is inferred from the statement order.
regions_without_seperators_0 = regions_without_seperators[:, :].sum(axis=0)
##plt.plot(regions_without_seperators_0)
##plt.show()
# Standard deviation handed to gaussian_filter1d for both profiles below.
sigma_ = 35 # 70#35
# Reversed copy of the profile, used to find the last non-zero entry.
meda_n_updown = regions_without_seperators_0[len(regions_without_seperators_0) :: -1]
# Index of the first truthy entry from the left / from the right (0 if none).
first_nonzero = next((i for i, x in enumerate(regions_without_seperators_0) if x), 0)
last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0)
# print(last_nonzero)
# print(isNaN(last_nonzero))
# last_nonzero=0#halalikh
# Convert the from-the-right offset into an index counted from the left.
last_nonzero = len(regions_without_seperators_0) - last_nonzero
y = regions_without_seperators_0 # [first_nonzero:last_nonzero]
# Pad the profile with 10 zeros on each side.
y_help = np.zeros(len(y) + 20)
y_help[10 : len(y) + 10] = y
x = np.array(range(len(y)))
# Invert the padded profile so that its minima become maxima, then pad by
# another 10 zeros on each side (total offset of 20 against `y`).
zneg_rev = -y_help + np.max(y_help)
zneg = np.zeros(len(zneg_rev) + 20)
zneg[10 : len(zneg_rev) + 10] = zneg_rev
z = gaussian_filter1d(y, sigma_)
zneg = gaussian_filter1d(zneg, sigma_)
# peaks_neg: maxima of the inverted profile; peaks: maxima of the profile.
peaks_neg, _ = find_peaks(zneg, height=0)
peaks, _ = find_peaks(z, height=0)
# Undo the two 10-sample paddings so peaks_neg indexes into `y`/`z`.
peaks_neg = peaks_neg - 10 - 10
# Shrink the accepted range: 100 before the last and 200 after the first
# non-zero position.
last_nonzero = last_nonzero - 100
first_nonzero = first_nonzero + 200
peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)]
# Keep maxima inside the central 6%..94% of the width and minima at least
# 370 positions away from either border.
peaks = peaks[(peaks > 0.06 * regions_without_seperators.shape[1]) & (peaks < 0.94 * regions_without_seperators.shape[1])]
peaks_neg = peaks_neg[(peaks_neg > 370) & (peaks_neg < (regions_without_seperators.shape[1] - 370))]
# print(peaks)
# Smoothed heights at the maxima, ignoring those not above 10.
interest_pos = z[peaks]
interest_pos = interest_pos[interest_pos > 10]
# plt.plot(z)
# plt.show()
interest_neg = z[peaks_neg]
# Reference height: the smallest maximum, unless the largest is at least 35
# times bigger, in which case the mean of the maxima is used instead.
min_peaks_pos = np.min(interest_pos)
max_peaks_pos = np.max(interest_pos)
if max_peaks_pos / min_peaks_pos >= 35:
min_peaks_pos = np.mean(interest_pos)
min_peaks_neg = 0 # np.min(interest_neg)
# print(np.min(interest_pos),np.max(interest_pos),np.max(interest_pos)/np.min(interest_pos),'minmax')
# $print(min_peaks_pos)
dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier
# print(interest_pos)
# Threshold: a minimum is kept only if its smoothed value is below this.
grenze = min_peaks_pos - dis_talaei # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
# print(interest_neg,'interest_neg')
# print(grenze,'grenze')
# print(min_peaks_pos,'min_peaks_pos')
# print(dis_talaei,'dis_talaei')
# print(peaks_neg,'peaks_neg')
interest_neg_fin = interest_neg[(interest_neg < grenze)]
peaks_neg_fin = peaks_neg[(interest_neg < grenze)]
# interest_neg_fin=interest_neg[(interest_neg<grenze)]
num_col = (len(interest_neg_fin)) + 1
# print(peaks_neg_fin,'peaks_neg_fin')
# print(num_col,'diz')
# Reference positions along the profile: middle and outer quarters.
p_l = 0
p_u = len(y) - 1
p_m = int(len(y) / 2.0)
p_g_l = int(len(y) / 4.0)
p_g_u = len(y) - int(len(y) / 4.0)
# Two minima: discard both when they lie in the same outer quarter or on the
# same side of the middle (with a 200 offset applied to the first one).
if num_col == 3:
if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or ((peaks_neg_fin[0] + 200) < p_m and peaks_neg_fin[1] < p_m) or ((peaks_neg_fin[0] - 200) > p_m and peaks_neg_fin[1] > p_m):
num_col = 1
peaks_neg_fin = []
else:
pass
# One minimum: discard it when it lies in an outer quarter.
if num_col == 2:
if (peaks_neg_fin[0] > p_g_u) or (peaks_neg_fin[0] < p_g_l):
num_col = 1
peaks_neg_fin = []
else:
pass
##print(len(peaks_neg_fin))
# Merge minima that are close together: consecutive minima at most `cut_off`
# apart are collected in `forest`; when a larger gap follows (or the last
# minimum is reached) the member of `forest` with the smallest smoothed value
# is appended to peaks_neg_true. isNaN is defined outside this block.
diff_peaks = np.abs(np.diff(peaks_neg_fin))
cut_off = 400
peaks_neg_true = []
forest = []
# print(len(peaks_neg_fin),'len_')
for i in range(len(peaks_neg_fin)):
if i == 0:
forest.append(peaks_neg_fin[i])
if i < (len(peaks_neg_fin) - 1):
if diff_peaks[i] <= cut_off:
forest.append(peaks_neg_fin[i + 1])
if diff_peaks[i] > cut_off:
# print(forest[np.argmin(z[forest]) ] )
if not isNaN(forest[np.argmin(z[forest])]):
peaks_neg_true.append(forest[np.argmin(z[forest])])
forest = []
forest.append(peaks_neg_fin[i + 1])
if i == (len(peaks_neg_fin) - 1):
# print(print(forest[np.argmin(z[forest]) ] ))
if not isNaN(forest[np.argmin(z[forest])]):
peaks_neg_true.append(forest[np.argmin(z[forest])])
# Repeat the plausibility checks on the merged minima; here a fifth of the
# length (p_quarter) is used in addition to the quarter bounds.
num_col = (len(peaks_neg_true)) + 1
p_l = 0
p_u = len(y) - 1
p_m = int(len(y) / 2.0)
p_quarter = int(len(y) / 5.0)
p_g_l = int(len(y) / 4.0)
p_g_u = len(y) - int(len(y) / 4.0)
p_u_quarter = len(y) - p_quarter
##print(num_col,'early')
# Two merged minima: drop both if they sit in the same outer quarter or the
# same half; otherwise keep only the central one when the other lies in the
# outermost fifth.
if num_col == 3:
if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or (peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or (peaks_neg_true[0] < p_m and (peaks_neg_true[1] + 200) < p_m) or ((peaks_neg_true[0] - 200) > p_m and peaks_neg_true[1] > p_m):
num_col = 1
peaks_neg_true = []
elif (peaks_neg_true[0] < p_g_u and peaks_neg_true[0] > p_g_l) and (peaks_neg_true[1] > p_u_quarter):
peaks_neg_true = [peaks_neg_true[0]]
elif (peaks_neg_true[1] < p_g_u and peaks_neg_true[1] > p_g_l) and (peaks_neg_true[0] < p_quarter):
peaks_neg_true = [peaks_neg_true[1]]
else:
pass
# One merged minimum: drop it when it lies in an outer quarter.
if num_col == 2:
if (peaks_neg_true[0] > p_g_u) or (peaks_neg_true[0] < p_g_l):
num_col = 1
peaks_neg_true = []
else:
pass
# NOTE(review): the section below builds peaks_neg_fin_new, which is not read
# again before the return; it has no influence on the result.
diff_peaks_annormal = diff_peaks[diff_peaks < 360]
if len(diff_peaks_annormal) > 0:
arg_help = np.array(range(len(diff_peaks)))
arg_help_ann = arg_help[diff_peaks < 360]
peaks_neg_fin_new = []
for ii in range(len(peaks_neg_fin)):
if ii in arg_help_ann:
arg_min = np.argmin([interest_neg_fin[ii], interest_neg_fin[ii + 1]])
if arg_min == 0:
peaks_neg_fin_new.append(peaks_neg_fin[ii])
else:
peaks_neg_fin_new.append(peaks_neg_fin[ii + 1])
elif (ii - 1) in arg_help_ann:
pass
else:
peaks_neg_fin_new.append(peaks_neg_fin[ii])
else:
peaks_neg_fin_new = peaks_neg_fin
# plt.plot(gaussian_filter1d(y, sigma_))
# plt.plot(peaks_neg_true,z[peaks_neg_true],'*')
# plt.plot([0,len(y)], [grenze,grenze])
# plt.show()
##print(len(peaks_neg_true))
return len(peaks_neg_true), peaks_neg_true
def find_num_col_only_image(regions_without_seperators, multiplier=3.8):
# Locate minima of the smoothed per-column sum of `regions_without_seperators`
# and return those that have no lower neighbouring minimum.
#
# Parameters:
#   regions_without_seperators: array sliced as [:, :] and summed over axis 0;
#       shape[1] is used as the width (assumes a 2-D array -- TODO confirm).
#   multiplier: divides the reference peak height to obtain the distance
#       between that height and the acceptance threshold `grenze`.
# Returns:
#   (len(peaks_fin_true), peaks_fin_true): the number of retained minima and
#   their indices.
#
# NOTE(review): indentation was lost in this rendering, so the nesting
# described in the comments below is inferred from the statement order.
regions_without_seperators_0 = regions_without_seperators[:, :].sum(axis=0)
##plt.plot(regions_without_seperators_0)
##plt.show()
# Standard deviation handed to gaussian_filter1d for both profiles below.
sigma_ = 15
# Reversed copy of the profile, used to find the last non-zero entry.
meda_n_updown = regions_without_seperators_0[len(regions_without_seperators_0) :: -1]
# Index of the first truthy entry from the left / from the right (0 if none).
first_nonzero = next((i for i, x in enumerate(regions_without_seperators_0) if x), 0)
last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0)
# Convert the from-the-right offset into an index counted from the left.
last_nonzero = len(regions_without_seperators_0) - last_nonzero
y = regions_without_seperators_0 # [first_nonzero:last_nonzero]
# Pad the profile with 10 zeros on each side.
y_help = np.zeros(len(y) + 20)
y_help[10 : len(y) + 10] = y
x = np.array(range(len(y)))
# Invert the padded profile so that its minima become maxima, then pad by
# another 10 zeros on each side (total offset of 20 against `y`).
zneg_rev = -y_help + np.max(y_help)
zneg = np.zeros(len(zneg_rev) + 20)
zneg[10 : len(zneg_rev) + 10] = zneg_rev
z = gaussian_filter1d(y, sigma_)
zneg = gaussian_filter1d(zneg, sigma_)
# peaks_neg: maxima of the inverted profile; peaks: maxima of the profile.
peaks_neg, _ = find_peaks(zneg, height=0)
peaks, _ = find_peaks(z, height=0)
# Undo the two 10-sample paddings so peaks_neg indexes into `y`/`z`.
peaks_neg = peaks_neg - 10 - 10
# Unfiltered copy of all minima, used later for the neighbourhood check.
peaks_neg_org = np.copy(peaks_neg)
peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)]
# Keep maxima inside the central 9%..91% of the width and minima at least
# 500 positions away from either border.
peaks = peaks[(peaks > 0.09 * regions_without_seperators.shape[1]) & (peaks < 0.91 * regions_without_seperators.shape[1])]
peaks_neg = peaks_neg[(peaks_neg > 500) & (peaks_neg < (regions_without_seperators.shape[1] - 500))]
# print(peaks)
# Smoothed heights at the maxima, ignoring those not above 10.
interest_pos = z[peaks]
interest_pos = interest_pos[interest_pos > 10]
interest_neg = z[peaks_neg]
# Reference height is the mean of the retained maxima.
min_peaks_pos = np.mean(interest_pos) # np.min(interest_pos)
min_peaks_neg = 0 # np.min(interest_neg)
# $print(min_peaks_pos)
dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier
# print(interest_pos)
# Threshold: a minimum is kept only if its smoothed value is below this.
grenze = min_peaks_pos - dis_talaei # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
interest_neg_fin = interest_neg[(interest_neg < grenze)]
peaks_neg_fin = peaks_neg[(interest_neg < grenze)]
num_col = (len(interest_neg_fin)) + 1
# Reference positions along the profile: middle and outer thirds.
p_l = 0
p_u = len(y) - 1
p_m = int(len(y) / 2.0)
p_g_l = int(len(y) / 3.0)
p_g_u = len(y) - int(len(y) / 3.0)
# These two checks only reset num_col (peaks_neg_fin is left untouched);
# num_col is recomputed from peaks_neg_true further down.
if num_col == 3:
if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or (peaks_neg_fin[0] < p_m and peaks_neg_fin[1] < p_m) or (peaks_neg_fin[0] > p_m and peaks_neg_fin[1] > p_m):
num_col = 1
else:
pass
if num_col == 2:
if (peaks_neg_fin[0] > p_g_u) or (peaks_neg_fin[0] < p_g_l):
num_col = 1
else:
pass
# Merge minima that are close together: consecutive minima at most `cut_off`
# apart are collected in `forest`; when a larger gap follows (or the last
# minimum is reached) the member of `forest` with the smallest smoothed value
# is appended to peaks_neg_true. isNaN is defined outside this block.
diff_peaks = np.abs(np.diff(peaks_neg_fin))
cut_off = 400
peaks_neg_true = []
forest = []
for i in range(len(peaks_neg_fin)):
if i == 0:
forest.append(peaks_neg_fin[i])
if i < (len(peaks_neg_fin) - 1):
if diff_peaks[i] <= cut_off:
forest.append(peaks_neg_fin[i + 1])
if diff_peaks[i] > cut_off:
# print(forest[np.argmin(z[forest]) ] )
if not isNaN(forest[np.argmin(z[forest])]):
peaks_neg_true.append(forest[np.argmin(z[forest])])
forest = []
forest.append(peaks_neg_fin[i + 1])
if i == (len(peaks_neg_fin) - 1):
# print(print(forest[np.argmin(z[forest]) ] ))
if not isNaN(forest[np.argmin(z[forest])]):
peaks_neg_true.append(forest[np.argmin(z[forest])])
# Plausibility checks on the merged minima, using thirds (p_g_l/p_g_u) and
# quarters (p_quarter/p_u_quarter) of the profile length.
num_col = (len(peaks_neg_true)) + 1
p_l = 0
p_u = len(y) - 1
p_m = int(len(y) / 2.0)
p_quarter = int(len(y) / 4.0)
p_g_l = int(len(y) / 3.0)
p_g_u = len(y) - int(len(y) / 3.0)
p_u_quarter = len(y) - p_quarter
# Two merged minima: drop both if they sit in the same outer third or the
# same half; otherwise keep only the central one when the other lies in the
# outermost quarter.
if num_col == 3:
if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or (peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or (peaks_neg_true[0] < p_m and peaks_neg_true[1] < p_m) or (peaks_neg_true[0] > p_m and peaks_neg_true[1] > p_m):
num_col = 1
peaks_neg_true = []
elif (peaks_neg_true[0] < p_g_u and peaks_neg_true[0] > p_g_l) and (peaks_neg_true[1] > p_u_quarter):
peaks_neg_true = [peaks_neg_true[0]]
elif (peaks_neg_true[1] < p_g_u and peaks_neg_true[1] > p_g_l) and (peaks_neg_true[0] < p_quarter):
peaks_neg_true = [peaks_neg_true[1]]
else:
pass
# One merged minimum: drop it when it lies in an outer third.
if num_col == 2:
if (peaks_neg_true[0] > p_g_u) or (peaks_neg_true[0] < p_g_l):
num_col = 1
peaks_neg_true = []
# Three merged minima: drop all when exactly two of them fall into the same
# outer third.
if num_col == 4:
if len(np.array(peaks_neg_true)[np.array(peaks_neg_true) < p_g_l]) == 2 or len(np.array(peaks_neg_true)[np.array(peaks_neg_true) > (len(y) - p_g_l)]) == 2:
num_col = 1
peaks_neg_true = []
else:
pass
# no deeper hill around found hills
# For each merged minimum, look at all unfiltered minima within 400 positions
# on either side and keep it only if none of them has a smaller smoothed value.
# NOTE(review): when no other minimum lies within that range, np.min on the
# empty selection raises and the bare except skips the minimum, so it is not
# kept -- confirm this is intended.
peaks_fin_true = []
for i in range(len(peaks_neg_true)):
hill_main = peaks_neg_true[i]
# deep_depth=z[peaks_neg]
hills_around = peaks_neg_org[((peaks_neg_org > hill_main) & (peaks_neg_org <= hill_main + 400)) | ((peaks_neg_org < hill_main) & (peaks_neg_org >= hill_main - 400))]
deep_depth_around = z[hills_around]
# print(hill_main,z[hill_main],hills_around,deep_depth_around,'manoooo')
try:
if np.min(deep_depth_around) < z[hill_main]:
pass
else:
peaks_fin_true.append(hill_main)
except:
pass
# NOTE(review): the section below builds peaks_neg_fin_new, which is not read
# again before the return; it has no influence on the result.
diff_peaks_annormal = diff_peaks[diff_peaks < 360]
if len(diff_peaks_annormal) > 0:
arg_help = np.array(range(len(diff_peaks)))
arg_help_ann = arg_help[diff_peaks < 360]
peaks_neg_fin_new = []
for ii in range(len(peaks_neg_fin)):
if ii in arg_help_ann:
arg_min = np.argmin([interest_neg_fin[ii], interest_neg_fin[ii + 1]])
if arg_min == 0:
peaks_neg_fin_new.append(peaks_neg_fin[ii])
else:
peaks_neg_fin_new.append(peaks_neg_fin[ii + 1])
elif (ii - 1) in arg_help_ann:
pass
else:
peaks_neg_fin_new.append(peaks_neg_fin[ii])
else:
peaks_neg_fin_new = peaks_neg_fin
# sometime pages with one columns gives also some negative peaks. delete those peaks
# `param` is each merged minimum's smoothed value as a percentage of the
# reference height; if none is <= 41 the merged minima are cleared.
# NOTE(review): this clears peaks_neg_true, but the function returns
# peaks_fin_true, which was built above, so this step does not change the
# returned value -- confirm whether peaks_fin_true was meant to be cleared.
param = z[peaks_neg_true] / float(min_peaks_pos) * 100
if len(param[param <= 41]) == 0:
peaks_neg_true = []
return len(peaks_fin_true), peaks_fin_true
def find_num_col_by_vertical_lines(regions_without_seperators, multiplier=3.8):
    """Return the peak positions of the smoothed column sums of channel 0.

    Args:
        regions_without_seperators: array indexed as ``[:, :, 0]``; that
            plane is summed over axis 0 to give one value per column.
        multiplier: accepted for signature compatibility; not used here.

    Returns:
        Indices reported by ``find_peaks(..., height=0)`` on the profile
        smoothed with a Gaussian of sigma 35.
    """
    smoothing_sigma = 35
    first_channel = regions_without_seperators[:, :, 0]
    column_profile = first_channel.sum(axis=0)
    smoothed_profile = gaussian_filter1d(column_profile, smoothing_sigma)
    peak_positions, _properties = find_peaks(smoothed_profile, height=0)
    return peak_positions
def contours_in_same_horizon(cy_main_hor):
    """Collect indices of entries whose values lie within 20 of each other.

    For every index ``i`` the indices ``j`` with
    ``abs(cy_main_hor[i] - cy_main_hor[j]) <= 20`` are gathered (``i`` itself
    is always appended), de-duplicated, and stored as one group. All groups
    are finally passed to ``np.unique``.

    NOTE(review): when every group has the same length ``np.unique`` flattens
    them into a sorted 1-D array of indices; with groups of different lengths
    the nested list is ragged, which recent NumPy versions presumably reject
    when building the array -- confirm against the NumPy version in use.

    Args:
        cy_main_hor: 1-D sequence of numbers (sliced with ``[:]``).

    Returns:
        The result of ``np.unique`` over the list of index groups.
    """
    count = len(cy_main_hor)
    # Every row of `centres` holds the full input, so centres[i, j] is value j
    # and the transpose gives value i; their difference is the pairwise gap.
    centres = np.zeros((count, count))
    centres[:, :] = cy_main_hor[:]
    gaps = np.abs(centres.T - centres)
    indices = np.arange(count)
    groups = []
    for row in range(count):
        members = list(indices[gaps[row, :] <= 20])
        members.append(row)
        if len(members) > 1:
            groups.append(list(set(members)))
    return np.unique(groups)
def find_contours_mean_y_diff(contours_main):
    """Return the mean gap between the sorted ``m01 / m00`` values of the contours.

    For each contour ``cv2.moments`` is evaluated and ``m01 / (m00 + 1e-32)``
    is taken (the tiny constant avoids division by zero). The values are
    sorted and the mean of their consecutive differences is returned.
    """
    centroid_rows = []
    for contour in contours_main:
        moments = cv2.moments(contour)
        centroid_rows.append(moments["m01"] / (moments["m00"] + 1e-32))
    ordered_rows = np.sort(np.array(centroid_rows))
    gaps = np.diff(ordered_rows)
    return np.mean(gaps)

Loading…
Cancel
Save