|
|
|
@ -102,14 +102,15 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
|
|
|
|
textline_con_fil = filter_contours_area_of_image(img_patch,
|
|
|
|
|
textline_con, hierarchy,
|
|
|
|
|
max_area=1, min_area=0.0008)
|
|
|
|
|
y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil)
|
|
|
|
|
sigma_gaus = int(y_diff_mean * (7.0 / 40.0))
|
|
|
|
|
# print(sigma_gaus,'sigma_gaus')
|
|
|
|
|
if len(np.diff(peaks_new_tot))>1:
|
|
|
|
|
y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil)
|
|
|
|
|
sigma_gaus = int(y_diff_mean * (7.0 / 40.0))
|
|
|
|
|
else:
|
|
|
|
|
sigma_gaus = 12
|
|
|
|
|
except:
|
|
|
|
|
sigma_gaus = 12
|
|
|
|
|
if sigma_gaus < 3:
|
|
|
|
|
sigma_gaus = 3
|
|
|
|
|
# print(sigma_gaus,'sigma')
|
|
|
|
|
|
|
|
|
|
y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus)
|
|
|
|
|
y_padded_up_to_down = -y_padded + np.max(y_padded)
|
|
|
|
@ -137,7 +138,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|
|
|
|
M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
|
|
|
|
|
x_d = M[0, 2]
|
|
|
|
|
y_d = M[1, 2]
|
|
|
|
|
|
|
|
|
|
thetha = thetha / 180. * np.pi
|
|
|
|
|
rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
|
|
|
|
|
contour_text_interest_copy = contour_text_interest.copy()
|
|
|
|
@ -162,77 +162,73 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|
|
|
|
x = np.array(range(len(y)))
|
|
|
|
|
|
|
|
|
|
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
|
|
|
|
|
if 1>0:
|
|
|
|
|
try:
|
|
|
|
|
y_padded_smoothed_e= gaussian_filter1d(y_padded, 2)
|
|
|
|
|
y_padded_up_to_down_e=-y_padded+np.max(y_padded)
|
|
|
|
|
y_padded_up_to_down_padded_e=np.zeros(len(y_padded_up_to_down_e)+40)
|
|
|
|
|
y_padded_up_to_down_padded_e[20:len(y_padded_up_to_down_e)+20]=y_padded_up_to_down_e
|
|
|
|
|
y_padded_up_to_down_padded_e= gaussian_filter1d(y_padded_up_to_down_padded_e, 2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
y_padded_smoothed_e= gaussian_filter1d(y_padded, 2)
|
|
|
|
|
y_padded_up_to_down_e=-y_padded+np.max(y_padded)
|
|
|
|
|
y_padded_up_to_down_padded_e=np.zeros(len(y_padded_up_to_down_e)+40)
|
|
|
|
|
y_padded_up_to_down_padded_e[20:len(y_padded_up_to_down_e)+20]=y_padded_up_to_down_e
|
|
|
|
|
y_padded_up_to_down_padded_e= gaussian_filter1d(y_padded_up_to_down_padded_e, 2)
|
|
|
|
|
|
|
|
|
|
peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0)
|
|
|
|
|
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
|
|
|
|
|
neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
|
|
|
|
|
|
|
|
|
|
peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0)
|
|
|
|
|
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
|
|
|
|
|
neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
|
|
|
|
|
arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3]
|
|
|
|
|
diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
|
|
|
|
|
|
|
|
|
|
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
|
|
|
|
|
arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1]
|
|
|
|
|
peaks_new=peaks_e[:]
|
|
|
|
|
peaks_neg_new=peaks_neg_e[:]
|
|
|
|
|
|
|
|
|
|
arg_neg_must_be_deleted= np.arange(len(peaks_neg_e))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3]
|
|
|
|
|
diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
|
|
|
|
|
|
|
|
|
|
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
|
|
|
|
|
arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1]
|
|
|
|
|
|
|
|
|
|
peaks_new=peaks_e[:]
|
|
|
|
|
peaks_neg_new=peaks_neg_e[:]
|
|
|
|
|
|
|
|
|
|
clusters_to_be_deleted=[]
|
|
|
|
|
if len(arg_diff_cluster)>0:
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1])
|
|
|
|
|
for i in range(len(arg_diff_cluster)-1):
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:
|
|
|
|
|
arg_diff_cluster[i+1]+1])
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:])
|
|
|
|
|
if len(clusters_to_be_deleted)>0:
|
|
|
|
|
peaks_new_extra=[]
|
|
|
|
|
for m in range(len(clusters_to_be_deleted)):
|
|
|
|
|
min_cluster=np.min(peaks_e[clusters_to_be_deleted[m]])
|
|
|
|
|
max_cluster=np.max(peaks_e[clusters_to_be_deleted[m]])
|
|
|
|
|
peaks_new_extra.append( int( (min_cluster+max_cluster)/2.0) )
|
|
|
|
|
for m1 in range(len(clusters_to_be_deleted[m])):
|
|
|
|
|
peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]-1]]
|
|
|
|
|
peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]]]
|
|
|
|
|
peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg_e[clusters_to_be_deleted[m][m1]]]
|
|
|
|
|
peaks_new_tot=[]
|
|
|
|
|
for i1 in peaks_new:
|
|
|
|
|
peaks_new_tot.append(i1)
|
|
|
|
|
for i1 in peaks_new_extra:
|
|
|
|
|
peaks_new_tot.append(i1)
|
|
|
|
|
peaks_new_tot=np.sort(peaks_new_tot)
|
|
|
|
|
else:
|
|
|
|
|
peaks_new_tot=peaks_e[:]
|
|
|
|
|
clusters_to_be_deleted=[]
|
|
|
|
|
if len(arg_diff_cluster)>0:
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1])
|
|
|
|
|
for i in range(len(arg_diff_cluster)-1):
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:
|
|
|
|
|
arg_diff_cluster[i+1]+1])
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:])
|
|
|
|
|
if len(clusters_to_be_deleted)>0:
|
|
|
|
|
peaks_new_extra=[]
|
|
|
|
|
for m in range(len(clusters_to_be_deleted)):
|
|
|
|
|
min_cluster=np.min(peaks_e[clusters_to_be_deleted[m]])
|
|
|
|
|
max_cluster=np.max(peaks_e[clusters_to_be_deleted[m]])
|
|
|
|
|
peaks_new_extra.append( int( (min_cluster+max_cluster)/2.0) )
|
|
|
|
|
for m1 in range(len(clusters_to_be_deleted[m])):
|
|
|
|
|
peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]-1]]
|
|
|
|
|
peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]]]
|
|
|
|
|
peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg_e[clusters_to_be_deleted[m][m1]]]
|
|
|
|
|
peaks_new_tot=[]
|
|
|
|
|
for i1 in peaks_new:
|
|
|
|
|
peaks_new_tot.append(i1)
|
|
|
|
|
for i1 in peaks_new_extra:
|
|
|
|
|
peaks_new_tot.append(i1)
|
|
|
|
|
peaks_new_tot=np.sort(peaks_new_tot)
|
|
|
|
|
else:
|
|
|
|
|
peaks_new_tot=peaks_e[:]
|
|
|
|
|
|
|
|
|
|
textline_con,hierarchy=return_contours_of_image(img_patch)
|
|
|
|
|
textline_con_fil=filter_contours_area_of_image(img_patch,
|
|
|
|
|
textline_con, hierarchy,
|
|
|
|
|
max_area=1, min_area=0.0008)
|
|
|
|
|
textline_con,hierarchy=return_contours_of_image(img_patch)
|
|
|
|
|
textline_con_fil=filter_contours_area_of_image(img_patch,
|
|
|
|
|
textline_con, hierarchy,
|
|
|
|
|
max_area=1, min_area=0.0008)
|
|
|
|
|
|
|
|
|
|
if len(np.diff(peaks_new_tot))>0:
|
|
|
|
|
y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil)
|
|
|
|
|
sigma_gaus=int( y_diff_mean * (7./40.0) )
|
|
|
|
|
else:
|
|
|
|
|
sigma_gaus=12
|
|
|
|
|
|
|
|
|
|
except:
|
|
|
|
|
if len(np.diff(peaks_new_tot))>0:
|
|
|
|
|
y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil)
|
|
|
|
|
sigma_gaus=int( y_diff_mean * (7./40.0) )
|
|
|
|
|
else:
|
|
|
|
|
sigma_gaus=12
|
|
|
|
|
if sigma_gaus<3:
|
|
|
|
|
sigma_gaus=3
|
|
|
|
|
#print(sigma_gaus,'sigma')
|
|
|
|
|
|
|
|
|
|
except:
|
|
|
|
|
sigma_gaus=12
|
|
|
|
|
if sigma_gaus<3:
|
|
|
|
|
sigma_gaus=3
|
|
|
|
|
|
|
|
|
|
y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus)
|
|
|
|
|
y_padded_up_to_down=-y_padded+np.max(y_padded)
|
|
|
|
|
y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40)
|
|
|
|
|
y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down
|
|
|
|
|
y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus)
|
|
|
|
|
|
|
|
|
|
peaks, _ = find_peaks(y_padded_smoothed, height=0)
|
|
|
|
|
peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
|
|
|
|
|
|
|
|
|
@ -243,6 +239,7 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|
|
|
|
|
|
|
|
|
arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
|
|
|
|
|
arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1]
|
|
|
|
|
|
|
|
|
|
except:
|
|
|
|
|
arg_neg_must_be_deleted=[]
|
|
|
|
|
arg_diff_cluster=[]
|
|
|
|
@ -250,7 +247,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|
|
|
|
peaks_new=peaks[:]
|
|
|
|
|
peaks_neg_new=peaks_neg[:]
|
|
|
|
|
clusters_to_be_deleted=[]
|
|
|
|
|
|
|
|
|
|
if len(arg_diff_cluster)>=2 and len(arg_diff_cluster)>0:
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1])
|
|
|
|
|
for i in range(len(arg_diff_cluster)-1):
|
|
|
|
@ -279,21 +275,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|
|
|
|
peaks_new_tot.append(i1)
|
|
|
|
|
peaks_new_tot=np.sort(peaks_new_tot)
|
|
|
|
|
|
|
|
|
|
##plt.plot(y_padded_up_to_down_padded)
|
|
|
|
|
##plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
|
|
|
|
|
##plt.show()
|
|
|
|
|
|
|
|
|
|
##plt.plot(y_padded_up_to_down_padded)
|
|
|
|
|
##plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*')
|
|
|
|
|
##plt.show()
|
|
|
|
|
|
|
|
|
|
##plt.plot(y_padded_smoothed)
|
|
|
|
|
##plt.plot(peaks,y_padded_smoothed[peaks],'*')
|
|
|
|
|
##plt.show()
|
|
|
|
|
|
|
|
|
|
##plt.plot(y_padded_smoothed)
|
|
|
|
|
##plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*')
|
|
|
|
|
##plt.show()
|
|
|
|
|
peaks=peaks_new_tot[:]
|
|
|
|
|
peaks_neg=peaks_neg_new[:]
|
|
|
|
|
else:
|
|
|
|
@ -302,11 +283,13 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|
|
|
|
peaks_neg=peaks_neg_new[:]
|
|
|
|
|
except:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
mean_value_of_peaks=np.mean(y_padded_smoothed[peaks])
|
|
|
|
|
std_value_of_peaks=np.std(y_padded_smoothed[peaks])
|
|
|
|
|
if len(y_padded_smoothed[peaks]) > 1:
|
|
|
|
|
mean_value_of_peaks=np.mean(y_padded_smoothed[peaks])
|
|
|
|
|
std_value_of_peaks=np.std(y_padded_smoothed[peaks])
|
|
|
|
|
else:
|
|
|
|
|
mean_value_of_peaks = np.nan
|
|
|
|
|
std_value_of_peaks = np.nan
|
|
|
|
|
peaks_values=y_padded_smoothed[peaks]
|
|
|
|
|
|
|
|
|
|
peaks_neg = peaks_neg - 20 - 20
|
|
|
|
|
peaks = peaks - 20
|
|
|
|
|
for jj in range(len(peaks_neg)):
|
|
|
|
@ -349,7 +332,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|
|
|
|
point_down_narrow = peaks[jj] + first_nonzero + int(
|
|
|
|
|
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if point_down_narrow >= img_patch.shape[0]:
|
|
|
|
|
point_down_narrow = img_patch.shape[0] - 2
|
|
|
|
|
|
|
|
|
@ -605,7 +587,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|
|
|
|
[int(x_max), int(point_up)],
|
|
|
|
|
[int(x_max), int(point_down)],
|
|
|
|
|
[int(x_min), int(point_down)]]))
|
|
|
|
|
|
|
|
|
|
return peaks, textline_boxes_rot
|
|
|
|
|
|
|
|
|
|
def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|
|
|
@ -637,7 +618,7 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|
|
|
|
peaks_neg_new = peaks_neg[:]
|
|
|
|
|
clusters_to_be_deleted = []
|
|
|
|
|
|
|
|
|
|
if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0:
|
|
|
|
|
if len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) >= 2:
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
|
|
|
|
|
for i in range(len(arg_diff_cluster) - 1):
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 :
|
|
|
|
@ -645,7 +626,7 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
|
|
|
|
|
elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0:
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[:])
|
|
|
|
|
if len(arg_neg_must_be_deleted) == 1:
|
|
|
|
|
else:
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted)
|
|
|
|
|
if len(clusters_to_be_deleted) > 0:
|
|
|
|
|
peaks_new_extra = []
|
|
|
|
@ -671,9 +652,14 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|
|
|
|
peaks_new_tot = peaks[:]
|
|
|
|
|
peaks = peaks_new_tot[:]
|
|
|
|
|
peaks_neg = peaks_neg_new[:]
|
|
|
|
|
|
|
|
|
|
mean_value_of_peaks = np.mean(y_padded_smoothed[peaks])
|
|
|
|
|
std_value_of_peaks = np.std(y_padded_smoothed[peaks])
|
|
|
|
|
|
|
|
|
|
if len(y_padded_smoothed[peaks])>1:
|
|
|
|
|
mean_value_of_peaks = np.mean(y_padded_smoothed[peaks])
|
|
|
|
|
std_value_of_peaks = np.std(y_padded_smoothed[peaks])
|
|
|
|
|
else:
|
|
|
|
|
mean_value_of_peaks = np.nan
|
|
|
|
|
std_value_of_peaks = np.nan
|
|
|
|
|
|
|
|
|
|
peaks_values = y_padded_smoothed[peaks]
|
|
|
|
|
|
|
|
|
|
peaks_neg = peaks_neg - 20 - 20
|
|
|
|
@ -691,7 +677,6 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|
|
|
|
textline_boxes_rot = []
|
|
|
|
|
|
|
|
|
|
if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3:
|
|
|
|
|
# print('11')
|
|
|
|
|
for jj in range(len(peaks)):
|
|
|
|
|
|
|
|
|
|
if jj == (len(peaks) - 1):
|
|
|
|
@ -998,15 +983,16 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|
|
|
|
textline_con_fil = filter_contours_area_of_image(img_patch,
|
|
|
|
|
textline_con, hierarchy,
|
|
|
|
|
max_area=1, min_area=0.0008)
|
|
|
|
|
y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil)
|
|
|
|
|
if len(np.diff(peaks_new_tot)):
|
|
|
|
|
y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil)
|
|
|
|
|
sigma_gaus = int(y_diff_mean * (7.0 / 40.0))
|
|
|
|
|
else:
|
|
|
|
|
sigma_gaus = 12
|
|
|
|
|
|
|
|
|
|
sigma_gaus = int(y_diff_mean * (7.0 / 40.0))
|
|
|
|
|
# print(sigma_gaus,'sigma_gaus')
|
|
|
|
|
except:
|
|
|
|
|
sigma_gaus = 12
|
|
|
|
|
if sigma_gaus < 3:
|
|
|
|
|
sigma_gaus = 3
|
|
|
|
|
# print(sigma_gaus,'sigma')
|
|
|
|
|
|
|
|
|
|
y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus)
|
|
|
|
|
y_padded_up_to_down = -y_padded + np.max(y_padded)
|
|
|
|
@ -1030,7 +1016,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|
|
|
|
arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1]
|
|
|
|
|
|
|
|
|
|
clusters_to_be_deleted = []
|
|
|
|
|
if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0:
|
|
|
|
|
if len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) >= 2:
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
|
|
|
|
|
for i in range(len(arg_diff_cluster) - 1):
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 :
|
|
|
|
@ -1038,7 +1024,7 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
|
|
|
|
|
elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0:
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[:])
|
|
|
|
|
if len(arg_neg_must_be_deleted) == 1:
|
|
|
|
|
else:
|
|
|
|
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted)
|
|
|
|
|
if len(clusters_to_be_deleted) > 0:
|
|
|
|
|
peaks_new_extra = []
|
|
|
|
@ -1081,9 +1067,14 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|
|
|
|
peaks_new_tot = peaks[:]
|
|
|
|
|
peaks = peaks_new_tot[:]
|
|
|
|
|
peaks_neg = peaks_neg_new[:]
|
|
|
|
|
|
|
|
|
|
mean_value_of_peaks = np.mean(y_padded_smoothed[peaks])
|
|
|
|
|
std_value_of_peaks = np.std(y_padded_smoothed[peaks])
|
|
|
|
|
|
|
|
|
|
if len(y_padded_smoothed[peaks]) > 1:
|
|
|
|
|
mean_value_of_peaks = np.mean(y_padded_smoothed[peaks])
|
|
|
|
|
std_value_of_peaks = np.std(y_padded_smoothed[peaks])
|
|
|
|
|
else:
|
|
|
|
|
mean_value_of_peaks = np.nan
|
|
|
|
|
std_value_of_peaks = np.nan
|
|
|
|
|
|
|
|
|
|
peaks_values = y_padded_smoothed[peaks]
|
|
|
|
|
|
|
|
|
|
###peaks_neg = peaks_neg - 20 - 20
|
|
|
|
@ -1093,10 +1084,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|
|
|
|
|
|
|
|
|
if len(peaks_neg_true) > 0:
|
|
|
|
|
peaks_neg_true = np.array(peaks_neg_true)
|
|
|
|
|
|
|
|
|
|
peaks_neg_true = peaks_neg_true - 20 - 20
|
|
|
|
|
|
|
|
|
|
# print(peaks_neg_true)
|
|
|
|
|
for i in range(len(peaks_neg_true)):
|
|
|
|
|
img_patch[peaks_neg_true[i] - 6 : peaks_neg_true[i] + 6, :] = 0
|
|
|
|
|
else:
|
|
|
|
@ -1181,13 +1170,11 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
|
|
|
|
if diff_peaks[i] <= cut_off:
|
|
|
|
|
forest.append(peaks_neg[i + 1])
|
|
|
|
|
if diff_peaks[i] > cut_off:
|
|
|
|
|
# print(forest[np.argmin(z[forest]) ] )
|
|
|
|
|
if not np.isnan(forest[np.argmin(z[forest])]):
|
|
|
|
|
peaks_neg_true.append(forest[np.argmin(z[forest])])
|
|
|
|
|
forest = []
|
|
|
|
|
forest.append(peaks_neg[i + 1])
|
|
|
|
|
if i == (len(peaks_neg) - 1):
|
|
|
|
|
# print(print(forest[np.argmin(z[forest]) ] ))
|
|
|
|
|
if not np.isnan(forest[np.argmin(z[forest])]):
|
|
|
|
|
peaks_neg_true.append(forest[np.argmin(z[forest])])
|
|
|
|
|
|
|
|
|
@ -1204,17 +1191,14 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
|
|
|
|
if diff_peaks_pos[i] <= cut_off:
|
|
|
|
|
forest.append(peaks[i + 1])
|
|
|
|
|
if diff_peaks_pos[i] > cut_off:
|
|
|
|
|
# print(forest[np.argmin(z[forest]) ] )
|
|
|
|
|
if not np.isnan(forest[np.argmax(z[forest])]):
|
|
|
|
|
peaks_pos_true.append(forest[np.argmax(z[forest])])
|
|
|
|
|
forest = []
|
|
|
|
|
forest.append(peaks[i + 1])
|
|
|
|
|
if i == (len(peaks) - 1):
|
|
|
|
|
# print(print(forest[np.argmin(z[forest]) ] ))
|
|
|
|
|
if not np.isnan(forest[np.argmax(z[forest])]):
|
|
|
|
|
peaks_pos_true.append(forest[np.argmax(z[forest])])
|
|
|
|
|
|
|
|
|
|
# print(len(peaks_neg_true) ,len(peaks_pos_true) ,'lensss')
|
|
|
|
|
|
|
|
|
|
if len(peaks_neg_true) > 0:
|
|
|
|
|
peaks_neg_true = np.array(peaks_neg_true)
|
|
|
|
@ -1240,7 +1224,6 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
|
|
|
|
"""
|
|
|
|
|
peaks_neg_true = peaks_neg_true - 20 - 20
|
|
|
|
|
|
|
|
|
|
# print(peaks_neg_true)
|
|
|
|
|
for i in range(len(peaks_neg_true)):
|
|
|
|
|
img_path[peaks_neg_true[i] - 6 : peaks_neg_true[i] + 6, :] = 0
|
|
|
|
|
|
|
|
|
@ -1282,7 +1265,6 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
|
|
|
|
|
contours_imgs, hierarchy,
|
|
|
|
|
max_area=max_area, min_area=min_area)
|
|
|
|
|
cont_final = []
|
|
|
|
|
###print(add_boxes_coor_into_textlines,'ikki')
|
|
|
|
|
for i in range(len(contours_imgs)):
|
|
|
|
|
img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3))
|
|
|
|
|
img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255))
|
|
|
|
@ -1297,12 +1279,10 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
|
|
|
|
|
##0]
|
|
|
|
|
##contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
|
|
|
|
|
##if add_boxes_coor_into_textlines:
|
|
|
|
|
##print(np.shape(contours_text_rot[0]),'sjppo')
|
|
|
|
|
##contours_text_rot[0][:, 0, 0]=contours_text_rot[0][:, 0, 0] + box_ind[0]
|
|
|
|
|
##contours_text_rot[0][:, 0, 1]=contours_text_rot[0][:, 0, 1] + box_ind[1]
|
|
|
|
|
cont_final.append(contours_text_rot[0])
|
|
|
|
|
|
|
|
|
|
##print(cont_final,'nadizzzz')
|
|
|
|
|
return None, cont_final
|
|
|
|
|
|
|
|
|
|
def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False):
|
|
|
|
@ -1313,20 +1293,7 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
|
|
|
|
|
textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel)
|
|
|
|
|
textline_mask = cv2.erode(textline_mask, kernel, iterations=2)
|
|
|
|
|
# textline_mask = cv2.erode(textline_mask, kernel, iterations=1)
|
|
|
|
|
|
|
|
|
|
# print(textline_mask.shape[0]/float(textline_mask.shape[1]),'miz')
|
|
|
|
|
try:
|
|
|
|
|
# if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3:
|
|
|
|
|
# plt.imshow(textline_mask)
|
|
|
|
|
# plt.show()
|
|
|
|
|
|
|
|
|
|
# if abs(slope)>1:
|
|
|
|
|
# x_help=30
|
|
|
|
|
# y_help=2
|
|
|
|
|
# else:
|
|
|
|
|
# x_help=2
|
|
|
|
|
# y_help=2
|
|
|
|
|
|
|
|
|
|
x_help = 30
|
|
|
|
|
y_help = 2
|
|
|
|
|
|
|
|
|
@ -1350,28 +1317,12 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
|
|
|
|
|
img_contour = np.zeros((box_ind[3], box_ind[2], 3))
|
|
|
|
|
img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255))
|
|
|
|
|
|
|
|
|
|
# if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3:
|
|
|
|
|
# plt.imshow(img_contour)
|
|
|
|
|
# plt.show()
|
|
|
|
|
|
|
|
|
|
img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help),
|
|
|
|
|
img_contour.shape[1] + int(2 * x_help), 3))
|
|
|
|
|
img_contour_help[y_help : y_help + img_contour.shape[0],
|
|
|
|
|
x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :])
|
|
|
|
|
|
|
|
|
|
img_contour_rot = rotate_image(img_contour_help, slope)
|
|
|
|
|
# plt.imshow(img_contour_rot_help)
|
|
|
|
|
# plt.show()
|
|
|
|
|
|
|
|
|
|
# plt.imshow(dst_help)
|
|
|
|
|
# plt.show()
|
|
|
|
|
|
|
|
|
|
# if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3:
|
|
|
|
|
# plt.imshow(img_contour_rot_help)
|
|
|
|
|
# plt.show()
|
|
|
|
|
|
|
|
|
|
# plt.imshow(dst_help)
|
|
|
|
|
# plt.show()
|
|
|
|
|
|
|
|
|
|
img_contour_rot = img_contour_rot.astype(np.uint8)
|
|
|
|
|
# dst_help = dst_help.astype(np.uint8)
|
|
|
|
@ -1382,9 +1333,7 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
|
|
|
|
|
len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))]
|
|
|
|
|
ind_big_con = np.argmax(len_con_text_rot)
|
|
|
|
|
|
|
|
|
|
# print('juzaa')
|
|
|
|
|
if abs(slope) > 45:
|
|
|
|
|
# print(add_boxes_coor_into_textlines,'avval')
|
|
|
|
|
_, contours_rotated_clean = separate_lines_vertical_cont(
|
|
|
|
|
textline_mask, contours_text_rot[ind_big_con], box_ind, slope,
|
|
|
|
|
add_boxes_coor_into_textlines=add_boxes_coor_into_textlines)
|
|
|
|
@ -1416,7 +1365,6 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
|
|
|
|
|
length_x = int(img_path.shape[1] / float(num_patches))
|
|
|
|
|
# margin = int(0.04 * length_x) just recently this was changed because it break lines into 2
|
|
|
|
|
margin = int(0.04 * length_x)
|
|
|
|
|
# print(margin,'margin')
|
|
|
|
|
# if margin<=4:
|
|
|
|
|
# margin = int(0.08 * length_x)
|
|
|
|
|
# margin=0
|
|
|
|
@ -1456,11 +1404,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
|
|
|
|
|
# if abs(slope_region)>70 and abs(slope_xline)<25:
|
|
|
|
|
# slope_xline=[slope_region][0]
|
|
|
|
|
slopes_tile_wise.append(slope_xline)
|
|
|
|
|
# print(slope_xline,'xlineeee')
|
|
|
|
|
img_line_rotated = rotate_image(img_xline, slope_xline)
|
|
|
|
|
img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1
|
|
|
|
|
|
|
|
|
|
# print(slopes_tile_wise,'slopes_tile_wise')
|
|
|
|
|
|
|
|
|
|
img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
|
|
|
|
|
|
|
|
|
|
img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape)
|
|
|
|
@ -1502,8 +1448,6 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
|
|
|
|
|
img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin]
|
|
|
|
|
img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
|
|
|
|
|
|
|
|
|
|
# plt.imshow(img_patch_ineterst_revised)
|
|
|
|
|
# plt.show()
|
|
|
|
|
return img_patch_ineterst_revised
|
|
|
|
|
|
|
|
|
|
def do_image_rotation(angle, img, sigma_des, logger=None):
|
|
|
|
@ -1536,20 +1480,13 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
|
|
|
|
|
#img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:]
|
|
|
|
|
img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:]
|
|
|
|
|
|
|
|
|
|
#print(img_resized.shape,'img_resizedshape')
|
|
|
|
|
#plt.imshow(img_resized)
|
|
|
|
|
#plt.show()
|
|
|
|
|
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
|
|
|
|
|
#plt.imshow(img_resized)
|
|
|
|
|
#plt.show()
|
|
|
|
|
angles = np.array([-45, 0, 45, 90,])
|
|
|
|
|
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
|
|
|
|
|
|
|
|
|
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
|
|
|
|
|
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
|
|
|
|
elif main_page:
|
|
|
|
|
#plt.imshow(img_resized)
|
|
|
|
|
#plt.show()
|
|
|
|
|
angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
|
|
|
|
|
angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter)
|
|
|
|
|
|
|
|
|
@ -1620,7 +1557,6 @@ def do_work_of_slopes_new(
|
|
|
|
|
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
|
|
|
|
|
hierarchy,
|
|
|
|
|
max_area=1, min_area=0.00008)
|
|
|
|
|
|
|
|
|
|
y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if len(textline_con_fil) > 1 else np.NaN
|
|
|
|
|
if np.isnan(y_diff_mean):
|
|
|
|
|
slope_for_all = MAX_SLOPE
|
|
|
|
@ -1637,12 +1573,9 @@ def do_work_of_slopes_new(
|
|
|
|
|
if slope_for_all == MAX_SLOPE:
|
|
|
|
|
slope_for_all = slope_deskew
|
|
|
|
|
slope = slope_for_all
|
|
|
|
|
|
|
|
|
|
mask_only_con_region = np.zeros(textline_mask_tot_ea.shape)
|
|
|
|
|
mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contour_par], color=(1, 1, 1))
|
|
|
|
|
|
|
|
|
|
# plt.imshow(mask_only_con_region)
|
|
|
|
|
# plt.show()
|
|
|
|
|
all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w].copy()
|
|
|
|
|
mask_only_con_region = mask_only_con_region[y: y + h, x: x + w]
|
|
|
|
|
|
|
|
|
@ -1706,20 +1639,15 @@ def do_work_of_slopes_new_curved(
|
|
|
|
|
mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w]
|
|
|
|
|
textline_biggest_region = mask_biggest * textline_mask_tot_ea
|
|
|
|
|
|
|
|
|
|
# print(slope_for_all,'slope_for_all')
|
|
|
|
|
textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y+h, x: x+w], 0,
|
|
|
|
|
num_col, slope_for_all,
|
|
|
|
|
logger=logger, plotter=plotter)
|
|
|
|
|
|
|
|
|
|
# new line added
|
|
|
|
|
##print(np.shape(textline_rotated_separated),np.shape(mask_biggest))
|
|
|
|
|
|
|
|
|
|
textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0
|
|
|
|
|
# till here
|
|
|
|
|
|
|
|
|
|
textline_region_in_image[y : y + h, x : x + w] = textline_rotated_separated
|
|
|
|
|
|
|
|
|
|
# plt.imshow(textline_region_in_image)
|
|
|
|
|
# plt.show()
|
|
|
|
|
|
|
|
|
|
pixel_img = 1
|
|
|
|
|
cnt_textlines_in_image = return_contours_of_interested_textline(textline_region_in_image, pixel_img)
|
|
|
|
@ -1742,7 +1670,6 @@ def do_work_of_slopes_new_curved(
|
|
|
|
|
logger.error(why)
|
|
|
|
|
else:
|
|
|
|
|
textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text, True)
|
|
|
|
|
# print(np.shape(textlines_cnt_per_region),'textlines_cnt_per_region')
|
|
|
|
|
|
|
|
|
|
return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope
|
|
|
|
|
|
|
|
|
|