mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-10 04:39:54 +02:00
deduplicate code seperate_lines{,_vertical}
This commit is contained in:
parent
11315da683
commit
c0ae2dc7fa
1 changed file with 10 additions and 107 deletions
|
@ -13,8 +13,7 @@ from .contour import (
|
||||||
)
|
)
|
||||||
from .is_nan import isNaN
|
from .is_nan import isNaN
|
||||||
|
|
||||||
def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
||||||
|
|
||||||
(h, w) = img_patch.shape[:2]
|
(h, w) = img_patch.shape[:2]
|
||||||
center = (w // 2, h // 2)
|
center = (w // 2, h // 2)
|
||||||
M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
|
M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
|
||||||
|
@ -23,7 +22,6 @@ def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
|
|
||||||
thetha = thetha / 180.0 * np.pi
|
thetha = thetha / 180.0 * np.pi
|
||||||
rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
|
rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
|
||||||
contour_text_interest_copy = contour_text_interest.copy()
|
|
||||||
|
|
||||||
x_cont = contour_text_interest[:, 0, 0]
|
x_cont = contour_text_interest[:, 0, 0]
|
||||||
y_cont = contour_text_interest[:, 0, 1]
|
y_cont = contour_text_interest[:, 0, 1]
|
||||||
|
@ -37,7 +35,7 @@ def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
|
|
||||||
xv = np.linspace(x_min_cont, x_max_cont, 1000)
|
xv = np.linspace(x_min_cont, x_max_cont, 1000)
|
||||||
|
|
||||||
textline_patch_sum_along_width = img_patch.sum(axis=1)
|
textline_patch_sum_along_width = img_patch.sum(axis=axis)
|
||||||
|
|
||||||
first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None))
|
first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None))
|
||||||
|
|
||||||
|
@ -121,6 +119,13 @@ def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
peaks, _ = find_peaks(y_padded_smoothed, height=0)
|
peaks, _ = find_peaks(y_padded_smoothed, height=0)
|
||||||
peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
|
peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
|
||||||
|
|
||||||
|
return x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix
|
||||||
|
|
||||||
|
def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
|
|
||||||
|
contour_text_interest_copy = contour_text_interest.copy()
|
||||||
|
x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix = dedup_separate_lines(img_patch, contour_text_interest, thetha, 1)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
neg_peaks_max = np.max(y_padded_smoothed[peaks])
|
neg_peaks_max = np.max(y_padded_smoothed[peaks])
|
||||||
|
|
||||||
|
@ -478,111 +483,9 @@ def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
def seperate_lines_vertical(img_patch, contour_text_interest, thetha):
|
def seperate_lines_vertical(img_patch, contour_text_interest, thetha):
|
||||||
|
|
||||||
thetha = thetha + 90
|
thetha = thetha + 90
|
||||||
|
|
||||||
(h, w) = img_patch.shape[:2]
|
|
||||||
center = (w // 2, h // 2)
|
|
||||||
M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
|
|
||||||
x_d = M[0, 2]
|
|
||||||
y_d = M[1, 2]
|
|
||||||
|
|
||||||
thetha = thetha / 180.0 * np.pi
|
|
||||||
rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
|
|
||||||
contour_text_interest_copy = contour_text_interest.copy()
|
contour_text_interest_copy = contour_text_interest.copy()
|
||||||
|
x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix = dedup_separate_lines(img_patch, contour_text_interest, thetha, 0)
|
||||||
|
|
||||||
x_cont = contour_text_interest[:, 0, 0]
|
|
||||||
y_cont = contour_text_interest[:, 0, 1]
|
|
||||||
x_cont = x_cont - np.min(x_cont)
|
|
||||||
y_cont = y_cont - np.min(y_cont)
|
|
||||||
|
|
||||||
x_min_cont = 0
|
|
||||||
x_max_cont = img_patch.shape[1]
|
|
||||||
y_min_cont = 0
|
|
||||||
y_max_cont = img_patch.shape[0]
|
|
||||||
|
|
||||||
xv = np.linspace(x_min_cont, x_max_cont, 1000)
|
|
||||||
|
|
||||||
textline_patch_sum_along_width = img_patch.sum(axis=0)
|
|
||||||
|
|
||||||
first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None))
|
|
||||||
|
|
||||||
y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero]
|
|
||||||
y_padded = np.zeros(len(y) + 40)
|
|
||||||
y_padded[20 : len(y) + 20] = y
|
|
||||||
x = np.array(range(len(y)))
|
|
||||||
|
|
||||||
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
|
|
||||||
if 1 > 0:
|
|
||||||
|
|
||||||
try:
|
|
||||||
|
|
||||||
y_padded_smoothed_e = gaussian_filter1d(y_padded, 2)
|
|
||||||
y_padded_up_to_down_e = -y_padded + np.max(y_padded)
|
|
||||||
y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40)
|
|
||||||
y_padded_up_to_down_padded_e[20 : len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e
|
|
||||||
y_padded_up_to_down_padded_e = gaussian_filter1d(y_padded_up_to_down_padded_e, 2)
|
|
||||||
|
|
||||||
peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0)
|
|
||||||
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
|
|
||||||
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
|
|
||||||
|
|
||||||
arg_neg_must_be_deleted = np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
|
|
||||||
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
|
|
||||||
|
|
||||||
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
|
|
||||||
arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1]
|
|
||||||
|
|
||||||
peaks_new = peaks_e[:]
|
|
||||||
peaks_neg_new = peaks_neg_e[:]
|
|
||||||
|
|
||||||
clusters_to_be_deleted = []
|
|
||||||
if len(arg_diff_cluster) > 0:
|
|
||||||
|
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
|
|
||||||
for i in range(len(arg_diff_cluster) - 1):
|
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1])
|
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
|
|
||||||
|
|
||||||
if len(clusters_to_be_deleted) > 0:
|
|
||||||
peaks_new_extra = []
|
|
||||||
for m in range(len(clusters_to_be_deleted)):
|
|
||||||
min_cluster = np.min(peaks_e[clusters_to_be_deleted[m]])
|
|
||||||
max_cluster = np.max(peaks_e[clusters_to_be_deleted[m]])
|
|
||||||
peaks_new_extra.append(int((min_cluster + max_cluster) / 2.0))
|
|
||||||
for m1 in range(len(clusters_to_be_deleted[m])):
|
|
||||||
peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1] - 1]]
|
|
||||||
peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1]]]
|
|
||||||
|
|
||||||
peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg_e[clusters_to_be_deleted[m][m1]]]
|
|
||||||
peaks_new_tot = []
|
|
||||||
for i1 in peaks_new:
|
|
||||||
peaks_new_tot.append(i1)
|
|
||||||
for i1 in peaks_new_extra:
|
|
||||||
peaks_new_tot.append(i1)
|
|
||||||
peaks_new_tot = np.sort(peaks_new_tot)
|
|
||||||
|
|
||||||
else:
|
|
||||||
peaks_new_tot = peaks_e[:]
|
|
||||||
|
|
||||||
textline_con, hierachy = return_contours_of_image(img_patch)
|
|
||||||
textline_con_fil = filter_contours_area_of_image(img_patch, textline_con, hierachy, max_area=1, min_area=0.0008)
|
|
||||||
y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil)
|
|
||||||
|
|
||||||
sigma_gaus = int(y_diff_mean * (7.0 / 40.0))
|
|
||||||
# print(sigma_gaus,'sigma_gaus')
|
|
||||||
except:
|
|
||||||
sigma_gaus = 12
|
|
||||||
if sigma_gaus < 3:
|
|
||||||
sigma_gaus = 3
|
|
||||||
# print(sigma_gaus,'sigma')
|
|
||||||
|
|
||||||
y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus)
|
|
||||||
y_padded_up_to_down = -y_padded + np.max(y_padded)
|
|
||||||
y_padded_up_to_down_padded = np.zeros(len(y_padded_up_to_down) + 40)
|
|
||||||
y_padded_up_to_down_padded[20 : len(y_padded_up_to_down) + 20] = y_padded_up_to_down
|
|
||||||
y_padded_up_to_down_padded = gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus)
|
|
||||||
|
|
||||||
peaks, _ = find_peaks(y_padded_smoothed, height=0)
|
|
||||||
peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
|
|
||||||
|
|
||||||
# plt.plot(y_padded_up_to_down_padded)
|
# plt.plot(y_padded_up_to_down_padded)
|
||||||
# plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
|
# plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue