mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-15 19:09:58 +02:00
order_of_regions: simplify
- use new `find_center_of_contours`
- avoid unused calculations
- avoid loops in favour of array processing
This commit is contained in:
parent
a1c8fd4467
commit
4950e6bd78
1 changed file with 34 additions and 97 deletions
|
@ -15,10 +15,21 @@ from scipy.ndimage import gaussian_filter1d
|
|||
|
||||
from .is_nan import isNaN
|
||||
from .contour import (contours_in_same_horizon,
|
||||
find_center_of_contours,
|
||||
find_new_features_of_contours,
|
||||
return_contours_of_image,
|
||||
return_parent_contours)
|
||||
|
||||
def pairwise(iterable):
    """Yield successive overlapping pairs taken from *iterable*.

    pairwise('ABCDEFG') → AB BC CD DE EF FG

    Any iterable is accepted, including one-shot iterators. Inputs with
    fewer than two items yield nothing.
    """
    iterator = iter(iterable)
    # Prime the window with the first item; the default keeps an empty
    # input from raising StopIteration here (the loop below then never runs).
    a = next(iterator, None)

    for b in iterator:
        yield a, b
        # Slide the window: the current right element becomes the next left.
        a = b
|
||||
|
||||
def return_x_start_end_mothers_childs_and_type_of_reading_order(
|
||||
x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff):
|
||||
|
||||
|
@ -1183,106 +1194,45 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
|
|||
textlines_con_changed.append(textlines_big_org_form)
|
||||
return textlines_con_changed
|
||||
|
||||
def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
|
||||
def order_of_regions(textline_mask, contours_main, contours_head, y_ref):
|
||||
##plt.imshow(textline_mask)
|
||||
##plt.show()
|
||||
"""
|
||||
print(len(contours_main),'contours_main')
|
||||
mada_n=textline_mask.sum(axis=1)
|
||||
y=mada_n[:]
|
||||
|
||||
y_help=np.zeros(len(y)+40)
|
||||
y_help[20:len(y)+20]=y
|
||||
x=np.arange(len(y))
|
||||
|
||||
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
|
||||
##plt.imshow(textline_mask[:,:])
|
||||
##plt.show()
|
||||
|
||||
sigma_gaus=8
|
||||
z= gaussian_filter1d(y_help, sigma_gaus)
|
||||
zneg_rev=-y_help+np.max(y_help)
|
||||
zneg=np.zeros(len(zneg_rev)+40)
|
||||
zneg[20:len(zneg_rev)+20]=zneg_rev
|
||||
zneg= gaussian_filter1d(zneg, sigma_gaus)
|
||||
|
||||
peaks, _ = find_peaks(z, height=0)
|
||||
peaks_neg, _ = find_peaks(zneg, height=0)
|
||||
peaks_neg=peaks_neg-20-20
|
||||
peaks=peaks-20
|
||||
"""
|
||||
textline_sum_along_width = textline_mask.sum(axis=1)
|
||||
|
||||
y = textline_sum_along_width[:]
|
||||
y = textline_mask.sum(axis=1) # horizontal projection profile
|
||||
y_padded = np.zeros(len(y) + 40)
|
||||
y_padded[20 : len(y) + 20] = y
|
||||
x = np.arange(len(y))
|
||||
|
||||
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
|
||||
|
||||
sigma_gaus = 8
|
||||
z = gaussian_filter1d(y_padded, sigma_gaus)
|
||||
zneg_rev = -y_padded + np.max(y_padded)
|
||||
#z = gaussian_filter1d(y_padded, sigma_gaus)
|
||||
#peaks, _ = find_peaks(z, height=0)
|
||||
#peaks = peaks - 20
|
||||
zneg_rev = np.max(y_padded) - y_padded
|
||||
zneg = np.zeros(len(zneg_rev) + 40)
|
||||
zneg[20 : len(zneg_rev) + 20] = zneg_rev
|
||||
zneg = gaussian_filter1d(zneg, sigma_gaus)
|
||||
|
||||
peaks, _ = find_peaks(z, height=0)
|
||||
peaks_neg, _ = find_peaks(zneg, height=0)
|
||||
peaks_neg = peaks_neg - 20 - 20
|
||||
peaks = peaks - 20
|
||||
|
||||
##plt.plot(z)
|
||||
##plt.show()
|
||||
if contours_main != None:
|
||||
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
|
||||
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
|
||||
cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
|
||||
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
|
||||
x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
|
||||
x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
|
||||
cx_main, cy_main = find_center_of_contours(contours_main)
|
||||
cx_head, cy_head = find_center_of_contours(contours_head)
|
||||
|
||||
y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
|
||||
y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
|
||||
peaks_neg_new = np.append(np.insert(peaks_neg, 0, 0), textline_mask.shape[0])
|
||||
# offset from bbox of mask
|
||||
peaks_neg_new += y_ref
|
||||
|
||||
if len(contours_header) != None:
|
||||
areas_header = np.array([cv2.contourArea(contours_header[j]) for j in range(len(contours_header))])
|
||||
M_header = [cv2.moments(contours_header[j]) for j in range(len(contours_header))]
|
||||
cx_header = [(M_header[j]["m10"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
|
||||
cy_header = [(M_header[j]["m01"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
|
||||
|
||||
x_min_header = np.array([np.min(contours_header[j][:, 0, 0]) for j in range(len(contours_header))])
|
||||
x_max_header = np.array([np.max(contours_header[j][:, 0, 0]) for j in range(len(contours_header))])
|
||||
|
||||
y_min_header = np.array([np.min(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
|
||||
y_max_header = np.array([np.max(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
|
||||
# print(cy_main,'mainy')
|
||||
|
||||
peaks_neg_new = []
|
||||
peaks_neg_new.append(0 + y_ref)
|
||||
for iii in range(len(peaks_neg)):
|
||||
peaks_neg_new.append(peaks_neg[iii] + y_ref)
|
||||
peaks_neg_new.append(textline_mask.shape[0] + y_ref)
|
||||
|
||||
if len(cy_main) > 0 and np.max(cy_main) > np.max(peaks_neg_new):
|
||||
cy_main = np.array(cy_main) * (np.max(peaks_neg_new) / np.max(cy_main)) - 10
|
||||
if contours_main != None:
|
||||
indexer_main = np.arange(len(contours_main))
|
||||
if contours_main != None:
|
||||
len_main = len(contours_main)
|
||||
else:
|
||||
len_main = 0
|
||||
|
||||
matrix_of_orders = np.zeros((len(contours_main) + len(contours_header), 5))
|
||||
matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_header))
|
||||
matrix_of_orders = np.zeros((len(contours_main) + len(contours_head), 5), dtype=int)
|
||||
matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_head))
|
||||
matrix_of_orders[: len(contours_main), 1] = 1
|
||||
matrix_of_orders[len(contours_main) :, 1] = 2
|
||||
matrix_of_orders[: len(contours_main), 2] = cx_main
|
||||
matrix_of_orders[len(contours_main) :, 2] = cx_header
|
||||
matrix_of_orders[len(contours_main) :, 2] = cx_head
|
||||
matrix_of_orders[: len(contours_main), 3] = cy_main
|
||||
matrix_of_orders[len(contours_main) :, 3] = cy_header
|
||||
matrix_of_orders[len(contours_main) :, 3] = cy_head
|
||||
matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main))
|
||||
matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_header))
|
||||
matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_head))
|
||||
|
||||
# print(peaks_neg_new,'peaks_neg_new')
|
||||
# print(matrix_of_orders,'matrix_of_orders')
|
||||
|
@ -1290,27 +1240,14 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
|
|||
final_indexers_sorted = []
|
||||
final_types = []
|
||||
final_index_type = []
|
||||
for i in range(len(peaks_neg_new) - 1):
|
||||
top = peaks_neg_new[i]
|
||||
down = peaks_neg_new[i + 1]
|
||||
indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) &
|
||||
((matrix_of_orders[:, 3] < down))]
|
||||
cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) &
|
||||
((matrix_of_orders[:, 3] < down))]
|
||||
cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) &
|
||||
((matrix_of_orders[:, 3] < down))]
|
||||
types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) &
|
||||
(matrix_of_orders[:, 3] < down)]
|
||||
index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) &
|
||||
(matrix_of_orders[:, 3] < down)]
|
||||
for top, bot in pairwise(peaks_neg_new):
|
||||
indexes_in, types_in, cxs_in, cys_in, typed_indexes_in = \
|
||||
matrix_of_orders[(matrix_of_orders[:, 3] >= top) &
|
||||
(matrix_of_orders[:, 3] < bot)].T
|
||||
sorted_inside = np.argsort(cxs_in)
|
||||
ind_in_int = indexes_in[sorted_inside]
|
||||
ind_in_type = types_of_text[sorted_inside]
|
||||
ind_ind_type = index_types_of_text[sorted_inside]
|
||||
for j in range(len(ind_in_int)):
|
||||
final_indexers_sorted.append(int(ind_in_int[j]))
|
||||
final_types.append(int(ind_in_type[j]))
|
||||
final_index_type.append(int(ind_ind_type[j]))
|
||||
final_indexers_sorted.extend(indexes_in[sorted_inside])
|
||||
final_types.extend(types_in[sorted_inside])
|
||||
final_index_type.extend(typed_indexes_in[sorted_inside])
|
||||
|
||||
##matrix_of_orders[:len_main,4]=final_indexers_sorted[:]
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue