order_of_regions: simplify

- use new `find_center_of_contours`
- avoid unused calculations
- avoid loops in favour of array processing
This commit is contained in:
Robert Sachunsky 2025-10-02 22:28:52 +02:00
parent a1c8fd4467
commit 4950e6bd78

View file

@ -15,10 +15,21 @@ from scipy.ndimage import gaussian_filter1d
from .is_nan import isNaN from .is_nan import isNaN
from .contour import (contours_in_same_horizon, from .contour import (contours_in_same_horizon,
find_center_of_contours,
find_new_features_of_contours, find_new_features_of_contours,
return_contours_of_image, return_contours_of_image,
return_parent_contours) return_parent_contours)
def pairwise(iterable):
    """Yield overlapping pairs of consecutive items from *iterable*.

    pairwise('ABCDEFG') → AB BC CD DE EF FG

    Nothing is yielded when the input holds fewer than two items.
    """
    items = iter(iterable)
    try:
        previous = next(items)
    except StopIteration:
        # empty input: there is no pair to form
        return
    for current in items:
        yield previous, current
        # the right-hand item becomes the left-hand item of the next pair
        previous = current
def return_x_start_end_mothers_childs_and_type_of_reading_order( def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff): x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff):
@ -1183,106 +1194,45 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
textlines_con_changed.append(textlines_big_org_form) textlines_con_changed.append(textlines_big_org_form)
return textlines_con_changed return textlines_con_changed
def order_of_regions(textline_mask, contours_main, contours_header, y_ref): def order_of_regions(textline_mask, contours_main, contours_head, y_ref):
##plt.imshow(textline_mask) ##plt.imshow(textline_mask)
##plt.show() ##plt.show()
""" y = textline_mask.sum(axis=1) # horizontal projection profile
print(len(contours_main),'contours_main')
mada_n=textline_mask.sum(axis=1)
y=mada_n[:]
y_help=np.zeros(len(y)+40)
y_help[20:len(y)+20]=y
x=np.arange(len(y))
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
##plt.imshow(textline_mask[:,:])
##plt.show()
sigma_gaus=8
z= gaussian_filter1d(y_help, sigma_gaus)
zneg_rev=-y_help+np.max(y_help)
zneg=np.zeros(len(zneg_rev)+40)
zneg[20:len(zneg_rev)+20]=zneg_rev
zneg= gaussian_filter1d(zneg, sigma_gaus)
peaks, _ = find_peaks(z, height=0)
peaks_neg, _ = find_peaks(zneg, height=0)
peaks_neg=peaks_neg-20-20
peaks=peaks-20
"""
textline_sum_along_width = textline_mask.sum(axis=1)
y = textline_sum_along_width[:]
y_padded = np.zeros(len(y) + 40) y_padded = np.zeros(len(y) + 40)
y_padded[20 : len(y) + 20] = y y_padded[20 : len(y) + 20] = y
x = np.arange(len(y))
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
sigma_gaus = 8 sigma_gaus = 8
z = gaussian_filter1d(y_padded, sigma_gaus) #z = gaussian_filter1d(y_padded, sigma_gaus)
zneg_rev = -y_padded + np.max(y_padded) #peaks, _ = find_peaks(z, height=0)
#peaks = peaks - 20
zneg_rev = np.max(y_padded) - y_padded
zneg = np.zeros(len(zneg_rev) + 40) zneg = np.zeros(len(zneg_rev) + 40)
zneg[20 : len(zneg_rev) + 20] = zneg_rev zneg[20 : len(zneg_rev) + 20] = zneg_rev
zneg = gaussian_filter1d(zneg, sigma_gaus) zneg = gaussian_filter1d(zneg, sigma_gaus)
peaks, _ = find_peaks(z, height=0)
peaks_neg, _ = find_peaks(zneg, height=0) peaks_neg, _ = find_peaks(zneg, height=0)
peaks_neg = peaks_neg - 20 - 20 peaks_neg = peaks_neg - 20 - 20
peaks = peaks - 20
##plt.plot(z) ##plt.plot(z)
##plt.show() ##plt.show()
if contours_main != None: cx_main, cy_main = find_center_of_contours(contours_main)
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) cx_head, cy_head = find_center_of_contours(contours_head)
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) peaks_neg_new = np.append(np.insert(peaks_neg, 0, 0), textline_mask.shape[0])
y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) # offset from bbox of mask
peaks_neg_new += y_ref
if len(contours_header) != None:
areas_header = np.array([cv2.contourArea(contours_header[j]) for j in range(len(contours_header))])
M_header = [cv2.moments(contours_header[j]) for j in range(len(contours_header))]
cx_header = [(M_header[j]["m10"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
cy_header = [(M_header[j]["m01"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
x_min_header = np.array([np.min(contours_header[j][:, 0, 0]) for j in range(len(contours_header))]) matrix_of_orders = np.zeros((len(contours_main) + len(contours_head), 5), dtype=int)
x_max_header = np.array([np.max(contours_header[j][:, 0, 0]) for j in range(len(contours_header))]) matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_head))
y_min_header = np.array([np.min(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
y_max_header = np.array([np.max(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
# print(cy_main,'mainy')
peaks_neg_new = []
peaks_neg_new.append(0 + y_ref)
for iii in range(len(peaks_neg)):
peaks_neg_new.append(peaks_neg[iii] + y_ref)
peaks_neg_new.append(textline_mask.shape[0] + y_ref)
if len(cy_main) > 0 and np.max(cy_main) > np.max(peaks_neg_new):
cy_main = np.array(cy_main) * (np.max(peaks_neg_new) / np.max(cy_main)) - 10
if contours_main != None:
indexer_main = np.arange(len(contours_main))
if contours_main != None:
len_main = len(contours_main)
else:
len_main = 0
matrix_of_orders = np.zeros((len(contours_main) + len(contours_header), 5))
matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_header))
matrix_of_orders[: len(contours_main), 1] = 1 matrix_of_orders[: len(contours_main), 1] = 1
matrix_of_orders[len(contours_main) :, 1] = 2 matrix_of_orders[len(contours_main) :, 1] = 2
matrix_of_orders[: len(contours_main), 2] = cx_main matrix_of_orders[: len(contours_main), 2] = cx_main
matrix_of_orders[len(contours_main) :, 2] = cx_header matrix_of_orders[len(contours_main) :, 2] = cx_head
matrix_of_orders[: len(contours_main), 3] = cy_main matrix_of_orders[: len(contours_main), 3] = cy_main
matrix_of_orders[len(contours_main) :, 3] = cy_header matrix_of_orders[len(contours_main) :, 3] = cy_head
matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main)) matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main))
matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_header)) matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_head))
# print(peaks_neg_new,'peaks_neg_new') # print(peaks_neg_new,'peaks_neg_new')
# print(matrix_of_orders,'matrix_of_orders') # print(matrix_of_orders,'matrix_of_orders')
@ -1290,27 +1240,14 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
final_indexers_sorted = [] final_indexers_sorted = []
final_types = [] final_types = []
final_index_type = [] final_index_type = []
for i in range(len(peaks_neg_new) - 1): for top, bot in pairwise(peaks_neg_new):
top = peaks_neg_new[i] indexes_in, types_in, cxs_in, cys_in, typed_indexes_in = \
down = peaks_neg_new[i + 1] matrix_of_orders[(matrix_of_orders[:, 3] >= top) &
indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & (matrix_of_orders[:, 3] < bot)].T
((matrix_of_orders[:, 3] < down))]
cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) &
((matrix_of_orders[:, 3] < down))]
cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) &
((matrix_of_orders[:, 3] < down))]
types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) &
(matrix_of_orders[:, 3] < down)]
index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) &
(matrix_of_orders[:, 3] < down)]
sorted_inside = np.argsort(cxs_in) sorted_inside = np.argsort(cxs_in)
ind_in_int = indexes_in[sorted_inside] final_indexers_sorted.extend(indexes_in[sorted_inside])
ind_in_type = types_of_text[sorted_inside] final_types.extend(types_in[sorted_inside])
ind_ind_type = index_types_of_text[sorted_inside] final_index_type.extend(typed_indexes_in[sorted_inside])
for j in range(len(ind_in_int)):
final_indexers_sorted.append(int(ind_in_int[j]))
final_types.append(int(ind_in_type[j]))
final_index_type.append(int(ind_ind_type[j]))
##matrix_of_orders[:len_main,4]=final_indexers_sorted[:] ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:]