separate_marginals_to_left_and_right_and_order_from_top_to_down: simplify

- use the new `find_center_of_contours` instead of `find_new_features_of_contours`
- avoid loops in favour of array processing
- avoid repeated sorting (compute one `argsort` per side and reuse it)
This commit is contained in:
Robert Sachunsky 2025-10-02 21:07:35 +02:00
parent 81827c2942
commit 8c3d5eb0eb
2 changed files with 39 additions and 38 deletions

View file

@ -4418,52 +4418,53 @@ class Eynollah:
def separate_marginals_to_left_and_right_and_order_from_top_to_down( def separate_marginals_to_left_and_right_and_order_from_top_to_down(
self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals,
slopes_marginals, mid_point_of_page_width): slopes_marginals, mid_point_of_page_width):
cx_marg, cy_marg, _, _, _, _, _ = find_new_features_of_contours( cx_marg, cy_marg = find_center_of_contours(polygons_of_marginals)
polygons_of_marginals)
cx_marg = np.array(cx_marg) cx_marg = np.array(cx_marg)
cy_marg = np.array(cy_marg) cy_marg = np.array(cy_marg)
def split(lis):
array = np.array(lis)
return (list(array[cx_marg < mid_point_of_page_width]),
list(array[cx_marg >= mid_point_of_page_width]))
(poly_marg_left,
poly_marg_right) = \
split(polygons_of_marginals)
(all_found_textline_polygons_marginals_left,
all_found_textline_polygons_marginals_right) = \
split(all_found_textline_polygons_marginals)
poly_marg_left = list( np.array(polygons_of_marginals)[cx_marg < mid_point_of_page_width] ) (all_box_coord_marginals_left,
poly_marg_right = list( np.array(polygons_of_marginals)[cx_marg >= mid_point_of_page_width] ) all_box_coord_marginals_right) = \
split(all_box_coord_marginals)
all_found_textline_polygons_marginals_left = \ (slopes_marg_left,
list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] ) slopes_marg_right) = \
all_found_textline_polygons_marginals_right = \ split(slopes_marginals)
list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] )
all_box_coord_marginals_left = list( np.array(all_box_coord_marginals)[cx_marg < mid_point_of_page_width] ) (cy_marg_left,
all_box_coord_marginals_right = list( np.array(all_box_coord_marginals)[cx_marg >= mid_point_of_page_width] ) cy_marg_right) = \
split(cy_marg)
order_left = np.argsort(cy_marg_left)
order_right = np.argsort(cy_marg_right)
def sort_left(lis):
return list(np.array(lis)[order_left])
def sort_right(lis):
return list(np.array(lis)[order_right])
slopes_marg_left = list( np.array(slopes_marginals)[cx_marg < mid_point_of_page_width] ) ordered_left_marginals = sort_left(poly_marg_left)
slopes_marg_right = list( np.array(slopes_marginals)[cx_marg >= mid_point_of_page_width] ) ordered_right_marginals = sort_right(poly_marg_right)
cy_marg_left = cy_marg[cx_marg < mid_point_of_page_width] ordered_left_marginals_textline = sort_left(all_found_textline_polygons_marginals_left)
cy_marg_right = cy_marg[cx_marg >= mid_point_of_page_width] ordered_right_marginals_textline = sort_right(all_found_textline_polygons_marginals_right)
ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left), ordered_left_marginals_bbox = sort_left(all_box_coord_marginals_left)
key=lambda x: x[0])] ordered_right_marginals_bbox = sort_right(all_box_coord_marginals_right)
ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right),
key=lambda x: x[0])]
ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left, ordered_left_slopes_marginals = sort_left(slopes_marg_left)
all_found_textline_polygons_marginals_left), ordered_right_slopes_marginals = sort_right(slopes_marg_right)
key=lambda x: x[0])]
ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right,
all_found_textline_polygons_marginals_right),
key=lambda x: x[0])]
ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left,
all_box_coord_marginals_left),
key=lambda x: x[0])]
ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right,
all_box_coord_marginals_right),
key=lambda x: x[0])]
ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left),
key=lambda x: x[0])]
ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right),
key=lambda x: x[0])]
return (ordered_left_marginals, return (ordered_left_marginals,
ordered_right_marginals, ordered_right_marginals,

View file

@ -1417,7 +1417,7 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
imgray = cv2.cvtColor(sep_ver_hor_cross, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(sep_ver_hor_cross, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0) ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_cross,_=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) contours_cross,_=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cx_cross,cy_cross ,_ , _, _ ,_,_=find_new_features_of_contours(contours_cross) cx_cross, cy_cross = find_center_of_contours(contours_cross)
for ii in range(len(cx_cross)): for ii in range(len(cx_cross)):
img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])+5:int(cx_cross[ii])+40,0]=0 img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])+5:int(cx_cross[ii])+40,0]=0
img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])-40:int(cx_cross[ii])-4,0]=0 img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])-40:int(cx_cross[ii])-4,0]=0