From 8c3d5eb0eb0eccd97542a86b0d3385e95f4f1da0 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 2 Oct 2025 21:07:35 +0200 Subject: [PATCH] separate_marginals_to_left_and_right_and_order_from_top_to_down: simplify - use new `find_center_of_contours` - avoid loops in favour of array processing - avoid repeated sorting --- src/eynollah/eynollah.py | 75 +++++++++++++++++----------------- src/eynollah/utils/__init__.py | 2 +- 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index b2d9016..9eba3d3 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4418,52 +4418,53 @@ class Eynollah: def separate_marginals_to_left_and_right_and_order_from_top_to_down( self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width): - cx_marg, cy_marg, _, _, _, _, _ = find_new_features_of_contours( - polygons_of_marginals) - + cx_marg, cy_marg = find_center_of_contours(polygons_of_marginals) cx_marg = np.array(cx_marg) cy_marg = np.array(cy_marg) + + def split(lis): + array = np.array(lis) + return (list(array[cx_marg < mid_point_of_page_width]), + list(array[cx_marg >= mid_point_of_page_width])) + + (poly_marg_left, + poly_marg_right) = \ + split(polygons_of_marginals) + + (all_found_textline_polygons_marginals_left, + all_found_textline_polygons_marginals_right) = \ + split(all_found_textline_polygons_marginals) - poly_marg_left = list( np.array(polygons_of_marginals)[cx_marg < mid_point_of_page_width] ) - poly_marg_right = list( np.array(polygons_of_marginals)[cx_marg >= mid_point_of_page_width] ) + (all_box_coord_marginals_left, + all_box_coord_marginals_right) = \ + split(all_box_coord_marginals) - all_found_textline_polygons_marginals_left = \ - list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] ) - all_found_textline_polygons_marginals_right = \ - list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] ) + (slopes_marg_left, + slopes_marg_right) = \ + split(slopes_marginals) - all_box_coord_marginals_left = list( np.array(all_box_coord_marginals)[cx_marg < mid_point_of_page_width] ) - all_box_coord_marginals_right = list( np.array(all_box_coord_marginals)[cx_marg >= mid_point_of_page_width] ) + (cy_marg_left, + cy_marg_right) = \ + split(cy_marg) + + order_left = np.argsort(cy_marg_left) + order_right = np.argsort(cy_marg_right) + def sort_left(lis): + return list(np.array(lis)[order_left]) + def sort_right(lis): + return list(np.array(lis)[order_right]) - slopes_marg_left = list( np.array(slopes_marginals)[cx_marg < mid_point_of_page_width] ) - slopes_marg_right = list( np.array(slopes_marginals)[cx_marg >= mid_point_of_page_width] ) + ordered_left_marginals = sort_left(poly_marg_left) + ordered_right_marginals = sort_right(poly_marg_right) - cy_marg_left = cy_marg[cx_marg < mid_point_of_page_width] - cy_marg_right = cy_marg[cx_marg >= mid_point_of_page_width] + ordered_left_marginals_textline = sort_left(all_found_textline_polygons_marginals_left) + ordered_right_marginals_textline = sort_right(all_found_textline_polygons_marginals_right) - ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left), - key=lambda x: x[0])] - ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right), - key=lambda x: x[0])] + ordered_left_marginals_bbox = sort_left(all_box_coord_marginals_left) + ordered_right_marginals_bbox = sort_right(all_box_coord_marginals_right) - ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left, - all_found_textline_polygons_marginals_left), - key=lambda x: x[0])] - ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right, - all_found_textline_polygons_marginals_right), - key=lambda x: x[0])] - - ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left, - all_box_coord_marginals_left), - key=lambda x: x[0])] - ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right, - all_box_coord_marginals_right), - key=lambda x: x[0])] - - ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left), - key=lambda x: x[0])] - ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right), - key=lambda x: x[0])] + ordered_left_slopes_marginals = sort_left(slopes_marg_left) + ordered_right_slopes_marginals = sort_right(slopes_marg_right) return (ordered_left_marginals, ordered_right_marginals, diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 52bf3ef..4eee5a9 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -1417,7 +1417,7 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( imgray = cv2.cvtColor(sep_ver_hor_cross, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_cross,_=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - cx_cross,cy_cross ,_ , _, _ ,_,_=find_new_features_of_contours(contours_cross) + cx_cross, cy_cross = find_center_of_contours(contours_cross) for ii in range(len(cx_cross)): img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])+5:int(cx_cross[ii])+40,0]=0 img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])-40:int(cx_cross[ii])-4,0]=0