From 31f240c3b8a6eaa034b5ae02cf009930e8275725 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 2 Sep 2025 15:04:04 +0200 Subject: [PATCH] do_image_rotation, do_work_of_slopes_new_curved: pass arrays via shared memory --- src/eynollah/eynollah.py | 12 +++++--- src/eynollah/utils/separate_lines.py | 12 ++++++-- src/eynollah/utils/shm.py | 45 ++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 7 deletions(-) create mode 100644 src/eynollah/utils/shm.py diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index b450b17..42af8e4 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -95,6 +95,7 @@ from .utils.drop_capitals import ( ) from .utils.marginals import get_marginals from .utils.resize import resize_image +from .utils.shm import share_ndarray from .utils import ( boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, @@ -1582,9 +1583,11 @@ class Eynollah: if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") - results = self.executor.map(partial(do_work_of_slopes_new_curved, - textline_mask_tot_ea=textline_mask_tot, - mask_texts_only=mask_texts_only, + with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: + with share_ndarray(mask_texts_only) as mask_texts_only_shared: + results = self.executor.map(partial(do_work_of_slopes_new_curved, + textline_mask_tot_ea=textline_mask_tot_shared, + mask_texts_only=mask_texts_only_shared, num_col=num_col, scale_par=scale_par, slope_deskew=slope_deskew, @@ -1593,7 +1596,8 @@ class Eynollah: logger=self.logger, plotter=self.plotter,), boxes, contours, contours_par, range(len(contours_par))) - #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + results = list(results) # exhaust prior to release self.logger.debug("exit get_slopes_and_deskew_new_curved") return tuple(zip(*results)) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 3363367..e4bb953 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -15,6 +15,7 @@ from .contour import ( return_contours_of_interested_textline, find_contours_mean_y_diff, ) +from .shm import share_ndarray, wrap_ndarray_shared from . import ( find_num_col_deskew, crop_image_inside_box, @@ -1454,7 +1455,8 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl return img_patch_interest_revised -def do_image_rotation(angle, img, sigma_des, logger=None): +@wrap_ndarray_shared(kw='img') +def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None): if logger is None: logger = getLogger(__package__) img_rot = rotate_image(img, angle) @@ -1521,7 +1523,8 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map): if logger is None: logger = getLogger(__package__) - results = list(map(partial(do_image_rotation, img=img, sigma_des=sigma_des, logger=logger), angles)) + with share_ndarray(img) as img_shared: + results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=logger), angles)) if plotter: plotter.save_plot_of_rotation_angle(angles, results) try: @@ -1594,9 +1597,12 @@ def do_work_of_slopes_new( return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope +@wrap_ndarray_shared(kw='textline_mask_tot_ea') +@wrap_ndarray_shared(kw='mask_texts_only') def do_work_of_slopes_new_curved( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, mask_texts_only, num_col, scale_par, slope_deskew, + textline_mask_tot_ea=None, mask_texts_only=None, + num_col=1, scale_par=1.0, slope_deskew=0.0, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: diff --git a/src/eynollah/utils/shm.py b/src/eynollah/utils/shm.py new file mode 100644 index 0000000..4b51053 --- /dev/null +++ b/src/eynollah/utils/shm.py @@ -0,0 +1,45 @@ +from multiprocessing import shared_memory +from contextlib import contextmanager +from functools import wraps +import numpy as np + +@contextmanager +def share_ndarray(array: np.ndarray): + size = np.dtype(array.dtype).itemsize * np.prod(array.shape) + shm = shared_memory.SharedMemory(create=True, size=size) + try: + shared_array = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf) + shared_array[:] = array[:] + shared_array.flags["WRITEABLE"] = False + yield dict(shape=array.shape, dtype=array.dtype, name=shm.name) + finally: + shm.close() + shm.unlink() + +@contextmanager +def ndarray_shared(array: dict): + shm = shared_memory.SharedMemory(name=array['name']) + try: + array = np.ndarray(array['shape'], dtype=array['dtype'], buffer=shm.buf) + yield array + finally: + shm.close() + +def wrap_ndarray_shared(kw=None): + def wrapper(f): + if kw is None: + @wraps(f) + def shared_func(array, *args, **kwargs): + with ndarray_shared(array) as ndarray: + return f(ndarray, *args, **kwargs) + return shared_func + else: + @wraps(f) + def shared_func(*args, **kwargs): + array = kwargs.pop(kw) + with ndarray_shared(array) as ndarray: + kwargs[kw] = ndarray + return f(*args, **kwargs) + return shared_func + return wrapper +