get/do_work_of_slopes etc.: reduce call/return signatures

- `get_textregion_contours_in_org_image_light`: no longer needs
  to also return the unchanged contours here (see 41cc38c5); therefore
- `txt_con_org`: no longer needed
  (now a mere alias of `contours_only_text_parent`); also
- `index_by_text_par_con`: no longer needed (see previous commit),
  so do not pass/return it
- `get_slopes_and_deskew_*`: do not pass `contours_only_text`
  (where not used)
- `get_slopes_and_deskew_*`: do not return unchanged contours, boxes
- `do_work_of_slopes_*`: adapt accordingly
This commit is contained in:
Robert Sachunsky 2025-10-07 22:53:30 +02:00
parent 02a347a48a
commit d88ca18eec
3 changed files with 54 additions and 60 deletions

View file

@ -879,7 +879,7 @@ class Eynollah:
thresholding_for_fl_light_version=False, thresholding_for_fl_light_version=False,
threshold_art_class_textline=0.1): threshold_art_class_textline=0.1):
self.logger.debug("enter do_prediction") self.logger.debug("enter do_prediction (patches=%d)", patches)
img_height_model = model.layers[-1].output_shape[1] img_height_model = model.layers[-1].output_shape[1]
img_width_model = model.layers[-1].output_shape[2] img_width_model = model.layers[-1].output_shape[2]
@ -1856,7 +1856,7 @@ class Eynollah:
return sorted_textlines return sorted_textlines
def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): def get_slopes_and_deskew_new_light2(self, contours_par, textline_mask_tot, boxes, slope_deskew):
polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001)
cx_main_tot, cy_main_tot = find_center_of_contours(polygons_of_textlines) cx_main_tot, cy_main_tot = find_center_of_contours(polygons_of_textlines)
@ -1889,16 +1889,12 @@ class Eynollah:
all_box_coord.append(crop_coor) all_box_coord.append(crop_coor)
return (all_found_textline_polygons, return (all_found_textline_polygons,
boxes,
contours,
contours_par,
all_box_coord, all_box_coord,
np.array(range(len(contours_par))),
slopes) slopes)
def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
if not len(contours): if not len(contours):
return [], [], [], [], [], [], [] return [], [], []
self.logger.debug("enter get_slopes_and_deskew_new_light") self.logger.debug("enter get_slopes_and_deskew_new_light")
with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: with share_ndarray(textline_mask_tot) as textline_mask_tot_shared:
results = self.executor.map(partial(do_work_of_slopes_new_light, results = self.executor.map(partial(do_work_of_slopes_new_light,
@ -1906,15 +1902,15 @@ class Eynollah:
slope_deskew=slope_deskew, slope_deskew=slope_deskew,
textline_light=self.textline_light, textline_light=self.textline_light,
logger=self.logger,), logger=self.logger,),
boxes, contours, contours_par, range(len(contours_par))) boxes, contours, contours_par)
results = list(results) # exhaust prior to release results = list(results) # exhaust prior to release
#textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) #textline_polygons, box_coord, slopes = zip(*results)
self.logger.debug("exit get_slopes_and_deskew_new_light") self.logger.debug("exit get_slopes_and_deskew_new_light")
return tuple(zip(*results)) return tuple(zip(*results))
def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew):
if not len(contours): if not len(contours):
return [], [], [], [], [], [], [] return [], [], []
self.logger.debug("enter get_slopes_and_deskew_new") self.logger.debug("enter get_slopes_and_deskew_new")
with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: with share_ndarray(textline_mask_tot) as textline_mask_tot_shared:
results = self.executor.map(partial(do_work_of_slopes_new, results = self.executor.map(partial(do_work_of_slopes_new,
@ -1924,16 +1920,16 @@ class Eynollah:
KERNEL=KERNEL, KERNEL=KERNEL,
logger=self.logger, logger=self.logger,
plotter=self.plotter,), plotter=self.plotter,),
boxes, contours, contours_par, range(len(contours_par))) boxes, contours, contours_par)
results = list(results) # exhaust prior to release results = list(results) # exhaust prior to release
#textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) #textline_polygons, box_coord, slopes = zip(*results)
self.logger.debug("exit get_slopes_and_deskew_new") self.logger.debug("exit get_slopes_and_deskew_new")
return tuple(zip(*results)) return tuple(zip(*results))
def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes, def get_slopes_and_deskew_new_curved(self, contours_par, textline_mask_tot, boxes,
mask_texts_only, num_col, scale_par, slope_deskew): mask_texts_only, num_col, scale_par, slope_deskew):
if not len(contours): if not len(contours_par):
return [], [], [], [], [], [], [] return [], [], []
self.logger.debug("enter get_slopes_and_deskew_new_curved") self.logger.debug("enter get_slopes_and_deskew_new_curved")
with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: with share_ndarray(textline_mask_tot) as textline_mask_tot_shared:
with share_ndarray(mask_texts_only) as mask_texts_only_shared: with share_ndarray(mask_texts_only) as mask_texts_only_shared:
@ -1947,9 +1943,9 @@ class Eynollah:
KERNEL=KERNEL, KERNEL=KERNEL,
logger=self.logger, logger=self.logger,
plotter=self.plotter,), plotter=self.plotter,),
boxes, contours, contours_par, range(len(contours_par))) boxes, contours_par)
results = list(results) # exhaust prior to release results = list(results) # exhaust prior to release
#textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) #textline_polygons, box_coord, slopes = zip(*results)
self.logger.debug("exit get_slopes_and_deskew_new_curved") self.logger.debug("exit get_slopes_and_deskew_new_curved")
return tuple(zip(*results)) return tuple(zip(*results))
@ -4037,7 +4033,7 @@ class Eynollah:
def filter_contours_without_textline_inside( def filter_contours_without_textline_inside(
self, contours, text_con_org, contours_textline, self, contours_par, contours_textline,
contours_only_text_parent_d_ordered, contours_only_text_parent_d_ordered,
conf_contours_textregions): conf_contours_textregions):
@ -4049,12 +4045,11 @@ class Eynollah:
return [] return []
return list(np.array(lis)[indices]) return list(np.array(lis)[indices])
return (filterfun(contours), return (filterfun(contours_par),
filterfun(text_con_org),
filterfun(conf_contours_textregions),
filterfun(contours_textline), filterfun(contours_textline),
filterfun(contours_only_text_parent_d_ordered), filterfun(contours_only_text_parent_d_ordered),
indices filterfun(conf_contours_textregions),
# indices
) )
def separate_marginals_to_left_and_right_and_order_from_top_to_down( def separate_marginals_to_left_and_right_and_order_from_top_to_down(
@ -4592,12 +4587,11 @@ class Eynollah:
contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, contours_only_text_parent, contours_only_text_parent_d_ordered, text_only,
marginal_cnts=polygons_of_marginals) marginal_cnts=polygons_of_marginals)
#print("text region early 3.5 in %.1fs", time.time() - t0) #print("text region early 3.5 in %.1fs", time.time() - t0)
txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( conf_contours_textregions = get_textregion_contours_in_org_image_light(
contours_only_text_parent, self.image, confidence_matrix) contours_only_text_parent, self.image, confidence_matrix)
#txt_con_org = dilate_textregion_contours(txt_con_org)
#contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent) #contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
else: else:
txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( conf_contours_textregions = get_textregion_contours_in_org_image_light(
contours_only_text_parent, self.image, confidence_matrix) contours_only_text_parent, self.image, confidence_matrix)
#print("text region early 4 in %.1fs", time.time() - t0) #print("text region early 4 in %.1fs", time.time() - t0)
boxes_text = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_text = get_text_region_boxes_by_given_contours(contours_only_text_parent)
@ -4607,13 +4601,13 @@ class Eynollah:
if not self.curved_line: if not self.curved_line:
if self.light_version: if self.light_version:
if self.textline_light: if self.textline_light:
all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_found_textline_polygons, \
all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2( all_box_coord, slopes = self.get_slopes_and_deskew_new_light2(
txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, contours_only_text_parent, textline_mask_tot_ea_org,
boxes_text, slope_deskew) boxes_text, slope_deskew)
all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_found_textline_polygons_marginals, \
all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2( all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_light2(
polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, polygons_of_marginals, textline_mask_tot_ea_org,
boxes_marginals, slope_deskew) boxes_marginals, slope_deskew)
all_found_textline_polygons = dilate_textline_contours( all_found_textline_polygons = dilate_textline_contours(
@ -4622,46 +4616,46 @@ class Eynollah:
all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline")
all_found_textline_polygons_marginals = dilate_textline_contours( all_found_textline_polygons_marginals = dilate_textline_contours(
all_found_textline_polygons_marginals) all_found_textline_polygons_marginals)
contours_only_text_parent, txt_con_org, conf_contours_textregions, \ contours_only_text_parent, all_found_textline_polygons, \
all_found_textline_polygons, contours_only_text_parent_d_ordered, \ contours_only_text_parent_d_ordered, conf_contours_textregions = \
index_by_text_par_con = self.filter_contours_without_textline_inside( self.filter_contours_without_textline_inside(
contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent, all_found_textline_polygons,
contours_only_text_parent_d_ordered, conf_contours_textregions) contours_only_text_parent_d_ordered, conf_contours_textregions)
else: else:
textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ all_found_textline_polygons, \
index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light( all_box_coord, slopes = self.get_slopes_and_deskew_new_light(
txt_con_org, contours_only_text_parent, textline_mask_tot_ea, contours_only_text_parent, contours_only_text_parent, textline_mask_tot_ea,
boxes_text, slope_deskew) boxes_text, slope_deskew)
all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_found_textline_polygons_marginals, \
all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light( all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_light(
polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
boxes_marginals, slope_deskew) boxes_marginals, slope_deskew)
#all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(
# all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") # all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
else: else:
textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_found_textline_polygons, \
all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new( all_box_coord, slopes = self.get_slopes_and_deskew_new(
txt_con_org, contours_only_text_parent, textline_mask_tot_ea, contours_only_text_parent, contours_only_text_parent, textline_mask_tot_ea,
boxes_text, slope_deskew) boxes_text, slope_deskew)
all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_found_textline_polygons_marginals, \
all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new( all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new(
polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea,
boxes_marginals, slope_deskew) boxes_marginals, slope_deskew)
else: else:
scale_param = 1 scale_param = 1
textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_found_textline_polygons, \
all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved( all_box_coord, slopes = self.get_slopes_and_deskew_new_curved(
txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, contours_only_text_parent, textline_mask_tot_ea_erode,
boxes_text, text_only, boxes_text, text_only,
num_col_classifier, scale_param, slope_deskew) num_col_classifier, scale_param, slope_deskew)
all_found_textline_polygons = small_textlines_to_parent_adherence2( all_found_textline_polygons = small_textlines_to_parent_adherence2(
all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_found_textline_polygons_marginals, \
all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved( all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_curved(
polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, polygons_of_marginals, textline_mask_tot_ea_erode,
boxes_marginals, text_only, boxes_marginals, text_only,
num_col_classifier, scale_param, slope_deskew) num_col_classifier, scale_param, slope_deskew)
all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(
@ -4884,7 +4878,7 @@ class Eynollah:
conf_contours_textregions, conf_contours_textregions_h) conf_contours_textregions, conf_contours_textregions_h)
else: else:
pcgts = self.writer.build_pagexml_no_full_layout( pcgts = self.writer.build_pagexml_no_full_layout(
txt_con_org, page_coord, order_text_new, id_of_texts_tot, contours_only_text_parent, page_coord, order_text_new, id_of_texts_tot,
all_found_textline_polygons, all_box_coord, polygons_of_images, all_found_textline_polygons, all_box_coord, polygons_of_images,
polygons_of_marginals_left, polygons_of_marginals_right, polygons_of_marginals_left, polygons_of_marginals_right,
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,

View file

@ -216,7 +216,7 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix): def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix):
if not len(cnts): if not len(cnts):
return [], [] return []
confidence_matrix = cv2.resize(confidence_matrix, confidence_matrix = cv2.resize(confidence_matrix,
(img.shape[1] // 6, img.shape[0] // 6), (img.shape[1] // 6, img.shape[0] // 6),
@ -226,7 +226,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix):
cnt_mask = np.zeros(confidence_matrix.shape) cnt_mask = np.zeros(confidence_matrix.shape)
cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0) cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0)
confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask)) confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
return cnts, confs return confs
def return_contours_of_interested_textline(region_pre_p, label): def return_contours_of_interested_textline(region_pre_p, label):
# pixels of images are identified by 5 # pixels of images are identified by 5

View file

@ -1592,7 +1592,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map
@wrap_ndarray_shared(kw='textline_mask_tot_ea') @wrap_ndarray_shared(kw='textline_mask_tot_ea')
def do_work_of_slopes_new( def do_work_of_slopes_new(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par,
textline_mask_tot_ea=None, slope_deskew=0.0, textline_mask_tot_ea=None, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
): ):
@ -1647,12 +1647,12 @@ def do_work_of_slopes_new(
all_text_region_raw[mask_only_con_region == 0] = 0 all_text_region_raw[mask_only_con_region == 0] = 0
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text) cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text)
return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope return cnt_clean_rot, crop_coor, slope
@wrap_ndarray_shared(kw='textline_mask_tot_ea') @wrap_ndarray_shared(kw='textline_mask_tot_ea')
@wrap_ndarray_shared(kw='mask_texts_only') @wrap_ndarray_shared(kw='mask_texts_only')
def do_work_of_slopes_new_curved( def do_work_of_slopes_new_curved(
box_text, contour, contour_par, index_r_con, box_text, contour_par,
textline_mask_tot_ea=None, mask_texts_only=None, textline_mask_tot_ea=None, mask_texts_only=None,
num_col=1, scale_par=1.0, slope_deskew=0.0, num_col=1, scale_par=1.0, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None
@ -1743,11 +1743,11 @@ def do_work_of_slopes_new_curved(
slope_for_all, contour_par, slope_for_all, contour_par,
box_text, True) box_text, True)
return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope return textlines_cnt_per_region[::-1], crop_coor, slope
@wrap_ndarray_shared(kw='textline_mask_tot_ea') @wrap_ndarray_shared(kw='textline_mask_tot_ea')
def do_work_of_slopes_new_light( def do_work_of_slopes_new_light(
box_text, contour, contour_par, index_r_con, box_text, contour, contour_par,
textline_mask_tot_ea=None, slope_deskew=0, textline_light=True, textline_mask_tot_ea=None, slope_deskew=0, textline_light=True,
logger=None logger=None
): ):
@ -1777,4 +1777,4 @@ def do_work_of_slopes_new_light(
all_text_region_raw[mask_only_con_region == 0] = 0 all_text_region_raw[mask_only_con_region == 0] = 0
cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text) cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text)
return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope_deskew return cnt_clean_rot, crop_coor, slope_deskew