From 2da718f76f1e7dcb0f61180e48607ec40289dd49 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 23 Apr 2026 21:05:20 +0200 Subject: [PATCH] writer, do_work_of_slopes*: drop passing bboxes around (needed no more) --- src/eynollah/eynollah.py | 54 +++++++--------------------- src/eynollah/utils/__init__.py | 3 -- src/eynollah/utils/separate_lines.py | 6 ++-- src/eynollah/writer.py | 30 ++++------------ 4 files changed, 21 insertions(+), 72 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 961cdaa..54f603e 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1006,7 +1006,6 @@ class Eynollah: args_textlines = np.arange(len(polygons_of_textlines)) all_found_textline_polygons = [] slopes = [] - all_box_coord =[] for index, con_region_ind in enumerate(contours_par): results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False) @@ -1026,17 +1025,12 @@ class Eynollah: all_found_textline_polygons.append(textlines_ins)#[::-1]) slopes.append(slope_deskew) - crop_coor = box2rect(boxes[index]) - all_box_coord.append(crop_coor) - - return (all_found_textline_polygons, - all_box_coord, - slopes) + return all_found_textline_polygons, slopes def get_slopes_and_deskew_new_curved(self, contours_par, textline_mask_tot, boxes, num_col, slope_deskew, name): if not len(contours_par): - return [], [], [] + return [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") results = map(partial(do_work_of_slopes_new_curved, textline_mask_tot_ea=textline_mask_tot, @@ -1049,7 +1043,7 @@ class Eynollah: name=name), boxes, contours_par) results = list(results) # exhaust prior to release - #textline_polygons, box_coord, slopes = zip(*results) + #textline_polygons, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_curved") return tuple(zip(*results)) @@ -2064,7 +2058,7 @@ class Eynollah: ) def separate_marginals_to_left_and_right_and_order_from_top_to_down( - self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, + self, polygons_of_marginals, all_found_textline_polygons_marginals, slopes_marginals, conf_marginals, mid_point_of_page_width): cx_marg, cy_marg = find_center_of_contours(polygons_of_marginals) cx_marg = ensure_array(cx_marg) @@ -2086,7 +2080,6 @@ class Eynollah: return (*splitsort(polygons_of_marginals), *splitsort(all_found_textline_polygons_marginals), - *splitsort(all_box_coord_marginals), *splitsort(slopes_marginals), *splitsort(conf_marginals)) @@ -2264,15 +2257,12 @@ class Eynollah: page_coord=page_coord, order_of_texts=order_text_new, all_found_textline_polygons=all_found_textline_polygons, - all_box_coord=page_coord, found_polygons_images=[], found_polygons_tables=[], found_polygons_marginals_left=[], found_polygons_marginals_right=[], all_found_textline_polygons_marginals_left=[], all_found_textline_polygons_marginals_right=[], - all_box_coord_marginals_left=[], - all_box_coord_marginals_right=[], slopes=slopes, slopes_marginals_left=[], slopes_marginals_right=[], @@ -2337,15 +2327,12 @@ class Eynollah: page_coord=page_coord, order_of_texts=[], all_found_textline_polygons=[], - all_box_coord=[], found_polygons_images=[], found_polygons_tables=[], found_polygons_marginals_left=[], found_polygons_marginals_right=[], all_found_textline_polygons_marginals_left=[], all_found_textline_polygons_marginals_right=[], - all_box_coord_marginals_left=[], - all_box_coord_marginals_right=[], slopes=[], slopes_marginals_left=[], slopes_marginals_right=[], @@ -2486,12 +2473,12 @@ class Eynollah: #print("text region early 5 in %.1fs", time.time() - t0) ## birdan sora chock chakir if not self.curved_line: - all_found_textline_polygons, \ - all_box_coord, slopes = self.get_slopes_and_deskew_new_light2( + all_found_textline_polygons, slopes = \ + self.get_slopes_and_deskew_new_light2( polygons_of_textregions, textline_mask_tot_ea_org, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, \ - all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_light2( + all_found_textline_polygons_marginals, slopes_marginals = \ + self.get_slopes_and_deskew_new_light2( polygons_of_marginals, textline_mask_tot_ea_org, boxes_marginals, slope_deskew) @@ -2503,15 +2490,15 @@ class Eynollah: all_found_textline_polygons_marginals) else: textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) - all_found_textline_polygons, \ - all_box_coord, slopes = self.get_slopes_and_deskew_new_curved( + all_found_textline_polygons, slopes = \ + self.get_slopes_and_deskew_new_curved( polygons_of_textregions, textline_mask_tot_ea_erode, boxes_text, num_col_classifier, slope_deskew, image['name']) all_found_textline_polygons = small_textlines_to_parent_adherence2( all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, \ - all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_curved( + all_found_textline_polygons_marginals, slopes_marginals = \ + self.get_slopes_and_deskew_new_curved( polygons_of_marginals, textline_mask_tot_ea_erode, boxes_marginals, num_col_classifier, slope_deskew, image['name']) @@ -2533,8 +2520,6 @@ class Eynollah: polygons_of_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, - all_box_coord_marginals_left, - all_box_coord_marginals_right, slopes_marginals_left, slopes_marginals_right, conf_marginals_left, @@ -2542,7 +2527,6 @@ class Eynollah: self.separate_marginals_to_left_and_right_and_order_from_top_to_down( polygons_of_marginals, all_found_textline_polygons_marginals, - all_box_coord_marginals, slopes_marginals, conf_marginals, 0.5 * text_regions_p.shape[1]) @@ -2556,8 +2540,6 @@ class Eynollah: polygons_of_textregions_h, polygons_of_textregions_d, polygons_of_textregions_h_d, - all_box_coord, - all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, @@ -2568,7 +2550,6 @@ class Eynollah: regions_fully, polygons_of_textregions, polygons_of_textregions_d, - all_box_coord, all_found_textline_polygons, slopes, conf_textregions) @@ -2576,10 +2557,6 @@ class Eynollah: if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page, image['name']) self.plotter.save_plot_of_layout_all(text_regions_p, image_page, image['name']) - ##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline( - ##text_regions_p, polygons_of_drop_capitals, polygons_of_textregions, polygons_of_textregions_h, - ##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, - ##kernel=KERNEL, curved_line=self.curved_line) else: polygons_of_drop_capitals = [] polygons_of_textregions_h = [] @@ -2640,8 +2617,6 @@ class Eynollah: order_of_texts=order_text_new, all_found_textline_polygons=all_found_textline_polygons, all_found_textline_polygons_h=all_found_textline_polygons_h, - all_box_coord=all_box_coord, - all_box_coord_h=all_box_coord_h, found_polygons_images=polygons_of_images, found_polygons_tables=polygons_of_tables, found_polygons_drop_capitals=polygons_of_drop_capitals, @@ -2649,8 +2624,6 @@ class Eynollah: found_polygons_marginals_right=polygons_of_marginals_right, all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right, - all_box_coord_marginals_left=all_box_coord_marginals_left, - all_box_coord_marginals_right=all_box_coord_marginals_right, slopes=slopes, slopes_h=slopes_h, slopes_marginals_left=slopes_marginals_left, @@ -2671,15 +2644,12 @@ class Eynollah: page_coord=page_coord, order_of_texts=order_text_new, all_found_textline_polygons=all_found_textline_polygons, - all_box_coord=all_box_coord, found_polygons_images=polygons_of_images, found_polygons_tables=polygons_of_tables, found_polygons_marginals_left=polygons_of_marginals_left, found_polygons_marginals_right=polygons_of_marginals_right, all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right, - all_box_coord_marginals_left=all_box_coord_marginals_left, - all_box_coord_marginals_right=all_box_coord_marginals_right, slopes=slopes, slopes_marginals_left=slopes_marginals_left, slopes_marginals_right=slopes_marginals_right, diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 6b3b871..1d48ac5 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -887,7 +887,6 @@ def split_textregion_main_vs_head( regions_model_full, polygons_of_textregions, polygons_of_textregions_d, - all_box_coord, all_found_textline_polygons, slopes, conf_textregions, @@ -960,8 +959,6 @@ def split_textregion_main_vs_head( select(polygons_of_textregions, head), select(polygons_of_textregions_d, main), select(polygons_of_textregions_d, head), - select(all_box_coord, main), - select(all_box_coord, head), select(all_found_textline_polygons, main), select(all_found_textline_polygons, head), select(slopes, main), diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 693ee77..b173d32 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1645,7 +1645,6 @@ def do_work_of_slopes_new_curved( # print(slope, slope_deskew) - crop_coor = box2rect(box_text) if abs(slope) < 45: mask_parent = np.zeros((h, w), dtype=np.uint8) mask_parent = cv2.fillPoly(mask_parent, pts=[contour_par - [x, y]], color=1) @@ -1676,7 +1675,7 @@ def do_work_of_slopes_new_curved( slope, contour_par, box_text) - return textlines_cnt_per_region[::-1], crop_coor, slope + return textlines_cnt_per_region[::-1], slope def do_work_of_slopes_new_light( box_text, contour, contour_par, @@ -1688,7 +1687,6 @@ def do_work_of_slopes_new_light( logger.debug('enter do_work_of_slopes_new_light') x, y, w, h = box_text - crop_coor = box2rect(box_text) mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) all_text_region_raw = textline_mask_tot_ea * mask_textline @@ -1703,4 +1701,4 @@ def do_work_of_slopes_new_light( cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - return cnt_clean_rot, crop_coor, slope_deskew + return cnt_clean_rot, slope_deskew diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index ea6862c..dd39bac 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -51,7 +51,7 @@ class EynollahXmlWriter: poly = make_valid(clip_by_rect(poly, 0, 0, self.image_width, self.image_height)) return points_from_polygon(poly.exterior.coords[:-1]) - def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): + def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, slopes, counter, ocr_all_textlines_textregion): for j, polygon_textline in enumerate(all_found_textline_polygons[region_idx]): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) @@ -60,12 +60,7 @@ class EynollahXmlWriter: textline.set_TextEquiv([TextEquivType(Unicode=ocr_all_textlines_textregion[j])]) text_region.add_TextLine(textline) text_region.set_orientation(-slopes[region_idx]) - region_bboxes = all_box_coord[region_idx] offset = [page_coord[2], page_coord[0]] - # FIXME: or actually... self.curved_line or np.abs(slopes[region_idx]) > 45? - if self.curved_line and np.abs(slopes[region_idx]) > 45: - offset[0] += region_bboxes[2] - offset[1] += region_bboxes[0] coords.set_points(self.calculate_points(polygon_textline, offset)) def write_pagexml(self, pcgts): @@ -80,15 +75,12 @@ class EynollahXmlWriter: page_coord, order_of_texts, all_found_textline_polygons, - all_box_coord, found_polygons_images, found_polygons_tables, found_polygons_marginals_left, found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, - all_box_coord_marginals_left, - all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, @@ -111,8 +103,6 @@ class EynollahXmlWriter: order_of_texts=order_of_texts, all_found_textline_polygons=all_found_textline_polygons, all_found_textline_polygons_h=[], - all_box_coord=all_box_coord, - all_box_coord_h=[], found_polygons_images=found_polygons_images, found_polygons_tables=found_polygons_tables, found_polygons_drop_capitals=[], @@ -120,8 +110,6 @@ class EynollahXmlWriter: found_polygons_marginals_right=found_polygons_marginals_right, all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right, - all_box_coord_marginals_left=all_box_coord_marginals_left, - all_box_coord_marginals_right=all_box_coord_marginals_right, slopes=slopes, slopes_h=[], slopes_marginals_left=slopes_marginals_left, @@ -147,8 +135,6 @@ class EynollahXmlWriter: order_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, - all_box_coord, - all_box_coord_h, found_polygons_images, found_polygons_tables, found_polygons_drop_capitals, @@ -156,8 +142,6 @@ class EynollahXmlWriter: found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, - all_box_coord_marginals_left, - all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, @@ -214,7 +198,7 @@ class EynollahXmlWriter: else: ocr_textlines = None self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, - all_box_coord, slopes, counter, ocr_textlines) + slopes, counter, ocr_textlines) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) for mm, region_contour in enumerate(found_polygons_text_region_h): @@ -231,7 +215,7 @@ class EynollahXmlWriter: else: ocr_textlines = None self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, - all_box_coord_h, slopes_h, counter, ocr_textlines) + slopes_h, counter, ocr_textlines) for mm, region_contour in enumerate(found_polygons_drop_capitals): dropcapital = TextRegionType( @@ -241,14 +225,13 @@ class EynollahXmlWriter: if conf_drops: dropcapital.Coords.set_conf(conf_drops[mm]) page.add_TextRegion(dropcapital) - all_box_coord_drop = [[0, 0, 0, 0]] slopes_drop = [0] if ocr_all_textlines_drop: ocr_textlines = ocr_all_textlines_drop[mm] else: ocr_textlines = None self.serialize_lines_in_region(dropcapital, [[found_polygons_drop_capitals[mm]]], 0, page_coord, - all_box_coord_drop, slopes_drop, counter, ocr_textlines) + slopes_drop, counter, ocr_textlines) for mm, region_contour in enumerate(found_polygons_marginals_left): marginal = TextRegionType( @@ -262,7 +245,8 @@ class EynollahXmlWriter: ocr_textlines = ocr_all_textlines_marginals_left[mm] else: ocr_textlines = None - self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) + self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, + slopes_marginals_left, counter, ocr_textlines) for mm, region_contour in enumerate(found_polygons_marginals_right): marginal = TextRegionType( @@ -277,7 +261,7 @@ class EynollahXmlWriter: else: ocr_textlines = None self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, - all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) + slopes_marginals_right, counter, ocr_textlines) for mm, region_contour in enumerate(found_polygons_images): image = ImageRegionType(