From f8b4d29a59098f8a82e90b2790015568841bc53f Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 2 May 2025 00:13:11 +0200 Subject: [PATCH 01/29] docker: prepackage ocrd-all-module-dir.json --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 4785fc1..4ba498b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,6 +36,8 @@ COPY . . COPY ocrd-tool.json . # prepackage ocrd-tool.json as ocrd-all-tool.json RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename))/ocrd-all-tool.json +# prepackage ocrd-all-module-dir.json +RUN ocrd ocrd-tool ocrd-tool.json dump-module-dirs > $(dirname $(ocrd bashlib filename))/ocrd-all-module-dir.json # install everything and reduce image size RUN make install EXTRAS=OCR && rm -rf /build/eynollah # smoke test From e9179e1d3458c9261989cf996863881f03a24ebd Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 2 May 2025 00:13:06 +0200 Subject: [PATCH 02/29] docker: use latest core base stage --- Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 5f2bf34..73d4d34 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,9 @@ PIP ?= pip3 EXTRAS ?= # DOCKER_BASE_IMAGE = artefakt.dev.sbb.berlin:5000/sbb/ocrd_core:v2.68.0 -DOCKER_BASE_IMAGE = docker.io/ocrd/core-cuda-tf2:v3.3.0 -DOCKER_TAG = ocrd/eynollah +DOCKER_BASE_IMAGE ?= docker.io/ocrd/core-cuda-tf2:latest +DOCKER_TAG ?= ocrd/eynollah +DOCKER ?= docker #SEG_MODEL := https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz #SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz @@ -117,7 +118,7 @@ coverage: # Build docker image docker: - docker build \ + $(DOCKER) build \ --build-arg DOCKER_BASE_IMAGE=$(DOCKER_BASE_IMAGE) \ --build-arg VCS_REF=$$(git rev-parse --short HEAD) \ --build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \ From c194a20c9c55bedb16ed859343f48a6b3645eadc Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 11 Jun 2025 18:57:08 +0200 Subject: [PATCH 03/29] Fixed duplicate textline_light assignments (true and false) in the OCR-D framework for the Eynollah light version, which caused rectangles to be used instead of contours for textlines --- src/eynollah/ocrd-tool.json | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json index e972ec8..ce15206 100644 --- a/src/eynollah/ocrd-tool.json +++ b/src/eynollah/ocrd-tool.json @@ -38,7 +38,7 @@ "textline_light": { "type": "boolean", "default": true, - "description": "Light version need textline light" + "description": "Light version need textline light. If this parameter set to true, this tool will try to return contoure of textlines instead of rectangle bounding box of textline with a faster method." }, "tables": { "type": "boolean", @@ -65,11 +65,6 @@ "default": false, "description": "if this parameter set to true, this tool would check that input image need resizing and enhancement or not." }, - "textline_light": { - "type": "boolean", - "default": false, - "description": "if this parameter set to true, this tool will try to return contoure of textlines instead of rectangle bounding box of textline with a faster method." - }, "right_to_left": { "type": "boolean", "default": false, From b7b218ff11660061fb0f606b871ebe3c9f831184 Mon Sep 17 00:00:00 2001 From: kba Date: Thu, 12 Jun 2025 15:30:17 +0200 Subject: [PATCH 04/29] OCR-D processor: same behavior as standalone wrt light_version/textline_light --- src/eynollah/processor.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/eynollah/processor.py b/src/eynollah/processor.py index 8f99489..a53fede 100644 --- a/src/eynollah/processor.py +++ b/src/eynollah/processor.py @@ -14,9 +14,10 @@ class EynollahProcessor(Processor): return 'ocrd-eynollah-segment' def setup(self) -> None: - if self.parameter['textline_light'] and not self.parameter['light_version']: - raise ValueError("Error: You set parameter 'textline_light' to enable light textline detection, " - "but parameter 'light_version' is not enabled") + assert self.parameter + if self.parameter['textline_light'] != self.parameter['light_version']: + raise ValueError("Error: You must set or unset both parameter 'textline_light' (to enable light textline detection), " + "and parameter 'light_version' (faster+simpler method for main region detection and deskewing)") self.eynollah = Eynollah( self.resolve_resource(self.parameter['models']), logger=self.logger, From 21615a986dbe2c6a2ddcf603b45ebe24e52f1e90 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 13 Aug 2025 14:14:37 +0200 Subject: [PATCH 05/29] OCR-D processor: expose reading_order_machine_based --- src/eynollah/ocrd-tool.json | 5 +++++ src/eynollah/processor.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/src/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json index ce15206..af5e03f 100644 --- a/src/eynollah/ocrd-tool.json +++ b/src/eynollah/ocrd-tool.json @@ -74,6 +74,11 @@ "type": "boolean", "default": false, "description": "ignore the special role of headings during reading order detection" + }, + "reading_order_machine_based": { + "type": "boolean", + "default": false, + "description": "use data-driven (rather than rule-based) reading order detection" } }, "resources": [ diff --git a/src/eynollah/processor.py b/src/eynollah/processor.py index a53fede..c2922c1 100644 --- a/src/eynollah/processor.py +++ b/src/eynollah/processor.py @@ -24,6 +24,7 @@ class EynollahProcessor(Processor): allow_enhancement=self.parameter['allow_enhancement'], curved_line=self.parameter['curved_line'], right2left=self.parameter['right_to_left'], + reading_order_machine_based=self.parameter['reading_order_machine_based'], ignore_page_extraction=self.parameter['ignore_page_extraction'], light_version=self.parameter['light_version'], textline_light=self.parameter['textline_light'], @@ -57,6 +58,8 @@ class EynollahProcessor(Processor): - If ``ignore_page_extraction``, then attempt no cropping of the page. - If ``curved_line``, then compute contour polygons for text lines instead of simple bounding boxes. + - If ``reading_order_machine_based``, then detect reading order via + data-driven model instead of geometrical heuristics. Produce a new output file by serialising the resulting hierarchy. """ From f994ea5f0b721fe8dadc886f2f5b5921a509cabd Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 19 Aug 2025 11:58:45 +0200 Subject: [PATCH 06/29] dilate_textregions_contours: simplify (via shapely's Polygon.buffer()), ensure validity --- src/eynollah/eynollah.py | 106 ++------------------------------- src/eynollah/utils/__init__.py | 1 + src/eynollah/utils/contour.py | 30 +++++++++- 3 files changed, 33 insertions(+), 104 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d47016b..e41a043 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -27,6 +27,7 @@ from loky import ProcessPoolExecutor import xml.etree.ElementTree as ET import cv2 import numpy as np +from shapely.geometry import Polygon from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d from numba import cuda @@ -68,6 +69,7 @@ from .utils.contour import ( get_text_region_boxes_by_given_contours, get_textregion_contours_in_org_image, get_textregion_contours_in_org_image_light, + make_valid, return_contours_of_image, return_contours_of_interested_region, return_contours_of_interested_region_by_min_size, @@ -3774,107 +3776,9 @@ class Eynollah: return all_found_textline_polygons def dilate_textregions_contours(self, all_found_textline_polygons): - #print(all_found_textline_polygons) - for j in range(len(all_found_textline_polygons)): - con_ind = all_found_textline_polygons[j] - #print(len(con_ind[:,0,0]),'con_ind[:,0,0]') - area = cv2.contourArea(con_ind) - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_differential = gaussian_filter1d(x_differential, 0.1) - y_differential = gaussian_filter1d(y_differential, 0.1) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.12) - else: - dilation_m1 = round(area / (y_max-y_min) * 0.12) - - if dilation_m1>8: - dilation_m1 = 8 - if dilation_m1<6: - dilation_m1 = 6 - #print(dilation_m1, 'dilation_m1') - dilation_m1 = 6 - dilation_m2 = int(dilation_m1/2.) +1 - - for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - - inc_x[0] = inc_x[-1] - inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) - - con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] - results = np.array(results) - #print(results,'results') - results[results==0] = 1 - - diff_result = np.diff(results) - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] - #indices_2 = indices_2[1:] - indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - - for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] - return all_found_textline_polygons + return [np.array(make_valid(Polygon(poly[:, 0])).buffer(5).exterior.coords, + dtype=int)[:, np.newaxis] + for poly in all_found_textline_polygons] def dilate_textline_contours(self, all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index c5962f8..1dfca14 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -2156,6 +2156,7 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_itself=x_end_copy.pop(il) #print(y_copy,'y_copy2') + # FIXME: cannot convert numpy.float64 to integer... for column in range(x_start_itself, x_end_itself+1): #print(column,'cols') y_in_cols=[] diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 0e84153..3d7e5c8 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -1,7 +1,7 @@ from functools import partial import cv2 import numpy as np -from shapely import geometry +from shapely.geometry import Polygon from .rotate import rotate_image, rotation_image_new @@ -43,7 +43,7 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area if len(c) < 3: # A polygon cannot have less than 3 points continue - polygon = geometry.Polygon([point[0] for point in c]) + polygon = Polygon([point[0] for point in c]) area = polygon.area if (area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and @@ -58,7 +58,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m if len(c) < 3: # A polygon cannot have less than 3 points continue - polygon = geometry.Polygon([point[0] for point in c]) + polygon = Polygon([point[0] for point in c]) # area = cv2.contourArea(c) area = polygon.area ##print(np.prod(thresh.shape[:2])) @@ -332,3 +332,27 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, return img_ret[:, :, 0] +def make_valid(polygon: Polygon) -> Polygon: + """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.""" + points = list(polygon.exterior.coords) + # try by re-arranging points + for split in range(1, len(points)): + if polygon.is_valid or polygon.simplify(polygon.area).is_valid: + break + # simplification may not be possible (at all) due to ordering + # in that case, try another starting point + polygon = Polygon(points[-split:]+points[:-split]) + # try by simplification + for tolerance in range(int(polygon.area + 1.5)): + if polygon.is_valid: + break + # simplification may require a larger tolerance + polygon = polygon.simplify(tolerance + 1) + # try by enlarging + for tolerance in range(1, int(polygon.area + 2.5)): + if polygon.is_valid: + break + # enlargement may require a larger tolerance + polygon = polygon.buffer(tolerance) + assert polygon.is_valid, polygon.wkt + return polygon From e9a6ff5d8144259f68e5f608de772c0aafb1c669 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 19 Aug 2025 20:09:09 +0200 Subject: [PATCH 07/29] return_boxes_of_images_by_order_of_reading_new: simplify, avoid changing dtype during np.append --- src/eynollah/eynollah.py | 2 +- src/eynollah/utils/__init__.py | 89 ++++++++++++++-------------------- 2 files changed, 38 insertions(+), 53 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index e41a043..91195b7 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -3776,7 +3776,7 @@ class Eynollah: return all_found_textline_polygons def dilate_textregions_contours(self, all_found_textline_polygons): - return [np.array(make_valid(Polygon(poly[:, 0])).buffer(5).exterior.coords, + return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords, dtype=int)[:, np.newaxis] for poly in all_found_textline_polygons] diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 1dfca14..6fde81d 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -1742,6 +1742,7 @@ def return_boxes_of_images_by_order_of_reading_new( x_ending = np.array(x_ending) y_type_2 = np.array(y_type_2) y_diff_type_2 = np.array(y_diff_type_2) + all_columns = set(range(len(peaks_neg_tot) - 1)) if ((reading_order_type==1) or (reading_order_type==0 and @@ -1863,19 +1864,16 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_by_order.append(len(peaks_neg_tot)-2) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - columns_covered_by_mothers = [] + columns_covered_by_mothers = set() for dj in range(len(x_start_without_mother)): - columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(x_start_without_mother[dj], - x_end_without_mother[dj])) - columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(len(peaks_neg_tot)-1) - columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) + columns_covered_by_mothers.update( + range(x_start_without_mother[dj], + x_end_without_mother[dj])) + columns_not_covered = list(all_columns - columns_covered_by_mothers) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) + x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) x_starting = np.append(x_starting, x_start_without_mother) x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) x_ending = np.append(x_ending, x_end_without_mother) @@ -1906,32 +1904,26 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_by_order.append(x_end_column_sort[ii]-1) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - columns_covered_by_mothers = [] + columns_covered_by_mothers = set() for dj in range(len(x_start_without_mother)): - columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(x_start_without_mother[dj], - x_end_without_mother[dj])) - columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(len(peaks_neg_tot)-1) - columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers)) + columns_covered_by_mothers.update( + range(x_start_without_mother[dj], + x_end_without_mother[dj])) + columns_not_covered = list(all_columns - columns_covered_by_mothers) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) + x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) x_starting = np.append(x_starting, x_start_without_mother) - x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1) x_ending = np.append(x_ending, x_end_without_mother) - columns_covered_by_with_child_no_mothers = [] + columns_covered_by_with_child_no_mothers = set() for dj in range(len(x_end_with_child_without_mother)): - columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \ - list(range(x_start_with_child_without_mother[dj], - x_end_with_child_without_mother[dj])) - columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers)) - - all_columns = np.arange(len(peaks_neg_tot)-1) - columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers)) + columns_covered_by_with_child_no_mothers.update( + range(x_start_with_child_without_mother[dj], + x_end_with_child_without_mother[dj])) + columns_not_covered_child_no_mother = list(all_columns - columns_covered_by_with_child_no_mothers) #indexes_to_be_spanned=[] for i_s in range(len(x_end_with_child_without_mother)): columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s]) @@ -1967,21 +1959,19 @@ def return_boxes_of_images_by_order_of_reading_new( if len(x_diff_all_between_nm_wc)>0: biggest=np.argmax(x_diff_all_between_nm_wc) - columns_covered_by_mothers = [] + columns_covered_by_mothers = set() for dj in range(len(x_starting_all_between_nm_wc)): - columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(x_starting_all_between_nm_wc[dj], - x_ending_all_between_nm_wc[dj])) - columns_covered_by_mothers = list(set(columns_covered_by_mothers)) - - all_columns=np.arange(i_s_nc, x_end_biggest_column) - columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) + columns_covered_by_mothers.update( + range(x_starting_all_between_nm_wc[dj], + x_ending_all_between_nm_wc[dj])) + child_columns = set(range(i_s_nc, x_end_biggest_column)) + columns_not_covered = list(child_columns - columns_covered_by_mothers) should_longest_line_be_extended=0 if (len(x_diff_all_between_nm_wc) > 0 and set(list(range(x_starting_all_between_nm_wc[biggest], x_ending_all_between_nm_wc[biggest])) + - list(columns_not_covered)) != set(all_columns)): + list(columns_not_covered)) != child_columns): should_longest_line_be_extended=1 index_lines_so_close_to_top_separator = \ np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) & @@ -2092,36 +2082,31 @@ def return_boxes_of_images_by_order_of_reading_new( x_start_by_order=[] x_end_by_order=[] if len(x_starting)>0: - all_columns = np.arange(len(peaks_neg_tot)-1) - columns_covered_by_lines_covered_more_than_2col = [] + columns_covered_by_lines_covered_more_than_2col = set() for dj in range(len(x_starting)): - if set(list(range(x_starting[dj],x_ending[dj]))) == set(all_columns): - pass - else: - columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \ - list(range(x_starting[dj],x_ending[dj])) - columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col)) - columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col)) + if set(range(x_starting[dj], x_ending[dj])) != all_columns: + columns_covered_by_lines_covered_more_than_2col.update( + range(x_starting[dj], x_ending[dj])) + columns_not_covered = list(all_columns - columns_covered_by_lines_covered_more_than_2col) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) - x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) + x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1) if len(new_main_sep_y) > 0: x_starting = np.append(x_starting, 0) - x_ending = np.append(x_ending, len(peaks_neg_tot)-1) + x_ending = np.append(x_ending, len(peaks_neg_tot) - 1) else: x_starting = np.append(x_starting, x_starting[0]) x_ending = np.append(x_ending, x_ending[0]) else: - all_columns = np.arange(len(peaks_neg_tot)-1) - columns_not_covered = list(set(all_columns)) + columns_not_covered = list(all_columns) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, columns_not_covered) - x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) + x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) + x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1) ind_args=np.array(range(len(y_type_2))) #ind_args=np.array(ind_args) From 6c442c9ae9f3095eb671ceae9184a34b28203bd2 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 19 Aug 2025 22:56:36 +0200 Subject: [PATCH 08/29] separate_lines/do_work_of_slopes: skip if crop is empty --- src/eynollah/utils/separate_lines.py | 46 +++++++++++++++------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 0322579..ffbfff7 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1345,24 +1345,26 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest return contours_rotated_clean -def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None): +def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None): if logger is None: logger = getLogger(__package__) + if not np.prod(img_crop.shape): + return img_crop if num_col == 1: - num_patches = int(img_path.shape[1] / 200.0) + num_patches = int(img_crop.shape[1] / 200.0) else: - num_patches = int(img_path.shape[1] / 140.0) - # num_patches=int(img_path.shape[1]/200.) + num_patches = int(img_crop.shape[1] / 140.0) + # num_patches=int(img_crop.shape[1]/200.) if num_patches == 0: num_patches = 1 - img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] + img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] - # plt.imshow(img_patch_ineterst) + # plt.imshow(img_patch_interest) # plt.show() - length_x = int(img_path.shape[1] / float(num_patches)) + length_x = int(img_crop.shape[1] / float(num_patches)) # margin = int(0.04 * length_x) just recently this was changed because it break lines into 2 margin = int(0.04 * length_x) # if margin<=4: @@ -1370,7 +1372,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl # margin=0 width_mid = length_x - 2 * margin - nxf = img_path.shape[1] / float(width_mid) + nxf = img_crop.shape[1] / float(width_mid) if nxf > int(nxf): nxf = int(nxf) + 1 @@ -1386,12 +1388,12 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl index_x_d = i * width_mid index_x_u = index_x_d + length_x - if index_x_u > img_path.shape[1]: - index_x_u = img_path.shape[1] - index_x_d = img_path.shape[1] - length_x + if index_x_u > img_crop.shape[1]: + index_x_u = img_crop.shape[1] + index_x_d = img_crop.shape[1] - length_x # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - img_xline = img_patch_ineterst[:, index_x_d:index_x_u] + img_xline = img_patch_interest[:, index_x_d:index_x_u] try: assert img_xline.any() @@ -1407,9 +1409,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl img_line_rotated = rotate_image(img_xline, slope_xline) img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 - img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] + img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] - img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape) + img_patch_interest_revised = np.zeros(img_patch_interest.shape) for i in range(nxf): if i == 0: @@ -1419,11 +1421,11 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl index_x_d = i * width_mid index_x_u = index_x_d + length_x - if index_x_u > img_path.shape[1]: - index_x_u = img_path.shape[1] - index_x_d = img_path.shape[1] - length_x + if index_x_u > img_crop.shape[1]: + index_x_u = img_crop.shape[1] + index_x_d = img_crop.shape[1] - length_x - img_xline = img_patch_ineterst[:, index_x_d:index_x_u] + img_xline = img_patch_interest[:, index_x_d:index_x_u] img_int = np.zeros((img_xline.shape[0], img_xline.shape[1])) img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] @@ -1446,9 +1448,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] - img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size + img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size - return img_patch_ineterst_revised + return img_patch_interest_revised def do_image_rotation(angle, img, sigma_des, logger=None): if logger is None: @@ -1546,7 +1548,7 @@ def do_work_of_slopes_new( img_int_p = all_text_region_raw[:,:] img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) - if img_int_p.shape[0] /img_int_p.shape[1] < 0.1: + if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1: slope = 0 slope_for_all = slope_deskew all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w] @@ -1603,7 +1605,7 @@ def do_work_of_slopes_new_curved( # plt.imshow(img_int_p) # plt.show() - if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: + if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1: slope = 0 slope_for_all = slope_deskew else: From b6d1c43a859b3fa8ae10aca4852beaebe75284da Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 20 Aug 2025 14:26:14 +0200 Subject: [PATCH 09/29] dilate_textregions_contours_textline_version: simplify (via shapely's Polygon.buffer()), ensure validity --- src/eynollah/eynollah.py | 106 ++------------------------------- src/eynollah/utils/__init__.py | 1 - 2 files changed, 4 insertions(+), 103 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 91195b7..959e9a6 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -3672,108 +3672,10 @@ class Eynollah: return x_differential_new def dilate_textregions_contours_textline_version(self, all_found_textline_polygons): - #print(all_found_textline_polygons) - for j in range(len(all_found_textline_polygons)): - for ij in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][ij] - area = cv2.contourArea(con_ind) - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_differential = gaussian_filter1d(x_differential, 0.1) - y_differential = gaussian_filter1d(y_differential, 0.1) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.12) - else: - dilation_m1 = round(area / (y_max-y_min) * 0.12) - - if dilation_m1>8: - dilation_m1 = 8 - if dilation_m1<6: - dilation_m1 = 6 - #print(dilation_m1, 'dilation_m1') - dilation_m1 = 6 - dilation_m2 = int(dilation_m1/2.) +1 - - for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - - inc_x[0] = inc_x[-1] - inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) - - con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] - results = np.array(results) - #print(results,'results') - results[results==0] = 1 - - diff_result = np.diff(results) - - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] - #indices_2 = indices_2[1:] - indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - - for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] - return all_found_textline_polygons + return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords, + dtype=int)[:, np.newaxis] + for poly in region] + for region in all_found_textline_polygons] def dilate_textregions_contours(self, all_found_textline_polygons): return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords, diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 6fde81d..451a431 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -2141,7 +2141,6 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_itself=x_end_copy.pop(il) #print(y_copy,'y_copy2') - # FIXME: cannot convert numpy.float64 to integer... for column in range(x_start_itself, x_end_itself+1): #print(column,'cols') y_in_cols=[] From 277d00579e667d11f7bd0b0ad44f0d6fd8874ce2 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 20 Aug 2025 14:28:14 +0200 Subject: [PATCH 10/29] get_textregion_contours_in_org_image_light: no back rotation, drop slope_first (always 0) --- src/eynollah/eynollah.py | 14 ++++++-------- src/eynollah/utils/contour.py | 23 ++++++++--------------- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 959e9a6..ac7044d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -2927,12 +2927,10 @@ class Eynollah: #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True, map=self.executor.map, logger=self.logger, plotter=self.plotter) - slope_first = 0 - if self.plotter: self.plotter.save_deskewed_image(slope_deskew) self.logger.info("slope_deskew: %.2f°", slope_deskew) - return slope_deskew, slope_first + return slope_deskew def run_marginals( self, image_page, textline_mask_tot_ea, mask_images, mask_lines, @@ -4173,9 +4171,9 @@ class Eynollah: textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + slope_deskew = self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ @@ -4216,7 +4214,7 @@ class Eynollah: textline_mask_tot_ea = self.run_textline(image_page) self.logger.info("textline detection took %.1fs", time.time() - t1) t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + slope_deskew = self.run_deskew(textline_mask_tot_ea) self.logger.info("deskewing took %.1fs", time.time() - t1) elif num_col_classifier in (1,2): org_h_l_m = textline_mask_tot_ea.shape[0] @@ -4405,12 +4403,12 @@ class Eynollah: contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals) #print("text region early 3.5 in %.1fs", time.time() - t0) txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( - contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map) + contours_only_text_parent, self.image, confidence_matrix, map=self.executor.map) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( - contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map) + contours_only_text_parent, self.image, confidence_matrix, map=self.executor.map) #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 3d7e5c8..7868971 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -247,23 +247,16 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) return cont_int[0], index_r_con, confidence_contour -def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map): +def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix, map=map): if not len(cnts): return [], [] - - confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) - img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) - ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) - #cnts = cnts/2 - cnts = [(i/6).astype(int) for i in cnts] - results = map(partial(do_back_rotation_and_get_cnt_back, - img=img, - slope_first=slope_first, - confidence_matrix=confidence_matrix, - ), - cnts, range(len(cnts))) - contours, indexes, conf_contours = tuple(zip(*results)) - return [i*6 for i in contours], list(conf_contours) + + confs = [] + for cnt in cnts: + cnt_mask = np.zeros(confidence_matrix.shape) + cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt], color=1.0) + confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask)) + return cnts, confs def return_contours_of_interested_textline(region_pre_p, pixel): # pixels of images are identified by 5 From 3d53070b90acfa8533cab37649e58334bacb713f Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 21 Aug 2025 01:03:46 +0200 Subject: [PATCH 11/29] avoid creating invalid polygons via rounding --- src/eynollah/eynollah.py | 5 +++-- src/eynollah/utils/contour.py | 9 +++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index ac7044d..05907d6 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -3670,16 +3670,17 @@ class Eynollah: return x_differential_new def dilate_textregions_contours_textline_version(self, all_found_textline_polygons): - return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords, + return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1], dtype=int)[:, np.newaxis] for poly in region] for region in all_found_textline_polygons] def dilate_textregions_contours(self, all_found_textline_polygons): - return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords, + return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1], dtype=int)[:, np.newaxis] for poly in all_found_textline_polygons] + def dilate_textline_contours(self, all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for ij in range(len(all_found_textline_polygons[j])): diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 7868971..e864cc7 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -49,7 +49,7 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1): found_polygons_early.append(np.array([[point] - for point in polygon.exterior.coords], dtype=np.uint)) + for point in polygon.exterior.coords[:-1]], dtype=np.uint)) return found_polygons_early def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): @@ -70,7 +70,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m True): # print(c[0][0][1]) found_polygons_early.append(np.array([[point] - for point in polygon.exterior.coords], dtype=np.int32)) + for point in polygon.exterior.coords[:-1]], dtype=np.int32)) return found_polygons_early def find_new_features_of_contours(contours_main): @@ -327,6 +327,11 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, def make_valid(polygon: Polygon) -> Polygon: """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.""" + def isint(x): + return isinstance(x, int) or int(x) == x + # make sure rounding does not invalidate + if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0: + polygon = Polygon(np.round(polygon.exterior.coords)) points = list(polygon.exterior.coords) # try by re-arranging points for split in range(1, len(points)): From b610fe07a6ade1978d343012218e0107e9de146f Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 21 Aug 2025 01:05:15 +0200 Subject: [PATCH 12/29] check_any_text_region_in_model_one_is_main_or_header_light: return original instead of resampled contours --- src/eynollah/utils/__init__.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 451a431..44086f0 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -955,11 +955,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light( regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom, regions_model_full.shape[0] // zoom), interpolation=cv2.INTER_NEAREST) - contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent] + contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent] ### cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \ - find_new_features_of_contours(contours_only_text_parent) + find_new_features_of_contours(contours_only_text_parent_z) length_con=x_max_main-x_min_main height_con=y_max_main-y_min_main @@ -982,8 +982,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light( contours_only_text_parent_main_d=[] contours_only_text_parent_head_d=[] - for ii in range(len(contours_only_text_parent)): - con=contours_only_text_parent[ii] + for ii, con in enumerate(contours_only_text_parent_z): img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) @@ -994,23 +993,22 @@ def check_any_text_region_in_model_one_is_main_or_header_light( if (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 - contours_only_text_parent_head.append(con) + contours_only_text_parent_head.append(contours_only_text_parent[ii]) + conf_contours_head.append(None) # why not conf_contours[ii], too? if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_head.append(all_box_coord[ii]) slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) - conf_contours_head.append(None) else: regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 - contours_only_text_parent_main.append(con) + contours_only_text_parent_main.append(contours_only_text_parent[ii]) conf_contours_main.append(conf_contours[ii]) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_main.append(all_box_coord[ii]) slopes_main.append(slopes[ii]) all_found_textline_polygons_main.append(all_found_textline_polygons[ii]) - #print(all_pixels,pixels_main,pixels_header) ### to make it faster @@ -1018,8 +1016,6 @@ def check_any_text_region_in_model_one_is_main_or_header_light( # regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom, # regions_model_full.shape[0] // zoom), # interpolation=cv2.INTER_NEAREST) - contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head] - contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main] ### return (regions_model_1, From 42474afa4bbf2127939ff237c02c52116c7ceae5 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 21 Aug 2025 01:32:32 +0200 Subject: [PATCH 13/29] =?UTF-8?q?rename=20*lines=5Fxml=20=E2=86=92=20*sepl?= =?UTF-8?q?ines=20for=20clarity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/eynollah/eynollah.py | 58 ++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 05907d6..d692069 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1713,9 +1713,9 @@ class Eynollah: mask_texts_only = (prediction_regions_org[:,:] ==1)*1 mask_images_only=(prediction_regions_org[:,:] ==2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + polygons_seplines = textline_con_fil = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -1779,7 +1779,7 @@ class Eynollah: [page_coord_img[2], page_coord_img[1]]])) self.logger.debug("exit get_regions_extract_images_only") - return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page + return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_images_fin, image_page, page_coord, cont_page def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): self.logger.debug("enter get_regions_light_v") @@ -1895,24 +1895,24 @@ class Eynollah: mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1)) + test_khat = cv2.fillPoly(test_khat, pts=polygons_seplines, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() #for jv in range(1): - #print(jv, hir_lines_xml[0][232][3]) + #print(jv, hir_seplines[0][232][3]) #test_khat = np.zeros(prediction_regions_org.shape) - #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_seplines[232]], color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() - polygons_lines_xml = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001) test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() @@ -1937,7 +1937,7 @@ class Eynollah: #plt.show() #print("inside 4 ", time.time()-t_in) self.logger.debug("exit get_regions_light_v") - return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin, confidence_matrix + return text_regions_p_true, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin, confidence_matrix else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) self.logger.debug("exit get_regions_light_v") @@ -2020,9 +2020,9 @@ class Eynollah: mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) @@ -2034,7 +2034,7 @@ class Eynollah: text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) self.logger.debug("exit get_regions_from_xy_2models") - return text_regions_p_true, erosion_hurts, polygons_lines_xml + return text_regions_p_true, erosion_hurts, polygons_seplines except: if self.input_binary: prediction_bin = np.copy(img_org) @@ -2069,9 +2069,9 @@ class Eynollah: mask_texts_only = (prediction_regions_org == 1)*1 mask_images_only= (prediction_regions_org == 2)*1 - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = filter_contours_area_of_image( - mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -2084,7 +2084,7 @@ class Eynollah: erosion_hurts = True self.logger.debug("exit get_regions_from_xy_2models") - return text_regions_p_true, erosion_hurts, polygons_lines_xml + return text_regions_p_true, erosion_hurts, polygons_seplines def do_order_of_regions_full_layout( self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): @@ -4102,7 +4102,7 @@ class Eynollah: img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) if self.extract_only_images: - text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ + text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, image_page, page_coord, cont_page = \ self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( @@ -4145,7 +4145,7 @@ class Eynollah: polygons_of_marginals = [] all_found_textline_polygons_marginals = [] all_box_coord_marginals = [] - polygons_lines_xml = [] + polygons_seplines = [] contours_tables = [] ocr_all_textlines = None conf_contours_textregions =None @@ -4153,13 +4153,13 @@ class Eynollah: cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions) + cont_page, polygons_seplines, contours_tables, ocr_all_textlines, conf_contours_textregions) return pcgts #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \ + text_regions_p_1, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin_light, confidence_matrix = \ self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) @@ -4186,7 +4186,7 @@ class Eynollah: textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) #print("text region early -4 in %.1fs", time.time() - t0) else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \ + text_regions_p_1, erosion_hurts, polygons_seplines = \ self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -4385,13 +4385,13 @@ class Eynollah: [], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], - cont_page, polygons_lines_xml, [], [], []) + cont_page, polygons_seplines, [], [], []) else: pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], - cont_page, polygons_lines_xml, contours_tables, [], []) + cont_page, polygons_seplines, contours_tables, [], []) return pcgts @@ -4584,7 +4584,7 @@ class Eynollah: all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, - cont_page, polygons_lines_xml, ocr_all_textlines, conf_contours_textregions, conf_contours_textregions_h) + cont_page, polygons_seplines, ocr_all_textlines, conf_contours_textregions, conf_contours_textregions_h) return pcgts contours_only_text_parent_h = None @@ -4663,7 +4663,7 @@ class Eynollah: txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions) + cont_page, polygons_seplines, contours_tables, ocr_all_textlines, conf_contours_textregions) return pcgts From 244772f08659507ccf573f76e425509af86441ed Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 21 Aug 2025 01:33:16 +0200 Subject: [PATCH 14/29] filter_contours_area_of_image*: also ensure validity here --- src/eynollah/eynollah.py | 4 ++-- src/eynollah/utils/contour.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d692069..6a50fc0 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -3671,13 +3671,13 @@ class Eynollah: def dilate_textregions_contours_textline_version(self, all_found_textline_polygons): return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1], - dtype=int)[:, np.newaxis] + dtype=np.uint)[:, np.newaxis] for poly in region] for region in all_found_textline_polygons] def dilate_textregions_contours(self, all_found_textline_polygons): return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1], - dtype=int)[:, np.newaxis] + dtype=np.uint)[:, np.newaxis] for poly in all_found_textline_polygons] diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index e864cc7..dac3a7c 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -48,8 +48,8 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area if (area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1): - found_polygons_early.append(np.array([[point] - for point in polygon.exterior.coords[:-1]], dtype=np.uint)) + found_polygons_early.append(np.array(make_valid(polygon).exterior.coords[:-1], + dtype=np.uint)[:, np.newaxis]) return found_polygons_early def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): @@ -69,8 +69,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m # hierarchy[0][jv][3]==-1 True): # print(c[0][0][1]) - found_polygons_early.append(np.array([[point] - for point in polygon.exterior.coords[:-1]], dtype=np.int32)) + found_polygons_early.append(np.array(make_valid(polygon).exterior.coords[:-1], + dtype=np.uint)[:, np.newaxis]) return found_polygons_early def find_new_features_of_contours(contours_main): From 8b5f90e243ae3ae8256e28710f5ac9158906ef0b Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 21 Aug 2025 01:42:46 +0200 Subject: [PATCH 15/29] =?UTF-8?q?move=20dilate=5F*=5Fcontours=20to=20.util?= =?UTF-8?q?s.contour,=20rename=20dilate=5Ftextregions=5Fcontours=5Ftextlin?= =?UTF-8?q?e=5Fversion=20=E2=86=92=20dilate=5Ftextline=5Fcontours?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/eynollah/eynollah.py | 253 ++-------------------------------- src/eynollah/utils/contour.py | 11 ++ 2 files changed, 22 insertions(+), 242 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 6a50fc0..3a4551c 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -69,12 +69,13 @@ from .utils.contour import ( get_text_region_boxes_by_given_contours, get_textregion_contours_in_org_image, get_textregion_contours_in_org_image_light, - make_valid, return_contours_of_image, return_contours_of_interested_region, return_contours_of_interested_region_by_min_size, return_contours_of_interested_textline, return_parent_contours, + dilate_textregion_contours, + dilate_textline_contours, ) from .utils.rotate import ( rotate_image, @@ -1919,7 +1920,7 @@ class Eynollah: #sys.exit() polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) + ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) @@ -3669,117 +3670,6 @@ class Eynollah: return x_differential_new - def dilate_textregions_contours_textline_version(self, all_found_textline_polygons): - return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1], - dtype=np.uint)[:, np.newaxis] - for poly in region] - for region in all_found_textline_polygons] - - def dilate_textregions_contours(self, all_found_textline_polygons): - return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1], - dtype=np.uint)[:, np.newaxis] - for poly in all_found_textline_polygons] - - - def dilate_textline_contours(self, all_found_textline_polygons): - for j in range(len(all_found_textline_polygons)): - for ij in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][ij] - area = cv2.contourArea(con_ind) - - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_differential = gaussian_filter1d(x_differential, 3) - y_differential = gaussian_filter1d(y_differential, 3) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] - y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] - - abs_diff=abs(abs(x_differential)- abs(y_differential) ) - - inc_x = np.zeros(len(x_differential)+1) - inc_y = np.zeros(len(x_differential)+1) - - if (y_max-y_min) <= (x_max-x_min): - dilation_m1 = round(area / (x_max-x_min) * 0.35) - else: - dilation_m1 = round(area / (y_max-y_min) * 0.35) - - if dilation_m1>12: - dilation_m1 = 12 - if dilation_m1<4: - dilation_m1 = 4 - #print(dilation_m1, 'dilation_m1') - dilation_m2 = int(dilation_m1/2.) +1 - - for i in range(len(x_differential)): - if abs_diff[i]==0: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - - elif abs_diff[i]!=0 and abs_diff[i]>=3: - if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) - else: - inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) - else: - inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - - inc_x[0] = inc_x[-1] - inc_y[0] = inc_y[-1] - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] - con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - con_ind = con_ind.astype(np.int32) - - results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) - for ind in range(len(con_scaled[:,0, 1])) ] - results = np.array(results) - results[results==0] = 1 - - diff_result = np.diff(results) - - indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] - indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - - if results[0]==1: - con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] - con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] - indices_m2 = indices_m2[1:] - - if len(indices_2)>len(indices_m2): - con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] - con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] - indices_2 = indices_2[:-1] - - for ii in range(len(indices_2)): - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] - return all_found_textline_polygons - def filter_contours_inside_a_bigger_one(self,contours, contours_d_ordered, image, marginal_cnts=None, type_contour="textregion"): if type_contour=="textregion": areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] @@ -3917,121 +3807,6 @@ class Eynollah: return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) - def dilate_textlines(self, all_found_textline_polygons): - for j in range(len(all_found_textline_polygons)): - for i in range(len(all_found_textline_polygons[j])): - con_ind = all_found_textline_polygons[j][i] - con_ind = con_ind.astype(float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: - x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) - mult = x_biger_than_x*x_differential - - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) - - if y_differential[0]==0: - y_differential[0] = 0.1 - if y_differential[-1]==0: - y_differential[-1]= 0.1 - y_differential = [y_differential[ind] if y_differential[ind] != 0 - else 0.5 * (y_differential[ind-1] + y_differential[ind+1]) - for ind in range(len(y_differential))] - - if y_differential[0]==0.1: - y_differential[0] = y_differential[1] - if y_differential[-1]==0.1: - y_differential[-1] = y_differential[-2] - y_differential.append(y_differential[0]) - - y_differential = [-1 if y_differential[ind] < 0 else 1 - for ind in range(len(y_differential))] - y_differential = self.return_it_in_two_groups(y_differential) - y_differential = np.array(y_differential) - - con_scaled = con_ind*1 - con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential - con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 - con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 - - try: - con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 - con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 - except: - pass - - con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 - con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 - - try: - con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 - con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 - except: - pass - - else: - y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) - mult = y_biger_than_x*y_differential - - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) - - if x_differential[0]==0: - x_differential[0] = 0.1 - if x_differential[-1]==0: - x_differential[-1]= 0.1 - x_differential = [x_differential[ind] if x_differential[ind] != 0 - else 0.5 * (x_differential[ind-1] + x_differential[ind+1]) - for ind in range(len(x_differential))] - - if x_differential[0]==0.1: - x_differential[0] = x_differential[1] - if x_differential[-1]==0.1: - x_differential[-1] = x_differential[-2] - x_differential.append(x_differential[0]) - - x_differential = [-1 if x_differential[ind] < 0 else 1 - for ind in range(len(x_differential))] - x_differential = self.return_it_in_two_groups(x_differential) - x_differential = np.array(x_differential) - - con_scaled = con_ind*1 - con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential - con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 - con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 - - try: - con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 - con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 - except: - pass - - con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 - con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 - - try: - con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 - con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 - except: - pass - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] - - return all_found_textline_polygons - def delete_regions_without_textlines( self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con): @@ -4130,8 +3905,7 @@ class Eynollah: all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.dilate_textregions_contours_textline_version( - all_found_textline_polygons) + all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline") @@ -4255,14 +4029,14 @@ class Eynollah: boxes, boxes_d, polygons_of_marginals, contours_tables = \ self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals) else: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) - ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals) if self.light_version: drop_label_in_full_layout = 4 textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 @@ -4398,15 +4172,14 @@ class Eynollah: #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - contours_only_text_parent = self.dilate_textregions_contours( - contours_only_text_parent) + contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent) contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one( contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals) #print("text region early 3.5 in %.1fs", time.time() - t0) txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( contours_only_text_parent, self.image, confidence_matrix, map=self.executor.map) - #txt_con_org = self.dilate_textregions_contours(txt_con_org) - #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + #txt_con_org = dilate_textregion_contours(txt_con_org) + #contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent) else: txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( contours_only_text_parent, self.image, confidence_matrix, map=self.executor.map) @@ -4433,14 +4206,10 @@ class Eynollah: #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, # boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textregions_contours_textline_version( - all_found_textline_polygons) + all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version( - all_found_textline_polygons_marginals) + all_found_textline_polygons_marginals = dilate_textline_contours(all_found_textline_polygons_marginals) contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \ index_by_text_par_con = self.filter_contours_without_textline_inside( contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index dac3a7c..620ab75 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -325,6 +325,17 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, return img_ret[:, :, 0] +def dilate_textline_contours(self, all_found_textline_polygons): + return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1], + dtype=np.uint)[:, np.newaxis] + for poly in region] + for region in all_found_textline_polygons] + +def dilate_textregion_contours(self, all_found_textline_polygons): + return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1], + dtype=np.uint)[:, np.newaxis] + for poly in all_found_textline_polygons] + def make_valid(polygon: Polygon) -> Polygon: """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.""" def isint(x): From 8be52fb1430968ba0f6431619077077df7bc2a63 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 21 Aug 2025 12:59:03 +0200 Subject: [PATCH 16/29] refactor shapely converisons into contour2polygon / polygon2contour, also handle heterogeneous geometries --- src/eynollah/eynollah.py | 1 - src/eynollah/utils/contour.py | 107 ++++++++++++++++++++++++++-------- 2 files changed, 83 insertions(+), 25 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 3a4551c..976634f 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -27,7 +27,6 @@ from loky import ProcessPoolExecutor import xml.etree.ElementTree as ET import cv2 import numpy as np -from shapely.geometry import Polygon from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d from numba import cuda diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 620ab75..11f6490 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -1,7 +1,15 @@ +from typing import Sequence, Union +from numbers import Number from functools import partial +import itertools + import cv2 import numpy as np -from shapely.geometry import Polygon +from scipy.sparse.csgraph import minimum_spanning_tree +from shapely.geometry import Polygon, LineString +from shapely.geometry.polygon import orient +from shapely import set_precision +from shapely.ops import unary_union, nearest_points from .rotate import rotate_image, rotation_image_new @@ -37,29 +45,28 @@ def get_text_region_boxes_by_given_contours(contours): return boxes, contours_new -def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): +def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0): found_polygons_early = [] - for jv,c in enumerate(contours): - if len(c) < 3: # A polygon cannot have less than 3 points + for jv, contour in enumerate(contours): + if len(contour) < 3: # A polygon cannot have less than 3 points continue - polygon = Polygon([point[0] for point in c]) + polygon = contour2polygon(contour, dilate=dilate) area = polygon.area if (area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1): - found_polygons_early.append(np.array(make_valid(polygon).exterior.coords[:-1], - dtype=np.uint)[:, np.newaxis]) + found_polygons_early.append(polygon2contour(polygon)) return found_polygons_early -def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): +def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0): found_polygons_early = [] - for jv,c in enumerate(contours): - if len(c) < 3: # A polygon cannot have less than 3 points + for jv, contour in enumerate(contours): + if len(contour) < 3: # A polygon cannot have less than 3 points continue - polygon = Polygon([point[0] for point in c]) - # area = cv2.contourArea(c) + polygon = contour2polygon(contour, dilate=dilate) + # area = cv2.contourArea(contour) area = polygon.area ##print(np.prod(thresh.shape[:2])) # Check that polygon has area greater than minimal area @@ -68,9 +75,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m area <= max_area * np.prod(image.shape[:2]) and # hierarchy[0][jv][3]==-1 True): - # print(c[0][0][1]) - found_polygons_early.append(np.array(make_valid(polygon).exterior.coords[:-1], - dtype=np.uint)[:, np.newaxis]) + # print(contour[0][0][1]) + found_polygons_early.append(polygon2contour(polygon)) return found_polygons_early def find_new_features_of_contours(contours_main): @@ -325,16 +331,29 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, return img_ret[:, :, 0] -def dilate_textline_contours(self, all_found_textline_polygons): - return [[np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1], - dtype=np.uint)[:, np.newaxis] - for poly in region] +def dilate_textline_contours(all_found_textline_polygons): + return [[polygon2contour(contour2polygon(contour, dilate=5)) + for contour in region] for region in all_found_textline_polygons] -def dilate_textregion_contours(self, all_found_textline_polygons): - return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords[:-1], - dtype=np.uint)[:, np.newaxis] - for poly in all_found_textline_polygons] +def dilate_textregion_contours(all_found_textline_polygons): + return [polygon2contour(contour2polygon(contour, dilate=5)) + for contour in all_found_textline_polygons] + +def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0): + polygon = Polygon([point[0] for point in contour]) + if dilate: + polygon = polygon.buffer(dilate) + if polygon.geom_type == 'GeometryCollection': + # heterogeneous result: filter zero-area shapes (LineString, Point) + polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0]) + if polygon.geom_type == 'MultiPolygon': + # homogeneous result: construct convex hull to connect + polygon = join_polygons(polygon.geoms) + return make_valid(polygon) + +def polygon2contour(polygon: Polygon) -> np.ndarray: + return np.array(polygon.exterior.coords[:-1], dtype=np.uint)[:, np.newaxis] def make_valid(polygon: Polygon) -> Polygon: """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.""" @@ -343,7 +362,7 @@ def make_valid(polygon: Polygon) -> Polygon: # make sure rounding does not invalidate if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0: polygon = Polygon(np.round(polygon.exterior.coords)) - points = list(polygon.exterior.coords) + points = list(polygon.exterior.coords[:-1]) # try by re-arranging points for split in range(1, len(points)): if polygon.is_valid or polygon.simplify(polygon.area).is_valid: @@ -365,3 +384,43 @@ def make_valid(polygon: Polygon) -> Polygon: polygon = polygon.buffer(tolerance) assert polygon.is_valid, polygon.wkt return polygon + +def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon: + """construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points""" + # ensure input polygons are simply typed and all oriented equally + polygons = [orient(poly) + for poly in itertools.chain.from_iterable( + [poly.geoms + if poly.geom_type in ['MultiPolygon', 'GeometryCollection'] + else [poly] + for poly in polygons])] + npoly = len(polygons) + if npoly == 1: + return polygons[0] + # find min-dist path through all polygons (travelling salesman) + pairs = itertools.combinations(range(npoly), 2) + dists = np.zeros((npoly, npoly), dtype=float) + for i, j in pairs: + dist = polygons[i].distance(polygons[j]) + if dist < 1e-5: + dist = 1e-5 # if pair merely touches, we still need to get an edge + dists[i, j] = dist + dists[j, i] = dist + dists = minimum_spanning_tree(dists, overwrite=True) + # add bridge polygons (where necessary) + for prevp, nextp in zip(*dists.nonzero()): + prevp = polygons[prevp] + nextp = polygons[nextp] + nearest = nearest_points(prevp, nextp) + bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1) + polygons.append(bridgep) + jointp = unary_union(polygons) + assert jointp.geom_type == 'Polygon', jointp.wkt + # follow-up calculations will necessarily be integer; + # so anticipate rounding here and then ensure validity + jointp2 = set_precision(jointp, 1.0) + if jointp2.geom_type != 'Polygon' or not jointp2.is_valid: + jointp2 = Polygon(np.round(jointp.exterior.coords)) + jointp2 = make_valid(jointp2) + assert jointp2.geom_type == 'Polygon', jointp2.wkt + return jointp2 From fd6a6495a2ad79097d6b6dc65dfa7d54ec9fcbc5 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 21 Aug 2025 13:00:31 +0200 Subject: [PATCH 17/29] =?UTF-8?q?increase=20dilatation:=20textregions/line?= =?UTF-8?q?s=20(5=E2=86=926),=20seplines=20(0=E2=86=921)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/eynollah/eynollah.py | 10 +++++----- src/eynollah/utils/contour.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 976634f..55b9bfd 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1714,8 +1714,8 @@ class Eynollah: mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) - polygons_seplines = textline_con_fil = filter_contours_area_of_image( - mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001) + polygons_seplines = filter_contours_area_of_image( + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -1909,7 +1909,7 @@ class Eynollah: #plt.show() polygons_seplines = filter_contours_area_of_image( - mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001) + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) test_khat = np.zeros(prediction_regions_org.shape) test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1)) @@ -2022,7 +2022,7 @@ class Eynollah: polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) polygons_seplines = filter_contours_area_of_image( - mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001) + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) @@ -2071,7 +2071,7 @@ class Eynollah: polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) polygons_seplines = filter_contours_area_of_image( - mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001) + mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 11f6490..fb1e790 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -332,12 +332,12 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, return img_ret[:, :, 0] def dilate_textline_contours(all_found_textline_polygons): - return [[polygon2contour(contour2polygon(contour, dilate=5)) + return [[polygon2contour(contour2polygon(contour, dilate=6)) for contour in region] for region in all_found_textline_polygons] def dilate_textregion_contours(all_found_textline_polygons): - return [polygon2contour(contour2polygon(contour, dilate=5)) + return [polygon2contour(contour2polygon(contour, dilate=6)) for contour in all_found_textline_polygons] def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0): From 698f38e46185bad7162163fed432c8472e0c7a58 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 29 Aug 2025 12:16:56 +0200 Subject: [PATCH 18/29] polygon2contour: avoid overflow --- src/eynollah/utils/contour.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index fb1e790..4b45b03 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -353,7 +353,8 @@ def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number return make_valid(polygon) def polygon2contour(polygon: Polygon) -> np.ndarray: - return np.array(polygon.exterior.coords[:-1], dtype=np.uint)[:, np.newaxis] + polygon = np.array(polygon.exterior.coords[:-1], dtype=np.int) + return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis] def make_valid(polygon: Polygon) -> Polygon: """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.""" From 57821662b9de6a7d60b16a93666c3a8258bd5750 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 29 Aug 2025 12:19:37 +0200 Subject: [PATCH 19/29] filter_contours_without_textline_inside: avoid removing from identical lists twice --- src/eynollah/eynollah.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 55b9bfd..7232bb5 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -3764,7 +3764,9 @@ class Eynollah: return contours def filter_contours_without_textline_inside( - self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions): + self, contours, text_con_org, contours_textline, + contours_only_text_parent_d_ordered, + conf_contours_textregions): ###contours_txtline_of_all_textregions = [] ###for jj in range(len(contours_textline)): ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] @@ -3788,23 +3790,21 @@ class Eynollah: ###if np.any(results==1): ###contours_with_textline.append(con_tr) - textregion_index_to_del = [] + textregion_index_to_del = set() for index_textregion, textlines_textregion in enumerate(contours_textline): - if len(textlines_textregion)==0: - textregion_index_to_del.append(index_textregion) + if len(textlines_textregion) == 0: + textregion_index_to_del.add(index_textregion) + def filterfun(lis): + if len(lis) == 0: + return [] + return list(np.delete(lis, list(textregion_index_to_del))) - uniqe_args_trs = np.unique(textregion_index_to_del) - uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1] - - for ind_u_a_trs in uniqe_args_trs_sorted: - conf_contours_textregions.pop(ind_u_a_trs) - contours.pop(ind_u_a_trs) - contours_textline.pop(ind_u_a_trs) - text_con_org.pop(ind_u_a_trs) - if len(contours_only_text_parent_d_ordered) > 0: - contours_only_text_parent_d_ordered.pop(ind_u_a_trs) - - return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) + return (filterfun(contours), + filterfun(text_con_org), + filterfun(conf_contours_textregions), + filterfun(contours_textline), + filterfun(contours_only_text_parent_d_ordered), + np.arange(len(contours) - len(textregion_index_to_del))) def delete_regions_without_textlines( self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, From 741aa7867c42469ebe5b21b83f370e4b699ef499 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 29 Aug 2025 12:37:44 +0200 Subject: [PATCH 20/29] get_marginals: exit early if no peaks found to avoid spurious overlap mask --- src/eynollah/utils/marginals.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index a29e50d..22ada4e 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -94,6 +94,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve except: point_left=first_nonzero + if point_left == first_nonzero and point_right == last_nonzero: + return text_regions if point_right>=mask_marginals.shape[1]: From d3566e55ef6d0a541be30621c942e7d904b44aa9 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 29 Aug 2025 17:21:08 +0200 Subject: [PATCH 21/29] polygon2contour: fix 698f38e4 (deprecated dtype) --- src/eynollah/utils/contour.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 4b45b03..a9a7172 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -353,7 +353,7 @@ def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number return make_valid(polygon) def polygon2contour(polygon: Polygon) -> np.ndarray: - polygon = np.array(polygon.exterior.coords[:-1], dtype=np.int) + polygon = np.array(polygon.exterior.coords[:-1], dtype=int) return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis] def make_valid(polygon: Polygon) -> Polygon: From 892ff41e38a1730eafe4ebc0955ad4bcf7a4c558 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 26 Aug 2025 21:00:33 +0200 Subject: [PATCH 22/29] utils: introduce box2rect and box2slice --- src/eynollah/utils/__init__.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 44086f0..f76c3e1 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -1,3 +1,4 @@ +from typing import Tuple import time import math @@ -298,9 +299,17 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( x_end_with_child_without_mother, new_main_sep_y) +def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]: + return (box[1], box[1] + box[3], + box[0], box[0] + box[2]) + +def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]: + return (slice(box[1], box[1] + box[3]), + slice(box[0], box[0] + box[2])) + def crop_image_inside_box(box, img_org_copy): - image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] - return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] + image_box = img_org_copy[box2slice(box)] + return image_box, box2rect(box) def otsu_copy_binary(img): img_r = np.zeros((img.shape[0], img.shape[1], 3)) From 142ac8825e7dbca3403d260d06d4db52d4aa8325 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 26 Aug 2025 21:02:43 +0200 Subject: [PATCH 23/29] use box2rect instead of crop_image_inside_box when no image needed --- src/eynollah/eynollah.py | 8 +++++--- src/eynollah/utils/separate_lines.py | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 7232bb5..50fe34c 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -98,6 +98,8 @@ from .utils.resize import resize_image from .utils import ( boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, + box2rect, + box2slice, find_num_col, otsu_copy_binary, put_drop_out_from_only_drop_model, @@ -1542,7 +1544,7 @@ class Eynollah: all_found_textline_polygons.append(textlines_ins[::-1]) slopes.append(slope_deskew) - _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) + crop_coor = box2rect(boxes[index]) all_box_coord.append(crop_coor) return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes @@ -1754,7 +1756,7 @@ class Eynollah: ##polygons_of_images_fin.append(ploy_img_ind) box = cv2.boundingRect(ploy_img_ind) - _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) + page_coord_img = box2rect(box) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], @@ -1768,7 +1770,7 @@ class Eynollah: if h < 150 or w < 150: pass else: - _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) + page_coord_img = box2rect(box) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index ffbfff7..b1a90b5 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -18,6 +18,8 @@ from .contour import ( from . import ( find_num_col_deskew, crop_image_inside_box, + box2rect, + box2slice, ) def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): @@ -1540,7 +1542,7 @@ def do_work_of_slopes_new( logger.debug('enter do_work_of_slopes_new') x, y, w, h = box_text - _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) all_text_region_raw = textline_mask_tot_ea * mask_textline @@ -1631,7 +1633,7 @@ def do_work_of_slopes_new_curved( slope_for_all = slope_deskew slope = slope_for_all - _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) if abs(slope_for_all) < 45: textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) @@ -1685,7 +1687,7 @@ def do_work_of_slopes_new_light( logger.debug('enter do_work_of_slopes_new_light') x, y, w, h = box_text - _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) + crop_coor = box2rect(box_text) mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) all_text_region_raw = textline_mask_tot_ea * mask_textline From dbbf1073df112073942aa5c206a033da6f49c783 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 26 Aug 2025 21:05:40 +0200 Subject: [PATCH 24/29] avoid pulling unused 'image_page_rotated' through functions --- src/eynollah/eynollah.py | 48 +++++++++++++--------------- src/eynollah/utils/separate_lines.py | 6 ++-- 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 50fe34c..3a00c7d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1521,7 +1521,7 @@ class Eynollah: self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 - def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) M_main_tot = [cv2.moments(polygons_of_textlines[j]) @@ -1549,13 +1549,12 @@ class Eynollah: return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes - def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_light") results = self.executor.map(partial(do_work_of_slopes_new_light, textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, slope_deskew=slope_deskew,textline_light=self.textline_light, logger=self.logger,), boxes, contours, contours_par, range(len(contours_par))) @@ -1563,13 +1562,12 @@ class Eynollah: self.logger.debug("exit get_slopes_and_deskew_new_light") return tuple(zip(*results)) - def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") results = self.executor.map(partial(do_work_of_slopes_new, textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, slope_deskew=slope_deskew, MAX_SLOPE=MAX_SLOPE, KERNEL=KERNEL, @@ -1580,13 +1578,12 @@ class Eynollah: self.logger.debug("exit get_slopes_and_deskew_new") return tuple(zip(*results)) - def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew): + def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes, mask_texts_only, num_col, scale_par, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") results = self.executor.map(partial(do_work_of_slopes_new_curved, textline_mask_tot_ea=textline_mask_tot, - image_page_rotated=image_page_rotated, mask_texts_only=mask_texts_only, num_col=num_col, scale_par=scale_par, @@ -2935,10 +2932,10 @@ class Eynollah: return slope_deskew def run_marginals( - self, image_page, textline_mask_tot_ea, mask_images, mask_lines, + self, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): - image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] + textline_mask_tot = textline_mask_tot_ea[:, :] textline_mask_tot[mask_images[:, :] == 1] = 0 text_regions_p_1[mask_lines[:, :] == 1] = 3 @@ -2957,10 +2954,7 @@ class Eynollah: except Exception as e: self.logger.error("exception %s", e) - if self.plotter: - self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) - self.plotter.save_plot_of_layout_main(text_regions_p, image_page) - return textline_mask_tot, text_regions_p, image_page_rotated + return textline_mask_tot, text_regions_p def run_boxes_no_full_layout( self, image_page, textline_mask_tot, text_regions_p, @@ -3112,7 +3106,7 @@ class Eynollah: text_regions_p[:,:][table_prediction[:,:]==1] = 10 img_revised_tab = text_regions_p[:,:] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) @@ -3132,7 +3126,7 @@ class Eynollah: else: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) @@ -4008,9 +4002,12 @@ class Eynollah: text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - textline_mask_tot, text_regions_p, image_page_rotated = \ - self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, + textline_mask_tot, text_regions_p = \ + self.run_marginals(textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + if self.plotter: + self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) + self.plotter.save_plot_of_layout_main(text_regions_p, image_page) if self.light_version and num_col_classifier in (1,2): image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) @@ -4019,7 +4016,6 @@ class Eynollah: textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) - image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info("detection of marginals took %.1fs", time.time() - t1) #print("text region early 2 marginal in %.1fs", time.time() - t0) @@ -4195,11 +4191,11 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2( txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, - image_page_rotated, boxes_text, slope_deskew) + boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, @@ -4219,11 +4215,11 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light( txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - image_page_rotated, boxes_text, slope_deskew) + boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( # all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: @@ -4231,25 +4227,25 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new( txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - image_page_rotated, boxes_text, slope_deskew) + boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - image_page_rotated, boxes_marginals, slope_deskew) + boxes_marginals, slope_deskew) else: scale_param = 1 textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved( txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, - image_page_rotated, boxes_text, text_only, + boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2( all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, - image_page_rotated, boxes_marginals, text_only, + boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index b1a90b5..dcddc65 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1532,7 +1532,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map def do_work_of_slopes_new( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, slope_deskew, + textline_mask_tot_ea, slope_deskew, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: @@ -1590,7 +1590,7 @@ def do_work_of_slopes_new( def do_work_of_slopes_new_curved( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, + textline_mask_tot_ea, mask_texts_only, num_col, scale_par, slope_deskew, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: @@ -1679,7 +1679,7 @@ def do_work_of_slopes_new_curved( def do_work_of_slopes_new_light( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light, + textline_mask_tot_ea, slope_deskew, textline_light, logger=None ): if logger is None: From eae1303ebb73f3609217c8d960855d7bb5ddd1d2 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 26 Aug 2025 21:06:36 +0200 Subject: [PATCH 25/29] =?UTF-8?q?contours:=20rename=20'pixel'=20=E2=86=92?= =?UTF-8?q?=20'label'=20for=20clarity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/eynollah/utils/contour.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index a9a7172..ee2faa7 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -141,12 +141,12 @@ def return_parent_contours(contours, hierarchy): if hierarchy[0][i][3] == -1] return contours_parent -def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): +def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -264,12 +264,12 @@ def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix, map confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask)) return cnts, confs -def return_contours_of_interested_textline(region_pre_p, pixel): +def return_contours_of_interested_textline(region_pre_p, label): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -292,12 +292,12 @@ def return_contours_of_image(image): contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) return contours, hierarchy -def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): +def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -310,12 +310,12 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si return contours_imgs -def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): +def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 + cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: - cnts_images = (region_pre_p[:, :] == pixel) * 1 + cnts_images = (region_pre_p[:, :] == label) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) From bb9cba1fd9a393ea592b1b31c9b152fbf244f6ba Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 26 Aug 2025 21:07:18 +0200 Subject: [PATCH 26/29] writer: SeparatorRegion needs SeparatorRegionType (not ImageRegionType) --- src/eynollah/writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 92e353f..01c86de 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -296,7 +296,7 @@ class EynollahXmlWriter(): page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) for mm in range(len(polygons_lines_to_be_written_in_xml)): - page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) + page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) for mm in range(len(found_polygons_tables)): page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) From 090341241e5cc2482abdca271da6bc0ac5891423 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 26 Aug 2025 21:07:50 +0200 Subject: [PATCH 27/29] writer: use @type='heading' instead of 'header' --- src/eynollah/writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 01c86de..b9e906a 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -268,7 +268,7 @@ class EynollahXmlWriter(): self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) for mm in range(len(found_polygons_text_region_h)): - textregion = TextRegionType(id=counter.next_region_id, type_='header', + textregion = TextRegionType(id=counter.next_region_id, type_='heading', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) page.add_TextRegion(textregion) From 92a7c7cfea471f140f32f97c0200a1ebaebefcc2 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 1 Sep 2025 11:37:22 +0200 Subject: [PATCH 28/29] changed the drop capitals bonding box to contour ratio threshold --- src/eynollah/utils/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index ca86047..05397d0 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -851,7 +851,8 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1) percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels) - if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.6 and + + if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.7 and percent_text_to_all_in_drop >= 0.3): layout_in_patch[box0] = drop_capital_label else: From 8c949cec714751fbfd8c791b90f80f4e26b6d330 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 3 Sep 2025 19:18:11 +0200 Subject: [PATCH 29/29] PR #173 has been reverted. Additionally, for TrOCR, the cropped text lines will no longer be added to a list before prediction. Instead, for each batch size, the text line images will be collected and predictions will be made directly on them. --- src/eynollah/eynollah.py | 452 ++++++++++++++++++++------- src/eynollah/utils/__init__.py | 63 ++-- src/eynollah/utils/contour.py | 177 +++-------- src/eynollah/utils/marginals.py | 2 - src/eynollah/utils/separate_lines.py | 60 ++-- src/eynollah/writer.py | 4 +- 6 files changed, 442 insertions(+), 316 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 396b4b9..eff9fa8 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -73,8 +73,6 @@ from .utils.contour import ( return_contours_of_interested_region_by_min_size, return_contours_of_interested_textline, return_parent_contours, - dilate_textregion_contours, - dilate_textline_contours, ) from .utils.rotate import ( rotate_image, @@ -112,8 +110,6 @@ from .utils.resize import resize_image from .utils import ( boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, - box2rect, - box2slice, find_num_col, otsu_copy_binary, put_drop_out_from_only_drop_model, @@ -1750,7 +1746,7 @@ class Eynollah: self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 - def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): + def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) M_main_tot = [cv2.moments(polygons_of_textlines[j]) @@ -1773,17 +1769,18 @@ class Eynollah: all_found_textline_polygons.append(textlines_ins[::-1]) slopes.append(slope_deskew) - crop_coor = box2rect(boxes[index]) + _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) all_box_coord.append(crop_coor) return all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))), slopes - def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_light") results = self.executor.map(partial(do_work_of_slopes_new_light, textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, slope_deskew=slope_deskew,textline_light=self.textline_light, logger=self.logger,), boxes, contours, contours_par, range(len(contours_par))) @@ -1791,12 +1788,13 @@ class Eynollah: self.logger.debug("exit get_slopes_and_deskew_new_light") return tuple(zip(*results)) - def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): + def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") results = self.executor.map(partial(do_work_of_slopes_new, textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, slope_deskew=slope_deskew, MAX_SLOPE=MAX_SLOPE, KERNEL=KERNEL, @@ -1807,12 +1805,13 @@ class Eynollah: self.logger.debug("exit get_slopes_and_deskew_new") return tuple(zip(*results)) - def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes, mask_texts_only, num_col, scale_par, slope_deskew): + def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew): if not len(contours): return [], [], [], [], [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") results = self.executor.map(partial(do_work_of_slopes_new_curved, textline_mask_tot_ea=textline_mask_tot, + image_page_rotated=image_page_rotated, mask_texts_only=mask_texts_only, num_col=num_col, scale_par=scale_par, @@ -1994,9 +1993,9 @@ class Eynollah: mask_texts_only = (prediction_regions_org[:,:] ==1)*1 mask_images_only=(prediction_regions_org[:,:] ==2)*1 - polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) - polygons_seplines = filter_contours_area_of_image( - mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) + polygons_lines_xml = textline_con_fil = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -2035,7 +2034,7 @@ class Eynollah: ##polygons_of_images_fin.append(ploy_img_ind) box = cv2.boundingRect(ploy_img_ind) - page_coord_img = box2rect(box) + _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], @@ -2049,7 +2048,7 @@ class Eynollah: if h < 150 or w < 150: pass else: - page_coord_img = box2rect(box) + _, page_coord_img = crop_image_inside_box(box, text_regions_p_true) # cont_page.append(np.array([[page_coord[2], page_coord[0]], # [page_coord[3], page_coord[0]], # [page_coord[3], page_coord[1]], @@ -2060,7 +2059,7 @@ class Eynollah: [page_coord_img[2], page_coord_img[1]]])) self.logger.debug("exit get_regions_extract_images_only") - return text_regions_p_true, erosion_hurts, polygons_seplines, polygons_of_images_fin, image_page, page_coord, cont_page + return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): self.logger.debug("enter get_regions_light_v") @@ -2176,31 +2175,31 @@ class Eynollah: mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 - polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts=polygons_seplines, color=(1,1,1)) + test_khat = cv2.fillPoly(test_khat, pts=polygons_lines_xml, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() #for jv in range(1): - #print(jv, hir_seplines[0][232][3]) + #print(jv, hir_lines_xml[0][232][3]) #test_khat = np.zeros(prediction_regions_org.shape) - #test_khat = cv2.fillPoly(test_khat, pts = [polygons_seplines[232]], color=(1,1,1)) + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() - polygons_seplines = filter_contours_area_of_image( - mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) + polygons_lines_xml = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) test_khat = np.zeros(prediction_regions_org.shape) - test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1)) + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() #sys.exit() polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts) + ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) @@ -2218,7 +2217,7 @@ class Eynollah: #plt.show() #print("inside 4 ", time.time()-t_in) self.logger.debug("exit get_regions_light_v") - return text_regions_p_true, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin, confidence_matrix + return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin, confidence_matrix else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) self.logger.debug("exit get_regions_light_v") @@ -2301,9 +2300,9 @@ class Eynollah: mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1 - polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) - polygons_seplines = filter_contours_area_of_image( - mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) + polygons_lines_xml = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) @@ -2315,7 +2314,7 @@ class Eynollah: text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) self.logger.debug("exit get_regions_from_xy_2models") - return text_regions_p_true, erosion_hurts, polygons_seplines + return text_regions_p_true, erosion_hurts, polygons_lines_xml except: if self.input_binary: prediction_bin = np.copy(img_org) @@ -2350,9 +2349,9 @@ class Eynollah: mask_texts_only = (prediction_regions_org == 1)*1 mask_images_only= (prediction_regions_org == 2)*1 - polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) - polygons_seplines = filter_contours_area_of_image( - mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) + polygons_lines_xml = filter_contours_area_of_image( + mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -2365,7 +2364,7 @@ class Eynollah: erosion_hurts = True self.logger.debug("exit get_regions_from_xy_2models") - return text_regions_p_true, erosion_hurts, polygons_seplines + return text_regions_p_true, erosion_hurts, polygons_lines_xml def do_order_of_regions_full_layout( self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): @@ -3233,13 +3232,13 @@ class Eynollah: if self.plotter: self.plotter.save_deskewed_image(slope_deskew) self.logger.info("slope_deskew: %.2f°", slope_deskew) - return slope_deskew + return slope_deskew, slope_first def run_marginals( - self, textline_mask_tot_ea, mask_images, mask_lines, + self, image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): - textline_mask_tot = textline_mask_tot_ea[:, :] + image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] textline_mask_tot[mask_images[:, :] == 1] = 0 text_regions_p_1[mask_lines[:, :] == 1] = 3 @@ -3257,7 +3256,10 @@ class Eynollah: except Exception as e: self.logger.error("exception %s", e) - return textline_mask_tot, text_regions_p + if self.plotter: + self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) + self.plotter.save_plot_of_layout_main(text_regions_p, image_page) + return textline_mask_tot, text_regions_p, image_page_rotated def run_boxes_no_full_layout( self, image_page, textline_mask_tot, text_regions_p, @@ -3409,7 +3411,7 @@ class Eynollah: text_regions_p[:,:][table_prediction[:,:]==1] = 10 img_revised_tab = text_regions_p[:,:] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) @@ -3429,7 +3431,7 @@ class Eynollah: else: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ + image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) @@ -4280,9 +4282,7 @@ class Eynollah: def filter_contours_without_textline_inside( - self, contours, text_con_org, contours_textline, - contours_only_text_parent_d_ordered, - conf_contours_textregions): + self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions): ###contours_txtline_of_all_textregions = [] ###for jj in range(len(contours_textline)): ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] @@ -4306,21 +4306,138 @@ class Eynollah: ###if np.any(results==1): ###contours_with_textline.append(con_tr) - textregion_index_to_del = set() + textregion_index_to_del = [] for index_textregion, textlines_textregion in enumerate(contours_textline): - if len(textlines_textregion) == 0: - textregion_index_to_del.add(index_textregion) - def filterfun(lis): - if len(lis) == 0: - return [] - return list(np.delete(lis, list(textregion_index_to_del))) + if len(textlines_textregion)==0: + textregion_index_to_del.append(index_textregion) - return (filterfun(contours), - filterfun(text_con_org), - filterfun(conf_contours_textregions), - filterfun(contours_textline), - filterfun(contours_only_text_parent_d_ordered), - np.arange(len(contours) - len(textregion_index_to_del))) + uniqe_args_trs = np.unique(textregion_index_to_del) + uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1] + + for ind_u_a_trs in uniqe_args_trs_sorted: + conf_contours_textregions.pop(ind_u_a_trs) + contours.pop(ind_u_a_trs) + contours_textline.pop(ind_u_a_trs) + text_con_org.pop(ind_u_a_trs) + if len(contours_only_text_parent_d_ordered) > 0: + contours_only_text_parent_d_ordered.pop(ind_u_a_trs) + + return contours, text_con_org, conf_contours_textregions, contours_textline, contours_only_text_parent_d_ordered, np.array(range(len(contours))) + + def dilate_textlines(self, all_found_textline_polygons): + for j in range(len(all_found_textline_polygons)): + for i in range(len(all_found_textline_polygons[j])): + con_ind = all_found_textline_polygons[j][i] + con_ind = con_ind.astype(float) + + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) + mult = x_biger_than_x*x_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if y_differential[0]==0: + y_differential[0] = 0.1 + if y_differential[-1]==0: + y_differential[-1]= 0.1 + y_differential = [y_differential[ind] if y_differential[ind] != 0 + else 0.5 * (y_differential[ind-1] + y_differential[ind+1]) + for ind in range(len(y_differential))] + + if y_differential[0]==0.1: + y_differential[0] = y_differential[1] + if y_differential[-1]==0.1: + y_differential[-1] = y_differential[-2] + y_differential.append(y_differential[0]) + + y_differential = [-1 if y_differential[ind] < 0 else 1 + for ind in range(len(y_differential))] + y_differential = self.return_it_in_two_groups(y_differential) + y_differential = np.array(y_differential) + + con_scaled = con_ind*1 + con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential + con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 + con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 + + try: + con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 + con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 + con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 + + try: + con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 + con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 + except: + pass + + else: + y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) + mult = y_biger_than_x*y_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if x_differential[0]==0: + x_differential[0] = 0.1 + if x_differential[-1]==0: + x_differential[-1]= 0.1 + x_differential = [x_differential[ind] if x_differential[ind] != 0 + else 0.5 * (x_differential[ind-1] + x_differential[ind+1]) + for ind in range(len(x_differential))] + + if x_differential[0]==0.1: + x_differential[0] = x_differential[1] + if x_differential[-1]==0.1: + x_differential[-1] = x_differential[-2] + x_differential.append(x_differential[0]) + + x_differential = [-1 if x_differential[ind] < 0 else 1 + for ind in range(len(x_differential))] + x_differential = self.return_it_in_two_groups(x_differential) + x_differential = np.array(x_differential) + + con_scaled = con_ind*1 + con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential + con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 + con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 + + try: + con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 + con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 + con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 + + try: + con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 + con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 + except: + pass + + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] + + return all_found_textline_polygons def delete_regions_without_textlines( self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, @@ -4431,7 +4548,7 @@ class Eynollah: self.logger.info("Enhancing took %.1fs ", time.time() - t0) if self.extract_only_images: - text_regions_p_1, erosion_hurts, polygons_seplines, polygons_of_images, image_page, page_coord, cont_page = \ + text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], @@ -4459,7 +4576,8 @@ class Eynollah: all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline") @@ -4477,7 +4595,7 @@ class Eynollah: all_found_textline_polygons_marginals_right = [] all_box_coord_marginals_left = [] all_box_coord_marginals_right = [] - polygons_seplines = [] + polygons_lines_xml = [] contours_tables = [] conf_contours_textregions =[0] @@ -4491,13 +4609,13 @@ class Eynollah: cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_seplines, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order) + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order) return pcgts #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.light_version: - text_regions_p_1, erosion_hurts, polygons_seplines, textline_mask_tot_ea, img_bin_light, confidence_matrix = \ + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \ self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) @@ -4510,9 +4628,9 @@ class Eynollah: textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew = self.run_deskew(textline_mask_tot_ea) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ @@ -4524,7 +4642,7 @@ class Eynollah: textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) #print("text region early -4 in %.1fs", time.time() - t0) else: - text_regions_p_1, erosion_hurts, polygons_seplines = \ + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \ self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -4551,7 +4669,7 @@ class Eynollah: textline_mask_tot_ea = self.run_textline(image_page) self.logger.info("textline detection took %.1fs", time.time() - t1) t1 = time.time() - slope_deskew = self.run_deskew(textline_mask_tot_ea) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) self.logger.info("deskewing took %.1fs", time.time() - t1) elif num_col_classifier in (1,2): org_h_l_m = textline_mask_tot_ea.shape[0] @@ -4569,12 +4687,9 @@ class Eynollah: text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) - textline_mask_tot, text_regions_p = \ - self.run_marginals(textline_mask_tot_ea, mask_images, mask_lines, + textline_mask_tot, text_regions_p, image_page_rotated = \ + self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - if self.plotter: - self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) - self.plotter.save_plot_of_layout_main(text_regions_p, image_page) if self.light_version and num_col_classifier in (1,2): image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) @@ -4583,6 +4698,7 @@ class Eynollah: textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info("detection of marginals took %.1fs", time.time() - t1) #print("text region early 2 marginal in %.1fs", time.time() - t0) @@ -4593,14 +4709,14 @@ class Eynollah: boxes, boxes_d, polygons_of_marginals, contours_tables = \ self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) else: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, \ regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light if self.light_version else None) - ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.light_version: drop_label_in_full_layout = 4 textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 @@ -4724,30 +4840,31 @@ class Eynollah: [], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], [], - cont_page, polygons_seplines) + cont_page, polygons_lines_xml) else: pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], - cont_page, polygons_seplines, contours_tables) + cont_page, polygons_lines_xml, contours_tables) return pcgts #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent) + contours_only_text_parent = self.dilate_textregions_contours( + contours_only_text_parent) contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one( contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals) #print("text region early 3.5 in %.1fs", time.time() - t0) txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( - contours_only_text_parent, self.image, confidence_matrix, map=self.executor.map) - #txt_con_org = dilate_textregion_contours(txt_con_org) - #contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent) + contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( - contours_only_text_parent, self.image, confidence_matrix, map=self.executor.map) + contours_only_text_parent, self.image, slope_first, confidence_matrix, map=self.executor.map) #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) @@ -4759,11 +4876,11 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2( txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, - boxes_text, slope_deskew) + image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, - boxes_marginals, slope_deskew) + image_page_rotated, boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = \ # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, @@ -4771,10 +4888,14 @@ class Eynollah: #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = \ # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, # boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) - all_found_textline_polygons = dilate_textline_contours(all_found_textline_polygons) + #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = dilate_textline_contours(all_found_textline_polygons_marginals) + all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version( + all_found_textline_polygons_marginals) contours_only_text_parent, txt_con_org, conf_contours_textregions, all_found_textline_polygons, contours_only_text_parent_d_ordered, \ index_by_text_par_con = self.filter_contours_without_textline_inside( contours_only_text_parent, txt_con_org, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions) @@ -4783,11 +4904,11 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light( txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - boxes_text, slope_deskew) + image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - boxes_marginals, slope_deskew) + image_page_rotated, boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( # all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: @@ -4795,25 +4916,25 @@ class Eynollah: all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new( txt_con_org, contours_only_text_parent, textline_mask_tot_ea, - boxes_text, slope_deskew) + image_page_rotated, boxes_text, slope_deskew) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, - boxes_marginals, slope_deskew) + image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved( txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, - boxes_text, text_only, + image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2( all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, - boxes_marginals, text_only, + image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) @@ -4950,7 +5071,7 @@ class Eynollah: all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_seplines, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) + cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) return pcgts contours_only_text_parent_h = None @@ -5042,7 +5163,7 @@ class Eynollah: txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_seplines, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions) + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions) return pcgts @@ -5178,8 +5299,12 @@ class Eynollah_ocr: cropped_lines = [] cropped_lines_region_indexer = [] cropped_lines_meging_indexing = [] + + extracted_texts = [] indexer_text_region = 0 + indexer_b_s = 0 + for nn in root1.iter(region_tags): for child_textregion in nn: if child_textregion.tag.endswith("TextLine"): @@ -5204,40 +5329,105 @@ class Eynollah_ocr: img_crop = img_poly_on_img[y:y+h, x:x+w, :] img_crop[mask_poly==0] = 255 + if h2w_ratio > 0.1: cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width) ) cropped_lines_meging_indexing.append(0) + indexer_b_s+=1 + if indexer_b_s==self.b_s: + imgs = cropped_lines[:] + cropped_lines = [] + indexer_b_s = 0 + + pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values + generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + extracted_texts = extracted_texts + generated_text_merged + else: splited_images, _ = return_textlines_split_if_needed(img_crop, None) #print(splited_images) if splited_images: cropped_lines.append(resize_image(splited_images[0], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)) cropped_lines_meging_indexing.append(1) + indexer_b_s+=1 + + if indexer_b_s==self.b_s: + imgs = cropped_lines[:] + cropped_lines = [] + indexer_b_s = 0 + + pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values + generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + extracted_texts = extracted_texts + generated_text_merged + + cropped_lines.append(resize_image(splited_images[1], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)) cropped_lines_meging_indexing.append(-1) + indexer_b_s+=1 + + if indexer_b_s==self.b_s: + imgs = cropped_lines[:] + cropped_lines = [] + indexer_b_s = 0 + + pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values + generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + extracted_texts = extracted_texts + generated_text_merged + else: cropped_lines.append(img_crop) cropped_lines_meging_indexing.append(0) + indexer_b_s+=1 + + if indexer_b_s==self.b_s: + imgs = cropped_lines[:] + cropped_lines = [] + indexer_b_s = 0 + + pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values + generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) + generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + extracted_texts = extracted_texts + generated_text_merged + + + indexer_text_region = indexer_text_region +1 - - extracted_texts = [] - n_iterations = math.ceil(len(cropped_lines) / self.b_s) - - for i in range(n_iterations): - if i==(n_iterations-1): - n_start = i*self.b_s - imgs = cropped_lines[n_start:] - else: - n_start = i*self.b_s - n_end = (i+1)*self.b_s - imgs = cropped_lines[n_start:n_end] + if indexer_b_s!=0: + imgs = cropped_lines[:] + cropped_lines = [] + indexer_b_s = 0 + pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) extracted_texts = extracted_texts + generated_text_merged + ####extracted_texts = [] + ####n_iterations = math.ceil(len(cropped_lines) / self.b_s) + + ####for i in range(n_iterations): + ####if i==(n_iterations-1): + ####n_start = i*self.b_s + ####imgs = cropped_lines[n_start:] + ####else: + ####n_start = i*self.b_s + ####n_end = (i+1)*self.b_s + ####imgs = cropped_lines[n_start:n_end] + ####pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values + ####generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) + ####generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + ####extracted_texts = extracted_texts + generated_text_merged + del cropped_lines gc.collect() @@ -5288,31 +5478,71 @@ class Eynollah_ocr: #print(time.time() - t0 ,'elapsed time') - indexer = 0 indexer_textregion = 0 for nn in root1.iter(region_tags): - text_subelement_textregion = ET.SubElement(nn, 'TextEquiv') - unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode') + #id_textregion = nn.attrib['id'] + #id_textregions.append(id_textregion) + #textregions_by_existing_ids.append(text_by_textregion[indexer_textregion]) + + is_textregion_text = False + for childtest in nn: + if childtest.tag.endswith("TextEquiv"): + is_textregion_text = True + + if not is_textregion_text: + text_subelement_textregion = ET.SubElement(nn, 'TextEquiv') + unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode') has_textline = False for child_textregion in nn: if child_textregion.tag.endswith("TextLine"): - text_subelement = ET.SubElement(child_textregion, 'TextEquiv') - unicode_textline = ET.SubElement(text_subelement, 'Unicode') - unicode_textline.text = extracted_texts_merged[indexer] + + is_textline_text = False + for childtest2 in child_textregion: + if childtest2.tag.endswith("TextEquiv"): + is_textline_text = True + + + if not is_textline_text: + text_subelement = ET.SubElement(child_textregion, 'TextEquiv') + ##text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}") + unicode_textline = ET.SubElement(text_subelement, 'Unicode') + unicode_textline.text = extracted_texts_merged[indexer] + else: + for childtest3 in child_textregion: + if childtest3.tag.endswith("TextEquiv"): + for child_uc in childtest3: + if child_uc.tag.endswith("Unicode"): + ##childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}") + child_uc.text = extracted_texts_merged[indexer] + indexer = indexer + 1 has_textline = True if has_textline: - unicode_textregion.text = text_by_textregion[indexer_textregion] + if is_textregion_text: + for child4 in nn: + if child4.tag.endswith("TextEquiv"): + for childtr_uc in child4: + if childtr_uc.tag.endswith("Unicode"): + childtr_uc.text = text_by_textregion[indexer_textregion] + else: + unicode_textregion.text = text_by_textregion[indexer_textregion] indexer_textregion = indexer_textregion + 1 - - + ###sample_order = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order] + + ##ordered_texts_sample = [text for _, text in sorted(sample_order)] + ##tot_page_text = ' '.join(ordered_texts_sample) + + ##for page_element in root1.iter(link+'Page'): + ##text_page = ET.SubElement(page_element, 'TextEquiv') + ##unicode_textpage = ET.SubElement(text_page, 'Unicode') + ##unicode_textpage.text = tot_page_text + ET.register_namespace("",name_space) tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) - #print("Job done in %.1fs", time.time() - t0) else: ###max_len = 280#512#280#512 ###padding_token = 1500#299#1500#299 diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 058ab83..ca86047 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -1,4 +1,3 @@ -from typing import Tuple import time import math @@ -299,17 +298,9 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order( x_end_with_child_without_mother, new_main_sep_y) -def box2rect(box: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]: - return (box[1], box[1] + box[3], - box[0], box[0] + box[2]) - -def box2slice(box: Tuple[int, int, int, int]) -> Tuple[slice, slice]: - return (slice(box[1], box[1] + box[3]), - slice(box[0], box[0] + box[2])) - def crop_image_inside_box(box, img_org_copy): - image_box = img_org_copy[box2slice(box)] - return image_box, box2rect(box) + image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] + return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] def otsu_copy_binary(img): img_r = np.zeros((img.shape[0], img.shape[1], 3)) @@ -860,8 +851,7 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1) percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels) - - if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.7 and + if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.6 and percent_text_to_all_in_drop >= 0.3): layout_in_patch[box0] = drop_capital_label else: @@ -965,11 +955,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light( regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom, regions_model_full.shape[0] // zoom), interpolation=cv2.INTER_NEAREST) - contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent] + contours_only_text_parent = [(i / zoom).astype(int) for i in contours_only_text_parent] ### cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \ - find_new_features_of_contours(contours_only_text_parent_z) + find_new_features_of_contours(contours_only_text_parent) length_con=x_max_main-x_min_main height_con=y_max_main-y_min_main @@ -992,7 +982,8 @@ def check_any_text_region_in_model_one_is_main_or_header_light( contours_only_text_parent_main_d=[] contours_only_text_parent_head_d=[] - for ii, con in enumerate(contours_only_text_parent_z): + for ii in range(len(contours_only_text_parent)): + con=contours_only_text_parent[ii] img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) @@ -1003,22 +994,23 @@ def check_any_text_region_in_model_one_is_main_or_header_light( if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ): regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 - contours_only_text_parent_head.append(contours_only_text_parent[ii]) - conf_contours_head.append(None) # why not conf_contours[ii], too? + contours_only_text_parent_head.append(con) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_head.append(all_box_coord[ii]) slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) + conf_contours_head.append(None) else: regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 - contours_only_text_parent_main.append(contours_only_text_parent[ii]) + contours_only_text_parent_main.append(con) conf_contours_main.append(conf_contours[ii]) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_main.append(all_box_coord[ii]) slopes_main.append(slopes[ii]) all_found_textline_polygons_main.append(all_found_textline_polygons[ii]) + #print(all_pixels,pixels_main,pixels_header) ### to make it faster @@ -1026,6 +1018,8 @@ def check_any_text_region_in_model_one_is_main_or_header_light( # regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom, # regions_model_full.shape[0] // zoom), # interpolation=cv2.INTER_NEAREST) + contours_only_text_parent_head = [(i * zoom).astype(int) for i in contours_only_text_parent_head] + contours_only_text_parent_main = [(i * zoom).astype(int) for i in contours_only_text_parent_main] ### return (regions_model_1, @@ -1748,7 +1742,6 @@ def return_boxes_of_images_by_order_of_reading_new( x_ending = np.array(x_ending) y_type_2 = np.array(y_type_2) y_diff_type_2 = np.array(y_diff_type_2) - all_columns = set(range(len(peaks_neg_tot) - 1)) if ((reading_order_type==1) or (reading_order_type==0 and @@ -1870,7 +1863,7 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_by_order.append(len(peaks_neg_tot)-2) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - columns_covered_by_mothers = set() + columns_covered_by_mothers = [] for dj in range(len(x_start_without_mother)): columns_covered_by_mothers = columns_covered_by_mothers + \ list(range(int(x_start_without_mother[dj]), @@ -1882,7 +1875,7 @@ def return_boxes_of_images_by_order_of_reading_new( y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) + x_starting = np.append(x_starting, columns_not_covered) x_starting = np.append(x_starting, x_start_without_mother) x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) x_ending = np.append(x_ending, x_end_without_mother) @@ -1913,7 +1906,7 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_by_order.append(x_end_column_sort[ii]-1) else: #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - columns_covered_by_mothers = set() + columns_covered_by_mothers = [] for dj in range(len(x_start_without_mother)): columns_covered_by_mothers = columns_covered_by_mothers + \ list(range(int(x_start_without_mother[dj]), @@ -1925,12 +1918,12 @@ def return_boxes_of_images_by_order_of_reading_new( y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) + x_starting = np.append(x_starting, columns_not_covered) x_starting = np.append(x_starting, x_start_without_mother) - x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) x_ending = np.append(x_ending, x_end_without_mother) - columns_covered_by_with_child_no_mothers = set() + columns_covered_by_with_child_no_mothers = [] for dj in range(len(x_end_with_child_without_mother)): columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \ list(range(int(x_start_with_child_without_mother[dj]), @@ -1974,7 +1967,7 @@ def return_boxes_of_images_by_order_of_reading_new( if len(x_diff_all_between_nm_wc)>0: biggest=np.argmax(x_diff_all_between_nm_wc) - columns_covered_by_mothers = set() + columns_covered_by_mothers = [] for dj in range(len(x_starting_all_between_nm_wc)): columns_covered_by_mothers = columns_covered_by_mothers + \ list(range(int(x_starting_all_between_nm_wc[dj]), @@ -2099,7 +2092,8 @@ def return_boxes_of_images_by_order_of_reading_new( x_start_by_order=[] x_end_by_order=[] if len(x_starting)>0: - columns_covered_by_lines_covered_more_than_2col = set() + all_columns = np.arange(len(peaks_neg_tot)-1) + columns_covered_by_lines_covered_more_than_2col = [] for dj in range(len(x_starting)): if set(list(range(int(x_starting[dj]),int(x_ending[dj]) ))) == set(all_columns): pass @@ -2112,21 +2106,22 @@ def return_boxes_of_images_by_order_of_reading_new( y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) - x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1) + x_starting = np.append(x_starting, columns_not_covered) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) if len(new_main_sep_y) > 0: x_starting = np.append(x_starting, 0) - x_ending = np.append(x_ending, len(peaks_neg_tot) - 1) + x_ending = np.append(x_ending, len(peaks_neg_tot)-1) else: x_starting = np.append(x_starting, x_starting[0]) x_ending = np.append(x_ending, x_ending[0]) else: - columns_not_covered = list(all_columns) + all_columns = np.arange(len(peaks_neg_tot)-1) + columns_not_covered = list(set(all_columns)) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) - x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype)) - x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1) + x_starting = np.append(x_starting, columns_not_covered) + x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) ind_args=np.array(range(len(y_type_2))) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index ee2faa7..0e84153 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -1,15 +1,7 @@ -from typing import Sequence, Union -from numbers import Number from functools import partial -import itertools - import cv2 import numpy as np -from scipy.sparse.csgraph import minimum_spanning_tree -from shapely.geometry import Polygon, LineString -from shapely.geometry.polygon import orient -from shapely import set_precision -from shapely.ops import unary_union, nearest_points +from shapely import geometry from .rotate import rotate_image, rotation_image_new @@ -45,28 +37,29 @@ def get_text_region_boxes_by_given_contours(contours): return boxes, contours_new -def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0): +def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): found_polygons_early = [] - for jv, contour in enumerate(contours): - if len(contour) < 3: # A polygon cannot have less than 3 points + for jv,c in enumerate(contours): + if len(c) < 3: # A polygon cannot have less than 3 points continue - polygon = contour2polygon(contour, dilate=dilate) + polygon = geometry.Polygon([point[0] for point in c]) area = polygon.area if (area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1): - found_polygons_early.append(polygon2contour(polygon)) + found_polygons_early.append(np.array([[point] + for point in polygon.exterior.coords], dtype=np.uint)) return found_polygons_early -def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0): +def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): found_polygons_early = [] - for jv, contour in enumerate(contours): - if len(contour) < 3: # A polygon cannot have less than 3 points + for jv,c in enumerate(contours): + if len(c) < 3: # A polygon cannot have less than 3 points continue - polygon = contour2polygon(contour, dilate=dilate) - # area = cv2.contourArea(contour) + polygon = geometry.Polygon([point[0] for point in c]) + # area = cv2.contourArea(c) area = polygon.area ##print(np.prod(thresh.shape[:2])) # Check that polygon has area greater than minimal area @@ -75,8 +68,9 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1. area <= max_area * np.prod(image.shape[:2]) and # hierarchy[0][jv][3]==-1 True): - # print(contour[0][0][1]) - found_polygons_early.append(polygon2contour(polygon)) + # print(c[0][0][1]) + found_polygons_early.append(np.array([[point] + for point in polygon.exterior.coords], dtype=np.int32)) return found_polygons_early def find_new_features_of_contours(contours_main): @@ -141,12 +135,12 @@ def return_parent_contours(contours, hierarchy): if hierarchy[0][i][3] == -1] return contours_parent -def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002): +def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == label) * 1 + cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 else: - cnts_images = (region_pre_p[:, :] == label) * 1 + cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -253,23 +247,30 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) return cont_int[0], index_r_con, confidence_contour -def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix, map=map): +def get_textregion_contours_in_org_image_light(cnts, img, slope_first, confidence_matrix, map=map): if not len(cnts): return [], [] + + confidence_matrix = cv2.resize(confidence_matrix, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) + img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) + ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) + #cnts = cnts/2 + cnts = [(i/6).astype(int) for i in cnts] + results = map(partial(do_back_rotation_and_get_cnt_back, + img=img, + slope_first=slope_first, + confidence_matrix=confidence_matrix, + ), + cnts, range(len(cnts))) + contours, indexes, conf_contours = tuple(zip(*results)) + return [i*6 for i in contours], list(conf_contours) - confs = [] - for cnt in cnts: - cnt_mask = np.zeros(confidence_matrix.shape) - cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt], color=1.0) - confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask)) - return cnts, confs - -def return_contours_of_interested_textline(region_pre_p, label): +def return_contours_of_interested_textline(region_pre_p, pixel): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == label) * 1 + cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 else: - cnts_images = (region_pre_p[:, :] == label) * 1 + cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -292,12 +293,12 @@ def return_contours_of_image(image): contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) return contours, hierarchy -def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003): +def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == label) * 1 + cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 else: - cnts_images = (region_pre_p[:, :] == label) * 1 + cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -310,12 +311,12 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_si return contours_imgs -def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area): +def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == label) * 1 + cnts_images = (region_pre_p[:, :, 0] == pixel) * 1 else: - cnts_images = (region_pre_p[:, :] == label) * 1 + cnts_images = (region_pre_p[:, :] == pixel) * 1 cnts_images = cnts_images.astype(np.uint8) cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) @@ -331,97 +332,3 @@ def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, return img_ret[:, :, 0] -def dilate_textline_contours(all_found_textline_polygons): - return [[polygon2contour(contour2polygon(contour, dilate=6)) - for contour in region] - for region in all_found_textline_polygons] - -def dilate_textregion_contours(all_found_textline_polygons): - return [polygon2contour(contour2polygon(contour, dilate=6)) - for contour in all_found_textline_polygons] - -def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0): - polygon = Polygon([point[0] for point in contour]) - if dilate: - polygon = polygon.buffer(dilate) - if polygon.geom_type == 'GeometryCollection': - # heterogeneous result: filter zero-area shapes (LineString, Point) - polygon = unary_union([geom for geom in polygon.geoms if geom.area > 0]) - if polygon.geom_type == 'MultiPolygon': - # homogeneous result: construct convex hull to connect - polygon = join_polygons(polygon.geoms) - return make_valid(polygon) - -def polygon2contour(polygon: Polygon) -> np.ndarray: - polygon = np.array(polygon.exterior.coords[:-1], dtype=int) - return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis] - -def make_valid(polygon: Polygon) -> Polygon: - """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.""" - def isint(x): - return isinstance(x, int) or int(x) == x - # make sure rounding does not invalidate - if not all(map(isint, np.array(polygon.exterior.coords).flat)) and polygon.minimum_clearance < 1.0: - polygon = Polygon(np.round(polygon.exterior.coords)) - points = list(polygon.exterior.coords[:-1]) - # try by re-arranging points - for split in range(1, len(points)): - if polygon.is_valid or polygon.simplify(polygon.area).is_valid: - break - # simplification may not be possible (at all) due to ordering - # in that case, try another starting point - polygon = Polygon(points[-split:]+points[:-split]) - # try by simplification - for tolerance in range(int(polygon.area + 1.5)): - if polygon.is_valid: - break - # simplification may require a larger tolerance - polygon = polygon.simplify(tolerance + 1) - # try by enlarging - for tolerance in range(1, int(polygon.area + 2.5)): - if polygon.is_valid: - break - # enlargement may require a larger tolerance - polygon = polygon.buffer(tolerance) - assert polygon.is_valid, polygon.wkt - return polygon - -def join_polygons(polygons: Sequence[Polygon], scale=20) -> Polygon: - """construct concave hull (alpha shape) from input polygons by connecting their pairwise nearest points""" - # ensure input polygons are simply typed and all oriented equally - polygons = [orient(poly) - for poly in itertools.chain.from_iterable( - [poly.geoms - if poly.geom_type in ['MultiPolygon', 'GeometryCollection'] - else [poly] - for poly in polygons])] - npoly = len(polygons) - if npoly == 1: - return polygons[0] - # find min-dist path through all polygons (travelling salesman) - pairs = itertools.combinations(range(npoly), 2) - dists = np.zeros((npoly, npoly), dtype=float) - for i, j in pairs: - dist = polygons[i].distance(polygons[j]) - if dist < 1e-5: - dist = 1e-5 # if pair merely touches, we still need to get an edge - dists[i, j] = dist - dists[j, i] = dist - dists = minimum_spanning_tree(dists, overwrite=True) - # add bridge polygons (where necessary) - for prevp, nextp in zip(*dists.nonzero()): - prevp = polygons[prevp] - nextp = polygons[nextp] - nearest = nearest_points(prevp, nextp) - bridgep = orient(LineString(nearest).buffer(max(1, scale/5), resolution=1), -1) - polygons.append(bridgep) - jointp = unary_union(polygons) - assert jointp.geom_type == 'Polygon', jointp.wkt - # follow-up calculations will necessarily be integer; - # so anticipate rounding here and then ensure validity - jointp2 = set_precision(jointp, 1.0) - if jointp2.geom_type != 'Polygon' or not jointp2.is_valid: - jointp2 = Polygon(np.round(jointp.exterior.coords)) - jointp2 = make_valid(jointp2) - assert jointp2.geom_type == 'Polygon', jointp2.wkt - return jointp2 diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index 595cd14..ac8dc1d 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -99,8 +99,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve except: point_left=first_nonzero - if point_left == first_nonzero and point_right == last_nonzero: - return text_regions if point_right>=mask_marginals.shape[1]: diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index dd37b89..ead5cfb 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -20,8 +20,6 @@ from .contour import ( from . import ( find_num_col_deskew, crop_image_inside_box, - box2rect, - box2slice, ) def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): @@ -1349,26 +1347,24 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest return contours_rotated_clean -def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None): +def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None): if logger is None: logger = getLogger(__package__) - if not np.prod(img_crop.shape): - return img_crop if num_col == 1: - num_patches = int(img_crop.shape[1] / 200.0) + num_patches = int(img_path.shape[1] / 200.0) else: - num_patches = int(img_crop.shape[1] / 140.0) - # num_patches=int(img_crop.shape[1]/200.) + num_patches = int(img_path.shape[1] / 140.0) + # num_patches=int(img_path.shape[1]/200.) if num_patches == 0: num_patches = 1 - img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] + img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] - # plt.imshow(img_patch_interest) + # plt.imshow(img_patch_ineterst) # plt.show() - length_x = int(img_crop.shape[1] / float(num_patches)) + length_x = int(img_path.shape[1] / float(num_patches)) # margin = int(0.04 * length_x) just recently this was changed because it break lines into 2 margin = int(0.04 * length_x) # if margin<=4: @@ -1376,7 +1372,7 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl # margin=0 width_mid = length_x - 2 * margin - nxf = img_crop.shape[1] / float(width_mid) + nxf = img_path.shape[1] / float(width_mid) if nxf > int(nxf): nxf = int(nxf) + 1 @@ -1392,12 +1388,12 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl index_x_d = i * width_mid index_x_u = index_x_d + length_x - if index_x_u > img_crop.shape[1]: - index_x_u = img_crop.shape[1] - index_x_d = img_crop.shape[1] - length_x + if index_x_u > img_path.shape[1]: + index_x_u = img_path.shape[1] + index_x_d = img_path.shape[1] - length_x # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - img_xline = img_patch_interest[:, index_x_d:index_x_u] + img_xline = img_patch_ineterst[:, index_x_d:index_x_u] try: assert img_xline.any() @@ -1413,9 +1409,9 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl img_line_rotated = rotate_image(img_xline, slope_xline) img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 - img_patch_interest = img_crop[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] + img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] - img_patch_interest_revised = np.zeros(img_patch_interest.shape) + img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape) for i in range(nxf): if i == 0: @@ -1425,11 +1421,11 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl index_x_d = i * width_mid index_x_u = index_x_d + length_x - if index_x_u > img_crop.shape[1]: - index_x_u = img_crop.shape[1] - index_x_d = img_crop.shape[1] - length_x + if index_x_u > img_path.shape[1]: + index_x_u = img_path.shape[1] + index_x_d = img_path.shape[1] - length_x - img_xline = img_patch_interest[:, index_x_d:index_x_u] + img_xline = img_patch_ineterst[:, index_x_d:index_x_u] img_int = np.zeros((img_xline.shape[0], img_xline.shape[1])) img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] @@ -1452,9 +1448,9 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin] - img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size + img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size - return img_patch_interest_revised + return img_patch_ineterst_revised def do_image_rotation(angle, img, sigma_des, logger=None): if logger is None: @@ -1635,7 +1631,7 @@ def get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=None): def do_work_of_slopes_new( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, slope_deskew, + textline_mask_tot_ea, image_page_rotated, slope_deskew, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: @@ -1645,7 +1641,7 @@ def do_work_of_slopes_new( logger.debug('enter do_work_of_slopes_new') x, y, w, h = box_text - crop_coor = box2rect(box_text) + _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) all_text_region_raw = textline_mask_tot_ea * mask_textline @@ -1653,7 +1649,7 @@ def do_work_of_slopes_new( img_int_p = all_text_region_raw[:,:] img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) - if not np.prod(img_int_p.shape) or img_int_p.shape[0] /img_int_p.shape[1] < 0.1: + if img_int_p.shape[0] /img_int_p.shape[1] < 0.1: slope = 0 slope_for_all = slope_deskew all_text_region_raw = textline_mask_tot_ea[y: y + h, x: x + w] @@ -1693,7 +1689,7 @@ def do_work_of_slopes_new( def do_work_of_slopes_new_curved( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, mask_texts_only, num_col, scale_par, slope_deskew, + textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): if KERNEL is None: @@ -1710,7 +1706,7 @@ def do_work_of_slopes_new_curved( # plt.imshow(img_int_p) # plt.show() - if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1: + if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: slope = 0 slope_for_all = slope_deskew else: @@ -1736,7 +1732,7 @@ def do_work_of_slopes_new_curved( slope_for_all = slope_deskew slope = slope_for_all - crop_coor = box2rect(box_text) + _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) if abs(slope_for_all) < 45: textline_region_in_image = np.zeros(textline_mask_tot_ea.shape) @@ -1782,7 +1778,7 @@ def do_work_of_slopes_new_curved( def do_work_of_slopes_new_light( box_text, contour, contour_par, index_r_con, - textline_mask_tot_ea, slope_deskew, textline_light, + textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light, logger=None ): if logger is None: @@ -1790,7 +1786,7 @@ def do_work_of_slopes_new_light( logger.debug('enter do_work_of_slopes_new_light') x, y, w, h = box_text - crop_coor = box2rect(box_text) + _, crop_coor = crop_image_inside_box(box_text, image_page_rotated) mask_textline = np.zeros(textline_mask_tot_ea.shape) mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1)) all_text_region_raw = textline_mask_tot_ea * mask_textline diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 936c95f..2f9caf3 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -289,7 +289,7 @@ class EynollahXmlWriter(): self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) for mm in range(len(found_polygons_text_region_h)): - textregion = TextRegionType(id=counter.next_region_id, type_='heading', + textregion = TextRegionType(id=counter.next_region_id, type_='header', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) page.add_TextRegion(textregion) @@ -335,7 +335,7 @@ class EynollahXmlWriter(): page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) for mm in range(len(polygons_lines_to_be_written_in_xml)): - page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) + page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) for mm in range(len(found_polygons_tables)): page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))