From 5171e09c2d264554d86d2829bb0ffee80dba8133 Mon Sep 17 00:00:00 2001 From: kba Date: Fri, 28 Nov 2025 10:50:50 +0100 Subject: [PATCH] eynollah.py: fix kwargs to writer --- src/eynollah/eynollah.py | 77 ++++++++++++++++------------------------ src/eynollah/writer.py | 3 -- 2 files changed, 30 insertions(+), 50 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index e5b4984..5e67b5e 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -8,7 +8,6 @@ document layout analysis (segmentation) with output in PAGE-XML # FIXME: fix all of those... # pyright: reportUnnecessaryTypeIgnoreComment=true # pyright: reportPossiblyUnboundVariable=false -# pyright: reportCallIssue=false # pyright: reportOperatorIssue=false # pyright: reportUnboundVariable=false # pyright: reportArgumentType=false @@ -20,12 +19,6 @@ document layout analysis (segmentation) with output in PAGE-XML import logging import sys -# cannot use importlib.resources until we move to 3.9+ forimportlib.resources.files -if sys.version_info < (3, 10): - import importlib_resources -else: - import importlib.resources as importlib_resources - from difflib import SequenceMatcher as sq import math import os @@ -48,9 +41,9 @@ import statistics tf_disable_interactive_logs() -import tensorflow as tf # type: ignore +import tensorflow as tf try: - import torch # type: ignore + import torch except ImportError: torch = None try: @@ -3367,19 +3360,17 @@ class Eynollah: order_text_new = [0] slopes =[0] - id_of_texts_tot =['region_0001'] conf_contours_textregions =[0] pcgts = self.writer.build_pagexml_no_full_layout( found_polygons_text_region=cont_page, page_coord=page_coord, order_of_texts=order_text_new, - id_of_texts=id_of_texts_tot, all_found_textline_polygons=all_found_textline_polygons, all_box_coord=page_coord, - polygons_of_images=[], - polygons_of_marginals_left=[], - polygons_of_marginals_right=[], + found_polygons_text_region_img=[], + found_polygons_marginals_left=[], + found_polygons_marginals_right=[], all_found_textline_polygons_marginals_left=[], all_found_textline_polygons_marginals_right=[], all_box_coord_marginals_left=[], @@ -3389,9 +3380,7 @@ class Eynollah: slopes_marginals_right=[], cont_page=cont_page, polygons_seplines=[], - contours_tables=[], - conf_contours_textregion=conf_contours_textregions, - skip_layout_reading_order=True + found_polygons_tables=[], ) self.logger.info("Basic processing complete") return pcgts @@ -3439,12 +3428,11 @@ class Eynollah: found_polygons_text_region=[], page_coord=page_coord, order_of_texts=[], - id_of_texts=[], all_found_textline_polygons=[], all_box_coord=[], - polygons_of_images=[], - polygons_of_marginals_left=[], - polygons_of_marginals_right=[], + found_polygons_text_region_img=[], + found_polygons_marginals_left=[], + found_polygons_marginals_right=[], all_found_textline_polygons_marginals_left=[], all_found_textline_polygons_marginals_right=[], all_box_coord_marginals_left=[], @@ -3454,7 +3442,7 @@ class Eynollah: slopes_marginals_right=[], cont_page=cont_page, polygons_seplines=[], - contours_tables=[] + found_polygons_tables=[], ) return pcgts @@ -3668,20 +3656,19 @@ class Eynollah: empty_marginals = [[]] * len(polygons_of_marginals) if self.full_layout: pcgts = self.writer.build_pagexml_full_layout( - contours_only_text_parent=[], - contours_only_text_parent_h=[], + found_polygons_text_region=[], + found_polygons_text_region_h=[], page_coord=page_coord, order_of_texts=[], - id_of_texts=[], all_found_textline_polygons=[], all_found_textline_polygons_h=[], all_box_coord=[], all_box_coord_h=[], - polygons_of_images=polygons_of_images, - contours_tables=contours_tables, - polygons_of_drop_capitals=[], - polygons_of_marginals_left=polygons_of_marginals, - polygons_of_marginals_right=polygons_of_marginals, + found_polygons_text_region_img=polygons_of_images, + found_polygons_tables=contours_tables, + found_polygons_drop_capitals=[], + found_polygons_marginals_left=polygons_of_marginals, + found_polygons_marginals_right=polygons_of_marginals, all_found_textline_polygons_marginals_left=empty_marginals, all_found_textline_polygons_marginals_right=empty_marginals, all_box_coord_marginals_left=empty_marginals, @@ -3698,12 +3685,11 @@ class Eynollah: found_polygons_text_region=[], page_coord=page_coord, order_of_texts=[], - id_of_texts=[], all_found_textline_polygons=[], all_box_coord=[], - polygons_of_images=polygons_of_images, - polygons_of_marginals_left=polygons_of_marginals, - polygons_of_marginals_right=polygons_of_marginals, + found_polygons_text_region_img=polygons_of_images, + found_polygons_marginals_left=polygons_of_marginals, + found_polygons_marginals_right=polygons_of_marginals, all_found_textline_polygons_marginals_left=empty_marginals, all_found_textline_polygons_marginals_right=empty_marginals, all_box_coord_marginals_left=empty_marginals, @@ -3713,7 +3699,7 @@ class Eynollah: slopes_marginals_right=[], cont_page=cont_page, polygons_seplines=polygons_seplines, - contours_tables=contours_tables + found_polygons_tables=contours_tables ) return pcgts @@ -3877,16 +3863,15 @@ class Eynollah: found_polygons_text_region_h=contours_only_text_parent_h, page_coord=page_coord, order_of_texts=order_text_new, - id_of_texts=id_of_texts_tot, all_found_textline_polygons=all_found_textline_polygons, all_found_textline_polygons_h=all_found_textline_polygons_h, all_box_coord=all_box_coord, all_box_coord_h=all_box_coord_h, - polygons_of_images=polygons_of_images, - contours_tables=contours_tables, - polygons_of_drop_capitals=polygons_of_drop_capitals, - polygons_of_marginals_left=polygons_of_marginals_left, - polygons_of_marginals_right=polygons_of_marginals_right, + found_polygons_text_region_img=polygons_of_images, + found_polygons_tables=contours_tables, + found_polygons_drop_capitals=polygons_of_drop_capitals, + found_polygons_marginals_left=polygons_of_marginals_left, + found_polygons_marginals_right=polygons_of_marginals_right, all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right, all_box_coord_marginals_left=all_box_coord_marginals_left, @@ -3905,12 +3890,11 @@ class Eynollah: found_polygons_text_region=contours_only_text_parent, page_coord=page_coord, order_of_texts=order_text_new, - id_of_texts=id_of_texts_tot, all_found_textline_polygons=all_found_textline_polygons, all_box_coord=all_box_coord, - polygons_of_images=polygons_of_images, - polygons_of_marginals_left=polygons_of_marginals_left, - polygons_of_marginals_right=polygons_of_marginals_right, + found_polygons_text_region_img=polygons_of_images, + found_polygons_marginals_left=polygons_of_marginals_left, + found_polygons_marginals_right=polygons_of_marginals_right, all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right, all_box_coord_marginals_left=all_box_coord_marginals_left, @@ -3920,8 +3904,7 @@ class Eynollah: slopes_marginals_right=slopes_marginals_right, cont_page=cont_page, polygons_seplines=polygons_seplines, - contours_tables=contours_tables, - conf_contours_textregions=conf_contours_textregions + found_polygons_tables=contours_tables, ) return pcgts diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index a944c72..63e54b2 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -88,7 +88,6 @@ class EynollahXmlWriter: found_polygons_text_region, page_coord, order_of_texts, - id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, @@ -110,7 +109,6 @@ class EynollahXmlWriter: found_polygons_text_region_h=[], page_coord=page_coord, order_of_texts=order_of_texts, - id_of_texts=id_of_texts, all_found_textline_polygons=all_found_textline_polygons, all_found_textline_polygons_h=[], all_box_coord=all_box_coord, @@ -139,7 +137,6 @@ class EynollahXmlWriter: found_polygons_text_region_h, page_coord, order_of_texts, - id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord,