eynollah.py: fix kwargs to writer

This commit is contained in:
kba 2025-11-28 10:50:50 +01:00 committed by kba
parent c24cf94bce
commit 5171e09c2d
2 changed files with 30 additions and 50 deletions

View file

@ -8,7 +8,6 @@ document layout analysis (segmentation) with output in PAGE-XML
# FIXME: fix all of those... # FIXME: fix all of those...
# pyright: reportUnnecessaryTypeIgnoreComment=true # pyright: reportUnnecessaryTypeIgnoreComment=true
# pyright: reportPossiblyUnboundVariable=false # pyright: reportPossiblyUnboundVariable=false
# pyright: reportCallIssue=false
# pyright: reportOperatorIssue=false # pyright: reportOperatorIssue=false
# pyright: reportUnboundVariable=false # pyright: reportUnboundVariable=false
# pyright: reportArgumentType=false # pyright: reportArgumentType=false
@ -20,12 +19,6 @@ document layout analysis (segmentation) with output in PAGE-XML
import logging import logging
import sys import sys
# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
if sys.version_info < (3, 10):
import importlib_resources
else:
import importlib.resources as importlib_resources
from difflib import SequenceMatcher as sq from difflib import SequenceMatcher as sq
import math import math
import os import os
@ -48,9 +41,9 @@ import statistics
tf_disable_interactive_logs() tf_disable_interactive_logs()
import tensorflow as tf # type: ignore import tensorflow as tf
try: try:
import torch # type: ignore import torch
except ImportError: except ImportError:
torch = None torch = None
try: try:
@ -3367,19 +3360,17 @@ class Eynollah:
order_text_new = [0] order_text_new = [0]
slopes =[0] slopes =[0]
id_of_texts_tot =['region_0001']
conf_contours_textregions =[0] conf_contours_textregions =[0]
pcgts = self.writer.build_pagexml_no_full_layout( pcgts = self.writer.build_pagexml_no_full_layout(
found_polygons_text_region=cont_page, found_polygons_text_region=cont_page,
page_coord=page_coord, page_coord=page_coord,
order_of_texts=order_text_new, order_of_texts=order_text_new,
id_of_texts=id_of_texts_tot,
all_found_textline_polygons=all_found_textline_polygons, all_found_textline_polygons=all_found_textline_polygons,
all_box_coord=page_coord, all_box_coord=page_coord,
polygons_of_images=[], found_polygons_text_region_img=[],
polygons_of_marginals_left=[], found_polygons_marginals_left=[],
polygons_of_marginals_right=[], found_polygons_marginals_right=[],
all_found_textline_polygons_marginals_left=[], all_found_textline_polygons_marginals_left=[],
all_found_textline_polygons_marginals_right=[], all_found_textline_polygons_marginals_right=[],
all_box_coord_marginals_left=[], all_box_coord_marginals_left=[],
@ -3389,9 +3380,7 @@ class Eynollah:
slopes_marginals_right=[], slopes_marginals_right=[],
cont_page=cont_page, cont_page=cont_page,
polygons_seplines=[], polygons_seplines=[],
contours_tables=[], found_polygons_tables=[],
conf_contours_textregion=conf_contours_textregions,
skip_layout_reading_order=True
) )
self.logger.info("Basic processing complete") self.logger.info("Basic processing complete")
return pcgts return pcgts
@ -3439,12 +3428,11 @@ class Eynollah:
found_polygons_text_region=[], found_polygons_text_region=[],
page_coord=page_coord, page_coord=page_coord,
order_of_texts=[], order_of_texts=[],
id_of_texts=[],
all_found_textline_polygons=[], all_found_textline_polygons=[],
all_box_coord=[], all_box_coord=[],
polygons_of_images=[], found_polygons_text_region_img=[],
polygons_of_marginals_left=[], found_polygons_marginals_left=[],
polygons_of_marginals_right=[], found_polygons_marginals_right=[],
all_found_textline_polygons_marginals_left=[], all_found_textline_polygons_marginals_left=[],
all_found_textline_polygons_marginals_right=[], all_found_textline_polygons_marginals_right=[],
all_box_coord_marginals_left=[], all_box_coord_marginals_left=[],
@ -3454,7 +3442,7 @@ class Eynollah:
slopes_marginals_right=[], slopes_marginals_right=[],
cont_page=cont_page, cont_page=cont_page,
polygons_seplines=[], polygons_seplines=[],
contours_tables=[] found_polygons_tables=[],
) )
return pcgts return pcgts
@ -3668,20 +3656,19 @@ class Eynollah:
empty_marginals = [[]] * len(polygons_of_marginals) empty_marginals = [[]] * len(polygons_of_marginals)
if self.full_layout: if self.full_layout:
pcgts = self.writer.build_pagexml_full_layout( pcgts = self.writer.build_pagexml_full_layout(
contours_only_text_parent=[], found_polygons_text_region=[],
contours_only_text_parent_h=[], found_polygons_text_region_h=[],
page_coord=page_coord, page_coord=page_coord,
order_of_texts=[], order_of_texts=[],
id_of_texts=[],
all_found_textline_polygons=[], all_found_textline_polygons=[],
all_found_textline_polygons_h=[], all_found_textline_polygons_h=[],
all_box_coord=[], all_box_coord=[],
all_box_coord_h=[], all_box_coord_h=[],
polygons_of_images=polygons_of_images, found_polygons_text_region_img=polygons_of_images,
contours_tables=contours_tables, found_polygons_tables=contours_tables,
polygons_of_drop_capitals=[], found_polygons_drop_capitals=[],
polygons_of_marginals_left=polygons_of_marginals, found_polygons_marginals_left=polygons_of_marginals,
polygons_of_marginals_right=polygons_of_marginals, found_polygons_marginals_right=polygons_of_marginals,
all_found_textline_polygons_marginals_left=empty_marginals, all_found_textline_polygons_marginals_left=empty_marginals,
all_found_textline_polygons_marginals_right=empty_marginals, all_found_textline_polygons_marginals_right=empty_marginals,
all_box_coord_marginals_left=empty_marginals, all_box_coord_marginals_left=empty_marginals,
@ -3698,12 +3685,11 @@ class Eynollah:
found_polygons_text_region=[], found_polygons_text_region=[],
page_coord=page_coord, page_coord=page_coord,
order_of_texts=[], order_of_texts=[],
id_of_texts=[],
all_found_textline_polygons=[], all_found_textline_polygons=[],
all_box_coord=[], all_box_coord=[],
polygons_of_images=polygons_of_images, found_polygons_text_region_img=polygons_of_images,
polygons_of_marginals_left=polygons_of_marginals, found_polygons_marginals_left=polygons_of_marginals,
polygons_of_marginals_right=polygons_of_marginals, found_polygons_marginals_right=polygons_of_marginals,
all_found_textline_polygons_marginals_left=empty_marginals, all_found_textline_polygons_marginals_left=empty_marginals,
all_found_textline_polygons_marginals_right=empty_marginals, all_found_textline_polygons_marginals_right=empty_marginals,
all_box_coord_marginals_left=empty_marginals, all_box_coord_marginals_left=empty_marginals,
@ -3713,7 +3699,7 @@ class Eynollah:
slopes_marginals_right=[], slopes_marginals_right=[],
cont_page=cont_page, cont_page=cont_page,
polygons_seplines=polygons_seplines, polygons_seplines=polygons_seplines,
contours_tables=contours_tables found_polygons_tables=contours_tables
) )
return pcgts return pcgts
@ -3877,16 +3863,15 @@ class Eynollah:
found_polygons_text_region_h=contours_only_text_parent_h, found_polygons_text_region_h=contours_only_text_parent_h,
page_coord=page_coord, page_coord=page_coord,
order_of_texts=order_text_new, order_of_texts=order_text_new,
id_of_texts=id_of_texts_tot,
all_found_textline_polygons=all_found_textline_polygons, all_found_textline_polygons=all_found_textline_polygons,
all_found_textline_polygons_h=all_found_textline_polygons_h, all_found_textline_polygons_h=all_found_textline_polygons_h,
all_box_coord=all_box_coord, all_box_coord=all_box_coord,
all_box_coord_h=all_box_coord_h, all_box_coord_h=all_box_coord_h,
polygons_of_images=polygons_of_images, found_polygons_text_region_img=polygons_of_images,
contours_tables=contours_tables, found_polygons_tables=contours_tables,
polygons_of_drop_capitals=polygons_of_drop_capitals, found_polygons_drop_capitals=polygons_of_drop_capitals,
polygons_of_marginals_left=polygons_of_marginals_left, found_polygons_marginals_left=polygons_of_marginals_left,
polygons_of_marginals_right=polygons_of_marginals_right, found_polygons_marginals_right=polygons_of_marginals_right,
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right, all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
all_box_coord_marginals_left=all_box_coord_marginals_left, all_box_coord_marginals_left=all_box_coord_marginals_left,
@ -3905,12 +3890,11 @@ class Eynollah:
found_polygons_text_region=contours_only_text_parent, found_polygons_text_region=contours_only_text_parent,
page_coord=page_coord, page_coord=page_coord,
order_of_texts=order_text_new, order_of_texts=order_text_new,
id_of_texts=id_of_texts_tot,
all_found_textline_polygons=all_found_textline_polygons, all_found_textline_polygons=all_found_textline_polygons,
all_box_coord=all_box_coord, all_box_coord=all_box_coord,
polygons_of_images=polygons_of_images, found_polygons_text_region_img=polygons_of_images,
polygons_of_marginals_left=polygons_of_marginals_left, found_polygons_marginals_left=polygons_of_marginals_left,
polygons_of_marginals_right=polygons_of_marginals_right, found_polygons_marginals_right=polygons_of_marginals_right,
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right, all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
all_box_coord_marginals_left=all_box_coord_marginals_left, all_box_coord_marginals_left=all_box_coord_marginals_left,
@ -3920,8 +3904,7 @@ class Eynollah:
slopes_marginals_right=slopes_marginals_right, slopes_marginals_right=slopes_marginals_right,
cont_page=cont_page, cont_page=cont_page,
polygons_seplines=polygons_seplines, polygons_seplines=polygons_seplines,
contours_tables=contours_tables, found_polygons_tables=contours_tables,
conf_contours_textregions=conf_contours_textregions
) )
return pcgts return pcgts

View file

@ -88,7 +88,6 @@ class EynollahXmlWriter:
found_polygons_text_region, found_polygons_text_region,
page_coord, page_coord,
order_of_texts, order_of_texts,
id_of_texts,
all_found_textline_polygons, all_found_textline_polygons,
all_box_coord, all_box_coord,
found_polygons_text_region_img, found_polygons_text_region_img,
@ -110,7 +109,6 @@ class EynollahXmlWriter:
found_polygons_text_region_h=[], found_polygons_text_region_h=[],
page_coord=page_coord, page_coord=page_coord,
order_of_texts=order_of_texts, order_of_texts=order_of_texts,
id_of_texts=id_of_texts,
all_found_textline_polygons=all_found_textline_polygons, all_found_textline_polygons=all_found_textline_polygons,
all_found_textline_polygons_h=[], all_found_textline_polygons_h=[],
all_box_coord=all_box_coord, all_box_coord=all_box_coord,
@ -139,7 +137,6 @@ class EynollahXmlWriter:
found_polygons_text_region_h, found_polygons_text_region_h,
page_coord, page_coord,
order_of_texts, order_of_texts,
id_of_texts,
all_found_textline_polygons, all_found_textline_polygons,
all_found_textline_polygons_h, all_found_textline_polygons_h,
all_box_coord, all_box_coord,