mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-12-01 08:44:13 +01:00
eynollah.py: fix kwargs to writer
This commit is contained in:
parent
c24cf94bce
commit
5171e09c2d
2 changed files with 30 additions and 50 deletions
|
|
@ -8,7 +8,6 @@ document layout analysis (segmentation) with output in PAGE-XML
|
||||||
# FIXME: fix all of those...
|
# FIXME: fix all of those...
|
||||||
# pyright: reportUnnecessaryTypeIgnoreComment=true
|
# pyright: reportUnnecessaryTypeIgnoreComment=true
|
||||||
# pyright: reportPossiblyUnboundVariable=false
|
# pyright: reportPossiblyUnboundVariable=false
|
||||||
# pyright: reportCallIssue=false
|
|
||||||
# pyright: reportOperatorIssue=false
|
# pyright: reportOperatorIssue=false
|
||||||
# pyright: reportUnboundVariable=false
|
# pyright: reportUnboundVariable=false
|
||||||
# pyright: reportArgumentType=false
|
# pyright: reportArgumentType=false
|
||||||
|
|
@ -20,12 +19,6 @@ document layout analysis (segmentation) with output in PAGE-XML
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
|
|
||||||
if sys.version_info < (3, 10):
|
|
||||||
import importlib_resources
|
|
||||||
else:
|
|
||||||
import importlib.resources as importlib_resources
|
|
||||||
|
|
||||||
from difflib import SequenceMatcher as sq
|
from difflib import SequenceMatcher as sq
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
|
|
@ -48,9 +41,9 @@ import statistics
|
||||||
|
|
||||||
tf_disable_interactive_logs()
|
tf_disable_interactive_logs()
|
||||||
|
|
||||||
import tensorflow as tf # type: ignore
|
import tensorflow as tf
|
||||||
try:
|
try:
|
||||||
import torch # type: ignore
|
import torch
|
||||||
except ImportError:
|
except ImportError:
|
||||||
torch = None
|
torch = None
|
||||||
try:
|
try:
|
||||||
|
|
@ -3367,19 +3360,17 @@ class Eynollah:
|
||||||
|
|
||||||
order_text_new = [0]
|
order_text_new = [0]
|
||||||
slopes =[0]
|
slopes =[0]
|
||||||
id_of_texts_tot =['region_0001']
|
|
||||||
conf_contours_textregions =[0]
|
conf_contours_textregions =[0]
|
||||||
|
|
||||||
pcgts = self.writer.build_pagexml_no_full_layout(
|
pcgts = self.writer.build_pagexml_no_full_layout(
|
||||||
found_polygons_text_region=cont_page,
|
found_polygons_text_region=cont_page,
|
||||||
page_coord=page_coord,
|
page_coord=page_coord,
|
||||||
order_of_texts=order_text_new,
|
order_of_texts=order_text_new,
|
||||||
id_of_texts=id_of_texts_tot,
|
|
||||||
all_found_textline_polygons=all_found_textline_polygons,
|
all_found_textline_polygons=all_found_textline_polygons,
|
||||||
all_box_coord=page_coord,
|
all_box_coord=page_coord,
|
||||||
polygons_of_images=[],
|
found_polygons_text_region_img=[],
|
||||||
polygons_of_marginals_left=[],
|
found_polygons_marginals_left=[],
|
||||||
polygons_of_marginals_right=[],
|
found_polygons_marginals_right=[],
|
||||||
all_found_textline_polygons_marginals_left=[],
|
all_found_textline_polygons_marginals_left=[],
|
||||||
all_found_textline_polygons_marginals_right=[],
|
all_found_textline_polygons_marginals_right=[],
|
||||||
all_box_coord_marginals_left=[],
|
all_box_coord_marginals_left=[],
|
||||||
|
|
@ -3389,9 +3380,7 @@ class Eynollah:
|
||||||
slopes_marginals_right=[],
|
slopes_marginals_right=[],
|
||||||
cont_page=cont_page,
|
cont_page=cont_page,
|
||||||
polygons_seplines=[],
|
polygons_seplines=[],
|
||||||
contours_tables=[],
|
found_polygons_tables=[],
|
||||||
conf_contours_textregion=conf_contours_textregions,
|
|
||||||
skip_layout_reading_order=True
|
|
||||||
)
|
)
|
||||||
self.logger.info("Basic processing complete")
|
self.logger.info("Basic processing complete")
|
||||||
return pcgts
|
return pcgts
|
||||||
|
|
@ -3439,12 +3428,11 @@ class Eynollah:
|
||||||
found_polygons_text_region=[],
|
found_polygons_text_region=[],
|
||||||
page_coord=page_coord,
|
page_coord=page_coord,
|
||||||
order_of_texts=[],
|
order_of_texts=[],
|
||||||
id_of_texts=[],
|
|
||||||
all_found_textline_polygons=[],
|
all_found_textline_polygons=[],
|
||||||
all_box_coord=[],
|
all_box_coord=[],
|
||||||
polygons_of_images=[],
|
found_polygons_text_region_img=[],
|
||||||
polygons_of_marginals_left=[],
|
found_polygons_marginals_left=[],
|
||||||
polygons_of_marginals_right=[],
|
found_polygons_marginals_right=[],
|
||||||
all_found_textline_polygons_marginals_left=[],
|
all_found_textline_polygons_marginals_left=[],
|
||||||
all_found_textline_polygons_marginals_right=[],
|
all_found_textline_polygons_marginals_right=[],
|
||||||
all_box_coord_marginals_left=[],
|
all_box_coord_marginals_left=[],
|
||||||
|
|
@ -3454,7 +3442,7 @@ class Eynollah:
|
||||||
slopes_marginals_right=[],
|
slopes_marginals_right=[],
|
||||||
cont_page=cont_page,
|
cont_page=cont_page,
|
||||||
polygons_seplines=[],
|
polygons_seplines=[],
|
||||||
contours_tables=[]
|
found_polygons_tables=[],
|
||||||
)
|
)
|
||||||
return pcgts
|
return pcgts
|
||||||
|
|
||||||
|
|
@ -3668,20 +3656,19 @@ class Eynollah:
|
||||||
empty_marginals = [[]] * len(polygons_of_marginals)
|
empty_marginals = [[]] * len(polygons_of_marginals)
|
||||||
if self.full_layout:
|
if self.full_layout:
|
||||||
pcgts = self.writer.build_pagexml_full_layout(
|
pcgts = self.writer.build_pagexml_full_layout(
|
||||||
contours_only_text_parent=[],
|
found_polygons_text_region=[],
|
||||||
contours_only_text_parent_h=[],
|
found_polygons_text_region_h=[],
|
||||||
page_coord=page_coord,
|
page_coord=page_coord,
|
||||||
order_of_texts=[],
|
order_of_texts=[],
|
||||||
id_of_texts=[],
|
|
||||||
all_found_textline_polygons=[],
|
all_found_textline_polygons=[],
|
||||||
all_found_textline_polygons_h=[],
|
all_found_textline_polygons_h=[],
|
||||||
all_box_coord=[],
|
all_box_coord=[],
|
||||||
all_box_coord_h=[],
|
all_box_coord_h=[],
|
||||||
polygons_of_images=polygons_of_images,
|
found_polygons_text_region_img=polygons_of_images,
|
||||||
contours_tables=contours_tables,
|
found_polygons_tables=contours_tables,
|
||||||
polygons_of_drop_capitals=[],
|
found_polygons_drop_capitals=[],
|
||||||
polygons_of_marginals_left=polygons_of_marginals,
|
found_polygons_marginals_left=polygons_of_marginals,
|
||||||
polygons_of_marginals_right=polygons_of_marginals,
|
found_polygons_marginals_right=polygons_of_marginals,
|
||||||
all_found_textline_polygons_marginals_left=empty_marginals,
|
all_found_textline_polygons_marginals_left=empty_marginals,
|
||||||
all_found_textline_polygons_marginals_right=empty_marginals,
|
all_found_textline_polygons_marginals_right=empty_marginals,
|
||||||
all_box_coord_marginals_left=empty_marginals,
|
all_box_coord_marginals_left=empty_marginals,
|
||||||
|
|
@ -3698,12 +3685,11 @@ class Eynollah:
|
||||||
found_polygons_text_region=[],
|
found_polygons_text_region=[],
|
||||||
page_coord=page_coord,
|
page_coord=page_coord,
|
||||||
order_of_texts=[],
|
order_of_texts=[],
|
||||||
id_of_texts=[],
|
|
||||||
all_found_textline_polygons=[],
|
all_found_textline_polygons=[],
|
||||||
all_box_coord=[],
|
all_box_coord=[],
|
||||||
polygons_of_images=polygons_of_images,
|
found_polygons_text_region_img=polygons_of_images,
|
||||||
polygons_of_marginals_left=polygons_of_marginals,
|
found_polygons_marginals_left=polygons_of_marginals,
|
||||||
polygons_of_marginals_right=polygons_of_marginals,
|
found_polygons_marginals_right=polygons_of_marginals,
|
||||||
all_found_textline_polygons_marginals_left=empty_marginals,
|
all_found_textline_polygons_marginals_left=empty_marginals,
|
||||||
all_found_textline_polygons_marginals_right=empty_marginals,
|
all_found_textline_polygons_marginals_right=empty_marginals,
|
||||||
all_box_coord_marginals_left=empty_marginals,
|
all_box_coord_marginals_left=empty_marginals,
|
||||||
|
|
@ -3713,7 +3699,7 @@ class Eynollah:
|
||||||
slopes_marginals_right=[],
|
slopes_marginals_right=[],
|
||||||
cont_page=cont_page,
|
cont_page=cont_page,
|
||||||
polygons_seplines=polygons_seplines,
|
polygons_seplines=polygons_seplines,
|
||||||
contours_tables=contours_tables
|
found_polygons_tables=contours_tables
|
||||||
)
|
)
|
||||||
return pcgts
|
return pcgts
|
||||||
|
|
||||||
|
|
@ -3877,16 +3863,15 @@ class Eynollah:
|
||||||
found_polygons_text_region_h=contours_only_text_parent_h,
|
found_polygons_text_region_h=contours_only_text_parent_h,
|
||||||
page_coord=page_coord,
|
page_coord=page_coord,
|
||||||
order_of_texts=order_text_new,
|
order_of_texts=order_text_new,
|
||||||
id_of_texts=id_of_texts_tot,
|
|
||||||
all_found_textline_polygons=all_found_textline_polygons,
|
all_found_textline_polygons=all_found_textline_polygons,
|
||||||
all_found_textline_polygons_h=all_found_textline_polygons_h,
|
all_found_textline_polygons_h=all_found_textline_polygons_h,
|
||||||
all_box_coord=all_box_coord,
|
all_box_coord=all_box_coord,
|
||||||
all_box_coord_h=all_box_coord_h,
|
all_box_coord_h=all_box_coord_h,
|
||||||
polygons_of_images=polygons_of_images,
|
found_polygons_text_region_img=polygons_of_images,
|
||||||
contours_tables=contours_tables,
|
found_polygons_tables=contours_tables,
|
||||||
polygons_of_drop_capitals=polygons_of_drop_capitals,
|
found_polygons_drop_capitals=polygons_of_drop_capitals,
|
||||||
polygons_of_marginals_left=polygons_of_marginals_left,
|
found_polygons_marginals_left=polygons_of_marginals_left,
|
||||||
polygons_of_marginals_right=polygons_of_marginals_right,
|
found_polygons_marginals_right=polygons_of_marginals_right,
|
||||||
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
|
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
|
||||||
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
|
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
|
||||||
all_box_coord_marginals_left=all_box_coord_marginals_left,
|
all_box_coord_marginals_left=all_box_coord_marginals_left,
|
||||||
|
|
@ -3905,12 +3890,11 @@ class Eynollah:
|
||||||
found_polygons_text_region=contours_only_text_parent,
|
found_polygons_text_region=contours_only_text_parent,
|
||||||
page_coord=page_coord,
|
page_coord=page_coord,
|
||||||
order_of_texts=order_text_new,
|
order_of_texts=order_text_new,
|
||||||
id_of_texts=id_of_texts_tot,
|
|
||||||
all_found_textline_polygons=all_found_textline_polygons,
|
all_found_textline_polygons=all_found_textline_polygons,
|
||||||
all_box_coord=all_box_coord,
|
all_box_coord=all_box_coord,
|
||||||
polygons_of_images=polygons_of_images,
|
found_polygons_text_region_img=polygons_of_images,
|
||||||
polygons_of_marginals_left=polygons_of_marginals_left,
|
found_polygons_marginals_left=polygons_of_marginals_left,
|
||||||
polygons_of_marginals_right=polygons_of_marginals_right,
|
found_polygons_marginals_right=polygons_of_marginals_right,
|
||||||
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
|
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
|
||||||
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
|
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
|
||||||
all_box_coord_marginals_left=all_box_coord_marginals_left,
|
all_box_coord_marginals_left=all_box_coord_marginals_left,
|
||||||
|
|
@ -3920,8 +3904,7 @@ class Eynollah:
|
||||||
slopes_marginals_right=slopes_marginals_right,
|
slopes_marginals_right=slopes_marginals_right,
|
||||||
cont_page=cont_page,
|
cont_page=cont_page,
|
||||||
polygons_seplines=polygons_seplines,
|
polygons_seplines=polygons_seplines,
|
||||||
contours_tables=contours_tables,
|
found_polygons_tables=contours_tables,
|
||||||
conf_contours_textregions=conf_contours_textregions
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return pcgts
|
return pcgts
|
||||||
|
|
|
||||||
|
|
@ -88,7 +88,6 @@ class EynollahXmlWriter:
|
||||||
found_polygons_text_region,
|
found_polygons_text_region,
|
||||||
page_coord,
|
page_coord,
|
||||||
order_of_texts,
|
order_of_texts,
|
||||||
id_of_texts,
|
|
||||||
all_found_textline_polygons,
|
all_found_textline_polygons,
|
||||||
all_box_coord,
|
all_box_coord,
|
||||||
found_polygons_text_region_img,
|
found_polygons_text_region_img,
|
||||||
|
|
@ -110,7 +109,6 @@ class EynollahXmlWriter:
|
||||||
found_polygons_text_region_h=[],
|
found_polygons_text_region_h=[],
|
||||||
page_coord=page_coord,
|
page_coord=page_coord,
|
||||||
order_of_texts=order_of_texts,
|
order_of_texts=order_of_texts,
|
||||||
id_of_texts=id_of_texts,
|
|
||||||
all_found_textline_polygons=all_found_textline_polygons,
|
all_found_textline_polygons=all_found_textline_polygons,
|
||||||
all_found_textline_polygons_h=[],
|
all_found_textline_polygons_h=[],
|
||||||
all_box_coord=all_box_coord,
|
all_box_coord=all_box_coord,
|
||||||
|
|
@ -139,7 +137,6 @@ class EynollahXmlWriter:
|
||||||
found_polygons_text_region_h,
|
found_polygons_text_region_h,
|
||||||
page_coord,
|
page_coord,
|
||||||
order_of_texts,
|
order_of_texts,
|
||||||
id_of_texts,
|
|
||||||
all_found_textline_polygons,
|
all_found_textline_polygons,
|
||||||
all_found_textline_polygons_h,
|
all_found_textline_polygons_h,
|
||||||
all_box_coord,
|
all_box_coord,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue