writer: also ensure validity after scaling

This commit is contained in:
Robert Sachunsky 2025-12-02 16:35:32 +01:00
parent 56e73bf72f
commit 43a95842bd
2 changed files with 39 additions and 62 deletions

View file

@ -1670,10 +1670,10 @@ class Eynollah:
else: else:
box = [0, 0, self.image.shape[1], self.image.shape[0]] box = [0, 0, self.image.shape[1], self.image.shape[0]]
cropped_page, page_coord = crop_image_inside_box(box, self.image) cropped_page, page_coord = crop_image_inside_box(box, self.image)
cont_page.append(np.array([[page_coord[2], page_coord[0]], cont_page.append(np.array([[[page_coord[2], page_coord[0]]],
[page_coord[3], page_coord[0]], [[page_coord[3], page_coord[0]]],
[page_coord[3], page_coord[1]], [[page_coord[3], page_coord[1]]],
[page_coord[2], page_coord[1]]])) [[page_coord[2], page_coord[1]]]]))
return cropped_page, page_coord, cont_page return cropped_page, page_coord, cont_page
def early_page_for_num_of_column_classification(self,img_bin): def early_page_for_num_of_column_classification(self,img_bin):

View file

@ -3,10 +3,10 @@
from pathlib import Path from pathlib import Path
import os.path import os.path
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from .utils.xml import create_page_xml, xml_reading_order import numpy as np
from .utils.counter import EynollahIdCounter from shapely import affinity, clip_by_rect
from ocrd_utils import getLogger from ocrd_utils import getLogger, points_from_polygon
from ocrd_models.ocrd_page import ( from ocrd_models.ocrd_page import (
BorderType, BorderType,
CoordsType, CoordsType,
@ -19,7 +19,10 @@ from ocrd_models.ocrd_page import (
SeparatorRegionType, SeparatorRegionType,
to_xml to_xml
) )
import numpy as np
from .utils.xml import create_page_xml, xml_reading_order
from .utils.counter import EynollahIdCounter
from .utils.contour import contour2polygon, make_valid
class EynollahXmlWriter: class EynollahXmlWriter:
@ -41,20 +44,14 @@ class EynollahXmlWriter:
def image_filename_stem(self): def image_filename_stem(self):
return Path(Path(self.image_filename).name).stem return Path(Path(self.image_filename).name).stem
def calculate_page_coords(self, cont_page): def calculate_points(self, contour, offset=None):
self.logger.debug('enter calculate_page_coords') self.logger.debug('enter calculate_points')
points_page_print = "" poly = contour2polygon(contour)
for _, contour in enumerate(cont_page[0]): if offset is not None:
if len(contour) == 2: poly = affinity.translate(poly, *offset)
points_page_print += str(int((contour[0]) / self.scale_x)) poly = affinity.scale(poly, xfact=1 / self.scale_x, yfact=1 / self.scale_y, origin=(0, 0))
points_page_print += ',' poly = make_valid(clip_by_rect(poly, 0, 0, self.width_org, self.height_org))
points_page_print += str(int((contour[1]) / self.scale_y)) return points_from_polygon(poly.exterior.coords[:-1])
else:
points_page_print += str(int((contour[0][0]) / self.scale_x))
points_page_print += ','
points_page_print += str(int((contour[0][1] ) / self.scale_y))
points_page_print = points_page_print + ' '
return points_page_print[:-1]
def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion):
self.logger.debug('enter serialize_lines_in_region') self.logger.debug('enter serialize_lines_in_region')
@ -67,20 +64,12 @@ class EynollahXmlWriter:
text_region.add_TextLine(textline) text_region.add_TextLine(textline)
text_region.set_orientation(-slopes[region_idx]) text_region.set_orientation(-slopes[region_idx])
region_bboxes = all_box_coord[region_idx] region_bboxes = all_box_coord[region_idx]
points_co = '' offset = [page_coord[2], page_coord[0]]
for point in polygon_textline: # FIXME: or actually... not self.textline_light and not self.curved_line or np.abs(slopes[region_idx]) > 45?
if len(point) != 2: if not self.textline_light and not (self.curved_line and np.abs(slopes[region_idx]) <= 45):
point = point[0] offset[0] += region_bboxes[2]
point_x = point[0] + page_coord[2] offset[1] += region_bboxes[0]
point_y = point[1] + page_coord[0] coords.set_points(self.calculate_points(polygon_textline, offset))
# FIXME: or actually... not self.textline_light and not self.curved_line or np.abs(slopes[region_idx]) > 45?
if not self.textline_light and not (self.curved_line and np.abs(slopes[region_idx]) <= 45):
point_x += region_bboxes[2]
point_y += region_bboxes[0]
point_x = max(0, int(point_x / self.scale_x))
point_y = max(0, int(point_y / self.scale_y))
points_co += str(point_x) + ',' + str(point_y) + ' '
coords.set_points(points_co[:-1])
def write_pagexml(self, pcgts): def write_pagexml(self, pcgts):
self.logger.info("output filename: '%s'", self.output_filename) self.logger.info("output filename: '%s'", self.output_filename)
@ -135,8 +124,13 @@ class EynollahXmlWriter:
# create the file structure # create the file structure
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org) pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
page = pcgts.get_Page() page = pcgts.get_Page()
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page)))) if len(cont_page):
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_points(cont_page[0]))))
if skip_layout_reading_order:
offset = None
else:
offset = [page_coord[2], page_coord[0]]
counter = EynollahIdCounter() counter = EynollahIdCounter()
if len(order_of_texts): if len(order_of_texts):
_counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
@ -149,8 +143,7 @@ class EynollahXmlWriter:
for mm, region_contour in enumerate(found_polygons_text_region): for mm, region_contour in enumerate(found_polygons_text_region):
textregion = TextRegionType( textregion = TextRegionType(
id=counter.next_region_id, type_='paragraph', id=counter.next_region_id, type_='paragraph',
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord, Coords=CoordsType(points=self.calculate_points(region_contour, offset))
skip_layout_reading_order))
) )
if conf_contours_textregions: if conf_contours_textregions:
textregion.Coords.set_conf(conf_contours_textregions[mm]) textregion.Coords.set_conf(conf_contours_textregions[mm])
@ -166,7 +159,7 @@ class EynollahXmlWriter:
for mm, region_contour in enumerate(found_polygons_text_region_h): for mm, region_contour in enumerate(found_polygons_text_region_h):
textregion = TextRegionType( textregion = TextRegionType(
id=counter.next_region_id, type_='heading', id=counter.next_region_id, type_='heading',
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) Coords=CoordsType(points=self.calculate_points(region_contour, offset))
) )
if conf_contours_textregions_h: if conf_contours_textregions_h:
textregion.Coords.set_conf(conf_contours_textregions_h[mm]) textregion.Coords.set_conf(conf_contours_textregions_h[mm])
@ -181,7 +174,7 @@ class EynollahXmlWriter:
for mm, region_contour in enumerate(found_polygons_marginals_left): for mm, region_contour in enumerate(found_polygons_marginals_left):
marginal = TextRegionType( marginal = TextRegionType(
id=counter.next_region_id, type_='marginalia', id=counter.next_region_id, type_='marginalia',
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) Coords=CoordsType(points=self.calculate_points(region_contour, offset))
) )
page.add_TextRegion(marginal) page.add_TextRegion(marginal)
if ocr_all_textlines_marginals_left: if ocr_all_textlines_marginals_left:
@ -193,7 +186,7 @@ class EynollahXmlWriter:
for mm, region_contour in enumerate(found_polygons_marginals_right): for mm, region_contour in enumerate(found_polygons_marginals_right):
marginal = TextRegionType( marginal = TextRegionType(
id=counter.next_region_id, type_='marginalia', id=counter.next_region_id, type_='marginalia',
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) Coords=CoordsType(points=self.calculate_points(region_contour, offset))
) )
page.add_TextRegion(marginal) page.add_TextRegion(marginal)
if ocr_all_textlines_marginals_right: if ocr_all_textlines_marginals_right:
@ -206,7 +199,7 @@ class EynollahXmlWriter:
for mm, region_contour in enumerate(found_polygons_drop_capitals): for mm, region_contour in enumerate(found_polygons_drop_capitals):
dropcapital = TextRegionType( dropcapital = TextRegionType(
id=counter.next_region_id, type_='drop-capital', id=counter.next_region_id, type_='drop-capital',
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) Coords=CoordsType(points=self.calculate_points(region_contour, offset))
) )
page.add_TextRegion(dropcapital) page.add_TextRegion(dropcapital)
all_box_coord_drop = [[0, 0, 0, 0]] all_box_coord_drop = [[0, 0, 0, 0]]
@ -221,33 +214,17 @@ class EynollahXmlWriter:
for region_contour in found_polygons_text_region_img: for region_contour in found_polygons_text_region_img:
page.add_ImageRegion( page.add_ImageRegion(
ImageRegionType(id=counter.next_region_id, ImageRegionType(id=counter.next_region_id,
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)))) Coords=CoordsType(points=self.calculate_points(region_contour, offset))))
for region_contour in polygons_seplines: for region_contour in polygons_seplines:
page.add_SeparatorRegion( page.add_SeparatorRegion(
SeparatorRegionType(id=counter.next_region_id, SeparatorRegionType(id=counter.next_region_id,
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, [0, 0, 0, 0])))) Coords=CoordsType(points=self.calculate_points(region_contour, None))))
for region_contour in found_polygons_tables: for region_contour in found_polygons_tables:
page.add_TableRegion( page.add_TableRegion(
TableRegionType(id=counter.next_region_id, TableRegionType(id=counter.next_region_id,
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)))) Coords=CoordsType(points=self.calculate_points(region_contour, offset))))
return pcgts return pcgts
def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False):
self.logger.debug('enter calculate_polygon_coords')
coords = ''
for point in contour:
if len(point) != 2:
point = point[0]
point_x = point[0]
point_y = point[1]
if not skip_layout_reading_order:
point_x += page_coord[2]
point_y += page_coord[0]
point_x = int(point_x / self.scale_x)
point_y = int(point_y / self.scale_y)
coords += str(point_x) + ',' + str(point_y) + ' '
return coords[:-1]