mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-15 19:09:58 +02:00
writer: simplify
- `build_pagexml_no_full_layout`: delegate to `build_pagexml_full_layout` (removing redundant code)
This commit is contained in:
parent
e32479765c
commit
cbbb3248c7
1 changed files with 49 additions and 84 deletions
|
@ -87,8 +87,50 @@ class EynollahXmlWriter():
|
|||
with open(self.output_filename, 'w') as f:
|
||||
f.write(to_xml(pcgts))
|
||||
|
||||
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals_left, found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_seplines, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, conf_contours_textregion=None, skip_layout_reading_order=False):
|
||||
self.logger.debug('enter build_pagexml_no_full_layout')
|
||||
def build_pagexml_no_full_layout(
|
||||
self, found_polygons_text_region,
|
||||
page_coord, order_of_texts, id_of_texts,
|
||||
all_found_textline_polygons,
|
||||
all_box_coord,
|
||||
found_polygons_text_region_img,
|
||||
found_polygons_marginals_left, found_polygons_marginals_right,
|
||||
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left, all_box_coord_marginals_right,
|
||||
slopes, slopes_marginals_left, slopes_marginals_right,
|
||||
cont_page, polygons_seplines,
|
||||
found_polygons_tables,
|
||||
**kwargs):
|
||||
return self.build_pagexml_full_layout(
|
||||
found_polygons_text_region, [],
|
||||
page_coord, order_of_texts, id_of_texts,
|
||||
all_found_textline_polygons, [],
|
||||
all_box_coord, [],
|
||||
found_polygons_text_region_img, found_polygons_tables, [],
|
||||
found_polygons_marginals_left, found_polygons_marginals_right,
|
||||
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left, all_box_coord_marginals_right,
|
||||
slopes, [], slopes_marginals_left, slopes_marginals_right,
|
||||
cont_page, polygons_seplines,
|
||||
**kwargs)
|
||||
|
||||
def build_pagexml_full_layout(
|
||||
self,
|
||||
found_polygons_text_region, found_polygons_text_region_h,
|
||||
page_coord, order_of_texts, id_of_texts,
|
||||
all_found_textline_polygons, all_found_textline_polygons_h,
|
||||
all_box_coord, all_box_coord_h,
|
||||
found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals,
|
||||
found_polygons_marginals_left,found_polygons_marginals_right,
|
||||
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left, all_box_coord_marginals_right,
|
||||
slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
|
||||
cont_page, polygons_seplines,
|
||||
ocr_all_textlines=None, ocr_all_textlines_h=None,
|
||||
ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None,
|
||||
ocr_all_textlines_drop=None,
|
||||
conf_contours_textregion=None, conf_contours_textregion_h=None,
|
||||
skip_layout_reading_order=False):
|
||||
self.logger.debug('enter build_pagexml')
|
||||
|
||||
# create the file structure
|
||||
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
|
||||
|
@ -108,89 +150,10 @@ class EynollahXmlWriter():
|
|||
textregion = TextRegionType(
|
||||
id=counter.next_region_id, type_='paragraph',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord,
|
||||
skip_layout_reading_order),
|
||||
conf=conf_contours_textregion[mm]),
|
||||
)
|
||||
page.add_TextRegion(textregion)
|
||||
if ocr_all_textlines:
|
||||
ocr_textlines = ocr_all_textlines[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord,
|
||||
all_box_coord, slopes, counter, ocr_textlines)
|
||||
|
||||
for mm, region_contour in enumerate(found_polygons_marginals_left):
|
||||
marginal = TextRegionType(
|
||||
id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
|
||||
)
|
||||
page.add_TextRegion(marginal)
|
||||
if ocr_all_textlines_marginals_left:
|
||||
ocr_textlines = ocr_all_textlines_marginals_left[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_left, mm, page_coord,
|
||||
all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines)
|
||||
|
||||
for mm, region_contour in enumerate(found_polygons_marginals_right):
|
||||
marginal = TextRegionType(
|
||||
id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
|
||||
)
|
||||
page.add_TextRegion(marginal)
|
||||
if ocr_all_textlines_marginals_right:
|
||||
ocr_textlines = ocr_all_textlines_marginals_right[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_right, mm, page_coord,
|
||||
all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines)
|
||||
|
||||
for region_contour in found_polygons_text_region_img:
|
||||
img_region = ImageRegionType(
|
||||
id=counter.next_region_id,
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
|
||||
)
|
||||
page.add_ImageRegion(img_region)
|
||||
|
||||
for region_contour in polygons_seplines:
|
||||
sep = SeparatorRegionType(
|
||||
id=counter.next_region_id,
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, [0, 0, 0, 0]))
|
||||
)
|
||||
page.add_SeparatorRegion(sep)
|
||||
|
||||
for region_contour in found_polygons_tables:
|
||||
tab = TableRegionType(
|
||||
id=counter.next_region_id,
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
|
||||
)
|
||||
page.add_TableRegion(tab)
|
||||
|
||||
return pcgts
|
||||
|
||||
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals_left,found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_seplines, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None):
|
||||
self.logger.debug('enter build_pagexml_full_layout')
|
||||
|
||||
# create the file structure
|
||||
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
|
||||
page = pcgts.get_Page()
|
||||
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
||||
|
||||
counter = EynollahIdCounter()
|
||||
if len(order_of_texts):
|
||||
_counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
|
||||
id_of_marginalia_left = [_counter_marginals.next_region_id
|
||||
for _ in found_polygons_marginals_left]
|
||||
id_of_marginalia_right = [_counter_marginals.next_region_id
|
||||
for _ in found_polygons_marginals_right]
|
||||
xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right)
|
||||
|
||||
for mm, region_contour in enumerate(found_polygons_text_region):
|
||||
textregion = TextRegionType(
|
||||
id=counter.next_region_id, type_='paragraph',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord),
|
||||
conf=conf_contours_textregion[mm])
|
||||
skip_layout_reading_order))
|
||||
)
|
||||
if conf_contours_textregion:
|
||||
textregion.Coords.set_conf(conf_contours_textregion[mm])
|
||||
page.add_TextRegion(textregion)
|
||||
if ocr_all_textlines:
|
||||
ocr_textlines = ocr_all_textlines[mm]
|
||||
|
@ -205,6 +168,8 @@ class EynollahXmlWriter():
|
|||
id=counter.next_region_id, type_='heading',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
|
||||
)
|
||||
if conf_contours_textregion_h:
|
||||
textregion.Coords.set_conf(conf_contours_textregion_h[mm])
|
||||
page.add_TextRegion(textregion)
|
||||
if ocr_all_textlines_h:
|
||||
ocr_textlines = ocr_all_textlines_h[mm]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue