From e5b635ec2d00c33da4bec757a66b6559b35f4781 Mon Sep 17 00:00:00 2001 From: Kai Date: Fri, 7 May 2021 10:50:09 +0200 Subject: [PATCH] try other coordinate computation --- tsvtools/cli.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tsvtools/cli.py b/tsvtools/cli.py index c9cb71a..5d85dc7 100644 --- a/tsvtools/cli.py +++ b/tsvtools/cli.py @@ -107,8 +107,10 @@ def page2tsv(page_xml_file, tsv_out_file, purpose, image_url, ner_rest_endpoint, for region_idx, region in enumerate(pcgts.get_Page().get_AllRegions(classes=['Text'], order='reading-order')): for text_line in region.get_TextLine(): - points = [int(scale_factor * float(pos)) for coords in text_line.get_Coords() for p in - coords.attrib['points'].split(' ') for pos in p.split(',')] + # points = [int(scale_factor * float(pos)) for coords in text_line.get_Coords() for p in + # coords.attrib['points'].split(' ') for pos in p.split(',')] + + points = [int(scale_factor * float(pos)) for pos in text_line.get_Coords().points] x_points, y_points = points[0::2], points[1::2] @@ -128,8 +130,10 @@ def page2tsv(page_xml_file, tsv_out_file, purpose, image_url, ner_rest_endpoint, # transform OCR coordinates using `scale_factor` to derive # correct coordinates for the web presentation image - points = [int(scale_factor * float(pos)) for coords in text_line.get_Coords() for p in - coords.attrib['points'].split(' ') for pos in p.split(',')] + # points = [int(scale_factor * float(pos)) for coords in text_equiv.get_Coords() for p in + # coords.attrib['points'].split(' ') for pos in p.split(',')] + + points = [int(scale_factor * float(pos)) for pos in text_equiv.get_Coords().points] x_points, y_points = points[0::2], points[1::2]