From 5e60fabe4adeec4d50dc9f77c03231d953241654 Mon Sep 17 00:00:00 2001 From: Kai Date: Fri, 7 May 2021 11:28:24 +0200 Subject: [PATCH] revert changes --- tsvtools/cli.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/tsvtools/cli.py b/tsvtools/cli.py index 5d85dc7..6d68a2d 100644 --- a/tsvtools/cli.py +++ b/tsvtools/cli.py @@ -107,16 +107,7 @@ def page2tsv(page_xml_file, tsv_out_file, purpose, image_url, ner_rest_endpoint, for region_idx, region in enumerate(pcgts.get_Page().get_AllRegions(classes=['Text'], order='reading-order')): for text_line in region.get_TextLine(): - # points = [int(scale_factor * float(pos)) for coords in text_line.get_Coords() for p in - # coords.attrib['points'].split(' ') for pos in p.split(',')] - - points = [int(scale_factor * float(pos)) for pos in text_line.get_Coords().points] - - x_points, y_points = points[0::2], points[1::2] - - left, right, top, bottom = min(x_points), max(x_points), min(y_points), max(y_points) - - # left, top, right, bottom = [int(scale_factor * x) for x in bbox_from_points(text_line.get_Coords().points)] + left, top, right, bottom = [int(scale_factor * x) for x in bbox_from_points(text_line.get_Coords().points)] if min_confidence is not None and max_confidence is not None: conf = np.max([textequiv.conf for textequiv in text_line.get_TextEquiv()]) @@ -129,17 +120,7 @@ def page2tsv(page_xml_file, tsv_out_file, purpose, image_url, ner_rest_endpoint, for text_equiv in word.get_TextEquiv(): # transform OCR coordinates using `scale_factor` to derive # correct coordinates for the web presentation image - - # points = [int(scale_factor * float(pos)) for coords in text_equiv.get_Coords() for p in - # coords.attrib['points'].split(' ') for pos in p.split(',')] - - points = [int(scale_factor * float(pos)) for pos in text_equiv.get_Coords().points] - - x_points, y_points = points[0::2], points[1::2] - - left, right, top, bottom = min(x_points), max(x_points), min(y_points), max(y_points) - - # left, top, right, bottom = [int(scale_factor * x) for x in bbox_from_points(word.get_Coords().points)] + left, top, right, bottom = [int(scale_factor * x) for x in bbox_from_points(word.get_Coords().points)] tsv.append((region_idx, len(line_info) - 1, left + (right - left) / 2.0, text_equiv.get_Unicode(), len(urls), left, right, top, bottom, text_line.id))