Revert changes: compute line and word bounding boxes with bbox_from_points again

2025-07-27 02:49:53 +02:00 · 2021-05-07 11:28:24 +02:00 · 2021-05-07 11:28:24 +02:00 · 5e60fabe4a
commit 5e60fabe4a
parent e5b635ec2d
1 changed file with 2 additions and 21 deletions
--- a/tsvtools/cli.py
+++ b/tsvtools/cli.py
@@ -107,16 +107,7 @@ def page2tsv(page_xml_file, tsv_out_file, purpose, image_url, ner_rest_endpoint,
    for region_idx, region in enumerate(pcgts.get_Page().get_AllRegions(classes=['Text'], order='reading-order')):
        for text_line in region.get_TextLine():

-            # points = [int(scale_factor * float(pos)) for coords in text_line.get_Coords() for p in
-            #          coords.attrib['points'].split(' ') for pos in p.split(',')]
-
-            points = [int(scale_factor * float(pos)) for pos in text_line.get_Coords().points]
-
-            x_points, y_points = points[0::2], points[1::2]
-
-            left, right, top, bottom = min(x_points), max(x_points), min(y_points), max(y_points)
-
-            # left, top, right, bottom = [int(scale_factor * x) for x in bbox_from_points(text_line.get_Coords().points)]
+            left, top, right, bottom = [int(scale_factor * x) for x in bbox_from_points(text_line.get_Coords().points)]

            if min_confidence is not None and max_confidence is not None:
                conf = np.max([textequiv.conf for textequiv in text_line.get_TextEquiv()])
@@ -129,17 +120,7 @@ def page2tsv(page_xml_file, tsv_out_file, purpose, image_url, ner_rest_endpoint,
                for text_equiv in word.get_TextEquiv():
                    # transform OCR coordinates using `scale_factor` to derive
                    # correct coordinates for the web presentation image
-
-                    # points = [int(scale_factor * float(pos)) for coords in text_equiv.get_Coords() for p in
-                    #           coords.attrib['points'].split(' ') for pos in p.split(',')]
-
-                    points = [int(scale_factor * float(pos)) for pos in text_equiv.get_Coords().points]
-
-                    x_points, y_points = points[0::2], points[1::2]
-
-                    left, right, top, bottom = min(x_points), max(x_points), min(y_points), max(y_points)
-
-                    # left, top, right, bottom = [int(scale_factor * x) for x in bbox_from_points(word.get_Coords().points)]
+                    left, top, right, bottom = [int(scale_factor * x) for x in bbox_from_points(word.get_Coords().points)]

                    tsv.append((region_idx, len(line_info) - 1, left + (right - left) / 2.0,
                                text_equiv.get_Unicode(), len(urls), left, right, top, bottom, text_line.id))