fix repeated text rows

2026-03-05 23:02:04 +01:00 · 2021-03-10 15:15:24 +01:00 · 2021-03-10 15:15:24 +01:00 · de575037e6
commit de575037e6
parent a6008b83b5
1 changed file with 7 additions and 5 deletions
--- a/tsvtools/cli.py
+++ b/tsvtools/cli.py
@@ -125,11 +125,13 @@ def page2tsv(page_xml_file, tsv_out_file, purpose, image_url, ner_rest_endpoint,
                for text_equiv in word.findall('./{%s}TextEquiv/{%s}Unicode' % (xmlns, xmlns)):
                    text = text_equiv.text
                    points = []
                    for coords in word.findall('./{%s}Coords' % xmlns):
                        # transform OCR coordinates using `scale_factor` to derive
                        # correct coordinates for the web presentation image
-                        points = [int(scale_factor * float(pos))
+                        points += [int(scale_factor * float(pos))
                                  for p in coords.attrib['points'].split(' ') for pos in p.split(',')]
                    x_points, y_points = points[0::2], points[1::2]