1
0
Fork 0
mirror of https://github.com/qurator-spk/page2tsv.git synced 2025-06-16 06:49:52 +02:00

Update cli.py

This commit is contained in:
Clemens Neudecker 2020-02-20 18:35:16 +01:00 committed by GitHub
parent 2946909cf3
commit e535a070c4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

2
cli.py
View file

@@ -173,7 +173,7 @@ def page2tsv(page_xml_file, tsv_out_file, image_url, ner_rest_endpoint, noproxy,
text = word.text text = word.text
for coords in words.findall('.//{%s}Coords' % xmlns): for coords in words.findall('.//{%s}Coords' % xmlns):
# transform the OCR coordinates by 0.5685 to derive the correct coords for the web presentation image # transform OCR coordinates using `scale_factor` to derive correct coordinates for the web presentation image
points = [int(scale_factor * float(pos)) for p in coords.attrib['points'].split(' ') for pos in p.split(',')] points = [int(scale_factor * float(pos)) for p in coords.attrib['points'].split(' ') for pos in p.split(',')]
x_points = [points[i] for i in range(0, len(points), 2)] x_points = [points[i] for i in range(0, len(points), 2)]