mirror of
https://github.com/qurator-spk/page2tsv.git
synced 2025-06-16 06:49:52 +02:00
Update cli.py
This commit is contained in:
parent
2946909cf3
commit
e535a070c4
1 changed file with 1 addition and 1 deletion
2
cli.py
2
cli.py
|
@@ -173,7 +173,7 @@ def page2tsv(page_xml_file, tsv_out_file, image_url, ner_rest_endpoint, noproxy,
|
||||||
text = word.text
|
text = word.text
|
||||||
for coords in words.findall('.//{%s}Coords' % xmlns):
|
for coords in words.findall('.//{%s}Coords' % xmlns):
|
||||||
|
|
||||||
# transform the OCR coordinates by 0.5685 to derive the correct coords for the web presentation image
|
# transform OCR coordinates using `scale_factor` to derive correct coordinates for the web presentation image
|
||||||
points = [int(scale_factor * float(pos)) for p in coords.attrib['points'].split(' ') for pos in p.split(',')]
|
points = [int(scale_factor * float(pos)) for p in coords.attrib['points'].split(' ') for pos in p.split(',')]
|
||||||
|
|
||||||
x_points = [points[i] for i in range(0, len(points), 2)]
|
x_points = [points[i] for i in range(0, len(points), 2)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue