mirror of
https://github.com/qurator-spk/neat.git
synced 2025-06-11 04:39:54 +02:00
Transform OCR coordinates for web presentation images (fixes #31)
thx @kba! (scaling factor will require testing with more images though)
This commit is contained in:
parent
2115bdafaf
commit
409d7db2f2
1 changed files with 2 additions and 1 deletions
|
@@ -172,7 +172,8 @@ def page2tsv(page_xml_file, tsv_out_file, image_url, ner_rest_endpoint, noproxy)
|
|||
text = word.text
|
||||
for coords in words.findall('.//{%s}Coords' % xmlns):
|
||||
|
||||
points = [int(pos) for p in coords.attrib['points'].split(' ') for pos in p.split(',')]
|
||||
# transform the OCR coordinates by 0.5685 to derive the correct coords for the web presentation
|
||||
points = [int(0.5685 * float(pos)) for p in coords.attrib['points'].split(' ') for pos in p.split(',')]
|
||||
|
||||
x_points = [points[i] for i in range(0, len(points), 2)]
|
||||
y_points = [points[i] for i in range(1, len(points), 2)]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue