mirror of
https://github.com/qurator-spk/page2tsv.git
synced 2025-06-15 22:39:54 +02:00
try to infer correct line ordering ...
This commit is contained in:
parent
7bf9cfa5de
commit
abdabbac4f
1 changed file with 2 additions and 2 deletions
4
cli.py
4
cli.py
|
@@ -203,8 +203,8 @@ def page2tsv(page_xml_file, tsv_out_file, image_url, ner_rest_endpoint, noproxy,
|
|||
tsv = pd.DataFrame(tsv, columns=['rid', 'line', 'hcenter'] + out_columns)
|
||||
|
||||
vlinecenter = pd.DataFrame(tsv[['line', 'top']].groupby('line', sort=False).mean().top +
|
||||
- (tsv[['line', 'bottom']].groupby('line').mean().bottom -
|
||||
- tsv[['line', 'top']].groupby('line').mean().top) / 2, columns=['vlinecenter'])
|
||||
+ (tsv[['line', 'bottom']].groupby('line', sort=False).mean().bottom -
|
||||
+ tsv[['line', 'top']].groupby('line', sort=False).mean().top) / 2, columns=['vlinecenter'])
|
||||
|
||||
tsv = tsv.merge(vlinecenter, left_on='line', right_index=True)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue