From abdabbac4f3c2d8457352cbabdec874785129282 Mon Sep 17 00:00:00 2001 From: Kai Labusch Date: Mon, 9 Mar 2020 13:44:16 +0100 Subject: [PATCH] try to infer correct line ordering ... --- cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli.py b/cli.py index 9950427..5850600 100644 --- a/cli.py +++ b/cli.py @@ -203,8 +203,8 @@ def page2tsv(page_xml_file, tsv_out_file, image_url, ner_rest_endpoint, noproxy, tsv = pd.DataFrame(tsv, columns=['rid', 'line', 'hcenter'] + out_columns) vlinecenter = pd.DataFrame(tsv[['line', 'top']].groupby('line', sort=False).mean().top + - (tsv[['line', 'bottom']].groupby('line').mean().bottom - - tsv[['line', 'top']].groupby('line').mean().top) / 2, columns=['vlinecenter']) + (tsv[['line', 'bottom']].groupby('line', sort=False).mean().bottom - + tsv[['line', 'top']].groupby('line', sort=False).mean().top) / 2, columns=['vlinecenter']) tsv = tsv.merge(vlinecenter, left_on='line', right_index=True)