1
0
Fork 0
mirror of https://github.com/qurator-spk/page2tsv.git synced 2025-06-15 14:29:53 +02:00

permit empty files

This commit is contained in:
Kai Labusch 2020-08-15 08:46:18 +02:00
parent 2dc3857770
commit a834da494a

3
cli.py
View file

@@ -284,6 +284,9 @@ def page2tsv(page_xml_file, tsv_out_file, image_url, ner_rest_endpoint, ned_rest
tsv = pd.DataFrame(tsv, columns=['rid', 'line', 'hcenter'] + out_columns)
if len(tsv)==0:
return
vlinecenter = pd.DataFrame(tsv[['line', 'top']].groupby('line', sort=False).mean().top +
(tsv[['line', 'bottom']].groupby('line', sort=False).mean().bottom -
tsv[['line', 'top']].groupby('line', sort=False).mean().top) / 2,