mirror of
https://github.com/qurator-spk/page2tsv.git
synced 2025-06-15 14:29:53 +02:00
Add a command-line tool that creates page2tsv commands from an Excel file
This commit is contained in:
parent
83fb2ea033
commit
361c811264
2 changed files with 15 additions and 2 deletions
14
cli.py
14
cli.py
|
@ -148,7 +148,7 @@ def ner(tsv, ner_rest_endpoint):
|
|||
|
||||
def ned(tsv, ner_result, ned_rest_endpoint, json_file=None, threshold=None):
|
||||
|
||||
if os.path.exists(json_file):
|
||||
if json_file is not None and os.path.exists(json_file):
|
||||
|
||||
print('Loading {}'.format(json_file))
|
||||
|
||||
|
@ -354,3 +354,15 @@ def find_entities(tsv_file, tsv_out_file, ner_rest_endpoint, ned_rest_endpoint,
|
|||
tsv.to_csv(tsv_out_file, sep="\t", quoting=3, index=False)
|
||||
except requests.HTTPError as e:
|
||||
print(e)
|
||||
|
||||
|
||||
@click.command()
@click.argument('xls-file', type=click.Path(exists=True), required=True, nargs=1)
def make_page2tsv_commands(xls_file):
    """
    Print one page2tsv command line for every row of the spreadsheet XLS_FILE.

    The sheet is loaded with pandas and must provide the columns
    "Filename", "iiif_url" and "scale_factor". In each row's IIIF URL the
    segment '/full/full' is replaced by the literal template
    '/left,top,width,height/full' before the command is printed
    (presumably a placeholder that page2tsv fills in later - verify
    against the page2tsv command).
    """
    sheet = pd.read_excel(xls_file)

    for _, record in sheet.iterrows():
        # The same file stem names both the .xml input and the .tsv output.
        stem = record.Filename

        # Turn the full-image IIIF URL into a region template.
        region_url = record.iiif_url.replace('/full/full', '/left,top,width,height/full')

        print(
            f'page2tsv $(OPTIONS) {stem}.xml {stem}.tsv '
            f'--image-url={region_url} --scale-factor={record.scale_factor}'
        )
|
3
setup.py
3
setup.py
|
@ -23,7 +23,8 @@ setup(
|
|||
"extract-doc-links=cli:extract_document_links",
|
||||
"annotate-tsv=cli:annotate_tsv",
|
||||
"page2tsv=cli:page2tsv",
|
||||
"find-entities=cli:find_entities"
|
||||
"find-entities=cli:find_entities",
|
||||
"make-page2tsv-commands=cli:make_page2tsv_commands"
|
||||
]
|
||||
},
|
||||
python_requires='>=3.6.0',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue