diff --git a/cli.py b/cli.py index b706773..c0cb314 100644 --- a/cli.py +++ b/cli.py @@ -148,7 +148,7 @@ def ner(tsv, ner_rest_endpoint): def ned(tsv, ner_result, ned_rest_endpoint, json_file=None, threshold=None): - if os.path.exists(json_file): + if json_file is not None and os.path.exists(json_file): print('Loading {}'.format(json_file)) @@ -354,3 +354,15 @@ def find_entities(tsv_file, tsv_out_file, ner_rest_endpoint, ned_rest_endpoint, tsv.to_csv(tsv_out_file, sep="\t", quoting=3, index=False) except requests.HTTPError as e: print(e) + + +@click.command() +@click.argument('xls-file', type=click.Path(exists=True), required=True, nargs=1) +def make_page2tsv_commands(xls_file): + + df = pd.read_excel(xls_file) + + for _, row in df.iterrows(): + print('page2tsv $(OPTIONS) {}.xml {}.tsv --image-url={} --scale-factor={}'. + format(row.Filename, row.Filename, row.iiif_url.replace('/full/full', '/left,top,width,height/full'), + row.scale_factor)) \ No newline at end of file diff --git a/setup.py b/setup.py index 36d9c3b..40ec323 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,8 @@ setup( "extract-doc-links=cli:extract_document_links", "annotate-tsv=cli:annotate_tsv", "page2tsv=cli:page2tsv", - "find-entities=cli:find_entities" + "find-entities=cli:find_entities", + "make-page2tsv-commands=cli:make_page2tsv_commands" ] }, python_requires='>=3.6.0',