mirror of
https://github.com/qurator-spk/page2tsv.git
synced 2025-06-30 21:59:54 +02:00
add priority option for find-entities
This commit is contained in:
parent
318d9bd122
commit
7b53cc5539
2 changed files with 10 additions and 3 deletions
|
@@ -199,6 +199,7 @@ def tsv2page(output_filename, keep_words, page_file, tsv_file):
|
||||||
with open(output_filename, 'w', encoding='utf-8') as f:
|
with open(output_filename, 'w', encoding='utf-8') as f:
|
||||||
f.write(ET.tostring(tree, pretty_print=True).decode('utf-8'))
|
f.write(ET.tostring(tree, pretty_print=True).decode('utf-8'))
|
||||||
|
|
||||||
|
|
||||||
@click.command()
|
@click.command()
|
||||||
@click.argument('tsv-file', type=click.Path(exists=True), required=True, nargs=1)
|
@click.argument('tsv-file', type=click.Path(exists=True), required=True, nargs=1)
|
||||||
@click.argument('tsv-out-file', type=click.Path(), required=True, nargs=1)
|
@click.argument('tsv-out-file', type=click.Path(), required=True, nargs=1)
|
||||||
|
@@ -209,7 +210,9 @@ def tsv2page(output_filename, keep_words, page_file, tsv_file):
|
||||||
@click.option('--ned-json-file', type=str, default=None)
|
@click.option('--ned-json-file', type=str, default=None)
|
||||||
@click.option('--noproxy', type=bool, is_flag=True, help='disable proxy. default: proxy is enabled.')
|
@click.option('--noproxy', type=bool, is_flag=True, help='disable proxy. default: proxy is enabled.')
|
||||||
@click.option('--ned-threshold', type=float, default=None)
|
@click.option('--ned-threshold', type=float, default=None)
|
||||||
def find_entities(tsv_file, tsv_out_file, ner_rest_endpoint, ned_rest_endpoint, ned_json_file, noproxy, ned_threshold):
|
@click.option('--ned-priority', type=int, default=2)
|
||||||
|
def find_entities(tsv_file, tsv_out_file, ner_rest_endpoint, ned_rest_endpoint, ned_json_file, noproxy, ned_threshold,
|
||||||
|
ned_priority):
|
||||||
|
|
||||||
if noproxy:
|
if noproxy:
|
||||||
os.environ['no_proxy'] = '*'
|
os.environ['no_proxy'] = '*'
|
||||||
|
@@ -236,7 +239,8 @@ def find_entities(tsv_file, tsv_out_file, ner_rest_endpoint, ned_rest_endpoint,
|
||||||
|
|
||||||
if ned_rest_endpoint is not None:
|
if ned_rest_endpoint is not None:
|
||||||
|
|
||||||
tsv, ned_result = ned(tsv, ner_result, ned_rest_endpoint, json_file=ned_json_file, threshold=ned_threshold)
|
tsv, ned_result = ned(tsv, ner_result, ned_rest_endpoint, json_file=ned_json_file, threshold=ned_threshold,
|
||||||
|
priority=ned_priority)
|
||||||
|
|
||||||
if ned_json_file is not None and not os.path.exists(ned_json_file):
|
if ned_json_file is not None and not os.path.exists(ned_json_file):
|
||||||
|
|
||||||
|
|
|
@@ -3,7 +3,7 @@ import requests
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
def ned(tsv, ner_result, ned_rest_endpoint, json_file=None, threshold=None):
|
def ned(tsv, ner_result, ned_rest_endpoint, json_file=None, threshold=None, priority=None):
|
||||||
|
|
||||||
if json_file is not None and os.path.exists(json_file):
|
if json_file is not None and os.path.exists(json_file):
|
||||||
|
|
||||||
|
@@ -22,6 +22,9 @@ def ned(tsv, ner_result, ned_rest_endpoint, json_file=None, threshold=None):
|
||||||
|
|
||||||
ned_rest_endpoint = ned_rest_endpoint + '/ned?return_full=' + str(int(json_file is not None)).lower()
|
ned_rest_endpoint = ned_rest_endpoint + '/ned?return_full=' + str(int(json_file is not None)).lower()
|
||||||
|
|
||||||
|
if priority is not None:
|
||||||
|
ned_rest_endpoint += "&priority=" + str(int(priority))
|
||||||
|
|
||||||
resp = requests.post(url=ned_rest_endpoint, json=ner_parsed, timeout=3600000)
|
resp = requests.post(url=ned_rest_endpoint, json=ner_parsed, timeout=3600000)
|
||||||
|
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue