|
|
@ -32,6 +32,7 @@ def ned(tsv, ner_result, ned_rest_endpoint, json_file=None, threshold=None):
|
|
|
|
entity = ""
|
|
|
|
entity = ""
|
|
|
|
entity_type = None
|
|
|
|
entity_type = None
|
|
|
|
tsv['ID'] = '-'
|
|
|
|
tsv['ID'] = '-'
|
|
|
|
|
|
|
|
tsv['conf'] = '-'
|
|
|
|
|
|
|
|
|
|
|
|
def check_entity(tag):
|
|
|
|
def check_entity(tag):
|
|
|
|
nonlocal entity, entity_type, rids
|
|
|
|
nonlocal entity, entity_type, rids
|
|
|
@ -44,19 +45,26 @@ def ned(tsv, ner_result, ned_rest_endpoint, json_file=None, threshold=None):
|
|
|
|
if 'ranking' in ned_result[eid]:
|
|
|
|
if 'ranking' in ned_result[eid]:
|
|
|
|
ranking = ned_result[eid]['ranking']
|
|
|
|
ranking = ned_result[eid]['ranking']
|
|
|
|
|
|
|
|
|
|
|
|
#tsv.loc[rids, 'ID'] = ranking[0][1]['wikidata'] if threshold is None or ranking[0][1]['proba_1'] >= threshold else ''
|
|
|
|
# tsv.loc[rids, 'ID'] = ranking[0][1]['wikidata']
|
|
|
|
|
|
|
|
# if threshold is None or ranking[0][1]['proba_1'] >= threshold else ''
|
|
|
|
|
|
|
|
|
|
|
|
tmp = "|".join([ranking[i][1]['wikidata']
|
|
|
|
tmp = "|".join([ranking[i][1]['wikidata']
|
|
|
|
for i in range(len(ranking))
|
|
|
|
for i in range(len(ranking))
|
|
|
|
if threshold is None or ranking[i][1]['proba_1'] >= threshold])
|
|
|
|
if threshold is None or ranking[i][1]['proba_1'] >= threshold])
|
|
|
|
tsv.loc[rids, 'ID'] = tmp if len(tmp) > 0 else '-'
|
|
|
|
tsv.loc[rids, 'ID'] = tmp if len(tmp) > 0 else '-'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tmp = ",".join([str(ranking[i][1]['proba_1'])
|
|
|
|
|
|
|
|
for i in range(len(ranking))
|
|
|
|
|
|
|
|
if threshold is None or ranking[i][1]['proba_1'] >= threshold])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tsv.loc[rids, 'conf'] = tmp if len(tmp) > 0 else '-'
|
|
|
|
|
|
|
|
|
|
|
|
rids = []
|
|
|
|
rids = []
|
|
|
|
entity = ""
|
|
|
|
entity = ""
|
|
|
|
entity_type = None
|
|
|
|
entity_type = None
|
|
|
|
|
|
|
|
|
|
|
|
ner_tmp = tsv.copy()
|
|
|
|
ner_tmp = tsv.copy()
|
|
|
|
ner_tmp.loc[~ner_tmp['NE-TAG'].isin(['O', 'B-PER', 'B-LOC','B-ORG', 'I-PER', 'I-LOC', 'I-ORG']), 'NE-TAG'] = 'O'
|
|
|
|
ner_tmp.loc[~ner_tmp['NE-TAG'].isin(['O', 'B-PER', 'B-LOC', 'B-ORG', 'I-PER', 'I-LOC', 'I-ORG']), 'NE-TAG'] = 'O'
|
|
|
|
|
|
|
|
|
|
|
|
for rid, row in ner_tmp.iterrows():
|
|
|
|
for rid, row in ner_tmp.iterrows():
|
|
|
|
|
|
|
|
|
|
|
|