From 715c6ca3f350d5ad88a51a2e4f541dda8f6f2bc4 Mon Sep 17 00:00:00 2001 From: Kai Labusch Date: Mon, 2 Dec 2019 12:09:42 +0100 Subject: [PATCH 1/2] add multilang target --- Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bb7476f..5850fe8 100644 --- a/Makefile +++ b/Makefile @@ -199,8 +199,10 @@ $(BUILD_PATH)/bert-all-german-de-finetuned/$(EPOCH_FILE): bert-ner --train_sets='GERM-EVAL-TRAIN|DE-CONLL-TRAIN' --dev_sets='GERM-EVAL-DEV|DE-CONLL-TESTA' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1 $(BUILD_PATH)/bert-complete-de-finetuned/$(EPOCH_FILE): - bert-ner --train_sets='GERM-EVAL-TRAIN|DE-CONLL-TRAIN|SBB|ONB|LFT|DE-CONLL-TESTA|DE-CONLL-TESTB|GERM-EVAL-TEST' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1 + bert-ner --train_sets='GERM-EVAL-TRAIN|GERM-EVAL-DEV|DE-CONLL-TRAIN|DE-CONLL-DEV|SBB|ONB|LFT|DE-CONLL-TESTA|DE-CONLL-TESTB|GERM-EVAL-TEST' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1 +$(BUILD_PATH)/bert-multilang-de-finetuned/$(EPOCH_FILE): + bert-ner --train_sets='GERM-EVAL-DEV|GERM-EVAL-TEST|GERM-EVAL-TRAIN|SBB|ONB|LFT|BNF|KB|DE-CONLL-DEV|DE-CONLL-TESTA|DE-CONLL-TESTB|DE-CONLL-TRAIN|EN-CONLL-TESTA|EN-CONLL-TESTB|EN-CONLL-TRAIN' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1 $(BUILD_PATH)/bert-wikiner-de-finetuned/$(EPOCH_FILE): From 1c4d21810e22311e26fa43df0c7c21c39ddd03eb Mon Sep 17 00:00:00 2001 From: Kai Labusch Date: Mon, 2 Dec 2019 12:10:24 +0100 Subject: [PATCH 2/2] add historic french and dutch ground truth --- qurator/sbb_ner/ground_truth/europeana_historic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qurator/sbb_ner/ground_truth/europeana_historic.py b/qurator/sbb_ner/ground_truth/europeana_historic.py index 27017a6..11818ae 100644 --- a/qurator/sbb_ner/ground_truth/europeana_historic.py +++ b/qurator/sbb_ner/ground_truth/europeana_historic.py @@ -61,7 +61,10 @@ def main(path_to_ner_corpora, ner_ground_truth_file): gt_all = read_gt(['{}/enp_DE.sbb.bio/enp_DE.sbb.bio'.format(path_to_ner_corpora), '{}/enp_DE.onb.bio/enp_DE.onb.bio'.format(path_to_ner_corpora), - '{}/enp_DE.lft.bio/enp_DE.lft.bio'.format(path_to_ner_corpora)], ['SBB', 'ONB', 'LFT']) + '{}/enp_DE.lft.bio/enp_DE.lft.bio'.format(path_to_ner_corpora), + '{}/enp_FR.bnf.bio/enp_FR.bnf.bio'.format(path_to_ner_corpora), + '{}/enp_NL.kb.bio/enp_NL.kb.bio'.format(path_to_ner_corpora)], + ['SBB', 'ONB', 'LFT', 'BNF', 'KB']) gt_all.to_pickle(ner_ground_truth_file)