You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

523 lines
31 KiB
Makefile

REPO_PATH ?=$(shell pwd)
BERT_BASE_PATH ?=$(REPO_PATH)/data/BERT
BERT_URL ?=https://storage.googleapis.com/bert_models/2018_11_23/multi_cased_L-12_H-768_A-12.zip
REL_BERT_PATH ?=multi_cased_L-12_H-768_A-12
#BERT_URL ?=https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip
#REL_BERT_PATH ?=wwm_uncased_L-24_H-1024_A-16
BERT_MODEL_PATH ?=$(BERT_BASE_PATH)/$(REL_BERT_PATH)
DIGISAM_PATH ?=$(REPO_PATH)/data/digisam
REL_FINETUNED_PATH ?=data/digisam/BERT_de_finetuned
#REL_FINETUNED_PATH ?=data/digisam/BERT-large_de_finetuned
BERT_FINETUNED_PATH ?=$(REPO_PATH)/$(REL_FINETUNED_PATH)
NER_DATA_PATH ?=$(REPO_PATH)/data/NER
REL_BUILD_PATH ?=data/build
BUILD_PATH ?=$(REPO_PATH)/$(REL_BUILD_PATH)
EPOCHS ?=1
EPOCH_FILE ?=pytorch_model_ep$(EPOCHS).bin
MODEL_FILE ?=pytorch_model.bin
CROSS_VAL_FILE ?=cross_validation_results.pkl
WEIGHT_DECAY ?=0.03
WARMUP_PROPORTION ?=0.4
BATCH_SIZE ?=32
GRAD_ACC_STEPS ?=2
# BATCH_SIZE ?=128 # <===== unsupervised
# GRAD_ACC_STEPS ?=4 # <===== unsupervised
MAX_SEQ_LEN ?=128
# EXTRA_OPTIONS="--dry_run --no_cuda" <- Test if everything works.
EXTRA_OPTIONS ?=
DO_LOWER_CASE ?=
BERT_NER_OPTIONS ?=--task_name=ner --max_seq_length=$(MAX_SEQ_LEN) --num_train_epochs=$(EPOCHS) --warmup_proportion=$(WARMUP_PROPORTION) --gradient_accumulation_steps=$(GRAD_ACC_STEPS) --train_batch_size=$(BATCH_SIZE) --gt_file=$(BUILD_PATH)/gt.pkl --weight_decay=$(WEIGHT_DECAY) $(DO_LOWER_CASE) $(EXTRA_OPTIONS)
BERT_NER_EVAL_OPTIONS ?=--eval_batch_size=8 --task_name=ner --gt_file=$(BUILD_PATH)/gt.pkl $(DO_LOWER_CASE) $(EXTRA_OPTIONS)
###############################################################################
# directories
#
$(BUILD_PATH):
mkdir -p $(BUILD_PATH)
$(BERT_FINETUNED_PATH):
mkdir -p $(BERT_FINETUNED_PATH)
cp -L $(BERT_MODEL_PATH)/pytorch_model.bin $(BERT_FINETUNED_PATH)/pytorch_model.bin
chmod u+rw $(BERT_FINETUNED_PATH)/pytorch_model.bin
ln -sfn $(BERT_MODEL_PATH)/bert_config.json $(BERT_FINETUNED_PATH)/bert_config.json
ln -sfn $(BERT_MODEL_PATH)/vocab.txt $(BERT_FINETUNED_PATH)/vocab.txt
dirs: $(BUILD_PATH) $(BERT_FINETUNED_PATH)
###############################################################################
# BERT unsupervised on "Digitale Sammlungen":
#
TEMP_PREFIX ?=/tmp/
$(BERT_MODEL_PATH)/bert_model.ckpt.index:
wget -nc --directory-prefix=$(BERT_BASE_PATH) $(BERT_URL)
unzip -d $(BERT_BASE_PATH) $(BERT_MODEL_PATH).zip
$(BERT_MODEL_PATH)/pytorch_model.bin: $(BERT_MODEL_PATH)/bert_model.ckpt.index
pytorch_pretrained_bert convert_tf_checkpoint_to_pytorch $(BERT_MODEL_PATH)/bert_model.ckpt $(BERT_MODEL_PATH)/bert_config.json $(BERT_MODEL_PATH)/pytorch_model.bin
$(DIGISAM_PATH)/de_corpus.txt:
collectcorpus $(DIGISAM_PATH)/fulltext.sqlite3 $(DIGISAM_PATH)/selection_de.pkl $(DIGISAM_PATH)/de_corpus.txt --chunksize=10000
$(BERT_MODEL_PATH)/epoch_0.json: $(DIGISAM_PATH)/de_corpus.txt $(BERT_MODEL_PATH)/pytorch_model.bin
bert-pregenerate-trainingdata --train_corpus $(DIGISAM_PATH)/de_corpus.txt --output_dir $(BERT_MODEL_PATH) --bert_model $(BERT_MODEL_PATH) --reduce_memory --epochs $(EPOCHS)
bert-digisam-unsupervised: $(BERT_MODEL_PATH)/epoch_0.json
bert-finetune --pregenerated_data $(BERT_MODEL_PATH) --output_dir $(BERT_FINETUNED_PATH) --bert_model $(BERT_MODEL_PATH) --reduce_memory --fp16 --gradient_accumulation_steps 4 --train_batch_size 32 --epochs $(EPOCHS) --temp_prefix $(TEMP_PREFIX)
bert-digisam-unsupervised-continued: $(BERT_FINETUNED_PATH) $(BERT_MODEL_PATH)/epoch_0.json
bert-finetune --pregenerated_data $(BERT_MODEL_PATH) --output_dir $(BERT_FINETUNED_PATH) --bert_model $(BERT_FINETUNED_PATH) --reduce_memory --fp16 --gradient_accumulation_steps=$(GRAD_ACC_STEPS) --train_batch_size=$(BATCH_SIZE) --epochs $(EPOCHS) --temp_prefix $(TEMP_PREFIX)
get-bert: $(BERT_MODEL_PATH)/bert_model.ckpt.index
convert-bert: $(BERT_MODEL_PATH)/pytorch_model.bin
###############################################################################
#NER ground truth:
$(NER_DATA_PATH)/ner-corpora:
git clone https://github.com/EuropeanaNewspapers/ner-corpora $(NER_DATA_PATH)/ner-corpora
$(BUILD_PATH)/europeana_historic.pkl: $(NER_DATA_PATH)/ner-corpora
compile_europeana_historic $(NER_DATA_PATH)/ner-corpora $(BUILD_PATH)/europeana_historic.pkl
$(BUILD_PATH)/germ_eval.pkl: $(NER_DATA_PATH)/GermEval
compile_germ_eval $(NER_DATA_PATH)/GermEval $(BUILD_PATH)/germ_eval.pkl
$(BUILD_PATH)/conll2003.pkl: $(NER_DATA_PATH)/conll2003
compile_conll $(NER_DATA_PATH)/conll2003 $(BUILD_PATH)/conll2003.pkl
$(BUILD_PATH)/wikiner.pkl: $(NER_DATA_PATH)/wikiner
compile_wikiner $(NER_DATA_PATH)/wikiner $(BUILD_PATH)/wikiner.pkl
$(BUILD_PATH)/gt.pkl:
join-gt $(BUILD_PATH)/germ_eval.pkl $(BUILD_PATH)/europeana_historic.pkl $(BUILD_PATH)/conll2003.pkl $(BUILD_PATH)/wikiner.pkl $(BUILD_PATH)/gt.pkl
ner-ground-truth: dirs $(BUILD_PATH)/europeana_historic.pkl $(BUILD_PATH)/germ_eval.pkl $(BUILD_PATH)/conll2003.pkl $(BUILD_PATH)/wikiner.pkl $(BUILD_PATH)/gt.pkl
###############################################################################
#BERT NER training:
.PRECIOUS: %/vocab.txt %/bert_config.json %/$(MODEL_FILE)
%/vocab.txt:
ln -sfnr $(BERT_FINETUNED_PATH)/vocab.txt $(@D)/vocab.txt
%/bert_config.json:
ln -sfnr $(BERT_FINETUNED_PATH)/bert_config.json $(@D)/bert_config.json
%/$(MODEL_FILE):
ln -sfnr $(@D)/$(EPOCH_FILE) $(@D)/$(MODEL_FILE)
########################################
# baseline
$(BUILD_PATH)/bert-conll2003-en-baseline/$(EPOCH_FILE):
bert-ner --train_sets='EN-CONLL-TRAIN' --dev_sets='EN-CONLL-TESTA' --bert_model=bert-base-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-conll2003-de-baseline/$(EPOCH_FILE):
bert-ner --train_sets='DE-CONLL-TRAIN' --dev_sets='DE-CONLL-TESTA' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-germ-eval-baseline/$(EPOCH_FILE):
bert-ner --train_sets='GERM-EVAL-TRAIN' --dev_sets='GERM-EVAL-DEV' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-all-german-baseline/$(EPOCH_FILE):
bert-ner --train_sets='GERM-EVAL-TRAIN|DE-CONLL-TRAIN' --dev_sets='GERM-EVAL-DEV|DE-CONLL-TESTA' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-baseline/$(EPOCH_FILE):
bert-ner --train_sets='WIKINER-WP3' --dev_sets='GERM-EVAL-DEV|DE-CONLL-TESTA' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-lft-baseline/$(EPOCH_FILE):
bert-ner --train_sets='LFT' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-baseline/$(EPOCH_FILE):
bert-ner --train_sets='ONB' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-sbb-baseline/$(EPOCH_FILE):
bert-ner --train_sets='SBB' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-lft-sbb-baseline/$(EPOCH_FILE):
bert-ner --train_sets='LFT|SBB' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-sbb-baseline/$(EPOCH_FILE):
bert-ner --train_sets='ONB|SBB' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-lft-baseline/$(EPOCH_FILE):
bert-ner --train_sets='ONB|LFT' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
bert-%-baseline: $(BUILD_PATH)/bert-%-baseline/$(EPOCH_FILE) $(BUILD_PATH)/bert-%-baseline/vocab.txt $(BUILD_PATH)/bert-%-baseline/bert_config.json $(BUILD_PATH)/bert-%-baseline/$(MODEL_FILE) ;
bert-baseline: dirs ner-ground-truth bert-conll2003-en-baseline bert-conll2003-de-baseline bert-germ-eval-baseline bert-all-german-baseline bert-wikiner-baseline bert-lft-baseline bert-onb-baseline bert-sbb-baseline bert-lft-sbb-baseline bert-onb-sbb-baseline bert-onb-lft-baseline
$(BUILD_PATH)/bert-lft-baseline/$(CROSS_VAL_FILE):
bert-ner --do_cross_validation --train_sets='LFT' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-baseline/$(CROSS_VAL_FILE):
bert-ner --do_cross_validation --train_sets='ONB' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-sbb-baseline/$(CROSS_VAL_FILE):
bert-ner --do_cross_validation --train_sets='SBB' --bert_model=bert-base-multilingual-cased --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
bert-cv-%-baseline: $(BUILD_PATH)/bert-%-baseline/$(CROSS_VAL_FILE) $(BUILD_PATH)/bert-%-baseline/vocab.txt $(BUILD_PATH)/bert-%-baseline/bert_config.json $(BUILD_PATH)/bert-%-baseline/$(MODEL_FILE) ;
bert-cv-baseline: bert-cv-lft-baseline bert-cv-onb-baseline bert-cv-sbb-baseline
########################################
#de-finetuned
$(BUILD_PATH)/bert-conll2003-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='DE-CONLL-TRAIN' --dev_sets='DE-CONLL-TESTA' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-germ-eval-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='GERM-EVAL-TRAIN' --dev_sets='GERM-EVAL-DEV' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-all-german-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='GERM-EVAL-TRAIN|DE-CONLL-TRAIN' --dev_sets='GERM-EVAL-DEV|DE-CONLL-TESTA' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-complete-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='GERM-EVAL-TRAIN|GERM-EVAL-DEV|DE-CONLL-TRAIN|DE-CONLL-DEV|SBB|ONB|LFT|DE-CONLL-TESTA|DE-CONLL-TESTB|GERM-EVAL-TEST' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-multilang-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='GERM-EVAL-DEV|GERM-EVAL-TEST|GERM-EVAL-TRAIN|SBB|ONB|LFT|BNF|KB|DE-CONLL-DEV|DE-CONLL-TESTA|DE-CONLL-TESTB|DE-CONLL-TRAIN|EN-CONLL-TESTA|EN-CONLL-TESTB|EN-CONLL-TRAIN' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='WIKINER-WP3' --dev_sets='GERM-EVAL-DEV|DE-CONLL-TESTA' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-lft-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='LFT' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='ONB' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-sbb-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='SBB' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-lft-sbb-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='LFT|SBB' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-sbb-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='ONB|SBB' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-lft-de-finetuned/$(EPOCH_FILE):
bert-ner --train_sets='ONB|LFT' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
bert-%-de-finetuned: ner-ground-truth $(BUILD_PATH)/bert-%-de-finetuned/$(EPOCH_FILE) $(BUILD_PATH)/bert-%-de-finetuned/vocab.txt $(BUILD_PATH)/bert-%-de-finetuned/bert_config.json $(BUILD_PATH)/bert-%-de-finetuned/$(MODEL_FILE) ;
bert-finetuned: dirs ner-ground-truth bert-conll2003-de-finetuned bert-germ-eval-de-finetuned bert-all-german-de-finetuned bert-wikiner-de-finetuned bert-lft-de-finetuned bert-onb-de-finetuned bert-sbb-de-finetuned bert-lft-sbb-de-finetuned bert-onb-sbb-de-finetuned bert-onb-lft-de-finetuned
$(BUILD_PATH)/bert-lft-de-finetuned/$(CROSS_VAL_FILE):
bert-ner --do_cross_validation --train_sets='LFT' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) # >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-de-finetuned/$(CROSS_VAL_FILE):
bert-ner --do_cross_validation --train_sets='ONB' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-sbb-de-finetuned/$(CROSS_VAL_FILE):
bert-ner --do_cross_validation --train_sets='SBB' --bert_model=$(BERT_FINETUNED_PATH) --output_dir=$(@D) $(BERT_NER_OPTIONS) >> $(@D).log 2<&1
bert-cv-%-de-finetuned: $(BUILD_PATH)/bert-%-de-finetuned/$(CROSS_VAL_FILE) $(BUILD_PATH)/bert-%-de-finetuned/vocab.txt $(BUILD_PATH)/bert-%-de-finetuned/bert_config.json $(BUILD_PATH)/bert-%-de-finetuned/$(MODEL_FILE) ;
bert-cv-de-finetuned: bert-cv-lft-de-finetuned bert-cv-onb-de-finetuned bert-cv-sbb-de-finetuned
bert-cv: bert-cv-de-finetuned bert-cv-baseline
bert-train: bert-finetuned bert-baseline
###############################################################################
# Evaluation
#
$(BUILD_PATH)/bert-conll2003-de-baseline/eval_results-DE-CONLL-TESTA.pkl:
bert-ner --dev_sets='DE-CONLL-TESTA' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-conll2003-de-baseline/eval_results-DE-CONLL-TESTB.pkl:
bert-ner --dev_sets='DE-CONLL-TESTB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-conll2003-de-baseline/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-conll2003-de-baseline/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-conll2003-de-baseline/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
#
$(BUILD_PATH)/bert-germ-eval-baseline/eval_results-GERM-EVAL-TEST.pkl:
bert-ner --dev_sets='GERM-EVAL-TEST' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-all-german-baseline/eval_results-GERM-EVAL-TEST.pkl:
bert-ner --dev_sets='GERM-EVAL-TEST' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-baseline/eval_results-GERM-EVAL-TEST.pkl:
bert-ner --dev_sets='GERM-EVAL-TEST' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-germ-eval-baseline/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-germ-eval-baseline/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-germ-eval-baseline/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
#
$(BUILD_PATH)/bert-all-german-baseline/eval_results-DE-CONLL-TESTA.pkl:
bert-ner --dev_sets='DE-CONLL-TESTA' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-baseline/eval_results-DE-CONLL-TESTA.pkl:
bert-ner --dev_sets='DE-CONLL-TESTA' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
#
$(BUILD_PATH)/bert-all-german-baseline/eval_results-DE-CONLL-TESTB.pkl:
bert-ner --dev_sets='DE-CONLL-TESTB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-baseline/eval_results-DE-CONLL-TESTB.pkl:
bert-ner --dev_sets='DE-CONLL-TESTB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
#
$(BUILD_PATH)/bert-all-german-baseline/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-all-german-baseline/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-all-german-baseline/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
#
$(BUILD_PATH)/bert-wikiner-baseline/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-baseline/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-baseline/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-lft-baseline/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-lft-baseline/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-baseline/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-baseline/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-sbb-baseline/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-sbb-baseline/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-lft-sbb-baseline/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-sbb-baseline/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-lft-baseline/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
bert-ner-evaluation-baseline: dirs $(BUILD_PATH)/bert-all-german-baseline/eval_results-LFT.pkl $(BUILD_PATH)/bert-all-german-baseline/eval_results-SBB.pkl $(BUILD_PATH)/bert-all-german-baseline/eval_results-ONB.pkl $(BUILD_PATH)/bert-wikiner-baseline/eval_results-LFT.pkl $(BUILD_PATH)/bert-wikiner-baseline/eval_results-SBB.pkl $(BUILD_PATH)/bert-wikiner-baseline/eval_results-ONB.pkl $(BUILD_PATH)/bert-all-german-baseline/eval_results-GERM-EVAL-TEST.pkl $(BUILD_PATH)/bert-wikiner-baseline/eval_results-GERM-EVAL-TEST.pkl $(BUILD_PATH)/bert-all-german-baseline/eval_results-DE-CONLL-TESTA.pkl $(BUILD_PATH)/bert-wikiner-baseline/eval_results-DE-CONLL-TESTA.pkl $(BUILD_PATH)/bert-all-german-baseline/eval_results-DE-CONLL-TESTB.pkl $(BUILD_PATH)/bert-wikiner-baseline/eval_results-DE-CONLL-TESTB.pkl $(BUILD_PATH)/bert-germ-eval-baseline/eval_results-GERM-EVAL-TEST.pkl $(BUILD_PATH)/bert-conll2003-de-baseline/eval_results-DE-CONLL-TESTA.pkl $(BUILD_PATH)/bert-conll2003-de-baseline/eval_results-DE-CONLL-TESTB.pkl $(BUILD_PATH)/bert-lft-baseline/eval_results-ONB.pkl $(BUILD_PATH)/bert-lft-baseline/eval_results-SBB.pkl $(BUILD_PATH)/bert-onb-baseline/eval_results-LFT.pkl $(BUILD_PATH)/bert-onb-baseline/eval_results-SBB.pkl $(BUILD_PATH)/bert-sbb-baseline/eval_results-LFT.pkl $(BUILD_PATH)/bert-sbb-baseline/eval_results-ONB.pkl $(BUILD_PATH)/bert-lft-sbb-baseline/eval_results-ONB.pkl $(BUILD_PATH)/bert-germ-eval-baseline/eval_results-LFT.pkl $(BUILD_PATH)/bert-germ-eval-baseline/eval_results-SBB.pkl $(BUILD_PATH)/bert-germ-eval-baseline/eval_results-ONB.pkl $(BUILD_PATH)/bert-onb-sbb-baseline/eval_results-LFT.pkl $(BUILD_PATH)/bert-onb-lft-baseline/eval_results-SBB.pkl $(BUILD_PATH)/bert-conll2003-de-baseline/eval_results-LFT.pkl $(BUILD_PATH)/bert-conll2003-de-baseline/eval_results-SBB.pkl $(BUILD_PATH)/bert-conll2003-de-baseline/eval_results-ONB.pkl
#######################################
$(BUILD_PATH)/bert-conll2003-de-finetuned/eval_results-DE-CONLL-TESTA.pkl:
bert-ner --dev_sets='DE-CONLL-TESTA' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-conll2003-de-finetuned/eval_results-DE-CONLL-TESTB.pkl:
bert-ner --dev_sets='DE-CONLL-TESTB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-conll2003-de-finetuned/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-conll2003-de-finetuned/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-conll2003-de-finetuned/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-germ-eval-de-finetuned/eval_results-GERM-EVAL-TEST.pkl:
bert-ner --dev_sets='GERM-EVAL-TEST' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-GERM-EVAL-TEST.pkl:
bert-ner --dev_sets='GERM-EVAL-TEST' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-GERM-EVAL-TEST.pkl:
bert-ner --dev_sets='GERM-EVAL-TEST' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
#
$(BUILD_PATH)/bert-germ-eval-de-finetuned/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-germ-eval-de-finetuned/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-germ-eval-de-finetuned/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
#
$(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-DE-CONLL-TESTA.pkl:
bert-ner --dev_sets='DE-CONLL-TESTA' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-DE-CONLL-TESTA.pkl:
bert-ner --dev_sets='DE-CONLL-TESTA' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
#
$(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-DE-CONLL-TESTB.pkl:
bert-ner --dev_sets='DE-CONLL-TESTB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-DE-CONLL-TESTB.pkl:
bert-ner --dev_sets='DE-CONLL-TESTB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
#
$(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(BUILD_PATH)/bert-all-german-de-finetuned --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(BUILD_PATH)/bert-all-german-de-finetuned --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(BUILD_PATH)/bert-all-german-de-finetuned --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
#
$(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-lft-de-finetuned/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-lft-de-finetuned/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-de-finetuned/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-de-finetuned/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-sbb-de-finetuned/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-sbb-de-finetuned/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-lft-sbb-de-finetuned/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-sbb-de-finetuned/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
$(BUILD_PATH)/bert-onb-lft-de-finetuned/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --output_dir=$(@D) --num_train_epochs $(EPOCHS) $(BERT_NER_EVAL_OPTIONS) >> $(@D).log 2<&1
bert-ner-evaluation-de-finetuned: dirs $(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-LFT.pkl $(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-SBB.pkl $(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-ONB.pkl $(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-LFT.pkl $(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-SBB.pkl $(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-ONB.pkl $(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-GERM-EVAL-TEST.pkl $(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-GERM-EVAL-TEST.pkl $(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-DE-CONLL-TESTA.pkl $(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-DE-CONLL-TESTA.pkl $(BUILD_PATH)/bert-all-german-de-finetuned/eval_results-DE-CONLL-TESTB.pkl $(BUILD_PATH)/bert-wikiner-de-finetuned/eval_results-DE-CONLL-TESTB.pkl $(BUILD_PATH)/bert-germ-eval-de-finetuned/eval_results-GERM-EVAL-TEST.pkl $(BUILD_PATH)/bert-conll2003-de-finetuned/eval_results-DE-CONLL-TESTA.pkl $(BUILD_PATH)/bert-conll2003-de-finetuned/eval_results-DE-CONLL-TESTB.pkl $(BUILD_PATH)/bert-lft-de-finetuned/eval_results-ONB.pkl $(BUILD_PATH)/bert-lft-de-finetuned/eval_results-SBB.pkl $(BUILD_PATH)/bert-onb-de-finetuned/eval_results-LFT.pkl $(BUILD_PATH)/bert-onb-de-finetuned/eval_results-SBB.pkl $(BUILD_PATH)/bert-sbb-de-finetuned/eval_results-LFT.pkl $(BUILD_PATH)/bert-sbb-de-finetuned/eval_results-ONB.pkl $(BUILD_PATH)/bert-lft-sbb-de-finetuned/eval_results-ONB.pkl $(BUILD_PATH)/bert-germ-eval-de-finetuned/eval_results-LFT.pkl $(BUILD_PATH)/bert-germ-eval-de-finetuned/eval_results-SBB.pkl $(BUILD_PATH)/bert-germ-eval-de-finetuned/eval_results-ONB.pkl $(BUILD_PATH)/bert-onb-sbb-de-finetuned/eval_results-LFT.pkl $(BUILD_PATH)/bert-onb-lft-de-finetuned/eval_results-SBB.pkl $(BUILD_PATH)/bert-conll2003-de-finetuned/eval_results-LFT.pkl $(BUILD_PATH)/bert-conll2003-de-finetuned/eval_results-SBB.pkl $(BUILD_PATH)/bert-conll2003-de-finetuned/eval_results-ONB.pkl
bert-evaluation: bert-ner-evaluation-baseline bert-ner-evaluation-de-finetuned
###############################################################################
#wikipedia
WIKI_DATA_DIR=data/wikipedia
WP_EPOCH_SIZE=100000
wikipedia-ner-baseline-train: $(WIKI_DATA_DIR)/wikipedia-tagged.csv
bert-ner --gt_file=$(WIKI_DATA_DIR)/wikipedia-tagged.csv --train_sets=$(WIKI_DATA_DIR)/ner-train-index.pkl --dev_sets=$(WIKI_DATA_DIR)/ner-dev-index.pkl --bert_model=bert-base-multilingual-cased --task_name=wikipedia-ner --output_dir=$(BUILD_PATH)/wikipedia-baseline --num_train_epochs $(EPOCHS) --num_data_epochs=$(EPOCHS) --epoch_size=$(WP_EPOCH_SIZE) $(BERT_NER_OPTIONS)
wikipedia-ner-de-finetuned-train: $(WIKI_DATA_DIR)/wikipedia-tagged.csv
bert-ner --gt_file=$(WIKI_DATA_DIR)/wikipedia-tagged.csv --train_sets=$(WIKI_DATA_DIR)/ner-train-index.pkl --dev_sets=$(WIKI_DATA_DIR)/ner-dev-index.pkl --bert_model=$(DIGISAM_PATH)/BERT_de_finetuned --task_name=wikipedia-ner --output_dir=$(BUILD_PATH)/wikipedia-de-finetuned --num_train_epochs $(EPOCHS) --num_data_epochs=$(EPOCHS) --epoch_size=$(WP_EPOCH_SIZE) $(BERT_NER_OPTIONS)
########################
$(BUILD_PATH)/wikipedia-baseline/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --task_name=ner --output_dir=$(@D) $(BERT_NER_EVAL_OPTIONS)
$(BUILD_PATH)/wikipedia-baseline/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --task_name=ner --output_dir=$(@D) $(BERT_NER_EVAL_OPTIONS)
$(BUILD_PATH)/wikipedia-baseline/eval_results-ONB.pkl:
bert-ner --dev_sets='ONB' --task_name=ner --output_dir=$(@D) $(BERT_NER_EVAL_OPTIONS)
$(BUILD_PATH)/wikipedia-baseline/eval_results-DE-CONLL-TESTA.pkl:
bert-ner --dev_sets='DE-CONLL-TESTA' --task_name=ner --output_dir=$(@D) $(BERT_NER_EVAL_OPTIONS)
$(BUILD_PATH)/wikipedia-de-finetuned/eval_results-LFT.pkl:
bert-ner --dev_sets='LFT' --task_name=ner --output_dir=$(@D) $(BERT_NER_EVAL_OPTIONS)
$(BUILD_PATH)/wikipedia-de-finetuned/eval_results-SBB.pkl:
bert-ner --dev_sets='SBB' --task_name=ner --output_dir=$(@D) $(BERT_NER_EVAL_OPTIONS)
$(BUILD_PATH)/wikipedia-de-finetuned/eval_results-DE-CONLL-TESTA.pkl:
bert-ner --dev_sets='DE-CONLL-TESTA' --task_name=ner --output_dir=$(@D) $(BERT_NER_EVAL_OPTIONS)
wikipedia-baseline-evaluation: $(BUILD_PATH)/wikipedia-baseline/eval_results-SBB.pkl $(BUILD_PATH)/wikipedia-baseline/eval_results-LFT.pkl $(BUILD_PATH)/wikipedia-baseline/eval_results-ONB.pkl $(BUILD_PATH)/wikipedia-baseline/eval_results-DE-CONLL-TESTA.pkl
wikipedia-evaluation: $(BUILD_PATH)/wikipedia-de-finetuned/eval_results-LFT.pkl
wikipedia-evaluation2: $(BUILD_PATH)/wikipedia-de-finetuned/eval_results-SBB.pkl
wikipedia-evaluation3: $(BUILD_PATH)/wikipedia-de-finetuned/eval_results-DE-CONLL-TESTA.pkl
###############################
model_archive:
tar --exclude='*ep[1-6]*' --exclude='*eval*' --exclude='pytorch_model.bin' --exclude='*.pkl' -chzf models.tar.gz data/konvens2019/build-wd_0.03/bert-all-german-de-finetuned data/konvens2019/build-on-all-german-de-finetuned/bert-sbb-de-finetuned data/konvens2019/build-wd_0.03/bert-sbb-de-finetuned data/konvens2019/build-wd_0.03/bert-all-german-baseline