mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-05-13 01:13:54 +02:00
Merge pull request #7 from qurator-spk/ro-fixes-training-reload-additions
Ro fixes training reload additions
This commit is contained in:
commit
395decd6d6
4 changed files with 53 additions and 9 deletions
|
|
@ -6,5 +6,4 @@ tensorflow
|
|||
tf-keras # avoid keras 3 (also needs TF_USE_LEGACY_KERAS=1)
|
||||
numba <= 0.58.1
|
||||
scikit-image
|
||||
biopython
|
||||
tabulate
|
||||
|
|
|
|||
48
src/eynollah/training/reload-models-v0.8.mk
Normal file
48
src/eynollah/training/reload-models-v0.8.mk
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Run every recipe line with bash.
#   -e           abort a recipe line as soon as one of its commands fails
#   -o pipefail  make a pipeline fail when ANY stage fails, so that
#                `some-command 2>&1 | tee file.log` reports the status of
#                some-command instead of the (always successful) tee
SHELL = bash -e -o pipefail

# Directory holding the original models (one sub-directory per model,
# each with its own config.json).
MODELS_SRC = models_eynollah
# Directory receiving the reloaded models (same sub-directory names).
MODELS_DST = reloaded/models_eynollah
|
||||
|
||||
|
||||
# Models currently excluded from RELOADABLE_MODELS (kept for reference).
# To re-enable one, move it into the list below and append a trailing
# backslash; the backslashes are deliberately omitted here because a
# comment line ending in a backslash swallows the following line.
#   $(MODELS_DST)/eynollah-binarization_20210425
#   $(MODELS_DST)/eynollah-column-classifier_20210425
#   $(MODELS_DST)/eynollah-enhancement_20210425
#   $(MODELS_DST)/eynollah-main-regions-aug-rotation_20210425
#   $(MODELS_DST)/eynollah-main-regions-aug-scaling_20210425
#   $(MODELS_DST)/eynollah-main-regions-ensembled_20210425
#   $(MODELS_DST)/eynollah-main-regions_20220314
#   $(MODELS_DST)/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18
#   $(MODELS_DST)/eynollah-tables_20210319
#   $(MODELS_DST)/model_eynollah_ocr_cnnrnn_20250930
|
||||
|
||||
# Target directories built by `all`: one entry per model name, each
# prefixed with $(MODELS_DST)/.
RELOADABLE_MODELS = $(addprefix $(MODELS_DST)/, \
    model_eynollah_page_extraction_20250915 \
    model_eynollah_reading_order_20250824 \
    modelens_e_l_all_sp_0_1_2_3_4_171024 \
    modelens_full_lay_1__4_3_091124 \
    modelens_table_0t4_201124 \
    modelens_textline_0_1__2_4_16092024)
|
||||
|
||||
# Default goal: reload every model listed in RELOADABLE_MODELS.
# Declared phony so that a stray file or directory named `all` can never
# make this goal look up to date.
.PHONY: all
all: $(RELOADABLE_MODELS)
|
||||
|
||||
# Reload a single model: $(MODELS_SRC)/NAME -> $(MODELS_DST)/NAME.
#
# Steps:
#   1. Verify that the source model ships a config.json. This runs BEFORE
#      the target directory is created, so a missing config does not leave
#      an empty target behind that Make would later consider up to date.
#   2. Create the target directory.
#   3. Run `eynollah-training train` with the model's own config.json,
#      overriding reload_weights, continue_training, dir_output (the parent
#      directory of the target) and dir_of_start_model (the source model).
#      stdout and stderr are shown and also written to NAME.log in the
#      current working directory. `set -o pipefail` makes the line fail
#      when the trainer fails, rather than returning tee's exit status.
#   4. Copy the source config.json next to the reloaded model.
$(MODELS_DST)/%: $(MODELS_SRC)/%
	test -e $</config.json
	mkdir -p $@
	set -o pipefail; \
	eynollah-training train --force \
		with $</config.json \
		reload_weights=True \
		continue_training=False \
		dir_output=$(dir $@) \
		dir_of_start_model=$< \
		2>&1 | tee $(notdir $<).log
	cp $</config.json $@/config.json
|
||||
|
||||
# Print the size in bytes (du -bs) of every entry found at depth >= 2
# below $(MODELS_DST), next to the size of the entry with the same
# relative path below $(MODELS_SRC).
# Phony: this is a command, not a file to be built.
.PHONY: compare
compare:
	for i in $$(find $(MODELS_DST) -mindepth 2); do \
		n="$(MODELS_SRC)$${i#$(MODELS_DST)}"; \
		du -bs "$$n" "$$i"; \
	done
|
||||
|
||||
|
||||
# Remove all reloaded models (the whole $(MODELS_DST) tree).
# Phony: must run even if something named `clear` exists on disk.
.PHONY: clear
clear:
	rm -rf -- $(MODELS_DST)
|
||||
|
|
@ -7,7 +7,6 @@ import tensorflow as tf
|
|||
from scipy.signal import find_peaks
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from Bio import pairwise2
|
||||
|
||||
from .resize import resize_image
|
||||
|
||||
|
|
@ -503,8 +502,3 @@ def return_rnn_cnn_ocr_of_given_textlines(image,
|
|||
ocr_textline_in_textregion.append(text_textline)
|
||||
ocr_all_textlines.append(ocr_textline_in_textregion)
|
||||
return ocr_all_textlines
|
||||
|
||||
def biopython_align(str1, str2):
    """Globally align two strings and return both gapped sequences.

    Calls ``pairwise2.align.globalms`` with the positional scoring
    arguments 2, -1, -2, -2 (presumably match, mismatch, gap-open and
    gap-extend scores -- confirm against the Biopython documentation)
    and uses the first alignment of the returned list, which the
    original author treats as the best one.

    Returns:
        A tuple ``(seqA, seqB)`` taken from that first alignment.
    """
    top_hit = pairwise2.align.globalms(str1, str2, 2, -1, -2, -2)[0]
    return top_hit.seqA, top_hit.seqB
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
sacred
|
||||
ocrd-fork-sacred >= 0.8.7.post1
|
||||
seaborn
|
||||
numpy
|
||||
tqdm
|
||||
imutils
|
||||
scipy
|
||||
tensorflow-addons # for connected_components
|
||||
tensorflow-addons # for connected_components, depublished and only compatible with tensorflow < 2.16
|
||||
tensorflow < 2.16 # for tensorflow-addons, so only needed in training
|
||||
tf_data < 2.16 # for tensorflow-addons, so only needed in training
|
||||
protobuf < 5 # for tensorflow-addons, so only needed in training
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue