From 2747385f89bfb52db638a37457c1dfae0a27f246 Mon Sep 17 00:00:00 2001 From: kba Date: Thu, 7 May 2026 17:15:15 +0200 Subject: [PATCH 1/3] remove unused deprecation-warning-causing biopython dependency --- requirements.txt | 1 - src/eynollah/utils/utils_ocr.py | 6 ------ 2 files changed, 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index 53d1e39..d79853f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,4 @@ tensorflow tf-keras # avoid keras 3 (also needs TF_USE_LEGACY_KERAS=1) numba <= 0.58.1 scikit-image -biopython tabulate diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 928c164..93d1137 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -7,7 +7,6 @@ import tensorflow as tf from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d from PIL import Image, ImageDraw, ImageFont -from Bio import pairwise2 from .resize import resize_image @@ -503,8 +502,3 @@ def return_rnn_cnn_ocr_of_given_textlines(image, ocr_textline_in_textregion.append(text_textline) ocr_all_textlines.append(ocr_textline_in_textregion) return ocr_all_textlines - -def biopython_align(str1, str2): - alignments = pairwise2.align.globalms(str1, str2, 2, -1, -2, -2) - best_alignment = alignments[0] # Get the best alignment - return best_alignment.seqA, best_alignment.seqB From 34a9d458ce7723006d6e5ccb48045d396738d254 Mon Sep 17 00:00:00 2001 From: kba Date: Thu, 7 May 2026 18:09:27 +0200 Subject: [PATCH 2/3] training deps: use sacred fork w/o pkg_resources, pin tf/tf_keras, protobuf packages to work with tensorflow_addons --- train/requirements.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/train/requirements.txt b/train/requirements.txt index 6f23d76..090bc50 100644 --- a/train/requirements.txt +++ b/train/requirements.txt @@ -1,7 +1,10 @@ -sacred +ocrd-fork-sacred >= 0.8.7.post1 seaborn numpy tqdm imutils scipy -tensorflow-addons # for 
connected_components +tensorflow-addons # for connected_components, depublished and only compatible with tensorflow < 2.16 +tensorflow < 2.16 # for tensorflow-addons, so only needed in training +tf_data < 2.16 # for tensorflow-addons, so only needed in training +protobuf < 5 # for tensorflow-addons, so only needed in training From a0bf1b51f4b10716f69d46fa5ad517ae9008eadf Mon Sep 17 00:00:00 2001 From: kba Date: Thu, 7 May 2026 19:30:29 +0200 Subject: [PATCH 3/3] makefile to reload models --- src/eynollah/training/reload-models-v0.8.mk | 48 +++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 src/eynollah/training/reload-models-v0.8.mk diff --git a/src/eynollah/training/reload-models-v0.8.mk b/src/eynollah/training/reload-models-v0.8.mk new file mode 100644 index 0000000..b7a38dd --- /dev/null +++ b/src/eynollah/training/reload-models-v0.8.mk @@ -0,0 +1,48 @@ +SHELL = bash -e + +MODELS_SRC = models_eynollah +MODELS_DST = reloaded/models_eynollah + + +# $(MODELS_DST)/eynollah-binarization_20210425 \ +# $(MODELS_DST)/eynollah-column-classifier_20210425 \ +# $(MODELS_DST)/eynollah-enhancement_20210425 \ +# $(MODELS_DST)/eynollah-main-regions-aug-rotation_20210425 \ +# $(MODELS_DST)/eynollah-main-regions-aug-scaling_20210425 \ +# $(MODELS_DST)/eynollah-main-regions-ensembled_20210425 \ +# $(MODELS_DST)/eynollah-main-regions_20220314 \ +# $(MODELS_DST)/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18 \ +# $(MODELS_DST)/eynollah-tables_20210319 \ +# $(MODELS_DST)/model_eynollah_ocr_cnnrnn_20250930 \ + +RELOADABLE_MODELS = \ + $(MODELS_DST)/model_eynollah_page_extraction_20250915 \ + $(MODELS_DST)/model_eynollah_reading_order_20250824 \ + $(MODELS_DST)/modelens_e_l_all_sp_0_1_2_3_4_171024 \ + $(MODELS_DST)/modelens_full_lay_1__4_3_091124 \ + $(MODELS_DST)/modelens_table_0t4_201124 \ + $(MODELS_DST)/modelens_textline_0_1__2_4_16092024 + +all: $(RELOADABLE_MODELS) + +$(MODELS_DST)/%: $(MODELS_SRC)/% + mkdir -p $@ + test -e $&1 | tee $(notdir 
$<).log + cp $