From a53d5fc4523bc46aaef18f22f956aa7c91a0b958 Mon Sep 17 00:00:00 2001 From: kba Date: Tue, 21 Oct 2025 13:15:57 +0200 Subject: [PATCH] update docs/makefile to point to v0.6.0 models --- Makefile | 16 +++++++++------- README.md | 6 +++--- src/eynollah/ocrd-tool.json | 6 +++--- train/README.md | 4 ++-- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 29dd877..4a28a23 100644 --- a/Makefile +++ b/Makefile @@ -6,21 +6,23 @@ EXTRAS ?= DOCKER_BASE_IMAGE ?= docker.io/ocrd/core-cuda-tf2:latest DOCKER_TAG ?= ocrd/eynollah DOCKER ?= docker +WGET = wget -O #SEG_MODEL := https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz #SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz # SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah.tar.gz #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz -SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1 +#SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1 +SEG_MODEL := https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1 SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL))) SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%) -BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip +BIN_MODEL := https://zenodo.org/records/17295988/files/models_binarization_v0_6_0.tar.gz BIN_MODELFILE = $(notdir $(BIN_MODEL)) BIN_MODELNAME := default-2021-03-09 -OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1 +OCR_MODEL := https://zenodo.org/records/17295988/files/models_ocr_v0_6_0.tar.gz?download=1 OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL))) OCR_MODELNAME 
= $(OCR_MODELFILE:%.tar.gz=%) @@ -55,18 +57,18 @@ help: # END-EVAL -# Download and extract models to $(PWD)/models_layout_v0_5_0 +# Download and extract models to $(PWD)/models_layout_v0_6_0 models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME) # do not download these files if we already have the directories .INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE) $(BIN_MODELFILE): - wget -O $@ $(BIN_MODEL) + $(WGET) $@ $(BIN_MODEL) $(SEG_MODELFILE): - wget -O $@ $(SEG_MODEL) + $(WGET) $@ $(SEG_MODEL) $(OCR_MODELFILE): - wget -O $@ $(OCR_MODEL) + $(WGET) $@ $(OCR_MODEL) $(BIN_MODELNAME): $(BIN_MODELFILE) mkdir $@ diff --git a/README.md b/README.md index 3ba5086..3ecb3d7 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ make install EXTRAS=OCR ## Models -Pretrained models can be downloaded from [zenodo](https://zenodo.org/records/17194824) or [huggingface](https://huggingface.co/SBB?search_models=eynollah). +Pretrained models can be downloaded from [zenodo](https://doi.org/10.5281/zenodo.17194823) or [huggingface](https://huggingface.co/SBB?search_models=eynollah). For documentation on models, have a look at [`models.md`](https://github.com/qurator-spk/eynollah/tree/main/docs/models.md). Model cards are also provided for our trained models. @@ -162,7 +162,7 @@ formally described in [`ocrd-tool.json`](https://github.com/qurator-spk/eynollah In this case, the source image file group with (preferably) RGB images should be used as input like this: - ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models eynollah_layout_v0_5_0 + ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models eynollah_layout_v0_6_0 If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows: - existing regions are kept and ignored (i.e. 
in effect they might overlap segments from Eynollah results) @@ -174,7 +174,7 @@ If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynol (because some other preprocessing step was in effect like `denoised`), then the output PAGE-XML will be based on that as new top-level (`@imageFilename`) - ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models eynollah_layout_v0_5_0 + ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models eynollah_layout_v0_6_0 In general, it makes more sense to add other workflow steps **after** Eynollah. diff --git a/src/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json index dbbdc3b..3d1193d 100644 --- a/src/eynollah/ocrd-tool.json +++ b/src/eynollah/ocrd-tool.json @@ -83,10 +83,10 @@ }, "resources": [ { - "url": "https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1", - "name": "models_layout_v0_5_0", + "url": "https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1", + "name": "models_layout_v0_6_0", "type": "archive", - "path_in_archive": "models_layout_v0_5_0", + "path_in_archive": "models_layout_v0_6_0", "size": 3525684179, "description": "Models for layout detection, reading order detection, textline detection, page extraction, column classification, table detection, binarization, image enhancement", "version_range": ">= v0.5.0" diff --git a/train/README.md b/train/README.md index 5f6d326..6aeea5d 100644 --- a/train/README.md +++ b/train/README.md @@ -22,14 +22,14 @@ Download our pretrained weights and add them to a `train/pretrained_model` folde ```sh cd train -wget -O pretrained_model.tar.gz https://zenodo.org/records/17243320/files/pretrained_model_v0_5_1.tar.gz?download=1 +wget -O pretrained_model.tar.gz "https://zenodo.org/records/17295988/files/pretrained_model_v0_6_0.tar.gz?download=1" tar xf pretrained_model.tar.gz ``` ### Binarization training data A small sample of training data for binarization experiment can be found [on 
-zenodo](https://zenodo.org/records/17243320/files/training_data_sample_binarization_v0_5_1.tar.gz?download=1), +zenodo](https://zenodo.org/records/17295988/files/training_data_sample_binarization_v0_6_0.tar.gz?download=1), which contains `images` and `labels` folders. ### Helpful tools