mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-26 23:34:13 +01:00
update docs/makefile to point to v0.6.0 models
This commit is contained in:
parent
c6b863b13f
commit
a53d5fc452
4 changed files with 17 additions and 15 deletions
16
Makefile
16
Makefile
|
|
@ -6,21 +6,23 @@ EXTRAS ?=
|
||||||
DOCKER_BASE_IMAGE ?= docker.io/ocrd/core-cuda-tf2:latest
|
DOCKER_BASE_IMAGE ?= docker.io/ocrd/core-cuda-tf2:latest
|
||||||
DOCKER_TAG ?= ocrd/eynollah
|
DOCKER_TAG ?= ocrd/eynollah
|
||||||
DOCKER ?= docker
|
DOCKER ?= docker
|
||||||
|
WGET = wget -O
|
||||||
|
|
||||||
#SEG_MODEL := https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz
|
#SEG_MODEL := https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz
|
||||||
#SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz
|
#SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz
|
||||||
# SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah.tar.gz
|
# SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah.tar.gz
|
||||||
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
|
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
|
||||||
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
|
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
|
||||||
SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
|
#SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
|
||||||
|
SEG_MODEL := https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1
|
||||||
SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
|
SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
|
||||||
SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)
|
SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)
|
||||||
|
|
||||||
BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip
|
BIN_MODEL := https://zenodo.org/records/17295988/files/models_binarization_v0_6_0.tar.gz?download=1
|
||||||
BIN_MODELFILE = $(notdir $(BIN_MODEL))
|
# Local archive name for the binarization model: drop the Zenodo "?download=1" query suffix before taking the basename (same as SEG_MODELFILE and OCR_MODELFILE), so the target is a clean *.tar.gz name without the glob character "?"
BIN_MODELFILE = $(notdir $(patsubst %?download=1,%,$(BIN_MODEL)))
|
||||||
BIN_MODELNAME := default-2021-03-09
|
BIN_MODELNAME := default-2021-03-09
|
||||||
|
|
||||||
OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1
|
OCR_MODEL := https://zenodo.org/records/17295988/files/models_ocr_v0_6_0.tar.gz?download=1
|
||||||
OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
|
OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
|
||||||
OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)
|
OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)
|
||||||
|
|
||||||
|
|
@ -55,18 +57,18 @@ help:
|
||||||
# END-EVAL
|
# END-EVAL
|
||||||
|
|
||||||
|
|
||||||
# Download and extract models to $(PWD)/models_layout_v0_5_0
|
# Download and extract models to $(PWD)/models_layout_v0_6_0
|
||||||
models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)
|
models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)
|
||||||
|
|
||||||
# do not download these files if we already have the directories
|
# do not download these files if we already have the directories
|
||||||
.INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE)
|
.INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE)
|
||||||
|
|
||||||
$(BIN_MODELFILE):
|
$(BIN_MODELFILE):
|
||||||
wget -O $@ $(BIN_MODEL)
|
$(WGET) $@ $(BIN_MODEL)
|
||||||
$(SEG_MODELFILE):
|
$(SEG_MODELFILE):
|
||||||
wget -O $@ $(SEG_MODEL)
|
$(WGET) $@ $(SEG_MODEL)
|
||||||
$(OCR_MODELFILE):
|
$(OCR_MODELFILE):
|
||||||
wget -O $@ $(OCR_MODEL)
|
$(WGET) $@ $(OCR_MODEL)
|
||||||
|
|
||||||
$(BIN_MODELNAME): $(BIN_MODELFILE)
|
$(BIN_MODELNAME): $(BIN_MODELFILE)
|
||||||
mkdir $@
|
mkdir $@
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ make install EXTRAS=OCR
|
||||||
|
|
||||||
## Models
|
## Models
|
||||||
|
|
||||||
Pretrained models can be downloaded from [zenodo](https://zenodo.org/records/17194824) or [huggingface](https://huggingface.co/SBB?search_models=eynollah).
|
Pretrained models can be downloaded from [zenodo](https://doi.org/10.5281/zenodo.17194823) or [huggingface](https://huggingface.co/SBB?search_models=eynollah).
|
||||||
|
|
||||||
For documentation on models, have a look at [`models.md`](https://github.com/qurator-spk/eynollah/tree/main/docs/models.md).
|
For documentation on models, have a look at [`models.md`](https://github.com/qurator-spk/eynollah/tree/main/docs/models.md).
|
||||||
Model cards are also provided for our trained models.
|
Model cards are also provided for our trained models.
|
||||||
|
|
@ -162,7 +162,7 @@ formally described in [`ocrd-tool.json`](https://github.com/qurator-spk/eynollah
|
||||||
|
|
||||||
In this case, the source image file group with (preferably) RGB images should be used as input like this:
|
In this case, the source image file group with (preferably) RGB images should be used as input like this:
|
||||||
|
|
||||||
ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models eynollah_layout_v0_5_0
|
ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models eynollah_layout_v0_6_0
|
||||||
|
|
||||||
If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows:
|
If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows:
|
||||||
- existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results)
|
- existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results)
|
||||||
|
|
@ -174,7 +174,7 @@ If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynol
|
||||||
(because some other preprocessing step was in effect like `denoised`), then
|
(because some other preprocessing step was in effect like `denoised`), then
|
||||||
the output PAGE-XML will be based on that as new top-level (`@imageFilename`)
|
the output PAGE-XML will be based on that as new top-level (`@imageFilename`)
|
||||||
|
|
||||||
ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models eynollah_layout_v0_5_0
|
ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models eynollah_layout_v0_6_0
|
||||||
|
|
||||||
In general, it makes more sense to add other workflow steps **after** Eynollah.
|
In general, it makes more sense to add other workflow steps **after** Eynollah.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -83,10 +83,10 @@
|
||||||
},
|
},
|
||||||
"resources": [
|
"resources": [
|
||||||
{
|
{
|
||||||
"url": "https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1",
|
"url": "https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1",
|
||||||
"name": "models_layout_v0_5_0",
|
"name": "models_layout_v0_6_0",
|
||||||
"type": "archive",
|
"type": "archive",
|
||||||
"path_in_archive": "models_layout_v0_5_0",
|
"path_in_archive": "models_layout_v0_6_0",
|
||||||
"size": 3525684179,
|
"size": 3525684179,
|
||||||
"description": "Models for layout detection, reading order detection, textline detection, page extraction, column classification, table detection, binarization, image enhancement",
|
"description": "Models for layout detection, reading order detection, textline detection, page extraction, column classification, table detection, binarization, image enhancement",
|
||||||
"version_range": ">= v0.5.0"
|
"version_range": ">= v0.5.0"
|
||||||
|
|
|
||||||
|
|
@ -22,14 +22,14 @@ Download our pretrained weights and add them to a `train/pretrained_model` folde
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
cd train
|
cd train
|
||||||
wget -O pretrained_model.tar.gz https://zenodo.org/records/17243320/files/pretrained_model_v0_5_1.tar.gz?download=1
|
wget -O pretrained_model.tar.gz "https://zenodo.org/records/17295988/files/pretrained_model_v0_6_0.tar.gz?download=1"
|
||||||
tar xf pretrained_model.tar.gz
|
tar xf pretrained_model.tar.gz
|
||||||
```
|
```
|
||||||
|
|
||||||
### Binarization training data
|
### Binarization training data
|
||||||
|
|
||||||
A small sample of training data for binarization experiment can be found [on
|
A small sample of training data for binarization experiments can be found [on
|
||||||
zenodo](https://zenodo.org/records/17243320/files/training_data_sample_binarization_v0_5_1.tar.gz?download=1),
|
zenodo](https://zenodo.org/records/17295988/files/training_data_sample_binarization_v0_6_0.tar.gz?download=1),
|
||||||
which contains `images` and `labels` folders.
|
which contains `images` and `labels` folders.
|
||||||
|
|
||||||
### Helpful tools
|
### Helpful tools
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue