mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-26 23:34:13 +01:00
update docs/makefile to point to v0.6.0 models
This commit is contained in:
parent
c6b863b13f
commit
a53d5fc452
4 changed files with 17 additions and 15 deletions
16
Makefile
16
Makefile
|
|
@ -6,21 +6,23 @@ EXTRAS ?=
|
|||
DOCKER_BASE_IMAGE ?= docker.io/ocrd/core-cuda-tf2:latest
|
||||
DOCKER_TAG ?= ocrd/eynollah
|
||||
DOCKER ?= docker
|
||||
WGET = wget -O
|
||||
|
||||
#SEG_MODEL := https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz
|
||||
#SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz
|
||||
# SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah.tar.gz
|
||||
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
|
||||
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
|
||||
SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
|
||||
#SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
|
||||
SEG_MODEL := https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1
|
||||
SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
|
||||
SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)
|
||||
|
||||
BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip
|
||||
BIN_MODEL := https://zenodo.org/records/17295988/files/models_binarization_v0_6_0.tar.gz?download=1
|
||||
# Local archive name for the binarization model: the basename of BIN_MODEL
# with a trailing "?download=1" query stripped (same derivation as
# SEG_MODELFILE and OCR_MODELFILE), so the download target is a plain file
# name without the glob character "?". URLs without that suffix are unchanged.
BIN_MODELFILE = $(notdir $(patsubst %?download=1,%,$(BIN_MODEL)))
|
||||
BIN_MODELNAME := default-2021-03-09
|
||||
|
||||
OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1
|
||||
OCR_MODEL := https://zenodo.org/records/17295988/files/models_ocr_v0_6_0.tar.gz?download=1
|
||||
OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
|
||||
OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)
|
||||
|
||||
|
|
@ -55,18 +57,18 @@ help:
|
|||
# END-EVAL
|
||||
|
||||
|
||||
# Download and extract models to $(PWD)/models_layout_v0_5_0
|
||||
# Download and extract models to $(PWD)/models_layout_v0_6_0
|
||||
models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)
|
||||
|
||||
# do not download these files if we already have the directories
|
||||
.INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE)
|
||||
|
||||
$(BIN_MODELFILE):
|
||||
wget -O $@ $(BIN_MODEL)
|
||||
$(WGET) $@ $(BIN_MODEL)
|
||||
$(SEG_MODELFILE):
|
||||
wget -O $@ $(SEG_MODEL)
|
||||
$(WGET) $@ $(SEG_MODEL)
|
||||
$(OCR_MODELFILE):
|
||||
wget -O $@ $(OCR_MODEL)
|
||||
$(WGET) $@ $(OCR_MODEL)
|
||||
|
||||
$(BIN_MODELNAME): $(BIN_MODELFILE)
|
||||
mkdir $@
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ make install EXTRAS=OCR
|
|||
|
||||
## Models
|
||||
|
||||
Pretrained models can be downloaded from [zenodo](https://zenodo.org/records/17194824) or [huggingface](https://huggingface.co/SBB?search_models=eynollah).
|
||||
Pretrained models can be downloaded from [zenodo](https://doi.org/10.5281/zenodo.17194823) or [huggingface](https://huggingface.co/SBB?search_models=eynollah).
|
||||
|
||||
For documentation on models, have a look at [`models.md`](https://github.com/qurator-spk/eynollah/tree/main/docs/models.md).
|
||||
Model cards are also provided for our trained models.
|
||||
|
|
@ -162,7 +162,7 @@ formally described in [`ocrd-tool.json`](https://github.com/qurator-spk/eynollah
|
|||
|
||||
In this case, the source image file group with (preferably) RGB images should be used as input like this:
|
||||
|
||||
ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models eynollah_layout_v0_5_0
|
||||
ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models eynollah_layout_v0_6_0
|
||||
|
||||
If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows:
|
||||
- existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results)
|
||||
|
|
@ -174,7 +174,7 @@ If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynol
|
|||
(because some other preprocessing step was in effect like `denoised`), then
|
||||
the output PAGE-XML will be based on that as new top-level (`@imageFilename`)
|
||||
|
||||
ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models eynollah_layout_v0_5_0
|
||||
ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models eynollah_layout_v0_6_0
|
||||
|
||||
In general, it makes more sense to add other workflow steps **after** Eynollah.
|
||||
|
||||
|
|
|
|||
|
|
@ -83,10 +83,10 @@
|
|||
},
|
||||
"resources": [
|
||||
{
|
||||
"url": "https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1",
|
||||
"name": "models_layout_v0_5_0",
|
||||
"url": "https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1",
|
||||
"name": "models_layout_v0_6_0",
|
||||
"type": "archive",
|
||||
"path_in_archive": "models_layout_v0_5_0",
|
||||
"path_in_archive": "models_layout_v0_6_0",
|
||||
"size": 3525684179,
|
||||
"description": "Models for layout detection, reading order detection, textline detection, page extraction, column classification, table detection, binarization, image enhancement",
|
||||
"version_range": ">= v0.5.0"
|
||||
|
|
|
|||
|
|
@ -22,14 +22,14 @@ Download our pretrained weights and add them to a `train/pretrained_model` folde
|
|||
|
||||
```sh
|
||||
cd train
|
||||
wget -O pretrained_model.tar.gz https://zenodo.org/records/17243320/files/pretrained_model_v0_5_1.tar.gz?download=1
|
||||
wget -O pretrained_model.tar.gz "https://zenodo.org/records/17295988/files/pretrained_model_v0_6_0.tar.gz?download=1"
|
||||
tar xf pretrained_model.tar.gz
|
||||
```
|
||||
|
||||
### Binarization training data
|
||||
|
||||
A small sample of training data for binarization experiments can be found [on
|
||||
zenodo](https://zenodo.org/records/17243320/files/training_data_sample_binarization_v0_5_1.tar.gz?download=1),
|
||||
zenodo](https://zenodo.org/records/17295988/files/training_data_sample_binarization_v0_6_0.tar.gz?download=1),
|
||||
which contains `images` and `labels` folders.
|
||||
|
||||
### Helpful tools
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue