eynollah/Makefile

132 lines
5.4 KiB
Makefile
Raw Normal View History

2025-03-31 14:13:16 +02:00
# Interpreter and installer used by the build, install and test targets.
# '?=' keeps any value already supplied by the caller.
PYTHON ?= python3
PIP ?= pip3
# Optional feature sets; when non-empty they are appended as ".[...]" by
# the 'install' and 'install-dev' targets.
EXTRAS ?=

# Settings consumed by the 'docker' target.
# DOCKER_BASE_IMAGE = artefakt.dev.sbb.berlin:5000/sbb/ocrd_core:v2.68.0
DOCKER_BASE_IMAGE ?= docker.io/ocrd/core-cuda-tf2:latest
DOCKER_TAG ?= ocrd/eynollah
DOCKER ?= docker

# Download command; the output file name must follow it directly.
WGET = wget -O

# Model archives used previously (kept for reference):
#SEG_MODEL := https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz
#SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz
# SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah.tar.gz
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
#SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
#SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1

# Archive containing all models.
EYNOLLAH_MODELS_URL := https://zenodo.org/records/17417471/files/models_all_v0_7_0.zip
# File name of the archive (last path component of the URL).
EYNOLLAH_MODELS_ZIP = $(notdir $(EYNOLLAH_MODELS_URL))
# Archive file name with its ".zip" suffix removed.
EYNOLLAH_MODELS_DIR = $(patsubst %.zip,%,$(EYNOLLAH_MODELS_ZIP))

# Extra arguments handed to pytest by the 'test' target.
PYTEST_ARGS ?= -vv --isolate
2024-09-16 18:21:14 +02:00
# BEGIN-EVAL makefile-parser --make-help Makefile
# Print an overview of the available targets and of the user-tunable
# variables; the bracketed part of each variable line is its current value.
# NOTE(review): this section sits between BEGIN-EVAL/END-EVAL markers and
# looks tool-generated -- confirm manual edits survive regeneration.
help:
	@echo ""
	@echo " Targets"
	@echo ""
	@echo " docker Build Docker image"
	@echo " build Build Python source and binary distribution"
	@echo " install Install package with pip"
	@echo " install-dev Install editable with pip"
	@echo " deps-test Install test dependencies with pip"
	@echo " models Download and extract models to $(CURDIR):"
	@echo " $(EYNOLLAH_MODELS_DIR)"
	@echo " smoke-test Run simple CLI check"
	@echo " ocrd-test Run OCR-D CLI check"
	@echo " test Run unit tests"
	@echo ""
	@echo " Variables"
	@echo " EXTRAS comma-separated list of features (like 'OCR,plotting') for 'install' [$(EXTRAS)]"
	@echo " DOCKER_TAG Docker image tag for 'docker' [$(DOCKER_TAG)]"
	@echo " PYTEST_ARGS pytest args for 'test' (Set to '-s' to see log output during test execution, '-vv' to see individual tests. [$(PYTEST_ARGS)]"
	@# The archive URL lives in EYNOLLAH_MODELS_URL; ALL_MODELS is not defined in this file.
	@echo " EYNOLLAH_MODELS_URL URL of archive of all models [$(EYNOLLAH_MODELS_URL)]"
	@echo ""
# END-EVAL
# Download and extract models to $(CURDIR)/$(EYNOLLAH_MODELS_DIR)
2025-10-23 11:58:23 +02:00
# 'models' is satisfied once the extracted model directory exists.
models: $(EYNOLLAH_MODELS_DIR)

# The archive is only a stepping stone: marking it intermediate means it is
# not downloaded again when the extracted directory is already present.
.INTERMEDIATE: $(EYNOLLAH_MODELS_ZIP)

# Fetch the archive; $(WGET) expects the output file name as its first argument.
$(EYNOLLAH_MODELS_ZIP):
	$(WGET) $(@) $(EYNOLLAH_MODELS_URL)

# Unpack the archive into the current directory.
# NOTE(review): assumes the archive unpacks into a directory named like the
# archive itself; otherwise this rule would rerun every time -- confirm.
$(EYNOLLAH_MODELS_DIR): $(EYNOLLAH_MODELS_ZIP)
	unzip $(<)
2025-03-31 14:13:16 +02:00
# Build Python source and binary distribution: install the 'build'
# frontend first, then run it on the current directory.
build:
	$(PIP) install build
	$(PYTHON) -m build .
2020-11-23 13:40:54 +01:00
# Install package with pip; a non-empty EXTRAS is appended as "[...]".
install:
	$(PIP) install .$(if $(EXTRAS),[$(EXTRAS)])
# Install editable (-e) with pip; a non-empty EXTRAS is appended as "[...]".
install-dev:
	$(PIP) install -e .$(if $(EXTRAS),[$(EXTRAS)])
2025-03-31 14:13:16 +02:00
# Install test dependencies with pip, as listed in requirements-test.txt.
deps-test:
	$(PIP) install -r requirements-test.txt
2020-11-23 13:40:54 +01:00
2025-03-31 16:55:57 +02:00
# Run simple CLI check: exercise the eynollah subcommands on one sample page
# and verify that each produces plausible output in a scratch directory.
# All steps read their models from $(CURDIR)/models_eynollah.
# NOTE(review): '!=' appears to run mktemp when the Makefile is read rather
# than when the target runs, and the directory is only removed when every
# step succeeds -- confirm whether that is intended.
smoke-test: TMPDIR != mktemp -d
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
	# layout analysis:
	eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah
	grep -F -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
	grep -F -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
	# layout, directory mode (skip one, add one):
	eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_eynollah
	test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
	# mbreorder, directory mode (overwrite):
	eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_eynollah
	grep -F -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
	grep -F -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
	# binarize:
	eynollah binarization -m $(CURDIR)/models_eynollah/eynollah-binarization_20210425 -i $< -o $(TMPDIR)/$(<F)
	test -s $(TMPDIR)/$(<F)
	@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
	# enhance:
	eynollah enhancement -m $(CURDIR)/models_eynollah -sos -i $< -o $(TMPDIR) -O
	test -s $(TMPDIR)/$(<F)
	@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
	$(RM) -r $(TMPDIR)
# Run OCR-D CLI check: set up a throw-away OCR-D workspace around one sample
# image, run the segmentation and binarization processors on it, and verify
# that the segmentation result is a PAGE file containing regions.
# OCRD_MISSING_OUTPUT is exported to the recipe's environment.
# NOTE(review): presumably ABORT makes OCR-D processors fail when an expected
# output is missing -- confirm against the OCR-D documentation.
ocrd-test: export OCRD_MISSING_OUTPUT := ABORT
ocrd-test: TMPDIR != mktemp -d
ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
	cp $< $(TMPDIR)
	ocrd workspace -d $(TMPDIR) init
	ocrd workspace -d $(TMPDIR) add -G OCR-D-IMG -g PHYS_0020 -i OCR-D-IMG_0020 $(<F)
	ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/models_eynollah
	result=$$(ocrd workspace -d $(TMPDIR) find -G OCR-D-SEG); \
	grep -F -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$$result && \
	grep -F -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$$result
	ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/models_eynollah/eynollah-binarization_20210425
	ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/models_eynollah/eynollah-binarization_20210425 -P operation_level region
	$(RM) -r $(TMPDIR)
2020-11-27 14:38:29 +01:00
# Run unit tests with pytest. For this target only, EYNOLLAH_MODELS_DIR is
# pointed at $(CURDIR)/models_eynollah and exported to the recipe's
# environment; PYTEST_ARGS is appended to the pytest command line.
test: export EYNOLLAH_MODELS_DIR := $(CURDIR)/models_eynollah
test:
	$(PYTHON) -m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS)
2024-09-16 18:21:14 +02:00
# Measure test coverage: clear old data, rerun the 'test' target with
# "coverage run" substituted for the Python interpreter, then print a report.
coverage:
	coverage erase
	$(MAKE) test PYTHON="coverage run"
	coverage report -m
2024-09-16 18:21:14 +02:00
# Build Docker image tagged $(DOCKER_TAG) from the current directory.
# The base image, the short git revision of HEAD and the current UTC
# timestamp are passed to the build as build arguments.
docker:
	$(DOCKER) build \
	--build-arg DOCKER_BASE_IMAGE=$(DOCKER_BASE_IMAGE) \
	--build-arg VCS_REF=$$(git rev-parse --short HEAD) \
	--build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \
	-t $(DOCKER_TAG) .
# Command-style targets: always run their recipes, even if a file or
# directory with the same name exists (deps-test was previously missing).
.PHONY: models build install install-dev deps-test test smoke-test ocrd-test coverage docker help