| 
									
										
										
										
											2025-03-31 14:13:16 +02:00
										 |  |  | PYTHON ?= python3 | 
					
						
							|  |  |  | PIP ?= pip3 | 
					
						
							| 
									
										
										
										
											2025-03-31 15:58:12 +02:00
										 |  |  | EXTRAS ?= | 
					
						
							| 
									
										
										
										
											2021-02-04 15:21:14 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-16 18:21:14 +02:00
										 |  |  | # DOCKER_BASE_IMAGE = artefakt.dev.sbb.berlin:5000/sbb/ocrd_core:v2.68.0
 | 
					
						
							| 
									
										
										
										
											2025-05-02 00:13:06 +02:00
										 |  |  | DOCKER_BASE_IMAGE ?= docker.io/ocrd/core-cuda-tf2:latest | 
					
						
							|  |  |  | DOCKER_TAG ?= ocrd/eynollah | 
					
						
							|  |  |  | DOCKER ?= docker | 
					
						
							| 
									
										
										
										
											2024-09-16 18:21:14 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-01 11:13:04 +02:00
										 |  |  | #SEG_MODEL := https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz
 | 
					
						
							|  |  |  | #SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz
 | 
					
						
							| 
									
										
										
										
											2025-09-25 20:35:54 +02:00
										 |  |  | # SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah.tar.gz
 | 
					
						
							| 
									
										
										
										
											2025-04-01 11:13:04 +02:00
										 |  |  | #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
 | 
					
						
							|  |  |  | #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
 | 
					
						
							| 
									
										
										
										
											2025-09-25 20:35:54 +02:00
										 |  |  | SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1 | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL))) | 
					
						
							|  |  |  | SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%) | 
					
						
							| 
									
										
										
										
											2025-04-01 11:13:04 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | BIN_MODELFILE = $(notdir $(BIN_MODEL)) | 
					
						
							|  |  |  | BIN_MODELNAME := default-2021-03-09 | 
					
						
							| 
									
										
										
										
											2025-03-31 14:13:16 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1 | 
					
						
							|  |  |  | OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL))) | 
					
						
							|  |  |  | OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%) | 
					
						
							| 
									
										
										
										
											2025-09-25 22:16:38 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-30 19:20:35 +02:00
										 |  |  | PYTEST_ARGS ?= -vv --isolate | 
					
						
							| 
									
										
										
										
											2024-09-16 18:21:14 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-20 17:37:35 +01:00
										 |  |  | # BEGIN-EVAL makefile-parser --make-help Makefile
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | help: | 
					
						
							|  |  |  | 	@echo "" | 
					
						
							|  |  |  | 	@echo "  Targets" | 
					
						
							|  |  |  | 	@echo "" | 
					
						
							| 
									
										
										
										
											2025-03-31 14:13:16 +02:00
										 |  |  | 	@echo "    docker       Build Docker image" | 
					
						
							|  |  |  | 	@echo "    build        Build Python source and binary distribution" | 
					
						
							|  |  |  | 	@echo "    install      Install package with pip" | 
					
						
							| 
									
										
										
										
											2020-11-20 17:37:35 +01:00
										 |  |  | 	@echo "    install-dev  Install editable with pip" | 
					
						
							| 
									
										
										
										
											2025-03-31 14:13:16 +02:00
										 |  |  | 	@echo "    deps-test    Install test dependencies with pip" | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | 	@echo "    models       Download and extract models to $(CURDIR):" | 
					
						
							|  |  |  | 	@echo "                 $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)" | 
					
						
							| 
									
										
										
										
											2025-03-31 14:13:16 +02:00
										 |  |  | 	@echo "    smoke-test   Run simple CLI check" | 
					
						
							| 
									
										
										
										
											2025-03-31 16:55:57 +02:00
										 |  |  | 	@echo "    ocrd-test    Run OCR-D CLI check" | 
					
						
							| 
									
										
										
										
											2020-11-27 14:38:29 +01:00
										 |  |  | 	@echo "    test         Run unit tests" | 
					
						
							| 
									
										
										
										
											2020-11-20 17:37:35 +01:00
										 |  |  | 	@echo "" | 
					
						
							|  |  |  | 	@echo "  Variables" | 
					
						
							| 
									
										
										
										
											2025-03-31 15:58:12 +02:00
										 |  |  | 	@echo "    EXTRAS       comma-separated list of features (like 'OCR,plotting') for 'install' [$(EXTRAS)]" | 
					
						
							| 
									
										
										
										
											2025-03-31 14:13:16 +02:00
										 |  |  | 	@echo "    DOCKER_TAG   Docker image tag for 'docker' [$(DOCKER_TAG)]" | 
					
						
							|  |  |  | 	@echo "    PYTEST_ARGS  pytest args for 'test' (Set to '-s' to see log output during test execution, '-vv' to see individual tests. [$(PYTEST_ARGS)]" | 
					
						
							| 
									
										
										
										
											2025-04-01 11:13:04 +02:00
										 |  |  | 	@echo "    SEG_MODEL    URL of 'models' archive to download for segmentation 'test' [$(SEG_MODEL)]" | 
					
						
							|  |  |  | 	@echo "    BIN_MODEL    URL of 'models' archive to download for binarization 'test' [$(BIN_MODEL)]" | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | 	@echo "    OCR_MODEL    URL of 'models' archive to download for binarization 'test' [$(OCR_MODEL)]" | 
					
						
							| 
									
										
										
										
											2020-11-20 17:37:35 +01:00
										 |  |  | 	@echo "" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # END-EVAL
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-23 13:40:54 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-25 20:35:54 +02:00
										 |  |  | # Download and extract models to $(PWD)/models_layout_v0_5_0
 | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME) | 
					
						
							| 
									
										
										
										
											2020-11-23 13:40:54 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-10-08 12:33:14 +02:00
										 |  |  | # do not download these files if we already have the directories
 | 
					
						
							|  |  |  | .INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | $(BIN_MODELFILE): | 
					
						
							|  |  |  | 	wget -O $@ $(BIN_MODEL) | 
					
						
							|  |  |  | $(SEG_MODELFILE): | 
					
						
							| 
									
										
										
										
											2025-09-25 21:12:52 +02:00
										 |  |  | 	wget -O $@ $(SEG_MODEL) | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | $(OCR_MODELFILE): | 
					
						
							| 
									
										
										
										
											2025-09-25 22:16:38 +02:00
										 |  |  | 	wget -O $@ $(OCR_MODEL) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | $(BIN_MODELNAME): $(BIN_MODELFILE) | 
					
						
							| 
									
										
										
										
											2025-04-01 11:13:04 +02:00
										 |  |  | 	mkdir $@ | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | 	unzip -d $@ $< | 
					
						
							|  |  |  | $(SEG_MODELNAME): $(SEG_MODELFILE) | 
					
						
							|  |  |  | 	tar zxf $< | 
					
						
							|  |  |  | $(OCR_MODELNAME): $(OCR_MODELFILE) | 
					
						
							|  |  |  | 	tar zxf $< | 
					
						
							| 
									
										
										
										
											2025-03-31 14:13:16 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | build: | 
					
						
							|  |  |  | 	$(PIP) install build | 
					
						
							|  |  |  | 	$(PYTHON) -m build . | 
					
						
							| 
									
										
										
										
											2020-11-23 13:40:54 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-20 17:37:35 +01:00
										 |  |  | # Install with pip
 | 
					
						
							|  |  |  | install: | 
					
						
							| 
									
										
										
										
											2025-03-31 15:58:12 +02:00
										 |  |  | 	$(PIP) install .$(and $(EXTRAS),[$(EXTRAS)]) | 
					
						
							| 
									
										
										
										
											2020-11-20 17:37:35 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Install editable with pip
 | 
					
						
							|  |  |  | install-dev: | 
					
						
							| 
									
										
										
										
											2025-03-31 15:58:12 +02:00
										 |  |  | 	$(PIP) install -e .$(and $(EXTRAS),[$(EXTRAS)]) | 
					
						
							| 
									
										
										
										
											2025-03-31 14:13:16 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | ifeq (OCR,$(findstring OCR, $(EXTRAS))) | 
					
						
							|  |  |  | deps-test: $(OCR_MODELNAME) | 
					
						
							|  |  |  | endif | 
					
						
							|  |  |  | deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME) | 
					
						
							| 
									
										
										
										
											2025-03-31 14:13:16 +02:00
										 |  |  | 	$(PIP) install -r requirements-test.txt | 
					
						
							| 
									
										
										
										
											2025-10-06 21:27:21 +02:00
										 |  |  | ifeq (OCR,$(findstring OCR, $(EXTRAS))) | 
					
						
							| 
									
										
										
										
											2025-10-07 00:54:25 +02:00
										 |  |  | 	ln -rs $(OCR_MODELNAME)/* $(SEG_MODELNAME)/ | 
					
						
							| 
									
										
										
										
											2025-10-06 21:27:21 +02:00
										 |  |  | endif | 
					
						
							| 
									
										
										
										
											2020-11-23 13:40:54 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-31 16:55:57 +02:00
										 |  |  | smoke-test: TMPDIR != mktemp -d | 
					
						
							| 
									
										
										
										
											2025-04-01 11:13:04 +02:00
										 |  |  | smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif | 
					
						
							| 
									
										
										
										
											2025-04-01 22:43:30 +02:00
										 |  |  | 	# layout analysis: | 
					
						
							| 
									
										
										
										
											2025-10-06 21:27:21 +02:00
										 |  |  | 	eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME) | 
					
						
							| 
									
										
										
										
											2025-03-31 16:55:57 +02:00
										 |  |  | 	fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml | 
					
						
							|  |  |  | 	fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml | 
					
						
							| 
									
										
										
										
											2025-09-25 16:05:45 +02:00
										 |  |  | 	# layout, directory mode (skip one, add one): | 
					
						
							| 
									
										
										
										
											2025-10-06 21:27:21 +02:00
										 |  |  | 	eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME) | 
					
						
							| 
									
										
										
										
											2025-04-01 22:43:30 +02:00
										 |  |  | 	test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml | 
					
						
							| 
									
										
										
										
											2025-09-25 16:05:45 +02:00
										 |  |  | 	# mbreorder, directory mode (overwrite): | 
					
						
							| 
									
										
										
										
											2025-10-06 21:27:21 +02:00
										 |  |  | 	eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME) | 
					
						
							| 
									
										
										
										
											2025-09-25 16:05:45 +02:00
										 |  |  | 	fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml | 
					
						
							|  |  |  | 	fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml | 
					
						
							| 
									
										
										
										
											2025-04-01 22:43:30 +02:00
										 |  |  | 	# binarize: | 
					
						
							| 
									
										
										
										
											2025-10-06 21:27:21 +02:00
										 |  |  | 	eynollah binarization -m $(CURDIR)/$(BIN_MODELNAME) -i $< -o $(TMPDIR)/$(<F) | 
					
						
							| 
									
										
										
										
											2025-04-01 11:13:04 +02:00
										 |  |  | 	test -s $(TMPDIR)/$(<F) | 
					
						
							|  |  |  | 	@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))" | 
					
						
							| 
									
										
										
										
											2025-09-25 16:05:45 +02:00
										 |  |  | 	# enhance: | 
					
						
							| 
									
										
										
										
											2025-10-06 21:27:21 +02:00
										 |  |  | 	eynollah enhancement -m $(CURDIR)/$(SEG_MODELNAME) -sos -i $< -o $(TMPDIR) -O | 
					
						
							| 
									
										
										
										
											2025-09-25 16:05:45 +02:00
										 |  |  | 	test -s $(TMPDIR)/$(<F) | 
					
						
							|  |  |  | 	@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))" | 
					
						
							| 
									
										
										
										
											2025-03-31 16:55:57 +02:00
										 |  |  | 	$(RM) -r $(TMPDIR) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-04 22:22:50 +02:00
										 |  |  | ocrd-test: export OCRD_MISSING_OUTPUT := ABORT | 
					
						
							| 
									
										
										
										
											2025-03-31 16:55:57 +02:00
										 |  |  | ocrd-test: TMPDIR != mktemp -d | 
					
						
							| 
									
										
										
										
											2025-04-01 11:13:04 +02:00
										 |  |  | ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif | 
					
						
							| 
									
										
										
										
											2025-03-31 16:55:57 +02:00
										 |  |  | 	cp $< $(TMPDIR) | 
					
						
							|  |  |  | 	ocrd workspace -d $(TMPDIR) init | 
					
						
							|  |  |  | 	ocrd workspace -d $(TMPDIR) add -G OCR-D-IMG -g PHYS_0020 -i OCR-D-IMG_0020 $(<F) | 
					
						
							| 
									
										
										
										
											2025-10-06 21:27:21 +02:00
										 |  |  | 	ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/$(SEG_MODELNAME) | 
					
						
							| 
									
										
										
										
											2025-03-31 16:55:57 +02:00
										 |  |  | 	result=$$(ocrd workspace -d $(TMPDIR) find -G OCR-D-SEG); \
 | 
					
						
							|  |  |  | 	fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$$result && \
 | 
					
						
							|  |  |  | 	fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$$result | 
					
						
							| 
									
										
										
										
											2025-10-06 21:27:21 +02:00
										 |  |  | 	ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/$(BIN_MODELNAME) | 
					
						
							|  |  |  | 	ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/$(BIN_MODELNAME) -P operation_level region | 
					
						
							| 
									
										
										
										
											2025-03-31 16:55:57 +02:00
										 |  |  | 	$(RM) -r $(TMPDIR) | 
					
						
							| 
									
										
										
										
											2020-11-27 14:38:29 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Run unit tests
 | 
					
						
							| 
									
										
										
										
											2025-09-30 21:50:21 +02:00
										 |  |  | test: export MODELS_LAYOUT=$(CURDIR)/$(SEG_MODELNAME) | 
					
						
							|  |  |  | test: export MODELS_OCR=$(CURDIR)/$(OCR_MODELNAME) | 
					
						
							|  |  |  | test: export MODELS_BIN=$(CURDIR)/$(BIN_MODELNAME) | 
					
						
							| 
									
										
										
										
											2025-04-01 11:13:04 +02:00
										 |  |  | test: | 
					
						
							| 
									
										
										
										
											2025-04-04 23:37:00 +02:00
										 |  |  | 	$(PYTHON) -m pytest tests --durations=0 --continue-on-collection-errors $(PYTEST_ARGS) | 
					
						
							| 
									
										
										
										
											2024-09-16 18:21:14 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-04 22:22:50 +02:00
										 |  |  | coverage: | 
					
						
							|  |  |  | 	coverage erase | 
					
						
							|  |  |  | 	$(MAKE) test PYTHON="coverage run" | 
					
						
							| 
									
										
										
										
											2025-04-05 01:34:28 +02:00
										 |  |  | 	coverage report -m | 
					
						
							| 
									
										
										
										
											2025-04-04 22:22:50 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-16 18:21:14 +02:00
										 |  |  | # Build docker image
 | 
					
						
							|  |  |  | docker: | 
					
						
							| 
									
										
										
										
											2025-05-02 00:13:06 +02:00
										 |  |  | 	$(DOCKER) build \
 | 
					
						
							| 
									
										
										
										
											2024-09-16 18:21:14 +02:00
										 |  |  | 	--build-arg DOCKER_BASE_IMAGE=$(DOCKER_BASE_IMAGE) \
 | 
					
						
							|  |  |  | 	--build-arg VCS_REF=$$(git rev-parse --short HEAD) \
 | 
					
						
							|  |  |  | 	--build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \
 | 
					
						
							|  |  |  | 	-t $(DOCKER_TAG) . | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-04 22:22:50 +02:00
										 |  |  | .PHONY: models build install install-dev test smoke-test ocrd-test coverage docker help |