diff --git a/Makefile b/Makefile
index 7c26e8b..02fef8a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,17 @@
 # BEGIN-EVAL makefile-parser --make-help Makefile
 
+.PHONY: help install
 help:
 	@echo ""
 	@echo " Targets"
 	@echo ""
 	@echo " install Install with pip"
-	@echo " model Downloads the pre-trained models from qurator-data.de"
+	@echo " models Downloads the pre-trained models from qurator-data.de"
 	@echo " test Run tests"
+	@echo " clean Remove copies/results in test/assets"
 	@echo ""
 	@echo " Variables"
 	@echo ""
-	@echo " MODEL_DIR Directory to store models"
 
 # END-EVAL
 
@@ -19,11 +20,31 @@ install:
 	pip install .
 
 # Downloads the pre-trained models from qurator-data.de
-.PHONY: model
-model:
-	ocrd resmgr download --allow-uninstalled --location cwd ocrd-sbb-binarize default
+.PHONY: models
+models:
+	ocrd resmgr download ocrd-sbb-binarize "*"
+
+# Check out the test-data submodule. Phony on purpose: after a plain clone the
+# submodule directory already exists (empty), so a file target would never run.
+.PHONY: repo/assets
+repo/assets:
+	git submodule update --init repo/assets
+
+# Setup test data
+# (order-only prerequisite: the checkout must come first but must not force a re-copy)
+test/assets: | repo/assets
+	@mkdir -p $@
+	cp -r repo/assets/data/* $@
 
 # Run tests
-test: model
-	ocrd-sbb-binarize -m repo/assets/data/kant_aufklaerung_1784/data/mets.xml -I OCR-D-IMG -O BIN -P model default
-	ocrd-sbb-binarize -m repo/assets/data/kant_aufklaerung_1784-page-region/data/mets.xml -I OCR-D-IMG -O BIN -P model default -P operation_level region
+.PHONY: test
+test: test/assets models
+	ocrd-sbb-binarize -m test/assets/kant_aufklaerung_1784/data/mets.xml -I OCR-D-IMG -O BIN -P model default
+	ocrd-sbb-binarize -m test/assets/kant_aufklaerung_1784/data/mets.xml -I OCR-D-IMG -O BIN2 -P model default-2021-03-09
+	ocrd-sbb-binarize -m test/assets/kant_aufklaerung_1784-page-region/data/mets.xml -g phys_0001 -I OCR-D-GT-SEG-REGION -O BIN -P model default -P operation_level region
+	ocrd-sbb-binarize -m test/assets/kant_aufklaerung_1784-page-region/data/mets.xml -g phys_0001 -I OCR-D-GT-SEG-REGION -O BIN2 -P model default-2021-03-09 -P operation_level region
+
+# Remove copies/results in test/assets
+.PHONY: clean
+clean:
+	$(RM) -r test/assets
diff --git a/README.md b/README.md
index 8ec4e4b..c150683 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Clone the repository, enter it and run ### Models -Pre-trained models in `HDF5` format can be downloaded from here: +Pre-trained models in HDF5 format can be downloaded from here: https://qurator-data.de/sbb_binarization/ @@ -26,6 +26,11 @@ We also provide a Tensorflow `saved_model` via Huggingface: https://huggingface.co/SBB/sbb_binarization +With [OCR-D](https://ocr-d.de/), you can use the [Resource Manager](https://ocr-d.de/en/models) to deploy models, e.g. + + ocrd resmgr download ocrd-sbb-binarize "*" + + ## Usage ```sh @@ -39,11 +44,22 @@ Images containing a lot of border noise (black pixels) should be cropped beforeh ### Example -```sh -sbb_binarize -m /path/to/model/ myimage.tif myimage-bin.tif -``` -To use the [OCR-D](https://ocr-d.de/) interface: -```sh -ocrd-sbb-binarize --overwrite -I INPUT_FILE_GRP -O OCR-D-IMG-BIN -P model "/var/lib/sbb_binarization" -``` + sbb_binarize -m /path/to/model/ myimage.tif myimage-bin.tif + + +To use the [OCR-D](https://ocr-d.de/en/spec/cli) interface: + + ocrd-sbb-binarize -I INPUT_FILE_GRP -O OCR-D-IMG-BIN -P model default + + +## Testing + +For simple smoke tests, the following will +- download models +- download test data +- run the OCR-D wrapper (on page and region level): + + + make models + make test