From e34998d58f5e81851139b0ae098d3bede1b19632 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 20 Aug 2019 15:36:24 +0200 Subject: [PATCH 1/7] :memo: readme, makefile, synopsis for cli Conflicts: Makefile README.md requirements.txt --- Makefile | 48 -------------------------------------------- README.md | 32 ++++++++++++++++++++++++----- ocrd_calamari/cli.py | 3 +++ 3 files changed, 30 insertions(+), 53 deletions(-) diff --git a/Makefile b/Makefile index 9fabd2e..fb51f3e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,3 @@ -# '$(PYTHON)' -PYTHON = python - # '$(PIP_INSTALL)' PIP_INSTALL = pip install @@ -17,16 +14,9 @@ help: @echo " calamari Clone calamari repo" @echo " calamari_models Clone calamari_models repo" @echo " calamari/build pip install calamari" - @echo " deps-test Install testing python deps via pip" - @echo " repo/assets Clone OCR-D/assets to ./repo/assets" - @echo " test/assets Setup test assets" - @echo " assets-clean Remove symlinks in test/assets" - @echo " test Run unit tests" - @echo " coverage Run unit tests and determine test coverage" @echo "" @echo " Variables" @echo "" - @echo " PYTHON '$(PYTHON)'" @echo " PIP_INSTALL '$(PIP_INSTALL)'" @echo " GIT_CLONE '$(GIT_CLONE)'" @@ -50,41 +40,3 @@ calamari_models: # pip install calamari calamari/build: calamari calamari_models cd calamari && $(PIP_INSTALL) . 
- -# -# Assets and Tests -# - -# Install testing python deps via pip -deps-test: - $(PIP) install -r requirements_test.txt - - -# Clone OCR-D/assets to ./repo/assets -repo/assets: - mkdir -p $(dir $@) - git clone https://github.com/OCR-D/assets "$@" - - -# Setup test assets -test/assets: repo/assets - mkdir -p $@ - cp -r -t $@ repo/assets/data/* - -# Remove symlinks in test/assets -assets-clean: - rm -rf test/assets - -# Run unit tests -test: test/assets calamari_models - # declare -p HTTP_PROXY - $(PYTHON) -m pytest --continue-on-collection-errors test $(PYTEST_ARGS) - -# Run unit tests and determine test coverage -coverage: test/assets calamari_models - coverage erase - make test PYTHON="coverage run" - coverage report - coverage html - -.PHONY: assets-clean test diff --git a/README.md b/README.md index dca1ffc..388b81f 100644 --- a/README.md +++ b/README.md @@ -13,19 +13,41 @@ This offers a OCR-D compliant workspace processor for some of the functionality This processor only operates on the text line level and so needs a line segmentation (and by extension a binarized image) as its input. -## Example Usage +## Installation + +### From PyPI + +:construction: :construction: :construction: :construction: :construction: :construction: :construction: + +``` +pip install ocrd_calamari +``` + +### From Repo ```sh -ocrd-calamari-recognize -p test-parameters.json -m mets.xml -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI +pip install . ``` -With `test-parameters.json`: +To install the calamari with the GPU version of Tensorflow: -```json +```sh +pip install 'calamari-ocr[tf_cpu]' +pip install . 
+``` + +## Example Usage + +~~~ +ocrd-calamari-recognize -p test-parameters.json -m mets.xml -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI +~~~ + +With `test-parameters.json`: +~~~ { "checkpoint": "/path/to/some/trained/models/*.ckpt.json" } -``` +~~~ TODO ---- diff --git a/ocrd_calamari/cli.py b/ocrd_calamari/cli.py index 2a1e210..068b065 100644 --- a/ocrd_calamari/cli.py +++ b/ocrd_calamari/cli.py @@ -7,4 +7,7 @@ from ocrd_calamari.recognize import CalamariRecognize @click.command() @ocrd_cli_options def ocrd_calamari_recognize(*args, **kwargs): + """ + Run Calamari OCR multi-model recognition and voting + """ return ocrd_cli_wrap_processor(CalamariRecognize, *args, **kwargs) From 1a88b6d0b15f6531d1a8b87951b33d1b8429d985 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 2 Dec 2019 13:19:45 +0100 Subject: [PATCH 2/7] readme: installation and models --- README.md | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 388b81f..99a92d8 100644 --- a/README.md +++ b/README.md @@ -15,25 +15,15 @@ image) as its input. ## Installation -### From PyPI - -:construction: :construction: :construction: :construction: :construction: :construction: :construction: - ``` pip install ocrd_calamari ``` -### From Repo +## Install models -```sh -pip install . ``` - -To install the calamari with the GPU version of Tensorflow: - -```sh -pip install 'calamari-ocr[tf_cpu]' -pip install . 
+wget https://github.com/Calamari-OCR/calamari_models/archive/master.zip +unzip master.zip ``` ## Example Usage From a8f67f0a3412df7ccd514efe6f1aa83a594abadb Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 2 Dec 2019 13:38:36 +0100 Subject: [PATCH 3/7] :memo: readme: downloading models --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 99a92d8..6158a8b 100644 --- a/README.md +++ b/README.md @@ -21,11 +21,22 @@ pip install ocrd_calamari ## Install models +Download standard models: + ``` wget https://github.com/Calamari-OCR/calamari_models/archive/master.zip unzip master.zip ``` +Download models trained on GT4HistOCR data: + +``` +wget https://file.spk-berlin.de:8443/calamari-models/GT4HistOCR/model.tar.xz +mkdir gt4hist-calamari +cd gt4hist-calamari +tar xf ../model.tar.xz +``` + ## Example Usage ~~~ From 84bf6b3f65476936deda7f97cabb8e1e6e698312 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 3 Dec 2019 13:31:25 +0100 Subject: [PATCH 4/7] :memo: readme: remove construction sign emojis --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6158a8b..27624bf 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ image) as its input. ## Installation +### From PyPI + ``` pip install ocrd_calamari ``` From e0f76242fe9d604601bec57a07591d32c494e148 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 3 Dec 2019 13:33:34 +0100 Subject: [PATCH 5/7] :memo: readme: remove misleading paragraph on installing GPU-capable calamari --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 27624bf..f2081a7 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,12 @@ image) as its input. pip install ocrd_calamari ``` +### From Repo + +```sh +pip install . 
+``` + ## Install models Download standard models: From 0f47d1fca5a30ff3fed0b5764988b096a9f93a5e Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 3 Dec 2019 13:37:50 +0100 Subject: [PATCH 6/7] makefile: pip -> pip3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fb51f3e..5efbb2d 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # '$(PIP_INSTALL)' -PIP_INSTALL = pip install +PIP_INSTALL = pip3 install # '$(GIT_CLONE)' GIT_CLONE = git clone From 0812d203e5b79e8ff00c0fd894e4ff04c252c276 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Wed, 4 Dec 2019 16:50:08 +0100 Subject: [PATCH 7/7] =?UTF-8?q?=F0=9F=90=9B=20Add=20test=20targets=20back?= =?UTF-8?q?=20again=3F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/Makefile b/Makefile index 5efbb2d..d184ff5 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,12 @@ help: @echo " calamari Clone calamari repo" @echo " calamari_models Clone calamari_models repo" @echo " calamari/build pip install calamari" + @echo " deps-test Install testing python deps via pip" + @echo " repo/assets Clone OCR-D/assets to ./repo/assets" + @echo " test/assets Setup test assets" + @echo " assets-clean Remove symlinks in test/assets" + @echo " test Run unit tests" + @echo " coverage Run unit tests and determine test coverage" @echo "" @echo " Variables" @echo "" @@ -40,3 +46,42 @@ calamari_models: # pip install calamari calamari/build: calamari calamari_models cd calamari && $(PIP_INSTALL) . 
+ + +# +# Assets and Tests +# + +# Install testing python deps via pip +deps-test: + $(PIP_INSTALL) -r requirements_test.txt + + +# Clone OCR-D/assets to ./repo/assets +repo/assets: + mkdir -p $(dir $@) + git clone https://github.com/OCR-D/assets "$@" + + +# Setup test assets +test/assets: repo/assets + mkdir -p $@ + cp -r -t $@ repo/assets/data/* + +# Remove symlinks in test/assets +assets-clean: + rm -rf test/assets + +# Run unit tests +test: test/assets calamari_models + # declare -p HTTP_PROXY + $(or $(PYTHON),python3) -m pytest --continue-on-collection-errors test $(PYTEST_ARGS) + +# Run unit tests and determine test coverage +coverage: test/assets calamari_models + coverage erase + $(MAKE) test PYTHON="coverage run" + coverage report + coverage html + +.PHONY: deps-test assets-clean test coverage