From b54ccf90f70b254bb72fbed83f4dbe72bb56d209 Mon Sep 17 00:00:00 2001
From: Konstantin Baierer
Date: Wed, 21 Aug 2019 19:52:08 +0200
Subject: [PATCH] smoke test, circle ci

Conflicts:
	Makefile
	ocrd_calamari/__init__.py
---
Review notes (text in this section is ignored by git-am):
* Line breaks, tabs and indentation were restored from a whitespace-collapsed
  copy of this patch; the padding inside context/removed lines is a best
  guess, so applying may need `git apply --ignore-whitespace`.
* Makefile, deps-test: use $(PIP_INSTALL) (PIP is never defined) and the
  requirements-test.txt file that this patch actually adds.
* Makefile, coverage: recurse via $(MAKE) so flags and the jobserver are
  inherited; .PHONY now also covers help, install, deps-test and coverage.
* test/test_recognize.py: read the result file with an explicit UTF-8
  encoding, because the assertion looks for non-ASCII text.
* The blob ids on the `index` lines of Makefile and test/test_recognize.py
  predate these fixes.

 .circleci/config.yml      | 21 ++++++++++
 .gitignore                |  4 +-
 Makefile                  | 82 +++++++++++++++++++++++++++++++--------
 ocrd_calamari/__init__.py |  4 ++
 requirements-test.txt     |  2 +
 test/__init__.py          |  0
 test/base.py              | 10 +++++
 test/test_recognize.py    | 62 +++++++++++++++++++++++++++++
 8 files changed, 167 insertions(+), 18 deletions(-)
 create mode 100644 .circleci/config.yml
 create mode 100644 requirements-test.txt
 create mode 100644 test/__init__.py
 create mode 100644 test/base.py
 create mode 100644 test/test_recognize.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 0000000..deaaeb8
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,21 @@
+version: 2.1
+orbs:
+  codecov: codecov/codecov@1.0.5
+
+jobs:
+
+  build-python36:
+    docker:
+      - image: ubuntu:18.04
+    steps:
+      - run: apt-get update ; apt-get install -y make git curl
+      - checkout
+      - run: make install
+      - run: pip install -r requirements-test.txt
+      - run: make coverage
+      - codecov/upload
+
+workflows:
+  build:
+    jobs:
+      - build-python36
diff --git a/.gitignore b/.gitignore
index 1c2329e..d080392 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,7 +102,9 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+
 /calamari
 /calamari_models
 /repo
-/test
+/test/assets
diff --git a/Makefile b/Makefile
index 15a546c..a31830f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,11 @@
-GIT_CLONE = git clone --depth 1
+# '$(PYTHON)'
+PYTHON = python
+
+# '$(PIP_INSTALL)'
+PIP_INSTALL = pip install
 
-# Docker tag
-DOCKER_TAG = ocrd/calamari
+# '$(GIT_CLONE)'
+GIT_CLONE = git clone --depth 1
 
 # BEGIN-EVAL makefile-parser --make-help Makefile
 
@@ -9,31 +13,75 @@ help:
 	@echo ""
 	@echo "  Targets"
 	@echo ""
-	@echo "    calamari         git clone calamari"
-	@echo "    calamari_models  git clone calamari_models"
-	@echo "    calamari/build   Install calamari"
-	@echo "    docker           Build docker image"
+	@echo "    install          Install ocrd_calamari"
+	@echo "    calamari         Clone calamari repo"
+	@echo "    calamari_models  Clone calamari_models repo"
+	@echo "    calamari/build   pip install calamari"
+	@echo "    deps-test        Install testing python deps via pip"
+	@echo "    repo/assets      Clone OCR-D/assets to ./repo/assets"
+	@echo "    test/assets      Setup test assets"
+	@echo "    assets-clean     Remove symlinks in test/assets"
+	@echo "    test             Run unit tests"
+	@echo "    coverage         Run unit tests and determine test coverage"
 	@echo ""
 	@echo "  Variables"
 	@echo ""
-	@echo "    DOCKER_TAG  Docker tag"
+	@echo "    PYTHON       '$(PYTHON)'"
+	@echo "    PIP_INSTALL  '$(PIP_INSTALL)'"
+	@echo "    GIT_CLONE    '$(GIT_CLONE)'"
 
 # END-EVAL
 
-# git clone calamari
+# Install ocrd_calamari
+install:
+	$(PIP_INSTALL) .
+
+# Clone calamari repo
 calamari:
 	$(GIT_CLONE) https://github.com/chwick/calamari
 
-# git clone calamari_models
+# Clone calamari_models repo
 calamari_models:
 	$(GIT_CLONE) https://github.com/chwick/calamari_models
 
-# Install calamari
+# pip install calamari
 calamari/build: calamari calamari_models
-	cd calamari &&\
-	pip install -r requirements.txt ;\
-	python setup.py install
+	cd calamari && $(PIP_INSTALL) .
+
+#
+# Assets and Tests
+#
+
+# Install testing python deps via pip
+deps-test:
+	$(PIP_INSTALL) -r requirements-test.txt
+
+
+# Clone OCR-D/assets to ./repo/assets
+repo/assets:
+	mkdir -p $(dir $@)
+	git clone https://github.com/OCR-D/assets "$@"
+
+
+# Setup test assets
+test/assets: repo/assets
+	mkdir -p $@
+	cp -r -t $@ repo/assets/data/*
+
+# Remove symlinks in test/assets
+assets-clean:
+	rm -rf test/assets
+
+# Run unit tests
+test: test/assets calamari_models
+	# declare -p HTTP_PROXY
+	$(PYTHON) -m pytest --continue-on-collection-errors test $(PYTEST_ARGS)
+
+# Run unit tests and determine test coverage
+coverage:
+	coverage erase
+	$(MAKE) test PYTHON="coverage run"
+	coverage report
+	coverage html
 
-# Build docker image
-docker:
-	docker build -t '$(DOCKER_TAG)' .
+.PHONY: help install deps-test assets-clean test coverage
diff --git a/ocrd_calamari/__init__.py b/ocrd_calamari/__init__.py
index 683a3e2..f56b516 100644
--- a/ocrd_calamari/__init__.py
+++ b/ocrd_calamari/__init__.py
@@ -1 +1,5 @@
+__all__ = [
+    'CalamariRecognize'
+]
+
 from .recognize import CalamariRecognize
diff --git a/requirements-test.txt b/requirements-test.txt
new file mode 100644
index 0000000..7aad3d0
--- /dev/null
+++ b/requirements-test.txt
@@ -0,0 +1,2 @@
+pytest
+ocrd_tesserocr >= 0.4.0
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/base.py b/test/base.py
new file mode 100644
index 0000000..cea8ed3
--- /dev/null
+++ b/test/base.py
@@ -0,0 +1,10 @@
+# pylint: disable=unused-import
+
+import os
+import sys
+from unittest import TestCase, skip, main # pylint: disable=unused-import
+
+from test.assets import assets
+
+PWD = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(PWD + '/../ocrd')
diff --git a/test/test_recognize.py b/test/test_recognize.py
new file mode 100644
index 0000000..ed85485
--- /dev/null
+++ b/test/test_recognize.py
@@ -0,0 +1,62 @@
+import os
+from os.path import join, exists
+import shutil
+
+from test.base import TestCase, main, assets, skip
+
+from ocrd.resolver import Resolver
+
+from ocrd_tesserocr import TesserocrSegmentRegion
+from ocrd_tesserocr import TesserocrSegmentLine
+
+from ocrd_calamari import CalamariRecognize
+
+#METS_HEROLD_SMALL = assets.url_of('SBB0000F29300010000/data/mets_one_file.xml')
+# as long as #96 remains, we cannot use workspaces which have local relative files:
+METS_HEROLD_SMALL = assets.url_of('kant_aufklaerung_1784-binarized/data/mets.xml')
+
+WORKSPACE_DIR = '/tmp/test-ocrd-calamari'
+
+class TestCalamariRecognize(TestCase):
+
+    def setUp(self):
+        if exists(WORKSPACE_DIR):
+            shutil.rmtree(WORKSPACE_DIR)
+        os.makedirs(WORKSPACE_DIR)
+
+    #skip("Takes too long")
+    def runTest(self):
+        resolver = Resolver()
+        workspace = resolver.workspace_from_url(METS_HEROLD_SMALL, dst_dir=WORKSPACE_DIR)
+
+        TesserocrSegmentRegion(
+            workspace,
+            input_file_grp="OCR-D-IMG",
+            output_file_grp="OCR-D-SEG-BLOCK"
+        ).process()
+        workspace.save_mets()
+
+        TesserocrSegmentLine(
+            workspace,
+            input_file_grp="OCR-D-SEG-BLOCK",
+            output_file_grp="OCR-D-SEG-LINE"
+        ).process()
+        workspace.save_mets()
+
+        CalamariRecognize(
+            workspace,
+            input_file_grp="OCR-D-SEG-LINE",
+            output_file_grp="OCR-D-OCR-CALAMARI",
+            parameter={
+                'checkpoint': 'calamari_models/fraktur_historical/*.ckpt.json'
+            }
+        ).process()
+        workspace.save_mets()
+
+        page1 = join(workspace.directory, 'OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml')
+        self.assertTrue(exists(page1))
+        with open(page1, 'r', encoding='utf-8') as f:
+            self.assertIn('verſchuldeten', f.read())
+
+if __name__ == '__main__':
+    main()