diff --git a/.circleci/config.yml b/.circleci/config.yml index ca93957..8f02829 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9,14 +9,13 @@ jobs: - checkout - restore_cache: keys: - - model-cache + - ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }} + - run: make install - run: make model - save_cache: - key: model-cache + key: ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }} paths: - models.tar.gz - models - - run: make install + ocrd-resources - run: git submodule update --init - run: make test @@ -27,14 +26,13 @@ jobs: - checkout - restore_cache: keys: - - model-cache + - ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }} + - run: make install - run: make model - save_cache: - key: model-cache + key: ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }} paths: - models.tar.gz - models - - run: make install + ocrd-resources - run: git submodule update --init - run: make test diff --git a/Makefile b/Makefile index 95ddbfe..7c26e8b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,3 @@ -# Directory to store models -MODEL_DIR = $(PWD)/models - # BEGIN-EVAL makefile-parser --make-help Makefile help: @@ -22,15 +19,11 @@ install: pip install . # Downloads the pre-trained models from qurator-data.de -model: $(MODEL_DIR)/model1_bin.h5 - -$(MODEL_DIR)/model1_bin.h5: models.tar.gz - tar xf models.tar.gz - -models.tar.gz: - wget 'https://qurator-data.de/sbb_binarization/models.tar.gz' +.PHONY: model +model: + ocrd resmgr download --allow-uninstalled --location cwd ocrd-sbb-binarize default # Run tests test: model - cd repo/assets/data/kant_aufklaerung_1784/data; ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR) - cd repo/assets/data/kant_aufklaerung_1784-page-region/data; ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR) -P level-of-operation region + ocrd-sbb-binarize -m repo/assets/data/kant_aufklaerung_1784/data/mets.xml -I OCR-D-IMG -O BIN -P model default + ocrd-sbb-binarize -m repo/assets/data/kant_aufklaerung_1784-page-region/data/mets.xml -I OCR-D-IMG -O BIN -P model default -P operation_level region diff --git a/requirements.txt b/requirements.txt index 6b012f7..85fd500 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy >= 1.17.0, < 1.19.0 setuptools >= 41 opencv-python-headless -ocrd >= 2.18.0 +ocrd >= 2.22.3 keras >= 2.3.1, < 2.4 h5py < 3 tensorflow-gpu >= 1.15, < 1.16 diff --git a/sbb_binarize/ocrd_cli.py b/sbb_binarize/ocrd_cli.py index 7d9a7d5..7f88dfc 100644 --- a/sbb_binarize/ocrd_cli.py +++ b/sbb_binarize/ocrd_cli.py @@ -40,15 +40,17 @@ class SbbBinarizeProcessor(Processor): kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] kwargs['version'] = OCRD_TOOL['version'] if not(kwargs.get('show_help', None) or kwargs.get('dump_json', None) or kwargs.get('show_version')): + LOG = getLogger('processor.SbbBinarize.__init__') if not 'model' in kwargs['parameter']: raise ValueError("'model' parameter is required") model_path = Path(kwargs['parameter']['model']) if not model_path.is_absolute(): - if 'SBB_BINARIZE_DATA' in environ: + if 'SBB_BINARIZE_DATA' in environ and environ['SBB_BINARIZE_DATA']: + LOG.info("Environment variable SBB_BINARIZE_DATA is set to '%s' - prepending to model value '%s'. If you don't want this mechanism, unset the SBB_BINARIZE_DATA environment variable.", environ['SBB_BINARIZE_DATA'], model_path) model_path = Path(environ['SBB_BINARIZE_DATA']).joinpath(model_path) - model_path = model_path.resolve() - if not model_path.is_dir(): - raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path) + model_path = model_path.resolve() + if not model_path.is_dir(): + raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path) kwargs['parameter']['model'] = str(model_path) super().__init__(*args, **kwargs) @@ -61,7 +63,7 @@ class SbbBinarizeProcessor(Processor): assert_file_grp_cardinality(self.output_file_grp, 1) oplevel = self.parameter['operation_level'] - model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init + model_path = self.resolve_resource(self.parameter['model']) binarizer = SbbBinarizer(model_dir=model_path, logger=LOG) for n, input_file in enumerate(self.input_files):