Merge pull request #25 from OCR-D/resolve-resources

processor: self.resolve_resource model in addition to SBB_BINARIZE_DATA
pull/28/head
vahidrezanezhad 4 years ago committed by GitHub
commit fdd5587247
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -9,14 +9,13 @@ jobs:
- checkout
- restore_cache:
keys:
- model-cache
- ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
- run: make install
- run: make model
- save_cache:
key: model-cache
key: ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
paths:
models.tar.gz
models
- run: make install
ocrd-resources
- run: git submodule update --init
- run: make test
@ -27,14 +26,13 @@ jobs:
- checkout
- restore_cache:
keys:
- model-cache
- ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
- run: make install
- run: make model
- save_cache:
key: model-cache
key: ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
paths:
models.tar.gz
models
- run: make install
ocrd-resources
- run: git submodule update --init
- run: make test

@ -1,6 +1,3 @@
# Directory to store models
MODEL_DIR = $(PWD)/models
# BEGIN-EVAL makefile-parser --make-help Makefile
help:
@ -22,15 +19,11 @@ install:
pip install .
# Downloads the pre-trained models from qurator-data.de
model: $(MODEL_DIR)/model1_bin.h5
$(MODEL_DIR)/model1_bin.h5: models.tar.gz
tar xf models.tar.gz
models.tar.gz:
wget 'https://qurator-data.de/sbb_binarization/models.tar.gz'
.PHONY: model
model:
ocrd resmgr download --allow-uninstalled --location cwd ocrd-sbb-binarize default
# Run tests
test: model
cd repo/assets/data/kant_aufklaerung_1784/data; ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR)
cd repo/assets/data/kant_aufklaerung_1784-page-region/data; ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR) -P level-of-operation region
ocrd-sbb-binarize -m repo/assets/data/kant_aufklaerung_1784/data/mets.xml -I OCR-D-IMG -O BIN -P model default
ocrd-sbb-binarize -m repo/assets/data/kant_aufklaerung_1784-page-region/data/mets.xml -I OCR-D-IMG -O BIN -P model default -P operation_level region

@ -1,7 +1,7 @@
numpy >= 1.17.0, < 1.19.0
setuptools >= 41
opencv-python-headless
ocrd >= 2.18.0
ocrd >= 2.22.3
keras >= 2.3.1, < 2.4
h5py < 3
tensorflow-gpu >= 1.15, < 1.16

@ -40,15 +40,17 @@ class SbbBinarizeProcessor(Processor):
kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
kwargs['version'] = OCRD_TOOL['version']
if not(kwargs.get('show_help', None) or kwargs.get('dump_json', None) or kwargs.get('show_version')):
LOG = getLogger('processor.SbbBinarize.__init__')
if not 'model' in kwargs['parameter']:
raise ValueError("'model' parameter is required")
model_path = Path(kwargs['parameter']['model'])
if not model_path.is_absolute():
if 'SBB_BINARIZE_DATA' in environ:
if 'SBB_BINARIZE_DATA' in environ and environ['SBB_BINARIZE_DATA']:
LOG.info("Environment variable SBB_BINARIZE_DATA is set to '%s' - prepending to model value '%s'. If you don't want this mechanism, unset the SBB_BINARIZE_DATA environment variable.", environ['SBB_BINARIZE_DATA'], model_path)
model_path = Path(environ['SBB_BINARIZE_DATA']).joinpath(model_path)
model_path = model_path.resolve()
if not model_path.is_dir():
raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path)
model_path = model_path.resolve()
if not model_path.is_dir():
raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path)
kwargs['parameter']['model'] = str(model_path)
super().__init__(*args, **kwargs)
@ -61,7 +63,7 @@ class SbbBinarizeProcessor(Processor):
assert_file_grp_cardinality(self.output_file_grp, 1)
oplevel = self.parameter['operation_level']
model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init
model_path = self.resolve_resource(self.parameter['model'])
binarizer = SbbBinarizer(model_dir=model_path, logger=LOG)
for n, input_file in enumerate(self.input_files):

Loading…
Cancel
Save