Mirror of https://github.com/qurator-spk/sbb_binarization.git (synced 2025-07-15 13:59:52 +02:00)
Merge pull request #25 from OCR-D/resolve-resources
processor: self.resolve_resource model in addition to SBB_BINARIZE_DATA
This commit is contained in:
commit
fdd5587247
4 changed files with 21 additions and 28 deletions
|
@ -9,14 +9,13 @@ jobs:
|
||||||
- checkout
|
- checkout
|
||||||
- restore_cache:
|
- restore_cache:
|
||||||
keys:
|
keys:
|
||||||
- model-cache
|
- ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
|
||||||
|
- run: make install
|
||||||
- run: make model
|
- run: make model
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: model-cache
|
key: ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
|
||||||
paths:
|
paths:
|
||||||
models.tar.gz
|
ocrd-resources
|
||||||
models
|
|
||||||
- run: make install
|
|
||||||
- run: git submodule update --init
|
- run: git submodule update --init
|
||||||
- run: make test
|
- run: make test
|
||||||
|
|
||||||
|
@ -27,14 +26,13 @@ jobs:
|
||||||
- checkout
|
- checkout
|
||||||
- restore_cache:
|
- restore_cache:
|
||||||
keys:
|
keys:
|
||||||
- model-cache
|
- ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
|
||||||
|
- run: make install
|
||||||
- run: make model
|
- run: make model
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: model-cache
|
key: ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
|
||||||
paths:
|
paths:
|
||||||
models.tar.gz
|
ocrd-resources
|
||||||
models
|
|
||||||
- run: make install
|
|
||||||
- run: git submodule update --init
|
- run: git submodule update --init
|
||||||
- run: make test
|
- run: make test
|
||||||
|
|
||||||
|
|
17
Makefile
17
Makefile
|
@ -1,6 +1,3 @@
|
||||||
# Directory to store models
|
|
||||||
MODEL_DIR = $(PWD)/models
|
|
||||||
|
|
||||||
# BEGIN-EVAL makefile-parser --make-help Makefile
|
# BEGIN-EVAL makefile-parser --make-help Makefile
|
||||||
|
|
||||||
help:
|
help:
|
||||||
|
@ -22,15 +19,11 @@ install:
|
||||||
pip install .
|
pip install .
|
||||||
|
|
||||||
# Downloads the pre-trained models from qurator-data.de
|
# Downloads the pre-trained models from qurator-data.de
|
||||||
model: $(MODEL_DIR)/model1_bin.h5
|
.PHONY: model
|
||||||
|
model:
|
||||||
$(MODEL_DIR)/model1_bin.h5: models.tar.gz
|
ocrd resmgr download --allow-uninstalled --location cwd ocrd-sbb-binarize default
|
||||||
tar xf models.tar.gz
|
|
||||||
|
|
||||||
models.tar.gz:
|
|
||||||
wget 'https://qurator-data.de/sbb_binarization/models.tar.gz'
|
|
||||||
|
|
||||||
# Run tests
|
# Run tests
|
||||||
test: model
|
test: model
|
||||||
cd repo/assets/data/kant_aufklaerung_1784/data; ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR)
|
ocrd-sbb-binarize -m repo/assets/data/kant_aufklaerung_1784/data/mets.xml -I OCR-D-IMG -O BIN -P model default
|
||||||
cd repo/assets/data/kant_aufklaerung_1784-page-region/data; ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR) -P level-of-operation region
|
ocrd-sbb-binarize -m repo/assets/data/kant_aufklaerung_1784-page-region/data/mets.xml -I OCR-D-IMG -O BIN -P model default -P operation_level region
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
numpy >= 1.17.0, < 1.19.0
|
numpy >= 1.17.0, < 1.19.0
|
||||||
setuptools >= 41
|
setuptools >= 41
|
||||||
opencv-python-headless
|
opencv-python-headless
|
||||||
ocrd >= 2.18.0
|
ocrd >= 2.22.3
|
||||||
keras >= 2.3.1, < 2.4
|
keras >= 2.3.1, < 2.4
|
||||||
h5py < 3
|
h5py < 3
|
||||||
tensorflow-gpu >= 1.15, < 1.16
|
tensorflow-gpu >= 1.15, < 1.16
|
||||||
|
|
|
@ -40,15 +40,17 @@ class SbbBinarizeProcessor(Processor):
|
||||||
kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
|
kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
|
||||||
kwargs['version'] = OCRD_TOOL['version']
|
kwargs['version'] = OCRD_TOOL['version']
|
||||||
if not(kwargs.get('show_help', None) or kwargs.get('dump_json', None) or kwargs.get('show_version')):
|
if not(kwargs.get('show_help', None) or kwargs.get('dump_json', None) or kwargs.get('show_version')):
|
||||||
|
LOG = getLogger('processor.SbbBinarize.__init__')
|
||||||
if not 'model' in kwargs['parameter']:
|
if not 'model' in kwargs['parameter']:
|
||||||
raise ValueError("'model' parameter is required")
|
raise ValueError("'model' parameter is required")
|
||||||
model_path = Path(kwargs['parameter']['model'])
|
model_path = Path(kwargs['parameter']['model'])
|
||||||
if not model_path.is_absolute():
|
if not model_path.is_absolute():
|
||||||
if 'SBB_BINARIZE_DATA' in environ:
|
if 'SBB_BINARIZE_DATA' in environ and environ['SBB_BINARIZE_DATA']:
|
||||||
|
LOG.info("Environment variable SBB_BINARIZE_DATA is set to '%s' - prepending to model value '%s'. If you don't want this mechanism, unset the SBB_BINARIZE_DATA environment variable.", environ['SBB_BINARIZE_DATA'], model_path)
|
||||||
model_path = Path(environ['SBB_BINARIZE_DATA']).joinpath(model_path)
|
model_path = Path(environ['SBB_BINARIZE_DATA']).joinpath(model_path)
|
||||||
model_path = model_path.resolve()
|
model_path = model_path.resolve()
|
||||||
if not model_path.is_dir():
|
if not model_path.is_dir():
|
||||||
raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path)
|
raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path)
|
||||||
kwargs['parameter']['model'] = str(model_path)
|
kwargs['parameter']['model'] = str(model_path)
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
@ -61,7 +63,7 @@ class SbbBinarizeProcessor(Processor):
|
||||||
assert_file_grp_cardinality(self.output_file_grp, 1)
|
assert_file_grp_cardinality(self.output_file_grp, 1)
|
||||||
|
|
||||||
oplevel = self.parameter['operation_level']
|
oplevel = self.parameter['operation_level']
|
||||||
model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init
|
model_path = self.resolve_resource(self.parameter['model'])
|
||||||
binarizer = SbbBinarizer(model_dir=model_path, logger=LOG)
|
binarizer = SbbBinarizer(model_dir=model_path, logger=LOG)
|
||||||
|
|
||||||
for n, input_file in enumerate(self.input_files):
|
for n, input_file in enumerate(self.input_files):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue