mirror of
https://github.com/qurator-spk/sbb_binarization.git
synced 2025-06-26 12:39:54 +02:00
Merge pull request #25 from OCR-D/resolve-resources
processor: self.resolve_resource model in addition to SBB_BINARIZE_DATA
This commit is contained in:
commit
fdd5587247
4 changed files with 21 additions and 28 deletions
|
@ -9,14 +9,13 @@ jobs:
|
|||
- checkout
|
||||
- restore_cache:
|
||||
keys:
|
||||
- model-cache
|
||||
- ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
|
||||
- run: make install
|
||||
- run: make model
|
||||
- save_cache:
|
||||
key: model-cache
|
||||
key: ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
|
||||
paths:
|
||||
models.tar.gz
|
||||
models
|
||||
- run: make install
|
||||
ocrd-resources
|
||||
- run: git submodule update --init
|
||||
- run: make test
|
||||
|
||||
|
@ -27,14 +26,13 @@ jobs:
|
|||
- checkout
|
||||
- restore_cache:
|
||||
keys:
|
||||
- model-cache
|
||||
- ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
|
||||
- run: make install
|
||||
- run: make model
|
||||
- save_cache:
|
||||
key: model-cache
|
||||
key: ocrd-resources-{{ checksum "requirements.txt" }}-{{ checksum "Makefile" }}
|
||||
paths:
|
||||
models.tar.gz
|
||||
models
|
||||
- run: make install
|
||||
ocrd-resources
|
||||
- run: git submodule update --init
|
||||
- run: make test
|
||||
|
||||
|
|
17
Makefile
17
Makefile
|
@ -1,6 +1,3 @@
|
|||
# Directory to store models
|
||||
MODEL_DIR = $(PWD)/models
|
||||
|
||||
# BEGIN-EVAL makefile-parser --make-help Makefile
|
||||
|
||||
help:
|
||||
|
@ -22,15 +19,11 @@ install:
|
|||
pip install .
|
||||
|
||||
# Downloads the pre-trained models from qurator-data.de
|
||||
model: $(MODEL_DIR)/model1_bin.h5
|
||||
|
||||
$(MODEL_DIR)/model1_bin.h5: models.tar.gz
|
||||
tar xf models.tar.gz
|
||||
|
||||
models.tar.gz:
|
||||
wget 'https://qurator-data.de/sbb_binarization/models.tar.gz'
|
||||
.PHONY: model
|
||||
model:
|
||||
ocrd resmgr download --allow-uninstalled --location cwd ocrd-sbb-binarize default
|
||||
|
||||
# Run tests
|
||||
test: model
|
||||
cd repo/assets/data/kant_aufklaerung_1784/data; ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR)
|
||||
cd repo/assets/data/kant_aufklaerung_1784-page-region/data; ocrd-sbb-binarize -I OCR-D-IMG -O BIN -P model $(MODEL_DIR) -P level-of-operation region
|
||||
ocrd-sbb-binarize -m repo/assets/data/kant_aufklaerung_1784/data/mets.xml -I OCR-D-IMG -O BIN -P model default
|
||||
ocrd-sbb-binarize -m repo/assets/data/kant_aufklaerung_1784-page-region/data/mets.xml -I OCR-D-IMG -O BIN -P model default -P operation_level region
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
numpy >= 1.17.0, < 1.19.0
|
||||
setuptools >= 41
|
||||
opencv-python-headless
|
||||
ocrd >= 2.18.0
|
||||
ocrd >= 2.22.3
|
||||
keras >= 2.3.1, < 2.4
|
||||
h5py < 3
|
||||
tensorflow-gpu >= 1.15, < 1.16
|
||||
|
|
|
@ -40,15 +40,17 @@ class SbbBinarizeProcessor(Processor):
|
|||
kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
|
||||
kwargs['version'] = OCRD_TOOL['version']
|
||||
if not(kwargs.get('show_help', None) or kwargs.get('dump_json', None) or kwargs.get('show_version')):
|
||||
LOG = getLogger('processor.SbbBinarize.__init__')
|
||||
if not 'model' in kwargs['parameter']:
|
||||
raise ValueError("'model' parameter is required")
|
||||
model_path = Path(kwargs['parameter']['model'])
|
||||
if not model_path.is_absolute():
|
||||
if 'SBB_BINARIZE_DATA' in environ:
|
||||
if 'SBB_BINARIZE_DATA' in environ and environ['SBB_BINARIZE_DATA']:
|
||||
LOG.info("Environment variable SBB_BINARIZE_DATA is set to '%s' - prepending to model value '%s'. If you don't want this mechanism, unset the SBB_BINARIZE_DATA environment variable.", environ['SBB_BINARIZE_DATA'], model_path)
|
||||
model_path = Path(environ['SBB_BINARIZE_DATA']).joinpath(model_path)
|
||||
model_path = model_path.resolve()
|
||||
if not model_path.is_dir():
|
||||
raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path)
|
||||
model_path = model_path.resolve()
|
||||
if not model_path.is_dir():
|
||||
raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path)
|
||||
kwargs['parameter']['model'] = str(model_path)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
@ -61,7 +63,7 @@ class SbbBinarizeProcessor(Processor):
|
|||
assert_file_grp_cardinality(self.output_file_grp, 1)
|
||||
|
||||
oplevel = self.parameter['operation_level']
|
||||
model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init
|
||||
model_path = self.resolve_resource(self.parameter['model'])
|
||||
binarizer = SbbBinarizer(model_dir=model_path, logger=LOG)
|
||||
|
||||
for n, input_file in enumerate(self.input_files):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue