mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-15 19:09:58 +02:00
tests: symlink OCR models into layout model directory
(so layout with OCR options works with our split model packages)
This commit is contained in:
parent
a1904fa660
commit
23535998f7
2 changed files with 13 additions and 9 deletions
19
Makefile
19
Makefile
|
@ -90,26 +90,29 @@ deps-test: $(OCR_MODELNAME)
|
||||||
endif
|
endif
|
||||||
deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME)
|
deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME)
|
||||||
$(PIP) install -r requirements-test.txt
|
$(PIP) install -r requirements-test.txt
|
||||||
|
ifeq (OCR,$(findstring OCR, $(EXTRAS)))
|
||||||
|
# Link OCR model files into the layout model directory using absolute targets: if OCR_MODELNAME is a relative path, a plain `ln -s` would create links whose targets are resolved relative to $(SEG_MODELNAME)/ and therefore dangle. -f keeps repeated `make deps-test` runs from failing on already existing links.
ln -sf $(abspath $(OCR_MODELNAME))/* $(SEG_MODELNAME)/
|
||||||
|
endif
|
||||||
|
|
||||||
smoke-test: TMPDIR != mktemp -d
|
smoke-test: TMPDIR != mktemp -d
|
||||||
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
||||||
# layout analysis:
|
# layout analysis:
|
||||||
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0
|
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
|
||||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
||||||
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
|
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
|
||||||
# layout, directory mode (skip one, add one):
|
# layout, directory mode (skip one, add one):
|
||||||
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0
|
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
|
||||||
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
|
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
|
||||||
# mbreorder, directory mode (overwrite):
|
# mbreorder, directory mode (overwrite):
|
||||||
eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0
|
eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
|
||||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
||||||
fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
|
fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
|
||||||
# binarize:
|
# binarize:
|
||||||
eynollah binarization -m $(CURDIR)/default-2021-03-09 -i $< -o $(TMPDIR)/$(<F)
|
eynollah binarization -m $(CURDIR)/$(BIN_MODELNAME) -i $< -o $(TMPDIR)/$(<F)
|
||||||
test -s $(TMPDIR)/$(<F)
|
test -s $(TMPDIR)/$(<F)
|
||||||
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
||||||
# enhance:
|
# enhance:
|
||||||
eynollah enhancement -m $(CURDIR)/models_layout_v0_5_0 -sos -i $< -o $(TMPDIR) -O
|
eynollah enhancement -m $(CURDIR)/$(SEG_MODELNAME) -sos -i $< -o $(TMPDIR) -O
|
||||||
test -s $(TMPDIR)/$(<F)
|
test -s $(TMPDIR)/$(<F)
|
||||||
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
||||||
$(RM) -r $(TMPDIR)
|
$(RM) -r $(TMPDIR)
|
||||||
|
@ -120,12 +123,12 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
||||||
cp $< $(TMPDIR)
|
cp $< $(TMPDIR)
|
||||||
ocrd workspace -d $(TMPDIR) init
|
ocrd workspace -d $(TMPDIR) init
|
||||||
ocrd workspace -d $(TMPDIR) add -G OCR-D-IMG -g PHYS_0020 -i OCR-D-IMG_0020 $(<F)
|
ocrd workspace -d $(TMPDIR) add -G OCR-D-IMG -g PHYS_0020 -i OCR-D-IMG_0020 $(<F)
|
||||||
ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/models_layout_v0_5_0
|
ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/$(SEG_MODELNAME)
|
||||||
result=$$(ocrd workspace -d $(TMPDIR) find -G OCR-D-SEG); \
|
result=$$(ocrd workspace -d $(TMPDIR) find -G OCR-D-SEG); \
|
||||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$$result && \
|
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$$result && \
|
||||||
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$$result
|
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$$result
|
||||||
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/default-2021-03-09
|
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/$(BIN_MODELNAME)
|
||||||
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/default-2021-03-09 -P operation_level region
|
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/$(BIN_MODELNAME) -P operation_level region
|
||||||
$(RM) -r $(TMPDIR)
|
$(RM) -r $(TMPDIR)
|
||||||
|
|
||||||
# Run unit tests
|
# Run unit tests
|
||||||
|
|
|
@ -17,7 +17,7 @@ from ocrd_models.constants import NAMESPACES as NS
|
||||||
testdir = Path(__file__).parent.resolve()
|
testdir = Path(__file__).parent.resolve()
|
||||||
|
|
||||||
MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve()))
|
MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve()))
|
||||||
MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve()))
|
MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_1').resolve()))
|
||||||
MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
|
MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
@ -31,6 +31,7 @@ MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-
|
||||||
"--textline_light", "--light_version"],
|
"--textline_light", "--light_version"],
|
||||||
# -ep ...
|
# -ep ...
|
||||||
# -eoi ...
|
# -eoi ...
|
||||||
|
# FIXME: find out whether OCR extra was installed, otherwise skip these
|
||||||
["--do_ocr"],
|
["--do_ocr"],
|
||||||
["--do_ocr", "--light_version", "--textline_light"],
|
["--do_ocr", "--light_version", "--textline_light"],
|
||||||
["--do_ocr", "--transformer_ocr"],
|
["--do_ocr", "--transformer_ocr"],
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue