tests: symlink OCR models into layout model directory

(so layout with OCR options works with our split model packages)
This commit is contained in:
Robert Sachunsky 2025-10-06 21:27:21 +02:00
parent a1904fa660
commit 23535998f7
2 changed files with 13 additions and 9 deletions

View file

@ -90,26 +90,29 @@ deps-test: $(OCR_MODELNAME)
endif endif
deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME) deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME)
$(PIP) install -r requirements-test.txt $(PIP) install -r requirements-test.txt
ifeq (OCR,$(findstring OCR, $(EXTRAS)))
ln -s $(OCR_MODELNAME)/* $(SEG_MODELNAME)/
endif
smoke-test: TMPDIR != mktemp -d smoke-test: TMPDIR != mktemp -d
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
# layout analysis: # layout analysis:
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0 eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
# layout, directory mode (skip one, add one): # layout, directory mode (skip one, add one):
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0 eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
# mbreorder, directory mode (overwrite): # mbreorder, directory mode (overwrite):
eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0 eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
# binarize: # binarize:
eynollah binarization -m $(CURDIR)/default-2021-03-09 -i $< -o $(TMPDIR)/$(<F) eynollah binarization -m $(CURDIR)/$(BIN_MODELNAME) -i $< -o $(TMPDIR)/$(<F)
test -s $(TMPDIR)/$(<F) test -s $(TMPDIR)/$(<F)
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))" @set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
# enhance: # enhance:
eynollah enhancement -m $(CURDIR)/models_layout_v0_5_0 -sos -i $< -o $(TMPDIR) -O eynollah enhancement -m $(CURDIR)/$(SEG_MODELNAME) -sos -i $< -o $(TMPDIR) -O
test -s $(TMPDIR)/$(<F) test -s $(TMPDIR)/$(<F)
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))" @set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
$(RM) -r $(TMPDIR) $(RM) -r $(TMPDIR)
@ -120,12 +123,12 @@ ocrd-test: tests/resources/kant_aufklaerung_1784_0020.tif
cp $< $(TMPDIR) cp $< $(TMPDIR)
ocrd workspace -d $(TMPDIR) init ocrd workspace -d $(TMPDIR) init
ocrd workspace -d $(TMPDIR) add -G OCR-D-IMG -g PHYS_0020 -i OCR-D-IMG_0020 $(<F) ocrd workspace -d $(TMPDIR) add -G OCR-D-IMG -g PHYS_0020 -i OCR-D-IMG_0020 $(<F)
ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/models_layout_v0_5_0 ocrd-eynollah-segment -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-SEG -P models $(CURDIR)/$(SEG_MODELNAME)
result=$$(ocrd workspace -d $(TMPDIR) find -G OCR-D-SEG); \ result=$$(ocrd workspace -d $(TMPDIR) find -G OCR-D-SEG); \
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$$result && \ fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$$result && \
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$$result fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$$result
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/default-2021-03-09 ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-IMG -O OCR-D-BIN -P model $(CURDIR)/$(BIN_MODELNAME)
ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/default-2021-03-09 -P operation_level region ocrd-sbb-binarize -w $(TMPDIR) -I OCR-D-SEG -O OCR-D-SEG-BIN -P model $(CURDIR)/$(BIN_MODELNAME) -P operation_level region
$(RM) -r $(TMPDIR) $(RM) -r $(TMPDIR)
# Run unit tests # Run unit tests

View file

@ -17,7 +17,7 @@ from ocrd_models.constants import NAMESPACES as NS
testdir = Path(__file__).parent.resolve() testdir = Path(__file__).parent.resolve()
MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve())) MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve()))
MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve())) MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_1').resolve()))
MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve())) MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -31,6 +31,7 @@ MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-
"--textline_light", "--light_version"], "--textline_light", "--light_version"],
# -ep ... # -ep ...
# -eoi ... # -eoi ...
# FIXME: find out whether OCR extra was installed, otherwise skip these
["--do_ocr"], ["--do_ocr"],
["--do_ocr", "--light_version", "--textline_light"], ["--do_ocr", "--light_version", "--textline_light"],
["--do_ocr", "--transformer_ocr"], ["--do_ocr", "--transformer_ocr"],