Add test images; call TrOCR processor from the same directory as the TrOCR model

This commit is contained in:
vahidrezanezhad 2025-11-07 12:47:21 +01:00
parent 8732007aaf
commit ed5b5c13dd
6 changed files with 8 additions and 8 deletions

View file

@@ -76,22 +76,22 @@ deps-test:
smoke-test: TMPDIR != mktemp -d smoke-test: TMPDIR != mktemp -d
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
# layout analysis: # layout analysis:
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah eynollah -m $(CURDIR)/models_eynollah layout -i $< -o $(TMPDIR)
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
# layout, directory mode (skip one, add one): # layout, directory mode (skip one, add one):
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_eynollah eynollah -m $(CURDIR)/models_eynollah layout -di $(<D) -o $(TMPDIR)
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
# mbreorder, directory mode (overwrite): # mbreorder, directory mode (overwrite):
eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME) eynollah -m $(CURDIR)/$(SEG_MODELNAME) machine-based-reading-order -di $(<D) -o $(TMPDIR)
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
# binarize: # binarize:
eynollah binarization -m $(CURDIR)/models_eynollah/eynollah-binarization_20210425 -i $< -o $(TMPDIR)/$(<F) eynollah -m $(CURDIR)/models_eynollah/eynollah-binarization_20210425 binarization -i $< -o $(TMPDIR)/$(<F)
test -s $(TMPDIR)/$(<F) test -s $(TMPDIR)/$(<F)
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))" @set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
# enhance: # enhance:
eynollah enhancement -m $(CURDIR)/models_eynollah -sos -i $< -o $(TMPDIR) -O eynollah -m $(CURDIR)/models_eynollah enhancement -sos -i $< -o $(TMPDIR) -O
test -s $(TMPDIR)/$(<F) test -s $(TMPDIR)/$(<F)
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))" @set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
$(RM) -r $(TMPDIR) $(RM) -r $(TMPDIR)

View file

@@ -295,7 +295,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
EynollahModelSpec( EynollahModelSpec(
category="trocr_processor", category="trocr_processor",
variant='', variant='',
filename="models_eynollah/microsoft/trocr-base-printed", filename="models_eynollah/model_eynollah_ocr_trocr_20250919",
dist_url=dist_url("trocr"), dist_url=dist_url("trocr"),
dists=['trocr'], dists=['trocr'],
type='TrOCRProcessor', type='TrOCRProcessor',

View file

@@ -13,7 +13,7 @@ def test_run_eynollah_binarization_filename(
tests_dir, tests_dir,
options, options,
): ):
infile = tests_dir.joinpath('resources/kant_aufklaerung_1784_0020.tif') infile = tests_dir.joinpath('resources/2files/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
run_eynollah_ok_and_check_logs( run_eynollah_ok_and_check_logs(
'binarization', 'binarization',

View file

@@ -5,10 +5,10 @@ from ocrd_models.constants import NAMESPACES as NS
@pytest.mark.parametrize( @pytest.mark.parametrize(
"options", "options",
[ [
["-trocr"],
[], # defaults [], # defaults
["-doit", #str(outrenderfile.parent)], ["-doit", #str(outrenderfile.parent)],
], ],
["-trocr"],
], ids=str) ], ids=str)
def test_run_eynollah_ocr_filename( def test_run_eynollah_ocr_filename(
tmp_path, tmp_path,

Binary file not shown.