Add test images; call TrOCR processor from the same directory as the TrOCR model

This commit is contained in:
vahidrezanezhad 2025-11-07 12:47:21 +01:00
parent 8732007aaf
commit ed5b5c13dd
6 changed files with 8 additions and 8 deletions

View file

@ -76,22 +76,22 @@ deps-test:
smoke-test: TMPDIR != mktemp -d
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
# layout analysis:
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah
eynollah -m $(CURDIR)/models_eynollah layout -i $< -o $(TMPDIR)
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
# layout, directory mode (skip one, add one):
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_eynollah
eynollah -m $(CURDIR)/models_eynollah layout -di $(<D) -o $(TMPDIR)
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
# mbreorder, directory mode (overwrite):
eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
eynollah -m $(CURDIR)/$(SEG_MODELNAME) machine-based-reading-order -di $(<D) -o $(TMPDIR)
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
# binarize:
eynollah binarization -m $(CURDIR)/models_eynollah/eynollah-binarization_20210425 -i $< -o $(TMPDIR)/$(<F)
eynollah -m $(CURDIR)/models_eynollah/eynollah-binarization_20210425 binarization -i $< -o $(TMPDIR)/$(<F)
test -s $(TMPDIR)/$(<F)
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
# enhance:
eynollah enhancement -m $(CURDIR)/models_eynollah -sos -i $< -o $(TMPDIR) -O
eynollah -m $(CURDIR)/models_eynollah enhancement -sos -i $< -o $(TMPDIR) -O
test -s $(TMPDIR)/$(<F)
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
$(RM) -r $(TMPDIR)

View file

@ -295,7 +295,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
EynollahModelSpec(
category="trocr_processor",
variant='',
filename="models_eynollah/microsoft/trocr-base-printed",
filename="models_eynollah/model_eynollah_ocr_trocr_20250919",
dist_url=dist_url("trocr"),
dists=['trocr'],
type='TrOCRProcessor',

View file

@ -13,7 +13,7 @@ def test_run_eynollah_binarization_filename(
tests_dir,
options,
):
infile = tests_dir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
infile = tests_dir.joinpath('resources/2files/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
run_eynollah_ok_and_check_logs(
'binarization',

View file

@ -5,10 +5,10 @@ from ocrd_models.constants import NAMESPACES as NS
@pytest.mark.parametrize(
"options",
[
["-trocr"],
[], # defaults
["-doit", #str(outrenderfile.parent)],
],
["-trocr"],
], ids=str)
def test_run_eynollah_ocr_filename(
tmp_path,

Binary file not shown.