mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-11-10 06:34:11 +01:00
Add test images; call TrOCR processor from the same directory as the TrOCR model
This commit is contained in:
parent
8732007aaf
commit
ed5b5c13dd
6 changed files with 8 additions and 8 deletions
10
Makefile
10
Makefile
|
|
@ -76,22 +76,22 @@ deps-test:
|
||||||
smoke-test: TMPDIR != mktemp -d
|
smoke-test: TMPDIR != mktemp -d
|
||||||
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
||||||
# layout analysis:
|
# layout analysis:
|
||||||
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah
|
eynollah -m $(CURDIR)/models_eynollah layout -i $< -o $(TMPDIR)
|
||||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
||||||
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
|
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
|
||||||
# layout, directory mode (skip one, add one):
|
# layout, directory mode (skip one, add one):
|
||||||
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_eynollah
|
eynollah -m $(CURDIR)/models_eynollah layout -di $(<D) -o $(TMPDIR)
|
||||||
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
|
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
|
||||||
# mbreorder, directory mode (overwrite):
|
# mbreorder, directory mode (overwrite):
|
||||||
eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
|
eynollah -m $(CURDIR)/$(SEG_MODELNAME) machine-based-reading-order -di $(<D) -o $(TMPDIR)
|
||||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
||||||
fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
|
fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
|
||||||
# binarize:
|
# binarize:
|
||||||
eynollah binarization -m $(CURDIR)/models_eynollah/eynollah-binarization_20210425 -i $< -o $(TMPDIR)/$(<F)
|
eynollah -m $(CURDIR)/models_eynollah/eynollah-binarization_20210425 binarization -i $< -o $(TMPDIR)/$(<F)
|
||||||
test -s $(TMPDIR)/$(<F)
|
test -s $(TMPDIR)/$(<F)
|
||||||
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
||||||
# enhance:
|
# enhance:
|
||||||
eynollah enhancement -m $(CURDIR)/models_eynollah -sos -i $< -o $(TMPDIR) -O
|
eynollah -m $(CURDIR)/models_eynollah enhancement -sos -i $< -o $(TMPDIR) -O
|
||||||
test -s $(TMPDIR)/$(<F)
|
test -s $(TMPDIR)/$(<F)
|
||||||
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
||||||
$(RM) -r $(TMPDIR)
|
$(RM) -r $(TMPDIR)
|
||||||
|
|
|
||||||
|
|
@ -295,7 +295,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
||||||
EynollahModelSpec(
|
EynollahModelSpec(
|
||||||
category="trocr_processor",
|
category="trocr_processor",
|
||||||
variant='',
|
variant='',
|
||||||
filename="models_eynollah/microsoft/trocr-base-printed",
|
filename="models_eynollah/model_eynollah_ocr_trocr_20250919",
|
||||||
dist_url=dist_url("trocr"),
|
dist_url=dist_url("trocr"),
|
||||||
dists=['trocr'],
|
dists=['trocr'],
|
||||||
type='TrOCRProcessor',
|
type='TrOCRProcessor',
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ def test_run_eynollah_binarization_filename(
|
||||||
tests_dir,
|
tests_dir,
|
||||||
options,
|
options,
|
||||||
):
|
):
|
||||||
infile = tests_dir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
|
infile = tests_dir.joinpath('resources/2files/kant_aufklaerung_1784_0020.tif')
|
||||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
|
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
|
||||||
run_eynollah_ok_and_check_logs(
|
run_eynollah_ok_and_check_logs(
|
||||||
'binarization',
|
'binarization',
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,10 @@ from ocrd_models.constants import NAMESPACES as NS
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"options",
|
"options",
|
||||||
[
|
[
|
||||||
|
["-trocr"],
|
||||||
[], # defaults
|
[], # defaults
|
||||||
["-doit", #str(outrenderfile.parent)],
|
["-doit", #str(outrenderfile.parent)],
|
||||||
],
|
],
|
||||||
["-trocr"],
|
|
||||||
], ids=str)
|
], ids=str)
|
||||||
def test_run_eynollah_ocr_filename(
|
def test_run_eynollah_ocr_filename(
|
||||||
tmp_path,
|
tmp_path,
|
||||||
|
|
|
||||||
BIN
tests/resources/2files/euler_rechenkunst01_1738_0025.tif
Normal file
BIN
tests/resources/2files/euler_rechenkunst01_1738_0025.tif
Normal file
Binary file not shown.
BIN
tests/resources/2files/kant_aufklaerung_1784_0020.tif
Normal file
BIN
tests/resources/2files/kant_aufklaerung_1784_0020.tif
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue