mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-11-09 22:24:13 +01:00
Add test images; call TrOCR processor from the same directory as the TrOCR model
This commit is contained in:
parent
8732007aaf
commit
ed5b5c13dd
6 changed files with 8 additions and 8 deletions
10
Makefile
10
Makefile
|
|
@ -76,22 +76,22 @@ deps-test:
|
|||
smoke-test: TMPDIR != mktemp -d
|
||||
smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif
|
||||
# layout analysis:
|
||||
eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_eynollah
|
||||
eynollah -m $(CURDIR)/models_eynollah layout -i $< -o $(TMPDIR)
|
||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
||||
fgrep -c -e TextRegion -e ImageRegion -e SeparatorRegion $(TMPDIR)/$(basename $(<F)).xml
|
||||
# layout, directory mode (skip one, add one):
|
||||
eynollah layout -di $(<D) -o $(TMPDIR) -m $(CURDIR)/models_eynollah
|
||||
eynollah -m $(CURDIR)/models_eynollah layout -di $(<D) -o $(TMPDIR)
|
||||
test -s $(TMPDIR)/euler_rechenkunst01_1738_0025.xml
|
||||
# mbreorder, directory mode (overwrite):
|
||||
eynollah machine-based-reading-order -di $(<D) -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME)
|
||||
eynollah -m $(CURDIR)/$(SEG_MODELNAME) machine-based-reading-order -di $(<D) -o $(TMPDIR)
|
||||
fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $(<F)).xml
|
||||
fgrep -c -e RegionRefIndexed $(TMPDIR)/$(basename $(<F)).xml
|
||||
# binarize:
|
||||
eynollah binarization -m $(CURDIR)/models_eynollah/eynollah-binarization_20210425 -i $< -o $(TMPDIR)/$(<F)
|
||||
eynollah -m $(CURDIR)/models_eynollah/eynollah-binarization_20210425 binarization -i $< -o $(TMPDIR)/$(<F)
|
||||
test -s $(TMPDIR)/$(<F)
|
||||
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
||||
# enhance:
|
||||
eynollah enhancement -m $(CURDIR)/models_eynollah -sos -i $< -o $(TMPDIR) -O
|
||||
eynollah -m $(CURDIR)/models_eynollah enhancement -sos -i $< -o $(TMPDIR) -O
|
||||
test -s $(TMPDIR)/$(<F)
|
||||
@set -x; test "$$(identify -format '%w %h' $<)" = "$$(identify -format '%w %h' $(TMPDIR)/$(<F))"
|
||||
$(RM) -r $(TMPDIR)
|
||||
|
|
|
|||
|
|
@ -295,7 +295,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
EynollahModelSpec(
|
||||
category="trocr_processor",
|
||||
variant='',
|
||||
filename="models_eynollah/microsoft/trocr-base-printed",
|
||||
filename="models_eynollah/model_eynollah_ocr_trocr_20250919",
|
||||
dist_url=dist_url("trocr"),
|
||||
dists=['trocr'],
|
||||
type='TrOCRProcessor',
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ def test_run_eynollah_binarization_filename(
|
|||
tests_dir,
|
||||
options,
|
||||
):
|
||||
infile = tests_dir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
|
||||
infile = tests_dir.joinpath('resources/2files/kant_aufklaerung_1784_0020.tif')
|
||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.png')
|
||||
run_eynollah_ok_and_check_logs(
|
||||
'binarization',
|
||||
|
|
|
|||
|
|
@ -5,10 +5,10 @@ from ocrd_models.constants import NAMESPACES as NS
|
|||
@pytest.mark.parametrize(
|
||||
"options",
|
||||
[
|
||||
["-trocr"],
|
||||
[], # defaults
|
||||
["-doit", #str(outrenderfile.parent)],
|
||||
],
|
||||
["-trocr"],
|
||||
], ids=str)
|
||||
def test_run_eynollah_ocr_filename(
|
||||
tmp_path,
|
||||
|
|
|
|||
BIN
tests/resources/2files/euler_rechenkunst01_1738_0025.tif
Normal file
BIN
tests/resources/2files/euler_rechenkunst01_1738_0025.tif
Normal file
Binary file not shown.
BIN
tests/resources/2files/kant_aufklaerung_1784_0020.tif
Normal file
BIN
tests/resources/2files/kant_aufklaerung_1784_0020.tif
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue