1
0
Fork 0
mirror of https://github.com/mikegerber/ocrd_calamari.git synced 2025-07-05 16:39:53 +02:00
ocrd_calamari/test/test_recognize.py
Konstantin Baierer b54ccf90f7 smoke test, circle ci
Conflicts:
	Makefile
	ocrd_calamari/__init__.py
2019-12-03 17:11:09 +01:00

62 lines
1.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
from os.path import join, exists
import shutil
from test.base import TestCase, main, assets, skip
from ocrd.resolver import Resolver
from ocrd_tesserocr import TesserocrSegmentRegion
from ocrd_tesserocr import TesserocrSegmentLine
from ocrd_calamari import CalamariRecognize
# Disabled alternative test document, kept for reference:
#METS_HEROLD_SMALL = assets.url_of('SBB0000F29300010000/data/mets_one_file.xml')
# As long as #96 remains unresolved, we cannot use workspaces which have local relative files.
# NOTE(review): the issue tracker that "#96" refers to is not named here - confirm and link it.
# METS URL of the document the test runs on, obtained via assets.url_of().
METS_HEROLD_SMALL = assets.url_of('kant_aufklaerung_1784-binarized/data/mets.xml')
# Directory the test workspace is created in; setUp() deletes and recreates it.
WORKSPACE_DIR = '/tmp/test-ocrd-calamari'
class TestCalamariRecognize(TestCase):
    """Smoke test: segment a workspace, run CalamariRecognize, check the output.

    The test builds a workspace from ``METS_HEROLD_SMALL`` inside
    ``WORKSPACE_DIR``, runs region and line segmentation followed by
    recognition, and finally looks for an expected word in the first
    result page.
    """

    def setUp(self):
        """Ensure ``WORKSPACE_DIR`` exists and is empty before the test runs."""
        # Remove leftovers of a previous run so stale output files cannot make
        # the assertions in runTest() pass by accident.
        if exists(WORKSPACE_DIR):
            shutil.rmtree(WORKSPACE_DIR)
        os.makedirs(WORKSPACE_DIR)

    # Disabled skip marker, kept as in the original:
    #skip("Takes too long")
    def runTest(self):
        """Run the three processors in sequence and verify the recognized text.

        Each processor reads the file group written by the previous one; the
        METS file is saved after every step.
        """
        resolver = Resolver()
        workspace = resolver.workspace_from_url(METS_HEROLD_SMALL, dst_dir=WORKSPACE_DIR)

        # Step 1: region segmentation on the input images.
        TesserocrSegmentRegion(
            workspace,
            input_file_grp="OCR-D-IMG",
            output_file_grp="OCR-D-SEG-BLOCK"
        ).process()
        workspace.save_mets()

        # Step 2: line segmentation on the regions from step 1.
        TesserocrSegmentLine(
            workspace,
            input_file_grp="OCR-D-SEG-BLOCK",
            output_file_grp="OCR-D-SEG-LINE"
        ).process()
        workspace.save_mets()

        # Step 3: recognition on the lines from step 2.
        # NOTE(review): the checkpoint glob is a relative path - presumably
        # resolved against the current working directory; confirm.
        CalamariRecognize(
            workspace,
            input_file_grp="OCR-D-SEG-LINE",
            output_file_grp="OCR-D-OCR-CALAMARI",
            parameter={
                'checkpoint': 'calamari_models/fraktur_historical/*.ckpt.json'
            }
        ).process()
        workspace.save_mets()

        page1 = join(workspace.directory, 'OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml')
        self.assertTrue(exists(page1))
        # Decode explicitly as UTF-8: the expected word contains the non-ASCII
        # long s, and open()'s default encoding depends on the locale, which
        # would make this check fail spuriously on non-UTF-8 systems.
        with open(page1, 'r', encoding='utf-8') as f:
            self.assertIn('verſchuldeten', f.read())
# Script entry point: when this file is executed directly (not imported),
# hand control to main() imported from test.base.
if __name__ == "__main__":
    main()