1
0
Fork 0
mirror of https://github.com/mikegerber/ocrd_calamari.git synced 2025-06-09 11:49:53 +02:00

implement "checkpoint_dir" parameter as a simpler alternative to "checkpoint"

This commit is contained in:
Konstantin Baierer 2020-12-22 15:20:45 +01:00
parent fe973e58db
commit 83adfcfd5a
4 changed files with 31 additions and 1 deletions

View file

@ -14,7 +14,8 @@ from .base import assets
METS_KANT = assets.url_of('kant_aufklaerung_1784-page-region-line-word_glyph/data/mets.xml')
WORKSPACE_DIR = '/tmp/test-ocrd-calamari'
CHECKPOINT = os.path.join(os.getcwd(), 'gt4histocr-calamari1/*.ckpt.json')
CHECKPPOINT_DIR = os.path.join(os.getcwd(), 'gt4histocr-calamari1')
CHECKPOINT = os.path.join(CHECKPPOINT_DIR, '*.ckpt.json')
# Because XML namespace versions are so much fun, we not only use one, we use TWO!
NSMAP = { "pc": "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" }
@ -81,6 +82,22 @@ def test_recognize(workspace):
with open(page1, "r", encoding="utf-8") as f:
assert "verſchuldeten" in f.read()
def test_recognize_with_checkpoint_dir(workspace):
CalamariRecognize(
workspace,
input_file_grp="OCR-D-GT-SEG-LINE",
output_file_grp="OCR-D-OCR-CALAMARI",
parameter={
"checkpoin_dir": CHECKPOINT_DIR,
}
).process()
workspace.save_mets()
page1 = os.path.join(workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml")
assert os.path.exists(page1)
with open(page1, "r", encoding="utf-8") as f:
assert "verſchuldeten" in f.read()
def test_word_segmentation(workspace):
CalamariRecognize(