diff --git a/Makefile b/Makefile index 61ca7f3..00a8f69 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ help: @echo " Targets" @echo "" @echo " install Install ocrd_calamari" - @echo " gt4histocr-calamari Get GT4HistOCR Calamari model (from SBB)" + @echo " gt4histocr-calamari1 Get GT4HistOCR Calamari model (from SBB)" @echo " actevedef_718448162 Download example data" @echo " deps-test Install testing python deps via pip" @echo " repo/assets Clone OCR-D/assets to ./repo/assets" @@ -34,9 +34,9 @@ install: # Get GT4HistOCR Calamari model (from SBB) -gt4histocr-calamari: - mkdir gt4histocr-calamari - cd gt4histocr-calamari && \ +gt4histocr-calamari1: + mkdir -p gt4histocr-calamari1 + cd gt4histocr-calamari1 && \ wget https://qurator-data.de/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/model.tar.xz && \ tar xfv model.tar.xz && \ rm model.tar.xz @@ -73,12 +73,12 @@ assets-clean: rm -rf test/assets # Run unit tests -test: test/assets gt4histocr-calamari +test: test/assets gt4histocr-calamari1 # declare -p HTTP_PROXY $(PYTHON) -m pytest --continue-on-collection-errors test $(PYTEST_ARGS) # Run unit tests and determine test coverage -coverage: test/assets gt4histocr-calamari +coverage: test/assets gt4histocr-calamari1 coverage erase make test PYTHON="coverage run" coverage report diff --git a/README.md b/README.md index e158b8d..9b111fb 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,8 @@ pip install . Download models trained on GT4HistOCR data: ``` -make gt4histocr-calamari -ls gt4histocr-calamari +make gt4histocr-calamari1 +ls gt4histocr-calamari1 ``` ## Example Usage @@ -50,7 +50,7 @@ Before using `ocrd-calamari-recognize` get some example data and model, and prepare the document for OCR: ``` # Download model and example data -make gt4histocr-calamari +make gt4histocr-calamari1 make actevedef_718448162 # Create binarized images and line segmentation using other OCR-D projects @@ -62,7 +62,7 @@ ocrd-tesserocr-segment-line -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE Finally recognize the text using ocrd_calamari and the downloaded model: ``` -ocrd-calamari-recognize -p '{ "checkpoint": "../gt4histocr-calamari/*.ckpt.json" }' -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI +ocrd-calamari-recognize -p '{ "checkpoint": "../gt4histocr-calamari1/*.ckpt.json" }' -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI ``` You may want to have a look at the [ocrd-tool.json](ocrd_calamari/ocrd-tool.json) descriptions diff --git a/test/test_recognize.py b/test/test_recognize.py index 54faf87..eee45d9 100644 --- a/test/test_recognize.py +++ b/test/test_recognize.py @@ -14,7 +14,7 @@ from .base import assets METS_KANT = assets.url_of('kant_aufklaerung_1784-page-region-line-word_glyph/data/mets.xml') WORKSPACE_DIR = '/tmp/test-ocrd-calamari' -CHECKPOINT = os.path.join(os.getcwd(), 'gt4histocr-calamari/*.ckpt.json') +CHECKPOINT = os.path.join(os.getcwd(), 'gt4histocr-calamari1/*.ckpt.json') # Because XML namespace versions are so much fun, we not only use one, we use TWO! NSMAP = { "pc": "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" }