Merge branch 'feat/update-calamari1'

2026-07-21 18:19:10 +02:00 · 2020-11-25 12:10:59 +01:00 · 2020-11-25 12:10:59 +01:00 · 8fcd331fbd
commit 8fcd331fbd
parent 795826fa43 0e59c2317a
5 changed files with 20 additions and 17 deletions
--- a/14
+++ b/14
@@ -11,7 +11,7 @@ help:
 	@echo "  Targets"
 	@echo ""
 	@echo "    install          Install ocrd_calamari"
-	@echo "    gt4histocr-calamari Get GT4HistOCR Calamari model (from SBB)"
+	@echo "    gt4histocr-calamari1 Get GT4HistOCR Calamari model (from SBB)"
 	@echo "    actevedef_718448162 Download example data"
 	@echo "    deps-test        Install testing python deps via pip"
 	@echo "    repo/assets      Clone OCR-D/assets to ./repo/assets"
@@ -34,10 +34,10 @@ install:
 # Get GT4HistOCR Calamari model (from SBB)
-gt4histocr-calamari:
+gt4histocr-calamari1:
-	mkdir gt4histocr-calamari
+	mkdir -p gt4histocr-calamari1
-	cd gt4histocr-calamari && \
+	cd gt4histocr-calamari1 && \
-	wget https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz && \
+	wget https://qurator-data.de/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/model.tar.xz && \
 	tar xfv model.tar.xz && \
 	rm model.tar.xz
@@ -73,12 +73,12 @@ assets-clean:
 	rm -rf test/assets
 # Run unit tests
-test: test/assets gt4histocr-calamari
+test: test/assets gt4histocr-calamari1
 	# declare -p HTTP_PROXY
 	$(PYTHON) -m pytest --continue-on-collection-errors test $(PYTEST_ARGS)
 # Run unit tests and determine test coverage
-coverage: test/assets gt4histocr-calamari
+coverage: test/assets gt4histocr-calamari1
 	coverage erase
 	make test PYTHON="coverage run"
 	coverage report
--- a/README.md
+++ b/README.md
@@ -43,8 +43,8 @@ pip install .
 Download models trained on GT4HistOCR data:
 ```
-make gt4histocr-calamari
+make gt4histocr-calamari1
-ls gt4histocr-calamari
+ls gt4histocr-calamari1
 ```
 ## Example Usage
@@ -52,7 +52,7 @@ Before using `ocrd-calamari-recognize` get some example data and model, and
 prepare the document for OCR:
 ```
 # Download model and example data
-make gt4histocr-calamari
+make gt4histocr-calamari1
 make actevedef_718448162
 # Create binarized images and line segmentation using other OCR-D projects
@@ -64,7 +64,7 @@ ocrd-tesserocr-segment-line -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
 Finally recognize the text using ocrd_calamari and the downloaded model:
 ```
-ocrd-calamari-recognize -p '{ "checkpoint": "../gt4histocr-calamari/*.ckpt.json" }' -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI
+ocrd-calamari-recognize -p '{ "checkpoint": "../gt4histocr-calamari1/*.ckpt.json" }' -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI
 ```
 You may want to have a look at the [ocrd-tool.json](ocrd_calamari/ocrd-tool.json) descriptions
--- a/ocrd_calamari/recognize.py
+++ b/ocrd_calamari/recognize.py
@@ -70,13 +70,16 @@ class CalamariRecognize(Processor):
                textlines = region.get_TextLine()
                log.info("About to recognize %i lines of region '%s'", len(textlines), region.id)
                for (line_no, line) in enumerate(textlines):
                    log.debug("Recognizing line '%s' in region '%s'", line.id, region.id)
                line_images_np = []
                for (line_no, line) in enumerate(textlines):
                    line_image, line_coords = self.workspace.image_from_segment(line, region_image, region_xywh)
                    line_image_np = np.array(line_image, dtype=np.uint8)
                    line_images_np.append(line_image_np)
                raw_results_all = self.predictor.predict_raw(line_images_np, progress_bar=False)
                for line, raw_results in zip(textlines, raw_results_all):
                    raw_results = list(self.predictor.predict_raw([line_image_np], progress_bar=False))[0]
                    for i, p in enumerate(raw_results):
                        p.prediction.id = "fold_{}".format(i)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-tensorflow-gpu == 1.15.*
+tensorflow >= 2.3.0rc2
-calamari-ocr == 0.3.5
+calamari-ocr == 1.0.*
 setuptools >= 41.0.0  # tensorboard depends on this, but why do we get an error at runtime?
 click
 ocrd >= 2.13.0
--- a/test/test_recognize.py
+++ b/test/test_recognize.py
@@ -14,7 +14,7 @@ from .base import assets
 METS_KANT = assets.url_of('kant_aufklaerung_1784-page-region-line-word_glyph/data/mets.xml')
 WORKSPACE_DIR = '/tmp/test-ocrd-calamari'
-CHECKPOINT = os.path.join(os.getcwd(), 'gt4histocr-calamari/*.ckpt.json')
+CHECKPOINT = os.path.join(os.getcwd(), 'gt4histocr-calamari1/*.ckpt.json')
 # Because XML namespace versions are so much fun, we not only use one, we use TWO!
 NSMAP = { "pc": "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" }