diff --git a/.circleci/config.yml b/.circleci/config.yml
index a97d20b..b90ef37 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -7,15 +7,23 @@ jobs:
   build-python36:
     docker:
       - image: ubuntu:18.04
+    environment:
+      - PYTHONIOENCODING: utf-8
     steps:
-      - run: apt-get update ; apt-get install -y make git curl python3 python3-pip wget imagemagick
+      - run: apt-get update ; apt-get install -y make git curl python3 python3-pip wget imagemagick locales
+      - run: locale-gen "en_US.UTF-8"; update-locale LC_ALL="en_US.UTF-8"
       - checkout
+      - run: pip3 install --upgrade pip
      - run: make install PIP_INSTALL="pip3 install"
       - run: pip3 install -r requirements-test.txt
-      - run: make coverage
+      - run: make coverage LC_ALL=en_US.utf8
       - codecov/upload

 workflows:
   build:
     jobs:
-      - build-python36
+      - build-python36:
+          filters:
+            branches:
+              ignore:
+                - screenshots
diff --git a/.gitignore b/.gitignore
index 42c4957..0bea6c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -107,5 +107,7 @@ venv.bak/
 /calamari
 /calamari_models
 /gt4histocr-calamari
+/actevedef_718448162*
 /repo
 /test/assets
+gt4histocr-calamari*
diff --git a/Dockerfile b/Dockerfile
index 6bd7f73..6d63150 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM ocrd/core:edge
+FROM ocrd/core
 MAINTAINER OCR-D
 ENV DEBIAN_FRONTEND noninteractive
 ENV PYTHONIOENCODING utf8
@@ -10,10 +10,12 @@
 COPY Makefile .
 COPY setup.py .
 COPY ocrd-tool.json .
 COPY requirements.txt .
+COPY README.md .
 COPY ocrd_calamari ocrd_calamari

-RUN make calamari/build
-RUN pip3 install .
+RUN pip3 install --upgrade pip && \
+    pip3 install . && \
+    pip3 check

 ENTRYPOINT ["/usr/local/bin/ocrd-calamari-recognize"]
diff --git a/LICENSE b/LICENSE
index 261eeb9..bc7973a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -186,7 +186,7 @@
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2018-2020 Konstantin Baierer, Mike Gerber

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/Makefile b/Makefile
index 0508505..00a8f69 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,4 @@
+export # export variables to subshells
 PIP_INSTALL = pip3 install
 GIT_CLONE = git clone
 PYTHON = python3
@@ -10,10 +11,8 @@ help:
	@echo "  Targets"
	@echo ""
	@echo "    install              Install ocrd_calamari"
-	@echo "    calamari             Clone calamari repo"
-	@echo "    calamari_models      Clone calamari_models repo"
-	@echo "    gt4histocr-calamari  Get GT4HistOCR Calamari model (from SBB)"
-	@echo "    calamari/build       pip install calamari"
+	@echo "    gt4histocr-calamari1 Get GT4HistOCR Calamari model (from SBB)"
+	@echo "    actevedef_718448162  Download example data"
	@echo "    deps-test            Install testing python deps via pip"
	@echo "    repo/assets          Clone OCR-D/assets to ./repo/assets"
	@echo "    test/assets          Setup test assets"
@@ -33,29 +32,21 @@ help:
 install:
	$(PIP_INSTALL) .
-# Clone calamari repo
-calamari:
-	$(GIT_CLONE) https://github.com/chwick/calamari
-# Clone calamari_models repo
-calamari_models:
-	$(GIT_CLONE) -n https://github.com/chwick/calamari_models
-	# Checkout latest version that works with calamari-ocr==0.3.5:
-	cd calamari_models && git checkout f76b1d3ec
-
-gt4histocr-calamari:
-	mkdir gt4histocr-calamari
-	cd gt4histocr-calamari && \
-	wget https://file.spk-berlin.de:8443/calamari-models/GT4HistOCR/model.tar.xz && \
+# Get GT4HistOCR Calamari model (from SBB)
+gt4histocr-calamari1:
+	mkdir -p gt4histocr-calamari1
+	cd gt4histocr-calamari1 && \
+	wget https://qurator-data.de/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/model.tar.xz && \
	tar xfv model.tar.xz && \
	rm model.tar.xz

+# Download example data
+actevedef_718448162:
+	wget https://qurator-data.de/examples/actevedef_718448162.zip && \
+	unzip actevedef_718448162.zip

-# pip install calamari
-calamari/build: calamari calamari_models
-	cd calamari && $(PIP_INSTALL) .
-
 #
 # Assets and Tests
@@ -82,12 +73,12 @@ assets-clean:
	rm -rf test/assets

 # Run unit tests
-test: test/assets gt4histocr-calamari
+test: test/assets gt4histocr-calamari1
	# declare -p HTTP_PROXY
	$(PYTHON) -m pytest --continue-on-collection-errors test $(PYTEST_ARGS)

 # Run unit tests and determine test coverage
-coverage: test/assets calamari_models
+coverage: test/assets gt4histocr-calamari1
	coverage erase
	make test PYTHON="coverage run"
	coverage report
diff --git a/README-DEV.md b/README-DEV.md
index da2025a..40a237a 100644
--- a/README-DEV.md
+++ b/README-DEV.md
@@ -4,17 +4,20 @@ In a Python 3 virtualenv:

 ~~~
 pip install -e .
+pip install -r requirements-test.txt
 make test
 ~~~

-Release
--------
-* Update ocrd-tool.json version
-* Update setup.py version
-* git commit -m 'v<version>'
-* git tag -m 'v<version>' 'v<version>'
-* git push --tags
+Releasing
+---------
+* Update `ocrd-tool.json` version
+* Update `setup.py` version
+* `git commit -m 'v<version>'`
+* `git tag -m 'v<version>' 'v<version>'`
+* `git push --tags`
+* Do a release on GitHub

-PyPI:
-* python sdist bdist_wheel
-* twine upload dist/ocrd_calamari-*
+### Uploading to PyPI
+* `rm -rf dist/` or backup if `dist/` exists already
+* In the virtualenv: `python setup.py sdist bdist_wheel`
+* `twine upload dist/ocrd_calamari-*`
diff --git a/README.md b/README.md
index 4d7dc96..d277479 100644
--- a/README.md
+++ b/README.md
@@ -8,11 +8,22 @@
 ## Introduction

-This offers a OCR-D compliant workspace processor for some of the functionality of Calamari OCR.
+**ocrd_calamari** offers an [OCR-D](https://ocr-d.de) compliant workspace processor for the functionality of Calamari OCR.

 It uses OCR-D workspaces (METS) with [PAGE XML](https://github.com/PRImA-Research-Lab/PAGE-XML) documents as input and output.

 This processor only operates on the text line level and so needs a line segmentation (and by extension a binarized image) as its input.

+In addition to the line text it may also output word and glyph segmentation,
+including per-glyph confidence values and per-glyph alternative predictions as
+provided by the Calamari OCR engine, using a `textequiv_level` of `word` or
+`glyph`. Note that Calamari itself does not provide word segmentation; this
+processor infers the word segmentation from the text segmentation and the
+glyph positions. The provided glyph and word segmentation can be used for text
+extraction and highlighting, but is probably not useful for further
+image-based processing.
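+
+Roughly, the inference works like the following sketch (illustrative only, not
+the actual code in `ocrd_calamari/recognize.py`; the sketch assumes one
+`(x_start, x_end)` pair per predicted character):
+
+```python
+def infer_words(line_text, glyph_positions, line_height):
+    """Split line_text on spaces and box each word by its outermost glyphs."""
+    words, start = [], None
+    for i, c in enumerate(line_text + ' '):
+        if c != ' ' and start is None:
+            start = i                       # a new word begins here
+        elif c == ' ' and start is not None:
+            x0 = glyph_positions[start][0]  # left edge of the word's first glyph
+            x1 = glyph_positions[i - 1][1]  # right edge of the word's last glyph
+            words.append((line_text[start:i], (x0, 0, x1, line_height)))
+            start = None
+    return words
+
+print(infer_words("ab cd", [(0, 5), (5, 10), (10, 12), (12, 17), (17, 22)], 30))
+# [('ab', (0, 0, 10, 30)), ('cd', (12, 0, 22, 30))]
+```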
+
+![Example output as viewed in PAGE Viewer](https://github.com/OCR-D/ocrd_calamari/raw/screenshots/output-in-page-viewer.jpg)
+
 ## Installation

 ### From PyPI
@@ -29,32 +40,44 @@ pip install .

 ## Install models

-Download standard models:
+Download models trained on GT4HistOCR data:

 ```
-wget https://github.com/Calamari-OCR/calamari_models/archive/master.zip
-unzip master.zip
+make gt4histocr-calamari1
+ls gt4histocr-calamari1
 ```

-Download models trained on GT4HistOCR data:
+Manual download: [model.tar.xz](https://qurator-data.de/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/model.tar.xz)

+## Example Usage
+Before using `ocrd-calamari-recognize`, get some example data and a model, and
+prepare the document for OCR:
 ```
-make gt4histocr-calamari
-ls gt4histocr-calamari
+# Download model and example data
+make gt4histocr-calamari1
+make actevedef_718448162
+
+# Create binarized images and line segmentation using other OCR-D projects
+cd actevedef_718448162
+ocrd-olena-binarize -P impl sauvola-ms-split -I OCR-D-IMG -O OCR-D-IMG-BIN
+ocrd-tesserocr-segment-region -I OCR-D-IMG-BIN -O OCR-D-SEG-REGION
+ocrd-tesserocr-segment-line -I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
 ```

-## Example Usage
+Finally, recognize the text using ocrd_calamari and the downloaded model:
+```
+ocrd-calamari-recognize -P checkpoint "../gt4histocr-calamari1/*.ckpt.json" -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI
+```
+
+or
+
+```
+ocrd-calamari-recognize -P checkpoint_dir "../gt4histocr-calamari1" -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI
+```

-~~~
-ocrd-calamari-recognize -p test-parameters.json -m mets.xml -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI
-~~~
-With `test-parameters.json`:
-~~~
-{
-  "checkpoint": "/path/to/some/trained/models/*.ckpt.json"
-}
-~~~
+You may want to have a look at the [ocrd-tool.json](ocrd_calamari/ocrd-tool.json) descriptions
+for additional parameters and default values.

 ## Development & Testing
 For information regarding development and testing, please see
diff --git a/ocrd_calamari/ocrd-tool.json b/ocrd_calamari/ocrd-tool.json
index 54d2206..d4f83fa 100644
--- a/ocrd_calamari/ocrd-tool.json
+++ b/ocrd_calamari/ocrd-tool.json
@@ -1,6 +1,6 @@
 {
-  "git_url": "https://github.com/kba/ocrd_calamari",
-  "version": "0.0.3",
+  "git_url": "https://github.com/OCR-D/ocrd_calamari",
+  "version": "1.0.1",
   "tools": {
     "ocrd-calamari-recognize": {
       "executable": "ocrd-calamari-recognize",
@@ -18,6 +18,10 @@
         "OCR-D-OCR-CALAMARI"
       ],
      "parameters": {
+        "checkpoint_dir": {
+          "description": "The directory containing calamari model files (*.ckpt.json).
Uses all checkpoints in that directory", + "type": "string", "format": "file", "cacheable": true, "default": "qurator-gt4histocr-1.0" + }, "checkpoint": { "description": "The calamari model files (*.ckpt.json)", "type": "string", "format": "file", "cacheable": true @@ -25,6 +29,18 @@ "voter": { "description": "The voting algorithm to use", "type": "string", "default": "confidence_voter_default_ctc" + }, + "textequiv_level": { + "type": "string", + "enum": ["line", "word", "glyph"], + "default": "line", + "description": "Deepest PAGE XML hierarchy level to include TextEquiv results for" + }, + "glyph_conf_cutoff": { + "type": "number", + "format": "float", + "default": 0.001, + "description": "Only include glyph alternatives with confidences above this threshold" } } } diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index 31a37e1..5c6807e 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -1,33 +1,50 @@ from __future__ import absolute_import import os +import itertools from glob import glob import numpy as np +from calamari_ocr import __version__ as calamari_version from calamari_ocr.ocr import MultiPredictor from calamari_ocr.ocr.voting import voter_from_proto from calamari_ocr.proto import VoterParams from ocrd import Processor from ocrd_modelfactory import page_from_file -from ocrd_models.ocrd_page import to_xml -from ocrd_models.ocrd_page_generateds import TextEquivType -from ocrd_utils import getLogger, concat_padded, MIMETYPE_PAGE +from ocrd_models.ocrd_page import ( + LabelType, LabelsType, + MetadataItemType, + TextEquivType, + WordType, GlyphType, CoordsType, + to_xml +) +from ocrd_utils import ( + getLogger, concat_padded, + coordinates_for_segment, points_from_polygon, polygon_from_x0y0x1y1, + make_file_id, assert_file_grp_cardinality, + MIMETYPE_PAGE +) from ocrd_calamari.config import OCRD_TOOL, TF_CPP_MIN_LOG_LEVEL -log = getLogger('processor.CalamariRecognize') +os.environ['TF_CPP_MIN_LOG_LEVEL'] = TF_CPP_MIN_LOG_LEVEL +from tensorflow import __version__ as tensorflow_version + +TOOL = 'ocrd-calamari-recognize' class CalamariRecognize(Processor): def __init__(self, *args, **kwargs): - kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-calamari-recognize'] - kwargs['version'] = OCRD_TOOL['version'] + kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] + kwargs['version'] = '%s (calamari %s, tensorflow %s)' % (OCRD_TOOL['version'], calamari_version, tensorflow_version) super(CalamariRecognize, self).__init__(*args, **kwargs) def _init_calamari(self): - os.environ['TF_CPP_MIN_LOG_LEVEL'] = TF_CPP_MIN_LOG_LEVEL + if not self.parameter.get('checkpoint', None) and self.parameter.get('checkpoint_dir', None): + resolved = self.resolve_resource(self.parameter['checkpoint_dir']) + self.parameter['checkpoint'] = '%s/*.ckpt.json' % resolved checkpoints = glob(self.parameter['checkpoint']) self.predictor = MultiPredictor(checkpoints=checkpoints) @@ -43,16 +60,14 @@ class CalamariRecognize(Processor): voter_params.type = VoterParams.Type.Value(self.parameter['voter'].upper()) self.voter = voter_from_proto(voter_params) - def _make_file_id(self, input_file, n): - file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp) - if file_id == input_file.ID: - file_id = concat_padded(self.output_file_grp, n) - return file_id - def process(self): """ Performs the recognition. 
""" + log = getLogger('processor.CalamariRecognize') + + assert_file_grp_cardinality(self.input_file_grp, 1) + assert_file_grp_cardinality(self.output_file_grp, 1) self._init_calamari() @@ -71,44 +86,169 @@ class CalamariRecognize(Processor): textlines = region.get_TextLine() log.info("About to recognize %i lines of region '%s'", len(textlines), region.id) + line_images_np = [] for line in textlines: log.debug("Recognizing line '%s' in region '%s'", line.id, region.id) - line_image, line_coords = self.workspace.image_from_segment( - line, region_image, region_coords, feature_selector=self.features) - if ('binarized' not in line_coords['features'] and - 'grayscale_normalized' not in line_coords['features'] and - self.input_channels == 1): + line_image, line_coords = self.workspace.image_from_segment(line, region_image, region_coords, feature_selector=self.features) + if ('binarized' not in line_coords['features'] and 'grayscale_normalized' not in line_coords['features'] and self.input_channels == 1): # We cannot use a feature selector for this since we don't # know whether the model expects (has been trained on) # binarized or grayscale images; but raw images are likely # always inadequate: - log.warning("Using raw image for line '%s' in region '%s'", - line.id, region.id) - + log.warning("Using raw image for line '%s' in region '%s'", line.id, region.id) + + line_image = line_image if all(line_image.size) else [[0]] line_image_np = np.array(line_image, dtype=np.uint8) + line_images_np.append(line_image_np) + raw_results_all = self.predictor.predict_raw(line_images_np, progress_bar=False) + + for line, raw_results in zip(textlines, raw_results_all): - raw_results = list(self.predictor.predict_raw([line_image_np], progress_bar=False))[0] for i, p in enumerate(raw_results): p.prediction.id = "fold_{}".format(i) prediction = self.voter.vote_prediction_result(raw_results) prediction.id = "voted" - line_text = prediction.sentence - line_conf = prediction.avg_char_probability + # Build line text on our own + # + # Calamari does whitespace post-processing on prediction.sentence, while it does not do the same + # on prediction.positions. Do it on our own to have consistency. + # + # XXX Check Calamari's built-in post-processing on prediction.sentence + + + def _sort_chars(p): + """Filter and sort chars of prediction p""" + chars = p.chars + chars = [c for c in chars if c.char] # XXX Note that omission probabilities are not normalized?! 
+ chars = [c for c in chars if c.probability >= self.parameter['glyph_conf_cutoff']] + chars = sorted(chars, key=lambda k: k.probability, reverse=True) + return chars + def _drop_leading_spaces(positions): + return list(itertools.dropwhile(lambda p: _sort_chars(p)[0].char == " ", positions)) + def _drop_trailing_spaces(positions): + return list(reversed(_drop_leading_spaces(reversed(positions)))) + def _drop_double_spaces(positions): + def _drop_double_spaces_generator(positions): + last_was_space = False + for p in positions: + if p.chars[0].char == " ": + if not last_was_space: + yield p + last_was_space = True + else: + yield p + last_was_space = False + return list(_drop_double_spaces_generator(positions)) + positions = prediction.positions + positions = _drop_leading_spaces(positions) + positions = _drop_trailing_spaces(positions) + positions = _drop_double_spaces(positions) + positions = list(positions) + + line_text = ''.join(_sort_chars(p)[0].char for p in positions) + if line_text != prediction.sentence: + log.warning("Our own line text is not the same as Calamari's: '%s' != '%s'", + line_text, prediction.sentence) + # Delete existing results if line.get_TextEquiv(): log.warning("Line '%s' already contained text results", line.id) - line.set_TextEquiv([TextEquivType(Unicode=line_text, conf=line_conf)]) - + line.set_TextEquiv([]) if line.get_Word(): log.warning("Line '%s' already contained word segmentation", line.id) line.set_Word([]) + # Save line results + line_conf = prediction.avg_char_probability + line.set_TextEquiv([TextEquivType(Unicode=line_text, conf=line_conf)]) + + + # Save word results + # + # Calamari OCR does not provide word positions, so we infer word positions from a. text segmentation + # and b. the glyph positions. This is necessary because the PAGE XML format enforces a strict + # hierarchy of lines > words > glyphs. + + def _words(s): + """Split words based on spaces and include spaces as 'words'""" + spaces = None + word = '' + for c in s: + if c == ' ' and spaces is True: + word += c + elif c != ' ' and spaces is False: + word += c + else: + if word: + yield word + word = c + spaces = (c == ' ') + yield word + + if self.parameter['textequiv_level'] in ['word', 'glyph']: + word_no = 0 + i = 0 + + + + for word_text in _words(line_text): + word_length = len(word_text) + if not all(c == ' ' for c in word_text): + word_positions = positions[i:i+word_length] + word_start = word_positions[0].global_start + word_end = word_positions[-1].global_end + + polygon = polygon_from_x0y0x1y1([word_start, 0, word_end, line_image.height]) + points = points_from_polygon(coordinates_for_segment(polygon, None, line_coords)) + # XXX Crop to line polygon? 
+ + word = WordType(id='%s_word%04d' % (line.id, word_no), Coords=CoordsType(points)) + word.add_TextEquiv(TextEquivType(Unicode=word_text)) + + if self.parameter['textequiv_level'] == 'glyph': + for glyph_no, p in enumerate(word_positions): + glyph_start = p.global_start + glyph_end = p.global_end + + polygon = polygon_from_x0y0x1y1([glyph_start, 0, glyph_end, line_image.height]) + points = points_from_polygon(coordinates_for_segment(polygon, None, line_coords)) + + glyph = GlyphType(id='%s_glyph%04d' % (word.id, glyph_no), Coords=CoordsType(points)) + + # Add predictions (= TextEquivs) + char_index_start = 1 # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs + for char_index, char in enumerate(_sort_chars(p), start=char_index_start): + glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=char_index, conf=char.probability)) + + word.add_Glyph(glyph) + + line.add_Word(word) + word_no += 1 + + i += word_length + + _page_update_higher_textequiv_levels('line', pcgts) - file_id = self._make_file_id(input_file, n) + + # Add metadata about this operation and its runtime parameters: + metadata = pcgts.get_Metadata() # ensured by from_file() + metadata.add_MetadataItem( + MetadataItemType(type_="processingStep", + name=self.ocrd_tool['steps'][0], + value=TOOL, + Labels=[LabelsType( + externalModel="ocrd-tool", + externalId="parameters", + Label=[LabelType(type_=name, value=self.parameter[name]) + for name in self.parameter.keys()])])) + + + file_id = make_file_id(input_file, self.output_file_grp) + pcgts.set_pcGtsId(file_id) self.workspace.add_file( ID=file_id, file_grp=self.output_file_grp, @@ -151,3 +291,5 @@ def _page_update_higher_textequiv_levels(level, pcgts): else u'' for line in lines) region.set_TextEquiv( [TextEquivType(Unicode=region_unicode)]) # remove old + +# vim:tw=120: diff --git a/requirements.txt b/requirements.txt index 17de3dc..cbfb800 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -numpy -tensorflow-gpu == 1.14.0 -calamari-ocr == 0.3.5 +h5py < 3 # XXX tensorflow 2.4.0rc3 requires h5py~=2.10.0, but you'll have h5py 3.1.0 which is incompatible. +tensorflow >= 2.3.0rc2 +calamari-ocr == 1.0.* setuptools >= 41.0.0 # tensorboard depends on this, but why do we get an error at runtime? 
click -ocrd >= 1.0.0b11 +ocrd >= 2.22.0 diff --git a/setup.py b/setup.py index 323d68a..2a98d62 100644 --- a/setup.py +++ b/setup.py @@ -5,15 +5,15 @@ from setuptools import setup, find_packages setup( name='ocrd_calamari', - version='0.0.3', + version='1.0.1', description='Calamari bindings', long_description=Path('README.md').read_text(), long_description_content_type='text/markdown', author='Konstantin Baierer, Mike Gerber', author_email='unixprog@gmail.com, mike.gerber@sbb.spk-berlin.de', - url='https://github.com/kba/ocrd_calamari', + url='https://github.com/OCR-D/ocrd_calamari', license='Apache License 2.0', - packages=find_packages(exclude=('tests', 'docs')), + packages=find_packages(exclude=('test', 'docs')), install_requires=Path('requirements.txt').read_text().split('\n'), package_data={ '': ['*.json', '*.yml', '*.yaml'], diff --git a/test/test_recognize.py b/test/test_recognize.py index f97ef91..b3e8540 100644 --- a/test/test_recognize.py +++ b/test/test_recognize.py @@ -2,6 +2,8 @@ import os import shutil import subprocess import urllib.request +from lxml import etree +from glob import glob import pytest import logging @@ -10,9 +12,14 @@ from ocrd.resolver import Resolver from ocrd_calamari import CalamariRecognize from .base import assets -METS_KANT = assets.url_of('kant_aufklaerung_1784-page-block-line-word_glyph/data/mets.xml') -CHECKPOINT = os.path.join(os.getcwd(), 'gt4histocr-calamari/*.ckpt.json') +METS_KANT = assets.url_of('kant_aufklaerung_1784-page-region-line-word_glyph/data/mets.xml') WORKSPACE_DIR = '/tmp/test-ocrd-calamari' +CHECKPOINT_DIR = os.path.join(os.getcwd(), 'gt4histocr-calamari1') +CHECKPOINT = os.path.join(CHECKPOINT_DIR, '*.ckpt.json') + +# Because XML namespace versions are so much fun, we not only use one, we use TWO! +NSMAP = { "pc": "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" } +NSMAP_GT = { "pc": "http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" } @pytest.fixture @@ -32,10 +39,6 @@ def workspace(): "https://github.com/OCR-D/assets/raw/master/data/kant_aufklaerung_1784/data/OCR-D-IMG/" + f, os.path.join(WORKSPACE_DIR, 'OCR-D-IMG', f)) - return workspace - - -def test_recognize(workspace): # The binarization options I have are: # # a. 
ocrd_kraken which tries to install cltsm, whose installation is borken on my machine (protobuf) @@ -48,17 +51,49 @@ def test_recognize(workspace): ff = os.path.join(WORKSPACE_DIR, 'OCR-D-IMG', f) subprocess.call(['convert', ff, '-threshold', '50%', ff]) - # XXX Should remove GT text to really test this + # Remove GT Words and TextEquivs, to not accidently check GT text instead of the OCR text + # XXX Review data again + # XXX Make this more robust against namespace version changes + for of in workspace.mets.find_files(fileGrp="OCR-D-GT-SEG-LINE"): + workspace.download_file(of) + for to_remove in ["//pc:Word", "//pc:TextEquiv"]: + for ff in glob(os.path.join(WORKSPACE_DIR, "OCR-D-GT-SEG-LINE", "*")): + tree = etree.parse(ff) + for e in tree.xpath(to_remove, namespaces=NSMAP_GT): + e.getparent().remove(e) + tree.write(ff, xml_declaration=True, encoding="utf-8") + + return workspace + +def test_recognize(workspace): CalamariRecognize( workspace, input_file_grp="OCR-D-GT-SEG-LINE", output_file_grp="OCR-D-OCR-CALAMARI", - parameter={'checkpoint': CHECKPOINT} + parameter={ + "checkpoint": CHECKPOINT, + } ).process() workspace.save_mets() - page1 = os.path.join(workspace.directory, 'OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml') + page1 = os.path.join(workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml") + assert os.path.exists(page1) + with open(page1, "r", encoding="utf-8") as f: + assert "verſchuldeten" in f.read() + +def test_recognize_with_checkpoint_dir(workspace): + CalamariRecognize( + workspace, + input_file_grp="OCR-D-GT-SEG-LINE", + output_file_grp="OCR-D-OCR-CALAMARI", + parameter={ + "checkpoint_dir": CHECKPOINT_DIR, + } + ).process() + workspace.save_mets() + + page1 = os.path.join(workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml") assert os.path.exists(page1) with open(page1, 'r', encoding='utf-8') as f: assert 'verſchuldeten' in f.read() @@ -75,3 +110,61 @@ def test_recognize_should_warn_if_given_rgb_image_and_single_channel_model(works interesting_log_messages = [t[2] for t in caplog.record_tuples if "Using raw image" in t[2]] assert len(interesting_log_messages) > 10 # For every line! + with open(page1, "r", encoding="utf-8") as f: + assert "verſchuldeten" in f.read() + + +def test_word_segmentation(workspace): + CalamariRecognize( + workspace, + input_file_grp="OCR-D-GT-SEG-LINE", + output_file_grp="OCR-D-OCR-CALAMARI", + parameter={ + "checkpoint": CHECKPOINT, + "textequiv_level": "word", # Note that we're going down to word level here + } + ).process() + workspace.save_mets() + + page1 = os.path.join(workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml") + assert os.path.exists(page1) + tree = etree.parse(page1) + + # The result should contain a TextLine that contains the text "December" + line = tree.xpath(".//pc:TextLine[pc:TextEquiv/pc:Unicode[contains(text(),'December')]]", namespaces=NSMAP)[0] + assert line + + # The textline should a. contain multiple words and b. 
these should concatenate fine to produce the same line text + words = line.xpath(".//pc:Word", namespaces=NSMAP) + assert len(words) >= 2 + words_text = " ".join(word.xpath("pc:TextEquiv/pc:Unicode", namespaces=NSMAP)[0].text for word in words) + line_text = line.xpath("pc:TextEquiv/pc:Unicode", namespaces=NSMAP)[0].text + assert words_text == line_text + + # For extra measure, check that we're not seeing any glyphs, as we asked for textequiv_level == "word" + glyphs = tree.xpath("//pc:Glyph", namespaces=NSMAP) + assert len(glyphs) == 0 + + +def test_glyphs(workspace): + CalamariRecognize( + workspace, + input_file_grp="OCR-D-GT-SEG-LINE", + output_file_grp="OCR-D-OCR-CALAMARI", + parameter={ + "checkpoint": CHECKPOINT, + "textequiv_level": "glyph", # Note that we're going down to glyph level here + } + ).process() + workspace.save_mets() + + page1 = os.path.join(workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml") + assert os.path.exists(page1) + tree = etree.parse(page1) + + # The result should contain a lot of glyphs + glyphs = tree.xpath("//pc:Glyph", namespaces=NSMAP) + assert len(glyphs) >= 100 + + +# vim:tw=120: