diff --git a/README.md b/README.md
index 18041e8..303efe3 100644
--- a/README.md
+++ b/README.md
@@ -14,12 +14,12 @@ This processor only operates on the text line level and so needs a line segmenta
 image) as its input.
 
 In addition to the line text it also outputs glyph segmentation including
-per-glyph confidence values and per-glyph alternative predictions as provided
-by the Calamari OCR engine. Note that while Calamari does not provide word
-segmentation, this processor produces word segmentation inferred from Unicode
-text segmentation and the glyph positions. The provided glyph and word
-segmentation can be used for text extraction and highlighting, but is probably
-not useful for further image-based processing.
+per-glyph confidence values and per-glyph alternative predictions as provided by
+the Calamari OCR engine. Note that while Calamari does not provide word
+segmentation, this processor produces word segmentation inferred from splitting
+the text at spaces and from the glyph positions. The provided glyph and word
+segmentation can be used for text extraction and highlighting, but is probably
+not useful for further image-based processing.
 
 ## Installation
 
diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py
index 4c27e5e..cd2d84e 100644
--- a/ocrd_calamari/recognize.py
+++ b/ocrd_calamari/recognize.py
@@ -4,7 +4,6 @@ import os
 from glob import glob
 
 import numpy as np
-import uniseg.wordbreak
 from calamari_ocr.ocr import MultiPredictor
 from calamari_ocr.ocr.voting import voter_from_proto
 from calamari_ocr.proto import VoterParams
@@ -101,26 +100,32 @@ class CalamariRecognize(Processor):
 
                     # Save word results
                     #
-                    # Calamari OCR does not provide word positions, so we infer word positions from a. Unicode text
-                    # segmentation and b. the glyph positions. This is necessary because the PAGE XML format enforces
-                    # a strict hierarchy of lines > words > glyphs.
+                    # Calamari OCR does not provide word positions, so we infer word positions from a. splitting the
+                    # text at spaces and b. the glyph positions. This is necessary because the PAGE XML format enforces
+                    # a strict hierarchy of lines > words > glyphs.
 
-                    def unwanted(c):
-                        """
-                        Define unwanted characters
-
-                        Words only containing these e.g. whitespace characters are not considered as words.
-                        """
-                        return c == " "
+                    def _words(s):
+                        """Split s into maximal runs of spaces and of non-spaces.
+
+                        Space runs are yielded as 'words' too, so the lengths of the yielded
+                        strings sum to len(s). Nothing is yielded for an empty string.
+                        """
+                        word = ''
+                        for c in s:
+                            # A run ends where the current and previous char differ in space-ness
+                            if word and (c == ' ') != (word[-1] == ' '):
+                                yield word
+                                word = ''
+                            word += c
+                        if word:
+                            yield word
 
                     word_no = 0
                     i = 0
-                    for word_text in uniseg.wordbreak.words(prediction.sentence):
-                        # XXX Re-use word segmentation from dinglehopper, i.e. support private use characters
-                        word_length = len(word_text)
-                        do_not_include = all(unwanted(c) for c in word_text)
 
-                        if not do_not_include:
+                    for word_text in _words(prediction.sentence):
+                        word_length = len(word_text)
+                        if not all(c == ' ' for c in word_text):
                             word_positions = prediction.positions[i:i+word_length]
                             word_start = word_positions[0].global_start
                             word_end = word_positions[-1].global_end
@@ -152,10 +157,9 @@ class CalamariRecognize(Processor):
                                 word.add_Glyph(glyph)
 
                             line.add_Word(word)
-
+                            word_no += 1
 
                         i += word_length
-                        word_no += 1
 
 
             _page_update_higher_textequiv_levels('line', pcgts)
diff --git a/requirements.txt b/requirements.txt
index 1b6d3a6..0a426e0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,3 @@ calamari-ocr == 0.3.5
 setuptools >= 41.0.0  # tensorboard depends on this, but why do we get an error at runtime?
 click
 ocrd >= 2.2.1
-uniseg