diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/qurator/dinglehopper/character_error_rate.py b/qurator/dinglehopper/character_error_rate.py
index 29826e3..9f5fda0 100644
--- a/qurator/dinglehopper/character_error_rate.py
+++ b/qurator/dinglehopper/character_error_rate.py
@@ -17,7 +17,7 @@ def character_error_rate_n(reference, compared) -> Tuple[float, int]:
     """
     if isinstance(reference, str):
         return character_error_rate_n(
-                ExtractedText.from_text(reference),
+                ExtractedText.from_str(reference),
                 compared)
 
     d = distance(reference, compared)
diff --git a/qurator/dinglehopper/edit_distance.py b/qurator/dinglehopper/edit_distance.py
index bc607a9..88d3127 100644
--- a/qurator/dinglehopper/edit_distance.py
+++ b/qurator/dinglehopper/edit_distance.py
@@ -77,14 +77,16 @@ def distance(s1, s2):
     clusters. This should be the correct way to compare two Unicode strings.
     """
 
-    if isinstance(s1, ExtractedText):
-        s1 = s1.text
-    if isinstance(s2, ExtractedText):
-        s2 = s2.text
+    # XXX Implicit normalization: plain str arguments are normalized by from_str()
+    if isinstance(s1, str):
+        s1 = ExtractedText.from_str(s1)
+    if isinstance(s2, str):
+        s2 = ExtractedText.from_str(s2)
+    # s1 and s2 are now guaranteed (by ExtractedText) to be in NFC
 
-    s1 = list(grapheme_clusters(unicodedata.normalize('NFC', s1)))
-    s2 = list(grapheme_clusters(unicodedata.normalize('NFC', s2)))
-    return levenshtein(s1, s2)
+    seq1 = list(grapheme_clusters(s1.text))
+    seq2 = list(grapheme_clusters(s2.text))
+    return levenshtein(seq1, seq2)
 
 
 def seq_editops(seq1, seq2):
diff --git a/qurator/dinglehopper/ocr_files.py b/qurator/dinglehopper/ocr_files.py
index 2b8b0de..5824dda 100644
--- a/qurator/dinglehopper/ocr_files.py
+++ b/qurator/dinglehopper/ocr_files.py
@@ -23,6 +23,12 @@ class Normalization(enum.Enum):
 
 @attr.s(frozen=True)
 class ExtractedText:
+    """
+    Extracted text
+
+    Objects of this class are guaranteed to be (a) always normalized according to
+    their own normalization setting and (b) in NFC.
+    """
     segment_id = attr.ib(type=Optional[str])
 
     @segment_id.validator
@@ -48,6 +54,8 @@ class ExtractedText:
 
     @_text.validator
     def check(self, _, value):
+        if value is not None and unicodedata.normalize('NFC', value) != value:
+            raise ValueError('String "{}" is not in NFC.'.format(value))
         if value is not None and normalize(value, self.normalization) != value:
             raise ValueError('String "{}" is not normalized.'.format(value))
 
@@ -93,9 +101,9 @@ class ExtractedText:
         return cls(segment_id, None, None, segment_text)
 
     @classmethod
-    def from_text(cls, text):
-        return cls(None, None, None, text)
-
+    def from_str(cls, text, normalization=Normalization.NFC_SBB):
+        normalized_text = normalize(text, normalization)
+        return cls(None, None, None, normalized_text, normalization=normalization)
 
 
 def normalize(text, normalization):
@@ -138,7 +146,7 @@ def alto_extract(tree):
 
     return ExtractedText(
             None,
-            (ExtractedText.from_text(normalize_sbb(line_text)) for line_text in lines),
+            (ExtractedText.from_str(normalize_sbb(line_text)) for line_text in lines),
             '\n',
             None
     )
diff --git a/qurator/dinglehopper/tests/extracted_text_test.py b/qurator/dinglehopper/tests/extracted_text_test.py
index 8cac4c1..ef2776c 100644
--- a/qurator/dinglehopper/tests/extracted_text_test.py
+++ b/qurator/dinglehopper/tests/extracted_text_test.py
@@ -20,7 +20,7 @@ def test_text():
 
 
 def test_normalization_check():
-    with pytest.raises(ValueError, match=r'.*is not normalized.*'):
+    with pytest.raises(ValueError, match=r'.*is not in NFC.*'):
         ExtractedText('foo', None, None, unicodedata.normalize('NFD', 'Schlyñ'))
     assert ExtractedText('foo', None, None, unicodedata.normalize('NFC', 'Schlyñ'))