diff --git a/qurator/dinglehopper/extracted_text.py b/qurator/dinglehopper/extracted_text.py
index 46c8fec..916b123 100644
--- a/qurator/dinglehopper/extracted_text.py
+++ b/qurator/dinglehopper/extracted_text.py
@@ -4,9 +4,13 @@ import unicodedata
 from contextlib import suppress
 from itertools import repeat
 from typing import Optional
-from lxml import etree as ET
 
 import attr
+import numpy as np
+from lxml import etree as ET
+from ocrd_utils import getLogger
+
+LOG = getLogger('processor.OcrdDinglehopperEvaluate')
 
 
 class Normalization(enum.Enum):
@@ -47,15 +51,17 @@ def unjoin_ligatures(s):
         'fl': 'fl',
         'ffi': 'ffi',
         '': 'ct',
-        '': 'tz',            # MUFI: LATIN SMALL LIGATURE TZ
+        '': 'tz',  # MUFI: LATIN SMALL LIGATURE TZ
         '\uf532': 'as',  # eMOP: Latin small ligature as
         '\uf533': 'is',  # eMOP: Latin small ligature is
         '\uf534': 'us',  # eMOP: Latin small ligature us
         '\uf535': 'Qu',  # eMOP: Latin ligature capital Q small u
-        'ij': 'ij',   # U+0133 LATIN SMALL LIGATURE IJ
-        '\uE8BF': 'q&',  # MUFI: LATIN SMALL LETTER Q LIGATED WITH FINAL ET XXX How to replace this correctly?
+        'ij': 'ij',  # U+0133 LATIN SMALL LIGATURE IJ
+        '\uE8BF': 'q&',
+        # MUFI: LATIN SMALL LETTER Q LIGATED WITH FINAL ET
+        # XXX How to replace this correctly?
         '\uEBA5': 'ſp',  # MUFI: LATIN SMALL LIGATURE LONG S P
-        'st': 'st',            # U+FB06 LATIN SMALL LIGATURE ST
+        'st': 'st',  # U+FB06 LATIN SMALL LIGATURE ST
     }
     s = unicodedata.normalize('NFC', s)
     for fr, to in equivalences.items():
@@ -70,14 +76,14 @@ def substitute_equivalences(s):
         '': 'ü',
         '': 'ä',
         '==': '–',  # → en-dash
-        '—': '–',   # em-dash → en-dash
+        '—': '–',  # em-dash → en-dash
         '': 'ö',
         '’': '\'',
         '⸗': '-',
-        'aͤ': 'ä',   # LATIN SMALL LETTER A, COMBINING LATIN SMALL LETTER E
-        'oͤ': 'ö',   # LATIN SMALL LETTER O, COMBINING LATIN SMALL LETTER E
-        'uͤ': 'ü',   # LATIN SMALL LETTER U, COMBINING LATIN SMALL LETTER E
-        '\uF50E': 'q́'   # U+F50E LATIN SMALL LETTER Q WITH ACUTE ACCENT
+        'aͤ': 'ä',  # LATIN SMALL LETTER A, COMBINING LATIN SMALL LETTER E
+        'oͤ': 'ö',  # LATIN SMALL LETTER O, COMBINING LATIN SMALL LETTER E
+        'uͤ': 'ü',  # LATIN SMALL LETTER U, COMBINING LATIN SMALL LETTER E
+        '\uF50E': 'q́'  # U+F50E LATIN SMALL LETTER Q WITH ACUTE ACCENT
     }
 
     s = unicodedata.normalize('NFC', s)
@@ -178,27 +184,6 @@ class ExtractedText:
     def from_text_segment(cls, text_segment, nsmap, textequiv_level='region'):
         """Build an ExtractedText from a PAGE content text element"""
 
-        def invert_dict(d):
-            """Invert the given dict"""
-            return {v: k for k, v in d.items()}
-
-        def get_textequiv_unicode(s):
-            """Get the TextEquiv/Unicode text of the given PAGE text element"""
-            textequivs = s.findall('./page:TextEquiv', namespaces=nsmap)
-
-            if not textequivs:
-                return None
-
-            def get_index(te):
-                index = te.attrib.get('index')
-                try:
-                    return int(index)
-                except TypeError:
-                    return None
-            textequivs = sorted(textequivs, key=get_index)
-
-            return textequivs[0].find('./page:Unicode', namespaces=nsmap).text
-
         localname_for_textequiv_level = {
             'region': 'TextRegion',
             'line': 'TextLine'
@@ -216,9 +201,9 @@ class ExtractedText:
         if localname == localname_for_textequiv_level[textequiv_level]:
             segment_text = None
             with suppress(AttributeError):
-                segment_text = get_textequiv_unicode(text_segment)
-                segment_text = segment_text or ''
-                segment_text = normalize_sbb(segment_text)  # FIXME hardcoded SBB normalization
+                segment_text = get_textequiv_unicode(text_segment, nsmap)
+                # FIXME hardcoded SBB normalization
+                segment_text = normalize_sbb(segment_text)
             segment_text = segment_text or ''
             return cls(segment_id, None, None, segment_text)
         else:
@@ -226,17 +211,73 @@ class ExtractedText:
             sub_localname = children_for_localname[localname]
             sub_textequiv_level = textequiv_level_for_localname[sub_localname]
             segments = []
-            for sub_segment in text_segment.iterfind('./page:%s' % sub_localname, namespaces=nsmap):
+            for sub_segment in text_segment.iterfind('./page:%s' % sub_localname,
+                                                     namespaces=nsmap):
                 segments.append(
-                    ExtractedText.from_text_segment(
-                        sub_segment, nsmap,
-                        textequiv_level=sub_textequiv_level)
+                        ExtractedText.from_text_segment(
+                            sub_segment, nsmap,
+                            textequiv_level=sub_textequiv_level)
                 )
             joiner = joiner_for_textequiv_level[sub_textequiv_level]
             return cls(segment_id, segments, joiner, None)
 
-
     @classmethod
     def from_str(cls, text, normalization=Normalization.NFC_SBB):
         normalized_text = normalize(text, normalization)
         return cls(None, None, None, normalized_text, normalization=normalization)
+
+
+def invert_dict(d):
+    """Invert the given dict."""
+    return {v: k for k, v in d.items()}
+
+
+def get_textequiv_unicode(text_segment, nsmap) -> str:
+    """Get the TextEquiv/Unicode text of the given PAGE text element."""
+    segment_id = text_segment.attrib['id']
+    textequivs = text_segment.findall('./page:TextEquiv', namespaces=nsmap)
+
+    if not textequivs:
+        return ''
+
+    textequiv = get_first_textequiv(textequivs, segment_id)
+    return textequiv.find('./page:Unicode', namespaces=nsmap).text or ''
+
+
+def get_first_textequiv(textequivs, segment_id):
+    """Get the first TextEquiv based on index or conf order if index is not present."""
+    if len(textequivs) == 1:
+        return textequivs[0]
+
+    # try ordering by index
+    indices = np.array([get_attr(te, 'index') for te in textequivs], dtype=float)
+    nan_mask = np.isnan(indices)
+    if np.any(~nan_mask):
+        if np.any(nan_mask):
+            LOG.warning("TextEquiv without index in %s.", segment_id)
+        index = np.nanargmin(indices)
+    else:
+        # try ordering by conf
+        confidences = np.array([get_attr(te, 'conf') for te in textequivs], dtype=float)
+        if np.any(~np.isnan(confidences)):
+            LOG.info("No index attributes, use 'conf' attribute to sort TextEquiv in %s.",
+                     segment_id)
+            index = np.nanargmax(confidences)
+        else:
+            # fallback to first entry in case of neither index nor conf present
+            LOG.warning("No index attributes, use first TextEquiv in %s.", segment_id)
+            index = 0
+    return textequivs[index]
+
+
+def get_attr(te, attr_name) -> float:
+    """Extract the attribute for the given name.
+
+    Note: currently only handles numeric values!
+    Other or non-existent values are encoded as np.nan.
+    """
+    attr_value = te.attrib.get(attr_name)
+    try:
+        return float(attr_value)
+    except (TypeError, ValueError):
+        return np.nan
diff --git a/qurator/dinglehopper/tests/extracted_text_test.py b/qurator/dinglehopper/tests/extracted_text_test.py
index 0d59c99..2ce81cd 100644
--- a/qurator/dinglehopper/tests/extracted_text_test.py
+++ b/qurator/dinglehopper/tests/extracted_text_test.py
@@ -1,8 +1,10 @@
+import logging
 import unicodedata
-import pytest
-from uniseg.graphemecluster import grapheme_clusters
 from collections import namedtuple
+
+import pytest
 from lxml import etree as ET
+from uniseg.graphemecluster import grapheme_clusters
 
 from .. import seq_align, ExtractedText
 
@@ -45,12 +47,17 @@ def test_align():
     test2 = ExtractedText(None, [
         ExtractedText('x0', None, None, 'foo'),
         ExtractedText('x1', None, None, 'bar'),
-        ExtractedText('x2', None, None, '.'),  # extra .
-        ExtractedText('x3', None, None, 'bazim̃ga'),  # deletion + different grapheme cluster, m̃ also is two Python characters
+        # extra .
+        ExtractedText('x2', None, None, '.'),
+        # deletion + different grapheme cluster, m̃ also is two Python characters
+        ExtractedText('x3', None, None, 'bazim̃ga'),
     ], ' ', None)
 
-    left_pos = 0; right_pos = 0; alignment = []
-    for left, right in seq_align(grapheme_clusters(test1.text), grapheme_clusters(test2.text)):
+    left_pos = 0
+    right_pos = 0
+    alignment = []
+    for left, right in seq_align(grapheme_clusters(test1.text),
+                                 grapheme_clusters(test2.text)):
         left_id = test1.segment_id_for_pos(left_pos) if left is not None else None
         right_id = test2.segment_id_for_pos(right_pos) if right is not None else None
         el = AlignmentElement(left, right, left_id, right_id)
@@ -63,33 +70,49 @@ def test_align():
     print('test1: {}'.format(test1.text))
     print('test2: {}'.format(test2.text))
 
-    assert alignment[0]  == ('f', 'f', 's0', 'x0')
-    assert alignment[8]  == (None, '.', None, 'x2')
-    assert alignment[12] == ('t', None, 's2', None)
-    assert alignment[15] == ('n', 'm̃', 's2', 'x3')
-
+    assert alignment[0] == ('f', 'f', 's0', 'x0')
+    assert alignment[8] == (None, '.', None, 'x2')
+    assert alignment[12] == ('t', None, 's2', None)
+    assert alignment[15] == ('n', 'm̃', 's2', 'x3')
+
+
+@pytest.mark.parametrize("attributes,expected_index,expected_log", [
+    ([], None, None),
+    (['index="0"'], 0, None),
+    ([''], 0, None),
+    (['conf="0.5"'], 0, None),
+    (['index="1"', 'index="0"'], 1, None),
+    (['index="0" conf="0.4"', 'conf="0.5"'], 0, "TextEquiv without index"),
+    (['conf="0.4"', 'conf="0.5"', 'conf="0.9"'], 2,
+     "No index attributes, use 'conf' attribute to sort TextEquiv"),
+    (['index="0"', ''], 0, "TextEquiv without index"),
+    (['', 'conf="0.4"'], 1,
+     "No index attributes, use 'conf' attribute to sort TextEquiv"),
+    (['', ''], 0, "No index attributes, use first TextEquiv"),
+])
+def test_textequiv(attributes, expected_index, expected_log, caplog):
+    """Test that extracting text from a PAGE TextEquiv is working without index attr."""
+    caplog.set_level(logging.INFO)
+    xml = "<?xml version=\"1.0\"?>"
+    ns = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15"
+    text = ["Text {0}".format(i) for i in range(len(attributes) + 1)]
+
+    equiv = ["<TextEquiv {0}><Unicode>{1}</Unicode></TextEquiv>".format(attr, text[i])
+             for i, attr in enumerate(attributes)]
+
+    textline = "{0}<TextLine id=\"l3\" xmlns=\"{1}\">{2}</TextLine>"
+    textline = textline.format(xml, ns, ''.join(equiv))
 
-def test_textequiv_index():
-    """
-    Test that extracting text from a PAGE TextEquiv honors the "index".
-    """
-
-    # This example textline has two TextEquivs, the one with the lowest index
-    # should be used. The XML order of the TextEquivs is deliberately not
-    # in index order.
-    textline="""<?xml version="1.0"?>
-    <TextLine id="l3" xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15">
-        <TextEquiv index="1">
-            <Unicode>gefahren zu haben, einzelne Bemorkungen und Beobäch-</Unicode>
-        </TextEquiv>
-        <TextEquiv index="0">
-            <Unicode>gefahren zu haben, einzelne Bemerkungen und Beobach-</Unicode>
-        </TextEquiv>
-    </TextLine>
-    """
     root = ET.fromstring(textline)
-    nsmap = {'page': "http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15" }
-    result = ExtractedText.from_text_segment(root, nsmap, textequiv_level='line').text
-    expected = "gefahren zu haben, einzelne Bemerkungen und Beobach-"
-
-    assert expected == result
+    result = ExtractedText.from_text_segment(root,
+                                             {'page': ns},
+                                             textequiv_level='line').text
+    if expected_index is None:
+        assert not result
+    else:
+        assert result == text[expected_index]
+
+    if expected_log is None:
+        assert "no_index" not in caplog.text
+    else:
+        assert expected_log in caplog.text