🐛 Fix docstring of distance() for grapheme clusters

2025-08-10 02:00:02 +02:00 · 2023-10-31 19:08:25 +01:00 · 2023-10-31 19:08:25 +01:00 · 618ea567de
commit 618ea567de
parent e256526ea1
1 changed file with 4 additions and 4 deletions
--- a/qurator/dinglehopper/edit_distance.py
+++ b/qurator/dinglehopper/edit_distance.py
@@ -9,11 +9,11 @@ from .extracted_text import ExtractedText

@multimethod
 def distance(seq1: list[str], seq2: list[str]):
-    """Compute the Levenshtein edit distance between two Unicode strings
+    """Compute the Levenshtein edit distance between two lists of grapheme clusters.

-    Note that this is different from levenshtein() as this function knows about Unicode
-    normalization and grapheme clusters. This should be the correct way to compare two
-    Unicode strings.
+    This assumes that the grapheme clusters are already normalized.
+
+    Use distance(str, str) instead if you need to compare two Unicode strings.
    """
    return Levenshtein.distance(seq1, seq2)