From 057af6d19285564480ae005296593dae5234d382 Mon Sep 17 00:00:00 2001 From: JKamlah <> Date: Fri, 15 Nov 2019 18:15:12 +0100 Subject: [PATCH] Join strings with unique symbol for the hash. --- qurator/dinglehopper/edit_distance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qurator/dinglehopper/edit_distance.py b/qurator/dinglehopper/edit_distance.py index 4fa2dc2..3f532a2 100644 --- a/qurator/dinglehopper/edit_distance.py +++ b/qurator/dinglehopper/edit_distance.py @@ -23,8 +23,8 @@ def levenshtein_matrix(seq1, seq2, tempcache=True): strings, e.g. lists of grapheme clusters or lists of word strings. """ if tempcache: - hashseq1 = hashlib.sha1(("".join(seq1)).encode("utf-8")).hexdigest() - hashseq2 = hashlib.sha1(("".join(seq2)).encode("utf-8")).hexdigest() + hashseq1 = hashlib.sha1(("؟".join(seq1)).encode("utf-8")).hexdigest() + hashseq2 = hashlib.sha1(("؟".join(seq2)).encode("utf-8")).hexdigest() tempdir = os.path.join(tempfile.gettempdir() + "/dinglehopper/") if not os.path.exists(tempdir): os.makedirs(tempdir + "/dinglehopper/")