Patch summary (free text before the first "diff --git" header is ignored by patch tools):
* cli.py, delete_tempcache(): build the cache directory path with os.path.join and
  pass it to shutil.rmtree directly instead of through os.path.normpath.
* edit_distance.py, levenshtein_matrix(): hash seq1 and seq2 separately (elements
  joined with "؟"), create the cache directory with os.makedirs(tempdir), and name
  the cache file "<hashseq1>.<hashseq2>.npy".
NOTE(review): line breaks, indentation and blank context lines were reconstructed
from the hunk line counts; verify against the original commit before applying.

diff --git a/qurator/dinglehopper/cli.py b/qurator/dinglehopper/cli.py
index 31e582b..00221f3 100644
--- a/qurator/dinglehopper/cli.py
+++ b/qurator/dinglehopper/cli.py
@@ -44,13 +44,13 @@ def gen_diff_report(gt_things, ocr_things, css_prefix, joiner, none, align):
 
 def delete_tempcache():
     # Delete all tempfiles and the directory (if empty)
-    tempdir = tempfile.gettempdir() + "/dinglehopper/"
+    tempdir = os.path.join(tempfile.gettempdir(), "dinglehopper/")
     if os.path.exists(tempdir):
         tempfiles = glob.glob(tempdir+"*.np*")
         for tempfilename in tempfiles:
             os.remove(tempfilename)
         if not os.listdir(tempdir):
-            shutil.rmtree(os.path.normpath(tempdir))
+            shutil.rmtree(tempdir)
 
 
 def process(gt, ocr, report_prefix):
diff --git a/qurator/dinglehopper/edit_distance.py b/qurator/dinglehopper/edit_distance.py
index 6f7c286..b224e8f 100644
--- a/qurator/dinglehopper/edit_distance.py
+++ b/qurator/dinglehopper/edit_distance.py
@@ -23,11 +23,12 @@ def levenshtein_matrix(seq1, seq2, tempcache=True):
     strings, e.g. lists of grapheme clusters or lists of word strings.
     """
     if tempcache:
-        hashname = hashlib.sha1(("".join(seq1) + "".join(seq2)).encode("utf-8")).hexdigest()
-        tempdir = os.path.normpath(tempfile.gettempdir() + "/dinglehopper/")
+        hashseq1 = hashlib.sha1(("؟".join(seq1)).encode("utf-8")).hexdigest()
+        hashseq2 = hashlib.sha1(("؟".join(seq2)).encode("utf-8")).hexdigest()
+        tempdir = os.path.join(tempfile.gettempdir(), "dinglehopper/")
         if not os.path.exists(tempdir):
-            os.makedirs(tempdir + "/dinglehopper/")
-        tempfilename = os.path.normpath(tempdir + "/" + hashname + ".npy")
+            os.makedirs(tempdir)
+        tempfilename = os.path.join(tempdir, hashseq1 + "." + hashseq2 + ".npy")
         if os.path.exists(tempfilename):
             return np.load(tempfilename)
 