mirror of
				https://github.com/qurator-spk/dinglehopper.git
				synced 2025-11-04 11:24:17 +01:00 
			
		
		
		
	FIX naming, spacing and deletion of tempcache files.
This commit is contained in:
		
							parent
							
								
									c9cfdc59ae
								
							
						
					
					
						commit
						d458568ebd
					
				
					 2 changed files with 33 additions and 27 deletions
				
			
		| 
						 | 
				
			
			@ -1,6 +1,8 @@
 | 
			
		|||
import shutil
 | 
			
		||||
 | 
			
		||||
import click
 | 
			
		||||
import os
 | 
			
		||||
import glob
 | 
			
		||||
from jinja2 import Environment, FileSystemLoader
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -40,11 +42,15 @@ def gen_diff_report(gt_things, ocr_things, css_prefix, joiner, none, align):
 | 
			
		|||
        </div>
 | 
			
		||||
        '''.format(gtx, ocrx)
 | 
			
		||||
 | 
			
		||||
def delete_temp():
 | 
			
		||||
    # XXX Delete all np-tempfiles?
 | 
			
		||||
    tempath = os.path.normpath(tempfile.gettempdir() + "/dinglehopper/")
 | 
			
		||||
    if os.path.exists(tempath):
 | 
			
		||||
        shutil.rmtree(os.path.normpath(tempath))
 | 
			
		||||
def delete_tempcache():
 | 
			
		||||
    # Delete all tempfiles and the directory (if empty)
 | 
			
		||||
    tempdir = tempfile.gettempdir() + "/dinglehopper/"
 | 
			
		||||
    if os.path.exists(tempdir):
 | 
			
		||||
        tempfiles = glob.glob(tempdir+"*.np*")
 | 
			
		||||
        for tempfilename in tempfiles:
 | 
			
		||||
            os.remove(tempfilename)
 | 
			
		||||
        if not os.listdir(tempdir):
 | 
			
		||||
            shutil.rmtree(os.path.normpath(tempdir))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def process(gt, ocr, report_prefix):
 | 
			
		||||
| 
						 | 
				
			
			@ -83,7 +89,7 @@ def process(gt, ocr, report_prefix):
 | 
			
		|||
            word_diff_report=word_diff_report
 | 
			
		||||
        ).dump(out_fn)
 | 
			
		||||
 | 
			
		||||
    delete_temp()
 | 
			
		||||
    delete_tempcache()
 | 
			
		||||
 | 
			
		||||
@click.command()
 | 
			
		||||
@click.argument('gt', type=click.Path(exists=True))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -26,19 +26,19 @@ def levenshtein_matrix(seq1, seq2, tempcache=True):
 | 
			
		|||
        hashname = hashlib.sha1(("".join(seq1) + "".join(seq2)).encode("utf-8")).hexdigest()
 | 
			
		||||
        tempdir = os.path.normpath(tempfile.gettempdir() + "/dinglehopper/")
 | 
			
		||||
        if not os.path.exists(tempdir):
 | 
			
		||||
            os.makedirs(os.path.normpath(tempfile.gettempdir() + "/dinglehopper/"))
 | 
			
		||||
        tempath = os.path.normpath(tempdir +"/"+hashname+".npy")
 | 
			
		||||
        if os.path.exists(tempath):
 | 
			
		||||
            return np.load(tempath)
 | 
			
		||||
            os.makedirs(tempdir + "/dinglehopper/")
 | 
			
		||||
        tempfilename = os.path.normpath(tempdir + "/" + hashname + ".npy")
 | 
			
		||||
        if os.path.exists(tempfilename):
 | 
			
		||||
            return np.load(tempfilename)
 | 
			
		||||
 | 
			
		||||
    m = len(seq1)
 | 
			
		||||
    n = len(seq2)
 | 
			
		||||
 | 
			
		||||
    D = np.ones((m + 1, n + 1), np.int)
 | 
			
		||||
    D = np.zeros((m + 1, n + 1), np.int)
 | 
			
		||||
    D[:, 0] = np.arange(m+1)
 | 
			
		||||
    D[0, :] = np.arange(n+1)
 | 
			
		||||
 | 
			
		||||
    if m > 10 and n > 10:
 | 
			
		||||
    if m > 26 and n > 26:
 | 
			
		||||
        # All grapheme which occur in both sets
 | 
			
		||||
        interset = set(seq1).intersection(set(seq2))
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -51,27 +51,27 @@ def levenshtein_matrix(seq1, seq2, tempcache=True):
 | 
			
		|||
 | 
			
		||||
        # Calculate the levensthein matrix
 | 
			
		||||
        for row, grapheme in enumerate(seq1):
 | 
			
		||||
            if seq1[row] in interset:
 | 
			
		||||
            if grapheme in interset:
 | 
			
		||||
                mask = masks[grapheme]
 | 
			
		||||
                for col in range(0,n):
 | 
			
		||||
                    D[row + 1, col + 1] = min(D[row, col] + mask[col], # same or substitution
 | 
			
		||||
                    D[row + 1, col + 1] = 1 + min(D[row, col] + mask[col], # same or subsitution
 | 
			
		||||
                                              D[row + 1, col], # insertion
 | 
			
		||||
                                              D[row, col + 1])+1 # deletion
 | 
			
		||||
                                              D[row, col + 1]) # deletion
 | 
			
		||||
            else:
 | 
			
		||||
                for col in range(0,n):
 | 
			
		||||
                    D[row+1,col+1] = min(D[row,col], # same or substitution
 | 
			
		||||
                    D[row+1, col+1] = 1 + min(D[row, col], # substitution
 | 
			
		||||
                                         D[row+1, col], # insertion
 | 
			
		||||
                                         D[row,col+1])+1 # deletion
 | 
			
		||||
                                         D[row, col+1]) # deletion
 | 
			
		||||
    else:
 | 
			
		||||
        for i in range(1, m+1):
 | 
			
		||||
            for j in range(1, n+1):
 | 
			
		||||
                D[i, j] = min(
 | 
			
		||||
                    D[i - 1, j - 1] + 1 * (seq1[i - 1] != seq2[j - 1]),  # Same or Substitution
 | 
			
		||||
                    D[i, j - 1] + 1,  # Insertion
 | 
			
		||||
                    D[i - 1, j] + 1   # Deletion
 | 
			
		||||
        for row in range(1, m+1):
 | 
			
		||||
            for col in range(1, n+1):
 | 
			
		||||
                D[row, col] = min(
 | 
			
		||||
                    D[row - 1, col - 1] + 1 * (seq1[row - 1] != seq2[col - 1]),  # Same or Substitution
 | 
			
		||||
                    D[row, col - 1] + 1,  # Insertion
 | 
			
		||||
                    D[row - 1, col] + 1   # Deletion
 | 
			
		||||
                )
 | 
			
		||||
    if tempcache:
 | 
			
		||||
        np.save(tempath,D)
 | 
			
		||||
        np.save(tempfilename,D)
 | 
			
		||||
    return D
 | 
			
		||||
 | 
			
		||||
def levenshtein(seq1, seq2):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue