mirror of
				https://github.com/qurator-spk/dinglehopper.git
				synced 2025-11-04 11:24:17 +01:00 
			
		
		
		
	adapt tests
This commit is contained in:
		
							parent
							
								
									ca5de5729d
								
							
						
					
					
						commit
						a33b713f36
					
				
					 8 changed files with 14 additions and 14 deletions
				
			
		| 
						 | 
					@ -14,9 +14,9 @@ def test_character_error_rate():
 | 
				
			||||||
    assert character_error_rate("Foo", "") == 3 / 3
 | 
					    assert character_error_rate("Foo", "") == 3 / 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert character_error_rate("", "") == 0
 | 
					    assert character_error_rate("", "") == 0
 | 
				
			||||||
    assert math.isinf(character_error_rate("", "Foo"))
 | 
					    assert character_error_rate("", "Foo") == 3 / 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert character_error_rate("Foo", "Food") == 1 / 3
 | 
					    assert character_error_rate("Foo", "Food") == 1 / 4
 | 
				
			||||||
    assert character_error_rate("Fnord", "Food") == 2 / 5
 | 
					    assert character_error_rate("Fnord", "Food") == 2 / 5
 | 
				
			||||||
    assert character_error_rate("Müll", "Mull") == 1 / 4
 | 
					    assert character_error_rate("Müll", "Mull") == 1 / 4
 | 
				
			||||||
    assert character_error_rate("Abstand", "Sand") == 4 / 7
 | 
					    assert character_error_rate("Abstand", "Sand") == 4 / 7
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,8 +6,8 @@ from .. import distance
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_distance():
 | 
					def test_distance():
 | 
				
			||||||
    assert distance("Fnord", "Food") == 2
 | 
					    assert distance("Fnord", "Food") == 2 / 5
 | 
				
			||||||
    assert distance("Müll", "Mull") == 1
 | 
					    assert distance("Müll", "Mull") == 1 / 4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    word1 = unicodedata.normalize("NFC", "Schlyñ")
 | 
					    word1 = unicodedata.normalize("NFC", "Schlyñ")
 | 
				
			||||||
    word2 = unicodedata.normalize("NFD", "Schlyñ")  # Different, decomposed!
 | 
					    word2 = unicodedata.normalize("NFD", "Schlyñ")  # Different, decomposed!
 | 
				
			||||||
| 
						 | 
					@ -21,4 +21,4 @@ def test_distance():
 | 
				
			||||||
    assert (
 | 
					    assert (
 | 
				
			||||||
        len(word2) == 7
 | 
					        len(word2) == 7
 | 
				
			||||||
    )  # This, OTOH, ends with LATIN SMALL LETTER M + COMBINING TILDE, 7 code points
 | 
					    )  # This, OTOH, ends with LATIN SMALL LETTER M + COMBINING TILDE, 7 code points
 | 
				
			||||||
    assert distance(word1, word2) == 1
 | 
					    assert distance(word1, word2) == 1 / 6
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -56,4 +56,4 @@ def test_character_error_rate_between_page_alto_2():
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert character_error_rate(gt, ocr) == 8 / 591  # Manually verified
 | 
					    assert character_error_rate(gt, ocr) == 8 / 594  # Manually verified
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -32,11 +32,11 @@ def test_cli_json_cer_is_infinity(tmp_path):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    with working_directory(tmp_path):
 | 
					    with working_directory(tmp_path):
 | 
				
			||||||
        with open("gt.txt", "w") as gtf:
 | 
					        with open("gt.txt", "w") as gtf:
 | 
				
			||||||
            gtf.write("")  # Empty to yield CER == inf
 | 
					            gtf.write("")
 | 
				
			||||||
        with open("ocr.txt", "w") as ocrf:
 | 
					        with open("ocr.txt", "w") as ocrf:
 | 
				
			||||||
            ocrf.write("Not important")
 | 
					            ocrf.write("Not important")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        process("gt.txt", "ocr.txt", "report")
 | 
					        process("gt.txt", "ocr.txt", "report")
 | 
				
			||||||
        with open("report.json", "r") as jsonf:
 | 
					        with open("report.json", "r") as jsonf:
 | 
				
			||||||
            j = json.load(jsonf)
 | 
					            j = json.load(jsonf)
 | 
				
			||||||
            assert j["cer"] == pytest.approx(float("inf"))
 | 
					            assert j["cer"] == pytest.approx(1.0)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -17,7 +17,7 @@ def test_distance_between_page_files():
 | 
				
			||||||
    # → 2 differences
 | 
					    # → 2 differences
 | 
				
			||||||
    gt = page_text(ET.parse(os.path.join(data_dir, "test-gt.page2018.xml")))
 | 
					    gt = page_text(ET.parse(os.path.join(data_dir, "test-gt.page2018.xml")))
 | 
				
			||||||
    ocr = page_text(ET.parse(os.path.join(data_dir, "test-fake-ocr.page2018.xml")))
 | 
					    ocr = page_text(ET.parse(os.path.join(data_dir, "test-fake-ocr.page2018.xml")))
 | 
				
			||||||
    assert distance(gt, ocr) == 2
 | 
					    assert distance(gt, ocr) == 2 / 827
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@pytest.mark.integration
 | 
					@pytest.mark.integration
 | 
				
			||||||
| 
						 | 
					@ -52,4 +52,4 @@ def test_distance_between_page_alto_2():
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert distance(gt, ocr) == 8  # Manually verified
 | 
					    assert distance(gt, ocr) == 8 / 594  # Manually verified
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -12,9 +12,9 @@ from .util import working_directory
 | 
				
			||||||
@pytest.mark.parametrize(
 | 
					@pytest.mark.parametrize(
 | 
				
			||||||
    "gt_file_content,ocr_file_content,cer_expected",
 | 
					    "gt_file_content,ocr_file_content,cer_expected",
 | 
				
			||||||
    [
 | 
					    [
 | 
				
			||||||
        ("", "Lorem ipsum", math.inf),
 | 
					        ("", "Lorem ipsum", 1.0),
 | 
				
			||||||
        ("Lorem ipsum", "", 1.0),
 | 
					        ("Lorem ipsum", "", 1.0),
 | 
				
			||||||
        ("\ufeff", "Lorem ipsum", math.inf),
 | 
					        ("\ufeff", "Lorem ipsum", 1.0),
 | 
				
			||||||
        ("Lorem ipsum", "\ufeff", 1.0),
 | 
					        ("Lorem ipsum", "\ufeff", 1.0),
 | 
				
			||||||
        ("", "", 0.0),
 | 
					        ("", "", 0.0),
 | 
				
			||||||
        ("\ufeff", "", 0.0),
 | 
					        ("\ufeff", "", 0.0),
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -64,5 +64,5 @@ def test_word_error_rate_between_page_alto_2():
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert (
 | 
					    assert (
 | 
				
			||||||
        word_error_rate(gt, ocr) == 7 / gt_word_count
 | 
					        word_error_rate(gt, ocr) == 7 / (gt_word_count + 1)
 | 
				
			||||||
    )  # Manually verified, 6 words are wrong, 1 got split (=2 errors)
 | 
					    )  # Manually verified, 6 words are wrong, 1 got split (=2 errors)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -76,7 +76,7 @@ def test_word_error_rate():
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert word_error_rate("Dies ist ein Beispielsatz!", "") == 4 / 4
 | 
					    assert word_error_rate("Dies ist ein Beispielsatz!", "") == 4 / 4
 | 
				
			||||||
    assert math.isinf(word_error_rate("", "Dies ist ein Beispielsatz!"))
 | 
					    assert word_error_rate("", "Dies ist ein Beispielsatz!") == 4 / 4
 | 
				
			||||||
    assert word_error_rate("", "") == 0
 | 
					    assert word_error_rate("", "") == 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert (
 | 
					    assert (
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue