mirror of
				https://github.com/mikegerber/ocrd_calamari.git
				synced 2025-10-31 15:54:13 +01:00 
			
		
		
		
	✅ Convert to a pytest style test
This commit is contained in:
		
							parent
							
								
									2393edc645
								
							
						
					
					
						commit
						e07b333db1
					
				
					 1 changed file with 46 additions and 43 deletions
				
			
		|  | @ -3,63 +3,66 @@ import shutil | ||||||
| import subprocess | import subprocess | ||||||
| import urllib.request | import urllib.request | ||||||
| 
 | 
 | ||||||
| from test.base import TestCase, main, assets, skip | import pytest | ||||||
| 
 |  | ||||||
| from ocrd.resolver import Resolver | from ocrd.resolver import Resolver | ||||||
| 
 | 
 | ||||||
| from ocrd_calamari import CalamariRecognize | from ocrd_calamari import CalamariRecognize | ||||||
|  | from test.base import main, assets | ||||||
| 
 | 
 | ||||||
| METS_KANT = assets.url_of('kant_aufklaerung_1784-page-block-line-word_glyph/data/mets.xml') | METS_KANT = assets.url_of('kant_aufklaerung_1784-page-block-line-word_glyph/data/mets.xml') | ||||||
| 
 |  | ||||||
| WORKSPACE_DIR = '/tmp/test-ocrd-calamari' | WORKSPACE_DIR = '/tmp/test-ocrd-calamari' | ||||||
| 
 | 
 | ||||||
| class TestCalamariRecognize(TestCase): |  | ||||||
| 
 | 
 | ||||||
|     def setUp(self): | @pytest.fixture | ||||||
|         if os.path.exists(WORKSPACE_DIR): | def workspace(): | ||||||
|             shutil.rmtree(WORKSPACE_DIR) |     if os.path.exists(WORKSPACE_DIR): | ||||||
|         os.makedirs(WORKSPACE_DIR) |         shutil.rmtree(WORKSPACE_DIR) | ||||||
|  |     os.makedirs(WORKSPACE_DIR) | ||||||
| 
 | 
 | ||||||
|     def runTest(self): |     resolver = Resolver() | ||||||
|         resolver = Resolver() |     workspace = resolver.workspace_from_url(METS_KANT, dst_dir=WORKSPACE_DIR) | ||||||
|         workspace = resolver.workspace_from_url(METS_KANT, dst_dir=WORKSPACE_DIR) |  | ||||||
| 
 | 
 | ||||||
|         # XXX Work around data bug(?): |     # XXX Work around data bug(?): | ||||||
|         #     PAGE-XML links to OCR-D-IMG/INPUT_0017.tif, but this is nothing core can download |     #     PAGE-XML links to OCR-D-IMG/INPUT_0017.tif, but this is nothing core can download | ||||||
|         os.makedirs(os.path.join(WORKSPACE_DIR, 'OCR-D-IMG')) |     os.makedirs(os.path.join(WORKSPACE_DIR, 'OCR-D-IMG')) | ||||||
|         for f in ['INPUT_0017.tif', 'INPUT_0020.tif']: |     for f in ['INPUT_0017.tif', 'INPUT_0020.tif']: | ||||||
|             urllib.request.urlretrieve( |         urllib.request.urlretrieve( | ||||||
|                     "https://github.com/OCR-D/assets/raw/master/data/kant_aufklaerung_1784/data/OCR-D-IMG/" + f, |             "https://github.com/OCR-D/assets/raw/master/data/kant_aufklaerung_1784/data/OCR-D-IMG/" + f, | ||||||
|                     os.path.join(WORKSPACE_DIR, 'OCR-D-IMG', f)) |             os.path.join(WORKSPACE_DIR, 'OCR-D-IMG', f)) | ||||||
| 
 | 
 | ||||||
|         # The binarization options I have are: |     return workspace | ||||||
|         # |  | ||||||
|         # a. ocrd_kraken which tries to install cltsm, whose installation is borken on my machine (protobuf) |  | ||||||
|         # b. ocrd_olena which 1. I cannot fully install via pip and 2. whose dependency olena doesn't compile on my |  | ||||||
|         #    machine |  | ||||||
|         # c. just fumble with the original files |  | ||||||
|         # |  | ||||||
|         # So I'm going for option c. |  | ||||||
|         for f in ['INPUT_0017.tif', 'INPUT_0020.tif']: |  | ||||||
|             ff = os.path.join(WORKSPACE_DIR, 'OCR-D-IMG', f) |  | ||||||
|             subprocess.call(['convert', ff, '-colorspace', 'Gray', ff]) |  | ||||||
| 
 | 
 | ||||||
|         # XXX Should remove GT text to really test this |  | ||||||
| 
 | 
 | ||||||
|         CalamariRecognize( | def test_recognize(workspace): | ||||||
|             workspace, |     # The binarization options I have are: | ||||||
|             input_file_grp="OCR-D-GT-SEG-LINE", |     # | ||||||
|             output_file_grp="OCR-D-OCR-CALAMARI", |     # a. ocrd_kraken which tries to install cltsm, whose installation is borken on my machine (protobuf) | ||||||
|             parameter={ |     # b. ocrd_olena which 1. I cannot fully install via pip and 2. whose dependency olena doesn't compile on my | ||||||
|                 'checkpoint': os.path.join(os.getcwd(), 'gt4histocr-calamari/*.ckpt.json') |     #    machine | ||||||
|             } |     # c. just fumble with the original files | ||||||
|         ).process() |     # | ||||||
|         workspace.save_mets() |     # So I'm going for option c. | ||||||
|  |     for f in ['INPUT_0017.tif', 'INPUT_0020.tif']: | ||||||
|  |         ff = os.path.join(WORKSPACE_DIR, 'OCR-D-IMG', f) | ||||||
|  |         subprocess.call(['convert', ff, '-colorspace', 'Gray', ff]) | ||||||
|  | 
 | ||||||
|  |     # XXX Should remove GT text to really test this | ||||||
|  | 
 | ||||||
|  |     CalamariRecognize( | ||||||
|  |         workspace, | ||||||
|  |         input_file_grp="OCR-D-GT-SEG-LINE", | ||||||
|  |         output_file_grp="OCR-D-OCR-CALAMARI", | ||||||
|  |         parameter={ | ||||||
|  |             'checkpoint': os.path.join(os.getcwd(), 'gt4histocr-calamari/*.ckpt.json') | ||||||
|  |         } | ||||||
|  |     ).process() | ||||||
|  |     workspace.save_mets() | ||||||
|  | 
 | ||||||
|  |     page1 = os.path.join(workspace.directory, 'OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml') | ||||||
|  |     assert os.path.exists(page1) | ||||||
|  |     with open(page1, 'r', encoding='utf-8') as f: | ||||||
|  |         assert 'verſchuldeten' in f.read() | ||||||
| 
 | 
 | ||||||
|         page1 = os.path.join(workspace.directory, 'OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml') |  | ||||||
|         self.assertTrue(os.path.exists(page1)) |  | ||||||
|         with open(page1, 'r', encoding='utf-8') as f: |  | ||||||
|             self.assertIn('verſchuldeten', f.read()) |  | ||||||
| 
 | 
 | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     main() |     main() | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue