mirror of
				https://github.com/qurator-spk/sbb_binarization.git
				synced 2025-10-31 01:24:14 +01:00 
			
		
		
		
	Merge branch 'master-upstream'
# Conflicts:
#	README.md
#	sbb_binarize/cli.py
#	sbb_binarize/sbb_binarize.py
This commit is contained in:
		
						commit
						9ef8259677
					
				
					 5 changed files with 45 additions and 10 deletions
				
			
		|  | @ -1,5 +1,5 @@ | |||
| { | ||||
|   "version": "0.0.10", | ||||
|   "version": "0.0.11", | ||||
|   "git_url": "https://github.com/qurator-spk/sbb_binarization", | ||||
|   "tools": { | ||||
|     "ocrd-sbb-binarize": { | ||||
|  | @ -17,13 +17,31 @@ | |||
|           "description": "PAGE XML hierarchy level to operate on" | ||||
|         }, | ||||
|         "model": { | ||||
|           "description": "Directory containing HDF5 models. Can be an absolute path or a path relative to the current working directory or $SBB_BINARIZE_DATA environment variable (if set)", | ||||
|           "description": "Directory containing HDF5 or SavedModel/ProtoBuf models. Can be an absolute path or a path relative to the OCR-D resource location, the current working directory or the $SBB_BINARIZE_DATA environment variable (if set)", | ||||
|           "type": "string", | ||||
|           "format": "uri", | ||||
|           "content-type": "text/directory", | ||||
|           "required": true | ||||
|         } | ||||
|       } | ||||
|       }, | ||||
|       "resources": [ | ||||
|         { | ||||
|           "url": "https://github.com/apacha/sbb_binarization/releases/download/pre-trained-models/model_2020_01_16.zip", | ||||
|           "name": "default", | ||||
|           "type": "archive", | ||||
|           "path_in_archive": "model_2020_01_16", | ||||
|           "size": 562917559, | ||||
|           "description": "default models provided by github.com/qurator-spk" | ||||
|         }, | ||||
|         { | ||||
|           "url": "https://github.com/apacha/sbb_binarization/releases/download/pre-trained-models/model_2021_03_09.zip", | ||||
|           "name": "default-2021-03-09", | ||||
|           "type": "archive", | ||||
|           "path_in_archive": ".", | ||||
|           "size": 133693693, | ||||
|           "description": "updated default models provided by github.com/qurator-spk" | ||||
|         } | ||||
|       ] | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  |  | |||
|  | @ -30,6 +30,10 @@ def cv2pil(img): | |||
| 
 | ||||
| def pil2cv(img): | ||||
|     # from ocrd/workspace.py | ||||
|     if img.mode in ('LA', 'RGBA'): | ||||
|         newimg = Image.new(img.mode[:-1], img.size, 'white') | ||||
|         newimg.paste(img, mask=img.getchannel('A')) | ||||
|         img = newimg | ||||
|     color_conversion = cv2.COLOR_GRAY2BGR if img.mode in ('1', 'L') else  cv2.COLOR_RGB2BGR | ||||
|     pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img) | ||||
|     return cv2.cvtColor(pil_as_np_array, color_conversion) | ||||
|  | @ -106,7 +110,7 @@ class SbbBinarizeProcessor(Processor): | |||
| 
 | ||||
|             if oplevel == 'page': | ||||
|                 LOG.info("Binarizing on 'page' level in page '%s'", page_id) | ||||
|                 bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image), use_patches=True)) | ||||
|                 bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image))) | ||||
|                 # update METS (add the image file): | ||||
|                 bin_image_path = self.workspace.save_image_file(bin_image, | ||||
|                         file_id + '.IMG-BIN', | ||||
|  | @ -120,7 +124,7 @@ class SbbBinarizeProcessor(Processor): | |||
|                     LOG.warning("Page '%s' contains no text/table regions", page_id) | ||||
|                 for region in regions: | ||||
|                     region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') | ||||
|                     region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=True)) | ||||
|                     region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image))) | ||||
|                     region_image_bin_path = self.workspace.save_image_file( | ||||
|                             region_image_bin, | ||||
|                             "%s_%s.IMG-BIN" % (file_id, region.id), | ||||
|  | @ -135,7 +139,7 @@ class SbbBinarizeProcessor(Processor): | |||
|                     LOG.warning("Page '%s' contains no text lines", page_id) | ||||
|                 for region_id, line in region_line_tuples: | ||||
|                     line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') | ||||
|                     line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=True)) | ||||
|                     line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image))) | ||||
|                     line_image_bin_path = self.workspace.save_image_file( | ||||
|                             line_image_bin, | ||||
|                             "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue