mirror of
https://github.com/qurator-spk/sbb_binarization.git
synced 2025-06-09 12:19:56 +02:00
Merge branch 'master-upstream'
# Conflicts: # README.md # sbb_binarize/cli.py # sbb_binarize/sbb_binarize.py
This commit is contained in:
commit
9ef8259677
5 changed files with 45 additions and 10 deletions
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"version": "0.0.10",
|
||||
"version": "0.0.11",
|
||||
"git_url": "https://github.com/qurator-spk/sbb_binarization",
|
||||
"tools": {
|
||||
"ocrd-sbb-binarize": {
|
||||
|
@ -17,13 +17,31 @@
|
|||
"description": "PAGE XML hierarchy level to operate on"
|
||||
},
|
||||
"model": {
|
||||
"description": "Directory containing HDF5 models. Can be an absolute path or a path relative to the current working directory or $SBB_BINARIZE_DATA environment variable (if set)",
|
||||
"description": "Directory containing HDF5 or SavedModel/ProtoBuf models. Can be an absolute path or a path relative to the OCR-D resource location, the current working directory or the $SBB_BINARIZE_DATA environment variable (if set)",
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"content-type": "text/directory",
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"resources": [
|
||||
{
|
||||
"url": "https://github.com/apacha/sbb_binarization/releases/download/pre-trained-models/model_2020_01_16.zip",
|
||||
"name": "default",
|
||||
"type": "archive",
|
||||
"path_in_archive": "model_2020_01_16",
|
||||
"size": 562917559,
|
||||
"description": "default models provided by github.com/qurator-spk"
|
||||
},
|
||||
{
|
||||
"url": "https://github.com/apacha/sbb_binarization/releases/download/pre-trained-models/model_2021_03_09.zip",
|
||||
"name": "default-2021-03-09",
|
||||
"type": "archive",
|
||||
"path_in_archive": ".",
|
||||
"size": 133693693,
|
||||
"description": "updated default models provided by github.com/qurator-spk"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,6 +30,10 @@ def cv2pil(img):
|
|||
|
||||
def pil2cv(img):
    """Convert a PIL image into a BGR array via ``cv2.cvtColor``.

    Images carrying an alpha channel (modes ``'LA'`` and ``'RGBA'``) are
    first pasted onto a white canvas of the corresponding alpha-less mode,
    using the alpha channel as the paste mask.  Modes ``'1'`` and ``'L'``
    are then converted with ``cv2.COLOR_GRAY2BGR``; every other mode is
    converted with ``cv2.COLOR_RGB2BGR``.

    Args:
        img: the PIL image to convert.

    Returns:
        The result of ``cv2.cvtColor`` applied to the image's array.
    """
    # from ocrd/workspace.py
    if img.mode in ('LA', 'RGBA'):
        # Drop the trailing 'A' from the mode name to get the opaque mode,
        # then composite the picture over white using its own alpha channel.
        opaque = Image.new(img.mode[:-1], img.size, 'white')
        opaque.paste(img, mask=img.getchannel('A'))
        img = opaque

    # Pick the colour conversion code from the (possibly flattened) mode.
    if img.mode in ('1', 'L'):
        conversion_code = cv2.COLOR_GRAY2BGR
    else:
        conversion_code = cv2.COLOR_RGB2BGR

    # Bilevel images are cast to uint8 before conversion.
    # NOTE(review): the cast presumably yields 0/1 rather than 0/255 pixel
    # values for mode '1' -- confirm this is what downstream code expects.
    if img.mode == '1':
        pixels = np.array(img).astype('uint8')
    else:
        pixels = np.array(img)

    return cv2.cvtColor(pixels, conversion_code)
|
||||
|
@ -106,7 +110,7 @@ class SbbBinarizeProcessor(Processor):
|
|||
|
||||
if oplevel == 'page':
|
||||
LOG.info("Binarizing on 'page' level in page '%s'", page_id)
|
||||
bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image), use_patches=True))
|
||||
bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image)))
|
||||
# update METS (add the image file):
|
||||
bin_image_path = self.workspace.save_image_file(bin_image,
|
||||
file_id + '.IMG-BIN',
|
||||
|
@ -120,7 +124,7 @@ class SbbBinarizeProcessor(Processor):
|
|||
LOG.warning("Page '%s' contains no text/table regions", page_id)
|
||||
for region in regions:
|
||||
region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized')
|
||||
region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=True))
|
||||
region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image)))
|
||||
region_image_bin_path = self.workspace.save_image_file(
|
||||
region_image_bin,
|
||||
"%s_%s.IMG-BIN" % (file_id, region.id),
|
||||
|
@ -135,7 +139,7 @@ class SbbBinarizeProcessor(Processor):
|
|||
LOG.warning("Page '%s' contains no text lines", page_id)
|
||||
for region_id, line in region_line_tuples:
|
||||
line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized')
|
||||
line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=True))
|
||||
line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image)))
|
||||
line_image_bin_path = self.workspace.save_image_file(
|
||||
line_image_bin,
|
||||
"%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue