|
|
@ -50,7 +50,7 @@ class SbbBinarizeProcessor(Processor):
|
|
|
|
oplevel = self.parameter['operation_level']
|
|
|
|
oplevel = self.parameter['operation_level']
|
|
|
|
use_patches = self.parameter['patches'] # pylint: disable=attribute-defined-outside-init
|
|
|
|
use_patches = self.parameter['patches'] # pylint: disable=attribute-defined-outside-init
|
|
|
|
model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init
|
|
|
|
model_path = self.parameter['model'] # pylint: disable=attribute-defined-outside-init
|
|
|
|
binarizer = SbbBinarizer(model_dir=self.model_path)
|
|
|
|
binarizer = SbbBinarizer(model_dir=model_path)
|
|
|
|
|
|
|
|
|
|
|
|
for n, input_file in enumerate(self.input_files):
|
|
|
|
for n, input_file in enumerate(self.input_files):
|
|
|
|
file_id = make_file_id(input_file, self.output_file_grp)
|
|
|
|
file_id = make_file_id(input_file, self.output_file_grp)
|
|
|
@ -64,7 +64,7 @@ class SbbBinarizeProcessor(Processor):
|
|
|
|
if oplevel == 'page':
|
|
|
|
if oplevel == 'page':
|
|
|
|
LOG.info("Binarizing on 'page' level in page '%s'", page_id)
|
|
|
|
LOG.info("Binarizing on 'page' level in page '%s'", page_id)
|
|
|
|
page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')
|
|
|
|
page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')
|
|
|
|
bin_image = cv2pil(binarizer.run(image=pil2cv(page_image), patches=use_patches))
|
|
|
|
bin_image = cv2pil(binarizer.run(image=pil2cv(page_image), use_patches=use_patches))
|
|
|
|
# update METS (add the image file):
|
|
|
|
# update METS (add the image file):
|
|
|
|
bin_image_path = self.workspace.save_image_file(bin_image,
|
|
|
|
bin_image_path = self.workspace.save_image_file(bin_image,
|
|
|
|
file_id + '.IMG-BIN',
|
|
|
|
file_id + '.IMG-BIN',
|
|
|
@ -78,7 +78,7 @@ class SbbBinarizeProcessor(Processor):
|
|
|
|
LOG.warning("Page '%s' contains no text/table regions", page_id)
|
|
|
|
LOG.warning("Page '%s' contains no text/table regions", page_id)
|
|
|
|
for region in regions:
|
|
|
|
for region in regions:
|
|
|
|
region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized')
|
|
|
|
region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized')
|
|
|
|
region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), patches=use_patches))
|
|
|
|
region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=use_patches))
|
|
|
|
region_image_bin_path = self.workspace.save_image_file(
|
|
|
|
region_image_bin_path = self.workspace.save_image_file(
|
|
|
|
region_image_bin,
|
|
|
|
region_image_bin,
|
|
|
|
"%s_%s.IMG-BIN" % (file_id, region.id),
|
|
|
|
"%s_%s.IMG-BIN" % (file_id, region.id),
|
|
|
@ -93,7 +93,7 @@ class SbbBinarizeProcessor(Processor):
|
|
|
|
LOG.warning("Page '%s' contains no text lines", page_id)
|
|
|
|
LOG.warning("Page '%s' contains no text lines", page_id)
|
|
|
|
for region_id, line in region_line_tuples:
|
|
|
|
for region_id, line in region_line_tuples:
|
|
|
|
line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized')
|
|
|
|
line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized')
|
|
|
|
line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), patches=use_patches))
|
|
|
|
line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=use_patches))
|
|
|
|
line_image_bin_path = self.workspace.save_image_file(
|
|
|
|
line_image_bin_path = self.workspace.save_image_file(
|
|
|
|
line_image_bin,
|
|
|
|
line_image_bin,
|
|
|
|
"%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id),
|
|
|
|
"%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id),
|
|
|
|