Make line-level binarization independent of region-level binarization

pull/5/head
Konstantin Baierer 4 years ago
parent e5bc5572a2
commit a1c8f6f465

@ -79,38 +79,35 @@ class SbbBinarizeProcessor(Processor):
file_grp=self.output_file_grp) file_grp=self.output_file_grp)
page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comment='%s,binarized' % page_xywh['features'])) page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comment='%s,binarized' % page_xywh['features']))
else: elif oplevel == 'region':
regions = page.get_AllRegions(['Text', 'Table']) regions = page.get_AllRegions(['Text', 'Table'], depth=1)
if not regions: if not regions:
LOG.warning("Page '%s' contains no text/table regions", page_id) LOG.warning("Page '%s' contains no text/table regions", page_id)
for region in regions: for region in regions:
region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized')
region_image_bin = self._run_binarizer(region_image)
if oplevel == 'region': region_image_bin_path = self.workspace.save_image_file(
region_image_bin = self._run_binarizer(region_image) region_image_bin,
region_image_bin_path = self.workspace.save_image_file( "%s_%s.IMG-BIN" % (file_id, region.id),
region_image_bin, page_id=input_file.pageId,
"%s_%s.IMG-BIN" % (file_id, region.id), file_grp=self.output_file_grp)
page_id=input_file.pageId, region.add_AlternativeImage(
file_grp=self.output_file_grp) AlternativeImageType(filename=region_image_bin_path, comments='%s,binarized' % region_xywh['features']))
region.add_AlternativeImage(
AlternativeImageType(filename=region_image_bin_path, comments='%s,binarized' % region_xywh['features'])) elif oplevel == 'line':
region_line_tuples = [(r.id, r.get_TextLine()) for r in page.get_AllRegions(['Text'], depth=0)]
elif oplevel == 'line': if not region_line_tuples:
lines = region.get_TextLine() LOG.warning("Page '%s' contains no text lines", page_id)
if not lines: for region_id, line in region_line_tuples:
LOG.warning("Page '%s' region '%s' contains no text lines", page_id, region.id) line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized')
for line in lines: line_image_bin = self._run_binarizer(line_image)
line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') line_image_bin_path = self.workspace.save_image_file(
line_image_bin = self._run_binarizer(line_image) line_image_bin,
line_image_bin_path = self.workspace.save_image_file( "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id),
line_image_bin, page_id=input_file.pageId,
"%s_%s_%s.IMG-BIN" % (file_id, region.id, line.id), file_grp=self.output_file_grp)
page_id=input_file.pageId, line.add_AlternativeImage(
file_grp=self.output_file_grp) AlternativeImageType(filename=line_image_bin_path, comments='%s,binarized' % line_xywh['features']))
line.add_AlternativeImage(
AlternativeImageType(filename=line_image_bin_path, comments='%s,binarized' % line_xywh['features']))
self.workspace.add_file( self.workspace.add_file(
ID=file_id, ID=file_id,

Loading…
Cancel
Save