adapt OCR-D wrapper

pull/48/head
Robert Sachunsky 2 years ago
parent 342e94e287
commit 4086c6956a

@ -69,7 +69,8 @@ class SbbBinarizeProcessor(Processor):
raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path) raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path)
# resolve relative path via OCR-D ResourceManager # resolve relative path via OCR-D ResourceManager
model_path = self.resolve_resource(str(model_path)) model_path = self.resolve_resource(str(model_path))
self.binarizer = SbbBinarizer(model_dir=model_path, logger=LOG) self.binarizer = SbbBinarizer()
self.binarizer.load_model(model_path)
def process(self): def process(self):
""" """
@ -110,7 +111,7 @@ class SbbBinarizeProcessor(Processor):
if oplevel == 'page': if oplevel == 'page':
LOG.info("Binarizing on 'page' level in page '%s'", page_id) LOG.info("Binarizing on 'page' level in page '%s'", page_id)
bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image))) bin_image = cv2pil(self.binarizer.binarize_image(pil2cv(page_image)))
# update METS (add the image file): # update METS (add the image file):
bin_image_path = self.workspace.save_image_file(bin_image, bin_image_path = self.workspace.save_image_file(bin_image,
file_id + '.IMG-BIN', file_id + '.IMG-BIN',
@ -124,7 +125,7 @@ class SbbBinarizeProcessor(Processor):
LOG.warning("Page '%s' contains no text/table regions", page_id) LOG.warning("Page '%s' contains no text/table regions", page_id)
for region in regions: for region in regions:
region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized')
region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image))) region_image_bin = cv2pil(self.binarizer.binarize_image(image=pil2cv(region_image)))
region_image_bin_path = self.workspace.save_image_file( region_image_bin_path = self.workspace.save_image_file(
region_image_bin, region_image_bin,
"%s_%s.IMG-BIN" % (file_id, region.id), "%s_%s.IMG-BIN" % (file_id, region.id),
@ -139,7 +140,7 @@ class SbbBinarizeProcessor(Processor):
LOG.warning("Page '%s' contains no text lines", page_id) LOG.warning("Page '%s' contains no text lines", page_id)
for region_id, line in region_line_tuples: for region_id, line in region_line_tuples:
line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized')
line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image))) line_image_bin = cv2pil(self.binarizer.binarize_image(image=pil2cv(line_image)))
line_image_bin_path = self.workspace.save_image_file( line_image_bin_path = self.workspace.save_image_file(
line_image_bin, line_image_bin,
"%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id),

Loading…
Cancel
Save