dinglehopper: Add OCR-D parameter to choose TextEquiv level

pull/38/head
Gerber, Mike 4 years ago
parent 9744fa2567
commit b23e4ce30e

@@ -148,10 +148,7 @@ class ExtractedText:
@property @property
def text(self): def text(self):
if self._text is not None: if self._text is not None:
if self._text == '': return self._text
return None
else:
return self._text
else: else:
return self.joiner.join(s.text for s in self.segments) return self.joiner.join(s.text for s in self.segments)

@@ -82,7 +82,7 @@ def page_extract(tree, *, textequiv_level='region'):
regions.append(ExtractedText.from_text_segment(region, nsmap, textequiv_level=textequiv_level)) regions.append(ExtractedText.from_text_segment(region, nsmap, textequiv_level=textequiv_level))
# Filter empty region texts # Filter empty region texts
regions = [r for r in regions if r.text is not None] regions = [r for r in regions if r.text != '']
return ExtractedText(None, regions, '\n', None) return ExtractedText(None, regions, '\n', None)

@@ -22,6 +22,12 @@
"type": "boolean", "type": "boolean",
"default": true, "default": true,
"description": "Enable/disable metrics and green/red" "description": "Enable/disable metrics and green/red"
},
"textequiv_level": {
"type": "string",
"enum": ["region", "line"],
"default": "region",
"description": "PAGE XML hierarchy level to extract the text from"
} }
} }
} }

@@ -32,6 +32,7 @@ class OcrdDinglehopperEvaluate(Processor):
log = getLogger('processor.OcrdDinglehopperEvaluate') log = getLogger('processor.OcrdDinglehopperEvaluate')
metrics = self.parameter['metrics'] metrics = self.parameter['metrics']
textequiv_level = self.parameter['textequiv_level']
gt_grp, ocr_grp = self.input_file_grp.split(',') gt_grp, ocr_grp = self.input_file_grp.split(',')
for n, page_id in enumerate(self.workspace.mets.physical_pages): for n, page_id in enumerate(self.workspace.mets.physical_pages):
gt_file = next(self.workspace.mets.find_files(fileGrp=gt_grp, pageId=page_id)) gt_file = next(self.workspace.mets.find_files(fileGrp=gt_grp, pageId=page_id))
@@ -52,7 +53,8 @@ class OcrdDinglehopperEvaluate(Processor):
gt_file.local_filename, gt_file.local_filename,
ocr_file.local_filename, ocr_file.local_filename,
report_prefix, report_prefix,
metrics=metrics metrics=metrics,
textequiv_level=textequiv_level
) )
# Add reports to the workspace # Add reports to the workspace

Loading…
Cancel
Save