Mirror of https://github.com/mikegerber/ocrd_calamari.git (synced 2025-06-11 04:39:53 +02:00)
🚧 Use character positions as word segmentation

parent: 17dbeb2480
commit: 24532f693a
1 changed file with 32 additions and 4 deletions
@@ -13,9 +13,10 @@ from ocrd_models.ocrd_page import (
     LabelType, LabelsType,
     MetadataItemType,
     TextEquivType,
+    WordType, CoordsType,
     to_xml
 )
-from ocrd_utils import getLogger, concat_padded, MIMETYPE_PAGE
+from ocrd_utils import getLogger, concat_padded, coordinates_for_segment, points_from_polygon, MIMETYPE_PAGE

 from ocrd_calamari.config import OCRD_TOOL, TF_CPP_MIN_LOG_LEVEL

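The two helpers newly imported from ocrd_utils, coordinates_for_segment and points_from_polygon, are what later lets the character positions (pixel offsets within the extracted line image) be mapped back into absolute page coordinates and serialised as a PAGE points string. A minimal sketch of that idea, assuming the line-to-page relationship is a plain affine transform; the function name polygon_to_page_points and the 3x3 matrix representation are illustrative only, not the ocrd_utils API:

import numpy as np

def polygon_to_page_points(polygon, transform):
    # polygon: list of (x, y) points in line-relative pixel coordinates
    # transform: assumed 3x3 affine matrix mapping line -> page coordinates
    pts = np.array([[x, y, 1.0] for (x, y) in polygon]).T
    page = (transform @ pts).T[:, :2]
    return " ".join("%d,%d" % (round(x), round(y)) for (x, y) in page)

# Example: a line whose top-left corner sits at (100, 200) on the page
shift = np.array([[1.0, 0.0, 100.0],
                  [0.0, 1.0, 200.0],
                  [0.0, 0.0, 1.0]])
print(polygon_to_page_points([(5, 0), (20, 0), (20, 10), (5, 10)], shift))
# prints: 105,200 120,200 120,210 105,210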
@@ -69,7 +70,7 @@ class CalamariRecognize(Processor):
                 for (line_no, line) in enumerate(textlines):
                     log.debug("Recognizing line '%s' in region '%s'", line_no, region.id)

-                    line_image, line_xywh = self.workspace.image_from_segment(line, region_image, region_xywh)
+                    line_image, line_coords = self.workspace.image_from_segment(line, region_image, region_xywh)
                     line_image_np = np.array(line_image, dtype=np.uint8)

                     raw_results = list(self.predictor.predict_raw([line_image_np], progress_bar=False))[0]
@@ -82,14 +83,41 @@ class CalamariRecognize(Processor):
                     line_text = prediction.sentence
                     line_conf = prediction.avg_char_probability

+                    # Delete existing results
                     if line.get_TextEquiv():
                         log.warning("Line '%s' already contained text results", line.id)
-                    line.set_TextEquiv([TextEquivType(Unicode=line_text, conf=line_conf)])
+                    line.set_TextEquiv([])

                     if line.get_Word():
                         log.warning("Line '%s' already contained word segmentation", line.id)
                     line.set_Word([])

+                    # Save line results
+                    line.set_TextEquiv([TextEquivType(Unicode=line_text, conf=line_conf)])
+
+                    # Save word results
+                    # XXX For early development just put every char = glyph into its own word
+                    for word_no, p in enumerate(prediction.positions):
+                        start = p.global_start
+                        end = p.global_end
+
+                        # XXX Maybe use version in ocrd_tesserocr
+                        h = line_image.height
+                        polygon = [(start, 0), (end, 0), (end, h), (start, h)]
+                        points = points_from_polygon(coordinates_for_segment(polygon, None, line_coords))
+
+                        word = WordType(
+                            id='%s_word%04d' % (line.id, word_no),
+                            Coords=CoordsType(points))
+
+                        chars = sorted(p.chars, key=lambda k: k.probability, reverse=True)
+                        for index, char in enumerate(chars):
+                            if char.char:
+                                word.add_TextEquiv(TextEquivType(Unicode=char.char, index=index, conf=char.probability))
+                                # XXX Note that omission probabilities are not normalized?!
+
+                        line.add_Word(word)
+
             _page_update_higher_textequiv_levels('line', pcgts)
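As the XXX comments note, this first version simply turns every recognised character into its own PAGE Word, with global_start/global_end as the horizontal extent and the full line height as the vertical extent. Purely as an illustration of where this could go (not part of the commit), consecutive character positions could be grouped into whitespace-separated words; CharPos below is a hypothetical stand-in for Calamari's position entries:

from dataclasses import dataclass
from typing import List, Tuple

@dataclass
class CharPos:
    char: str          # most probable character at this position
    global_start: int  # left pixel offset within the line image
    global_end: int    # right pixel offset within the line image

def word_boxes(positions: List[CharPos]) -> List[Tuple[str, int, int]]:
    # Group consecutive non-space characters into (text, start, end) words.
    words, text, start, end = [], "", None, None
    for p in positions:
        if p.char.isspace():
            if text:
                words.append((text, start, end))
            text, start, end = "", None, None
        else:
            if start is None:
                start = p.global_start
            text += p.char
            end = p.global_end
    if text:
        words.append((text, start, end))
    return words

positions = [CharPos(c, 10 * i, 10 * i + 8) for i, c in enumerate("ab cd")]
print(word_boxes(positions))  # [('ab', 0, 18), ('cd', 30, 48)]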