mirror of
https://github.com/mikegerber/ocrd_calamari.git
synced 2025-06-09 03:39:55 +02:00
✨ Allow controlling the output hierarchy level, e.g. only lines, not words+glyphs
This commit is contained in:
parent
0f0bae18ba
commit
ef3fb44fb5
4 changed files with 73 additions and 37 deletions
12
README.md
12
README.md
|
@ -13,10 +13,11 @@ This offers a OCR-D compliant workspace processor for some of the functionality
|
||||||
This processor only operates on the text line level and so needs a line segmentation (and by extension a binarized
|
This processor only operates on the text line level and so needs a line segmentation (and by extension a binarized
|
||||||
image) as its input.
|
image) as its input.
|
||||||
|
|
||||||
In addition to the line text it also outputs glyph segmentation including
|
In addition to the line text it may also output word and glyph segmentation
|
||||||
per-glyph confidence values and per-glyph alternative predictions as provided by
|
including per-glyph confidence values and per-glyph alternative predictions as
|
||||||
the Calamari OCR engine. Note that while Calamari does not provide word
|
provided by the Calamari OCR engine, using a `textequiv_level` of `word` or
|
||||||
segmentation, this processor produces word segmentation inferred from text
|
`glyph`. Note that while Calamari does not provide word segmentation, this
|
||||||
|
processor produces word segmentation inferred from text
|
||||||
segmentation and the glyph positions. The provided glyph and word segmentation
|
segmentation and the glyph positions. The provided glyph and word segmentation
|
||||||
can be used for text extraction and highlighting, but is probably not useful for
|
can be used for text extraction and highlighting, but is probably not useful for
|
||||||
further image-based processing.
|
further image-based processing.
|
||||||
|
@ -53,7 +54,8 @@ ocrd-calamari-recognize -p test-parameters.json -m mets.xml -I OCR-D-SEG-LINE -O
|
||||||
With `test-parameters.json`:
|
With `test-parameters.json`:
|
||||||
~~~
|
~~~
|
||||||
{
|
{
|
||||||
"checkpoint": "/path/to/some/trained/models/*.ckpt.json"
|
"checkpoint": "/path/to/some/trained/models/*.ckpt.json",
|
||||||
|
"textequiv_level": "line"
|
||||||
}
|
}
|
||||||
~~~
|
~~~
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,12 @@
|
||||||
"voter": {
|
"voter": {
|
||||||
"description": "The voting algorithm to use",
|
"description": "The voting algorithm to use",
|
||||||
"type": "string", "default": "confidence_voter_default_ctc"
|
"type": "string", "default": "confidence_voter_default_ctc"
|
||||||
|
},
|
||||||
|
"textequiv_level": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["line", "word", "glyph"],
|
||||||
|
"default": "line",
|
||||||
|
"description": "Deepest PAGE XML hierarchy level to include TextEquiv results for"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -120,46 +120,48 @@ class CalamariRecognize(Processor):
|
||||||
spaces = (c == ' ')
|
spaces = (c == ' ')
|
||||||
yield word
|
yield word
|
||||||
|
|
||||||
word_no = 0
|
if self.parameter['textequiv_level'] in ['word', 'glyph']:
|
||||||
i = 0
|
word_no = 0
|
||||||
|
i = 0
|
||||||
|
|
||||||
for word_text in _words(prediction.sentence):
|
for word_text in _words(prediction.sentence):
|
||||||
word_length = len(word_text)
|
word_length = len(word_text)
|
||||||
if not all(c == ' ' for c in word_text):
|
if not all(c == ' ' for c in word_text):
|
||||||
word_positions = prediction.positions[i:i+word_length]
|
word_positions = prediction.positions[i:i+word_length]
|
||||||
word_start = word_positions[0].global_start
|
word_start = word_positions[0].global_start
|
||||||
word_end = word_positions[-1].global_end
|
word_end = word_positions[-1].global_end
|
||||||
|
|
||||||
polygon = polygon_from_x0y0x1y1([word_start, 0, word_end, line_image.height])
|
polygon = polygon_from_x0y0x1y1([word_start, 0, word_end, line_image.height])
|
||||||
points = points_from_polygon(coordinates_for_segment(polygon, None, line_coords))
|
|
||||||
# XXX Crop to line polygon?
|
|
||||||
|
|
||||||
word = WordType(id='%s_word%04d' % (line.id, word_no), Coords=CoordsType(points))
|
|
||||||
word.add_TextEquiv(TextEquivType(Unicode=word_text))
|
|
||||||
|
|
||||||
for glyph_no, p in enumerate(word_positions):
|
|
||||||
glyph_start = p.global_start
|
|
||||||
glyph_end = p.global_end
|
|
||||||
|
|
||||||
polygon = polygon_from_x0y0x1y1([glyph_start, 0, glyph_end, line_image.height])
|
|
||||||
points = points_from_polygon(coordinates_for_segment(polygon, None, line_coords))
|
points = points_from_polygon(coordinates_for_segment(polygon, None, line_coords))
|
||||||
|
# XXX Crop to line polygon?
|
||||||
|
|
||||||
glyph = GlyphType(id='%s_glyph%04d' % (word.id, glyph_no), Coords=CoordsType(points))
|
word = WordType(id='%s_word%04d' % (line.id, word_no), Coords=CoordsType(points))
|
||||||
|
word.add_TextEquiv(TextEquivType(Unicode=word_text))
|
||||||
|
|
||||||
chars = sorted(p.chars, key=lambda k: k.probability, reverse=True)
|
if self.parameter['textequiv_level'] == 'glyph':
|
||||||
char_index = 1 # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs
|
for glyph_no, p in enumerate(word_positions):
|
||||||
for char in chars:
|
glyph_start = p.global_start
|
||||||
if char.char:
|
glyph_end = p.global_end
|
||||||
glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=char_index, conf=char.probability))
|
|
||||||
char_index += 1
|
|
||||||
# XXX Note that omission probabilities are not normalized?!
|
|
||||||
|
|
||||||
word.add_Glyph(glyph)
|
polygon = polygon_from_x0y0x1y1([glyph_start, 0, glyph_end, line_image.height])
|
||||||
|
points = points_from_polygon(coordinates_for_segment(polygon, None, line_coords))
|
||||||
|
|
||||||
line.add_Word(word)
|
glyph = GlyphType(id='%s_glyph%04d' % (word.id, glyph_no), Coords=CoordsType(points))
|
||||||
word_no += 1
|
|
||||||
|
|
||||||
i += word_length
|
chars = sorted(p.chars, key=lambda k: k.probability, reverse=True)
|
||||||
|
char_index = 1 # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs
|
||||||
|
for char in chars:
|
||||||
|
if char.char:
|
||||||
|
glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=char_index, conf=char.probability))
|
||||||
|
char_index += 1
|
||||||
|
# XXX Note that omission probabilities are not normalized?!
|
||||||
|
|
||||||
|
word.add_Glyph(glyph)
|
||||||
|
|
||||||
|
line.add_Word(word)
|
||||||
|
word_no += 1
|
||||||
|
|
||||||
|
i += word_length
|
||||||
|
|
||||||
|
|
||||||
_page_update_higher_textequiv_levels('line', pcgts)
|
_page_update_higher_textequiv_levels('line', pcgts)
|
||||||
|
|
|
@ -87,6 +87,7 @@ def test_word_segmentation(workspace):
|
||||||
output_file_grp="OCR-D-OCR-CALAMARI",
|
output_file_grp="OCR-D-OCR-CALAMARI",
|
||||||
parameter={
|
parameter={
|
||||||
"checkpoint": CHECKPOINT,
|
"checkpoint": CHECKPOINT,
|
||||||
|
"textequiv_level": "word", # Note that we're going down to word level here
|
||||||
}
|
}
|
||||||
).process()
|
).process()
|
||||||
workspace.save_mets()
|
workspace.save_mets()
|
||||||
|
@ -106,5 +107,30 @@ def test_word_segmentation(workspace):
|
||||||
line_text = line.xpath("pc:TextEquiv/pc:Unicode", namespaces=NSMAP)[0].text
|
line_text = line.xpath("pc:TextEquiv/pc:Unicode", namespaces=NSMAP)[0].text
|
||||||
assert words_text == line_text
|
assert words_text == line_text
|
||||||
|
|
||||||
|
# For good measure, check that we're not seeing any glyphs, as we asked for textequiv_level == "word"
|
||||||
|
glyphs = tree.xpath("//pc:Glyph", namespaces=NSMAP)
|
||||||
|
assert len(glyphs) == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_glyphs(workspace):
|
||||||
|
CalamariRecognize(
|
||||||
|
workspace,
|
||||||
|
input_file_grp="OCR-D-GT-SEG-LINE",
|
||||||
|
output_file_grp="OCR-D-OCR-CALAMARI",
|
||||||
|
parameter={
|
||||||
|
"checkpoint": CHECKPOINT,
|
||||||
|
"textequiv_level": "glyph", # Note that we're going down to glyph level here
|
||||||
|
}
|
||||||
|
).process()
|
||||||
|
workspace.save_mets()
|
||||||
|
|
||||||
|
page1 = os.path.join(workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml")
|
||||||
|
assert os.path.exists(page1)
|
||||||
|
tree = etree.parse(page1)
|
||||||
|
|
||||||
|
# The result should contain a lot of glyphs
|
||||||
|
glyphs = tree.xpath("//pc:Glyph", namespaces=NSMAP)
|
||||||
|
assert len(glyphs) >= 100
|
||||||
|
|
||||||
|
|
||||||
# vim:tw=120:
|
# vim:tw=120:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue