Allow configuring a cutoff confidence value for glyph alternatives

fix/readme-no-checkpoint
Gerber, Mike 4 years ago
parent e39a2bce01
commit b802b4deaf

@ -59,6 +59,9 @@ With `test-parameters.json`:
}
~~~
You may want to have a look at the [ocrd-tool.json](ocrd-tool.json) descriptions
for additional parameters and default values.
## Development & Testing
For information regarding development and testing, please see
[README-DEV.md](README-DEV.md).

@ -31,6 +31,12 @@
"enum": ["line", "word", "glyph"],
"default": "line",
"description": "Deepest PAGE XML hierarchy level to include TextEquiv results for"
},
"glyph_conf_cutoff": {
"type": "number",
"format": "float",
"default": 0.001,
"description": "Only include glyph alternatives with confidences at or above this threshold"
}
}
}

@ -148,13 +148,17 @@ class CalamariRecognize(Processor):
glyph = GlyphType(id='%s_glyph%04d' % (word.id, glyph_no), Coords=CoordsType(points))
chars = sorted(p.chars, key=lambda k: k.probability, reverse=True)
# Filter predictions
chars = p.chars
chars = [c for c in chars if c.char] # XXX Note that omission probabilities are not normalized?!
chars = [c for c in chars if c.probability >= self.parameter['glyph_conf_cutoff']]
# Sort and add predictions (= TextEquivs)
chars = sorted(chars, key=lambda k: k.probability, reverse=True)
char_index = 1 # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs
for char in chars:
if char.char:
glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=char_index, conf=char.probability))
char_index += 1
# XXX Note that omission probabilities are not normalized?!
glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=char_index, conf=char.probability))
char_index += 1
word.add_Glyph(glyph)

Loading…
Cancel
Save