mirror of
https://github.com/mikegerber/ocrd_calamari.git
synced 2025-06-09 03:39:55 +02:00
✨ Allow configuring a cut off confidence value for glyph alternatives
This commit is contained in:
parent
e39a2bce01
commit
b802b4deaf
3 changed files with 18 additions and 5 deletions
|
@ -59,6 +59,9 @@ With `test-parameters.json`:
|
||||||
}
|
}
|
||||||
~~~
|
~~~
|
||||||
|
|
||||||
|
You may want to have a look at the [ocrd-tool.json](ocrd-tool.json) descriptions
|
||||||
|
for additional parameters and default values.
|
||||||
|
|
||||||
## Development & Testing
|
## Development & Testing
|
||||||
For information regarding development and testing, please see
|
For information regarding development and testing, please see
|
||||||
[README-DEV.md](README-DEV.md).
|
[README-DEV.md](README-DEV.md).
|
||||||
|
|
|
@ -31,6 +31,12 @@
|
||||||
"enum": ["line", "word", "glyph"],
|
"enum": ["line", "word", "glyph"],
|
||||||
"default": "line",
|
"default": "line",
|
||||||
"description": "Deepest PAGE XML hierarchy level to include TextEquiv results for"
|
"description": "Deepest PAGE XML hierarchy level to include TextEquiv results for"
|
||||||
|
},
|
||||||
|
"glyph_conf_cutoff": {
|
||||||
|
"type": "number",
|
||||||
|
"format": "float",
|
||||||
|
"default": 0.001,
|
||||||
|
"description": "Only include glyph alternatives with confidences at or above this threshold"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -148,13 +148,17 @@ class CalamariRecognize(Processor):
|
||||||
|
|
||||||
glyph = GlyphType(id='%s_glyph%04d' % (word.id, glyph_no), Coords=CoordsType(points))
|
glyph = GlyphType(id='%s_glyph%04d' % (word.id, glyph_no), Coords=CoordsType(points))
|
||||||
|
|
||||||
chars = sorted(p.chars, key=lambda k: k.probability, reverse=True)
|
# Filter predictions
|
||||||
|
chars = p.chars
|
||||||
|
chars = [c for c in chars if c.char] # XXX Note that omission probabilities are not normalized?!
|
||||||
|
chars = [c for c in chars if c.probability >= self.parameter['glyph_conf_cutoff']]
|
||||||
|
|
||||||
|
# Sort and add predictions (= TextEquivs)
|
||||||
|
chars = sorted(chars, key=lambda k: k.probability, reverse=True)
|
||||||
char_index = 1 # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs
|
char_index = 1 # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs
|
||||||
for char in chars:
|
for char in chars:
|
||||||
if char.char:
|
glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=char_index, conf=char.probability))
|
||||||
glyph.add_TextEquiv(TextEquivType(Unicode=char.char, index=char_index, conf=char.probability))
|
char_index += 1
|
||||||
char_index += 1
|
|
||||||
# XXX Note that omission probabilities are not normalized?!
|
|
||||||
|
|
||||||
word.add_Glyph(glyph)
|
word.add_Glyph(glyph)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue