dinglehopper: Support disabling the metrics using CLI option --no-metrics

pull/23/head
Gerber, Mike 4 years ago
parent 745095e52c
commit 5cbeb7b0dd

@@ -23,6 +23,22 @@ sudo pip install .
Usage
-----
~~~
Usage: dinglehopper [OPTIONS] GT OCR [REPORT_PREFIX]
Compare the PAGE/ALTO/text document GT against the document OCR.
The files GT and OCR are usually a ground truth document and the result of
an OCR software, but you may use dinglehopper to compare two OCR results.
In that case, use --no-metrics to disable the then meaningless metrics and
also change the color scheme from green/red to blue.
Options:
--metrics / --no-metrics Enable/disable metrics and green/red
--help Show this message and exit.
~~~
For example:
~~~
dinglehopper some-document.gt.page.xml some-document.ocr.alto.xml
~~~
This generates `report.html` and `report.json`.

@@ -44,7 +44,7 @@ def gen_diff_report(gt_things, ocr_things, css_prefix, joiner, none, align):
'''.format(gtx, ocrx)
def process(gt, ocr, report_prefix):
def process(gt, ocr, report_prefix, metrics):
"""Check OCR result against GT.
The @click decorators change the signature of the decorated functions, so we keep this undecorated version and use
@@ -91,7 +91,8 @@ def process(gt, ocr, report_prefix):
cer=cer, n_characters=n_characters,
wer=wer, n_words=n_words,
char_diff_report=char_diff_report,
word_diff_report=word_diff_report
word_diff_report=word_diff_report,
metrics=metrics,
).dump(out_fn)
@@ -99,8 +100,17 @@ def process(gt, ocr, report_prefix):
@click.argument('gt', type=click.Path(exists=True))
@click.argument('ocr', type=click.Path(exists=True))
@click.argument('report_prefix', type=click.Path(), default='report')
def main(gt, ocr, report_prefix):
process(gt, ocr, report_prefix)
@click.option('--metrics/--no-metrics', default=True, help='Enable/disable metrics and green/red')
def main(gt, ocr, report_prefix, metrics):
"""
Compare the PAGE/ALTO/text document GT against the document OCR.
The files GT and OCR are usually a ground truth document and the result of
an OCR software, but you may use dinglehopper to compare two OCR results. In
that case, use --no-metrics to disable the then meaningless metrics and also
change the color scheme from green/red to blue.
"""
process(gt, ocr, report_prefix, metrics)
if __name__ == '__main__':

@@ -6,12 +6,18 @@
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
<style type="text/css">
{% if metrics %}
.gt .diff {
color: green;
}
.ocr .diff {
color: red;
}
{% else %}
.gt .diff, .ocr .diff {
color: blue;
}
{% endif %}
.ellipsis {
opacity: 0.5;
font-style: italic;
@@ -32,9 +38,11 @@
{{ ocr }}
{% if metrics %}
<h2>Metrics</h2>
<p>CER: {{ cer|round(4) }}</p>
<p>WER: {{ wer|round(4) }}</p>
{% endif %}
<h2>Character differences</h2>
{{ char_diff_report }}

Loading…
Cancel
Save