mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-09 11:50:00 +02:00
✨ dinglehopper: Support disabling the metrics using CLI option --no-metrics
This commit is contained in:
parent
745095e52c
commit
5cbeb7b0dd
3 changed files with 38 additions and 4 deletions
16
README.md
16
README.md
|
@ -23,6 +23,22 @@ sudo pip install .
|
||||||
Usage
|
Usage
|
||||||
-----
|
-----
|
||||||
~~~
|
~~~
|
||||||
|
Usage: dinglehopper [OPTIONS] GT OCR [REPORT_PREFIX]
|
||||||
|
|
||||||
|
Compare the PAGE/ALTO/text document GT against the document OCR.
|
||||||
|
|
||||||
|
The files GT and OCR are usually a ground truth document and the result of
|
||||||
|
an OCR software, but you may use dinglehopper to compare two OCR results.
|
||||||
|
In that case, use --no-metrics to disable the then meaningless metrics and
|
||||||
|
also change the color scheme from green/red to blue.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--metrics / --no-metrics Enable/disable metrics and green/red
|
||||||
|
--help Show this message and exit.
|
||||||
|
~~~
|
||||||
|
|
||||||
|
For example:
|
||||||
|
~~~
|
||||||
dinglehopper some-document.gt.page.xml some-document.ocr.alto.xml
|
dinglehopper some-document.gt.page.xml some-document.ocr.alto.xml
|
||||||
~~~
|
~~~
|
||||||
This generates `report.html` and `report.json`.
|
This generates `report.html` and `report.json`.
|
||||||
|
|
|
@ -44,7 +44,7 @@ def gen_diff_report(gt_things, ocr_things, css_prefix, joiner, none, align):
|
||||||
'''.format(gtx, ocrx)
|
'''.format(gtx, ocrx)
|
||||||
|
|
||||||
|
|
||||||
def process(gt, ocr, report_prefix):
|
def process(gt, ocr, report_prefix, metrics):
|
||||||
"""Check OCR result against GT.
|
"""Check OCR result against GT.
|
||||||
|
|
||||||
The @click decorators change the signature of the decorated functions, so we keep this undecorated version and use
|
The @click decorators change the signature of the decorated functions, so we keep this undecorated version and use
|
||||||
|
@ -91,7 +91,8 @@ def process(gt, ocr, report_prefix):
|
||||||
cer=cer, n_characters=n_characters,
|
cer=cer, n_characters=n_characters,
|
||||||
wer=wer, n_words=n_words,
|
wer=wer, n_words=n_words,
|
||||||
char_diff_report=char_diff_report,
|
char_diff_report=char_diff_report,
|
||||||
word_diff_report=word_diff_report
|
word_diff_report=word_diff_report,
|
||||||
|
metrics=metrics,
|
||||||
).dump(out_fn)
|
).dump(out_fn)
|
||||||
|
|
||||||
|
|
||||||
|
@ -99,8 +100,17 @@ def process(gt, ocr, report_prefix):
|
||||||
@click.argument('gt', type=click.Path(exists=True))
|
@click.argument('gt', type=click.Path(exists=True))
|
||||||
@click.argument('ocr', type=click.Path(exists=True))
|
@click.argument('ocr', type=click.Path(exists=True))
|
||||||
@click.argument('report_prefix', type=click.Path(), default='report')
|
@click.argument('report_prefix', type=click.Path(), default='report')
|
||||||
def main(gt, ocr, report_prefix):
|
@click.option('--metrics/--no-metrics', default=True, help='Enable/disable metrics and green/red')
|
||||||
process(gt, ocr, report_prefix)
|
def main(gt, ocr, report_prefix, metrics):
|
||||||
|
"""
|
||||||
|
Compare the PAGE/ALTO/text document GT against the document OCR.
|
||||||
|
|
||||||
|
The files GT and OCR are usually a ground truth document and the result of
|
||||||
|
an OCR software, but you may use dinglehopper to compare two OCR results. In
|
||||||
|
that case, use --no-metrics to disable the then meaningless metrics and also
|
||||||
|
change the color scheme from green/red to blue.
|
||||||
|
"""
|
||||||
|
process(gt, ocr, report_prefix, metrics)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -6,12 +6,18 @@
|
||||||
|
|
||||||
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
|
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
|
||||||
<style type="text/css">
|
<style type="text/css">
|
||||||
|
{% if metrics %}
|
||||||
.gt .diff {
|
.gt .diff {
|
||||||
color: green;
|
color: green;
|
||||||
}
|
}
|
||||||
.ocr .diff {
|
.ocr .diff {
|
||||||
color: red;
|
color: red;
|
||||||
}
|
}
|
||||||
|
{% else %}
|
||||||
|
.gt .diff, .ocr .diff {
|
||||||
|
color: blue;
|
||||||
|
}
|
||||||
|
{% endif %}
|
||||||
.ellipsis {
|
.ellipsis {
|
||||||
opacity: 0.5;
|
opacity: 0.5;
|
||||||
font-style: italic;
|
font-style: italic;
|
||||||
|
@ -32,9 +38,11 @@
|
||||||
{{ ocr }}
|
{{ ocr }}
|
||||||
|
|
||||||
|
|
||||||
|
{% if metrics %}
|
||||||
<h2>Metrics</h2>
|
<h2>Metrics</h2>
|
||||||
<p>CER: {{ cer|round(4) }}</p>
|
<p>CER: {{ cer|round(4) }}</p>
|
||||||
<p>WER: {{ wer|round(4) }}</p>
|
<p>WER: {{ wer|round(4) }}</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
<h2>Character differences</h2>
|
<h2>Character differences</h2>
|
||||||
{{ char_diff_report }}
|
{{ char_diff_report }}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue