mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-07-12 06:29:53 +02:00
📝 Document do_validate() options better
This commit is contained in:
parent
1252d8ccc3
commit
6ae85063c5
1 changed file with 15 additions and 1 deletion
|
@@ -10,13 +10,27 @@ fi
|
||||||
|
|
||||||
|
|
||||||
do_validate() {
|
do_validate() {
|
||||||
|
# Validate the workspace
|
||||||
|
|
||||||
|
# Both ocrd_tesserocr and ocrd_calamari produce segment coordinates that are not strictly within their parent's
|
||||||
|
# coordinates:
|
||||||
|
#
|
||||||
|
# INCONSISTENCY in [...] coords [...] not within parent coords
|
||||||
|
#
|
||||||
|
# → --page-coordinate-consistency off
|
||||||
|
#
|
||||||
|
# ocrd_tesserocr sometimes produces segment text results that do not concatenate as expected by the validator:
|
||||||
|
#
|
||||||
|
# INCONSISTENCY in [...]: text results '[...]' != concatenated '[...]'
|
||||||
|
#
|
||||||
|
# → --page-strictness lax
|
||||||
|
#
|
||||||
validate_options='
|
validate_options='
|
||||||
--skip dimension
|
--skip dimension
|
||||||
--skip pixel_density
|
--skip pixel_density
|
||||||
--page-strictness lax
|
--page-strictness lax
|
||||||
--page-coordinate-consistency off'
|
--page-coordinate-consistency off'
|
||||||
ocrd workspace validate $validate_options
|
ocrd workspace validate $validate_options
|
||||||
# XXX ocrd-tesserocr INCONSISTENCY in TextRegion → use "--page-strictness lax" for now
|
|
||||||
}
|
}
|
||||||
|
|
||||||
do_binarization() {
|
do_binarization() {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue