mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-06-09 06:39:53 +02:00
📝 Document do_validate() options better
This commit is contained in:
parent
1252d8ccc3
commit
6ae85063c5
1 changed files with 15 additions and 1 deletions
|
@ -10,13 +10,27 @@ fi
|
|||
|
||||
|
||||
do_validate() {
  # Validate the workspace with `ocrd workspace validate`, relaxing the checks
  # that the processors named below are known to trip.
  #
  # Both ocrd_tesserocr + ocrd_calamari produce segment coordinates that are
  # not strictly within their parent's coordinates:
  #
  #   INCONSISTENCY in [...] coords [...] not within parent coords
  #
  #   → --page-coordinate-consistency off
  #
  # ocrd_tesserocr sometimes produces segment text results that do not
  # concatenate as expected by the validator:
  #
  #   INCONSISTENCY in [...]: text results '[...]' != concatenated '[...]'
  #
  #   → --page-strictness lax   (XXX: workaround, for now)
  #
  validate_options='
    --skip dimension
    --skip pixel_density
    --page-strictness lax
    --page-coordinate-consistency off'
  # Deliberately unquoted: the shell has to split the string above into
  # separate command-line arguments.
  ocrd workspace validate $validate_options
}
|
||||
|
||||
do_binarization() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue