mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-07-09 04:59:54 +02:00
🧹 Update/move some XXXs/TODOs
This commit is contained in:
parent
98aee51801
commit
6b83d5ae1e
2 changed files with 2 additions and 5 deletions
|
@ -86,7 +86,7 @@ COPY my_ocrd_workflow /usr/bin
|
||||||
COPY xsd/* /usr/share/xml/
|
COPY xsd/* /usr/share/xml/
|
||||||
|
|
||||||
|
|
||||||
# XXX Work around concurrency problems(?)
|
# XXX Work around suspected concurrency problems in ocrd-sbb-textline-detector
|
||||||
RUN sed -i 's#num_cores *= *cpu_count()#num_cores = 1#' /usr/local/lib/python3.6/dist-packages/qurator/sbb_textline_detector/main.py
|
RUN sed -i 's#num_cores *= *cpu_count()#num_cores = 1#' /usr/local/lib/python3.6/dist-packages/qurator/sbb_textline_detector/main.py
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,6 @@ do_validate() {
|
||||||
--page-coordinate-consistency off"
|
--page-coordinate-consistency off"
|
||||||
ocrd workspace validate $validate_options
|
ocrd workspace validate $validate_options
|
||||||
# XXX ocrd-tesserocr INCONSISTENCY in TextRegion → use "--page-strictness lax" for now
|
# XXX ocrd-tesserocr INCONSISTENCY in TextRegion → use "--page-strictness lax" for now
|
||||||
# XXX INVALIDITY in Glyph ID etc. in GT → --page-coordinate-consistency off
|
|
||||||
}
|
}
|
||||||
|
|
||||||
do_binarization() {
|
do_binarization() {
|
||||||
|
@ -40,8 +39,6 @@ do_linesegmentation_tesserocr() {
|
||||||
-I OCR-D-IMG-BINPAGE -O OCR-D-SEG-REGION
|
-I OCR-D-IMG-BINPAGE -O OCR-D-SEG-REGION
|
||||||
ocrd-tesserocr-segment-line \
|
ocrd-tesserocr-segment-line \
|
||||||
-I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
|
-I OCR-D-SEG-REGION -O OCR-D-SEG-LINE
|
||||||
|
|
||||||
# XXX compare ocrd-tesserocr-segment* vs tesseract native
|
|
||||||
}
|
}
|
||||||
|
|
||||||
do_linesegmentation_sbb() {
|
do_linesegmentation_sbb() {
|
||||||
|
@ -58,7 +55,7 @@ do_linesegmentation_sbb() {
|
||||||
do_ocr() {
|
do_ocr() {
|
||||||
# Perform OCR on the segmented lines
|
# Perform OCR on the segmented lines
|
||||||
|
|
||||||
ocrd_tesserocr_recognize_parameters='{ "model": "GT4HistOCR_2000000" }' # TODO mods:language + fontident → model
|
ocrd_tesserocr_recognize_parameters='{ "model": "GT4HistOCR_2000000" }'
|
||||||
ocrd workspace remove-group -rf OCR-D-OCR-TESS
|
ocrd workspace remove-group -rf OCR-D-OCR-TESS
|
||||||
ocrd-tesserocr-recognize \
|
ocrd-tesserocr-recognize \
|
||||||
-I OCR-D-SEG-LINE -O OCR-D-OCR-TESS \
|
-I OCR-D-SEG-LINE -O OCR-D-OCR-TESS \
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue