diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..f9c159a --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,16 @@ +name: Test + +on: push + +jobs: + build: + name: Setup, Build, Publish, and Deploy + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Build + run: |- + FORCE_DOWNLOAD=y ./build diff --git a/Dockerfile-core b/Dockerfile-core index 56c2c13..97f4c87 100644 --- a/Dockerfile-core +++ b/Dockerfile-core @@ -1,7 +1,7 @@ FROM ubuntu:18.04 -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" -ARG OCRD_VERSION_MINIMUM="2.18.1" +ARG PIP_INSTALL="pip install --no-cache-dir" +ARG OCRD_VERSION_MINIMUM="2.21.0" ENV LC_ALL=C.UTF-8 LANG=C.UTF-8 ENV PIP_DEFAULT_TIMEOUT=120 diff --git a/Dockerfile-dinglehopper b/Dockerfile-dinglehopper index a95d487..8885325 100644 --- a/Dockerfile-dinglehopper +++ b/Dockerfile-dinglehopper @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG DINGLEHOPPER_COMMIT="6e47acd" diff --git a/Dockerfile-ocrd_calamari b/Dockerfile-ocrd_calamari index 640e94b..f94d54c 100644 --- a/Dockerfile-ocrd_calamari +++ b/Dockerfile-ocrd_calamari @@ -2,7 +2,7 @@ FROM my_ocrd_workflow-core-cuda10.1 # XXX https://github.com/OCR-D/core/issues/642 -#ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +#ARG PIP_INSTALL="pip install --no-cache-dir" ARG PIP_INSTALL="pip install --no-cache-dir" ARG OCRD_CALAMARI_VERSION="1.0.0" @@ -20,8 +20,7 @@ COPY data/mirror/github.com/Calamari-OCR/calamari_models/gt4histocr # Check pip dependencies -# XXX https://github.com/OCR-D/core/issues/642 -#RUN pip check +RUN pip check # Default command diff --git a/Dockerfile-ocrd_calamari03 b/Dockerfile-ocrd_calamari03 index 549c4ed..6d4089b 100644 --- a/Dockerfile-ocrd_calamari03 +++ b/Dockerfile-ocrd_calamari03 @@ -1,6 +1,6 @@ 
FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" # Build pip installable stuff diff --git a/Dockerfile-ocrd_cis b/Dockerfile-ocrd_cis index 9cb02bd..f846251 100644 --- a/Dockerfile-ocrd_cis +++ b/Dockerfile-ocrd_cis @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG OCRD_CIS_VERSION="0.1.5" diff --git a/Dockerfile-ocrd_fileformat b/Dockerfile-ocrd_fileformat index c1a2180..deff0c6 100644 --- a/Dockerfile-ocrd_fileformat +++ b/Dockerfile-ocrd_fileformat @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG OCRD_FILEFORMAT_VERSION="0.1.1" diff --git a/Dockerfile-ocrd_olena b/Dockerfile-ocrd_olena index 644ad4e..1f2bcd4 100644 --- a/Dockerfile-ocrd_olena +++ b/Dockerfile-ocrd_olena @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG OCRD_OLENA_VERSION="1.2.0" diff --git a/Dockerfile-ocrd_tesserocr b/Dockerfile-ocrd_tesserocr index 6776eff..b95ab2a 100644 --- a/Dockerfile-ocrd_tesserocr +++ b/Dockerfile-ocrd_tesserocr @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG TESSDATA_BEST_VERSION="4.0.0" ENV TESSDATA_PREFIX /usr/local/share/tessdata diff --git a/Dockerfile-sbb_binarization b/Dockerfile-sbb_binarization index 6ee301d..64cd158 100644 --- a/Dockerfile-sbb_binarization +++ b/Dockerfile-sbb_binarization @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG SBB_BINARIZATION_COMMIT="4d145cc" diff --git 
a/Dockerfile-sbb_textline_detector b/Dockerfile-sbb_textline_detector index d989c0a..006799f 100644 --- a/Dockerfile-sbb_textline_detector +++ b/Dockerfile-sbb_textline_detector @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core-cuda10.0 -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG SBB_TEXTLINE_DETECTOR_COMMIT="247d5f9" diff --git a/README.md b/README.md index 27e4cb9..b1597e3 100644 --- a/README.md +++ b/README.md @@ -27,13 +27,25 @@ including all dependencies in Docker. How to use ---------- -It's easiest to use it as pre-built containers. To run the containers on an -example workspace: +**Currently, due to problems with the Travis CI, we do not provide pre-built +containers anymore.** +To build the containers yourself using Docker: +~~~ +cd ~/devel/ocrd-galley/ +./build ~~~ -# Update to the latest stable containers -(cd ~/devel/ocrd-galley/; ./run-docker-hub-update) +You can then install the wrappers into a Python venv: +~~~ +cd ~/devel/ocrd-galley/wrapper +pip install . +~~~ + +You may then use the script `my_ocrd_workflow` to use your self-built +containers on an example workspace: + +~~~ # Download an example workspace cd /tmp wget https://qurator-data.de/examples/actevedef_718448162.first-page.zip @@ -41,17 +53,8 @@ unzip actevedef_718448162.first-page.zip # Run the workflow on it cd actevedef_718448162.first-page -~/devel/ocrd-galley/run-docker-hub -~~~ - -### Build the containers yourself -To build the containers yourself using Docker: -~~~ -cd ~/devel/ocrd-galley/ -./build +~/devel/ocrd-galley/my_ocrd_workflow ~~~ -You may then use the script `run` to use your self-built containers, analogous to -the example above. 
### Viewing results You may then examine the results using @@ -83,7 +86,7 @@ The document must be specified by its PPN, for example: ~~~ ~/devel/ocrd-galley/ppn2ocr PPN77164308X cd PPN77164308X -~/devel/ocrd-galley/run-docker-hub -I BEST --skip-validation +~/devel/ocrd-galley/my_ocrd_workflow -I BEST --skip-validation ~~~ This produces a workspace directory `PPN77164308X` with the OCR results in it; @@ -101,7 +104,7 @@ for the given images. ~~~ ~/devel/ocrd-galley/ocrd-workspace-from-images 0005.png cd workspace-xxxxx # output by the last command -~/devel/ocrd-galley/run-docker-hub +~/devel/ocrd-galley/my_ocrd_workflow ~~~ This produces a workspace from the files and then runs the OCR workflow on it. diff --git a/my_ocrd_workflow b/my_ocrd_workflow index 50d2724..113c6ab 100755 --- a/my_ocrd_workflow +++ b/my_ocrd_workflow @@ -70,7 +70,7 @@ main() { do_validate - ocrd-calamari-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI -P checkpoint "/var/lib/calamari-models/GT4HistOCR/2019-07-22T15_49+0200/*.ckpt.json" -P textequiv_level "$TEXTEQUIV_LEVEL" + ocrd-calamari-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI -P checkpoint "/var/lib/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/*.ckpt.json" -P textequiv_level "$TEXTEQUIV_LEVEL" ocrd-tesserocr-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-TESS -P model "GT4HistOCR_2000000" -P textequiv_level "$TEXTEQUIV_LEVEL" do_validate diff --git a/run-docker-hub-update b/run-docker-hub-update deleted file mode 100755 index cc6c860..0000000 --- a/run-docker-hub-update +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# Update the my_ocrd_workflow containers - -DOCKER_IMAGE_PREFIX=mikegerber/my_ocrd_workflow -DOCKER_IMAGE_TAG=stable - - -sub_images=`ls -1 Dockerfile-* | sed 's/Dockerfile-//'` -for x in $sub_images; do - docker pull $DOCKER_IMAGE_PREFIX-$x:$DOCKER_IMAGE_TAG -done