From cb05e8229e5349a0ad635ecc9e67c3956a222e99 Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Thu, 3 Dec 2020 17:03:30 +0100 Subject: [PATCH 1/8] =?UTF-8?q?=F0=9F=9A=A7=20Test=20GitHub=20Actions=20bu?= =?UTF-8?q?ild?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..b0cae23 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,21 @@ +name: Test + +on: + release: + types: [created] + +env: + + +jobs: + build: + name: Setup, Build, Publish, and Deploy + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Build + run: |- + FORCE_DOWNLOAD=y ./build From 972e03974c09017e6a0b56c3b447e0de091b748d Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Thu, 3 Dec 2020 17:05:38 +0100 Subject: [PATCH 2/8] =?UTF-8?q?=F0=9F=9A=A7=20Test=20GitHub=20Actions=20bu?= =?UTF-8?q?ild?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b0cae23..77aa064 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,8 +1,6 @@ name: Test -on: - release: - types: [created] +on: push env: From d30b1130c23af9110e79f3d6af42008229bff0cf Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Thu, 3 Dec 2020 17:06:24 +0100 Subject: [PATCH 3/8] =?UTF-8?q?=F0=9F=9A=A7=20Test=20GitHub=20Actions=20bu?= =?UTF-8?q?ild?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 77aa064..f9c159a 100644 --- 
a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,9 +2,6 @@ name: Test on: push -env: - - jobs: build: name: Setup, Build, Publish, and Deploy From cdd81169241f82e4ea39943a7c1987fb996cd8f9 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 3 Dec 2020 18:01:21 +0100 Subject: [PATCH 4/8] =?UTF-8?q?=F0=9F=90=9B=20Update=20to=20ocrd=20>=3D=20?= =?UTF-8?q?2.21.0=20for=20a=20fix=20for=20OCR-D/core#642?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile-core | 2 +- Dockerfile-ocrd_calamari | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Dockerfile-core b/Dockerfile-core index 56c2c13..f548c83 100644 --- a/Dockerfile-core +++ b/Dockerfile-core @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" -ARG OCRD_VERSION_MINIMUM="2.18.1" +ARG OCRD_VERSION_MINIMUM="2.21.0" ENV LC_ALL=C.UTF-8 LANG=C.UTF-8 ENV PIP_DEFAULT_TIMEOUT=120 diff --git a/Dockerfile-ocrd_calamari b/Dockerfile-ocrd_calamari index 64d2e62..083d97d 100644 --- a/Dockerfile-ocrd_calamari +++ b/Dockerfile-ocrd_calamari @@ -20,8 +20,7 @@ COPY data/mirror/github.com/Calamari-OCR/calamari_models/gt4histocr # Check pip dependencies -# XXX https://github.com/OCR-D/core/issues/642 -#RUN pip check +RUN pip check # Default command From f05c26232f9455aa636b0f048662f298eb51b294 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 3 Dec 2020 18:45:52 +0100 Subject: [PATCH 5/8] =?UTF-8?q?=F0=9F=A7=B9=20Don't=20use=20pip=20option?= =?UTF-8?q?=20=20--use-feature=3D2020-resolver=20anymore?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile-core | 2 +- Dockerfile-dinglehopper | 2 +- Dockerfile-ocrd_calamari | 2 +- Dockerfile-ocrd_calamari03 | 2 +- Dockerfile-ocrd_cis | 2 +- Dockerfile-ocrd_fileformat | 2 +- Dockerfile-ocrd_olena | 2 +- Dockerfile-ocrd_tesserocr | 2 +- Dockerfile-sbb_binarization | 2 +- 
Dockerfile-sbb_textline_detector | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Dockerfile-core b/Dockerfile-core index f548c83..97f4c87 100644 --- a/Dockerfile-core +++ b/Dockerfile-core @@ -1,6 +1,6 @@ FROM ubuntu:18.04 -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG OCRD_VERSION_MINIMUM="2.21.0" ENV LC_ALL=C.UTF-8 LANG=C.UTF-8 ENV PIP_DEFAULT_TIMEOUT=120 diff --git a/Dockerfile-dinglehopper b/Dockerfile-dinglehopper index a95d487..8885325 100644 --- a/Dockerfile-dinglehopper +++ b/Dockerfile-dinglehopper @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG DINGLEHOPPER_COMMIT="6e47acd" diff --git a/Dockerfile-ocrd_calamari b/Dockerfile-ocrd_calamari index 083d97d..d1f4493 100644 --- a/Dockerfile-ocrd_calamari +++ b/Dockerfile-ocrd_calamari @@ -2,7 +2,7 @@ FROM my_ocrd_workflow-core # XXX https://github.com/OCR-D/core/issues/642 -#ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +#ARG PIP_INSTALL="pip install --no-cache-dir" ARG PIP_INSTALL="pip install --no-cache-dir" ARG OCRD_CALAMARI_VERSION="1.0.0" diff --git a/Dockerfile-ocrd_calamari03 b/Dockerfile-ocrd_calamari03 index 549c4ed..6d4089b 100644 --- a/Dockerfile-ocrd_calamari03 +++ b/Dockerfile-ocrd_calamari03 @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" # Build pip installable stuff diff --git a/Dockerfile-ocrd_cis b/Dockerfile-ocrd_cis index 9cb02bd..f846251 100644 --- a/Dockerfile-ocrd_cis +++ b/Dockerfile-ocrd_cis @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG OCRD_CIS_VERSION="0.1.5" diff --git a/Dockerfile-ocrd_fileformat 
b/Dockerfile-ocrd_fileformat index c1a2180..deff0c6 100644 --- a/Dockerfile-ocrd_fileformat +++ b/Dockerfile-ocrd_fileformat @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG OCRD_FILEFORMAT_VERSION="0.1.1" diff --git a/Dockerfile-ocrd_olena b/Dockerfile-ocrd_olena index 644ad4e..1f2bcd4 100644 --- a/Dockerfile-ocrd_olena +++ b/Dockerfile-ocrd_olena @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG OCRD_OLENA_VERSION="1.2.0" diff --git a/Dockerfile-ocrd_tesserocr b/Dockerfile-ocrd_tesserocr index 6776eff..b95ab2a 100644 --- a/Dockerfile-ocrd_tesserocr +++ b/Dockerfile-ocrd_tesserocr @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG TESSDATA_BEST_VERSION="4.0.0" ENV TESSDATA_PREFIX /usr/local/share/tessdata diff --git a/Dockerfile-sbb_binarization b/Dockerfile-sbb_binarization index 6ee301d..64cd158 100644 --- a/Dockerfile-sbb_binarization +++ b/Dockerfile-sbb_binarization @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG SBB_BINARIZATION_COMMIT="4d145cc" diff --git a/Dockerfile-sbb_textline_detector b/Dockerfile-sbb_textline_detector index d445bb8..9c864bb 100644 --- a/Dockerfile-sbb_textline_detector +++ b/Dockerfile-sbb_textline_detector @@ -1,6 +1,6 @@ FROM my_ocrd_workflow-core -ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" +ARG PIP_INSTALL="pip install --no-cache-dir" ARG SBB_TEXTLINE_DETECTOR_COMMIT="247d5f9" From 53d752f58d3706a7266a1da36d5985a83ac6a60f Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 4 Dec 2020 13:13:52 +0100 Subject: [PATCH 6/8] 
=?UTF-8?q?=F0=9F=90=9B=20Fix=20model=20path=20for=20o?= =?UTF-8?q?crd=5Fcalamari=201.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- my_ocrd_workflow | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/my_ocrd_workflow b/my_ocrd_workflow index 50d2724..113c6ab 100755 --- a/my_ocrd_workflow +++ b/my_ocrd_workflow @@ -70,7 +70,7 @@ main() { do_validate - ocrd-calamari-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI -P checkpoint "/var/lib/calamari-models/GT4HistOCR/2019-07-22T15_49+0200/*.ckpt.json" -P textequiv_level "$TEXTEQUIV_LEVEL" + ocrd-calamari-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI -P checkpoint "/var/lib/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/*.ckpt.json" -P textequiv_level "$TEXTEQUIV_LEVEL" ocrd-tesserocr-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-TESS -P model "GT4HistOCR_2000000" -P textequiv_level "$TEXTEQUIV_LEVEL" do_validate From 5bf350056fbde71ffa790f523761dff80b1f8cea Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Wed, 9 Dec 2020 12:50:02 +0100 Subject: [PATCH 7/8] =?UTF-8?q?=F0=9F=A7=B9=20Update=20README=20for=20the?= =?UTF-8?q?=20newest=20changes=20+=20clean-up?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 31 +++++++++++++++++-------------- run-docker-hub-update | 11 ----------- 2 files changed, 17 insertions(+), 25 deletions(-) delete mode 100755 run-docker-hub-update diff --git a/README.md b/README.md index 27e4cb9..c1c8c83 100644 --- a/README.md +++ b/README.md @@ -27,13 +27,25 @@ including all dependencies in Docker. How to use ---------- -It's easiest to use it as pre-built containers. 
To run the containers on an -example workspace: +**Currently, due to problems with the Travis CI, we do not provide pre-built +containers anymore.** +To build the containers yourself using Docker: +~~~ +cd ~/devel/ocrd-galley/ +./build ~~~ -# Update to the latest stable containers -(cd ~/devel/ocrd-galley/; ./run-docker-hub-update) +You can then install the wrappers into a Python venv: +~~~ +cd ~/devel/ocrd-galley/wrapper +pip install . +~~~ + +You may then use the script `my_ocrd_workflow` to use your self-built +containers on an example workspace: + +~~~ # Download an example workspace cd /tmp wget https://qurator-data.de/examples/actevedef_718448162.first-page.zip @@ -41,17 +53,8 @@ unzip actevedef_718448162.first-page.zip # Run the workflow on it cd actevedef_718448162.first-page -~/devel/ocrd-galley/run-docker-hub -~~~ - -### Build the containers yourself -To build the containers yourself using Docker: -~~~ -cd ~/devel/ocrd-galley/ -./build +~/devel/ocrd-galley/my_ocrd_workflow ~~~ -You may then use the script `run` to use your self-built containers, analogous to -the example above. 
### Viewing results You may then examine the results using diff --git a/run-docker-hub-update b/run-docker-hub-update deleted file mode 100755 index cc6c860..0000000 --- a/run-docker-hub-update +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# Update the my_ocrd_workflow containers - -DOCKER_IMAGE_PREFIX=mikegerber/my_ocrd_workflow -DOCKER_IMAGE_TAG=stable - - -sub_images=`ls -1 Dockerfile-* | sed 's/Dockerfile-//'` -for x in $sub_images; do - docker pull $DOCKER_IMAGE_PREFIX-$x:$DOCKER_IMAGE_TAG -done From d226486b3fc5f24abc5f31df64113e67cb56c3cf Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Wed, 9 Dec 2020 12:51:00 +0100 Subject: [PATCH 8/8] =?UTF-8?q?=F0=9F=A7=B9=20Update=20README=20for=20the?= =?UTF-8?q?=20newest=20changes=20+=20clean-up?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c1c8c83..b1597e3 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ The document must be specified by its PPN, for example: ~~~ ~/devel/ocrd-galley/ppn2ocr PPN77164308X cd PPN77164308X -~/devel/ocrd-galley/run-docker-hub -I BEST --skip-validation +~/devel/ocrd-galley/my_ocrd_workflow -I BEST --skip-validation ~~~ This produces a workspace directory `PPN77164308X` with the OCR results in it; @@ -104,7 +104,7 @@ for the given images. ~~~ ~/devel/ocrd-galley/ocrd-workspace-from-images 0005.png cd workspace-xxxxx # output by the last command -~/devel/ocrd-galley/run-docker-hub +~/devel/ocrd-galley/my_ocrd_workflow ~~~ This produces a workspace from the files and then runs the OCR workflow on it.