Merge branch 'master' of code.dev.sbb.berlin:qurator/ocrd-galley

pull/38/head
Gerber, Mike 4 years ago
commit 82d3d71ed4

@ -0,0 +1,16 @@
name: Test
on: push
jobs:
build:
name: Setup, Build, Publish, and Deploy
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Build
run: |-
FORCE_DOWNLOAD=y ./build

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" ARG PIP_INSTALL="pip install --no-cache-dir"
ARG OCRD_VERSION_MINIMUM="2.18.1" ARG OCRD_VERSION_MINIMUM="2.21.0"
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8 ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
ENV PIP_DEFAULT_TIMEOUT=120 ENV PIP_DEFAULT_TIMEOUT=120

@ -1,6 +1,6 @@
FROM my_ocrd_workflow-core FROM my_ocrd_workflow-core
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" ARG PIP_INSTALL="pip install --no-cache-dir"
ARG DINGLEHOPPER_COMMIT="6e47acd" ARG DINGLEHOPPER_COMMIT="6e47acd"

@ -2,7 +2,7 @@ FROM my_ocrd_workflow-core-cuda10.1
# XXX https://github.com/OCR-D/core/issues/642 # XXX https://github.com/OCR-D/core/issues/642
#ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" #ARG PIP_INSTALL="pip install --no-cache-dir"
ARG PIP_INSTALL="pip install --no-cache-dir" ARG PIP_INSTALL="pip install --no-cache-dir"
ARG OCRD_CALAMARI_VERSION="1.0.0" ARG OCRD_CALAMARI_VERSION="1.0.0"
@ -20,8 +20,7 @@ COPY data/mirror/github.com/Calamari-OCR/calamari_models/gt4histocr
# Check pip dependencies # Check pip dependencies
# XXX https://github.com/OCR-D/core/issues/642 RUN pip check
#RUN pip check
# Default command # Default command

@ -1,6 +1,6 @@
FROM my_ocrd_workflow-core FROM my_ocrd_workflow-core
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" ARG PIP_INSTALL="pip install --no-cache-dir"
# Build pip installable stuff # Build pip installable stuff

@ -1,6 +1,6 @@
FROM my_ocrd_workflow-core FROM my_ocrd_workflow-core
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" ARG PIP_INSTALL="pip install --no-cache-dir"
ARG OCRD_CIS_VERSION="0.1.5" ARG OCRD_CIS_VERSION="0.1.5"

@ -1,6 +1,6 @@
FROM my_ocrd_workflow-core FROM my_ocrd_workflow-core
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" ARG PIP_INSTALL="pip install --no-cache-dir"
ARG OCRD_FILEFORMAT_VERSION="0.1.1" ARG OCRD_FILEFORMAT_VERSION="0.1.1"

@ -1,6 +1,6 @@
FROM my_ocrd_workflow-core FROM my_ocrd_workflow-core
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" ARG PIP_INSTALL="pip install --no-cache-dir"
ARG OCRD_OLENA_VERSION="1.2.0" ARG OCRD_OLENA_VERSION="1.2.0"

@ -1,6 +1,6 @@
FROM my_ocrd_workflow-core FROM my_ocrd_workflow-core
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" ARG PIP_INSTALL="pip install --no-cache-dir"
ARG TESSDATA_BEST_VERSION="4.0.0" ARG TESSDATA_BEST_VERSION="4.0.0"
ENV TESSDATA_PREFIX /usr/local/share/tessdata ENV TESSDATA_PREFIX /usr/local/share/tessdata

@ -1,6 +1,6 @@
FROM my_ocrd_workflow-core FROM my_ocrd_workflow-core
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" ARG PIP_INSTALL="pip install --no-cache-dir"
ARG SBB_BINARIZATION_COMMIT="4d145cc" ARG SBB_BINARIZATION_COMMIT="4d145cc"

@ -1,6 +1,6 @@
FROM my_ocrd_workflow-core-cuda10.0 FROM my_ocrd_workflow-core-cuda10.0
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver" ARG PIP_INSTALL="pip install --no-cache-dir"
ARG SBB_TEXTLINE_DETECTOR_COMMIT="247d5f9" ARG SBB_TEXTLINE_DETECTOR_COMMIT="247d5f9"

@ -27,13 +27,25 @@ including all dependencies in Docker.
How to use How to use
---------- ----------
It's easiest to use it as pre-built containers. To run the containers on an **Currently, due to problems with the Travis CI, we do not provide pre-built
example workspace: containers anymore.**
To build the containers yourself using Docker:
~~~
cd ~/devel/ocrd-galley/
./build
~~~ ~~~
# Update to the latest stable containers
(cd ~/devel/ocrd-galley/; ./run-docker-hub-update)
You can then install the wrappers into a Python venv:
~~~
cd ~/devel/ocrd-galley/wrapper
pip install .
~~~
You may then use the script `my_ocrd_workflow` to use your self-built
containers on an example workspace:
~~~
# Download an example workspace # Download an example workspace
cd /tmp cd /tmp
wget https://qurator-data.de/examples/actevedef_718448162.first-page.zip wget https://qurator-data.de/examples/actevedef_718448162.first-page.zip
@ -41,17 +53,8 @@ unzip actevedef_718448162.first-page.zip
# Run the workflow on it # Run the workflow on it
cd actevedef_718448162.first-page cd actevedef_718448162.first-page
~/devel/ocrd-galley/run-docker-hub ~/devel/ocrd-galley/my_ocrd_workflow
~~~
### Build the containers yourself
To build the containers yourself using Docker:
~~~
cd ~/devel/ocrd-galley/
./build
~~~ ~~~
You may then use the script `run` to use your self-built containers, analogous to
the example above.
### Viewing results ### Viewing results
You may then examine the results using You may then examine the results using
@ -83,7 +86,7 @@ The document must be specified by its PPN, for example:
~~~ ~~~
~/devel/ocrd-galley/ppn2ocr PPN77164308X ~/devel/ocrd-galley/ppn2ocr PPN77164308X
cd PPN77164308X cd PPN77164308X
~/devel/ocrd-galley/run-docker-hub -I BEST --skip-validation ~/devel/ocrd-galley/my_ocrd_workflow -I BEST --skip-validation
~~~ ~~~
This produces a workspace directory `PPN77164308X` with the OCR results in it; This produces a workspace directory `PPN77164308X` with the OCR results in it;
@ -101,7 +104,7 @@ for the given images.
~~~ ~~~
~/devel/ocrd-galley/ocrd-workspace-from-images 0005.png ~/devel/ocrd-galley/ocrd-workspace-from-images 0005.png
cd workspace-xxxxx # output by the last command cd workspace-xxxxx # output by the last command
~/devel/ocrd-galley/run-docker-hub ~/devel/ocrd-galley/my_ocrd_workflow
~~~ ~~~
This produces a workspace from the files and then runs the OCR workflow on it. This produces a workspace from the files and then runs the OCR workflow on it.

@ -70,7 +70,7 @@ main() {
do_validate do_validate
ocrd-calamari-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI -P checkpoint "/var/lib/calamari-models/GT4HistOCR/2019-07-22T15_49+0200/*.ckpt.json" -P textequiv_level "$TEXTEQUIV_LEVEL" ocrd-calamari-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI -P checkpoint "/var/lib/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/*.ckpt.json" -P textequiv_level "$TEXTEQUIV_LEVEL"
ocrd-tesserocr-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-TESS -P model "GT4HistOCR_2000000" -P textequiv_level "$TEXTEQUIV_LEVEL" ocrd-tesserocr-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-TESS -P model "GT4HistOCR_2000000" -P textequiv_level "$TEXTEQUIV_LEVEL"
do_validate do_validate

@ -1,11 +0,0 @@
#!/bin/sh
# Update the my_ocrd_workflow containers
DOCKER_IMAGE_PREFIX=mikegerber/my_ocrd_workflow
DOCKER_IMAGE_TAG=stable
sub_images=`ls -1 Dockerfile-* | sed 's/Dockerfile-//'`
for x in $sub_images; do
docker pull $DOCKER_IMAGE_PREFIX-$x:$DOCKER_IMAGE_TAG
done
Loading…
Cancel
Save