mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-06-10 07:09:52 +02:00
Merge branch 'feat/boxed-processors'
This commit is contained in:
commit
c5cd3f17e2
17 changed files with 255 additions and 7446 deletions
54
.travis.yml
54
.travis.yml
|
@ -7,7 +7,7 @@ git:
|
||||||
submodules: false # Avoid trying to checkout private data/ submodule
|
submodules: false # Avoid trying to checkout private data/ submodule
|
||||||
|
|
||||||
stages:
|
stages:
|
||||||
- name: "Build Docker image"
|
- name: "Build Docker images"
|
||||||
- name: "Test"
|
- name: "Test"
|
||||||
- name: "Deploy Docker image - latest"
|
- name: "Deploy Docker image - latest"
|
||||||
if: branch = master
|
if: branch = master
|
||||||
|
@ -16,24 +16,40 @@ stages:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
include:
|
include:
|
||||||
- stage: "Build Docker image"
|
- stage: "Build Docker images"
|
||||||
script:
|
script:
|
||||||
|
- sub_images=`ls -1 Dockerfile-* | sed 's/Dockerfile-//'`
|
||||||
- echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
|
- echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
|
||||||
|
|
||||||
# We are using the image my_ocrd_workflow to cache, so pull and tag it
|
# We are using the images to cache, so pull and tag them
|
||||||
- docker pull $DOCKER_USERNAME/my_ocrd_workflow
|
- |
|
||||||
- docker tag $DOCKER_USERNAME/my_ocrd_workflow my_ocrd_workflow
|
for x in $sub_images; do
|
||||||
|
docker pull $DOCKER_USERNAME/my_ocrd_workflow-$x || true
|
||||||
|
docker tag $DOCKER_USERNAME/my_ocrd_workflow-$x my_ocrd_workflow-$x || true
|
||||||
|
done
|
||||||
|
|
||||||
- FORCE_DOWNLOAD=y ./build
|
- FORCE_DOWNLOAD=y ./build
|
||||||
|
|
||||||
- docker tag my_ocrd_workflow $DOCKER_USERNAME/my_ocrd_workflow:$TRAVIS_COMMIT
|
- |
|
||||||
|
set -e
|
||||||
|
for x in $sub_images; do
|
||||||
|
docker tag my_ocrd_workflow-$x $DOCKER_USERNAME/my_ocrd_workflow-$x:$TRAVIS_COMMIT
|
||||||
|
done
|
||||||
- docker images
|
- docker images
|
||||||
- docker push $DOCKER_USERNAME/my_ocrd_workflow:$TRAVIS_COMMIT
|
- |
|
||||||
|
set -e
|
||||||
|
for x in $sub_images; do
|
||||||
|
docker push $DOCKER_USERNAME/my_ocrd_workflow-$x:$TRAVIS_COMMIT
|
||||||
|
done
|
||||||
|
|
||||||
- stage: "Test"
|
- stage: "Test"
|
||||||
script:
|
script:
|
||||||
- docker pull $DOCKER_USERNAME/my_ocrd_workflow:$TRAVIS_COMMIT
|
- sub_images=`ls -1 Dockerfile-* | sed 's/Dockerfile-//'`
|
||||||
- docker tag $DOCKER_USERNAME/my_ocrd_workflow:$TRAVIS_COMMIT my_ocrd_workflow
|
- |
|
||||||
|
for x in $sub_images; do
|
||||||
|
docker pull $DOCKER_USERNAME/my_ocrd_workflow-$x:$TRAVIS_COMMIT
|
||||||
|
docker tag $DOCKER_USERNAME/my_ocrd_workflow-$x:$TRAVIS_COMMIT my_ocrd_workflow-$x
|
||||||
|
done
|
||||||
|
|
||||||
- curl -O https://qurator-data.de/examples/actevedef_718448162.first-page.zip
|
- curl -O https://qurator-data.de/examples/actevedef_718448162.first-page.zip
|
||||||
- unzip actevedef_718448162.first-page.zip
|
- unzip actevedef_718448162.first-page.zip
|
||||||
|
@ -48,17 +64,25 @@ jobs:
|
||||||
- stage: "Deploy Docker image - latest"
|
- stage: "Deploy Docker image - latest"
|
||||||
env: DOCKER_TAG=latest
|
env: DOCKER_TAG=latest
|
||||||
script:
|
script:
|
||||||
|
- sub_images=`ls -1 Dockerfile-* | sed 's/Dockerfile-//'`
|
||||||
- echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
|
- echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
|
||||||
|
|
||||||
- docker pull $DOCKER_USERNAME/my_ocrd_workflow:$TRAVIS_COMMIT
|
- |
|
||||||
- docker tag $DOCKER_USERNAME/my_ocrd_workflow:$TRAVIS_COMMIT $DOCKER_USERNAME/my_ocrd_workflow:$DOCKER_TAG
|
for x in $sub_images; do
|
||||||
- docker push $DOCKER_USERNAME/my_ocrd_workflow:$DOCKER_TAG
|
docker pull $DOCKER_USERNAME/my_ocrd_workflow-$x:$TRAVIS_COMMIT
|
||||||
|
docker tag $DOCKER_USERNAME/my_ocrd_workflow-$x:$TRAVIS_COMMIT $DOCKER_USERNAME/my_ocrd_workflow-$x:$DOCKER_TAG
|
||||||
|
docker push $DOCKER_USERNAME/my_ocrd_workflow-$x:$DOCKER_TAG
|
||||||
|
done
|
||||||
|
|
||||||
- stage: "Deploy Docker image - tagged"
|
- stage: "Deploy Docker image - tagged"
|
||||||
env: DOCKER_TAG=$TRAVIS_TAG
|
env: DOCKER_TAG=$TRAVIS_TAG
|
||||||
script:
|
script:
|
||||||
|
- sub_images=`ls -1 Dockerfile-* | sed 's/Dockerfile-//'`
|
||||||
- echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
|
- echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
|
||||||
|
|
||||||
- docker pull $DOCKER_USERNAME/my_ocrd_workflow:$TRAVIS_COMMIT
|
- |
|
||||||
- docker tag $DOCKER_USERNAME/my_ocrd_workflow:$TRAVIS_COMMIT $DOCKER_USERNAME/my_ocrd_workflow:$DOCKER_TAG
|
for x in $sub_images; do
|
||||||
- docker push $DOCKER_USERNAME/my_ocrd_workflow:$DOCKER_TAG
|
docker pull $DOCKER_USERNAME/my_ocrd_workflow-$x:$TRAVIS_COMMIT
|
||||||
|
docker tag $DOCKER_USERNAME/my_ocrd_workflow-$x:$TRAVIS_COMMIT $DOCKER_USERNAME/my_ocrd_workflow-$x:$DOCKER_TAG
|
||||||
|
docker push $DOCKER_USERNAME/my_ocrd_workflow-$x:$DOCKER_TAG
|
||||||
|
done
|
||||||
|
|
86
Dockerfile
86
Dockerfile
|
@ -1,86 +0,0 @@
|
||||||
FROM ubuntu:18.04
|
|
||||||
|
|
||||||
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
|
|
||||||
ENV PIP_DEFAULT_TIMEOUT=120
|
|
||||||
|
|
||||||
ENV OCRD_OLENA_VERSION 1.2.0
|
|
||||||
ENV TESSDATA_BEST_VERSION 4.0.0
|
|
||||||
ENV TESSDATA_PREFIX /usr/local/share/tessdata
|
|
||||||
|
|
||||||
|
|
||||||
RUN echo "APT::Acquire::Retries \"3\";" > /etc/apt/apt.conf.d/80-retries && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
curl xz-utils \
|
|
||||||
python3-pip \
|
|
||||||
git \
|
|
||||||
software-properties-common \
|
|
||||||
# For clstm on Ubuntu 19.04:
|
|
||||||
swig libeigen3-dev libpng-dev libprotobuf-dev \
|
|
||||||
# For cv2:
|
|
||||||
libsm6 libxrender1 \
|
|
||||||
# For ocrd_olena:
|
|
||||||
imagemagick \
|
|
||||||
# XML utils
|
|
||||||
libxml2-utils \
|
|
||||||
xmlstarlet \
|
|
||||||
&& \
|
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
|
|
||||||
# Install Leptonica and Tesseract.
|
|
||||||
RUN add-apt-repository ppa:alex-p/tesseract-ocr && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
tesseract-ocr \
|
|
||||||
libtesseract-dev \
|
|
||||||
&& \
|
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Set up OCR-D logging
|
|
||||||
COPY ocrd_logging.py /etc/
|
|
||||||
|
|
||||||
|
|
||||||
# Build ocrd_olena
|
|
||||||
# XXX .deb needs an update
|
|
||||||
RUN curl -sSL --retry 3 -O https://qurator-data.de/~mike.gerber/olena_2.1-0+ocrd-git/olena-bin_2.1-0+ocrd-git_amd64.deb && \
|
|
||||||
dpkg -i --force-depends olena-bin_2.1-0+ocrd-git_amd64.deb && \
|
|
||||||
rm -f olena-bin_2.1-0+ocrd-git_amd64.deb && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get -f install -y && \
|
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
|
||||||
RUN pip3 install --no-cache-dir --upgrade pip && \
|
|
||||||
curl -sSL --retry 3 -o ocrd_olena.tar.gz https://github.com/OCR-D/ocrd_olena/archive/v${OCRD_OLENA_VERSION}.tar.gz && \
|
|
||||||
mkdir ocrd_olena && \
|
|
||||||
tar xvz -C ocrd_olena --strip-components=1 -f ocrd_olena.tar.gz && \
|
|
||||||
cd ocrd_olena && \
|
|
||||||
sed -i 's/^install: deps$/install:/' Makefile && \
|
|
||||||
pip3 install --no-cache-dir --use-feature=2020-resolver ocrd && \
|
|
||||||
make install PREFIX=/usr/local && \
|
|
||||||
cd .. && rm -rf ocrd_olena ocrd_olena.tar.gz
|
|
||||||
|
|
||||||
|
|
||||||
# Copy OCR models
|
|
||||||
RUN mkdir -p /var/lib/calamari-models
|
|
||||||
COPY data/calamari-models/GT4HistOCR /var/lib/calamari-models/GT4HistOCR
|
|
||||||
RUN mkdir -p $TESSDATA_PREFIX
|
|
||||||
ADD data/mirror/github.com/tesseract-ocr/tessdata_best/archive/${TESSDATA_BEST_VERSION}-repacked.tar.gz $TESSDATA_PREFIX/
|
|
||||||
COPY data/tesseract-models/GT4HistOCR/GT4HistOCR_2000000.traineddata $TESSDATA_PREFIX/
|
|
||||||
COPY data/textline_detection /var/lib/textline_detection
|
|
||||||
|
|
||||||
|
|
||||||
# Install requirements
|
|
||||||
# Using pipdeptree here to get more info than from pip3 check
|
|
||||||
COPY requirements.txt /tmp/
|
|
||||||
RUN pip3 install --no-cache-dir --upgrade pip && \
|
|
||||||
pip3 install --no-cache-dir --use-feature=2020-resolver -r /tmp/requirements.txt && \
|
|
||||||
pip3 install --no-cache-dir pipdeptree && \
|
|
||||||
pipdeptree -w fail
|
|
||||||
|
|
||||||
|
|
||||||
COPY my_ocrd_workflow /usr/bin/
|
|
||||||
COPY xsd/* /usr/share/xml/
|
|
||||||
|
|
||||||
|
|
||||||
WORKDIR /data
|
|
||||||
ENTRYPOINT ["/usr/bin/my_ocrd_workflow"]
|
|
49
Dockerfile-core
Normal file
49
Dockerfile-core
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
FROM ubuntu:18.04
|
||||||
|
|
||||||
|
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
|
||||||
|
ENV PIP_DEFAULT_TIMEOUT=120
|
||||||
|
|
||||||
|
|
||||||
|
RUN echo "APT::Acquire::Retries \"3\";" > /etc/apt/apt.conf.d/80-retries && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y \
|
||||||
|
curl xz-utils \
|
||||||
|
build-essential python3-dev \
|
||||||
|
# For get-pip.py:
|
||||||
|
python3-distutils \
|
||||||
|
# For add-apt-repository:
|
||||||
|
software-properties-common \
|
||||||
|
# XML utils
|
||||||
|
libxml2-utils \
|
||||||
|
xmlstarlet \
|
||||||
|
&& \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|
||||||
|
# Set up OCR-D logging
|
||||||
|
RUN echo "setOverrideLogLevel(os.getenv('LOG_LEVEL', 'INFO'))" >/etc/ocrd_logging.py
|
||||||
|
|
||||||
|
|
||||||
|
# Install pip (and setuptools)
|
||||||
|
# We use get-pip.py here to avoid
|
||||||
|
# a. having to upgrade from Ubuntu's pip
|
||||||
|
# b. the dreaded "old script wrapper" error message
|
||||||
|
RUN curl -sSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
|
||||||
|
python3 get-pip.py && \
|
||||||
|
rm -f get-pip.py
|
||||||
|
|
||||||
|
|
||||||
|
# Install pip-installable stuff
|
||||||
|
RUN pip3 install --no-cache-dir \
|
||||||
|
'ocrd >= 2.13.1'
|
||||||
|
|
||||||
|
|
||||||
|
# Check pip dependencies
|
||||||
|
RUN pip3 check
|
||||||
|
|
||||||
|
|
||||||
|
WORKDIR /data
|
||||||
|
|
||||||
|
# Default command
|
||||||
|
# Exec form is parsed as a JSON array and therefore needs double quotes;
# with single quotes Docker falls back to shell form and the command fails.
CMD ["ocrd"]
|
18
Dockerfile-dinglehopper
Normal file
18
Dockerfile-dinglehopper
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
FROM my_ocrd_workflow-core
|
||||||
|
|
||||||
|
|
||||||
|
ENV DINGLEHOPPER_COMMIT 2b98f69
|
||||||
|
|
||||||
|
|
||||||
|
# Build pip installable stuff
|
||||||
|
RUN pip3 install --no-cache-dir \
|
||||||
|
# Now the real stuff:
|
||||||
|
https://github.com/qurator-spk/dinglehopper/archive/$DINGLEHOPPER_COMMIT.tar.gz
|
||||||
|
|
||||||
|
|
||||||
|
# Check pip dependencies
|
||||||
|
RUN pip3 check
|
||||||
|
|
||||||
|
|
||||||
|
# Default command
|
||||||
|
CMD ["ocrd-dinglehopper"]
|
24
Dockerfile-ocrd_calamari
Normal file
24
Dockerfile-ocrd_calamari
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
FROM my_ocrd_workflow-core
|
||||||
|
|
||||||
|
|
||||||
|
# Build pip installable stuff
|
||||||
|
RUN pip3 install --no-cache-dir \
|
||||||
|
# Resolve conflicts early:
|
||||||
|
'tensorflow-gpu == 1.15.*' \
|
||||||
|
'calamari-ocr == 0.3.5' \
|
||||||
|
# Now the real stuff:
|
||||||
|
'ocrd_calamari >= 0.0.7'
|
||||||
|
|
||||||
|
|
||||||
|
# Copy OCR models
|
||||||
|
RUN mkdir -p /var/lib/calamari-models
|
||||||
|
COPY data/calamari-models/GT4HistOCR /var/lib/calamari-models/GT4HistOCR
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Check pip dependencies
|
||||||
|
RUN pip3 check
|
||||||
|
|
||||||
|
|
||||||
|
# Default command
|
||||||
|
CMD ["ocrd-calamari-recognize"]
|
32
Dockerfile-ocrd_olena
Normal file
32
Dockerfile-ocrd_olena
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
FROM my_ocrd_workflow-core
|
||||||
|
|
||||||
|
ENV OCRD_OLENA_VERSION 1.2.0
|
||||||
|
|
||||||
|
# Build ocrd_olena
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y \
|
||||||
|
imagemagick \
|
||||||
|
&& \
|
||||||
|
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||||
|
RUN curl -sSL --retry 3 -O https://qurator-data.de/~mike.gerber/olena_2.1-0+ocrd-git/olena-bin_2.1-0+ocrd-git_amd64.deb && \
|
||||||
|
dpkg -i --force-depends olena-bin_2.1-0+ocrd-git_amd64.deb && \
|
||||||
|
rm -f olena-bin_2.1-0+ocrd-git_amd64.deb && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get -f install -y && \
|
||||||
|
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||||
|
RUN curl -sSL --retry 3 -o ocrd_olena.tar.gz https://github.com/OCR-D/ocrd_olena/archive/v${OCRD_OLENA_VERSION}.tar.gz && \
|
||||||
|
mkdir ocrd_olena && \
|
||||||
|
tar xvz -C ocrd_olena --strip-components=1 -f ocrd_olena.tar.gz && \
|
||||||
|
cd ocrd_olena && \
|
||||||
|
sed -i 's/^install: deps$/install:/' Makefile && \
|
||||||
|
pip3 install --no-cache-dir --use-feature=2020-resolver ocrd && \
|
||||||
|
make install PREFIX=/usr/local && \
|
||||||
|
cd .. && rm -rf ocrd_olena ocrd_olena.tar.gz
|
||||||
|
|
||||||
|
|
||||||
|
# Check pip dependencies
|
||||||
|
RUN pip3 check
|
||||||
|
|
||||||
|
|
||||||
|
# Default command
|
||||||
|
# Exec form is parsed as a JSON array and therefore needs double quotes;
# with single quotes Docker falls back to shell form and the command fails.
CMD ["ocrd-olena-binarize"]
|
35
Dockerfile-ocrd_tesserocr
Normal file
35
Dockerfile-ocrd_tesserocr
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
FROM my_ocrd_workflow-core
|
||||||
|
|
||||||
|
|
||||||
|
ENV TESSDATA_BEST_VERSION 4.0.0
|
||||||
|
ENV TESSDATA_PREFIX /usr/local/share/tessdata
|
||||||
|
|
||||||
|
|
||||||
|
# Install Leptonica and Tesseract.
|
||||||
|
RUN add-apt-repository ppa:alex-p/tesseract-ocr && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y \
|
||||||
|
tesseract-ocr \
|
||||||
|
libtesseract-dev \
|
||||||
|
&& \
|
||||||
|
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|
||||||
|
# Copy OCR models
|
||||||
|
RUN mkdir -p $TESSDATA_PREFIX
|
||||||
|
ADD data/mirror/github.com/tesseract-ocr/tessdata_best/archive/${TESSDATA_BEST_VERSION}-repacked.tar.gz $TESSDATA_PREFIX/
|
||||||
|
COPY data/tesseract-models/GT4HistOCR/GT4HistOCR_2000000.traineddata $TESSDATA_PREFIX/
|
||||||
|
|
||||||
|
|
||||||
|
# Build pip installable stuff
|
||||||
|
RUN pip3 install --no-cache-dir \
|
||||||
|
# Now the real stuff:
|
||||||
|
'ocrd_tesserocr >= 0.9.0'
|
||||||
|
|
||||||
|
|
||||||
|
# Check pip dependencies
|
||||||
|
RUN pip3 check
|
||||||
|
|
||||||
|
|
||||||
|
# Default command
|
||||||
|
CMD ["ocrd-tesserocr-recognize"]
|
22
Dockerfile-sbb_textline_detector
Normal file
22
Dockerfile-sbb_textline_detector
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
FROM my_ocrd_workflow-core
|
||||||
|
|
||||||
|
|
||||||
|
ENV SBB_TEXTLINE_DETECTOR_COMMIT 8b01d9e
|
||||||
|
|
||||||
|
|
||||||
|
# Build pip installable stuff
|
||||||
|
RUN pip3 install --no-cache-dir \
|
||||||
|
# Now the real stuff:
|
||||||
|
https://github.com/qurator-spk/sbb_textline_detector/archive/$SBB_TEXTLINE_DETECTOR_COMMIT.tar.gz
|
||||||
|
|
||||||
|
|
||||||
|
# Copy OCR models
|
||||||
|
COPY data/textline_detection /var/lib/textline_detection
|
||||||
|
|
||||||
|
|
||||||
|
# Check pip dependencies
|
||||||
|
RUN pip3 check
|
||||||
|
|
||||||
|
|
||||||
|
# Default command
|
||||||
|
CMD ["ocrd-sbb-textline-detector"]
|
7
build
7
build
|
@ -21,4 +21,9 @@ get_from_web() {
|
||||||
handle_data
|
handle_data
|
||||||
|
|
||||||
|
|
||||||
docker build --cache-from my_ocrd_workflow -t my_ocrd_workflow .
|
docker build -t my_ocrd_workflow-core -f Dockerfile-core .
|
||||||
|
docker build -t my_ocrd_workflow-ocrd_calamari -f Dockerfile-ocrd_calamari .
|
||||||
|
docker build -t my_ocrd_workflow-dinglehopper -f Dockerfile-dinglehopper .
|
||||||
|
docker build -t my_ocrd_workflow-ocrd_olena -f Dockerfile-ocrd_olena .
|
||||||
|
docker build -t my_ocrd_workflow-ocrd_tesserocr -f Dockerfile-ocrd_tesserocr .
|
||||||
|
docker build -t my_ocrd_workflow-sbb_textline_detector -f Dockerfile-sbb_textline_detector .
|
||||||
|
|
|
@ -82,7 +82,7 @@ main() {
|
||||||
|
|
||||||
|
|
||||||
if [ "$LOG_LEVEL" = "DEBUG" -o "$LOG_LEVEL" = "TRACE" ]; then
|
if [ "$LOG_LEVEL" = "DEBUG" -o "$LOG_LEVEL" = "TRACE" ]; then
|
||||||
pip3 list
|
pip3 list || true
|
||||||
fi
|
fi
|
||||||
main
|
main
|
||||||
|
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
setOverrideLogLevel(os.getenv('LOG_LEVEL', 'INFO'))
|
|
|
@ -1,13 +0,0 @@
|
||||||
tensorflow-gpu < 2.0 # Needed for sbb_text_linedetector
|
|
||||||
|
|
||||||
ocrd >= 2.13.1
|
|
||||||
|
|
||||||
# XXX See https://github.com/OCR-D/ocrd_tesserocr/issues/135
|
|
||||||
# ocrd_tesserocr >= 0.8.XXX
|
|
||||||
https://github.com/mikegerber/ocrd_tesserocr/archive/fix/set-pcgtsid.tar.gz
|
|
||||||
|
|
||||||
ocrd_calamari >= 0.0.7
|
|
||||||
|
|
||||||
https://github.com/qurator-spk/sbb_textline_detector/archive/8b01d9e.tar.gz
|
|
||||||
|
|
||||||
https://github.com/qurator-spk/dinglehopper/archive/2b98f69.tar.gz
|
|
47
run
47
run
|
@ -1,31 +1,42 @@
|
||||||
#!/bin/sh
|
#!/bin/bash
|
||||||
# Run the my_ocrd_workflow container on the current workspace
|
|
||||||
|
|
||||||
set -e # Abort on error
|
set -e # Abort on error
|
||||||
|
|
||||||
DOCKER_IMAGE=${DOCKER_IMAGE:-my_ocrd_workflow:latest} # default to locally built
|
self=`realpath $0`
|
||||||
|
self_dir=`dirname "$self"`
|
||||||
if echo "$DOCKER_IMAGE" | grep -q "/"; then
|
|
||||||
docker pull "$DOCKER_IMAGE"
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
# XXX Work around podman vs docker uid behaviour
|
# Docker run options
|
||||||
|
docker_run_options="--rm -t"
|
||||||
|
docker_run_options="$docker_run_options --mount type=bind,src=\"$(pwd)\",target=/data"
|
||||||
|
# In podman, the container always runs as the real user == uid 0 in container
|
||||||
if docker -v 2>&1 | grep -q podman; then
|
if docker -v 2>&1 | grep -q podman; then
|
||||||
user="0:0"
|
user="0:0"
|
||||||
else
|
else
|
||||||
user="`id -u`:`id -g`"
|
user="`id -u`:`id -g`"
|
||||||
fi
|
fi
|
||||||
|
docker_run_options="$docker_run_options --user $user"
|
||||||
|
docker_run_options="$docker_run_options -e LOG_LEVEL=$LOG_LEVEL"
|
||||||
# The container currently needs to run privileged to allow it to read from e.g.
|
# The containers currently need to run privileged to allow them to read from e.g.
|
||||||
# /home on SELinux secured systems such as Fedora. We might want to use udica
|
# /home on SELinux secured systems such as Fedora. We might want to use udica
|
||||||
# instead in the future.
|
# instead in the future.
|
||||||
|
docker_run_options="$docker_run_options --privileged=true"
|
||||||
|
|
||||||
docker run --privileged=true --rm -t \
|
|
||||||
\
|
# Build aliases for the containerized ocrd processors
|
||||||
--user $user \
|
build_alias() {
|
||||||
--mount type=bind,src="$(pwd)",target=/data \
|
local command=$1
|
||||||
\
|
local docker_image=$2
|
||||||
-e LOG_LEVEL=$LOG_LEVEL \
|
|
||||||
$DOCKER_IMAGE "$@"
|
alias $command="docker run $docker_run_options $docker_image $command"
|
||||||
|
}
|
||||||
|
shopt -s expand_aliases # Required for non-interactive shells
|
||||||
|
build_alias ocrd my_ocrd_workflow-core
|
||||||
|
build_alias ocrd-olena-binarize my_ocrd_workflow-ocrd_olena
|
||||||
|
build_alias ocrd-sbb-textline-detector my_ocrd_workflow-sbb_textline_detector
|
||||||
|
build_alias ocrd-calamari-recognize my_ocrd_workflow-ocrd_calamari
|
||||||
|
build_alias ocrd-tesserocr-recognize my_ocrd_workflow-ocrd_tesserocr
|
||||||
|
build_alias ocrd-dinglehopper my_ocrd_workflow-dinglehopper
|
||||||
|
|
||||||
|
|
||||||
|
. $self_dir/my_ocrd_workflow
|
||||||
|
|
|
@ -1,4 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
# Run the my_ocrd_workflow container on the current workspace
|
|
||||||
|
|
||||||
DOCKER_IMAGE=mikegerber/my_ocrd_workflow:stable `dirname $0`/run "$@"
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue