mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-08-20 01:09:53 +02:00
Merge 7b5f593709
into fc911f3734
This commit is contained in:
commit
201624959d
45 changed files with 142 additions and 688 deletions
1
.dockerignore
Normal file
1
.dockerignore
Normal file
|
@ -0,0 +1 @@
|
||||||
|
.git
|
|
@ -1,70 +0,0 @@
|
||||||
FROM ubuntu:22.04
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG OCRD_VERSION_MINIMUM="2.47.0"
|
|
||||||
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
|
|
||||||
ENV PIP_DEFAULT_TIMEOUT=120
|
|
||||||
|
|
||||||
|
|
||||||
RUN echo "APT::Acquire::Retries \"3\";" > /etc/apt/apt.conf.d/80-retries && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
build-essential \
|
|
||||||
curl \
|
|
||||||
git \
|
|
||||||
xz-utils \
|
|
||||||
pkg-config \
|
|
||||||
# For add-apt-repository:
|
|
||||||
software-properties-common \
|
|
||||||
# XML utils
|
|
||||||
libxml2-utils \
|
|
||||||
xmlstarlet \
|
|
||||||
# OCR-D uses ImageMagick for pixel density estimation
|
|
||||||
imagemagick \
|
|
||||||
# pyenv builds
|
|
||||||
# TODO: builder container?
|
|
||||||
libz-dev \
|
|
||||||
libssl-dev \
|
|
||||||
libbz2-dev \
|
|
||||||
liblzma-dev \
|
|
||||||
libncurses-dev \
|
|
||||||
libffi-dev \
|
|
||||||
libreadline-dev \
|
|
||||||
libsqlite3-dev \
|
|
||||||
libmagic-dev \
|
|
||||||
&& \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
|
|
||||||
# Set up OCR-D logging
|
|
||||||
RUN echo "setOverrideLogLevel(os.getenv('LOG_LEVEL', 'INFO'))" >/etc/ocrd_logging.py
|
|
||||||
|
|
||||||
|
|
||||||
# Install pyenv
|
|
||||||
# TODO: do not run as root
|
|
||||||
# TODO: does just saying "3.7" work as intended?
|
|
||||||
ENV HOME=/root
|
|
||||||
ENV PYENV_ROOT=/usr/local/share/pyenv
|
|
||||||
ENV PATH=$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
|
|
||||||
RUN \
|
|
||||||
git clone --depth=1 https://github.com/yyuu/pyenv.git $PYENV_ROOT && \
|
|
||||||
pyenv install 3.7 && \
|
|
||||||
pyenv global 3.7 && \
|
|
||||||
pyenv rehash && \
|
|
||||||
pip install -U pip wheel && \
|
|
||||||
pip install setuptools
|
|
||||||
|
|
||||||
# Install pip installable-stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
"ocrd >= ${OCRD_VERSION_MINIMUM}"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
WORKDIR /data
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ['ocrd']
|
|
|
@ -1,70 +0,0 @@
|
||||||
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG OCRD_VERSION_MINIMUM="2.47.0"
|
|
||||||
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
|
|
||||||
ENV PIP_DEFAULT_TIMEOUT=120
|
|
||||||
|
|
||||||
|
|
||||||
RUN echo "APT::Acquire::Retries \"3\";" > /etc/apt/apt.conf.d/80-retries && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
build-essential \
|
|
||||||
curl \
|
|
||||||
git \
|
|
||||||
xz-utils \
|
|
||||||
pkg-config \
|
|
||||||
# For add-apt-repository:
|
|
||||||
software-properties-common \
|
|
||||||
# XML utils
|
|
||||||
libxml2-utils \
|
|
||||||
xmlstarlet \
|
|
||||||
# OCR-D uses ImageMagick for pixel density estimation
|
|
||||||
imagemagick \
|
|
||||||
# pyenv builds
|
|
||||||
# TODO: builder container?
|
|
||||||
libz-dev \
|
|
||||||
libssl-dev \
|
|
||||||
libbz2-dev \
|
|
||||||
liblzma-dev \
|
|
||||||
libncurses-dev \
|
|
||||||
libffi-dev \
|
|
||||||
libreadline-dev \
|
|
||||||
libsqlite3-dev \
|
|
||||||
libmagic-dev \
|
|
||||||
&& \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
|
|
||||||
# Set up OCR-D logging
|
|
||||||
RUN echo "setOverrideLogLevel(os.getenv('LOG_LEVEL', 'INFO'))" >/etc/ocrd_logging.py
|
|
||||||
|
|
||||||
|
|
||||||
# Install pyenv
|
|
||||||
# TODO: do not run as root
|
|
||||||
# TODO: does just saying "3.7" work as intended?
|
|
||||||
ENV HOME=/root
|
|
||||||
ENV PYENV_ROOT=/usr/local/share/pyenv
|
|
||||||
ENV PATH=$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
|
|
||||||
RUN \
|
|
||||||
git clone --depth=1 https://github.com/yyuu/pyenv.git $PYENV_ROOT && \
|
|
||||||
pyenv install 3.7 && \
|
|
||||||
pyenv global 3.7 && \
|
|
||||||
pyenv rehash && \
|
|
||||||
pip install -U pip wheel && \
|
|
||||||
pip install setuptools
|
|
||||||
|
|
||||||
# Install pip installable-stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
"ocrd >= ${OCRD_VERSION_MINIMUM}"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
WORKDIR /data
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ['ocrd']
|
|
|
@ -1,18 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG DINGLEHOPPER_VERSION="0.9.2"
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
"dinglehopper == $DINGLEHOPPER_VERSION"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-dinglehopper"]
|
|
|
@ -1,18 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core-cuda12.1:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG EYNOLLAH_VERSION="0.3.0"
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
"eynollah == ${EYNOLLAH_VERSION}"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-eynollah-segment"]
|
|
|
@ -1,19 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core-cuda12.1:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG OCRD_ANYBASEOCR_VERSION="1.8.2"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
"ocrd_anybaseocr == ${OCRD_ANYBASEOCR_VERSION}"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-anybaseocr-crop"]
|
|
|
@ -1,20 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core-cuda12.1:$GIT_COMMIT
|
|
||||||
|
|
||||||
|
|
||||||
# XXX https://github.com/OCR-D/core/issues/642
|
|
||||||
#ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG OCRD_CALAMARI_VERSION="1.0.5"
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
"ocrd_calamari == $OCRD_CALAMARI_VERSION"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-calamari-recognize"]
|
|
|
@ -1,22 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core-cuda12.1:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
# Resolve conflicts early:
|
|
||||||
'tensorflow-gpu == 1.15.*' \
|
|
||||||
'calamari-ocr == 0.3.5' \
|
|
||||||
# Now the real stuff:
|
|
||||||
'ocrd_calamari == 0.0.7'
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
RUN ln -s ocrd-calamari-recognize /usr/local/bin/ocrd-calamari-recognize03
|
|
||||||
CMD ["ocrd-calamari-recognize"]
|
|
|
@ -1,19 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG OCRD_CIS_VERSION="0.1.5"
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
# Now the real stuff:
|
|
||||||
"https://github.com/cisocrgroup/ocrd_cis/archive/v${OCRD_CIS_VERSION}.tar.gz"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-cis-ocropy-segment"]
|
|
|
@ -1,24 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG OCRD_FILEFORMAT_VERSION="0.5.0"
|
|
||||||
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
git \
|
|
||||||
openjdk-11-jdk-headless \
|
|
||||||
wget \
|
|
||||||
unzip \
|
|
||||||
&& \
|
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
|
||||||
RUN git clone --depth 1 --branch v${OCRD_FILEFORMAT_VERSION} https://github.com/OCR-D/ocrd_fileformat.git && \
|
|
||||||
cd ocrd_fileformat/ && \
|
|
||||||
git submodule update --init && \
|
|
||||||
make install-fileformat install PREFIX=/usr/local && \
|
|
||||||
cd .. && rm -rf ocrd_fileformat/
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ['ocrd-fileformat-transform']
|
|
|
@ -1,39 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG OCRD_OLENA_VERSION="1.3.0"
|
|
||||||
|
|
||||||
|
|
||||||
# Build ocrd_olena
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
imagemagick \
|
|
||||||
&& \
|
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install olena from .deb
|
|
||||||
RUN curl -sSL --retry 3 -O https://qurator-data.de/~mike.gerber/olena_2.1.0+ocrd-git+2-ubuntu22.04/olena-bin_2.1.0+ocrd-git+2_amd64.deb && \
|
|
||||||
dpkg -i --force-depends olena-bin_2.1.0+ocrd-git+2_amd64.deb && \
|
|
||||||
rm -f olena-bin_2.1.0+ocrd-git+2_amd64.deb && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get -f install -y && \
|
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/* && \
|
|
||||||
if ! scribo-cli sauvola --help >/dev/null 2>&1; then echo "Olena/scribo is not installed correctly" >&2; exit 1; fi
|
|
||||||
RUN curl -sSL --retry 3 -o ocrd_olena.tar.gz https://github.com/OCR-D/ocrd_olena/archive/v${OCRD_OLENA_VERSION}.tar.gz && \
|
|
||||||
mkdir ocrd_olena && \
|
|
||||||
tar xvz -C ocrd_olena --strip-components=1 -f ocrd_olena.tar.gz && \
|
|
||||||
cd ocrd_olena && \
|
|
||||||
sed -i 's/^install: deps/install:/' Makefile && \
|
|
||||||
${PIP_INSTALL} ocrd && \
|
|
||||||
apt install xmlstarlet && \
|
|
||||||
make install PREFIX=/usr/local && \
|
|
||||||
cd .. && rm -rf ocrd_olena ocrd_olena.tar.gz
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ['ocrd-olena-binarize']
|
|
|
@ -1,19 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG OCRD_SEGMENT_VERSION="0.1.22"
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
# Now the real stuff:
|
|
||||||
"ocrd-segment == ${OCRD_SEGMENT_VERSION}"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-segment-extract-regions"]
|
|
|
@ -1,31 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG TESSDATA_BEST_VERSION="4.0.0"
|
|
||||||
ARG OCRD_TESSEROCR_VERSION="0.17.0"
|
|
||||||
ENV TESSDATA_PREFIX /usr/local/share/tessdata
|
|
||||||
|
|
||||||
|
|
||||||
# Install Leptonica and Tesseract.
|
|
||||||
# TODO: Review if alex-p's repo is still necessary on jammy (jammy has 4.1.1,
|
|
||||||
# alex-p has 4.1.3, but not for jammy.)
|
|
||||||
# RUN add-apt-repository ppa:alex-p/tesseract-ocr && \
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
tesseract-ocr \
|
|
||||||
libtesseract-dev \
|
|
||||||
&& \
|
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
"ocrd_tesserocr == ${OCRD_TESSEROCR_VERSION}"
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-tesserocr-recognize"]
|
|
|
@ -1,8 +1,7 @@
|
||||||
ARG GIT_COMMIT="latest"
|
FROM ocrd/all:maximum
|
||||||
FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
ARG PIP_INSTALL="pip3 install --no-cache-dir"
|
||||||
ARG OCRD_TROCR_COMMIT="250ff1c"
|
ARG OCRD_TROCR_COMMIT="30696cb"
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
# Build pip installable stuff
|
||||||
|
|
|
@ -1,18 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG OCRD_WRAP_VERSION="0.1.7"
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
"ocrd_wrap == ${OCRD_WRAP_VERSION}"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-preprocess-image"]
|
|
|
@ -1,19 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core-cuda12.1:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG SBB_BINARIZATION_VERSION="0.1.0"
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
# Now the real stuff:
|
|
||||||
"sbb_binarization == $SBB_BINARIZATION_VERSION"
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-sbb-binarize"]
|
|
|
@ -1,20 +0,0 @@
|
||||||
ARG GIT_COMMIT="latest"
|
|
||||||
FROM quratorspk/ocrd-galley-core-cuda12.1:$GIT_COMMIT
|
|
||||||
|
|
||||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
|
||||||
ARG SBB_TEXTLINE_DETECTOR_COMMIT="c4df3d6"
|
|
||||||
|
|
||||||
|
|
||||||
# Build pip installable stuff
|
|
||||||
RUN ${PIP_INSTALL} \
|
|
||||||
# https://github.com/qurator-spk/sbb_textline_detection/issues/50
|
|
||||||
"h5py < 3" \
|
|
||||||
https://github.com/qurator-spk/sbb_textline_detector/archive/$SBB_TEXTLINE_DETECTOR_COMMIT.tar.gz
|
|
||||||
|
|
||||||
|
|
||||||
# Check pip dependencies
|
|
||||||
RUN pip check
|
|
||||||
|
|
||||||
|
|
||||||
# Default command
|
|
||||||
CMD ["ocrd-sbb-textline-detector"]
|
|
33
build
33
build
|
@ -1,33 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
self=`realpath $0`
|
|
||||||
self_dir=`dirname "$self"`
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if [ -n "$1" ]; then
|
|
||||||
sub_images=""
|
|
||||||
for arg in "$@"; do
|
|
||||||
arg_sub_image=`echo "$arg" | sed 's/Dockerfile-//'`
|
|
||||||
NL=$'\n'
|
|
||||||
sub_images+="$NL$arg_sub_image"
|
|
||||||
done
|
|
||||||
else
|
|
||||||
sub_images=`ls -1 Dockerfile-core* | sed 's/Dockerfile-//'`
|
|
||||||
sub_images="$sub_images `ls -1 Dockerfile-* | sed 's/Dockerfile-//'`"
|
|
||||||
fi
|
|
||||||
echo "Building:"
|
|
||||||
echo "$sub_images"
|
|
||||||
echo
|
|
||||||
|
|
||||||
|
|
||||||
# Update base images if we build a core image
|
|
||||||
if echo "$sub_images" | grep -q core; then
|
|
||||||
docker pull ubuntu:22.04
|
|
||||||
docker pull nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
|
|
||||||
fi
|
|
||||||
|
|
||||||
for sub_image in $sub_images; do
|
|
||||||
docker build --cache-from=quratorspk/ocrd-galley-$sub_image -t quratorspk/ocrd-galley-$sub_image -f Dockerfile-$sub_image .
|
|
||||||
done
|
|
4
build.sh
Executable file
4
build.sh
Executable file
|
@ -0,0 +1,4 @@
|
||||||
|
#!/bin/sh
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
docker build . -t ocrd_trocr:latest -f Dockerfile-ocrd_trocr
|
|
@ -1,156 +0,0 @@
|
||||||
# ______________________________________
|
|
||||||
#/ always copy the file from \
|
|
||||||
#| mono-repo/qurator_data_lib.sh, never |
|
|
||||||
#\ edit the copy in the project /
|
|
||||||
# --------------------------------------
|
|
||||||
# \ ^__^
|
|
||||||
# \ (oo)\_______
|
|
||||||
# (__)\ )\/\
|
|
||||||
# ||----w |
|
|
||||||
# || ||
|
|
||||||
|
|
||||||
if [ -z "$BASH" ]; then
|
|
||||||
echo "qurator_data_lib.sh uses bash features, please make sure to run $0 in bash"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
check_data_subdir() {
|
|
||||||
result=0
|
|
||||||
|
|
||||||
if git submodule status $DATA_SUBDIR | grep -q '^-'; then
|
|
||||||
echo "$DATA_SUBDIR/ is not an initialized submodule"; result=1
|
|
||||||
fi
|
|
||||||
if ! [ -e $DATA_SUBDIR/.git/annex ]; then
|
|
||||||
echo "$DATA_SUBDIR/ is not a git annex repository"; result=1
|
|
||||||
fi
|
|
||||||
if ! (cd $DATA_SUBDIR && git annex version | egrep -q 'local repository version: (7|8)'); then
|
|
||||||
echo "$DATA_SUBDIR/ is not a git annex repository version 7 or 8"; result=1
|
|
||||||
fi
|
|
||||||
if ! (cd $DATA_SUBDIR && git remote | grep -q '^nfs$'); then
|
|
||||||
echo "$DATA_SUBDIR/ has no git remote 'nfs'"; result=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
return $result
|
|
||||||
}
|
|
||||||
|
|
||||||
annex_get() {
|
|
||||||
if [[ "$1" = '--allow_symlinks' ]]; then
|
|
||||||
allow_symlinks=1
|
|
||||||
shift
|
|
||||||
else
|
|
||||||
allow_symlinks=0
|
|
||||||
fi
|
|
||||||
file_pattern="$1"
|
|
||||||
|
|
||||||
(
|
|
||||||
cd $DATA_SUBDIR
|
|
||||||
git annex get $file_pattern
|
|
||||||
|
|
||||||
# fsck seems to be necessary to fix the files if we are in a submodule
|
|
||||||
git annex fsck $file_pattern
|
|
||||||
|
|
||||||
# Check that there are no symlinks = only unlocked files. This is needed for
|
|
||||||
# Docker builds, as we cannot dereference symlinks in a Dockerfile COPY.
|
|
||||||
if [[ $allow_symlinks = 0 ]]; then
|
|
||||||
git ls-files $file_pattern | while read f; do
|
|
||||||
if ! [[ -f "$f" ]]; then
|
|
||||||
echo "$DATA_SUBDIR/$f is not a regular file – Is an unlock needed?"
|
|
||||||
exit
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Options:
|
|
||||||
# --no-unpack Do NOT unpack the file
|
|
||||||
# --strip-components NUMBER (as tar's option)
|
|
||||||
download_to() {
|
|
||||||
unpack=1
|
|
||||||
tar_options=""
|
|
||||||
|
|
||||||
_options=$(getopt --long no-unpack,strip-components: -- "" "$@")
|
|
||||||
if [[ $? != 0 ]]; then
|
|
||||||
echo "Bad parameters for download_to" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
eval set -- "$_options"
|
|
||||||
while true; do
|
|
||||||
case "$1" in
|
|
||||||
--no-unpack)
|
|
||||||
unpack=0
|
|
||||||
;;
|
|
||||||
--strip-components)
|
|
||||||
shift
|
|
||||||
components=$1
|
|
||||||
tar_options="$tar_options --strip-components $components"
|
|
||||||
;;
|
|
||||||
--)
|
|
||||||
shift
|
|
||||||
break
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
shift
|
|
||||||
done
|
|
||||||
|
|
||||||
download_source="$1"
|
|
||||||
dest="$2"
|
|
||||||
|
|
||||||
(
|
|
||||||
cd $DATA_SUBDIR
|
|
||||||
tmpf=`mktemp 'tmp.XXXXXX'`
|
|
||||||
curl -sSL -o $tmpf "$download_source"
|
|
||||||
if [[ $unpack = 1 ]]; then
|
|
||||||
mkdir -p "$dest"
|
|
||||||
# Unpacking relies on tar -a unpacking any tar compression
|
|
||||||
tar -C "$dest" $tar_options -af $tmpf -xv
|
|
||||||
rm -f $tmpf
|
|
||||||
else
|
|
||||||
dest_dir=`dirname "$dest"`
|
|
||||||
mkdir -p "$dest_dir"
|
|
||||||
mv $tmpf "$dest"
|
|
||||||
fi
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
suggest_commands() {
|
|
||||||
echo "Suggested commands:"
|
|
||||||
echo
|
|
||||||
echo "git submodule update --init"
|
|
||||||
echo "(cd $DATA_SUBDIR && git annex init --version=7)"
|
|
||||||
echo "(cd $DATA_SUBDIR && git remote add nfs annex@b-lx0053.sbb.spk-berlin.de:/var/lib/annex/qurator-data.git)"
|
|
||||||
}
|
|
||||||
|
|
||||||
handle_data() {
|
|
||||||
if [[ "$1" = '--no-download' ]]; then
|
|
||||||
no_download=1
|
|
||||||
shift
|
|
||||||
else
|
|
||||||
no_download=0
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -n "$FORCE_DOWNLOAD" ]; then
|
|
||||||
get_from_web
|
|
||||||
elif ! check_data_subdir; then
|
|
||||||
if [[ $no_download = 1 ]]; then
|
|
||||||
select choice in "Abort to manually fix $DATA_SUBDIR submodule"; do
|
|
||||||
if [ $REPLY = 1 ]; then
|
|
||||||
suggest_commands
|
|
||||||
exit
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
else
|
|
||||||
select choice in "Abort to manually fix $DATA_SUBDIR submodule" "Download data files from the web"; do
|
|
||||||
if [ $REPLY = 1 ]; then
|
|
||||||
suggest_commands
|
|
||||||
exit
|
|
||||||
else
|
|
||||||
get_from_web
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
get_from_annex
|
|
||||||
fi
|
|
||||||
}
|
|
|
@ -1 +0,0 @@
|
||||||
test-core.sh
|
|
|
@ -1,14 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
test_id=`basename $0`
|
|
||||||
cd `mktemp -d /tmp/$test_id-XXXXX`
|
|
||||||
|
|
||||||
# Prepare test workspace
|
|
||||||
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
|
|
||||||
unzip actevedef_718448162.first-page+binarization+segmentation.zip
|
|
||||||
cd actevedef_718448162.first-page+binarization+segmentation
|
|
||||||
|
|
||||||
# Run tests
|
|
||||||
ocrd-cis-ocropy-segment -I OCR-D-IMG-BIN -O TEST-CIS-OCRPY-SEGMENT
|
|
||||||
# TODO -recognize
|
|
24
test.sh
Executable file
24
test.sh
Executable file
|
@ -0,0 +1,24 @@
|
||||||
|
#!/bin/sh
|
||||||
|
set -e
|
||||||
|
|
||||||
|
count_ok=0
|
||||||
|
count_failed=0
|
||||||
|
|
||||||
|
for test in tests/*.sh; do
|
||||||
|
echo "== $test"
|
||||||
|
$test && result=$? || result=$?
|
||||||
|
|
||||||
|
if [[ $result = 0 ]]; then
|
||||||
|
echo "✔"
|
||||||
|
count_ok=$((count_ok+1))
|
||||||
|
else
|
||||||
|
echo "❌"
|
||||||
|
count_failed=$((count_failed+1))
|
||||||
|
fi
|
||||||
|
echo
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "$count_ok ok, $count_failed failed"
|
||||||
|
if [[ $count_failed -gt 0 ]]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
16
tests/test-ocrd_anybaseocr.sh
Executable file
16
tests/test-ocrd_anybaseocr.sh
Executable file
|
@ -0,0 +1,16 @@
|
||||||
|
#!/bin/sh
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
test_id=`basename $0`
|
||||||
|
cd `mktemp -d /tmp/$test_id-XXXXX`
|
||||||
|
|
||||||
|
# Prepare processors
|
||||||
|
|
||||||
|
# Prepare test workspace
|
||||||
|
wget https://qurator-data.de/examples/actevedef_718448162.first-page.zip
|
||||||
|
unzip actevedef_718448162.first-page.zip
|
||||||
|
cd actevedef_718448162.first-page
|
||||||
|
|
||||||
|
# Run tests
|
||||||
|
ocrd-anybaseocr-binarize -I OCR-D-IMG -O OCR-D-BIN -P operation_level page -P threshold 0.3
|
||||||
|
ocrd-anybaseocr-deskew -I OCR-D-BIN -O OCR-D-DESKEW -P maxskew 5.0 -P skewsteps 20 -P operation_level page
|
|
@ -5,7 +5,7 @@ test_id=`basename $0`
|
||||||
cd `mktemp -d /tmp/$test_id-XXXXX`
|
cd `mktemp -d /tmp/$test_id-XXXXX`
|
||||||
|
|
||||||
# Prepare processors
|
# Prepare processors
|
||||||
ocrd resmgr download ocrd-sbb-binarize default-2021-03-09
|
ocrd resmgr download ocrd-calamari-recognize qurator-gt4histocr-1.0
|
||||||
|
|
||||||
# Prepare test workspace
|
# Prepare test workspace
|
||||||
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
|
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
|
||||||
|
@ -13,4 +13,4 @@ unzip actevedef_718448162.first-page+binarization+segmentation.zip
|
||||||
cd actevedef_718448162.first-page+binarization+segmentation
|
cd actevedef_718448162.first-page+binarization+segmentation
|
||||||
|
|
||||||
# Run tests
|
# Run tests
|
||||||
ocrd-sbb-binarize -P model default-2021-03-09 -I OCR-D-IMG -O TEST-OCRD-SBB-BINARIZE
|
ocrd-calamari-recognize -I OCR-D-SEG-LINE-SBB -O OCR-D-OCR-CALA -P checkpoint_dir qurator-gt4histocr-1.0
|
30
tests/test-ocrd_cis.sh
Executable file
30
tests/test-ocrd_cis.sh
Executable file
|
@ -0,0 +1,30 @@
|
||||||
|
#!/bin/sh
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
test_id=`basename $0`
|
||||||
|
cd `mktemp -d /tmp/$test_id-XXXXX`
|
||||||
|
|
||||||
|
OCRD_CIS_OCROPY_MODEL=fraktur.pyrnn.gz
|
||||||
|
|
||||||
|
# Prepare processors
|
||||||
|
ocrd resmgr download ocrd-cis-ocropy-recognize $OCRD_CIS_OCROPY_MODEL
|
||||||
|
|
||||||
|
# Prepare test workspace
|
||||||
|
wget https://qurator-data.de/examples/actevedef_718448162.first-page.zip
|
||||||
|
unzip actevedef_718448162.first-page.zip
|
||||||
|
cd actevedef_718448162.first-page
|
||||||
|
|
||||||
|
# XXX ocrd-cis-ocropy-segment wasn't happy with the binarized input (no
|
||||||
|
# "binarized" AlternativeImage?!), so we do it here again
|
||||||
|
ocrd-skimage-binarize -I OCR-D-IMG -O OCR-D-IMG-BIN
|
||||||
|
|
||||||
|
# Run tests
|
||||||
|
ocrd-cis-ocropy-segment \
|
||||||
|
-I OCR-D-IMG-BIN -O TEST-CIS-OCROPY-SEG-LINE \
|
||||||
|
-P level-of-operation page
|
||||||
|
test "$(grep TextLine TEST-CIS-OCROPY-SEG-LINE/*.xml | wc -l)" -gt 50
|
||||||
|
|
||||||
|
ocrd-cis-ocropy-recognize \
|
||||||
|
-I TEST-CIS-OCROPY-SEG-LINE -O TEST-CIS-OCROPY-OCR \
|
||||||
|
-P model $OCRD_CIS_OCROPY_MODEL
|
||||||
|
test "$(grep Unicode TEST-CIS-OCROPY-OCR/*.xml | wc -l)" -gt 50
|
|
@ -5,6 +5,8 @@ test_id=`basename $0`
|
||||||
cd `mktemp -d /tmp/$test_id-XXXXX`
|
cd `mktemp -d /tmp/$test_id-XXXXX`
|
||||||
|
|
||||||
# Prepare processors
|
# Prepare processors
|
||||||
|
ocrd resmgr download ocrd-tesserocr-recognize eng.traineddata
|
||||||
|
ocrd resmgr download ocrd-tesserocr-recognize osd.traineddata
|
||||||
ocrd resmgr download ocrd-tesserocr-recognize Fraktur_GT4HistOCR.traineddata
|
ocrd resmgr download ocrd-tesserocr-recognize Fraktur_GT4HistOCR.traineddata
|
||||||
|
|
||||||
# Prepare test workspace
|
# Prepare test workspace
|
|
@ -5,7 +5,6 @@ test_id=`basename $0`
|
||||||
cd `mktemp -d /tmp/$test_id-XXXXX`
|
cd `mktemp -d /tmp/$test_id-XXXXX`
|
||||||
|
|
||||||
# Prepare processors
|
# Prepare processors
|
||||||
ocrd resmgr download ocrd-sbb-textline-detector default
|
|
||||||
|
|
||||||
# Prepare test workspace
|
# Prepare test workspace
|
||||||
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
|
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
|
||||||
|
@ -13,4 +12,6 @@ unzip actevedef_718448162.first-page+binarization+segmentation.zip
|
||||||
cd actevedef_718448162.first-page+binarization+segmentation
|
cd actevedef_718448162.first-page+binarization+segmentation
|
||||||
|
|
||||||
# Run tests
|
# Run tests
|
||||||
ocrd-sbb-textline-detector -P models default -I OCR-D-IMG-BIN -O TEST-EYNOLLAH-SEG
|
ocrd-trocr-recognize -I OCR-D-SEG-LINE-SBB -O TEST-TROCR
|
||||||
|
|
||||||
|
# TODO Does not use a useful model, does not check that text was recognize
|
|
@ -5,6 +5,7 @@ test_id=`basename $0`
|
||||||
cd `mktemp -d /tmp/$test_id-XXXXX`
|
cd `mktemp -d /tmp/$test_id-XXXXX`
|
||||||
|
|
||||||
# Prepare processors
|
# Prepare processors
|
||||||
|
ocrd resmgr download ocrd-sbb-binarize default
|
||||||
|
|
||||||
# Prepare test workspace
|
# Prepare test workspace
|
||||||
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
|
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
|
||||||
|
@ -12,4 +13,4 @@ unzip actevedef_718448162.first-page+binarization+segmentation.zip
|
||||||
cd actevedef_718448162.first-page+binarization+segmentation
|
cd actevedef_718448162.first-page+binarization+segmentation
|
||||||
|
|
||||||
# Run tests
|
# Run tests
|
||||||
ocrd-trocr-recognize -I OCR-D-SEG-LINE-SBB -O TEST-TROCR
|
ocrd-sbb-binarize -P model default -I OCR-D-IMG -O TEST-OCRD-SBB-BINARIZE
|
|
@ -5,10 +5,9 @@ import colorama
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from termcolor import colored
|
from termcolor import colored
|
||||||
|
|
||||||
from .sub_images import sub_images
|
from .processor_images import processor_images
|
||||||
|
|
||||||
|
|
||||||
DOCKER_IMAGE_PREFIX = os.environ.get("DOCKER_IMAGE_PREFIX", "quratorspk/ocrd-galley")
|
|
||||||
DOCKER_IMAGE_TAG = os.environ.get("DOCKER_IMAGE_TAG", "latest")
|
|
||||||
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
|
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
|
||||||
|
|
||||||
# xdg-user-dirs is only available under Python 3.10+ etc. pp. → it is simpler
|
# xdg-user-dirs is only available under Python 3.10+ etc. pp. → it is simpler
|
||||||
|
@ -26,22 +25,23 @@ def main():
|
||||||
argv = sys.argv.copy()
|
argv = sys.argv.copy()
|
||||||
argv[0] = os.path.basename(argv[0])
|
argv[0] = os.path.basename(argv[0])
|
||||||
|
|
||||||
|
|
||||||
# If we're running ocrd resmgr download we need to run the correct subimage.
|
# If we're running ocrd resmgr download we need to run the correct subimage.
|
||||||
if argv[:3] == ["ocrd", "resmgr", "download"] or \
|
if argv[:3] == ["ocrd", "resmgr", "download"] or \
|
||||||
argv[:3] == ["ocrd", "resmgr", "list-available"]:
|
argv[:3] == ["ocrd", "resmgr", "list-available"]:
|
||||||
# Default to the base image
|
# Default to the base image
|
||||||
sub_image = sub_images[argv[0]]
|
processor_image = processor_images[argv[0]]
|
||||||
# But look for a match of the executable
|
# But look for a match of the executable
|
||||||
for x in argv[3:]:
|
for x in argv[3:]:
|
||||||
if x in sub_images:
|
if x in processor_images:
|
||||||
sub_image = sub_images[x]
|
processor_image = processor_images[x]
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
sub_image = sub_images[argv[0]]
|
processor_image = processor_images[argv[0]]
|
||||||
|
|
||||||
docker_image = "%s-%s:%s" % (DOCKER_IMAGE_PREFIX, sub_image, DOCKER_IMAGE_TAG)
|
docker_image = processor_image
|
||||||
|
|
||||||
if DOCKER_IMAGE_TAG != "latest":
|
if docker_image != "ocrd/all:maximum":
|
||||||
print(colored(f"Using {docker_image}", 'red'))
|
print(colored(f"Using {docker_image}", 'red'))
|
||||||
docker_run(argv, docker_image)
|
docker_run(argv, docker_image)
|
||||||
|
|
||||||
|
@ -50,6 +50,7 @@ def docker_run(argv, docker_image):
|
||||||
docker_run_options = []
|
docker_run_options = []
|
||||||
docker_run_options.extend(["--rm", "-t"])
|
docker_run_options.extend(["--rm", "-t"])
|
||||||
docker_run_options.extend(["--mount", "type=bind,src=%s,target=/data" % os.getcwd()])
|
docker_run_options.extend(["--mount", "type=bind,src=%s,target=/data" % os.getcwd()])
|
||||||
|
docker_run_options.extend(["--mount", "type=tmpfs,target=/tmp"])
|
||||||
docker_run_options.extend(["--user", "%s:%s" % (os.getuid(), os.getgid())])
|
docker_run_options.extend(["--user", "%s:%s" % (os.getuid(), os.getgid())])
|
||||||
docker_run_options.extend(["-e", "LOG_LEVEL=%s" % LOG_LEVEL])
|
docker_run_options.extend(["-e", "LOG_LEVEL=%s" % LOG_LEVEL])
|
||||||
docker_run_options.extend(["-e", "_OCRD_COMPLETE"])
|
docker_run_options.extend(["-e", "_OCRD_COMPLETE"])
|
||||||
|
|
45
wrapper/qurator/ocrd_galley/processor_images.py
Normal file
45
wrapper/qurator/ocrd_galley/processor_images.py
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
processor_images = {
|
||||||
|
"ocrd": "ocrd/all:maximum",
|
||||||
|
"ocrd-olena-binarize": "ocrd/all:maximum",
|
||||||
|
"ocrd-sbb-binarize": "ocrd/all:maximum",
|
||||||
|
"ocrd-sbb-textline-detector": "ocrd/all:maximum",
|
||||||
|
"ocrd-calamari-recognize": "ocrd/all:maximum",
|
||||||
|
"ocrd-calamari-recognize03": "ocrd/all:maximum",
|
||||||
|
"ocrd-tesserocr-segment-region": "ocrd/all:maximum",
|
||||||
|
"ocrd-tesserocr-segment-line": "ocrd/all:maximum",
|
||||||
|
"ocrd-tesserocr-recognize": "ocrd/all:maximum",
|
||||||
|
"ocrd-dinglehopper": "ocrd/all:maximum",
|
||||||
|
"ocrd-cis-ocropy-clip": "ocrd/all:maximum",
|
||||||
|
"ocrd-cis-ocropy-resegment": "ocrd/all:maximum",
|
||||||
|
"ocrd-cis-ocropy-segment": "ocrd/all:maximum",
|
||||||
|
"ocrd-cis-ocropy-deskew": "ocrd/all:maximum",
|
||||||
|
"ocrd-cis-ocropy-denoise": "ocrd/all:maximum",
|
||||||
|
"ocrd-cis-ocropy-binarize": "ocrd/all:maximum",
|
||||||
|
"ocrd-cis-ocropy-dewarp": "ocrd/all:maximum",
|
||||||
|
"ocrd-cis-ocropy-recognize": "ocrd/all:maximum",
|
||||||
|
"ocrd-fileformat-transform": "ocrd/all:maximum",
|
||||||
|
"ocrd-segment-extract-pages": "ocrd/all:maximum",
|
||||||
|
"ocrd-segment-extract-regions": "ocrd/all:maximum",
|
||||||
|
"ocrd-segment-extract-lines": "ocrd/all:maximum",
|
||||||
|
"ocrd-segment-from-masks": "ocrd/all:maximum",
|
||||||
|
"ocrd-segment-from-coco": "ocrd/all:maximum",
|
||||||
|
"ocrd-segment-repair": "ocrd/all:maximum",
|
||||||
|
"ocrd-segment-evaluate": "ocrd/all:maximum",
|
||||||
|
"ocrd-preprocess-image": "ocrd/all:maximum",
|
||||||
|
"ocrd-skimage-normalize": "ocrd/all:maximum",
|
||||||
|
"ocrd-skimage-denoise-raw": "ocrd/all:maximum",
|
||||||
|
"ocrd-skimage-binarize": "ocrd/all:maximum",
|
||||||
|
"ocrd-skimage-denoise": "ocrd/all:maximum",
|
||||||
|
"ocrd-eynollah-segment": "ocrd/all:maximum",
|
||||||
|
"ocrd-anybaseocr-binarize": "ocrd/all:maximum",
|
||||||
|
"ocrd-anybaseocr-crop": "ocrd/all:maximum",
|
||||||
|
"ocrd-anybaseocr-deskew": "ocrd/all:maximum",
|
||||||
|
|
||||||
|
# non OCR-D CLI
|
||||||
|
"ocr-transform": "ocrd/all:maximum",
|
||||||
|
"dinglehopper": "ocrd/all:maximum",
|
||||||
|
"dinglehopper-extract": "ocrd/all:maximum",
|
||||||
|
|
||||||
|
# specialized images
|
||||||
|
"ocrd-trocr-recognize": "ocrd_trocr",
|
||||||
|
}
|
|
@ -1,40 +0,0 @@
|
||||||
sub_images = {
|
|
||||||
"ocrd": "core",
|
|
||||||
"ocrd-olena-binarize": "ocrd_olena",
|
|
||||||
"ocrd-sbb-binarize": "sbb_binarization",
|
|
||||||
"ocrd-sbb-textline-detector": "sbb_textline_detector",
|
|
||||||
"ocrd-calamari-recognize": "ocrd_calamari",
|
|
||||||
"ocrd-calamari-recognize03": "ocrd_calamari03",
|
|
||||||
"ocrd-tesserocr-segment-region": "ocrd_tesserocr",
|
|
||||||
"ocrd-tesserocr-segment-line": "ocrd_tesserocr",
|
|
||||||
"ocrd-tesserocr-recognize": "ocrd_tesserocr",
|
|
||||||
"ocrd-dinglehopper": "dinglehopper",
|
|
||||||
"ocrd-cis-ocropy-clip": "ocrd_cis",
|
|
||||||
"ocrd-cis-ocropy-resegment": "ocrd_cis",
|
|
||||||
"ocrd-cis-ocropy-segment": "ocrd_cis",
|
|
||||||
"ocrd-cis-ocropy-deskew": "ocrd_cis",
|
|
||||||
"ocrd-cis-ocropy-denoise": "ocrd_cis",
|
|
||||||
"ocrd-cis-ocropy-binarize": "ocrd_cis",
|
|
||||||
"ocrd-cis-ocropy-dewarp": "ocrd_cis",
|
|
||||||
"ocrd-cis-ocropy-recognize": "ocrd_cis",
|
|
||||||
"ocrd-fileformat-transform": "ocrd_fileformat",
|
|
||||||
"ocrd-segment-extract-pages": "ocrd_segment",
|
|
||||||
"ocrd-segment-extract-regions": "ocrd_segment",
|
|
||||||
"ocrd-segment-extract-lines": "ocrd_segment",
|
|
||||||
"ocrd-segment-from-masks": "ocrd_segment",
|
|
||||||
"ocrd-segment-from-coco": "ocrd_segment",
|
|
||||||
"ocrd-segment-repair": "ocrd_segment",
|
|
||||||
"ocrd-segment-evaluate": "ocrd_segment",
|
|
||||||
"ocrd-preprocess-image": "ocrd_wrap",
|
|
||||||
"ocrd-skimage-normalize": "ocrd_wrap",
|
|
||||||
"ocrd-skimage-denoise-raw": "ocrd_wrap",
|
|
||||||
"ocrd-skimage-binarize": "ocrd_wrap",
|
|
||||||
"ocrd-skimage-denoise": "ocrd_wrap",
|
|
||||||
"ocrd-eynollah-segment": "eynollah",
|
|
||||||
"ocrd-anybaseocr-crop": "ocrd_anybaseocr",
|
|
||||||
"ocrd-anybaseocr-deskew": "ocrd_anybaseocr",
|
|
||||||
"ocrd-trocr-recognize": "ocrd_trocr",
|
|
||||||
|
|
||||||
# non OCR-D CLI
|
|
||||||
"ocr-transform": "ocrd_fileformat",
|
|
||||||
}
|
|
Loading…
Add table
Add a link
Reference in a new issue