Merge branch 'master' of https://github.com/qurator-spk/ocrd-galley
commit 4c1f198da9
@@ -1,74 +0,0 @@
def main(ctx):
    tags = [ctx.build.commit]

    if ctx.build.event == "tag":
        name = "release"
    elif ctx.build.branch == "master":
        name = "master"
        tags.append("latest")
    else:
        return

    return [
        {
            "kind": "pipeline",
            "name": name,
            "steps": [
                {
                    "name": "prepare data",
                    "image": "alpine",
                    "commands": [
                        "apk update && apk add bash curl",
                        "FORCE_DOWNLOAD=y ./build-tmp-XXX"
                    ]
                },
                # We can't glob and have to add here manually...
                step_for(ctx, "core", tags),
                step_for(ctx, "core-cuda10.0", tags),
                step_for(ctx, "core-cuda10.1", tags),

                step_for(ctx, "dinglehopper", tags),
                step_for(ctx, "ocrd_calamari", tags),
                step_for(ctx, "ocrd_calamari03", tags),
                step_for(ctx, "ocrd_cis", tags),
                step_for(ctx, "ocrd_fileformat", tags),
                step_for(ctx, "ocrd_olena", tags),
                step_for(ctx, "ocrd_segment", tags),
                step_for(ctx, "ocrd_tesserocr", tags),
                step_for(ctx, "ocrd_wrap", tags),
                step_for(ctx, "sbb_binarization", tags),
                step_for(ctx, "sbb_textline_detector", tags),
                step_for(ctx, "eynollah", tags),
                step_for(ctx, "ocrd_anybaseocr", tags),
                {
                    "name": "notify",
                    "image": "drillster/drone-email",
                    "settings": {
                        "host": "172.17.0.1",
                        "port": "25",
                        "from": "drone@ci.moegen-wir.net",
                    },
                    "when": {
                        "status": [ "success", "failure" ]
                    }
                }
            ]
        }
    ]


def step_for(ctx, sub_image, tags):
    return {
        "name": "build %s" % sub_image,
        "image": "plugins/docker",
        "settings": {
            "build_args": [
                "DRONE_COMMIT=%s" % ctx.build.commit,
            ],
            "tags": tags,
            "username": { "from_secret": "docker_username" },
            "password": { "from_secret": "docker_password" },
            "repo": "quratorspk/ocrd-galley-%s" % sub_image,
            "dockerfile": "Dockerfile-%s" % sub_image,
        }
    }
@@ -0,0 +1,38 @@
#!/usr/bin/python3
import glob
import re
import sys
import argparse
import json


all_subimages = {re.sub(r"^Dockerfile-", "", dockerfile) for dockerfile in glob.glob("Dockerfile-*")}
core_subimages = {si for si in all_subimages if si.startswith("core")}
rest_subimages = all_subimages - core_subimages



parser = argparse.ArgumentParser(description='List subimages.')
parser.add_argument('--core', action='store_true',
                    default=False, help='List core subimages')
parser.add_argument('--rest', action='store_true',
                    default=False, help='List rest subimages')
parser.add_argument('--json', action='store_true',
                    default=False, help='Return list as JSON')
args = parser.parse_args()


def list_(subimages):
    subimages = sorted(subimages)
    if args.json:
        print(json.dumps(subimages))
    else:
        print("\n".join(subimages))


if not args.core and not args.rest:
    list_(core_subimages | rest_subimages)
if args.core:
    list_(core_subimages)
if args.rest:
    list_(rest_subimages)
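# Usage sketch (these are the invocations the build workflow below uses; the
# actual output depends on which Dockerfile-* files exist in the checkout):
#
#   ./.github/list-subimages --core --json   # e.g. ["core"]
#   ./.github/list-subimages --rest --json   # e.g. ["dinglehopper", "ocrd_calamari", ...]
#   ./.github/list-subimages                 # all subimages, one name per line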
@@ -0,0 +1,58 @@
on:
  workflow_call:
    inputs:
      subimage:
        required: true
        type: string
      tags:
        required: true
        type: string
    secrets:
      DOCKERHUB_USERNAME:
        required: true
      DOCKERHUB_TOKEN:
        required: true


jobs:
  build-subimage-job:
    runs-on: ubuntu-latest
    steps:
      -
        name: Checkout
        uses: actions/checkout@v3
        # We are checking out explicitly, so build-push-action isn't trying
        # to checkout the (unreachable) submodule. (Using "context" there.)
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      -
        name: Docker meta
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: |
            quratorspk/ocrd-galley-${{ inputs.subimage }}
          flavor: |
            latest=auto
          # latest=auto should generate "latest" for the type=semver tags entry
          tags: ${{ inputs.tags }}
      -
        name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      -
        name: Build ${{ inputs.subimage }}
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Dockerfile-${{ inputs.subimage }}
          build-args: |
            GIT_COMMIT=sha-${{ github.sha }}
            BUILDKIT_INLINE_CACHE=1
          tags: ${{ steps.meta.outputs.tags }}
          push: true

          cache-from: quratorspk/ocrd-galley-${{ inputs.subimage }}:sha-${{ github.sha }}
@@ -1,36 +1,104 @@
name: build

on:

  workflow_dispatch:

  push:
    branches:
      - 'master'
      - 'test/github-actions'
      - 'fix/*'

jobs:
  docker:
  matrix:
    runs-on: ubuntu-latest
    outputs:
      core: ${{ steps.step1.outputs.core }}
      rest: ${{ steps.step1.outputs.rest }}
      all: ${{ steps.step1.outputs.all }}
    steps:
      -
        name: Checkout
        uses: actions/checkout@v3
        # We are checking out explicitly, so build-push-action isn't trying
        # to checkout the (unreachable) submodule. (Using "context" there.)

      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
        name: Generate outputs
        id: step1
        run: |
          echo "core=$(./.github/list-subimages --core --json)" >>$GITHUB_OUTPUT
          echo "rest=$(./.github/list-subimages --rest --json)" >>$GITHUB_OUTPUT
          echo "all=$(./.github/list-subimages --json)" >>$GITHUB_OUTPUT

          echo "GITHUB_OUTPUT:"
          cat $GITHUB_OUTPUT

  build-core:
    needs: matrix
    strategy:
      fail-fast: false
      matrix:
        subimage: ${{ fromJson(needs.matrix.outputs.core) }}
    uses: ./.github/workflows/build-subimage.yml
    with:
      subimage: ${{ matrix.subimage }}
      tags: |
        type=sha,format=long
      # Here: NOT the full tags, just the sha! (they get added below)
    secrets: inherit

  # TODO data
  # TODO matrix for all Dockerfiles
  build-rest:
    needs: [matrix, build-core]
    strategy:
      fail-fast: false
      matrix:
        subimage: ${{ fromJson(needs.matrix.outputs.rest) }}
    uses: ./.github/workflows/build-subimage.yml
    with:
      subimage: ${{ matrix.subimage }}
      tags: |
        type=sha,format=long
    secrets: inherit


  test:
    needs: build-rest
    runs-on: ubuntu-latest
    env:
      DOCKER_IMAGE_TAG: sha-${{ github.sha }} # needed to run the correct version through the wrapper
    steps:
      -
        name: Build
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Dockerfile-core
          build-args:
            DRONE_COMMIT=${{ github.sha }}
          push: false
        name: Checkout
        uses: actions/checkout@v3
      -
        name: Install wrapper
        run: |
          sudo apt-get install -y python3-pip
          cd wrapper && pip install .
      -
        name: Test
        run: |
          ocrd --version
          ocrd-dinglehopper --version


  # At this point, we have successfully built, uploaded and tested the images. We now just need to add
  # tags. We do this by building again, but using the formerly built images to
  # cache from.

  push-with-tags:
    needs: [matrix, test]
    strategy:
      matrix:
        subimage: ${{ fromJson(needs.matrix.outputs.all) }}
    uses: ./.github/workflows/build-subimage.yml
    with:
      subimage: ${{ matrix.subimage }}
      tags: |
        type=sha,format=long
        type=edge,branch=master
        type=ref,event=branch
        type=semver,pattern={{version}}
      # Here: full tags
      # Note: Do NOT use event=tag here, unless re-configuring the "latest"
      # behavior too as that triggers on event=tag by default. By default,
      # "latest" triggers on type=semver here, too (which is wanted).
    secrets: inherit
@@ -1,3 +0,0 @@
[submodule "data"]
	path = data
	url = git@code.dev.sbb.berlin:qurator/qurator-data.git
@@ -1,53 +0,0 @@
FROM nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04

ARG PIP_INSTALL="pip install --no-cache-dir"
ARG OCRD_VERSION_MINIMUM="2.23.3"
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
ENV PIP_DEFAULT_TIMEOUT=120


RUN echo "APT::Acquire::Retries \"3\";" > /etc/apt/apt.conf.d/80-retries && \
    apt-get update && \
    apt-get install -y \
        curl xz-utils \
        build-essential python3-dev \
        # For get-pip.py:
        python3-distutils \
        # For add-apt-repository:
        software-properties-common \
        # XML utils
        libxml2-utils \
        xmlstarlet \
        # OCR-D uses ImageMagick for pixel density estimation
        imagemagick \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*


# Set up OCR-D logging
RUN echo "setOverrideLogLevel(os.getenv('LOG_LEVEL', 'INFO'))" >/etc/ocrd_logging.py


# Install pip (and setuptools)
# We use get-pip.py here to avoid
# a. having to upgrade from Ubuntu's pip
# b. the dreaded "old script wrapper" error message
RUN curl -sSL https://bootstrap.pypa.io/pip/3.6/get-pip.py -o get-pip.py && \
    python3 get-pip.py && \
    rm -f get-pip.py


# Install pip installable-stuff
RUN ${PIP_INSTALL} \
    "ocrd >= ${OCRD_VERSION_MINIMUM}"


# Check pip dependencies
RUN pip check


WORKDIR /data

# Default command
CMD ['ocrd']
@@ -1,53 +0,0 @@
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04

ARG PIP_INSTALL="pip install --no-cache-dir"
ARG OCRD_VERSION_MINIMUM="2.23.3"
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
ENV PIP_DEFAULT_TIMEOUT=120


RUN echo "APT::Acquire::Retries \"3\";" > /etc/apt/apt.conf.d/80-retries && \
    apt-get update && \
    apt-get install -y \
        curl xz-utils \
        build-essential python3-dev \
        # For get-pip.py:
        python3-distutils \
        # For add-apt-repository:
        software-properties-common \
        # XML utils
        libxml2-utils \
        xmlstarlet \
        # OCR-D uses ImageMagick for pixel density estimation
        imagemagick \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*


# Set up OCR-D logging
RUN echo "setOverrideLogLevel(os.getenv('LOG_LEVEL', 'INFO'))" >/etc/ocrd_logging.py


# Install pip (and setuptools)
# We use get-pip.py here to avoid
# a. having to upgrade from Ubuntu's pip
# b. the dreaded "old script wrapper" error message
RUN curl -sSL https://bootstrap.pypa.io/pip/3.6/get-pip.py -o get-pip.py && \
    python3 get-pip.py && \
    rm -f get-pip.py


# Install pip installable-stuff
RUN ${PIP_INSTALL} \
    "ocrd >= ${OCRD_VERSION_MINIMUM}"


# Check pip dependencies
RUN pip check


WORKDIR /data

# Default command
CMD ['ocrd']
@@ -0,0 +1,70 @@
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04

ARG PIP_INSTALL="pip install --no-cache-dir"
ARG OCRD_VERSION_MINIMUM="2.47.0"
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
ENV PIP_DEFAULT_TIMEOUT=120


RUN echo "APT::Acquire::Retries \"3\";" > /etc/apt/apt.conf.d/80-retries && \
    apt-get update && \
    apt-get install -y \
        build-essential \
        curl \
        git \
        xz-utils \
        pkg-config \
        # For add-apt-repository:
        software-properties-common \
        # XML utils
        libxml2-utils \
        xmlstarlet \
        # OCR-D uses ImageMagick for pixel density estimation
        imagemagick \
        # pyenv builds
        # TODO: builder container?
        libz-dev \
        libssl-dev \
        libbz2-dev \
        liblzma-dev \
        libncurses-dev \
        libffi-dev \
        libreadline-dev \
        libsqlite3-dev \
        libmagic-dev \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*


# Set up OCR-D logging
RUN echo "setOverrideLogLevel(os.getenv('LOG_LEVEL', 'INFO'))" >/etc/ocrd_logging.py


# Install pyenv
# TODO: do not run as root
# TODO: does just saying "3.7" work as intended?
ENV HOME=/root
ENV PYENV_ROOT=/usr/local/share/pyenv
ENV PATH=$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
RUN \
    git clone --depth=1 https://github.com/yyuu/pyenv.git $PYENV_ROOT && \
    pyenv install 3.7 && \
    pyenv global 3.7 && \
    pyenv rehash && \
    pip install -U pip wheel && \
    pip install setuptools

# Install pip installable-stuff
RUN ${PIP_INSTALL} \
    "ocrd >= ${OCRD_VERSION_MINIMUM}"


# Check pip dependencies
RUN pip check


WORKDIR /data

# Default command
CMD ["ocrd"]
@@ -0,0 +1,18 @@
ARG GIT_COMMIT="latest"
FROM quratorspk/ocrd-galley-core:$GIT_COMMIT

ARG PIP_INSTALL="pip install --no-cache-dir"
ARG OCRD_TROCR_COMMIT="250ff1c"


# Build pip installable stuff
RUN ${PIP_INSTALL} \
    https://github.com/qurator-spk/ocrd_trocr/archive/$OCRD_TROCR_COMMIT.tar.gz


# Check pip dependencies
RUN pip check


# Default command
CMD ["ocrd-trocr-recognize"]
@@ -1,18 +0,0 @@
#!/bin/bash -x
set -e

self=`realpath $0`
self_dir=`dirname "$self"`

DATA_SUBDIR=data
get_from_web() {
    download_to 'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200'
    download_to 'https://qurator-data.de/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/model.tar.xz' 'calamari-models/GT4HistOCR/2019-12-11T11_10+0100'
    download_to 'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar' 'tesseract-models/GT4HistOCR'
    download_to 'https://qurator-data.de/sbb_textline_detector/models.tar.gz' 'textline_detection'
    download_to --strip-components 1 'https://qurator-data.de/sbb_binarization/2021-03-09/models.tar.gz' 'sbb_binarization/2021-03-09'
    download_to --no-unpack 'https://qurator-data.de/mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz' 'mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz'
    download_to --strip-components 1 'https://qurator-data.de/eynollah/models_eynollah.tar.gz' 'eynollah'
}
. $self_dir/qurator_data_lib.sh
handle_data
@@ -1 +0,0 @@
Subproject commit 9ab08a3626dde1d38dd622b65e425277cd029722
@@ -0,0 +1,14 @@
#!/bin/sh
set -ex

test_id=`basename $0`
cd `mktemp -d /tmp/$test_id-XXXXX`

# Prepare test workspace
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
unzip actevedef_718448162.first-page+binarization+segmentation.zip
cd actevedef_718448162.first-page+binarization+segmentation

# Run tests
ocrd workspace validate \
    --page-coordinate-consistency off
@@ -0,0 +1,13 @@
#!/bin/sh
set -ex

test_id=`basename $0`
cd `mktemp -d /tmp/$test_id-XXXXX`

# Prepare test workspace
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
unzip actevedef_718448162.first-page+binarization+segmentation.zip
cd actevedef_718448162.first-page+binarization+segmentation

# Run tests
ocrd-dinglehopper -I OCR-D-GT-PAGE,OCR-D-SEG-LINE-SBB -O DINGLEHOPPER-TEST
@@ -0,0 +1,12 @@
#!/bin/sh
set -ex

cd `mktemp -d /tmp/test-ocrd_olena-XXXXX`

# Prepare test workspace
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
unzip actevedef_718448162.first-page+binarization+segmentation.zip
cd actevedef_718448162.first-page+binarization+segmentation

# Run tests
ocrd-olena-binarize -I OCR-D-IMG -O TEST-OLENA
@@ -0,0 +1,17 @@
#!/bin/sh
set -ex

cd `mktemp -d /tmp/test-ocrd_tesserocr-XXXXX`

# Prepare processors
ocrd resmgr download ocrd-tesserocr-recognize Fraktur_GT4HistOCR.traineddata

# Prepare test workspace
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
unzip actevedef_718448162.first-page+binarization+segmentation.zip
cd actevedef_718448162.first-page+binarization+segmentation

# Run tests
ocrd-tesserocr-segment-region -I OCR-D-IMG-BIN -O TEST-TESS-SEG-REG
ocrd-tesserocr-segment-line -I TEST-TESS-SEG-REG -O TEST-TESS-SEG-LINE
ocrd-tesserocr-recognize -I TEST-TESS-SEG-LINE -O TEST-TESS-OCR -P model Fraktur_GT4HistOCR
@@ -0,0 +1,14 @@
#!/bin/sh
set -ex

cd `mktemp -d /tmp/test-ocrd_trocr-XXXXX`

# Prepare processors

# Prepare test workspace
wget https://qurator-data.de/examples/actevedef_718448162.first-page+binarization+segmentation.zip
unzip actevedef_718448162.first-page+binarization+segmentation.zip
cd actevedef_718448162.first-page+binarization+segmentation

# Run tests
ocrd-trocr-recognize -I OCR-D-SEG-LINE-SBB -O TEST-TROCR
@@ -1 +0,0 @@
from .cli import *
@@ -0,0 +1,40 @@
sub_images = {
    "ocrd": "core",
    "ocrd-olena-binarize": "ocrd_olena",
    "ocrd-sbb-binarize": "sbb_binarization",
    "ocrd-sbb-textline-detector": "sbb_textline_detector",
    "ocrd-calamari-recognize": "ocrd_calamari",
    "ocrd-calamari-recognize03": "ocrd_calamari03",
    "ocrd-tesserocr-segment-region": "ocrd_tesserocr",
    "ocrd-tesserocr-segment-line": "ocrd_tesserocr",
    "ocrd-tesserocr-recognize": "ocrd_tesserocr",
    "ocrd-dinglehopper": "dinglehopper",
    "ocrd-cis-ocropy-clip": "ocrd_cis",
    "ocrd-cis-ocropy-resegment": "ocrd_cis",
    "ocrd-cis-ocropy-segment": "ocrd_cis",
    "ocrd-cis-ocropy-deskew": "ocrd_cis",
    "ocrd-cis-ocropy-denoise": "ocrd_cis",
    "ocrd-cis-ocropy-binarize": "ocrd_cis",
    "ocrd-cis-ocropy-dewarp": "ocrd_cis",
    "ocrd-cis-ocropy-recognize": "ocrd_cis",
    "ocrd-fileformat-transform": "ocrd_fileformat",
    "ocrd-segment-extract-pages": "ocrd_segment",
    "ocrd-segment-extract-regions": "ocrd_segment",
    "ocrd-segment-extract-lines": "ocrd_segment",
    "ocrd-segment-from-masks": "ocrd_segment",
    "ocrd-segment-from-coco": "ocrd_segment",
    "ocrd-segment-repair": "ocrd_segment",
    "ocrd-segment-evaluate": "ocrd_segment",
    "ocrd-preprocess-image": "ocrd_wrap",
    "ocrd-skimage-normalize": "ocrd_wrap",
    "ocrd-skimage-denoise-raw": "ocrd_wrap",
    "ocrd-skimage-binarize": "ocrd_wrap",
    "ocrd-skimage-denoise": "ocrd_wrap",
    "ocrd-eynollah-segment": "eynollah",
    "ocrd-anybaseocr-crop": "ocrd_anybaseocr",
    "ocrd-anybaseocr-deskew": "ocrd_anybaseocr",
    "ocrd-trocr-recognize": "ocrd_trocr",

    # non OCR-D CLI
    "ocr-transform": "ocrd_fileformat",
}
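# Illustrative sketch only (not part of this commit): the wrapper's actual
# cli.py is not shown in this diff. This is roughly how the sub_images mapping
# above could be used to run an OCR-D executable inside its sub-image. The
# helper name and the exact docker invocation are assumptions; the
# quratorspk/ocrd-galley-<subimage> image naming and the DOCKER_IMAGE_TAG
# variable are taken from the workflows above.
import os
import subprocess
import sys


def run_in_container(executable, argv):
    # Look up which sub-image provides this executable and run it via Docker,
    # mounting the current working directory as the OCR-D workspace (/data).
    image = "quratorspk/ocrd-galley-%s:%s" % (
        sub_images[executable],
        os.environ.get("DOCKER_IMAGE_TAG", "latest"),
    )
    return subprocess.call(
        ["docker", "run", "--rm",
         "-v", "%s:/data" % os.getcwd(),
         image, executable] + list(argv))


if __name__ == "__main__":
    # Example: run "ocrd --version" through the wrapper.
    sys.exit(run_in_container("ocrd", ["--version"]))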
@@ -0,0 +1,2 @@
colorama
termcolor