mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-06-09 06:39:53 +02:00
Merge branch 'master' of code.dev.sbb.berlin:qurator/ocrd-galley
This commit is contained in:
commit
82d3d71ed4
14 changed files with 49 additions and 42 deletions
16
.github/workflows/test.yml
vendored
Normal file
16
.github/workflows/test.yml
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
name: Test
|
||||
|
||||
on: push
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Setup, Build, Publish, and Deploy
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Build
|
||||
run: |-
|
||||
FORCE_DOWNLOAD=y ./build
|
|
@ -1,7 +1,7 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver"
|
||||
ARG OCRD_VERSION_MINIMUM="2.18.1"
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG OCRD_VERSION_MINIMUM="2.21.0"
|
||||
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
|
||||
ENV PIP_DEFAULT_TIMEOUT=120
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM my_ocrd_workflow-core
|
||||
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver"
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG DINGLEHOPPER_COMMIT="6e47acd"
|
||||
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ FROM my_ocrd_workflow-core-cuda10.1
|
|||
|
||||
|
||||
# XXX https://github.com/OCR-D/core/issues/642
|
||||
#ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver"
|
||||
#ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG OCRD_CALAMARI_VERSION="1.0.0"
|
||||
|
||||
|
@ -20,8 +20,7 @@ COPY data/mirror/github.com/Calamari-OCR/calamari_models/gt4histocr
|
|||
|
||||
|
||||
# Check pip dependencies
|
||||
# XXX https://github.com/OCR-D/core/issues/642
|
||||
#RUN pip check
|
||||
RUN pip check
|
||||
|
||||
|
||||
# Default command
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM my_ocrd_workflow-core
|
||||
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver"
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
|
||||
|
||||
# Build pip installable stuff
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM my_ocrd_workflow-core
|
||||
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver"
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG OCRD_CIS_VERSION="0.1.5"
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM my_ocrd_workflow-core
|
||||
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver"
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG OCRD_FILEFORMAT_VERSION="0.1.1"
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM my_ocrd_workflow-core
|
||||
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver"
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG OCRD_OLENA_VERSION="1.2.0"
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM my_ocrd_workflow-core
|
||||
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver"
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG TESSDATA_BEST_VERSION="4.0.0"
|
||||
ENV TESSDATA_PREFIX /usr/local/share/tessdata
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM my_ocrd_workflow-core
|
||||
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver"
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG SBB_BINARIZATION_COMMIT="4d145cc"
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM my_ocrd_workflow-core-cuda10.0
|
||||
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir --use-feature=2020-resolver"
|
||||
ARG PIP_INSTALL="pip install --no-cache-dir"
|
||||
ARG SBB_TEXTLINE_DETECTOR_COMMIT="247d5f9"
|
||||
|
||||
|
||||
|
|
37
README.md
37
README.md
|
@ -27,13 +27,25 @@ including all dependencies in Docker.
|
|||
|
||||
How to use
|
||||
----------
|
||||
It's easiest to use it as pre-built containers. To run the containers on an
|
||||
example workspace:
|
||||
**Currently, due to problems with the Travis CI, we do not provide pre-built
|
||||
containers anymore.**
|
||||
|
||||
To build the containers yourself using Docker:
|
||||
~~~
|
||||
cd ~/devel/ocrd-galley/
|
||||
./build
|
||||
~~~
|
||||
|
||||
You can then install the wrappers into a Python venv:
|
||||
~~~
|
||||
cd ~/devel/ocrd-galley/wrapper
|
||||
pip install .
|
||||
~~~
|
||||
|
||||
You may then use the script `my_ocrd_workflow` to use your self-built
|
||||
containers on an example workspace:
|
||||
|
||||
~~~
|
||||
# Update to the latest stable containers
|
||||
(cd ~/devel/ocrd-galley/; ./run-docker-hub-update)
|
||||
|
||||
# Download an example workspace
|
||||
cd /tmp
|
||||
wget https://qurator-data.de/examples/actevedef_718448162.first-page.zip
|
||||
|
@ -41,18 +53,9 @@ unzip actevedef_718448162.first-page.zip
|
|||
|
||||
# Run the workflow on it
|
||||
cd actevedef_718448162.first-page
|
||||
~/devel/ocrd-galley/run-docker-hub
|
||||
~/devel/ocrd-galley/my_ocrd_workflow
|
||||
~~~
|
||||
|
||||
### Build the containers yourself
|
||||
To build the containers yourself using Docker:
|
||||
~~~
|
||||
cd ~/devel/ocrd-galley/
|
||||
./build
|
||||
~~~
|
||||
You may then use the script `run` to use your self-built containers, analogous to
|
||||
the example above.
|
||||
|
||||
### Viewing results
|
||||
You may then examine the results using
|
||||
[PRImA's PAGE Viewer](https://www.primaresearch.org/tools/PAGEViewer):
|
||||
|
@ -83,7 +86,7 @@ The document must be specified by its PPN, for example:
|
|||
~~~
|
||||
~/devel/ocrd-galley/ppn2ocr PPN77164308X
|
||||
cd PPN77164308X
|
||||
~/devel/ocrd-galley/run-docker-hub -I BEST --skip-validation
|
||||
~/devel/ocrd-galley/my_ocrd_workflow -I BEST --skip-validation
|
||||
~~~
|
||||
|
||||
This produces a workspace directory `PPN77164308X` with the OCR results in it;
|
||||
|
@ -101,7 +104,7 @@ for the given images.
|
|||
~~~
|
||||
~/devel/ocrd-galley/ocrd-workspace-from-images 0005.png
|
||||
cd workspace-xxxxx # output by the last command
|
||||
~/devel/ocrd-galley/run-docker-hub
|
||||
~/devel/ocrd-galley/my_ocrd_workflow
|
||||
~~~
|
||||
|
||||
This produces a workspace from the files and then runs the OCR workflow on it.
|
||||
|
|
|
@ -70,7 +70,7 @@ main() {
|
|||
do_validate
|
||||
|
||||
|
||||
ocrd-calamari-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI -P checkpoint "/var/lib/calamari-models/GT4HistOCR/2019-07-22T15_49+0200/*.ckpt.json" -P textequiv_level "$TEXTEQUIV_LEVEL"
|
||||
ocrd-calamari-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-CALAMARI -P checkpoint "/var/lib/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/*.ckpt.json" -P textequiv_level "$TEXTEQUIV_LEVEL"
|
||||
ocrd-tesserocr-recognize --overwrite -I OCR-D-SEG-LINE -O OCR-D-OCR-TESS -P model "GT4HistOCR_2000000" -P textequiv_level "$TEXTEQUIV_LEVEL"
|
||||
do_validate
|
||||
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
#!/bin/sh
|
||||
# Update the my_ocrd_workflow containers
|
||||
|
||||
DOCKER_IMAGE_PREFIX=mikegerber/my_ocrd_workflow
|
||||
DOCKER_IMAGE_TAG=stable
|
||||
|
||||
|
||||
sub_images=`ls -1 Dockerfile-* | sed 's/Dockerfile-//'`
|
||||
for x in $sub_images; do
|
||||
docker pull $DOCKER_IMAGE_PREFIX-$x:$DOCKER_IMAGE_TAG
|
||||
done
|
Loading…
Add table
Add a link
Reference in a new issue