From 6b78303ca2548042e89193309bc4fd757de4b987 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Wed, 5 Apr 2023 11:57:52 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20Add=20WIP=20support=20for=20ocrd?=
 =?UTF-8?q?=5Ftrocr?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile-ocrd_trocr                     | 18 ++++++++++++++++++
 wrapper/qurator/ocrd_galley/cli.py        | 17 +++++++++++++++++
 wrapper/qurator/ocrd_galley/sub_images.py |  1 +
 3 files changed, 36 insertions(+)
 create mode 100644 Dockerfile-ocrd_trocr

diff --git a/Dockerfile-ocrd_trocr b/Dockerfile-ocrd_trocr
new file mode 100644
index 0000000..fc05759
--- /dev/null
+++ b/Dockerfile-ocrd_trocr
@@ -0,0 +1,18 @@
+ARG GIT_COMMIT="latest"
+FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
+
+ARG PIP_INSTALL="pip install --no-cache-dir"
+ARG OCRD_TROCR_COMMIT="250ff1c"
+
+
+# Build pip installable stuff
+RUN ${PIP_INSTALL} \
+    https://github.com/qurator-spk/ocrd_trocr/archive/$OCRD_TROCR_COMMIT.tar.gz
+
+
+# Check pip dependencies
+RUN pip check
+
+
+# Default command
+CMD ["ocrd-trocr-recognize"]
diff --git a/wrapper/qurator/ocrd_galley/cli.py b/wrapper/qurator/ocrd_galley/cli.py
index d3f3fea..9423c61 100644
--- a/wrapper/qurator/ocrd_galley/cli.py
+++ b/wrapper/qurator/ocrd_galley/cli.py
@@ -15,6 +15,7 @@ LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
 # to just roll it on our own.
 XDG_CONFIG_HOME = os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")
 XDG_DATA_HOME = os.environ.get("XDG_DATA_HOME", Path.home() / ".local" / "share")
+XDG_CACHE_HOME = os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")
 
 # ocrd_tesserocr
 TESSDATA_PREFIX = XDG_DATA_HOME / "ocrd-resources" / "ocrd-tesserocr-recognize"
@@ -53,6 +54,9 @@ def docker_run(argv, docker_image):
     docker_run_options.extend(["-e", "LOG_LEVEL=%s" % LOG_LEVEL])
     docker_run_options.extend(["-e", "_OCRD_COMPLETE"])
 
+    # home directory
+    docker_run_options.extend(["-e", "HOME=%s" % Path.home()])
+
     # .config
     docker_run_options.extend(["-e", "XDG_CONFIG_HOME=%s" % XDG_CONFIG_HOME])
     docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
@@ -61,6 +65,19 @@ def docker_run(argv, docker_image):
     docker_run_options.extend(["-e", "XDG_DATA_HOME=%s" % XDG_DATA_HOME])
     docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
         (XDG_DATA_HOME, XDG_DATA_HOME)])
+    # .cache
+    # "docker run --mount type=bind" fails if the source directory is missing,
+    # so make sure it exists (same treatment as ~/.huggingface below).
+    os.makedirs(XDG_CACHE_HOME, exist_ok=True)
+    docker_run_options.extend(["-e", "XDG_CACHE_HOME=%s" % XDG_CACHE_HOME])
+    docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
+        (XDG_CACHE_HOME, XDG_CACHE_HOME)])
+    # .huggingface
+    # NOTE(review): HOME is set to the host home above while this mounts to
+    # /root/.huggingface -- confirm which location ocrd_trocr actually reads.
+    os.makedirs(Path.home() / ".huggingface", exist_ok=True)
+    docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
+        (Path.home() / ".huggingface", Path("/root") / ".huggingface")])
 
     # ocrd_tesserocr
     docker_run_options.extend(["-e", "TESSDATA_PREFIX=%s" % TESSDATA_PREFIX])
diff --git a/wrapper/qurator/ocrd_galley/sub_images.py b/wrapper/qurator/ocrd_galley/sub_images.py
index 220230d..aaea945 100644
--- a/wrapper/qurator/ocrd_galley/sub_images.py
+++ b/wrapper/qurator/ocrd_galley/sub_images.py
@@ -33,6 +33,7 @@ sub_images = {
     "ocrd-eynollah-segment": "eynollah",
     "ocrd-anybaseocr-crop": "ocrd_anybaseocr",
     "ocrd-anybaseocr-deskew": "ocrd_anybaseocr",
+    "ocrd-trocr-recognize": "ocrd_trocr",
 
     # non OCR-D CLI
     "ocr-transform": "ocrd_fileformat",