diff --git a/Dockerfile-ocrd_trocr b/Dockerfile-ocrd_trocr
new file mode 100644
index 0000000..fc05759
--- /dev/null
+++ b/Dockerfile-ocrd_trocr
@@ -0,0 +1,18 @@
+ARG GIT_COMMIT="latest"
+FROM quratorspk/ocrd-galley-core:$GIT_COMMIT
+
+ARG PIP_INSTALL="pip install --no-cache-dir"
+ARG OCRD_TROCR_COMMIT="250ff1c"
+
+
+# Build pip installable stuff
+RUN ${PIP_INSTALL} \
+    https://github.com/qurator-spk/ocrd_trocr/archive/$OCRD_TROCR_COMMIT.tar.gz
+
+
+# Check pip dependencies
+RUN pip check
+
+
+# Default command
+CMD ["ocrd-trocr-recognize"]
diff --git a/wrapper/qurator/ocrd_galley/cli.py b/wrapper/qurator/ocrd_galley/cli.py
index d3f3fea..9423c61 100644
--- a/wrapper/qurator/ocrd_galley/cli.py
+++ b/wrapper/qurator/ocrd_galley/cli.py
@@ -15,6 +15,7 @@ LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
 # to just roll it on our own.
 XDG_CONFIG_HOME = os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")
 XDG_DATA_HOME = os.environ.get("XDG_DATA_HOME", Path.home() / ".local" / "share")
+XDG_CACHE_HOME = os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")
 
 # ocrd_tesserocr
 TESSDATA_PREFIX = XDG_DATA_HOME / "ocrd-resources" / "ocrd-tesserocr-recognize"
@@ -53,6 +54,9 @@ def docker_run(argv, docker_image):
     docker_run_options.extend(["-e", "LOG_LEVEL=%s" % LOG_LEVEL])
     docker_run_options.extend(["-e", "_OCRD_COMPLETE"])
 
+    # home directory
+    docker_run_options.extend(["-e", "HOME=%s" % Path.home()])
+
     # .config
     docker_run_options.extend(["-e", "XDG_CONFIG_HOME=%s" % XDG_CONFIG_HOME])
     docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
@@ -61,6 +65,17 @@ def docker_run(argv, docker_image):
     docker_run_options.extend(["-e", "XDG_DATA_HOME=%s" % XDG_DATA_HOME])
     docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
         (XDG_DATA_HOME, XDG_DATA_HOME)])
+    # .cache
+    # A bind mount fails when its source is missing on the host, so create
+    # the directory first (same treatment as .huggingface below).
+    os.makedirs(XDG_CACHE_HOME, exist_ok=True)
+    docker_run_options.extend(["-e", "XDG_CACHE_HOME=%s" % XDG_CACHE_HOME])
+    docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
+        (XDG_CACHE_HOME, XDG_CACHE_HOME)])
+    # .huggingface
+    os.makedirs(Path.home() / ".huggingface", exist_ok=True)
+    docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
+        (Path.home() / ".huggingface", Path("/root") / ".huggingface")])
 
     # ocrd_tesserocr
     docker_run_options.extend(["-e", "TESSDATA_PREFIX=%s" % TESSDATA_PREFIX])
diff --git a/wrapper/qurator/ocrd_galley/sub_images.py b/wrapper/qurator/ocrd_galley/sub_images.py
index 220230d..aaea945 100644
--- a/wrapper/qurator/ocrd_galley/sub_images.py
+++ b/wrapper/qurator/ocrd_galley/sub_images.py
@@ -33,6 +33,7 @@ sub_images = {
     "ocrd-eynollah-segment": "eynollah",
     "ocrd-anybaseocr-crop": "ocrd_anybaseocr",
     "ocrd-anybaseocr-deskew": "ocrd_anybaseocr",
+    "ocrd-trocr-recognize": "ocrd_trocr",
 
     # non OCR-D CLI
     "ocr-transform": "ocrd_fileformat",