🚧 Add WIP support for ocrd_trocr

master
Gerber, Mike
parent 2a2bfa337c
commit 6b78303ca2

@ -0,0 +1,18 @@
# Image for the ocrd_trocr processor, built on top of the ocrd-galley core image.
# GIT_COMMIT selects the tag of the core image to build from.
ARG GIT_COMMIT="latest"
FROM quratorspk/ocrd-galley-core:${GIT_COMMIT}

# Build-time settings: the pip invocation and the ocrd_trocr revision to install.
ARG PIP_INSTALL="pip install --no-cache-dir"
ARG OCRD_TROCR_COMMIT="250ff1c"

# Install ocrd_trocr from the GitHub source archive of the selected revision.
RUN ${PIP_INSTALL} "https://github.com/qurator-spk/ocrd_trocr/archive/${OCRD_TROCR_COMMIT}.tar.gz"

# Fail the build if the installed packages have incompatible requirements.
RUN pip check

# Run the recognizer when no other command is given.
CMD ["ocrd-trocr-recognize"]

@ -15,6 +15,7 @@ LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
# to just roll it on our own. # to just roll it on our own.
XDG_CONFIG_HOME = os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config") XDG_CONFIG_HOME = os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")
XDG_DATA_HOME = os.environ.get("XDG_DATA_HOME", Path.home() / ".local" / "share") XDG_DATA_HOME = os.environ.get("XDG_DATA_HOME", Path.home() / ".local" / "share")
XDG_CACHE_HOME = os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")
# ocrd_tesserocr # ocrd_tesserocr
TESSDATA_PREFIX = XDG_DATA_HOME / "ocrd-resources" / "ocrd-tesserocr-recognize" TESSDATA_PREFIX = XDG_DATA_HOME / "ocrd-resources" / "ocrd-tesserocr-recognize"
@ -53,6 +54,9 @@ def docker_run(argv, docker_image):
docker_run_options.extend(["-e", "LOG_LEVEL=%s" % LOG_LEVEL]) docker_run_options.extend(["-e", "LOG_LEVEL=%s" % LOG_LEVEL])
docker_run_options.extend(["-e", "_OCRD_COMPLETE"]) docker_run_options.extend(["-e", "_OCRD_COMPLETE"])
# home directory
docker_run_options.extend(["-e", "HOME=%s" % Path.home()])
# .config # .config
docker_run_options.extend(["-e", "XDG_CONFIG_HOME=%s" % XDG_CONFIG_HOME]) docker_run_options.extend(["-e", "XDG_CONFIG_HOME=%s" % XDG_CONFIG_HOME])
docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" % docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
@ -61,6 +65,14 @@ def docker_run(argv, docker_image):
docker_run_options.extend(["-e", "XDG_DATA_HOME=%s" % XDG_DATA_HOME]) docker_run_options.extend(["-e", "XDG_DATA_HOME=%s" % XDG_DATA_HOME])
docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" % docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
(XDG_DATA_HOME, XDG_DATA_HOME)]) (XDG_DATA_HOME, XDG_DATA_HOME)])
# .cache
docker_run_options.extend(["-e", "XDG_CACHE_HOME=%s" % XDG_CACHE_HOME])
docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
(XDG_CACHE_HOME, XDG_CACHE_HOME)])
# .huggingface
os.makedirs(Path.home() / ".huggingface", exist_ok=True)
docker_run_options.extend(["--mount", "type=bind,src=%s,target=%s" %
(Path.home() / ".huggingface", Path("/root") / ".huggingface")])
# ocrd_tesserocr # ocrd_tesserocr
docker_run_options.extend(["-e", "TESSDATA_PREFIX=%s" % TESSDATA_PREFIX]) docker_run_options.extend(["-e", "TESSDATA_PREFIX=%s" % TESSDATA_PREFIX])

@ -33,6 +33,7 @@ sub_images = {
"ocrd-eynollah-segment": "eynollah", "ocrd-eynollah-segment": "eynollah",
"ocrd-anybaseocr-crop": "ocrd_anybaseocr", "ocrd-anybaseocr-crop": "ocrd_anybaseocr",
"ocrd-anybaseocr-deskew": "ocrd_anybaseocr", "ocrd-anybaseocr-deskew": "ocrd_anybaseocr",
"ocrd-trocr-recognize": "ocrd_trocr",
# non OCR-D CLI # non OCR-D CLI
"ocr-transform": "ocrd_fileformat", "ocr-transform": "ocrd_fileformat",

Loading…
Cancel
Save