From 23e282ce8c03b9f0353af9b408022a69d3c7510f Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 30 Sep 2024 21:27:04 +0200 Subject: [PATCH 01/16] add GHA CD via Dockerhub --- .github/workflows/docker-image.yml | 47 ++++++++++++++++++++++++++++++ Dockerfile | 22 ++++++++++++++ Makefile | 16 ++++++++++ README.md | 20 ++++++++----- 4 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/docker-image.yml create mode 100644 Dockerfile diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 0000000..17b6cb8 --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,47 @@ +name: Docker Image CD + +on: + push: + branches: [ "master" ] + workflow_dispatch: + inputs: + docker-tagname: + description: Tag name of the Docker image + default: 'ocrd/sbb_binarization' + +env: + DOCKER_TAGNAME: ${{ github.event.inputs.docker-tagname || 'ocrd/sbb_binarization' }} + +jobs: + + build: + + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + + steps: + - uses: actions/checkout@v4 + - # Activate cache export feature to reduce build time of image + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Build the Docker image + run: make docker DOCKER_TAG=${{ env.DOCKER_TAGNAME }} + - name: Login to Dockerhub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + - name: Push image to Dockerhub + run: docker push ${{ env.DOCKER_TAGNAME }} + - name: Alias the Docker image for GHCR + run: docker tag ${{ env.DOCKER_TAGNAME }} ghcr.io/${{ github.repository }} + - name: Login to GitHub Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Push image to Github Container Registry + run: docker push ghcr.io/${{ github.repository }} diff --git a/Dockerfile b/Dockerfile new 
file mode 100644 index 0000000..7cbd98e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +ARG DOCKER_BASE_IMAGE +FROM $DOCKER_BASE_IMAGE +ARG VCS_REF +ARG BUILD_DATE +LABEL \ + maintainer="https://ocr-d.de/kontakt" \ + org.label-schema.vcs-ref=$VCS_REF \ + org.label-schema.vcs-url="https://github.com/qurator-spk/sbb_binarization" \ + org.label-schema.build-date=$BUILD_DATE + +WORKDIR /build/sbb_binarization +COPY setup.py . +COPY ocrd-tool.json . +COPY sbb_binarize ./sbb_binarize +COPY requirements.txt . +COPY README.md . +COPY Makefile . +RUN make install +RUN rm -rf /build/sbb_binarization + +WORKDIR /data +VOLUME ["/data"] diff --git a/Makefile b/Makefile index e4f5b87..b9bd15b 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,8 @@ # BEGIN-EVAL makefile-parser --make-help Makefile +DOCKER_BASE_IMAGE = docker.io/ocrd/core:v2.69.0 +DOCKER_TAG = ocrd/nmalign + .PHONY: help install help: @echo "" @@ -10,8 +13,12 @@ help: @echo " test Run tests" @echo " clean Remove copies/results in test/assets" @echo "" + @echo " docker Build a Docker image $(DOCKER_TAG) from $(DOCKER_BASE_IMAGE)" + @echo "" @echo " Variables" @echo "" + @echo " PYTHON" + @echo " DOCKER_TAG Docker image tag of result for the docker target" # END-EVAL @@ -40,6 +47,15 @@ test: test/assets models ocrd-sbb-binarize -m test/assets/kant_aufklaerung_1784-page-region/data/mets.xml -g phys_0001 -I OCR-D-GT-SEG-REGION -O BIN -P model default -P operation_level region ocrd-sbb-binarize -m test/assets/kant_aufklaerung_1784-page-region/data/mets.xml -g phys_0001 -I OCR-D-GT-SEG-REGION -O BIN2 -P model default-2021-03-09 -P operation_level region +.PHONY: docker +docker: + docker build \ + --build-arg DOCKER_BASE_IMAGE=$(DOCKER_BASE_IMAGE) \ + --build-arg VCS_REF=$$(git rev-parse --short HEAD) \ + --build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \ + -t $(DOCKER_TAG) . 
+ + .PHONY: clean clean: -$(RM) -fr test/assets diff --git a/README.md b/README.md index 39d4ad1..ef69cb6 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,8 @@ > Document Image Binarization [![pip release](https://img.shields.io/pypi/v/sbb-binarization.svg)](https://pypi.org/project/sbb-binarization/) -[![GHActions Tests](https://github.com/qurator-spk/sbb_binarization/actions/workflows/test.yml/badge.svg)](https://github.com/qurator-spk/sbb_binarization/actions/workflows/test.yml) +[![GHActions CI](https://github.com/qurator-spk/sbb_binarization/actions/workflows/test.yml/badge.svg)](https://github.com/qurator-spk/sbb_binarization/actions/workflows/test.yml) +[![GHActions CD](https://github.com/qurator-spk/sbb_binarization/actions/workflows/docker-image.yml/badge.svg)](https://github.com/qurator-spk/sbb_binarization/actions/workflows/docker-image.yml) @@ -13,16 +14,19 @@ Python `3.7-3.10` with Tensorflow `<=2.11.1` are currently supported. While newe You can either install from PyPI via -``` -pip install sbb-binarization -``` + pip install sbb-binarization + or clone the repository, enter it and install (editable) with -``` -git clone git@github.com:qurator-spk/sbb_binarization.git -cd sbb_binarization; pip install -e . -``` + git clone git@github.com:qurator-spk/sbb_binarization.git + cd sbb_binarization; pip install -e . + + +Alternatively, download the prebuilt image from Dockerhub: + + docker pull ocrd/sbb_binarization + ### Models From 1162a1c8c9e3423a0bcd08f9f0d19b162bc89b19 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 1 Oct 2024 03:50:46 +0200 Subject: [PATCH 02/16] make install: update setuptools, too --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index b9bd15b..534a9f1 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ help: # Install with pip install: + pip install -U setuptools pip pip install . 
# Downloads the pre-trained models from qurator-data.de From e0ba83e993134d7966cbfc27252ee31406a225a4 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 1 Oct 2024 03:51:12 +0200 Subject: [PATCH 03/16] CI: increase memory on VM --- .circleci/config.yml | 1 + .github/workflows/test.yml | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 919d30f..24e19f3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -19,6 +19,7 @@ jobs: paths: ocrd-resources - run: make test + resource_class: large workflows: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 53c6850..ebda269 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,7 +3,10 @@ name: CLI Tests -on: [push, pull_request] +on: + push: + pull_request: + workflow_dispatch: jobs: build: From ccfc8212ec7a0238ef4eeda638ebf78bb03f758c Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 1 Oct 2024 04:09:55 +0200 Subject: [PATCH 04/16] remove shebang from setup.py (somehow breaking py39) --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 437730c..b1a454f 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # -*- coding: utf-8 -*- from json import load from setuptools import setup, find_packages From 05e3088f8f1541b217b46715b61bcde35bf015e5 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:12:29 +0200 Subject: [PATCH 05/16] make docker: fix docker tag --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 534a9f1..3dbc08f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # BEGIN-EVAL makefile-parser --make-help Makefile DOCKER_BASE_IMAGE = docker.io/ocrd/core:v2.69.0 -DOCKER_TAG = ocrd/nmalign +DOCKER_TAG = ocrd/sbb_binarization .PHONY: help install help: From 676b6f1c69018d371b530423d4e798f67e6bb7f5 Mon 
Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:24:41 +0200 Subject: [PATCH 06/16] add pyproject.toml --- pyproject.toml | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..367a073 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,44 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel", "setuptools-ocrd"] + +[project] +name = "sbb_binarization" +authors = [ + {name = "Vahid Rezanezhad"}, + {name = "Staatsbibliothek zu Berlin - Preußischer Kulturbesitz"}, +] +description = "Pixelwise binarization with selectional auto-encoders in Keras" +readme = "README.md" +license.file = "LICENSE" +requires-python = ">=3.8" + +dynamic = ["dependencies", "version"] + +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering :: Image Processing", +] + +[project.scripts] +sbb_binarize = "sbb_binarize.cli:main" +ocrd-sbb-binarize = "sbb_binarize.ocrd_cli:cli" + +[project.urls] +Homepage = "https://github.com/qurator-spk/sbb_binarization" +Repository = "https://github.com/qurator-spk/sbb_binarization.git" + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} + +[tool.setuptools.packages.find] +where = ["."] +include = ["sbb_binarize"] +namespaces = false + +[tool.setuptools.package-data] +sbb_binarize = ["sbb_binarize/*.json"] From 0f611f8e56c773ec612b3d4d9df80bdda34cd129 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:25:21 +0200 Subject: [PATCH 07/16] remove setup.py --- setup.py | 29 ----------------------------- 1 file changed, 29 
deletions(-) delete mode 100644 setup.py diff --git a/setup.py b/setup.py deleted file mode 100644 index b1a454f..0000000 --- a/setup.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -from json import load -from setuptools import setup, find_packages - -with open('./ocrd-tool.json', 'r') as f: - version = load(f)['version'] - -install_requires = open('requirements.txt').read().split('\n') - -setup( - name='sbb_binarization', - version=version, - description='Pixelwise binarization with selectional auto-encoders in Keras', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Vahid Rezanezhad', - url='https://github.com/qurator-spk/sbb_binarization', - license='Apache License 2.0', - packages=find_packages(exclude=('test', 'repo')), - include_package_data=True, - package_data={'': ['*.json', '*.yml', '*.yaml']}, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'sbb_binarize=sbb_binarize.cli:main', - 'ocrd-sbb-binarize=sbb_binarize.ocrd_cli:cli', - ] - }, -) From 1b8f54c7e3058d1db3d771c88c09cd63a11d4f68 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:26:35 +0200 Subject: [PATCH 08/16] CI: remove py37 from matrix (now unsupported anyway) --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 24e19f3..f8062a7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -28,5 +28,5 @@ workflows: - build-python: matrix: parameters: - python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.8', '3.9', '3.10'] From eb9a9fedb811cd225a202dee9e71f7f76ea565b9 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:27:00 +0200 Subject: [PATCH 09/16] CI: remove py37 from matrix --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ebda269..fc21ec4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.8', '3.9', '3.10'] steps: - uses: actions/checkout@v4 - name: Set up Python From 4eabd125a3366982d930eba0d728615f49af9cee Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Mon, 14 Oct 2024 13:33:36 +0200 Subject: [PATCH 10/16] relax TF requirement --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4afbfba..709a6b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ numpy setuptools >= 41 opencv-python-headless ocrd >= 2.38.0 -tensorflow <= 2.11.1 +tensorflow <= 2.12.1 From b581568e1e892025273db8942438f8503644812b Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:41:37 +0200 Subject: [PATCH 11/16] dockerfile: switch to pyproject.toml Co-authored-by: joschrew <91774427+joschrew@users.noreply.github.com> --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 7cbd98e..f71dc7b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ LABEL \ org.label-schema.build-date=$BUILD_DATE WORKDIR /build/sbb_binarization -COPY setup.py . +COPY pyproject.toml . COPY ocrd-tool.json . COPY sbb_binarize ./sbb_binarize COPY requirements.txt . 
From 547229c0a412a3bdbd4a5a3e7809e9c23524071c Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:45:16 +0200 Subject: [PATCH 12/16] forgot to include package data Co-authored-by: joschrew <91774427+joschrew@users.noreply.github.com> --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 367a073..f837231 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,5 +40,8 @@ where = ["."] include = ["sbb_binarize"] namespaces = false +[tool.setuptools] +include-package-data = true + [tool.setuptools.package-data] -sbb_binarize = ["sbb_binarize/*.json"] +"*" = ["*.json"] From d2597959bab3e1c6910775b506327be1c5a8f256 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:56:26 +0200 Subject: [PATCH 13/16] docker: rebase on core-cuda stage --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3dbc08f..23b6476 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ # BEGIN-EVAL makefile-parser --make-help Makefile -DOCKER_BASE_IMAGE = docker.io/ocrd/core:v2.69.0 +# should really be core-cuda-tf2, but not as long as we must restrict TF2 minor version +DOCKER_BASE_IMAGE = docker.io/ocrd/core-cuda:v2.69.0 DOCKER_TAG = ocrd/sbb_binarization .PHONY: help install From 00f70d1ac9e46df1a3bd3759f77ed82b6ec033aa Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:36:51 +0200 Subject: [PATCH 14/16] relax TF requirement (subminor) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 709a6b7..057f30c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ numpy setuptools >= 41 opencv-python-headless ocrd >= 2.38.0 -tensorflow <= 2.12.1 +tensorflow <= 2.12.* From 
7ee111dcf816c2c21829d84d9e5d3c7223163fb7 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:37:13 +0200 Subject: [PATCH 15/16] CI: try adding py3.11 --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f8062a7..715caa1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -28,5 +28,5 @@ workflows: - build-python: matrix: parameters: - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.8', '3.9', '3.10', '3.11'] From ddcec5ba73ab6452056077a9341838ea26d78a32 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:38:26 +0200 Subject: [PATCH 16/16] relax TF requirement (fix syntax) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 057f30c..8a5cd40 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ numpy setuptools >= 41 opencv-python-headless ocrd >= 2.38.0 -tensorflow <= 2.12.* +tensorflow < 2.13