From 1252d8ccc3e11768a647754d7bad7cf69395fc4e Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Mon, 10 Feb 2020 19:23:17 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=A8=20Nudge=20build+download=20towards?= =?UTF-8?q?=20the=20standard=20qurator=5Fdata=5Flib.sh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .travis.yml | 2 +- build | 84 +++------------------------------------------ qurator_data_lib.sh | 71 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 80 deletions(-) create mode 100644 qurator_data_lib.sh diff --git a/.travis.yml b/.travis.yml index 1c73ea4..fb92b86 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ git: submodules: false # Avoid trying to checkout private data/ submodule install: - - FORCE_GET_FROM_WEB=y ./build + - FORCE_DOWNLOAD=y ./build script: - curl -O https://qurator-data.de/examples/actevedef_718448162.first-page.zip diff --git a/build b/build index 139922f..5bb124f 100755 --- a/build +++ b/build @@ -1,96 +1,22 @@ #!/bin/bash +set -e - +self=`realpath $0` +self_dir=`dirname "$self"` DATA_SUBDIR=data - get_from_annex() { annex_get 'calamari-models/GT4HistOCR/2019-07-22T15:49+0200/*.ckpt*' annex_get 'tesseract-models/GT4HistOCR/*.traineddata' annex_get 'textline_detection/*.h5' } - get_from_web() { download_to 'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' 'calamari-models/GT4HistOCR/2019-07-22T15:49+0200' download_to 'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar' 'tesseract-models/GT4HistOCR' download_to 'https://qurator-data.de/sbb_textline_detector/models.tar.gz' 'textline_detection' } +. $self_dir/qurator_data_lib.sh +handle_data - -check_data_subdir() { - result=0 - - if git submodule status $DATA_SUBDIR | grep -q '^-'; then - echo "$DATA_SUBDIR/ is not an initialized submodule"; result=1 - fi - if ! [ -e $DATA_SUBDIR/.git/annex ]; then - echo "$DATA_SUBDIR/ is not a git annex repository"; result=1 - fi - if ! (cd $DATA_SUBDIR && git annex version | grep -q 'local repository version: 7'); then - echo "$DATA_SUBDIR/ is not a git annex repository version 7"; result=1 - fi - if ! (cd $DATA_SUBDIR && git remote | grep -q '^nfs$'); then - echo "$DATA_SUBDIR/ has no git remote 'nfs'"; result=1 - fi - - return $result -} - -suggest_commands() { - echo "Suggested commands:" - echo - echo "git submodule update --init" - echo "(cd $DATA_SUBDIR && git annex init --version=7)" - echo "(cd $DATA_SUBDIR && git remote add nfs /<... path to ...>/GitNX-Repository/qurator/qurator-data)" -} - -annex_get() { - file_pattern="$1" - - ( - cd data - git annex get $file_pattern - - # fsck seems to be necessary to fix the files if we're in a submodule - git annex fsck $file_pattern - ) -} - - -download_to() { - download_source="$1" - unpack_to="$2" - - ( - cd data - tmpf=`mktemp 'tmp.XXXXX'` - wget -O $tmpf "$download_source" - mkdir -p "$unpack_to" - # Unpacking relies on tar -a unpacking any tar compression - tar -C "$unpack_to" -af $tmpf -xv - rm -f $tmpf - ) -} - - -set -e - - -if [ -n "$FORCE_GET_FROM_WEB" ]; then - get_from_web -elif ! check_data_subdir; then - select choice in "Abort to manually fix $DATA_SUBDIR submodule" "Download data files from the web"; do - if [ $REPLY = 1 ]; then - suggest_commands - exit - else - get_from_web - break - fi - done -else - get_from_annex -fi - docker build -t my_ocrd_workflow . diff --git a/qurator_data_lib.sh b/qurator_data_lib.sh new file mode 100644 index 0000000..f3c6e3a --- /dev/null +++ b/qurator_data_lib.sh @@ -0,0 +1,71 @@ +check_data_subdir() { + result=0 + + if git submodule status $DATA_SUBDIR | grep -q '^-'; then + echo "$DATA_SUBDIR/ is not an initialized submodule"; result=1 + fi + if ! [ -e $DATA_SUBDIR/.git/annex ]; then + echo "$DATA_SUBDIR/ is not a git annex repository"; result=1 + fi + if ! (cd $DATA_SUBDIR && git annex version | grep -q 'local repository version: 7'); then + echo "$DATA_SUBDIR/ is not a git annex repository version 7"; result=1 + fi + if ! (cd $DATA_SUBDIR && git remote | grep -q '^nfs$'); then + echo "$DATA_SUBDIR/ has no git remote 'nfs'"; result=1 + fi + + return $result +} + +annex_get() { + file_pattern="$1" + + ( + cd data + git annex get $file_pattern + + # fsck seems to be necessary to fix the files if we're in a submodule + git annex fsck $file_pattern + ) +} + +download_to() { + download_source="$1" + unpack_to="$2" + + ( + cd data + tmpf=`mktemp 'tmp.XXXXX'` + wget -O $tmpf "$download_source" + mkdir -p "$unpack_to" + # Unpacking relies on tar -a unpacking any tar compression + tar -C "$unpack_to" -af $tmpf -xv + rm -f $tmpf + ) +} + +suggest_commands() { + echo "Suggested commands:" + echo + echo "git submodule update --init" + echo "(cd $DATA_SUBDIR && git annex init --version=7)" + echo "(cd $DATA_SUBDIR && git remote add nfs /<... path to ...>/GitNX-Repository/qurator/qurator-data)" +} + +handle_data() { + if [ -n "$FORCE_DOWNLOAD" ]; then + get_from_web + elif ! check_data_subdir; then + select choice in "Abort to manually fix $DATA_SUBDIR submodule" "Download data files from the web"; do + if [ $REPLY = 1 ]; then + suggest_commands + exit + else + get_from_web + break + fi + done + else + get_from_annex + fi +}