#!/bin/sh



# Name of the data directory, relative to where this script is run.
# check_data_subdir expects it to be a git submodule that is also a
# git-annex repository.
DATA_SUBDIR='data'

# Fetch the data files through git-annex: one annex_get call per file
# pattern, in the order listed. The return status is that of the last call.
get_from_annex() {
  set -- \
    'calamari-models/GT4HistOCR/2019-07-22T15:49+0200/*.ckpt*' \
    'tesseract-models/GT4HistOCR/*.traineddata' \
    'textline_detection/*.h5'

  # Reuses file_pattern, the global annex_get assigns from its own argument,
  # so the loop leaves no extra variable behind.
  for file_pattern in "$@"; do
    annex_get "$file_pattern"
  done
}

# Download the model archives from the web. Each download_to call receives
# the archive URL followed by the directory the archive is unpacked into.
get_from_web() {
  download_to \
    'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' \
    'calamari-models/GT4HistOCR/2019-07-22T15:49+0200'

  download_to \
    'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar' \
    'tesseract-models/GT4HistOCR'

  # FIXME: the textline_detection download is disabled; it "needs update":
  # download_to 'https://file.spk-berlin.de:8443/textline_detection/models.tar.gz' 'textline_detection'
}



# Check that $DATA_SUBDIR can be used as the git-annex data source.
#
# Globals:  DATA_SUBDIR (read), result (written: 0 or 1)
# Outputs:  one line on stdout for every check that failed
# Returns:  0 if all checks passed, 1 if at least one failed
#
# All checks are always run, so every problem is reported in one go.
# Every expansion of DATA_SUBDIR is quoted so that a directory name containing
# whitespace or glob characters is passed through as a single word.
check_data_subdir() {
  result=0

  # A `git submodule status` line starting with '-' is taken to mean that the
  # submodule has not been initialized.
  if git submodule status "$DATA_SUBDIR" | grep -q '^-'; then
    echo "$DATA_SUBDIR/ is not an initialized submodule"; result=1
  fi
  if ! [ -e "$DATA_SUBDIR/.git/annex" ]; then
    echo "$DATA_SUBDIR/ is not a git annex repository"; result=1
  fi
  # The remaining checks run in a subshell so the caller's working directory
  # is left untouched.
  if ! (cd "$DATA_SUBDIR" && git annex version | grep -q 'local repository version: 7'); then
    echo "$DATA_SUBDIR/ is not a git annex repository version 7"; result=1
  fi
  if ! (cd "$DATA_SUBDIR" && git remote | grep -q '^nfs$'); then
    echo "$DATA_SUBDIR/ has no git remote 'nfs'"; result=1
  fi

  return "$result"
}

# Print, on stdout, the commands a user can run to set up the $DATA_SUBDIR
# submodule by hand. Nothing is executed; the text is only displayed.
suggest_commands() {
  # One argument per output line; the empty string yields the blank line.
  printf '%s\n' \
    "Suggested commands:" \
    "" \
    "git submodule update --init" \
    "(cd $DATA_SUBDIR && git annex init --version=7)" \
    "(cd $DATA_SUBDIR && git remote add nfs /<... path to ...>/GitNX-Repository/qurator/qurator-data)"
}

# Fetch the files matching a pattern from git-annex.
#
# Globals:    DATA_SUBDIR (read; falls back to "data" when unset or empty),
#             file_pattern (written)
# Arguments:  $1 - glob pattern, relative to the data directory
# Returns:    status of the subshell: non-zero if the data directory cannot
#             be entered, otherwise the status of the last git-annex command
#             (or of the first failing one when errexit is active)
annex_get() {
  file_pattern="$1"

  (
    # Stop right here if the directory is missing, so git-annex never runs
    # against whatever directory the caller happens to be in.
    cd "${DATA_SUBDIR:-data}" || exit 1

    # $file_pattern is intentionally unquoted: the shell has to expand the
    # glob into the matching file names before git-annex sees them.
    # shellcheck disable=SC2086
    git annex get $file_pattern

    # fsck seems to be necessary to fix the files if we're in a submodule
    # shellcheck disable=SC2086
    git annex fsck $file_pattern
  )
}


# Download an archive and unpack it below the data directory.
#
# Globals:    DATA_SUBDIR (read; falls back to "data" when unset or empty),
#             download_source, unpack_to (written)
# Arguments:  $1 - URL of the archive
#             $2 - directory, relative to the data directory, to unpack into
#                  (created if missing)
# Returns:    0 on success; otherwise the status of the step that failed
#             (cd, mktemp, wget, mkdir or tar)
download_to() {
  download_source="$1"
  unpack_to="$2"

  (
    cd "${DATA_SUBDIR:-data}" || exit 1
    # The temporary file is created inside the data directory.
    tmpf=$(mktemp 'tmp.XXXXX') || exit 1

    # Run the steps as one guarded chain so the temporary file is removed on
    # every path and the first failure is what the caller gets to see.
    status=0
    {
      wget -O "$tmpf" "$download_source" &&
        mkdir -p "$unpack_to" &&
        # XXX Unpacking relies on tar -a unpacking any tar compression, might not work everywhere?
        tar -C "$unpack_to" -af "$tmpf" -xv
    } || status=$?

    rm -f -- "$tmpf"
    exit "$status"
  )
}


# From here on, abort on the first command that fails.
set -e


# Get the data files: from git-annex when the data submodule is set up,
# otherwise let the user choose between fixing it by hand and downloading
# the files from the web.
if ! check_data_subdir; then
  # Plain POSIX menu loop. `select` is not part of POSIX sh, so it cannot be
  # used under the #!/bin/sh shebang of this script. Menu and prompt go to
  # stderr.
  while :; do
    {
      echo "1) Abort to manually fix $DATA_SUBDIR submodule"
      echo "2) Download data files from the web"
      printf '#? '
    } >&2

    # End of input (e.g. a non-interactive run): stop instead of building
    # an image without any data files.
    if ! read -r choice; then
      echo "No choice made, aborting." >&2
      exit 1
    fi

    case "$choice" in
      1)
        suggest_commands
        exit
        ;;
      2)
        get_from_web
        break
        ;;
      *)
        # Ask again rather than treating a typo as "download".
        echo "Invalid choice: $choice" >&2
        ;;
    esac
  done
else
  get_from_annex
fi

docker build -t my_ocrd_workflow .