You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ocrd-galley/qurator_data_lib.sh

157 lines
3.7 KiB
Bash

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# ______________________________________
#/ always copy the file from \
#| mono-repo/qurator_data_lib.sh, never |
#\ edit the copy in the project /
# --------------------------------------
# \ ^__^
# \ (oo)\_______
# (__)\ )\/\
# ||----w |
# || ||
if [ -z "$BASH" ]; then
echo "qurator_data_lib.sh uses bash features, please make sure to run $0 in bash"
exit 1
fi
check_data_subdir() {
result=0
if git submodule status $DATA_SUBDIR | grep -q '^-'; then
echo "$DATA_SUBDIR/ is not an initialized submodule"; result=1
fi
if ! [ -e $DATA_SUBDIR/.git/annex ]; then
echo "$DATA_SUBDIR/ is not a git annex repository"; result=1
fi
if ! (cd $DATA_SUBDIR && git annex version | egrep -q 'local repository version: (7|8)'); then
echo "$DATA_SUBDIR/ is not a git annex repository version 7 or 8"; result=1
fi
if ! (cd $DATA_SUBDIR && git remote | grep -q '^nfs$'); then
echo "$DATA_SUBDIR/ has no git remote 'nfs'"; result=1
fi
return $result
}
annex_get() {
if [[ "$1" = '--allow_symlinks' ]]; then
allow_symlinks=1
shift
else
allow_symlinks=0
fi
file_pattern="$1"
(
cd $DATA_SUBDIR
git annex get $file_pattern
# fsck seems to be necessary to fix the files if we are in a submodule
git annex fsck $file_pattern
# Check that there are no symlinks = only unlocked files. This is needed for
# Docker builds, as we cannot dereference symlinks in a Dockerfile COPY.
if [[ $allow_symlinks = 0 ]]; then
git ls-files $file_pattern | while read f; do
if ! [[ -f "$f" ]]; then
echo "$DATA_SUBDIR/$f is not a regular file Is an unlock needed?"
exit
fi
done
fi
)
}
# Options:
# --no-unpack Do NOT unpack the file
# --strip-components NUMBER (as tar's option)
download_to() {
unpack=1
tar_options=""
_options=$(getopt --long no-unpack,strip-components: -- "" "$@")
if [[ $? != 0 ]]; then
echo "Bad parameters for download_to" >&2
exit 1
fi
eval set -- "$_options"
while true; do
case "$1" in
--no-unpack)
unpack=0
;;
--strip-components)
shift
components=$1
tar_options="$tar_options --strip-components $components"
;;
--)
shift
break
;;
esac
shift
done
download_source="$1"
dest="$2"
(
cd $DATA_SUBDIR
tmpf=`mktemp 'tmp.XXXXXX'`
curl -sSL -o $tmpf "$download_source"
if [[ $unpack = 1 ]]; then
mkdir -p "$dest"
# Unpacking relies on tar -a unpacking any tar compression
tar -C "$dest" $tar_options -af $tmpf -xv
rm -f $tmpf
else
dest_dir=`dirname "$dest"`
mkdir -p "$dest_dir"
mv $tmpf "$dest"
fi
)
}
suggest_commands() {
echo "Suggested commands:"
echo
echo "git submodule update --init"
echo "(cd $DATA_SUBDIR && git annex init --version=7)"
echo "(cd $DATA_SUBDIR && git remote add nfs annex@b-lx0053.sbb.spk-berlin.de:/var/lib/annex/qurator-data.git)"
}
handle_data() {
if [[ "$1" = '--no-download' ]]; then
no_download=1
shift
else
no_download=0
fi
if [ -n "$FORCE_DOWNLOAD" ]; then
get_from_web
elif ! check_data_subdir; then
if [[ $no_download = 1 ]]; then
select choice in "Abort to manually fix $DATA_SUBDIR submodule"; do
if [ $REPLY = 1 ]; then
suggest_commands
exit
fi
done
else
select choice in "Abort to manually fix $DATA_SUBDIR submodule" "Download data files from the web"; do
if [ $REPLY = 1 ]; then
suggest_commands
exit
else
get_from_web
break
fi
done
fi
else
get_from_annex
fi
}