mirror of
https://github.com/qurator-spk/ocrd-galley.git
synced 2025-06-08 22:29:56 +02:00
🎨 Nudge build+download towards the standard qurator_data_lib.sh
This commit is contained in:
parent
61bb4f99f6
commit
1252d8ccc3
3 changed files with 77 additions and 80 deletions
|
@ -7,7 +7,7 @@ git:
|
||||||
submodules: false # Avoid trying to checkout private data/ submodule
|
submodules: false # Avoid trying to checkout private data/ submodule
|
||||||
|
|
||||||
install:
|
install:
|
||||||
- FORCE_GET_FROM_WEB=y ./build
|
- FORCE_DOWNLOAD=y ./build
|
||||||
|
|
||||||
script:
|
script:
|
||||||
- curl -O https://qurator-data.de/examples/actevedef_718448162.first-page.zip
|
- curl -O https://qurator-data.de/examples/actevedef_718448162.first-page.zip
|
||||||
|
|
84
build
84
build
|
@ -1,96 +1,22 @@
|
||||||
#!/bin/bash
# Abort on the first failing command.
set -e

# Absolute path of this script and the directory that contains it.
# "$0" and "$self" are quoted so that a checkout path containing whitespace
# is not word-split.
self=$(realpath "$0")
self_dir=$(dirname "$self")

# Name of the directory holding the data files.
DATA_SUBDIR=data
|
||||||
|
|
||||||
# Fetch every data file this build needs via annex_get, one call per
# file pattern.
get_from_annex() {
  local pattern
  for pattern in \
    'calamari-models/GT4HistOCR/2019-07-22T15:49+0200/*.ckpt*' \
    'tesseract-models/GT4HistOCR/*.traineddata' \
    'textline_detection/*.h5'
  do
    annex_get "$pattern"
  done
}
|
||||||
|
|
||||||
# Fetch every data file this build needs from the web via download_to.
# The table below lists (archive URL, unpack directory) pairs.
get_from_web() {
  local -a sources=(
    'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' 'calamari-models/GT4HistOCR/2019-07-22T15:49+0200'
    'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar' 'tesseract-models/GT4HistOCR'
    'https://qurator-data.de/sbb_textline_detector/models.tar.gz' 'textline_detection'
  )
  local i
  for (( i = 0; i < ${#sources[@]}; i += 2 )); do
    download_to "${sources[i]}" "${sources[i + 1]}"
  done
}
|
||||||
|
# Load the shared data helpers from the script's own directory (quoted so a
# path containing whitespace is not word-split), then obtain the data files.
# shellcheck source=qurator_data_lib.sh
. "$self_dir/qurator_data_lib.sh"
handle_data
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Check whether $DATA_SUBDIR is set up for fetching data through git annex.
#
# All four checks are always run; one line is printed on stdout for each
# check that fails.
#
# Globals:  DATA_SUBDIR (read)
# Returns:  0 if every check passed, 1 otherwise
check_data_subdir() {
  local result=0

  # "git submodule status" prefixes an uninitialized submodule with "-"
  if git submodule status "$DATA_SUBDIR" | grep -q '^-'; then
    echo "$DATA_SUBDIR/ is not an initialized submodule"; result=1
  fi
  if ! [ -e "$DATA_SUBDIR/.git/annex" ]; then
    echo "$DATA_SUBDIR/ is not a git annex repository"; result=1
  fi
  if ! (cd "$DATA_SUBDIR" && git annex version | grep -q 'local repository version: 7'); then
    echo "$DATA_SUBDIR/ is not a git annex repository version 7"; result=1
  fi
  if ! (cd "$DATA_SUBDIR" && git remote | grep -q '^nfs$'); then
    echo "$DATA_SUBDIR/ has no git remote 'nfs'"; result=1
  fi

  return "$result"
}
|
|
||||||
|
|
||||||
# Print, on stdout, the commands a user can run to set up the
# $DATA_SUBDIR submodule by hand.
#
# Globals:  DATA_SUBDIR (read)
suggest_commands() {
  printf '%s\n' \
    "Suggested commands:" \
    "" \
    "git submodule update --init" \
    "(cd $DATA_SUBDIR && git annex init --version=7)" \
    "(cd $DATA_SUBDIR && git remote add nfs /<... path to ...>/GitNX-Repository/qurator/qurator-data)"
}
|
|
||||||
|
|
||||||
# Fetch files matching a glob pattern with "git annex get", then run
# "git annex fsck" on them.
#
# Globals:    DATA_SUBDIR (read; falls back to "data" when unset or empty)
# Arguments:  $1 - glob pattern, relative to the data directory
# Returns:    non-zero if the data directory cannot be entered or a
#             git annex command fails
annex_get() {
  local file_pattern="$1"

  (
    # Never run git annex outside the data directory
    cd "${DATA_SUBDIR:-data}" || exit 1

    # $file_pattern is left unquoted on purpose: the glob has to be expanded
    # here, relative to the data directory.
    # shellcheck disable=SC2086
    git annex get $file_pattern || exit 1

    # fsck seems to be necessary to fix the files if we're in a submodule
    # shellcheck disable=SC2086
    git annex fsck $file_pattern
  )
}
|
|
||||||
|
|
||||||
|
|
||||||
# Download a tar archive with wget and unpack it below the data directory.
#
# Globals:    DATA_SUBDIR (read; falls back to "data" when unset or empty)
# Arguments:  $1 - URL of the archive
#             $2 - directory to unpack into, relative to the data directory
#                  (created if missing)
# Returns:    non-zero if any step (cd, mktemp, wget, mkdir, tar) fails
download_to() {
  local download_source="$1"
  local unpack_to="$2"

  (
    cd "${DATA_SUBDIR:-data}" || exit 1
    tmpf=$(mktemp 'tmp.XXXXX') || exit 1
    # Remove the temporary archive on every exit path of this subshell,
    # including failed downloads and failed extractions.
    trap 'rm -f -- "$tmpf"' EXIT

    wget -O "$tmpf" "$download_source" || exit 1
    mkdir -p "$unpack_to" || exit 1
    # GNU tar detects the compression format by itself when reading an
    # archive; -a is kept from the original invocation.
    tar -C "$unpack_to" -af "$tmpf" -xv
  )
}
|
|
||||||
|
|
||||||
|
|
||||||
set -e

# Obtain the data files:
#  - FORCE_GET_FROM_WEB non-empty: always download from the web
#  - data submodule not usable:    ask whether to abort or download
#  - otherwise:                    fetch through git annex
if [ -n "${FORCE_GET_FROM_WEB:-}" ]; then
  get_from_web
elif ! check_data_subdir; then
  select choice in "Abort to manually fix $DATA_SUBDIR submodule" "Download data files from the web"; do
    # Match the answer explicitly: a typo or an answer such as "1 2" must
    # ask again instead of silently starting the download.
    case "$REPLY" in
      1)
        suggest_commands
        exit
        ;;
      2)
        get_from_web
        break
        ;;
      *)
        echo "Invalid choice: $REPLY" >&2
        ;;
    esac
  done
else
  get_from_annex
fi
|
|
||||||
|
|
||||||
docker build -t my_ocrd_workflow .
|
docker build -t my_ocrd_workflow .
|
||||||
|
|
71
qurator_data_lib.sh
Normal file
71
qurator_data_lib.sh
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
# Check whether $DATA_SUBDIR is set up for fetching data through git annex.
#
# All four checks are always run; one line is printed on stdout for each
# check that fails.
#
# Globals:  DATA_SUBDIR (read)
# Returns:  0 if every check passed, 1 otherwise
check_data_subdir() {
  local result=0

  # "git submodule status" prefixes an uninitialized submodule with "-"
  if git submodule status "$DATA_SUBDIR" | grep -q '^-'; then
    echo "$DATA_SUBDIR/ is not an initialized submodule"; result=1
  fi
  if ! [ -e "$DATA_SUBDIR/.git/annex" ]; then
    echo "$DATA_SUBDIR/ is not a git annex repository"; result=1
  fi
  if ! (cd "$DATA_SUBDIR" && git annex version | grep -q 'local repository version: 7'); then
    echo "$DATA_SUBDIR/ is not a git annex repository version 7"; result=1
  fi
  if ! (cd "$DATA_SUBDIR" && git remote | grep -q '^nfs$'); then
    echo "$DATA_SUBDIR/ has no git remote 'nfs'"; result=1
  fi

  return "$result"
}
|
||||||
|
|
||||||
|
# Fetch files matching a glob pattern with "git annex get", then run
# "git annex fsck" on them.
#
# Globals:    DATA_SUBDIR (read; falls back to "data" when unset or empty)
# Arguments:  $1 - glob pattern, relative to the data directory
# Returns:    non-zero if the data directory cannot be entered or a
#             git annex command fails
annex_get() {
  local file_pattern="$1"

  (
    # Never run git annex outside the data directory
    cd "${DATA_SUBDIR:-data}" || exit 1

    # $file_pattern is left unquoted on purpose: the glob has to be expanded
    # here, relative to the data directory.
    # shellcheck disable=SC2086
    git annex get $file_pattern || exit 1

    # fsck seems to be necessary to fix the files if we're in a submodule
    # shellcheck disable=SC2086
    git annex fsck $file_pattern
  )
}
|
||||||
|
|
||||||
|
# Download a tar archive with wget and unpack it below the data directory.
#
# Globals:    DATA_SUBDIR (read; falls back to "data" when unset or empty)
# Arguments:  $1 - URL of the archive
#             $2 - directory to unpack into, relative to the data directory
#                  (created if missing)
# Returns:    non-zero if any step (cd, mktemp, wget, mkdir, tar) fails
download_to() {
  local download_source="$1"
  local unpack_to="$2"

  (
    cd "${DATA_SUBDIR:-data}" || exit 1
    tmpf=$(mktemp 'tmp.XXXXX') || exit 1
    # Remove the temporary archive on every exit path of this subshell,
    # including failed downloads and failed extractions.
    trap 'rm -f -- "$tmpf"' EXIT

    wget -O "$tmpf" "$download_source" || exit 1
    mkdir -p "$unpack_to" || exit 1
    # GNU tar detects the compression format by itself when reading an
    # archive; -a is kept from the original invocation.
    tar -C "$unpack_to" -af "$tmpf" -xv
  )
}
|
||||||
|
|
||||||
|
# Print, on stdout, the commands a user can run to set up the
# $DATA_SUBDIR submodule by hand.
#
# Globals:  DATA_SUBDIR (read)
suggest_commands() {
  printf '%s\n' \
    "Suggested commands:" \
    "" \
    "git submodule update --init" \
    "(cd $DATA_SUBDIR && git annex init --version=7)" \
    "(cd $DATA_SUBDIR && git remote add nfs /<... path to ...>/GitNX-Repository/qurator/qurator-data)"
}
|
||||||
|
|
||||||
|
# Obtain the data files, choosing the source as follows:
#  - FORCE_DOWNLOAD non-empty:       always call get_from_web
#  - check_data_subdir reports an
#    unusable data submodule:        ask on stdin whether to abort or download
#  - otherwise:                      call get_from_annex
#
# get_from_web and get_from_annex must be defined by the calling script.
#
# Globals:  FORCE_DOWNLOAD (read), DATA_SUBDIR (read)
# Returns:  the status of the fetch function that ran; non-zero if input ends
#           before a valid choice was made. Choosing "Abort" prints the
#           suggested commands and exits the shell.
handle_data() {
  local choice REPLY

  if [ -n "${FORCE_DOWNLOAD:-}" ]; then
    get_from_web
  elif ! check_data_subdir; then
    select choice in "Abort to manually fix $DATA_SUBDIR submodule" "Download data files from the web"; do
      # Match the answer explicitly: a typo or an answer such as "1 2" must
      # ask again instead of silently starting the download.
      case "$REPLY" in
        1)
          suggest_commands
          exit
          ;;
        2)
          get_from_web
          return
          ;;
        *)
          echo "Invalid choice: $REPLY" >&2
          ;;
      esac
    done
    # Only reached when stdin hit end-of-file without a valid answer
    echo "No choice made, data files were not fetched" >&2
    return 1
  else
    get_from_annex
  fi
}
|
Loading…
Add table
Add a link
Reference in a new issue