mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-10-30 02:34:13 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			94 lines
		
	
	
	
		
			2.3 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			94 lines
		
	
	
	
		
			2.3 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable file
		
	
	
	
	
#!/usr/bin/env bash
#
# Fetches the model data files into $DATA_SUBDIR (via git-annex when the
# submodule is set up, otherwise optionally from the web) and then builds the
# Docker image.
#
# NOTE: this script needs Bash, not plain POSIX sh: the interactive menu
# further down uses the `select` construct and $REPLY.

# Directory (git submodule) that holds the model data files.
DATA_SUBDIR=data
 | |
| 
 | |
# Retrieve every required model file through git-annex.
#
# Each pattern is handed to annex_get still quoted, so it is not expanded
# here. The return status is that of the last annex_get call.
get_from_annex() {
  for annex_pattern in \
    'calamari-models/GT4HistOCR/2019-07-22T15:49+0200/*.ckpt*' \
    'tesseract-models/GT4HistOCR/*.traineddata' \
    'textline_detection/*.h5'
  do
    annex_get "$annex_pattern"
  done
}
 | |
| 
 | |
# Download the model archives from the web and unpack them.
#
# Every download_to call takes the archive URL followed by the directory the
# archive is unpacked into.
get_from_web() {
  download_to \
    'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' \
    'calamari-models/GT4HistOCR/2019-07-22T15:49+0200'

  download_to \
    'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar' \
    'tesseract-models/GT4HistOCR'

  # FIXME needs update:
  #   download_to 'https://file.spk-berlin.de:8443/textline_detection/models.tar.gz' 'textline_detection'
}
 | |
| 
 | |
| 
 | |
| 
 | |
# Check whether $DATA_SUBDIR can be used as a git-annex data source.
#
# Four independent checks are run; each failing check prints one line to
# stdout:
#   1. `git submodule status` does not flag the submodule as uninitialized
#      (a leading "-"),
#   2. $DATA_SUBDIR/.git/annex exists,
#   3. `git annex version` reports "local repository version: 7",
#   4. a git remote named "nfs" is configured.
#
# Globals:  DATA_SUBDIR (read), result (written)
# Returns:  0 if all checks passed, 1 if at least one failed.
check_data_subdir() {
  result=0

  # All expansions of $DATA_SUBDIR are quoted so that a path containing
  # whitespace or glob characters cannot break the tests below.
  if git submodule status "$DATA_SUBDIR" | grep -q '^-'; then
    echo "$DATA_SUBDIR/ is not an initialized submodule"; result=1
  fi
  if ! [ -e "$DATA_SUBDIR/.git/annex" ]; then
    echo "$DATA_SUBDIR/ is not a git annex repository"; result=1
  fi
  if ! (cd "$DATA_SUBDIR" && git annex version | grep -q 'local repository version: 7'); then
    echo "$DATA_SUBDIR/ is not a git annex repository version 7"; result=1
  fi
  if ! (cd "$DATA_SUBDIR" && git remote | grep -q '^nfs$'); then
    echo "$DATA_SUBDIR/ has no git remote 'nfs'"; result=1
  fi

  return "$result"
}
 | |
| 
 | |
# Print the commands a user can run to set up the $DATA_SUBDIR submodule
# manually (submodule init, git-annex init, adding the "nfs" remote).
#
# Globals:  DATA_SUBDIR (read)
# Outputs:  the suggestions, one per line, on stdout
suggest_commands() {
  printf '%s\n' \
    "Suggested commands:" \
    "" \
    "git submodule update --init" \
    "(cd $DATA_SUBDIR && git annex init --version=7)" \
    "(cd $DATA_SUBDIR && git remote add nfs /<... path to ...>/GitNX-Repository/qurator/qurator-data)"
}
 | |
| 
 | |
# Fetch the annexed files matching a glob pattern, then fsck them.
#
# Arguments: $1 - glob pattern, relative to the data directory
# Globals:   DATA_SUBDIR (read; falls back to "data" when unset or empty),
#            file_pattern (written)
# Returns:   non-zero if the data directory cannot be entered or if
#            `git annex get` / `git annex fsck` fails.
annex_get() {
  file_pattern="$1"

  # Work in a subshell so the caller's working directory is left alone.
  (
    # Stop right here if the directory is missing instead of running the
    # git commands in the wrong place.
    cd "${DATA_SUBDIR:-data}" || exit 1

    # $file_pattern is deliberately unquoted: callers pass the glob quoted so
    # that it is expanded here, relative to the data directory.
    # shellcheck disable=SC2086
    git annex get $file_pattern || exit 1

    # fsck seems to be necessary to fix the files if we're in a submodule
    # shellcheck disable=SC2086
    git annex fsck $file_pattern
  )
}
 | |
| 
 | |
| 
 | |
# Download a tar archive and unpack it below the data directory.
#
# Arguments: $1 - URL of the archive (fetched with wget)
#            $2 - directory to unpack into, relative to the data directory;
#                 created if it does not exist
# Globals:   DATA_SUBDIR (read; falls back to "data" when unset or empty),
#            download_source, unpack_to (written)
# Returns:   non-zero if entering the data directory, creating the temporary
#            file, the download, or the unpacking fails.
download_to() {
  download_source="$1"
  unpack_to="$2"

  # Work in a subshell so the caller's working directory is left alone and
  # the EXIT trap below is scoped to this one download.
  (
    cd "${DATA_SUBDIR:-data}" || exit 1

    # The temporary file is created in the data directory, as before.
    tmpf=$(mktemp 'tmp.XXXXXX') || exit 1
    # Remove the temporary file on every way out of the subshell, including
    # a failed download or a failed unpack.
    trap 'rm -f -- "$tmpf"' EXIT

    wget -O "$tmpf" "$download_source" || exit 1
    mkdir -p -- "$unpack_to" || exit 1

    # XXX Relies on tar detecting the compression from the archive contents
    # when reading; might not work everywhere? (-a/--auto-compress only picks
    # a compressor by file suffix when *creating* an archive, so it is not
    # passed here.)
    tar -C "$unpack_to" -xvf "$tmpf" || exit 1
  )
}
 | |
| 
 | |
| 
 | |
# From here on, abort as soon as any command fails (data retrieval, build).
set -e

# Get the data files: through git-annex when $DATA_SUBDIR is set up for it,
# otherwise ask whether to abort or to download the files from the web.
if ! check_data_subdir; then
  select choice in "Abort to manually fix $DATA_SUBDIR submodule" "Download data files from the web"; do
    # Match the raw reply explicitly: only "2" starts the download, and any
    # other input (typos, several words, ...) shows the menu prompt again
    # instead of being treated as "download".
    case "$REPLY" in
      1)
        suggest_commands
        # Nothing was built, so report that to the caller.
        exit 1
        ;;
      2)
        get_from_web
        break
        ;;
      *)
        echo "Invalid choice '$REPLY', please enter 1 or 2." >&2
        ;;
    esac
  done
else
  get_from_annex
fi

docker build -t my_ocrd_workflow .
 |