mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-10-30 02:34:13 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			38 lines
		
	
	
	
		
			1.1 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			38 lines
		
	
	
	
		
			1.1 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable file
		
	
	
	
	
| #!/bin/bash
 | |
| # Create an OCR-D workspace from images
 | |
| #
 | |
| #   ocrd-workspace-from-images *.png
 | |
| #
 | |
| # In order to produce a workspace that validates, this script makes best effort
 | |
| # to generate random IDs and to create the necessary structures like the
 | |
| # physical page sequence.
 | |
| 
 | |
# Create a fresh workspace directory inside the current directory. The
# randomly generated directory name is reused as the workspace ID below.
# Abort early if the directory cannot be created: every later command
# takes it as an argument and would misbehave with an empty value.
workspace_dir=$(mktemp -d "workspace-XXXXX") || {
  echo "ocrd-workspace-from-images: could not create workspace directory" >&2
  exit 1
}
workspace_id=$(basename -- "$workspace_dir")

# Initialise the workspace and set its ID to the directory name.
ocrd workspace -d "$workspace_dir" init
ocrd workspace -d "$workspace_dir" set-id "$workspace_id"
 | |
| 
 | |
#######################################
# Derive a file ID from a file name or path.
#
# A trailing image extension (.png, .tif, .jpg, .jpeg, matched
# case-insensitively) is removed, then every character other than
# A-Z, a-z, 0-9, "_" and "-" is replaced by "_".
#
# Arguments: $1 - file name or path to derive the ID from
# Outputs:   writes the resulting ID to stdout
#######################################
make_file_id_from_filename() {
  local filename="$1"
  local file_id="$filename"

  # Strip the extension with shell patterns. A plain (basic-regex) sed
  # expression treats "(", "|" and "?" as literal characters, so an
  # alternation written that way never matches anything.
  case "$file_id" in
    *.[Pp][Nn][Gg] | *.[Tt][Ii][Ff] | *.[Jj][Pp][Gg])
      file_id=${file_id%.???}
      ;;
    *.[Jj][Pp][Ee][Gg])
      file_id=${file_id%.????}
      ;;
  esac

  # Replace everything outside the allowed set; this also turns path
  # separators and whitespace into underscores.
  file_id=${file_id//[^A-Za-z0-9_-]/_}

  printf '%s\n' "$file_id"
}
 | |
| 
 | |
# Copy every image given on the command line into the OCR-D-IMG directory
# of the workspace and add it to the workspace with `ocrd workspace add`,
# assigning consecutive page IDs P00001, P00002, ...
mkdir "$workspace_dir/OCR-D-IMG"
page_count=0
for img_orig in "$@"; do
  # Quote the basename call so source paths containing whitespace survive.
  img="$workspace_dir/OCR-D-IMG/$(basename -- "$img_orig")"

  # -L follows symlinks, so the workspace receives the image data itself.
  # Skip inputs that cannot be copied instead of registering a missing file.
  if ! cp -L -- "$img_orig" "$img"; then
    printf 'ocrd-workspace-from-images: cannot copy %s, skipping\n' "$img_orig" >&2
    continue
  fi

  # Count only images that were actually copied, keeping page IDs contiguous.
  page_count=$((page_count + 1))
  file_id=$(make_file_id_from_filename "$img")
  mime_type=$(file -b --mime-type "$img")
  page_id=$(printf 'P%05d' "$page_count")
  ocrd workspace -d "$workspace_dir" add -G OCR-D-IMG "$img" \
    --file-id "$file_id" --page-id "$page_id" --mimetype "$mime_type"
done

# Validate the result and report the workspace directory on stdout.
ocrd workspace -d "$workspace_dir" validate
echo "$workspace_dir"
 |