| 
									
										
										
										
											2020-05-22 13:43:11 +02:00
										 |  |  | #!/bin/sh | 
					
						
							|  |  |  | zdb=27974534 | 
					
						
							|  |  |  | yyyymmdd=19010712 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | set -e | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | self_dir=`dirname $0` | 
					
						
							|  |  |  | self_dir=`realpath $self_dir` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | workspace=$zdb-$yyyymmdd | 
					
						
							|  |  |  | mkdir "$workspace" | 
					
						
							|  |  |  | cd "$workspace" | 
					
						
							|  |  |  | pwd | 
					
						
							|  |  |  | zefys_url="https://content.staatsbibliothek-berlin.de/zefys/SNP$zdb-$yyyymmdd-0-0-0-0.xml" | 
					
						
							|  |  |  | echo "$zefys_url" | 
					
						
							|  |  |  | curl "$zefys_url" > mets.xml | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ocrd workspace validate mets.xml | grep -v "<notice>Won't download remote image" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | $self_dir/run-docker-hub -I MAX --skip-validation | 
					
						
							| 
									
										
										
										
											2020-05-22 13:49:34 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # * TODO: Error on invocation | 
					
						
							|  |  |  | # * TODO: Check out options to get better image resolutions | 
					
						
							|  |  |  | # * TODO: Are input images already grayscale? Further binarization makes them | 
					
						
							|  |  |  | #         worse than before | 
					
						
							|  |  |  | # * TODO: Does this lose the image URLs for the MAX filegroup? | 
					
						
							|  |  |  | # * TODO: Lots of text problems with ocrd_calamari "not the same as Calamari" |