mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-10-31 19:24:12 +01:00 
			
		
		
		
	🎨 ppn2ocr: Extract a function to make a workspace
This commit is contained in:
		
							parent
							
								
									18d4ab0ba1
								
							
						
					
					
						commit
						2a4b204fbe
					
				
					 1 changed file with 24 additions and 15 deletions
				
			
		
							
								
								
									
										39
									
								
								ppn2ocr
									
										
									
									
									
								
							
							
						
						
									
										39
									
								
								ppn2ocr
									
										
									
									
									
								
							|  | @ -12,27 +12,36 @@ self_dir=`dirname $0` | |||
| self_dir=`realpath $self_dir` | ||||
| 
 | ||||
| 
 | ||||
| workspace=$ppn | ||||
| mkdir "$workspace" | ||||
| cd "$workspace" | ||||
| pwd | ||||
| oai_identifier="oai%3Adigital.staatsbibliothek-berlin.de%3A$ppn" | ||||
| oai_url="https://digital.staatsbibliothek-berlin.de/oai?verb=GetRecord&metadataPrefix=mets&identifier=$oai_identifier" | ||||
| echo "$oai_url" | ||||
| curl "$oai_url" | xmlstarlet sel -t -c '//*[local-name()="mets"]' > mets.xml | ||||
| make_workspace () { | ||||
|     ppn=$1 | ||||
|     workspace=$2 | ||||
| 
 | ||||
| # Fix 'file:/' URLs to 'file:///' | ||||
| sed -i 's#file:/\([^/]\)#file:///\1#' mets.xml | ||||
|     # Make workspace directory | ||||
|     mkdir "$workspace" | ||||
|     cd "$workspace" | ||||
| 
 | ||||
| # Patch mets.xml to use our NFS mount | ||||
| sed -i 's#file:///goobi/tiff001/sbb/#file:///srv/digisam_images/sbb/#g' mets.xml | ||||
|     # Get METS from OAI-PMH | ||||
|     oai_identifier="oai%3Adigital.staatsbibliothek-berlin.de%3A$ppn" | ||||
|     oai_url="https://digital.staatsbibliothek-berlin.de/oai?verb=GetRecord&metadataPrefix=mets&identifier=$oai_identifier" | ||||
|     echo "$oai_url" | ||||
|     curl "$oai_url" | xmlstarlet sel -t -c '//*[local-name()="mets"]' > mets.xml | ||||
| 
 | ||||
| # Remove LOCAL file group as we do not have access to the files | ||||
| ocrd workspace remove-group -rf --keep-files LOCAL | ||||
|     # Fix 'file:/' URLs to 'file:///' | ||||
|     sed -i 's#file:/\([^/]\)#file:///\1#' mets.xml | ||||
| 
 | ||||
| ocrd workspace validate mets.xml | grep -v "<notice>Won't download remote image" | ||||
|     # Patch mets.xml to use our NFS mount | ||||
|     sed -i 's#file:///goobi/tiff001/sbb/#file:///srv/digisam_images/sbb/#g' mets.xml | ||||
| 
 | ||||
|     # Remove LOCAL file group as we do not have access to the files | ||||
|     ocrd workspace remove-group -rf --keep-files LOCAL | ||||
| 
 | ||||
|     # Validate workspace | ||||
|     ocrd workspace validate mets.xml | grep -v "<notice>Won't download remote image" | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| make_workspace $ppn $ppn | ||||
| $self_dir/run-docker-hub -I PRESENTATION --skip-validation | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue