mirror of
				https://github.com/qurator-spk/ocrd-galley.git
				synced 2025-10-31 03:04:12 +01:00 
			
		
		
		
	🚧 Add preliminary support for ocrd_calamari's Calamari 1 version
This commit is contained in:
		
							parent
							
								
									f2ed0b5cbf
								
							
						
					
					
						commit
						b6bbc7ca3a
					
				
					 3 changed files with 34 additions and 2 deletions
				
			
		|  | @ -13,8 +13,8 @@ RUN ${PIP_INSTALL} \ | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Copy OCR models | # Copy OCR models | ||||||
| RUN mkdir -p /var/lib/calamari-models | RUN mkdir -p /var/lib/calamari-models/GT4HistOCR | ||||||
| COPY data/calamari-models/GT4HistOCR /var/lib/calamari-models/GT4HistOCR | COPY data/calamari-models/GT4HistOCR/2019-07-22T15_49+0200 /var/lib/calamari-models/GT4HistOCR/2019-07-22T15_49+0200 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										26
									
								
								Dockerfile-ocrd_calamari-feat-update-calamari1
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								Dockerfile-ocrd_calamari-feat-update-calamari1
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | ||||||
|  | FROM my_ocrd_workflow-core | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # XXX https://github.com/OCR-D/core/issues/642 | ||||||
|  | #ARG PIP_INSTALL="pip3 install --no-cache-dir --use-feature=2020-resolver" | ||||||
|  | ARG PIP_INSTALL="pip3 install --no-cache-dir" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Build pip installable stuff | ||||||
|  | RUN ${PIP_INSTALL} \ | ||||||
|  |         https://github.com/OCR-D/ocrd_calamari/archive/feat/update-calamari1.zip | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Copy OCR models | ||||||
|  | RUN mkdir -p /var/lib/calamari-models/GT4HistOCR | ||||||
|  | COPY data/calamari-models/GT4HistOCR/2019-12-11T11_10+0100 /var/lib/calamari-models/GT4HistOCR/2019-12-11T11_10+0100 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Check pip dependencies | ||||||
|  | # XXX https://github.com/OCR-D/core/issues/642 | ||||||
|  | #RUN pip3 check | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Default command | ||||||
|  | CMD ["ocrd-calamari-recognize"] | ||||||
							
								
								
									
										6
									
								
								build
									
										
									
									
									
								
							
							
						
						
									
										6
									
								
								build
									
										
									
									
									
								
							|  | @ -7,6 +7,7 @@ self_dir=`dirname "$self"` | ||||||
| DATA_SUBDIR=data | DATA_SUBDIR=data | ||||||
| get_from_annex() { | get_from_annex() { | ||||||
|   annex_get 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200/*.ckpt*' |   annex_get 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200/*.ckpt*' | ||||||
|  |   annex_get 'calamari-models/GT4HistOCR/2019-12-11T11_10+0100/*.ckpt*' | ||||||
|   annex_get 'tesseract-models/GT4HistOCR/*.traineddata' |   annex_get 'tesseract-models/GT4HistOCR/*.traineddata' | ||||||
|   annex_get 'textline_detection/*.h5' |   annex_get 'textline_detection/*.h5' | ||||||
|   annex_get 'mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz' |   annex_get 'mirror/github.com/tesseract-ocr/tessdata_best/archive/4.0.0-repacked.tar.gz' | ||||||
|  | @ -14,6 +15,7 @@ get_from_annex() { | ||||||
| } | } | ||||||
| get_from_web() { | get_from_web() { | ||||||
|   download_to 'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200' |   download_to 'https://qurator-data.de/calamari-models/GT4HistOCR/model.tar.xz' 'calamari-models/GT4HistOCR/2019-07-22T15_49+0200' | ||||||
|  |   # XXX No web download yet for the Calamari 1 model (calamari-models/GT4HistOCR/2019-12-11T11_10+0100) | ||||||
|   download_to 'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar'  'tesseract-models/GT4HistOCR' |   download_to 'https://qurator-data.de/tesseract-models/GT4HistOCR/models.tar'  'tesseract-models/GT4HistOCR' | ||||||
|   download_to 'https://qurator-data.de/sbb_textline_detector/models.tar.gz'     'textline_detection' |   download_to 'https://qurator-data.de/sbb_textline_detector/models.tar.gz'     'textline_detection' | ||||||
|   download_to --strip-components 1 'https://qurator-data.de/sbb_binarization/models.tar.gz' 'sbb_binarization' |   download_to --strip-components 1 'https://qurator-data.de/sbb_binarization/models.tar.gz' 'sbb_binarization' | ||||||
|  | @ -32,3 +34,7 @@ docker build --cache-from=my_ocrd_workflow-sbb_textline_detector -t my_ocrd_work | ||||||
| docker build --cache-from=my_ocrd_workflow-sbb_binarization      -t my_ocrd_workflow-sbb_binarization      -f Dockerfile-sbb_binarization      . | docker build --cache-from=my_ocrd_workflow-sbb_binarization      -t my_ocrd_workflow-sbb_binarization      -f Dockerfile-sbb_binarization      . | ||||||
| docker build --cache-from=my_ocrd_workflow-ocrd_cis              -t my_ocrd_workflow-ocrd_cis              -f Dockerfile-ocrd_cis              . | docker build --cache-from=my_ocrd_workflow-ocrd_cis              -t my_ocrd_workflow-ocrd_cis              -f Dockerfile-ocrd_cis              . | ||||||
| docker build --cache-from=my_ocrd_workflow-ocrd_fileformat       -t my_ocrd_workflow-ocrd_fileformat       -f Dockerfile-ocrd_fileformat       . | docker build --cache-from=my_ocrd_workflow-ocrd_fileformat       -t my_ocrd_workflow-ocrd_fileformat       -f Dockerfile-ocrd_fileformat       . | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # XXX Preliminary: image for the ocrd_calamari feat/update-calamari1 branch (Calamari 1) | ||||||
|  | docker build --cache-from=my_ocrd_workflow-ocrd_calamari-feat-update-calamari1 -t my_ocrd_workflow-ocrd_calamari-feat-update-calamari1 -f Dockerfile-ocrd_calamari-feat-update-calamari1 . | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue