mirror of
				https://github.com/mikegerber/ocrd_calamari.git
				synced 2025-11-03 17:14:13 +01:00 
			
		
		
		
	🚧 Save results in the workspace
This commit is contained in:
		
							parent
							
								
									b9e38487bd
								
							
						
					
					
						commit
						f62332223b
					
				
					 1 changed file with 24 additions and 6 deletions
				
			
		| 
						 | 
					@ -1,15 +1,18 @@
 | 
				
			||||||
from __future__ import absolute_import
 | 
					from __future__ import absolute_import
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
from glob import glob
 | 
					from glob import glob
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import numpy as np
 | 
					import numpy as np
 | 
				
			||||||
from calamari_ocr.ocr import MultiPredictor
 | 
					from calamari_ocr.ocr import MultiPredictor
 | 
				
			||||||
from calamari_ocr.ocr.voting import voter_from_proto
 | 
					from calamari_ocr.ocr.voting import voter_from_proto
 | 
				
			||||||
from calamari_ocr.proto import VoterParams
 | 
					from calamari_ocr.proto import VoterParams
 | 
				
			||||||
from ocrd import Processor
 | 
					from ocrd import Processor, MIMETYPE_PAGE
 | 
				
			||||||
from ocrd.logging import getLogger
 | 
					from ocrd.logging import getLogger
 | 
				
			||||||
from ocrd.model import ocrd_page
 | 
					from ocrd.model import ocrd_page
 | 
				
			||||||
from ocrd.utils import polygon_from_points
 | 
					from ocrd.model.ocrd_page import to_xml
 | 
				
			||||||
 | 
					from ocrd.model.ocrd_page_generateds import TextEquivType
 | 
				
			||||||
 | 
					from ocrd.utils import polygon_from_points, concat_padded
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ocrd_calamari.config import OCRD_TOOL
 | 
					from ocrd_calamari.config import OCRD_TOOL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -35,6 +38,12 @@ class CalamariOcr(Processor):
 | 
				
			||||||
    def resolve_image_as_np(self, image_url, coords):
 | 
					    def resolve_image_as_np(self, image_url, coords):
 | 
				
			||||||
        return np.array(self.workspace.resolve_image_as_pil(image_url, coords), dtype=np.uint8)
 | 
					        return np.array(self.workspace.resolve_image_as_pil(image_url, coords), dtype=np.uint8)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _make_file_id(self, input_file, n):
 | 
				
			||||||
 | 
					        file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
 | 
				
			||||||
 | 
					        if file_id == input_file.ID:
 | 
				
			||||||
 | 
					            file_id = concat_padded(self.output_file_grp, n)
 | 
				
			||||||
 | 
					        return file_id
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def process(self):
 | 
					    def process(self):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        Performs the recognition.
 | 
					        Performs the recognition.
 | 
				
			||||||
| 
						 | 
					@ -62,7 +71,16 @@ class CalamariOcr(Processor):
 | 
				
			||||||
                    prediction = self.voter.vote_prediction_result(raw_results)
 | 
					                    prediction = self.voter.vote_prediction_result(raw_results)
 | 
				
			||||||
                    prediction.id = "voted"
 | 
					                    prediction.id = "voted"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    print('***', prediction.sentence)
 | 
					                    line_text = prediction.sentence
 | 
				
			||||||
                    print(prediction.avg_char_probability)
 | 
					                    line_conf = prediction.avg_char_probability
 | 
				
			||||||
                    for raw_result in raw_results:
 | 
					
 | 
				
			||||||
                        print(raw_result.sentence)
 | 
					                    line.add_TextEquiv(TextEquivType(Unicode=line_text, conf=line_conf))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            file_id = self._make_file_id(input_file, n)
 | 
				
			||||||
 | 
					            self.workspace.add_file(
 | 
				
			||||||
 | 
					                ID=file_id,
 | 
				
			||||||
 | 
					                file_grp=self.output_file_grp,
 | 
				
			||||||
 | 
					                pageId=input_file.pageId,
 | 
				
			||||||
 | 
					                mimetype=MIMETYPE_PAGE,
 | 
				
			||||||
 | 
					                local_filename=os.path.join(self.output_file_grp, file_id + '.xml'),
 | 
				
			||||||
 | 
					                content=to_xml(pcgts))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue