mirror of
https://github.com/mikegerber/ocrd_calamari.git
synced 2025-06-27 04:29:54 +02:00
🚧 Save results in the workspace
This commit is contained in:
parent
b9e38487bd
commit
f62332223b
1 changed files with 24 additions and 6 deletions
|
@ -1,15 +1,18 @@
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import os
|
||||||
from glob import glob
|
from glob import glob
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from calamari_ocr.ocr import MultiPredictor
|
from calamari_ocr.ocr import MultiPredictor
|
||||||
from calamari_ocr.ocr.voting import voter_from_proto
|
from calamari_ocr.ocr.voting import voter_from_proto
|
||||||
from calamari_ocr.proto import VoterParams
|
from calamari_ocr.proto import VoterParams
|
||||||
from ocrd import Processor
|
from ocrd import Processor, MIMETYPE_PAGE
|
||||||
from ocrd.logging import getLogger
|
from ocrd.logging import getLogger
|
||||||
from ocrd.model import ocrd_page
|
from ocrd.model import ocrd_page
|
||||||
from ocrd.utils import polygon_from_points
|
from ocrd.model.ocrd_page import to_xml
|
||||||
|
from ocrd.model.ocrd_page_generateds import TextEquivType
|
||||||
|
from ocrd.utils import polygon_from_points, concat_padded
|
||||||
|
|
||||||
from ocrd_calamari.config import OCRD_TOOL
|
from ocrd_calamari.config import OCRD_TOOL
|
||||||
|
|
||||||
|
@ -35,6 +38,12 @@ class CalamariOcr(Processor):
|
||||||
def resolve_image_as_np(self, image_url, coords):
|
def resolve_image_as_np(self, image_url, coords):
|
||||||
return np.array(self.workspace.resolve_image_as_pil(image_url, coords), dtype=np.uint8)
|
return np.array(self.workspace.resolve_image_as_pil(image_url, coords), dtype=np.uint8)
|
||||||
|
|
||||||
|
def _make_file_id(self, input_file, n):
|
||||||
|
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
|
||||||
|
if file_id == input_file.ID:
|
||||||
|
file_id = concat_padded(self.output_file_grp, n)
|
||||||
|
return file_id
|
||||||
|
|
||||||
def process(self):
|
def process(self):
|
||||||
"""
|
"""
|
||||||
Performs the recognition.
|
Performs the recognition.
|
||||||
|
@ -62,7 +71,16 @@ class CalamariOcr(Processor):
|
||||||
prediction = self.voter.vote_prediction_result(raw_results)
|
prediction = self.voter.vote_prediction_result(raw_results)
|
||||||
prediction.id = "voted"
|
prediction.id = "voted"
|
||||||
|
|
||||||
print('***', prediction.sentence)
|
line_text = prediction.sentence
|
||||||
print(prediction.avg_char_probability)
|
line_conf = prediction.avg_char_probability
|
||||||
for raw_result in raw_results:
|
|
||||||
print(raw_result.sentence)
|
line.add_TextEquiv(TextEquivType(Unicode=line_text, conf=line_conf))
|
||||||
|
|
||||||
|
file_id = self._make_file_id(input_file, n)
|
||||||
|
self.workspace.add_file(
|
||||||
|
ID=file_id,
|
||||||
|
file_grp=self.output_file_grp,
|
||||||
|
pageId=input_file.pageId,
|
||||||
|
mimetype=MIMETYPE_PAGE,
|
||||||
|
local_filename=os.path.join(self.output_file_grp, file_id + '.xml'),
|
||||||
|
content=to_xml(pcgts))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue