@ -4,7 +4,7 @@ import os
import click
import click
from ocrd import Processor
from ocrd import Processor
from ocrd . decorators import ocrd_cli_options , ocrd_cli_wrap_processor
from ocrd . decorators import ocrd_cli_options , ocrd_cli_wrap_processor
from ocrd_utils import concat_padded, getLogger
from ocrd_utils import getLogger, make_file_id , assert_file_grp_cardinality
from pkg_resources import resource_string
from pkg_resources import resource_string
from qurator . dinglehopper . cli import process as cli_process
from qurator . dinglehopper . cli import process as cli_process
@ -27,20 +27,20 @@ class OcrdDinglehopperEvaluate(Processor):
kwargs [ ' ocrd_tool ' ] = OCRD_TOOL [ ' tools ' ] [ ' ocrd-dinglehopper ' ]
kwargs [ ' ocrd_tool ' ] = OCRD_TOOL [ ' tools ' ] [ ' ocrd-dinglehopper ' ]
super ( OcrdDinglehopperEvaluate , self ) . __init__ ( * args , * * kwargs )
super ( OcrdDinglehopperEvaluate , self ) . __init__ ( * args , * * kwargs )
def _make_file_id ( self , input_file , input_file_grp , n ) :
file_id = input_file . ID . replace ( input_file_grp , self . output_file_grp )
if file_id == input_file . ID :
file_id = concat_padded ( self . output_file_grp , n )
return file_id
def process ( self ) :
def process ( self ) :
assert_file_grp_cardinality ( self . input_file_grp , 2 , ' GT and OCR ' )
assert_file_grp_cardinality ( self . output_file_grp , 1 )
metrics = self . parameter [ ' metrics ' ]
gt_grp , ocr_grp = self . input_file_grp . split ( ' , ' )
gt_grp , ocr_grp = self . input_file_grp . split ( ' , ' )
for n , page_id in enumerate ( self . workspace . mets . physical_pages ) :
for n , page_id in enumerate ( self . workspace . mets . physical_pages ) :
gt_file = self . workspace . mets . find_files ( fileGrp = gt_grp , pageId = page_id ) [ 0 ]
gt_file = self . workspace . mets . find_files ( fileGrp = gt_grp , pageId = page_id ) [ 0 ]
ocr_file = self . workspace . mets . find_files ( fileGrp = ocr_grp , pageId = page_id ) [ 0 ]
ocr_file = self . workspace . mets . find_files ( fileGrp = ocr_grp , pageId = page_id ) [ 0 ]
gt_file = self . workspace . download_file ( gt_file )
ocr_file = self . workspace . download_file ( ocr_file )
log . info ( " INPUT FILES %i / %s ↔ %s " , n , gt_file , ocr_file )
log . info ( " INPUT FILES %i / %s ↔ %s " , n , gt_file , ocr_file )
file_id = self . _ make_file_id( ocr_file , ocr_grp , n )
file_id = make_file_id( ocr_file , self . output_file_grp )
report_prefix = os . path . join ( self . output_file_grp , file_id )
report_prefix = os . path . join ( self . output_file_grp , file_id )
# Process the files
# Process the files
@ -48,7 +48,12 @@ class OcrdDinglehopperEvaluate(Processor):
os . mkdir ( self . output_file_grp )
os . mkdir ( self . output_file_grp )
except FileExistsError :
except FileExistsError :
pass
pass
cli_process ( gt_file . local_filename , ocr_file . local_filename , report_prefix )
cli_process (
gt_file . local_filename ,
ocr_file . local_filename ,
report_prefix ,
metrics = metrics
)
# Add reports to the workspace
# Add reports to the workspace
for report_suffix , mimetype in \
for report_suffix , mimetype in \