{
"version" : "0.0.1" ,
"git_url" : "https://github.com/qurator-spk/page2tsv" ,
"tools" : {
"ocrd-neat-export" : {
"executable" : "ocrd-neat-export" ,
"description" : "Convert PAGE-XML to neat-loadable TSV" ,
"categories" : [ "Format-Conversion" ] ,
"steps" : [ "format-conversion" ] ,
"input_file_grp" : [ "INPUT" ] ,
"output_file_grp" : [ "OUTPUT" ] ,
"parameters" : {
"iiif_url_template" : {
"type" : "string" ,
"description" : "URL template for lookup of images via IIIF based on {{ unique_identifier }}, {{ page_id }}, {{ page_no }} and {{ PPN }}. 'left', 'top', 'right', 'bottom', 'width', and 'height' are replaced by the neat JS." ,
"default" : "https://content.staatsbibliothek-berlin.de/dc/{{ PPN }}-{{ page_no }}/left,top,width,height/full/0/default.jpg"
} ,
"scale_filegrp" : {
"type" : "string" ,
"description" : "If the OCR was run on images with a different resolution than the 'full' IIIF size, use the images in this file group to scale. Set to empty string to disable" ,
"default" : ""
} ,
"noproxy" : {
"type" : "boolean" ,
"description" : "Disable proxy if set" ,
"default" : true
}
}
} ,
"ocrd-neat-import" : {
"executable" : "ocrd-neat-export" ,
"description" : "Re-integrate TSV into PAGE-XML" ,
"categories" : [ "Format-Conversion" ] ,
"steps" : [ "format-conversion" ] ,
"input_file_grp" : [ "PAGE-GRP,TSV-GRP" ] ,
"output_file_grp" : [ "OUTPUT" ] ,
"parameters" : {
"keep_words" : {
"type" : "boolean" ,
"description" : "After updating the line TextEquiv, remove (false) or keep (true) existing and probably inconsistent pc:Word" ,
"default" : false
}
}
}
}
}