mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-06-08 19:30:01 +02:00
🚧 Add OCR-D parameter for plain text encoding
This commit is contained in:
parent
eca76a7db2
commit
323b389ef9
2 changed files with 7 additions and 0 deletions
|
@@ -29,6 +29,11 @@
|
||||||
"enum": ["region", "line"],
|
"enum": ["region", "line"],
|
||||||
"default": "region",
|
"default": "region",
|
||||||
"description": "PAGE XML hierarchy level to extract the text from"
|
"description": "PAGE XML hierarchy level to extract the text from"
|
||||||
|
},
|
||||||
|
"plain_encoding": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "autodetect",
|
||||||
|
"description": "Encoding (e.g. \"utf-8\") of plain text files"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -36,6 +36,7 @@ class OcrdDinglehopperEvaluate(Processor):
|
||||||
|
|
||||||
metrics = self.parameter["metrics"]
|
metrics = self.parameter["metrics"]
|
||||||
textequiv_level = self.parameter["textequiv_level"]
|
textequiv_level = self.parameter["textequiv_level"]
|
||||||
|
plain_encoding = self.parameter["plain_encoding"]
|
||||||
gt_grp, ocr_grp = self.input_file_grp.split(",")
|
gt_grp, ocr_grp = self.input_file_grp.split(",")
|
||||||
|
|
||||||
input_file_tuples = self.zip_input_files(on_error="abort")
|
input_file_tuples = self.zip_input_files(on_error="abort")
|
||||||
|
@@ -63,6 +64,7 @@ class OcrdDinglehopperEvaluate(Processor):
|
||||||
report_prefix,
|
report_prefix,
|
||||||
metrics=metrics,
|
metrics=metrics,
|
||||||
textequiv_level=textequiv_level,
|
textequiv_level=textequiv_level,
|
||||||
|
plain_encoding=plain_encoding,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add reports to the workspace
|
# Add reports to the workspace
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue