mirror of
https://github.com/qurator-spk/dinglehopper.git
synced 2025-07-01 06:29:59 +02:00
🐛 Add --plain-encoding option to dinglehopper-extract
This commit is contained in:
parent
a70260c10e
commit
14a4bc56d8
1 changed file with 9 additions and 2 deletions
|
@@ -12,7 +12,12 @@ from .ocr_files import extract
|
||||||
help="PAGE TextEquiv level to extract text from",
|
help="PAGE TextEquiv level to extract text from",
|
||||||
metavar="LEVEL",
|
metavar="LEVEL",
|
||||||
)
|
)
|
||||||
def main(input_file, textequiv_level):
|
@click.option(
|
||||||
|
"--plain-encoding",
|
||||||
|
default="autodetect",
|
||||||
|
help='Encoding (e.g. "utf-8") of plain text files',
|
||||||
|
)
|
||||||
|
def main(input_file, textequiv_level, plain_encoding):
|
||||||
"""
|
"""
|
||||||
Extract the text of the given INPUT_FILE.
|
Extract the text of the given INPUT_FILE.
|
||||||
|
|
||||||
|
@@ -23,7 +28,9 @@ def main(input_file, textequiv_level):
|
||||||
use "--textequiv-level line" to extract from the level of TextLine tags.
|
use "--textequiv-level line" to extract from the level of TextLine tags.
|
||||||
"""
|
"""
|
||||||
initLogging()
|
initLogging()
|
||||||
input_text = extract(input_file, textequiv_level=textequiv_level).text
|
input_text = extract(
|
||||||
|
input_file, textequiv_level=textequiv_level, plain_encoding=plain_encoding
|
||||||
|
).text
|
||||||
print(input_text)
|
print(input_text)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue