mirror of
https://github.com/mikegerber/ocrd_calamari.git
synced 2025-06-09 19:59:53 +02:00
💩 Add a script fix-calamari1-model to fix regexen in 1.0 models
This is a workaround. Example:

    ❯ fix-calamari1-model ~/.local/share/ocrd-resources/ocrd-calamari-recognize/qurator-gt4histocr-1.0
    0.ckpt.json fixed.
    1.ckpt.json fixed.
    2.ckpt.json fixed.
    3.ckpt.json fixed.
    4.ckpt.json fixed.
This commit is contained in:
parent
0f92b524da
commit
3cf4887cb4
3 changed files with 54 additions and 0 deletions
39
ocrd_calamari/fix_calamari1_model.py
Normal file
39
ocrd_calamari/fix_calamari1_model.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
import re
|
||||
import json
|
||||
import click
|
||||
from glob import glob
|
||||
from copy import deepcopy
|
||||
|
||||
from ocrd_calamari.util import working_directory
|
||||
|
||||
# Matches a regex that ends in the global inline flag "(?u)"; group 1 captures
# everything in front of the flag so that it can be re-emitted behind it.
_TRAILING_UNICODE_FLAG = re.compile(r"^(.*)\(\?u\)$")


def _move_global_flags_in_front(checkpoint):
    """Rewrite, in place, every replacement regex ending in "(?u)" to start with it.

    Walks ``checkpoint["model"]``, and for each dict value visits the
    "replacements" of its "children", updating each replacement's "old" entry.
    Regexen that do not end in "(?u)" are left untouched.
    """
    for value in checkpoint["model"].values():
        # Only nested objects can carry "children"; skip everything else.
        if not isinstance(value, dict):
            continue
        for child in value.get("children", []):
            for replacement in child.get("replacements", []):
                replacement["old"] = _TRAILING_UNICODE_FLAG.sub(
                    r"(?u)\1", replacement["old"]
                )


# NOTE: click uses the docstring below as the command's --help text, so keep
# it user-facing; implementation notes belong in comments.
@click.command()
@click.argument("checkpoint_dir")
def fix_calamari1_model(checkpoint_dir):
    """
    Fix old Calamari 1 models.

    This currently means fixing regexen in "replacements" to have their global flags
    in front of the rest of the regex.
    """
    with working_directory(checkpoint_dir):
        # Sorted so that files are processed and reported in a stable order.
        for fn in sorted(glob("*.json")):
            # Read as UTF-8 explicitly instead of relying on the locale's
            # default encoding.
            with open(fn, "r", encoding="utf-8") as fp:
                j = json.load(fp)
            # Keep a pristine copy to detect whether anything was rewritten.
            old_j = deepcopy(j)

            _move_global_flags_in_front(j)

            if j == old_j:
                print(f"{fn} unchanged.")
            else:
                # Only touch files on disk that actually needed fixing.
                with open(fn, "w", encoding="utf-8") as fp:
                    json.dump(j, fp, indent=2)
                print(f"{fn} fixed.")
|
14
ocrd_calamari/util.py
Normal file
14
ocrd_calamari/util.py
Normal file
|
@ -0,0 +1,14 @@
|
|||
import os
|
||||
|
||||
class working_directory:
    """Context manager to temporarily change the working directory.

    On entry the process changes into ``wd``; on exit it changes back to the
    directory that was current when the matching ``with`` block was entered.
    The same instance may be nested or reused: every entry remembers its own
    directory to return to.

    Attributes:
        wd: The directory to change into.
        old_wd: The working directory recorded by the most recent entry.
    """

    def __init__(self, wd):
        self.wd = wd
        # One saved directory per active ``with`` block, innermost last, so
        # that nested use of a single instance unwinds correctly.
        self._old_wds = []

    def __enter__(self):
        previous = os.getcwd()
        os.chdir(self.wd)
        # Record only after chdir() succeeded; if it raises, __exit__ is never
        # called and nothing must be left on the stack.
        self.old_wd = previous
        self._old_wds.append(previous)
        return self

    def __exit__(self, etype, value, traceback):
        # Restore unconditionally; returning None lets any exception propagate.
        os.chdir(self._old_wds.pop())
|
1
setup.py
1
setup.py
|
@ -25,6 +25,7 @@ setup(
|
|||
entry_points={
|
||||
'console_scripts': [
|
||||
'ocrd-calamari-recognize=ocrd_calamari.cli:ocrd_calamari_recognize',
|
||||
'fix-calamari1-model=ocrd_calamari.fix_calamari1_model:fix_calamari1_model',
|
||||
]
|
||||
},
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue