💩 Add a script fix-calamari1-model to fix regexen in 1.0 models
This is a workaround. Example: ❯ fix-calamari1-model ~/.local/share/ocrd-resources/ocrd-calamari-recognize/qurator-gt4histocr-1.0 0.ckpt.json fixed. 1.ckpt.json fixed. 2.ckpt.json fixed. 3.ckpt.json fixed. 4.ckpt.json fixed. test-python-3.11
parent
0f92b524da
commit
3cf4887cb4
@ -0,0 +1,39 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
import click
|
||||||
|
from glob import glob
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
from ocrd_calamari.util import working_directory
|
||||||
|
|
||||||
|
@click.command()
@click.argument('checkpoint_dir')
def fix_calamari1_model(checkpoint_dir):
    """
    Fix old Calamari 1 models.

    This currently means fixing regexen in "replacements" to have their global flags
    in front of the rest of the regex.

    Every *.json file directly inside CHECKPOINT_DIR is inspected and rewritten
    in place if (and only if) a regex had to be changed.
    """
    # A trailing "(?u)" is a global inline flag that is not at the start of the
    # expression; newer Python versions refuse to compile such patterns, so it
    # is moved to the front. Compiled once, reused for every replacement.
    trailing_unicode_flag = re.compile(r"^(.*)\(\?u\)$")

    with working_directory(checkpoint_dir):
        for fn in glob("*.json"):
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(fn, "r", encoding="utf-8") as fp:
                j = json.load(fp)
            # Pristine copy, used below to detect whether anything changed.
            old_j = deepcopy(j)

            # Tolerate JSON files that are not checkpoints (no "model" object):
            # they simply end up reported as unchanged instead of crashing.
            model = j.get("model") if isinstance(j, dict) else None
            sections = model.values() if isinstance(model, dict) else ()

            for v in sections:
                # Only nested objects can carry "children"; skip scalars/lists.
                if not isinstance(v, dict):
                    continue
                for child in v.get("children", []):
                    for replacement in child.get("replacements", []):
                        # Move global flags in front
                        replacement["old"] = trailing_unicode_flag.sub(
                            r"(?u)\1", replacement["old"]
                        )

            if j == old_j:
                print(f"{fn} unchanged.")
            else:
                # Only touch files that actually needed fixing.
                with open(fn, "w", encoding="utf-8") as fp:
                    json.dump(j, fp, indent=2)
                print(f"{fn} fixed.")
|
@ -0,0 +1,14 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
class working_directory:
    """Context manager to temporarily change the working directory.

    On entry the process switches into ``wd``; on exit (normal or via an
    exception) it switches back to the directory that was current at entry.
    Exceptions raised inside the ``with`` body are not suppressed.
    """

    def __init__(self, wd):
        # Only remember the target here; the chdir itself happens on entry.
        self.wd = wd

    def __enter__(self):
        # Record where we came from *before* moving, so __exit__ can return.
        previous_dir = os.getcwd()
        self.old_wd = previous_dir
        os.chdir(self.wd)

    def __exit__(self, etype, value, traceback):
        # Restore the original directory; implicit None lets errors propagate.
        os.chdir(self.old_wd)
|
Loading…
Reference in New Issue