💩 Add a script fix-calamari1-model to fix regexen in 1.0 models
This is a workaround. Example:

❯ fix-calamari1-model ~/.local/share/ocrd-resources/ocrd-calamari-recognize/qurator-gt4histocr-1.0
0.ckpt.json fixed.
1.ckpt.json fixed.
2.ckpt.json fixed.
3.ckpt.json fixed.
4.ckpt.json fixed.

test-python-3.11
parent
0f92b524da
commit
3cf4887cb4
@ -0,0 +1,39 @@
|
||||
import re
|
||||
import json
|
||||
import click
|
||||
from glob import glob
|
||||
from copy import deepcopy
|
||||
|
||||
from ocrd_calamari.util import working_directory
|
||||
|
||||
# Matches a pattern whose global "(?u)" flag sits at the very end; group 1 is
# everything before it. Python 3.11+ only accepts global inline flags at the
# start of an expression, so such patterns no longer compile as-is.
_TRAILING_UNICODE_FLAG = re.compile(r"^(.*)\(\?u\)$")


def _move_global_flags_to_front(checkpoint):
    """Move a trailing ``(?u)`` to the front of every replacement regex, in place.

    Walks ``checkpoint["model"]``: for every dict-valued entry, each item in
    its ``"children"`` list and each item in that child's ``"replacements"``
    list gets its ``"old"`` pattern rewritten. Anything that does not have
    this shape (no ``"model"`` key, non-dict values) is left untouched.
    """
    model = checkpoint.get("model") if isinstance(checkpoint, dict) else None
    if not isinstance(model, dict):
        # Not a checkpoint description we know how to fix; nothing to do.
        return

    for v in model.values():
        if not isinstance(v, dict):
            continue
        for child in v.get("children", []):
            for replacement in child.get("replacements", []):
                # Move global flags in front
                replacement["old"] = _TRAILING_UNICODE_FLAG.sub(
                    r"(?u)\1", replacement["old"]
                )


@click.command
@click.argument('checkpoint_dir')
def fix_calamari1_model(checkpoint_dir):
    """
    Fix old Calamari 1 models.

    This currently means fixing regexen in "replacements" to have their global flags
    in front of the rest of the regex.

    Every *.json file directly inside CHECKPOINT_DIR is examined. A file is
    rewritten in place only if something changed, and one line per file is
    printed saying whether it was "fixed." or left "unchanged.".
    """
    with working_directory(checkpoint_dir):
        # Sorted so the per-file report comes out in a stable order.
        for fn in sorted(glob("*.json")):
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(fn, "r", encoding="utf-8") as fp:
                j = json.load(fp)
            # Keep a pristine copy so we can tell whether anything changed.
            old_j = deepcopy(j)

            _move_global_flags_to_front(j)

            if j == old_j:
                print(f"{fn} unchanged.")
            else:
                with open(fn, "w", encoding="utf-8") as fp:
                    json.dump(j, fp, indent=2)
                print(f"{fn} fixed.")
|
@ -0,0 +1,14 @@
|
||||
import os
|
||||
|
||||
class working_directory:
    """Context manager to temporarily change the working directory

    On entry the process changes into ``wd``; on exit it changes back to the
    directory that was current when the matching ``with`` was entered.

    The same instance may be entered more than once (nested or repeatedly):
    the directories to return to are kept on a stack, so each exit restores
    the directory saved by its own entry.
    """

    def __init__(self, wd):
        """Remember the directory ``wd`` to switch to; nothing changes yet."""
        self.wd = wd
        # One saved directory per currently active ``with`` on this instance.
        self._old_wds = []

    def __enter__(self):
        """Save the current directory and change into ``self.wd``."""
        # ``old_wd`` is kept as a plain attribute for code that reads it; it
        # always holds the directory saved by the most recent entry.
        self.old_wd = os.getcwd()
        os.chdir(self.wd)
        # Push only after chdir succeeded, so a failed entry leaves no stale
        # entry behind for a later exit to pop.
        self._old_wds.append(self.old_wd)

    def __exit__(self, etype, value, traceback):
        """Change back to the directory saved by the matching entry.

        Returns None, so an exception raised inside the ``with`` body is not
        suppressed.
        """
        os.chdir(self._old_wds.pop())
|
Loading…
Reference in New Issue