diff --git a/ocrd_calamari/fix_calamari1_model.py b/ocrd_calamari/fix_calamari1_model.py
new file mode 100644
index 0000000..96f880f
--- /dev/null
+++ b/ocrd_calamari/fix_calamari1_model.py
@@ -0,0 +1,54 @@
+import re
+import json
+import click
+from glob import glob
+from copy import deepcopy
+
+from ocrd_calamari.util import working_directory
+
+# Old models carry the global flag "(?u)" at the end of a replacement regex;
+# Python 3.11+ only accepts global flags at the start of a pattern.
+_TRAILING_UNICODE_FLAG = re.compile(r"^(.*)\(\?u\)$")
+
+
+@click.command()
+@click.argument('checkpoint_dir')
+def fix_calamari1_model(checkpoint_dir):
+    """
+    Fix old Calamari 1 models.
+
+    This currently means fixing regexen in "replacements" to have their global flags
+    in front of the rest of the regex.
+
+    Every *.json file in CHECKPOINT_DIR is checked and rewritten in place if
+    it changed; files without a "model" section are skipped.
+    """
+    with working_directory(checkpoint_dir):
+        for fn in glob("*.json"):
+            # JSON is UTF-8 by specification, so do not rely on the locale's
+            # default encoding when reading it.
+            with open(fn, "r", encoding="utf-8") as fp:
+                j = json.load(fp)
+            old_j = deepcopy(j)
+
+            model = j.get("model") if isinstance(j, dict) else None
+            if not isinstance(model, dict):
+                print(f"{fn} skipped (no model section).")
+                continue
+
+            for v in model.values():
+                if not isinstance(v, dict):
+                    continue
+                for child in v.get("children", []):
+                    for replacement in child.get("replacements", []):
+                        # Move global flags in front
+                        replacement["old"] = _TRAILING_UNICODE_FLAG.sub(
+                            r"(?u)\1", replacement["old"]
+                        )
+
+            if j == old_j:
+                print(f"{fn} unchanged.")
+            else:
+                with open(fn, "w", encoding="utf-8") as fp:
+                    json.dump(j, fp, indent=2)
+                print(f"{fn} fixed.")
diff --git a/ocrd_calamari/util.py b/ocrd_calamari/util.py
new file mode 100644
index 0000000..343f175
--- /dev/null
+++ b/ocrd_calamari/util.py
@@ -0,0 +1,18 @@
+import os
+
+
+class working_directory:
+    """Context manager to temporarily change the working directory"""
+
+    def __init__(self, wd):
+        # Directory to switch to while the "with" block runs
+        self.wd = wd
+
+    def __enter__(self):
+        # Remember where we came from so __exit__ can switch back
+        self.old_wd = os.getcwd()
+        os.chdir(self.wd)
+
+    def __exit__(self, etype, value, traceback):
+        # Restore the original directory, also when the block raised
+        os.chdir(self.old_wd)
diff --git a/setup.py b/setup.py
index db1926d..e9036bc 100644
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,7 @@ setup(
     entry_points={
         'console_scripts': [
             'ocrd-calamari-recognize=ocrd_calamari.cli:ocrd_calamari_recognize',
+            'fix-calamari1-model=ocrd_calamari.fix_calamari1_model:fix_calamari1_model',
         ]
     },
 )