mirror of
https://github.com/mikegerber/ocrd_calamari.git
synced 2025-06-09 19:59:53 +02:00
💩 Add a script fix-calamari1-model to fix regexen in 1.0 models
This is a workaround. Example:

    ❯ fix-calamari1-model ~/.local/share/ocrd-resources/ocrd-calamari-recognize/qurator-gt4histocr-1.0
    0.ckpt.json fixed.
    1.ckpt.json fixed.
    2.ckpt.json fixed.
    3.ckpt.json fixed.
    4.ckpt.json fixed.
This commit is contained in:
parent
0f92b524da
commit
3cf4887cb4
3 changed files with 54 additions and 0 deletions
39
ocrd_calamari/fix_calamari1_model.py
Normal file
39
ocrd_calamari/fix_calamari1_model.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import click
|
||||||
|
from glob import glob
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
from ocrd_calamari.util import working_directory
|
||||||
|
|
||||||
|
# Matches a regex whose global flag group "(?u)" sits at the very end of the
# pattern; group 1 captures everything in front of it.
_TRAILING_GLOBAL_FLAG = re.compile(r"^(.*)\(\?u\)$")


def _move_global_flags_to_front(checkpoint):
    """Move a trailing "(?u)" to the front of every replacement regex, in place.

    Walks ``checkpoint["model"]``: for every dict-valued entry, each item of
    its "children" list is searched for a "replacements" list, and the "old"
    pattern of each replacement is rewritten from ``<rest>(?u)`` to
    ``(?u)<rest>``. Patterns that do not end in "(?u)" are left untouched.

    Args:
        checkpoint: The parsed JSON content of a checkpoint file; it is
            mutated directly.

    Raises:
        KeyError: If ``checkpoint`` has no "model" entry or a replacement has
            no "old" entry.
    """
    for value in checkpoint["model"].values():
        # Non-dict entries (plain settings) cannot carry "children".
        if not isinstance(value, dict):
            continue
        for child in value.get("children", []):
            for replacement in child.get("replacements", []):
                # Global inline flags are only accepted at the start of a
                # pattern by newer Python versions, so move them in front.
                replacement["old"] = _TRAILING_GLOBAL_FLAG.sub(
                    r"(?u)\1", replacement["old"]
                )


# Called with parentheses so the decorator also works with click < 8.1.
@click.command()
@click.argument('checkpoint_dir')
def fix_calamari1_model(checkpoint_dir):
    """
    Fix old Calamari 1 models.

    This currently means fixing regexen in "replacements" to have their global flags
    in front of the rest of the regex.
    """
    # NOTE: the docstring above is also the command's --help text.
    with working_directory(checkpoint_dir):
        for fn in glob("*.json"):
            # JSON is UTF-8; do not depend on the locale's default encoding.
            with open(fn, "r", encoding="utf-8") as fp:
                j = json.load(fp)
            # Keep a pristine copy so files that need no fix are not rewritten.
            old_j = deepcopy(j)

            _move_global_flags_to_front(j)

            if j == old_j:
                print(f"{fn} unchanged.")
            else:
                with open(fn, "w", encoding="utf-8") as fp:
                    json.dump(j, fp, indent=2)
                print(f"{fn} fixed.")
|
14
ocrd_calamari/util.py
Normal file
14
ocrd_calamari/util.py
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
class working_directory:
    """Context manager that runs its body inside another working directory.

    Entering the block remembers the current directory of the process and
    changes into ``wd``; leaving the block changes back to the remembered
    directory.
    """

    def __init__(self, wd):
        # Only stored here; the directory is changed when the block is entered.
        self.wd = wd

    def __enter__(self):
        # Record where we came from before switching, so __exit__ can return.
        origin = os.getcwd()
        self.old_wd = origin
        os.chdir(self.wd)

    def __exit__(self, etype, value, traceback):
        # Returns None, so an exception raised inside the block propagates.
        os.chdir(self.old_wd)
|
1
setup.py
1
setup.py
|
@ -25,6 +25,7 @@ setup(
|
||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
'ocrd-calamari-recognize=ocrd_calamari.cli:ocrd_calamari_recognize',
|
'ocrd-calamari-recognize=ocrd_calamari.cli:ocrd_calamari_recognize',
|
||||||
|
'fix-calamari1-model=ocrd_calamari.fix_calamari1_model:fix_calamari1_model',
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue