test: use other fileGrp to avoid assets#87

fix/readme-no-checkpoint
Robert Sachunsky 2 years ago
parent 1f0252d0d7
commit 5fddd32929

@ -46,14 +46,14 @@ def workspace():
# Remove GT Words and TextEquivs, to not accidentally check GT text instead of the OCR text # Remove GT Words and TextEquivs, to not accidentally check GT text instead of the OCR text
# XXX Review data again # XXX Review data again
# XXX Make this more robust against namespace version changes # XXX Make this more robust against namespace version changes
for of in workspace.mets.find_files(fileGrp="OCR-D-GT-SEG-LINE"): for of in workspace.mets.find_files(fileGrp="OCR-D-GT-SEG-WORD-GLYPH"):
workspace.download_file(of) workspace.download_file(of)
for to_remove in ["//pc:Word", "//pc:TextEquiv"]: path = os.path.join(workspace.directory, of.local_filename)
for ff in glob(os.path.join(WORKSPACE_DIR, "OCR-D-GT-SEG-LINE", "*")): tree = etree.parse(path)
tree = etree.parse(ff) for to_remove in ["//pc:Word", "//pc:TextEquiv"]:
for e in tree.xpath(to_remove, namespaces=NSMAP_GT): for e in tree.xpath(to_remove, namespaces=NSMAP_GT):
e.getparent().remove(e) e.getparent().remove(e)
tree.write(ff, xml_declaration=True, encoding="utf-8") tree.write(path, xml_declaration=True, encoding="utf-8")
return workspace return workspace
@ -61,7 +61,7 @@ def workspace():
def test_recognize(workspace): def test_recognize(workspace):
CalamariRecognize( CalamariRecognize(
workspace, workspace,
input_file_grp="OCR-D-GT-SEG-LINE", input_file_grp="OCR-D-GT-SEG-WORD-GLYPH",
output_file_grp="OCR-D-OCR-CALAMARI", output_file_grp="OCR-D-OCR-CALAMARI",
parameter={ parameter={
"checkpoint_dir": CHECKPOINT_DIR, "checkpoint_dir": CHECKPOINT_DIR,
@ -79,7 +79,7 @@ def test_recognize_should_warn_if_given_rgb_image_and_single_channel_model(works
caplog.set_level(logging.WARNING) caplog.set_level(logging.WARNING)
CalamariRecognize( CalamariRecognize(
workspace, workspace,
input_file_grp="OCR-D-GT-SEG-LINE", input_file_grp="OCR-D-GT-SEG-WORD-GLYPH",
output_file_grp="OCR-D-OCR-CALAMARI-BROKEN", output_file_grp="OCR-D-OCR-CALAMARI-BROKEN",
parameter={'checkpoint_dir': CHECKPOINT_DIR} parameter={'checkpoint_dir': CHECKPOINT_DIR}
).process() ).process()
@ -91,7 +91,7 @@ def test_recognize_should_warn_if_given_rgb_image_and_single_channel_model(works
def test_word_segmentation(workspace): def test_word_segmentation(workspace):
CalamariRecognize( CalamariRecognize(
workspace, workspace,
input_file_grp="OCR-D-GT-SEG-LINE", input_file_grp="OCR-D-GT-SEG-WORD-GLYPH",
output_file_grp="OCR-D-OCR-CALAMARI", output_file_grp="OCR-D-OCR-CALAMARI",
parameter={ parameter={
"checkpoint_dir": CHECKPOINT_DIR, "checkpoint_dir": CHECKPOINT_DIR,
@ -123,7 +123,7 @@ def test_word_segmentation(workspace):
def test_glyphs(workspace): def test_glyphs(workspace):
CalamariRecognize( CalamariRecognize(
workspace, workspace,
input_file_grp="OCR-D-GT-SEG-LINE", input_file_grp="OCR-D-GT-SEG-WORD-GLYPH",
output_file_grp="OCR-D-OCR-CALAMARI", output_file_grp="OCR-D-OCR-CALAMARI",
parameter={ parameter={
"checkpoint_dir": CHECKPOINT_DIR, "checkpoint_dir": CHECKPOINT_DIR,

Loading…
Cancel
Save