mirror of
https://github.com/mikegerber/ocrd_calamari.git
synced 2025-06-08 19:29:53 +02:00
test: use other fileGrp to avoid assets#87
This commit is contained in:
parent
1f0252d0d7
commit
5fddd32929
1 changed file with 9 additions and 9 deletions
|
@ -46,14 +46,14 @@ def workspace():
|
||||||
# Remove GT Words and TextEquivs, to not accidentally check GT text instead of the OCR text
|
# Remove GT Words and TextEquivs, to not accidentally check GT text instead of the OCR text
|
||||||
# XXX Review data again
|
# XXX Review data again
|
||||||
# XXX Make this more robust against namespace version changes
|
# XXX Make this more robust against namespace version changes
|
||||||
for of in workspace.mets.find_files(fileGrp="OCR-D-GT-SEG-LINE"):
|
for of in workspace.mets.find_files(fileGrp="OCR-D-GT-SEG-WORD-GLYPH"):
|
||||||
workspace.download_file(of)
|
workspace.download_file(of)
|
||||||
|
path = os.path.join(workspace.directory, of.local_filename)
|
||||||
|
tree = etree.parse(path)
|
||||||
for to_remove in ["//pc:Word", "//pc:TextEquiv"]:
|
for to_remove in ["//pc:Word", "//pc:TextEquiv"]:
|
||||||
for ff in glob(os.path.join(WORKSPACE_DIR, "OCR-D-GT-SEG-LINE", "*")):
|
|
||||||
tree = etree.parse(ff)
|
|
||||||
for e in tree.xpath(to_remove, namespaces=NSMAP_GT):
|
for e in tree.xpath(to_remove, namespaces=NSMAP_GT):
|
||||||
e.getparent().remove(e)
|
e.getparent().remove(e)
|
||||||
tree.write(ff, xml_declaration=True, encoding="utf-8")
|
tree.write(path, xml_declaration=True, encoding="utf-8")
|
||||||
|
|
||||||
return workspace
|
return workspace
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ def workspace():
|
||||||
def test_recognize(workspace):
|
def test_recognize(workspace):
|
||||||
CalamariRecognize(
|
CalamariRecognize(
|
||||||
workspace,
|
workspace,
|
||||||
input_file_grp="OCR-D-GT-SEG-LINE",
|
input_file_grp="OCR-D-GT-SEG-WORD-GLYPH",
|
||||||
output_file_grp="OCR-D-OCR-CALAMARI",
|
output_file_grp="OCR-D-OCR-CALAMARI",
|
||||||
parameter={
|
parameter={
|
||||||
"checkpoint_dir": CHECKPOINT_DIR,
|
"checkpoint_dir": CHECKPOINT_DIR,
|
||||||
|
@ -79,7 +79,7 @@ def test_recognize_should_warn_if_given_rgb_image_and_single_channel_model(works
|
||||||
caplog.set_level(logging.WARNING)
|
caplog.set_level(logging.WARNING)
|
||||||
CalamariRecognize(
|
CalamariRecognize(
|
||||||
workspace,
|
workspace,
|
||||||
input_file_grp="OCR-D-GT-SEG-LINE",
|
input_file_grp="OCR-D-GT-SEG-WORD-GLYPH",
|
||||||
output_file_grp="OCR-D-OCR-CALAMARI-BROKEN",
|
output_file_grp="OCR-D-OCR-CALAMARI-BROKEN",
|
||||||
parameter={'checkpoint_dir': CHECKPOINT_DIR}
|
parameter={'checkpoint_dir': CHECKPOINT_DIR}
|
||||||
).process()
|
).process()
|
||||||
|
@ -91,7 +91,7 @@ def test_recognize_should_warn_if_given_rgb_image_and_single_channel_model(works
|
||||||
def test_word_segmentation(workspace):
|
def test_word_segmentation(workspace):
|
||||||
CalamariRecognize(
|
CalamariRecognize(
|
||||||
workspace,
|
workspace,
|
||||||
input_file_grp="OCR-D-GT-SEG-LINE",
|
input_file_grp="OCR-D-GT-SEG-WORD-GLYPH",
|
||||||
output_file_grp="OCR-D-OCR-CALAMARI",
|
output_file_grp="OCR-D-OCR-CALAMARI",
|
||||||
parameter={
|
parameter={
|
||||||
"checkpoint_dir": CHECKPOINT_DIR,
|
"checkpoint_dir": CHECKPOINT_DIR,
|
||||||
|
@ -123,7 +123,7 @@ def test_word_segmentation(workspace):
|
||||||
def test_glyphs(workspace):
|
def test_glyphs(workspace):
|
||||||
CalamariRecognize(
|
CalamariRecognize(
|
||||||
workspace,
|
workspace,
|
||||||
input_file_grp="OCR-D-GT-SEG-LINE",
|
input_file_grp="OCR-D-GT-SEG-WORD-GLYPH",
|
||||||
output_file_grp="OCR-D-OCR-CALAMARI",
|
output_file_grp="OCR-D-OCR-CALAMARI",
|
||||||
parameter={
|
parameter={
|
||||||
"checkpoint_dir": CHECKPOINT_DIR,
|
"checkpoint_dir": CHECKPOINT_DIR,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue