test_run: make ocr -doit work (add truetype file)

This commit is contained in:
Robert Sachunsky 2025-09-25 22:25:05 +02:00
parent 4c6405713a
commit 480daa4c7c
4 changed files with 19 additions and 8 deletions

View file

@ -46,7 +46,7 @@ optional-dependencies.test = {file = ["requirements-test.txt"]}
where = ["src"] where = ["src"]
[tool.setuptools.package-data] [tool.setuptools.package-data]
"*" = ["*.json", '*.yml', '*.xml', '*.xsd'] "*" = ["*.json", '*.yml', '*.xml', '*.xsd', '*.ttf']
[tool.coverage.run] [tool.coverage.run]
branch = true branch = true

Binary file not shown.

View file

@ -6,6 +6,13 @@
document layout analysis (segmentation) with output in PAGE-XML document layout analysis (segmentation) with output in PAGE-XML
""" """
# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
import sys
if sys.version_info < (3, 10):
import importlib_resources
else:
import importlib.resources as importlib_resources
from difflib import SequenceMatcher as sq from difflib import SequenceMatcher as sq
from PIL import Image, ImageDraw, ImageFont from PIL import Image, ImageDraw, ImageFont
import math import math
@ -5638,8 +5645,10 @@ class Eynollah_ocr:
if dir_out_image_text: if dir_out_image_text:
font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! #font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
font = ImageFont.truetype(font_path, 40) font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
with importlib_resources.as_file(font) as font:
font = ImageFont.truetype(font=font, size=40)
for indexer_text, bb_ind in enumerate(total_bb_coordinates): for indexer_text, bb_ind in enumerate(total_bb_coordinates):
@ -5649,7 +5658,7 @@ class Eynollah_ocr:
w_bb = bb_ind[2] w_bb = bb_ind[2]
h_bb = bb_ind[3] h_bb = bb_ind[3]
font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) ) font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) )
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
@ -6135,8 +6144,10 @@ class Eynollah_ocr:
if dir_out_image_text: if dir_out_image_text:
font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! #font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
font = ImageFont.truetype(font_path, 40) font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
with importlib_resources.as_file(font) as font:
font = ImageFont.truetype(font=font, size=40)
for indexer_text, bb_ind in enumerate(total_bb_coordinates): for indexer_text, bb_ind in enumerate(total_bb_coordinates):
@ -6146,7 +6157,7 @@ class Eynollah_ocr:
w_bb = bb_ind[2] w_bb = bb_ind[2]
h_bb = bb_ind[3] h_bb = bb_ind[3]
font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) ) font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) )
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)

View file

@ -247,7 +247,7 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, capl
def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog): def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif') infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml') outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.xml') outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')
outrenderfile.parent.mkdir() outrenderfile.parent.mkdir()
args = [ args = [
'-m', MODELS_OCR, '-m', MODELS_OCR,