mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-06 14:39:55 +02:00
test_run: make ocr -doit work (add truetype file)
This commit is contained in:
parent
4c6405713a
commit
480daa4c7c
4 changed files with 19 additions and 8 deletions
|
@ -46,7 +46,7 @@ optional-dependencies.test = {file = ["requirements-test.txt"]}
|
||||||
where = ["src"]
|
where = ["src"]
|
||||||
|
|
||||||
[tool.setuptools.package-data]
|
[tool.setuptools.package-data]
|
||||||
"*" = ["*.json", '*.yml', '*.xml', '*.xsd']
|
"*" = ["*.json", '*.yml', '*.xml', '*.xsd', '*.ttf']
|
||||||
|
|
||||||
[tool.coverage.run]
|
[tool.coverage.run]
|
||||||
branch = true
|
branch = true
|
||||||
|
|
BIN
src/eynollah/Charis-Regular.ttf
Normal file
BIN
src/eynollah/Charis-Regular.ttf
Normal file
Binary file not shown.
|
@ -6,6 +6,13 @@
|
||||||
document layout analysis (segmentation) with output in PAGE-XML
|
document layout analysis (segmentation) with output in PAGE-XML
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
|
||||||
|
import sys
|
||||||
|
if sys.version_info < (3, 10):
|
||||||
|
import importlib_resources
|
||||||
|
else:
|
||||||
|
import importlib.resources as importlib_resources
|
||||||
|
|
||||||
from difflib import SequenceMatcher as sq
|
from difflib import SequenceMatcher as sq
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
import math
|
import math
|
||||||
|
@ -5638,8 +5645,10 @@ class Eynollah_ocr:
|
||||||
|
|
||||||
if dir_out_image_text:
|
if dir_out_image_text:
|
||||||
|
|
||||||
font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
|
#font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
|
||||||
font = ImageFont.truetype(font_path, 40)
|
font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
|
||||||
|
with importlib_resources.as_file(font) as font:
|
||||||
|
font = ImageFont.truetype(font=font, size=40)
|
||||||
|
|
||||||
for indexer_text, bb_ind in enumerate(total_bb_coordinates):
|
for indexer_text, bb_ind in enumerate(total_bb_coordinates):
|
||||||
|
|
||||||
|
@ -5649,7 +5658,7 @@ class Eynollah_ocr:
|
||||||
w_bb = bb_ind[2]
|
w_bb = bb_ind[2]
|
||||||
h_bb = bb_ind[3]
|
h_bb = bb_ind[3]
|
||||||
|
|
||||||
font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) )
|
font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) )
|
||||||
|
|
||||||
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
|
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
|
||||||
|
|
||||||
|
@ -6135,8 +6144,10 @@ class Eynollah_ocr:
|
||||||
|
|
||||||
if dir_out_image_text:
|
if dir_out_image_text:
|
||||||
|
|
||||||
font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
|
#font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
|
||||||
font = ImageFont.truetype(font_path, 40)
|
font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
|
||||||
|
with importlib_resources.as_file(font) as font:
|
||||||
|
font = ImageFont.truetype(font=font, size=40)
|
||||||
|
|
||||||
for indexer_text, bb_ind in enumerate(total_bb_coordinates):
|
for indexer_text, bb_ind in enumerate(total_bb_coordinates):
|
||||||
|
|
||||||
|
@ -6146,7 +6157,7 @@ class Eynollah_ocr:
|
||||||
w_bb = bb_ind[2]
|
w_bb = bb_ind[2]
|
||||||
h_bb = bb_ind[3]
|
h_bb = bb_ind[3]
|
||||||
|
|
||||||
font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) )
|
font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font.path, w_bb, int(h_bb*0.4) )
|
||||||
|
|
||||||
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
|
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
|
||||||
|
|
||||||
|
|
|
@ -247,7 +247,7 @@ def test_run_eynollah_mbreorder_directory(tmp_path, subtests, pytestconfig, capl
|
||||||
def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
|
def test_run_eynollah_ocr_filename(tmp_path, subtests, pytestconfig, caplog):
|
||||||
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
|
infile = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
|
||||||
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
|
outfile = tmp_path.joinpath('kant_aufklaerung_1784_0020.xml')
|
||||||
outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.xml')
|
outrenderfile = tmp_path.joinpath('render').joinpath('kant_aufklaerung_1784_0020.png')
|
||||||
outrenderfile.parent.mkdir()
|
outrenderfile.parent.mkdir()
|
||||||
args = [
|
args = [
|
||||||
'-m', MODELS_OCR,
|
'-m', MODELS_OCR,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue