XML encoding should be utf-8, not utf8

... and should use OCR-D's generateDS PAGE API consistently
This commit is contained in:
kba 2025-10-10 16:39:16 +02:00
parent 2056a8bdb9
commit 745cf3be48
2 changed files with 3 additions and 3 deletions

View file

@ -5284,7 +5284,7 @@ class Eynollah_ocr:
##unicode_textpage.text = tot_page_text
ET.register_namespace("",name_space)
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None)
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
else:
###max_len = 280#512#280#512
###padding_token = 1500#299#1500#299
@ -5833,5 +5833,5 @@ class Eynollah_ocr:
##unicode_textpage.text = tot_page_text
ET.register_namespace("",name_space)
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None)
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
#print("Job done in %.1fs", time.time() - t0)

View file

@ -805,7 +805,7 @@ class machine_based_reading_order_on_layout:
tree_xml.write(os.path.join(dir_out, file_name+'.xml'),
xml_declaration=True,
method='xml',
encoding="utf8",
encoding="utf-8",
default_namespace=None)
#sys.exit()