mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-15 19:09:58 +02:00
XML encoding should be utf-8 not utf8
... and should use OCR-D's generateDS PAGE API consistently
This commit is contained in:
parent
2056a8bdb9
commit
745cf3be48
2 changed files with 3 additions and 3 deletions
|
@@ -5284,7 +5284,7 @@ class Eynollah_ocr:
|
||||||
##unicode_textpage.text = tot_page_text
|
##unicode_textpage.text = tot_page_text
|
||||||
|
|
||||||
ET.register_namespace("",name_space)
|
ET.register_namespace("",name_space)
|
||||||
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None)
|
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
|
||||||
else:
|
else:
|
||||||
###max_len = 280#512#280#512
|
###max_len = 280#512#280#512
|
||||||
###padding_token = 1500#299#1500#299
|
###padding_token = 1500#299#1500#299
|
||||||
|
@@ -5833,5 +5833,5 @@ class Eynollah_ocr:
|
||||||
##unicode_textpage.text = tot_page_text
|
##unicode_textpage.text = tot_page_text
|
||||||
|
|
||||||
ET.register_namespace("",name_space)
|
ET.register_namespace("",name_space)
|
||||||
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None)
|
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
|
||||||
#print("Job done in %.1fs", time.time() - t0)
|
#print("Job done in %.1fs", time.time() - t0)
|
||||||
|
|
|
@@ -805,7 +805,7 @@ class machine_based_reading_order_on_layout:
|
||||||
tree_xml.write(os.path.join(dir_out, file_name+'.xml'),
|
tree_xml.write(os.path.join(dir_out, file_name+'.xml'),
|
||||||
xml_declaration=True,
|
xml_declaration=True,
|
||||||
method='xml',
|
method='xml',
|
||||||
encoding="utf8",
|
encoding="utf-8",
|
||||||
default_namespace=None)
|
default_namespace=None)
|
||||||
|
|
||||||
#sys.exit()
|
#sys.exit()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue