mirror of
https://github.com/qurator-spk/sbb_pixelwise_segmentation.git
synced 2025-08-18 06:00:04 +02:00
Load XML files with UTF-8 encoding (previously ISO-8859-5)
This commit is contained in:
parent
1fe31bdeb3
commit
ef0f08ec1f
2 changed files with 18 additions and 18 deletions
|
@@ -513,7 +513,7 @@ def visualize_ocr_text(xml_file, dir_xml, dir_out):
|
||||||
#y_bb = bb_ind[1]
|
#y_bb = bb_ind[1]
|
||||||
#w_bb = bb_ind[2]
|
#w_bb = bb_ind[2]
|
||||||
#h_bb = bb_ind[3]
|
#h_bb = bb_ind[3]
|
||||||
|
if ocr_texts[index]:
|
||||||
font = fit_text_single_line(draw, ocr_texts[index], font_path, w, int(h*0.4) )
|
font = fit_text_single_line(draw, ocr_texts[index], font_path, w, int(h*0.4) )
|
||||||
|
|
||||||
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
|
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
|
||||||
|
|
|
@@ -244,7 +244,7 @@ def update_region_contours(co_text, img_boundary, erosion_rate, dilation_rate, y
|
||||||
return co_text_eroded, img_boundary
|
return co_text_eroded, img_boundary
|
||||||
|
|
||||||
def get_textline_contours_for_visualization(xml_file):
|
def get_textline_contours_for_visualization(xml_file):
|
||||||
tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding = 'iso-8859-5'))
|
tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding='utf-8'))
|
||||||
root1=tree1.getroot()
|
root1=tree1.getroot()
|
||||||
alltags=[elem.tag for elem in root1.iter()]
|
alltags=[elem.tag for elem in root1.iter()]
|
||||||
link=alltags[0].split('}')[0]+'}'
|
link=alltags[0].split('}')[0]+'}'
|
||||||
|
@@ -285,7 +285,7 @@ def get_textline_contours_for_visualization(xml_file):
|
||||||
|
|
||||||
|
|
||||||
def get_textline_contours_and_ocr_text(xml_file):
|
def get_textline_contours_and_ocr_text(xml_file):
|
||||||
tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding = 'iso-8859-5'))
|
tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding='utf-8'))
|
||||||
root1=tree1.getroot()
|
root1=tree1.getroot()
|
||||||
alltags=[elem.tag for elem in root1.iter()]
|
alltags=[elem.tag for elem in root1.iter()]
|
||||||
link=alltags[0].split('}')[0]+'}'
|
link=alltags[0].split('}')[0]+'}'
|
||||||
|
@@ -355,7 +355,7 @@ def fit_text_single_line(draw, text, font_path, max_width, max_height):
|
||||||
return ImageFont.truetype(font_path, 10) # Smallest font fallback
|
return ImageFont.truetype(font_path, 10) # Smallest font fallback
|
||||||
|
|
||||||
def get_layout_contours_for_visualization(xml_file):
|
def get_layout_contours_for_visualization(xml_file):
|
||||||
tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding = 'iso-8859-5'))
|
tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding='utf-8'))
|
||||||
root1=tree1.getroot()
|
root1=tree1.getroot()
|
||||||
alltags=[elem.tag for elem in root1.iter()]
|
alltags=[elem.tag for elem in root1.iter()]
|
||||||
link=alltags[0].split('}')[0]+'}'
|
link=alltags[0].split('}')[0]+'}'
|
||||||
|
@@ -630,7 +630,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
||||||
for index in tqdm(range(len(gt_list))):
|
for index in tqdm(range(len(gt_list))):
|
||||||
#try:
|
#try:
|
||||||
print(gt_list[index])
|
print(gt_list[index])
|
||||||
tree1 = ET.parse(dir_in+'/'+gt_list[index], parser = ET.XMLParser(encoding = 'iso-8859-5'))
|
tree1 = ET.parse(dir_in+'/'+gt_list[index], parser = ET.XMLParser(encoding='utf-8'))
|
||||||
root1=tree1.getroot()
|
root1=tree1.getroot()
|
||||||
alltags=[elem.tag for elem in root1.iter()]
|
alltags=[elem.tag for elem in root1.iter()]
|
||||||
link=alltags[0].split('}')[0]+'}'
|
link=alltags[0].split('}')[0]+'}'
|
||||||
|
@@ -1311,7 +1311,7 @@ def find_new_features_of_contours(contours_main):
|
||||||
return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
|
return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
|
||||||
def read_xml(xml_file):
|
def read_xml(xml_file):
|
||||||
file_name = Path(xml_file).stem
|
file_name = Path(xml_file).stem
|
||||||
tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding = 'iso-8859-5'))
|
tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding='utf-8'))
|
||||||
root1=tree1.getroot()
|
root1=tree1.getroot()
|
||||||
alltags=[elem.tag for elem in root1.iter()]
|
alltags=[elem.tag for elem in root1.iter()]
|
||||||
link=alltags[0].split('}')[0]+'}'
|
link=alltags[0].split('}')[0]+'}'
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue