pagexml2label: enable the 'textline_new_concept' experiment

2025-07-29 12:20:00 +02:00 · 2024-05-17 09:10:13 +02:00 · 2024-05-17 09:10:13 +02:00 · 2623113b0c
commit 2623113b0c
parent 6ef86585c0
1 changed files with 73 additions and 0 deletions
--- a/pagexml2label.py
+++ b/pagexml2label.py
@ -217,6 +217,79 @@ class pagexml2word:
                except:
                    cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly )
                    
+            elif self.experiment == 'textline_new_concept':
+                region_tags = np.unique([x for x in alltags if x.endswith('TextLine')])
+                co_line = []
+
+                for tag in region_tags:
+                    if tag.endswith('}TextLine') or tag.endswith('}textline'):
+                        # print('sth')
+                        for nn in root1.iter(tag):
+                            c_t_in = []
+                            sumi = 0
+                            for vv in nn.iter():
+                                # check the format of coords
+                                if vv.tag == link + 'Coords':
+                                    coords = bool(vv.attrib)
+                                    if coords:
+                                        p_h = vv.attrib['points'].split(' ')
+                                        c_t_in.append(
+                                            np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h]))
+                                        break
+                                    else:
+                                        pass
+
+                                if vv.tag == link + 'Point':
+                                    c_t_in.append([int(np.float(vv.attrib['x'])), int(np.float(vv.attrib['y']))])
+                                    sumi += 1
+                                # print(vv.tag,'in')
+                                elif vv.tag != link + 'Point' and sumi >= 1:
+                                    break
+                            co_line.append(np.array(c_t_in))
+                
+                img_boundary = np.zeros((y_len, x_len))
+                co_textline_eroded = []
+                for con in co_line:
+                    # try:
+                    img_boundary_in = np.zeros((y_len, x_len))
+                    img_boundary_in = cv2.fillPoly(img_boundary_in, pts=[con], color=(1, 1, 1))
+                    # print('bidiahhhhaaa')
+
+                    # img_boundary_in = cv2.erode(img_boundary_in[:,:], KERNEL, iterations=7)#asiatica
+                    img_boundary_in = cv2.erode(img_boundary_in[:, :], KERNEL, iterations=1)
+
+                    pixel = 1
+                    min_size = 0
+                    con_eroded = self.return_contours_of_interested_region(img_boundary_in, pixel, min_size)
+
+                    try:
+                        co_textline_eroded.append(con_eroded[0])
+                    except:
+                        co_textline_eroded.append(con)
+
+                    img_boundary_in_dilated = cv2.dilate(img_boundary_in[:, :], KERNEL, iterations=3)
+                    # img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=5)
+
+                    boundary = img_boundary_in_dilated[:, :] - img_boundary_in[:, :]
+
+                    img_boundary[:, :][boundary[:, :] == 1] = 1
+
+                img = np.zeros((y_len, x_len, 3))
+                if self.output_type == '2d':
+                    img_poly = cv2.fillPoly(img, pts=co_textline_eroded, color=(1, 1, 1))
+                    img_poly[:, :][img_boundary[:, :] == 1] = 2
+                elif self.output_type == '3d':
+                    img_poly = cv2.fillPoly(img, pts=co_textline_eroded, color=(255, 0, 0))
+                    img_poly[:, :, 0][img_boundary[:, :] == 1] = 255
+                    img_poly[:, :, 1][img_boundary[:, :] == 1] = 125
+                    img_poly[:, :, 2][img_boundary[:, :] == 1] = 125
+
+                try:
+                    cv2.imwrite(self.output_dir + '/' + self.gt_list[index].split('-')[1].split('.')[0] + '.png',
+                                img_poly)
+                except:
+                    cv2.imwrite(self.output_dir + '/' + self.gt_list[index].split('.')[0] + '.png', img_poly)
+                    
            elif self.experiment=='layout_for_main_regions':
                region_tags=np.unique([x for x in alltags if x.endswith('Region')])   
                #print(region_tags)